"""Traditional syslog (RFC 3164) parser. Handles the format written by rsyslog and syslogd on most Linux distros: May 11 14:23:01 hostname sshd[1234]: Accepted publickey for x from ... May 11 14:23:01 hostname kernel: [12345.678] usb disconnect Files: /var/log/syslog (Debian/Ubuntu), /var/log/messages (RHEL/Fedora), /var/log/auth.log, /var/log/kern.log """ from __future__ import annotations import re from datetime import datetime, timezone from typing import Iterator from app.glean.base import ( SourceState, apply_patterns, detect_severity, make_entry_id, now_iso, ) from app.services.models import LogPattern, RetrievedEntry _MONTHS = { "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12, } # May 11 14:23:01 hostname ident[pid]: message # May 1 04:00:00 hostname ident: message (no pid, day may be space-padded) # <134>May 11 14:23:01 ... (optional RFC 3164 PRI prefix from network syslog) _PRI_RE = re.compile(r"^<\d{1,3}>") _LINE_RE = re.compile( r"^(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)" r"\s+(?P\d{1,2})\s+(?P