diff --git a/app/ingest/dmesg_log.py b/app/ingest/dmesg_log.py new file mode 100644 index 0000000..84058aa --- /dev/null +++ b/app/ingest/dmesg_log.py @@ -0,0 +1,102 @@ +"""dmesg log parser. + +Handles two formats: + +Relative (always available): + [ 0.000000] Linux version 6.8.0-65-generic + [12345.678901] usb 1-1: USB disconnect, device number 2 + +Human-readable (dmesg -T, util-linux >= 2.21): + [Mon May 11 14:23:01 2026] usb 1-1: USB disconnect, device number 2 + +The export_journal.sh script exports with -T when available, falling back +to plain dmesg. Relative-timestamp entries get no timestamp_iso. +""" +from __future__ import annotations + +import re +from datetime import datetime, timezone +from typing import Iterator + +from app.ingest.base import ( + SourceState, apply_patterns, detect_severity, make_entry_id, now_iso, +) +from app.services.models import LogPattern, RetrievedEntry + +_DAYS = {"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"} +_MONTHS_ABBR = { + "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, + "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12, +} + +# [ 0.000000] or [12345.678901] +_RELATIVE_RE = re.compile(r"^\[\s*(?P\d+\.\d+)\]\s+(?P.+)$") +# [Mon May 11 14:23:01 2026] +_HUMAN_RE = re.compile( + r"^\[(?P\w{3})\s+(?P\w{3})\s+(?P\d{1,2})" + r"\s+(?P