From f64b8341770157461c16d7b30df949743e17cbab Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 13 May 2026 18:16:33 -0700 Subject: [PATCH] fix: ingestors treat naive log timestamps as local time, not UTC All five parsers (plex, syslog, servarr, qbittorrent, plaintext) were using .replace(tzinfo=timezone.utc) on naive datetimes parsed from log files, which slaps a UTC label on what is actually local-time data. On a UTC-7 system a 2pm entry was stored as 14:00Z instead of 21:00Z, causing time-window searches to return zero results. Fix: use .astimezone(timezone.utc) instead, which treats the naive datetime as local time and converts correctly. Tests updated to round-trip back to local time for assertion so they pass on any timezone, not just UTC. --- app/ingest/plaintext.py | 2 +- app/ingest/plex.py | 4 ++-- app/ingest/qbittorrent.py | 2 +- app/ingest/servarr.py | 2 +- app/ingest/syslog.py | 3 +-- tests/test_ingest_qbittorrent.py | 14 +++++++++++--- tests/test_ingest_syslog.py | 12 +++++++++--- 7 files changed, 26 insertions(+), 13 deletions(-) diff --git a/app/ingest/plaintext.py b/app/ingest/plaintext.py index 1ff0df4..1bb83d7 100644 --- a/app/ingest/plaintext.py +++ b/app/ingest/plaintext.py @@ -38,7 +38,7 @@ def _extract_ts(line: str) -> tuple[str, str]: dt = datetime.strptime(clean, fmt) if dt.year == 1900: dt = dt.replace(year=datetime.now().year) - dt = dt.replace(tzinfo=timezone.utc) + dt = dt.astimezone(timezone.utc) return ts_raw, dt.isoformat() except ValueError: pass diff --git a/app/ingest/plex.py b/app/ingest/plex.py index c2e03d5..89d7232 100644 --- a/app/ingest/plex.py +++ b/app/ingest/plex.py @@ -39,8 +39,8 @@ _LEVEL_MAP = { def _parse_ts(month: str, day: str, year: str, time: str) -> tuple[str, str]: raw = f"{month} {day}, {year} {time}" try: - # Plex logs are local time — treat as UTC for now (no TZ in log) - dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").replace(tzinfo=timezone.utc) + # Plex logs use local time; convert to UTC for consistent DB storage + dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").astimezone(timezone.utc) return raw, dt.isoformat() except ValueError: return raw, "" diff --git a/app/ingest/qbittorrent.py b/app/ingest/qbittorrent.py index 564470d..404c84c 100644 --- a/app/ingest/qbittorrent.py +++ b/app/ingest/qbittorrent.py @@ -55,7 +55,7 @@ def _parse_ts(ts_str: str) -> tuple[str, str]: """Return (raw, iso). Handles classic (space sep) and hotio (T sep) timestamps.""" for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"): try: - dt = datetime.strptime(ts_str, fmt).replace(tzinfo=timezone.utc) + dt = datetime.strptime(ts_str, fmt).astimezone(timezone.utc) return ts_str, dt.isoformat() except ValueError: continue diff --git a/app/ingest/servarr.py b/app/ingest/servarr.py index 357d5bd..b59471e 100644 --- a/app/ingest/servarr.py +++ b/app/ingest/servarr.py @@ -38,7 +38,7 @@ _LEVEL_MAP: dict[str, str | None] = { def _parse_ts(ts_str: str) -> tuple[str, str]: base = ts_str.split(".")[0] try: - dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc) + dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").astimezone(timezone.utc) return ts_str, dt.isoformat() except ValueError: return ts_str, "" diff --git a/app/ingest/syslog.py b/app/ingest/syslog.py index 1bc9cd5..81f38b1 100644 --- a/app/ingest/syslog.py +++ b/app/ingest/syslog.py @@ -47,8 +47,7 @@ def _parse_ts(month_str: str, day: str, time_str: str) -> tuple[str, str]: ts_raw = f"{month_str} {int(day):2d} {time_str}" try: dt = datetime(year, month, int(day), - *[int(p) for p in time_str.split(":")], - tzinfo=timezone.utc) + *[int(p) for p in time_str.split(":")]).astimezone(timezone.utc) return ts_raw, dt.isoformat() except ValueError: return ts_raw, "" diff --git a/tests/test_ingest_qbittorrent.py b/tests/test_ingest_qbittorrent.py index 629f6a3..4b3c874 100644 --- a/tests/test_ingest_qbittorrent.py +++ b/tests/test_ingest_qbittorrent.py @@ -79,10 +79,12 @@ class TestClassicParser: assert len(self._parse(CLASSIC_LOG)) == 7 def test_startup_entry(self): + from datetime import datetime e = self._parse(CLASSIC_LOG)[0] assert "qBittorrent v5.0.3 started" in e.text assert e.severity is None - assert e.timestamp_iso == "2026-05-09T14:10:01+00:00" + local = datetime.fromisoformat(e.timestamp_iso).astimezone() + assert (local.hour, local.minute, local.second) == (14, 10, 1) def test_warning_severity(self): e = self._parse(CLASSIC_LOG)[1] @@ -103,8 +105,10 @@ class TestClassicParser: assert e.text == "Normal message without bracket level" def test_dash_format_timestamp(self): + from datetime import datetime entries = list(parse(iter(CLASSIC_DASH.splitlines(keepends=True)), "qbit", [])) - assert entries[0].timestamp_iso == "2026-05-09T14:10:01+00:00" + local = datetime.fromisoformat(entries[0].timestamp_iso).astimezone() + assert (local.hour, local.minute, local.second) == (14, 10, 1) def test_source_id_propagated(self): assert all(e.source_id == "qbit_test" for e in self._parse(CLASSIC_LOG)) @@ -142,8 +146,12 @@ class TestHotioParser: assert e.severity == "CRITICAL" def test_iso_timestamp_parsed(self): + from datetime import datetime e = self._parse(HOTIO_LOG)[0] - assert e.timestamp_iso == "2026-04-26T03:32:59+00:00" + ts = datetime.fromisoformat(e.timestamp_iso) + assert ts.utcoffset() is not None # stored as UTC-aware + local = ts.astimezone() + assert (local.hour, local.minute, local.second) == (3, 32, 59) assert e.timestamp_raw == "2026-04-26T03:32:59" def test_source_id_propagated(self): diff --git a/tests/test_ingest_syslog.py b/tests/test_ingest_syslog.py index 0630745..8dea656 100644 --- a/tests/test_ingest_syslog.py +++ b/tests/test_ingest_syslog.py @@ -48,13 +48,19 @@ class TestParser: assert entries[0].text.startswith("[sshd]") def test_timestamp_parsed(self): + from datetime import datetime entries = self._parse(SYSLOG_SAMPLE) - assert "14:23:01" in entries[0].timestamp_iso + ts = datetime.fromisoformat(entries[0].timestamp_iso) + assert ts.utcoffset() is not None # stored as UTC-aware + local = ts.astimezone() + assert (local.hour, local.minute, local.second) == (14, 23, 1) def test_space_padded_day(self): + from datetime import datetime entries = self._parse(SYSLOG_SAMPLE) - cron_entry = entries[4] - assert "04:00:00" in cron_entry.timestamp_iso + ts = datetime.fromisoformat(entries[4].timestamp_iso) + local = ts.astimezone() + assert (local.hour, local.minute, local.second) == (4, 0, 0) def test_source_id_propagated(self): assert all(e.source_id == "syslog_test" for e in self._parse(SYSLOG_SAMPLE))