fix: ingestors treat naive log timestamps as local time, not UTC

All five parsers (plex, syslog, servarr, qbittorrent, plaintext) were
attaching tzinfo=timezone.utc to naive datetimes parsed from log files
(via .replace(tzinfo=timezone.utc) or, in one parser, the tzinfo=
constructor argument). That only labels the value as UTC without
converting it, even though the data is actually local time.
On a UTC-7 system, a 2pm entry was stored as 14:00Z instead of 21:00Z,
causing time-window searches to return zero results.

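Fix: use .astimezone(timezone.utc) instead, which interprets the naive
datetime as the system's local time and converts it to UTC. This assumes
the ingesting host runs in the same timezone the logs were written in.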

Tests are updated to convert the stored timestamp back to local time
before asserting, so they pass in any timezone, not just UTC.
This commit is contained in:
pyr0ball 2026-05-13 18:16:33 -07:00
parent 251109ae96
commit 0b3d95cd26
7 changed files with 26 additions and 13 deletions

View file

@ -38,7 +38,7 @@ def _extract_ts(line: str) -> tuple[str, str]:
dt = datetime.strptime(clean, fmt)
if dt.year == 1900:
dt = dt.replace(year=datetime.now().year)
dt = dt.replace(tzinfo=timezone.utc)
dt = dt.astimezone(timezone.utc)
return ts_raw, dt.isoformat()
except ValueError:
pass

View file

@ -39,8 +39,8 @@ _LEVEL_MAP = {
def _parse_ts(month: str, day: str, year: str, time: str) -> tuple[str, str]:
raw = f"{month} {day}, {year} {time}"
try:
# Plex logs are local time — treat as UTC for now (no TZ in log)
dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").replace(tzinfo=timezone.utc)
# Plex logs use local time; convert to UTC for consistent DB storage
dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").astimezone(timezone.utc)
return raw, dt.isoformat()
except ValueError:
return raw, ""

View file

@ -55,7 +55,7 @@ def _parse_ts(ts_str: str) -> tuple[str, str]:
"""Return (raw, iso). Handles classic (space sep) and hotio (T sep) timestamps."""
for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"):
try:
dt = datetime.strptime(ts_str, fmt).replace(tzinfo=timezone.utc)
dt = datetime.strptime(ts_str, fmt).astimezone(timezone.utc)
return ts_str, dt.isoformat()
except ValueError:
continue

View file

@ -38,7 +38,7 @@ _LEVEL_MAP: dict[str, str | None] = {
def _parse_ts(ts_str: str) -> tuple[str, str]:
base = ts_str.split(".")[0]
try:
dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").astimezone(timezone.utc)
return ts_str, dt.isoformat()
except ValueError:
return ts_str, ""

View file

@ -47,8 +47,7 @@ def _parse_ts(month_str: str, day: str, time_str: str) -> tuple[str, str]:
ts_raw = f"{month_str} {int(day):2d} {time_str}"
try:
dt = datetime(year, month, int(day),
*[int(p) for p in time_str.split(":")],
tzinfo=timezone.utc)
*[int(p) for p in time_str.split(":")]).astimezone(timezone.utc)
return ts_raw, dt.isoformat()
except ValueError:
return ts_raw, ""

View file

@ -79,10 +79,12 @@ class TestClassicParser:
assert len(self._parse(CLASSIC_LOG)) == 7
def test_startup_entry(self):
from datetime import datetime
e = self._parse(CLASSIC_LOG)[0]
assert "qBittorrent v5.0.3 started" in e.text
assert e.severity is None
assert e.timestamp_iso == "2026-05-09T14:10:01+00:00"
local = datetime.fromisoformat(e.timestamp_iso).astimezone()
assert (local.hour, local.minute, local.second) == (14, 10, 1)
def test_warning_severity(self):
e = self._parse(CLASSIC_LOG)[1]
@ -103,8 +105,10 @@ class TestClassicParser:
assert e.text == "Normal message without bracket level"
def test_dash_format_timestamp(self):
from datetime import datetime
entries = list(parse(iter(CLASSIC_DASH.splitlines(keepends=True)), "qbit", []))
assert entries[0].timestamp_iso == "2026-05-09T14:10:01+00:00"
local = datetime.fromisoformat(entries[0].timestamp_iso).astimezone()
assert (local.hour, local.minute, local.second) == (14, 10, 1)
def test_source_id_propagated(self):
assert all(e.source_id == "qbit_test" for e in self._parse(CLASSIC_LOG))
@ -142,8 +146,12 @@ class TestHotioParser:
assert e.severity == "CRITICAL"
def test_iso_timestamp_parsed(self):
from datetime import datetime
e = self._parse(HOTIO_LOG)[0]
assert e.timestamp_iso == "2026-04-26T03:32:59+00:00"
ts = datetime.fromisoformat(e.timestamp_iso)
assert ts.utcoffset() is not None # stored as UTC-aware
local = ts.astimezone()
assert (local.hour, local.minute, local.second) == (3, 32, 59)
assert e.timestamp_raw == "2026-04-26T03:32:59"
def test_source_id_propagated(self):

View file

@ -48,13 +48,19 @@ class TestParser:
assert entries[0].text.startswith("[sshd]")
def test_timestamp_parsed(self):
from datetime import datetime
entries = self._parse(SYSLOG_SAMPLE)
assert "14:23:01" in entries[0].timestamp_iso
ts = datetime.fromisoformat(entries[0].timestamp_iso)
assert ts.utcoffset() is not None # stored as UTC-aware
local = ts.astimezone()
assert (local.hour, local.minute, local.second) == (14, 23, 1)
def test_space_padded_day(self):
from datetime import datetime
entries = self._parse(SYSLOG_SAMPLE)
cron_entry = entries[4]
assert "04:00:00" in cron_entry.timestamp_iso
ts = datetime.fromisoformat(entries[4].timestamp_iso)
local = ts.astimezone()
assert (local.hour, local.minute, local.second) == (4, 0, 0)
def test_source_id_propagated(self):
assert all(e.source_id == "syslog_test" for e in self._parse(SYSLOG_SAMPLE))