fix: ingestors treat naive log timestamps as local time, not UTC
All five parsers (plex, syslog, servarr, qbittorrent, plaintext) were calling .replace(tzinfo=timezone.utc) on naive datetimes parsed from log files, which attaches a UTC label to what is actually local-time data without shifting the clock value. On a UTC-7 system, a 2 pm entry was stored as 14:00Z instead of 21:00Z, causing time-window searches to return zero results. Fix: use .astimezone(timezone.utc) instead, which interprets the naive datetime as system local time and converts it to UTC. Tests now convert the stored timestamp back to local time before asserting, so they pass in any timezone, not just UTC.
This commit is contained in:
parent
e6075f80b3
commit
32f44700f9
7 changed files with 26 additions and 13 deletions
|
|
@@ -38,7 +38,7 @@ def _extract_ts(line: str) -> tuple[str, str]:
|
|||
dt = datetime.strptime(clean, fmt)
|
||||
if dt.year == 1900:
|
||||
dt = dt.replace(year=datetime.now().year)
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
dt = dt.astimezone(timezone.utc)
|
||||
return ts_raw, dt.isoformat()
|
||||
except ValueError:
|
||||
pass
|
||||
|
|
|
|||
|
|
@@ -39,8 +39,8 @@ _LEVEL_MAP = {
|
|||
def _parse_ts(month: str, day: str, year: str, time: str) -> tuple[str, str]:
|
||||
raw = f"{month} {day}, {year} {time}"
|
||||
try:
|
||||
# Plex logs are local time — treat as UTC for now (no TZ in log)
|
||||
dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").replace(tzinfo=timezone.utc)
|
||||
# Plex logs use local time; convert to UTC for consistent DB storage
|
||||
dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").astimezone(timezone.utc)
|
||||
return raw, dt.isoformat()
|
||||
except ValueError:
|
||||
return raw, ""
|
||||
|
|
|
|||
|
|
@@ -55,7 +55,7 @@ def _parse_ts(ts_str: str) -> tuple[str, str]:
|
|||
"""Return (raw, iso). Handles classic (space sep) and hotio (T sep) timestamps."""
|
||||
for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"):
|
||||
try:
|
||||
dt = datetime.strptime(ts_str, fmt).replace(tzinfo=timezone.utc)
|
||||
dt = datetime.strptime(ts_str, fmt).astimezone(timezone.utc)
|
||||
return ts_str, dt.isoformat()
|
||||
except ValueError:
|
||||
continue
|
||||
|
|
|
|||
|
|
@@ -38,7 +38,7 @@ _LEVEL_MAP: dict[str, str | None] = {
|
|||
def _parse_ts(ts_str: str) -> tuple[str, str]:
|
||||
base = ts_str.split(".")[0]
|
||||
try:
|
||||
dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
|
||||
dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").astimezone(timezone.utc)
|
||||
return ts_str, dt.isoformat()
|
||||
except ValueError:
|
||||
return ts_str, ""
|
||||
|
|
|
|||
|
|
@@ -47,8 +47,7 @@ def _parse_ts(month_str: str, day: str, time_str: str) -> tuple[str, str]:
|
|||
ts_raw = f"{month_str} {int(day):2d} {time_str}"
|
||||
try:
|
||||
dt = datetime(year, month, int(day),
|
||||
*[int(p) for p in time_str.split(":")],
|
||||
tzinfo=timezone.utc)
|
||||
*[int(p) for p in time_str.split(":")]).astimezone(timezone.utc)
|
||||
return ts_raw, dt.isoformat()
|
||||
except ValueError:
|
||||
return ts_raw, ""
|
||||
|
|
|
|||
|
|
@@ -79,10 +79,12 @@ class TestClassicParser:
|
|||
assert len(self._parse(CLASSIC_LOG)) == 7
|
||||
|
||||
def test_startup_entry(self):
|
||||
from datetime import datetime
|
||||
e = self._parse(CLASSIC_LOG)[0]
|
||||
assert "qBittorrent v5.0.3 started" in e.text
|
||||
assert e.severity is None
|
||||
assert e.timestamp_iso == "2026-05-09T14:10:01+00:00"
|
||||
local = datetime.fromisoformat(e.timestamp_iso).astimezone()
|
||||
assert (local.hour, local.minute, local.second) == (14, 10, 1)
|
||||
|
||||
def test_warning_severity(self):
|
||||
e = self._parse(CLASSIC_LOG)[1]
|
||||
|
|
@@ -103,8 +105,10 @@ class TestClassicParser:
|
|||
assert e.text == "Normal message without bracket level"
|
||||
|
||||
def test_dash_format_timestamp(self):
|
||||
from datetime import datetime
|
||||
entries = list(parse(iter(CLASSIC_DASH.splitlines(keepends=True)), "qbit", []))
|
||||
assert entries[0].timestamp_iso == "2026-05-09T14:10:01+00:00"
|
||||
local = datetime.fromisoformat(entries[0].timestamp_iso).astimezone()
|
||||
assert (local.hour, local.minute, local.second) == (14, 10, 1)
|
||||
|
||||
def test_source_id_propagated(self):
|
||||
assert all(e.source_id == "qbit_test" for e in self._parse(CLASSIC_LOG))
|
||||
|
|
@@ -142,8 +146,12 @@ class TestHotioParser:
|
|||
assert e.severity == "CRITICAL"
|
||||
|
||||
def test_iso_timestamp_parsed(self):
|
||||
from datetime import datetime
|
||||
e = self._parse(HOTIO_LOG)[0]
|
||||
assert e.timestamp_iso == "2026-04-26T03:32:59+00:00"
|
||||
ts = datetime.fromisoformat(e.timestamp_iso)
|
||||
assert ts.utcoffset() is not None # stored as UTC-aware
|
||||
local = ts.astimezone()
|
||||
assert (local.hour, local.minute, local.second) == (3, 32, 59)
|
||||
assert e.timestamp_raw == "2026-04-26T03:32:59"
|
||||
|
||||
def test_source_id_propagated(self):
|
||||
|
|
|
|||
|
|
@@ -48,13 +48,19 @@ class TestParser:
|
|||
assert entries[0].text.startswith("[sshd]")
|
||||
|
||||
def test_timestamp_parsed(self):
|
||||
from datetime import datetime
|
||||
entries = self._parse(SYSLOG_SAMPLE)
|
||||
assert "14:23:01" in entries[0].timestamp_iso
|
||||
ts = datetime.fromisoformat(entries[0].timestamp_iso)
|
||||
assert ts.utcoffset() is not None # stored as UTC-aware
|
||||
local = ts.astimezone()
|
||||
assert (local.hour, local.minute, local.second) == (14, 23, 1)
|
||||
|
||||
def test_space_padded_day(self):
|
||||
from datetime import datetime
|
||||
entries = self._parse(SYSLOG_SAMPLE)
|
||||
cron_entry = entries[4]
|
||||
assert "04:00:00" in cron_entry.timestamp_iso
|
||||
ts = datetime.fromisoformat(entries[4].timestamp_iso)
|
||||
local = ts.astimezone()
|
||||
assert (local.hour, local.minute, local.second) == (4, 0, 0)
|
||||
|
||||
def test_source_id_propagated(self):
|
||||
assert all(e.source_id == "syslog_test" for e in self._parse(SYSLOG_SAMPLE))
|
||||
|
|
|
|||
Loading…
Reference in a new issue