fix: ingestors treat naive log timestamps as local time, not UTC
All five parsers (plex, syslog, servarr, qbittorrent, plaintext) were calling .replace(tzinfo=timezone.utc) on naive datetimes parsed from log files, which attaches a UTC label to what is actually local-time data without shifting the clock value. On a UTC-7 system a 2pm entry was stored as 14:00Z instead of 21:00Z, causing time-window searches to return zero results. Fix: use .astimezone(timezone.utc) instead, which interprets the naive datetime as system local time (i.e. the timezone of the host running the ingestor) and converts it to UTC correctly. Tests updated to convert the stored timestamp back to local time before asserting, so they pass in any timezone, not just UTC.
This commit is contained in:
parent
e6075f80b3
commit
32f44700f9
7 changed files with 26 additions and 13 deletions
|
|
@ -38,7 +38,7 @@ def _extract_ts(line: str) -> tuple[str, str]:
|
||||||
dt = datetime.strptime(clean, fmt)
|
dt = datetime.strptime(clean, fmt)
|
||||||
if dt.year == 1900:
|
if dt.year == 1900:
|
||||||
dt = dt.replace(year=datetime.now().year)
|
dt = dt.replace(year=datetime.now().year)
|
||||||
dt = dt.replace(tzinfo=timezone.utc)
|
dt = dt.astimezone(timezone.utc)
|
||||||
return ts_raw, dt.isoformat()
|
return ts_raw, dt.isoformat()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,8 @@ _LEVEL_MAP = {
|
||||||
def _parse_ts(month: str, day: str, year: str, time: str) -> tuple[str, str]:
|
def _parse_ts(month: str, day: str, year: str, time: str) -> tuple[str, str]:
|
||||||
raw = f"{month} {day}, {year} {time}"
|
raw = f"{month} {day}, {year} {time}"
|
||||||
try:
|
try:
|
||||||
# Plex logs are local time — treat as UTC for now (no TZ in log)
|
# Plex logs use local time; convert to UTC for consistent DB storage
|
||||||
dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").replace(tzinfo=timezone.utc)
|
dt = datetime.strptime(raw, "%b %d, %Y %H:%M:%S.%f").astimezone(timezone.utc)
|
||||||
return raw, dt.isoformat()
|
return raw, dt.isoformat()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return raw, ""
|
return raw, ""
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ def _parse_ts(ts_str: str) -> tuple[str, str]:
|
||||||
"""Return (raw, iso). Handles classic (space sep) and hotio (T sep) timestamps."""
|
"""Return (raw, iso). Handles classic (space sep) and hotio (T sep) timestamps."""
|
||||||
for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"):
|
for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"):
|
||||||
try:
|
try:
|
||||||
dt = datetime.strptime(ts_str, fmt).replace(tzinfo=timezone.utc)
|
dt = datetime.strptime(ts_str, fmt).astimezone(timezone.utc)
|
||||||
return ts_str, dt.isoformat()
|
return ts_str, dt.isoformat()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ _LEVEL_MAP: dict[str, str | None] = {
|
||||||
def _parse_ts(ts_str: str) -> tuple[str, str]:
|
def _parse_ts(ts_str: str) -> tuple[str, str]:
|
||||||
base = ts_str.split(".")[0]
|
base = ts_str.split(".")[0]
|
||||||
try:
|
try:
|
||||||
dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
|
dt = datetime.strptime(base, "%Y-%m-%d %H:%M:%S").astimezone(timezone.utc)
|
||||||
return ts_str, dt.isoformat()
|
return ts_str, dt.isoformat()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return ts_str, ""
|
return ts_str, ""
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,7 @@ def _parse_ts(month_str: str, day: str, time_str: str) -> tuple[str, str]:
|
||||||
ts_raw = f"{month_str} {int(day):2d} {time_str}"
|
ts_raw = f"{month_str} {int(day):2d} {time_str}"
|
||||||
try:
|
try:
|
||||||
dt = datetime(year, month, int(day),
|
dt = datetime(year, month, int(day),
|
||||||
*[int(p) for p in time_str.split(":")],
|
*[int(p) for p in time_str.split(":")]).astimezone(timezone.utc)
|
||||||
tzinfo=timezone.utc)
|
|
||||||
return ts_raw, dt.isoformat()
|
return ts_raw, dt.isoformat()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return ts_raw, ""
|
return ts_raw, ""
|
||||||
|
|
|
||||||
|
|
@ -79,10 +79,12 @@ class TestClassicParser:
|
||||||
assert len(self._parse(CLASSIC_LOG)) == 7
|
assert len(self._parse(CLASSIC_LOG)) == 7
|
||||||
|
|
||||||
def test_startup_entry(self):
|
def test_startup_entry(self):
|
||||||
|
from datetime import datetime
|
||||||
e = self._parse(CLASSIC_LOG)[0]
|
e = self._parse(CLASSIC_LOG)[0]
|
||||||
assert "qBittorrent v5.0.3 started" in e.text
|
assert "qBittorrent v5.0.3 started" in e.text
|
||||||
assert e.severity is None
|
assert e.severity is None
|
||||||
assert e.timestamp_iso == "2026-05-09T14:10:01+00:00"
|
local = datetime.fromisoformat(e.timestamp_iso).astimezone()
|
||||||
|
assert (local.hour, local.minute, local.second) == (14, 10, 1)
|
||||||
|
|
||||||
def test_warning_severity(self):
|
def test_warning_severity(self):
|
||||||
e = self._parse(CLASSIC_LOG)[1]
|
e = self._parse(CLASSIC_LOG)[1]
|
||||||
|
|
@ -103,8 +105,10 @@ class TestClassicParser:
|
||||||
assert e.text == "Normal message without bracket level"
|
assert e.text == "Normal message without bracket level"
|
||||||
|
|
||||||
def test_dash_format_timestamp(self):
|
def test_dash_format_timestamp(self):
|
||||||
|
from datetime import datetime
|
||||||
entries = list(parse(iter(CLASSIC_DASH.splitlines(keepends=True)), "qbit", []))
|
entries = list(parse(iter(CLASSIC_DASH.splitlines(keepends=True)), "qbit", []))
|
||||||
assert entries[0].timestamp_iso == "2026-05-09T14:10:01+00:00"
|
local = datetime.fromisoformat(entries[0].timestamp_iso).astimezone()
|
||||||
|
assert (local.hour, local.minute, local.second) == (14, 10, 1)
|
||||||
|
|
||||||
def test_source_id_propagated(self):
|
def test_source_id_propagated(self):
|
||||||
assert all(e.source_id == "qbit_test" for e in self._parse(CLASSIC_LOG))
|
assert all(e.source_id == "qbit_test" for e in self._parse(CLASSIC_LOG))
|
||||||
|
|
@ -142,8 +146,12 @@ class TestHotioParser:
|
||||||
assert e.severity == "CRITICAL"
|
assert e.severity == "CRITICAL"
|
||||||
|
|
||||||
def test_iso_timestamp_parsed(self):
|
def test_iso_timestamp_parsed(self):
|
||||||
|
from datetime import datetime
|
||||||
e = self._parse(HOTIO_LOG)[0]
|
e = self._parse(HOTIO_LOG)[0]
|
||||||
assert e.timestamp_iso == "2026-04-26T03:32:59+00:00"
|
ts = datetime.fromisoformat(e.timestamp_iso)
|
||||||
|
assert ts.utcoffset() is not None # stored as UTC-aware
|
||||||
|
local = ts.astimezone()
|
||||||
|
assert (local.hour, local.minute, local.second) == (3, 32, 59)
|
||||||
assert e.timestamp_raw == "2026-04-26T03:32:59"
|
assert e.timestamp_raw == "2026-04-26T03:32:59"
|
||||||
|
|
||||||
def test_source_id_propagated(self):
|
def test_source_id_propagated(self):
|
||||||
|
|
|
||||||
|
|
@ -48,13 +48,19 @@ class TestParser:
|
||||||
assert entries[0].text.startswith("[sshd]")
|
assert entries[0].text.startswith("[sshd]")
|
||||||
|
|
||||||
def test_timestamp_parsed(self):
|
def test_timestamp_parsed(self):
|
||||||
|
from datetime import datetime
|
||||||
entries = self._parse(SYSLOG_SAMPLE)
|
entries = self._parse(SYSLOG_SAMPLE)
|
||||||
assert "14:23:01" in entries[0].timestamp_iso
|
ts = datetime.fromisoformat(entries[0].timestamp_iso)
|
||||||
|
assert ts.utcoffset() is not None # stored as UTC-aware
|
||||||
|
local = ts.astimezone()
|
||||||
|
assert (local.hour, local.minute, local.second) == (14, 23, 1)
|
||||||
|
|
||||||
def test_space_padded_day(self):
|
def test_space_padded_day(self):
|
||||||
|
from datetime import datetime
|
||||||
entries = self._parse(SYSLOG_SAMPLE)
|
entries = self._parse(SYSLOG_SAMPLE)
|
||||||
cron_entry = entries[4]
|
ts = datetime.fromisoformat(entries[4].timestamp_iso)
|
||||||
assert "04:00:00" in cron_entry.timestamp_iso
|
local = ts.astimezone()
|
||||||
|
assert (local.hour, local.minute, local.second) == (4, 0, 0)
|
||||||
|
|
||||||
def test_source_id_propagated(self):
|
def test_source_id_propagated(self):
|
||||||
assert all(e.source_id == "syslog_test" for e in self._parse(SYSLOG_SAMPLE))
|
assert all(e.source_id == "syslog_test" for e in self._parse(SYSLOG_SAMPLE))
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue