fix: correct time_detected logic, immutable sort pattern, add diagnose() test
This commit is contained in:
parent
51c3b769aa
commit
dd48ed1369
2 changed files with 29 additions and 11 deletions
|
|
@@ -1,12 +1,16 @@
|
||||||
"""Frictionless diagnose service — NL time extraction + layered log search."""
|
"""Frictionless diagnose service — NL time extraction + layered log search."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from app.services.search import SearchResult, entries_in_window, search
|
from app.services.search import SearchResult, entries_in_window, search
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from dateparser.search import search_dates as _search_dates # type: ignore[import]
|
from dateparser.search import search_dates as _search_dates # type: ignore[import]
|
||||||
_HAS_DATEPARSER = True
|
_HAS_DATEPARSER = True
|
||||||
|
|
@@ -22,7 +26,11 @@ def parse_time_window(query: str) -> tuple[str | None, str | None, str]:
|
||||||
the matched time phrase stripped. Falls back to last-60-min window.
|
the matched time phrase stripped. Falls back to last-60-min window.
|
||||||
"""
|
"""
|
||||||
if _HAS_DATEPARSER and _search_dates is not None:
|
if _HAS_DATEPARSER and _search_dates is not None:
|
||||||
results = _search_dates(query, languages=["en"], settings={"PREFER_DATES_FROM": "past"})
|
try:
|
||||||
|
results = _search_dates(query, languages=["en"], settings={"PREFER_DATES_FROM": "past"})
|
||||||
|
except Exception:
|
||||||
|
logger.warning("dateparser failed on query %r — falling back to 60-min window", query)
|
||||||
|
results = None
|
||||||
if results:
|
if results:
|
||||||
phrase, dt = results[0]
|
phrase, dt = results[0]
|
||||||
if dt.tzinfo is None:
|
if dt.tzinfo is None:
|
||||||
|
|
@@ -40,10 +48,10 @@ def diagnose(
|
||||||
query: str,
|
query: str,
|
||||||
since: str | None = None,
|
since: str | None = None,
|
||||||
until: str | None = None,
|
until: str | None = None,
|
||||||
) -> dict:
|
) -> dict[str, Any]:
|
||||||
"""Run layered log search with NL time extraction. Returns summary + entries."""
|
"""Run layered log search with NL time extraction. Returns summary + entries."""
|
||||||
time_detected = since is not None or until is not None
|
time_detected = since is not None and until is not None
|
||||||
if since is None or until is None:
|
if not time_detected:
|
||||||
parsed_since, parsed_until, keywords = parse_time_window(query)
|
parsed_since, parsed_until, keywords = parse_time_window(query)
|
||||||
since = since or parsed_since
|
since = since or parsed_since
|
||||||
until = until or parsed_until
|
until = until or parsed_until
|
||||||
|
|
@@ -55,14 +63,13 @@ def diagnose(
|
||||||
window_hits = entries_in_window(db_path, since=since, until=until, limit=50)
|
window_hits = entries_in_window(db_path, since=since, until=until, limit=50)
|
||||||
|
|
||||||
seen: set[str] = set()
|
seen: set[str] = set()
|
||||||
combined: list[SearchResult] = []
|
merged: list[SearchResult] = []
|
||||||
for r in keyword_hits + window_hits:
|
for r in keyword_hits + window_hits:
|
||||||
if r.entry_id not in seen:
|
if r.entry_id not in seen:
|
||||||
seen.add(r.entry_id)
|
seen.add(r.entry_id)
|
||||||
combined.append(r)
|
merged.append(r)
|
||||||
|
|
||||||
combined.sort(key=lambda r: (r.timestamp_iso or "\xff", r.sequence))
|
combined = sorted(merged, key=lambda r: (r.timestamp_iso or "\xff", r.sequence))[:200]
|
||||||
combined = combined[:200]
|
|
||||||
|
|
||||||
by_severity: dict[str, int] = {"CRITICAL": 0, "ERROR": 0, "WARN": 0, "INFO": 0}
|
by_severity: dict[str, int] = {"CRITICAL": 0, "ERROR": 0, "WARN": 0, "INFO": 0}
|
||||||
by_source: dict[str, int] = {}
|
by_source: dict[str, int] = {}
|
||||||
|
|
|
||||||
|
|
@@ -5,7 +5,7 @@ from datetime import datetime, timezone
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
|
||||||
from app.services.diagnose import parse_time_window
|
from app.services.diagnose import diagnose, parse_time_window
|
||||||
|
|
||||||
|
|
||||||
def test_no_time_phrase_falls_back_to_last_60_min():
|
def test_no_time_phrase_falls_back_to_last_60_min():
|
||||||
|
|
@@ -18,7 +18,7 @@ def test_no_time_phrase_falls_back_to_last_60_min():
|
||||||
assert abs(diff - 3600) < 5
|
assert abs(diff - 3600) < 5
|
||||||
|
|
||||||
|
|
||||||
def test_time_phrase_detected_produces_30min_window():
|
def test_time_phrase_detected_produces_60min_window():
|
||||||
dt = datetime(2026, 5, 11, 14, 0, tzinfo=timezone.utc)
|
dt = datetime(2026, 5, 11, 14, 0, tzinfo=timezone.utc)
|
||||||
with patch("app.services.diagnose._HAS_DATEPARSER", True), \
|
with patch("app.services.diagnose._HAS_DATEPARSER", True), \
|
||||||
patch("app.services.diagnose._search_dates", return_value=[("around 2pm", dt)]):
|
patch("app.services.diagnose._search_dates", return_value=[("around 2pm", dt)]):
|
||||||
|
|
@@ -37,7 +37,8 @@ def test_time_phrase_stripped_from_keywords():
|
||||||
|
|
||||||
|
|
||||||
def test_no_dateparser_falls_back_to_60min():
|
def test_no_dateparser_falls_back_to_60min():
|
||||||
with patch("app.services.diagnose._HAS_DATEPARSER", False):
|
with patch("app.services.diagnose._HAS_DATEPARSER", False), \
|
||||||
|
patch("app.services.diagnose._search_dates", None):
|
||||||
since, until, keywords = parse_time_window("plex stopped playing audio")
|
since, until, keywords = parse_time_window("plex stopped playing audio")
|
||||||
assert keywords == "plex stopped playing audio"
|
assert keywords == "plex stopped playing audio"
|
||||||
diff = (datetime.fromisoformat(until) - datetime.fromisoformat(since)).total_seconds()
|
diff = (datetime.fromisoformat(until) - datetime.fromisoformat(since)).total_seconds()
|
||||||
|
|
@@ -50,3 +51,13 @@ def test_keywords_cleaned_of_extra_spaces():
|
||||||
patch("app.services.diagnose._search_dates", return_value=[("around 2pm", dt)]):
|
patch("app.services.diagnose._search_dates", return_value=[("around 2pm", dt)]):
|
||||||
_, _, keywords = parse_time_window("plex stopped audio around 2pm extra")
|
_, _, keywords = parse_time_window("plex stopped audio around 2pm extra")
|
||||||
assert " " not in keywords
|
assert " " not in keywords
|
||||||
|
|
||||||
|
|
||||||
|
def test_diagnose_with_explicit_window_sets_time_detected(tmp_path):
|
||||||
|
from app.ingest.pipeline import ensure_schema
|
||||||
|
db = tmp_path / "test.db"
|
||||||
|
ensure_schema(db)
|
||||||
|
result = diagnose(db, query="plex", since="2026-05-11T14:00:00+00:00", until="2026-05-11T15:00:00+00:00")
|
||||||
|
assert result["summary"]["time_detected"] is True
|
||||||
|
assert result["summary"]["total"] == 0 # empty DB
|
||||||
|
assert result["summary"]["window_start"] == "2026-05-11T14:00:00+00:00"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue