fix: bypass FTS ranking for named-source error retrieval

When diagnose() auto-detects a source name, FTS keyword scoring can bury real errors whose text doesn't match the symptom query. Add recent_source_errors() — a plain-SQL scan ordered by timestamp, newest first — and use it as the fallback when the keyword search finds no errors, so the most recent errors from the named service surface regardless of keyword overlap.
2026-05-10 08:14:23 -07:00 · 2026-05-10 08:14:23 -07:00 · 5a8dc731b8
commit 5a8dc731b8
parent 58b179b275
2 changed files with 79 additions and 12 deletions
--- a/app/rest.py
+++ b/app/rest.py
@@ -25,7 +25,12 @@ from app.services.incidents import (
    get_incident_entries,
    list_incidents,
 )
-from app.services.search import search as _search, list_sources as _list_sources, format_results
+from app.services.search import (
+    search as _search,
+    list_sources as _list_sources,
+    recent_source_errors as _source_errors,
+    format_results,
+)

 DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
 DIST_DIR = Path(__file__).parent.parent / "web" / "dist"
@@ -120,21 +125,19 @@ def diagnose(
    critical = _search(DB_PATH, query=q, severity="CRITICAL", limit=5, **common)
    errors = _search(DB_PATH, query=q, severity="ERROR", limit=10, **common)

-    # When a source was auto-detected, also pull its most recent errors unconstrained —
-    # the user named a service, so show what's actually broken there even if their
-    # symptom keywords don't appear literally in the error text.
+    # When a source was auto-detected, also pull its most recent errors via plain SQL —
+    # FTS ranking can bury real errors from the named service if their text doesn't
+    # match the symptom keywords. Plain-SQL scan returns actual recent errors regardless.
    source_errors: list = []
    if detected_source and not source and not errors:
-        source_errors = _search(
-            DB_PATH, query="error warning fail", severity="ERROR",
-            limit=10, or_mode=True,
-            source_filter=detected_source, since=since, until=until, include_repeats=False,
+        source_errors = _source_errors(
+            DB_PATH, source_filter=detected_source, severity="ERROR",
+            limit=10, since=since, until=until,
        )
        if not source_errors:
-            source_errors = _search(
-                DB_PATH, query="error warning fail", severity="CRITICAL",
-                limit=5, or_mode=True,
-                source_filter=detected_source, since=since, until=until, include_repeats=False,
+            source_errors = _source_errors(
+                DB_PATH, source_filter=detected_source, severity="CRITICAL",
+                limit=5, since=since, until=until,
            )

    seen: set[str] = set()
--- a/app/services/search.py
+++ b/app/services/search.py
@@ -225,6 +225,70 @@ def entries_in_window(
    ]


+def recent_source_errors(
+    db_path: Path,
+    source_filter: str,
+    severity: str = "ERROR",
+    limit: int = 10,
+    since: str | None = None,
+    until: str | None = None,
+) -> list[SearchResult]:
+    """Plain-SQL scan: newest entries of one severity from a named source.
+
+    Bypasses FTS ranking so text content doesn't affect which errors surface.
+    Used by diagnose when FTS keyword search returns nothing for a known source.
+    """
+    conn = sqlite3.connect(str(db_path))
+    conn.execute("PRAGMA journal_mode=WAL")  # NOTE(review): issued on every call of a read-only path — confirm it is needed here
+    conn.row_factory = sqlite3.Row  # lets the result builder below read columns by name
+
+    conditions = [
+        "source_id LIKE ?",  # substring match; the bound value is wrapped in %...% below
+        "severity = ?",  # exact match against the upper-cased severity argument
+        "repeat_count = 1",  # presumably mirrors include_repeats=False in search() — confirm
+    ]
+    params: list = [f"%{source_filter}%", severity.upper()]  # NOTE(review): % and _ in source_filter are not escaped, so they act as LIKE wildcards
+
+    if since:
+        conditions.append("timestamp_iso >= ?")  # inclusive lower bound; assumes stored values compare correctly as text — TODO confirm
+        params.append(since)
+    if until:
+        conditions.append("timestamp_iso <= ?")  # inclusive upper bound, same assumption as above
+        params.append(until)
+
+    params.append(limit)  # must stay last: it binds the trailing LIMIT ? placeholder
+    where = " AND ".join(conditions)  # only fixed SQL fragments are interpolated; all values go through params
+
+    rows = conn.execute(
+        f"""
+        SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
+               repeat_count, out_of_order, matched_patterns, text, 0.0 as rank
+        FROM log_entries
+        WHERE {where}
+        ORDER BY timestamp_iso DESC
+        LIMIT ?
+        """,
+        params,
+    ).fetchall()
+    conn.close()  # NOTE(review): skipped if execute() raises — there is no try/finally around the query
+
+    return [
+        SearchResult(
+            entry_id=r["entry_id"],
+            source_id=r["source_id"],
+            sequence=r["sequence"],
+            timestamp_iso=r["timestamp_iso"],
+            severity=r["severity"],
+            repeat_count=r["repeat_count"],
+            out_of_order=bool(r["out_of_order"]),  # stored value coerced to a Python bool
+            matched_patterns=json.loads(r["matched_patterns"] or "[]"),  # NULL or empty column decodes as []
+            text=r["text"],
+            rank=r["rank"],  # always 0.0 here: the SELECT emits a constant, no FTS score is computed
+        )
+        for r in rows
+    ]
+
+
 def list_sources(db_path: Path) -> list[dict]:
    """Return distinct sources with entry counts and time ranges."""
    conn = sqlite3.connect(str(db_path))