From 90849a2c3a1fa37087e6bb513bec3a584ad7c1ab Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Sun, 10 May 2026 08:14:23 -0700
Subject: [PATCH] fix: bypass FTS ranking for named-source error retrieval
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When diagnose() auto-detects a source name, FTS keyword scoring can
bury real errors whose text doesn't match the symptom query. Add
recent_source_errors() — a plain-SQL scan ordered by timestamp — so
the most recent errors from a known service always surface regardless
of keyword overlap.
---
 app/rest.py            | 27 ++++++++++--------
 app/services/search.py | 64 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/app/rest.py b/app/rest.py
index 567b539..991e937 100644
--- a/app/rest.py
+++ b/app/rest.py
@@ -25,7 +25,12 @@ from app.services.incidents import (
     get_incident_entries,
     list_incidents,
 )
-from app.services.search import search as _search, list_sources as _list_sources, format_results
+from app.services.search import (
+    search as _search,
+    list_sources as _list_sources,
+    recent_source_errors as _source_errors,
+    format_results,
+)
 
 DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
 DIST_DIR = Path(__file__).parent.parent / "web" / "dist"
@@ -120,21 +125,19 @@ def diagnose(
     critical = _search(DB_PATH, query=q, severity="CRITICAL", limit=5, **common)
     errors = _search(DB_PATH, query=q, severity="ERROR", limit=10, **common)
 
-    # When a source was auto-detected, also pull its most recent errors unconstrained —
-    # the user named a service, so show what's actually broken there even if their
-    # symptom keywords don't appear literally in the error text.
+    # When a source was auto-detected and the keyword search found no ERROR entries,
+    # fall back to that source's most recent errors via plain SQL — FTS ranking can
+    # bury real errors whose text doesn't match the symptom keywords.
     source_errors: list = []
     if detected_source and not source and not errors:
-        source_errors = _search(
-            DB_PATH, query="error warning fail", severity="ERROR",
-            limit=10, or_mode=True,
-            source_filter=detected_source, since=since, until=until, include_repeats=False,
+        source_errors = _source_errors(
+            DB_PATH, source_filter=detected_source, severity="ERROR",
+            limit=10, since=since, until=until,
         )
         if not source_errors:
-            source_errors = _search(
-                DB_PATH, query="error warning fail", severity="CRITICAL",
-                limit=5, or_mode=True,
-                source_filter=detected_source, since=since, until=until, include_repeats=False,
+            source_errors = _source_errors(
+                DB_PATH, source_filter=detected_source, severity="CRITICAL",
+                limit=5, since=since, until=until,
             )
 
     seen: set[str] = set()
diff --git a/app/services/search.py b/app/services/search.py
index 6971934..983ee04 100644
--- a/app/services/search.py
+++ b/app/services/search.py
@@ -225,6 +225,70 @@ def entries_in_window(
     ]
 
 
+def recent_source_errors(
+    db_path: Path,
+    source_filter: str,
+    severity: str = "ERROR",
+    limit: int = 10,
+    since: str | None = None,
+    until: str | None = None,
+) -> list[SearchResult]:
+    """Return the most recent entries of one severity from a named source.
+
+    Plain-SQL scan that bypasses FTS ranking so text content doesn't affect which
+    errors surface; diagnose uses it when keyword search finds nothing for a source.
+
+    Args:
+        db_path: Path to the SQLite database.
+        source_filter: Substring matched against ``source_id`` via LIKE.
+        severity: Severity to match; upper-cased before comparison.
+        limit: Maximum number of rows returned.
+        since: Optional inclusive lower bound on ``timestamp_iso``.
+        until: Optional inclusive upper bound on ``timestamp_iso``.
+
+    Returns:
+        Matching entries with ``repeat_count = 1``, newest first, ``rank`` fixed at 0.0.
+    """
+    conditions = ["source_id LIKE ?", "severity = ?", "repeat_count = 1"]
+    params: list = [f"%{source_filter}%", severity.upper()]
+    if since:
+        conditions.append("timestamp_iso >= ?")
+        params.append(since)
+    if until:
+        conditions.append("timestamp_iso <= ?")
+        params.append(until)
+    params.append(limit)  # bound to the trailing LIMIT placeholder
+    where = " AND ".join(conditions)
+    conn = sqlite3.connect(str(db_path))
+    try:  # close the connection even if the PRAGMA or query raises
+        conn.execute("PRAGMA journal_mode=WAL")
+        conn.row_factory = sqlite3.Row
+        rows = conn.execute(
+            f"""
+            SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
+                   repeat_count, out_of_order, matched_patterns, text, 0.0 as rank
+            FROM log_entries
+            WHERE {where}
+            ORDER BY timestamp_iso DESC
+            LIMIT ?
+            """,
+            params,
+        ).fetchall()
+    finally:
+        conn.close()
+
+    return [
+        SearchResult(
+            entry_id=r["entry_id"], source_id=r["source_id"], sequence=r["sequence"],
+            timestamp_iso=r["timestamp_iso"], severity=r["severity"],
+            repeat_count=r["repeat_count"], out_of_order=bool(r["out_of_order"]),
+            matched_patterns=json.loads(r["matched_patterns"] or "[]"),
+            text=r["text"], rank=r["rank"],
+        )
+        for r in rows
+    ]
+
+
 def list_sources(db_path: Path) -> list[dict]:
     """Return distinct sources with entry counts and time ranges."""
     conn = sqlite3.connect(str(db_path))