From 90849a2c3a1fa37087e6bb513bec3a584ad7c1ab Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 10 May 2026 08:14:23 -0700 Subject: [PATCH] fix: bypass FTS ranking for named-source error retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When diagnose() auto-detects a source name, FTS keyword scoring can bury real errors whose text doesn't match the symptom query. Add recent_source_errors() — a plain-SQL scan ordered by timestamp — so the most recent errors from a known service always surface regardless of keyword overlap. --- app/rest.py | 27 ++++++++++-------- app/services/search.py | 64 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/app/rest.py b/app/rest.py index 567b539..991e937 100644 --- a/app/rest.py +++ b/app/rest.py @@ -25,7 +25,12 @@ from app.services.incidents import ( get_incident_entries, list_incidents, ) -from app.services.search import search as _search, list_sources as _list_sources, format_results +from app.services.search import ( + search as _search, + list_sources as _list_sources, + recent_source_errors as _source_errors, + format_results, +) DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db")) DIST_DIR = Path(__file__).parent.parent / "web" / "dist" @@ -120,21 +125,19 @@ def diagnose( critical = _search(DB_PATH, query=q, severity="CRITICAL", limit=5, **common) errors = _search(DB_PATH, query=q, severity="ERROR", limit=10, **common) - # When a source was auto-detected, also pull its most recent errors unconstrained — - # the user named a service, so show what's actually broken there even if their - # symptom keywords don't appear literally in the error text. + # When a source was auto-detected, also pull its most recent errors via plain SQL — + # FTS ranking can bury real errors from the named service if their text doesn't + # match the symptom keywords. Plain-SQL scan returns actual recent errors regardless. source_errors: list = [] if detected_source and not source and not errors: - source_errors = _search( - DB_PATH, query="error warning fail", severity="ERROR", - limit=10, or_mode=True, - source_filter=detected_source, since=since, until=until, include_repeats=False, + source_errors = _source_errors( + DB_PATH, source_filter=detected_source, severity="ERROR", + limit=10, since=since, until=until, ) if not source_errors: - source_errors = _search( - DB_PATH, query="error warning fail", severity="CRITICAL", - limit=5, or_mode=True, - source_filter=detected_source, since=since, until=until, include_repeats=False, + source_errors = _source_errors( + DB_PATH, source_filter=detected_source, severity="CRITICAL", + limit=5, since=since, until=until, ) seen: set[str] = set() diff --git a/app/services/search.py b/app/services/search.py index 6971934..983ee04 100644 --- a/app/services/search.py +++ b/app/services/search.py @@ -225,6 +225,70 @@ def entries_in_window( ] +def recent_source_errors( + db_path: Path, + source_filter: str, + severity: str = "ERROR", + limit: int = 10, + since: str | None = None, + until: str | None = None, +) -> list[SearchResult]: + """Plain-SQL scan: most recent error entries from a named source. + + Bypasses FTS ranking so text content doesn't affect which errors surface. + Used by diagnose when FTS keyword search returns nothing for a known source. + """ + conn = sqlite3.connect(str(db_path)) + conn.execute("PRAGMA journal_mode=WAL") + conn.row_factory = sqlite3.Row + + conditions = [ + "source_id LIKE ?", + "severity = ?", + "repeat_count = 1", + ] + params: list = [f"%{source_filter}%", severity.upper()] + + if since: + conditions.append("timestamp_iso >= ?") + params.append(since) + if until: + conditions.append("timestamp_iso <= ?") + params.append(until) + + params.append(limit) + where = " AND ".join(conditions) + + rows = conn.execute( + f""" + SELECT id as entry_id, source_id, sequence, timestamp_iso, severity, + repeat_count, out_of_order, matched_patterns, text, 0.0 as rank + FROM log_entries + WHERE {where} + ORDER BY timestamp_iso DESC + LIMIT ? + """, + params, + ).fetchall() + conn.close() + + return [ + SearchResult( + entry_id=r["entry_id"], + source_id=r["source_id"], + sequence=r["sequence"], + timestamp_iso=r["timestamp_iso"], + severity=r["severity"], + repeat_count=r["repeat_count"], + out_of_order=bool(r["out_of_order"]), + matched_patterns=json.loads(r["matched_patterns"] or "[]"), + text=r["text"], + rank=r["rank"], + ) + for r in rows + ] + + def list_sources(db_path: Path) -> list[dict]: """Return distinct sources with entry counts and time ranges.""" conn = sqlite3.connect(str(db_path))