# turnstone/app/context/retriever.py

"""Context retrieval — structured keyword lookup (Free) + chunk search — MIT licensed."""
from __future__ import annotations
import sqlite3
from dataclasses import dataclass, field
from pathlib import Path
@dataclass
class RetrievedContext:
    """Container for everything retrieved for a single query.

    Each attribute defaults to its own fresh empty list, so instances never
    share list state.
    """

    # Structured fact rows, each represented as a plain dict.
    facts: list[dict[str, str]] = field(default_factory=list)
    # Document chunk hits, each represented as a plain dict.
    chunks: list[dict[str, str]] = field(default_factory=list)
def get_relevant_facts(db_path: Path, query: str) -> list[dict[str, str]]:
    """Keyword match against context_facts. Always runs — Free tier.

    The query is split on whitespace; words longer than two characters are
    lower-cased and each is matched as a substring against ``LOWER(key)`` and
    ``LOWER(value)``. A row is returned when any keyword matches.

    Args:
        db_path: Path to the SQLite database file.
        query: Free-text query to extract keywords from.

    Returns:
        Up to 10 matching rows (or up to 20 rows when the query yields no
        usable keywords), ordered by category, as dicts with the keys
        ``category``, ``key``, ``value`` and ``source``. Returns an empty
        list when SQLite raises ``OperationalError`` (for example when the
        ``context_facts`` table does not exist).
    """
    try:
        conn = sqlite3.connect(str(db_path))
    except sqlite3.OperationalError:
        return []

    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        keywords = [w.lower() for w in query.split() if len(w) > 2]
        if not keywords:
            # Nothing to filter on: fall back to a capped listing of facts.
            rows = conn.execute(
                "SELECT category, key, value, source FROM context_facts"
                " ORDER BY category LIMIT 20"
            ).fetchall()
        else:
            # Only fixed SQL fragments are interpolated into the statement;
            # the keywords themselves always travel as bound parameters.
            conditions = " OR ".join(
                "(LOWER(key) LIKE ? OR LOWER(value) LIKE ?)" for _ in keywords
            )
            params: list[str] = []
            for kw in keywords:
                # One pattern for the key column, one for the value column.
                params.extend([f"%{kw}%", f"%{kw}%"])
            rows = conn.execute(
                f"SELECT category, key, value, source FROM context_facts"
                f" WHERE {conditions} ORDER BY category LIMIT 10",
                params,
            ).fetchall()
        return [dict(r) for r in rows]
    except sqlite3.OperationalError:
        return []
    finally:
        # Close on every path, including the error path, so a failed query
        # does not leave the connection open.
        conn.close()
def _search_chunks(db_path: Path, query: str) -> list[dict[str, str]]:
    """Keyword search across context_chunks. Fallback when no embeddings.

    The query is split on whitespace; words longer than two characters are
    lower-cased and only the first five are used. A chunk matches when its
    lower-cased text contains any of those keywords.

    Args:
        db_path: Path to the SQLite database file.
        query: Free-text query to extract keywords from.

    Returns:
        Up to 3 matches as dicts with the keys ``text`` and ``filename``
        (the filename comes from the joined ``context_documents`` row).
        Returns an empty list when the query has no usable keywords or when
        SQLite raises ``OperationalError`` (for example when the tables do
        not exist).
    """
    try:
        conn = sqlite3.connect(str(db_path))
    except sqlite3.OperationalError:
        return []

    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        keywords = [w.lower() for w in query.split() if len(w) > 2][:5]
        if not keywords:
            return []
        # Only fixed SQL fragments are interpolated into the statement; the
        # keywords themselves always travel as bound parameters.
        conditions = " OR ".join("LOWER(cc.text) LIKE ?" for _ in keywords)
        params = [f"%{kw}%" for kw in keywords]
        rows = conn.execute(
            f"SELECT cc.text, cd.filename FROM context_chunks cc"
            f" JOIN context_documents cd ON cc.document_id = cd.id"
            f" WHERE {conditions} LIMIT 3",
            params,
        ).fetchall()
        return [{"text": r["text"], "filename": r["filename"]} for r in rows]
    except sqlite3.OperationalError:
        return []
    finally:
        # Close on every path, including the early return and the error
        # path, so a failed query does not leave the connection open.
        conn.close()
def retrieve_context(db_path: Path, query: str) -> RetrievedContext:
    """Gather facts and document chunks relevant to *query*.

    Runs the fact lookup first and the chunk keyword search second, both
    against the database at *db_path*, and bundles the two result lists.

    Args:
        db_path: Path to the SQLite database file.
        query: Free-text query passed unchanged to both lookups.

    Returns:
        A ``RetrievedContext`` holding the fact rows and the chunk hits.
    """
    matched_facts = get_relevant_facts(db_path, query)
    matched_chunks = _search_chunks(db_path, query)
    return RetrievedContext(facts=matched_facts, chunks=matched_chunks)
def format_context_block(ctx: RetrievedContext) -> str | None:
    """Render retrieved context as text for injection into an LLM prompt.

    Facts are listed under a "Known environment facts:" heading and chunks
    under a "Relevant documentation:" heading. Each chunk's text is cut to
    its first 200 characters. A section is omitted entirely when its list
    is empty.

    Args:
        ctx: Retrieved facts (dicts with ``category``, ``key``, ``value``)
            and chunks (dicts with ``filename``, ``text``).

    Returns:
        The newline-joined block, or ``None`` when there are neither facts
        nor chunks.
    """
    rendered: list[str] = []

    if ctx.facts:
        rendered.append("Known environment facts:")
        rendered.extend(
            f" [{fact['category']}] {fact['key']}: {fact['value']}"
            for fact in ctx.facts
        )

    if ctx.chunks:
        rendered.append("Relevant documentation:")
        rendered.extend(
            f" [{chunk['filename']}] {chunk['text'][:200]}"
            for chunk in ctx.chunks
        )

    if not rendered:
        return None
    return "\n".join(rendered)