"""Context retrieval — structured keyword lookup (Free) + chunk search — MIT licensed.""" from __future__ import annotations import sqlite3 from dataclasses import dataclass, field from pathlib import Path @dataclass class RetrievedContext: facts: list[dict[str, str]] = field(default_factory=list) chunks: list[dict[str, str]] = field(default_factory=list) def get_relevant_facts(db_path: Path, query: str) -> list[dict[str, str]]: """Keyword match against context_facts. Always runs — Free tier.""" try: conn = sqlite3.connect(str(db_path)) conn.execute("PRAGMA journal_mode=WAL") conn.row_factory = sqlite3.Row keywords = [w.lower() for w in query.split() if len(w) > 2] if not keywords: rows = conn.execute( "SELECT category, key, value, source FROM context_facts" " ORDER BY category LIMIT 20" ).fetchall() else: conditions = " OR ".join( "(LOWER(key) LIKE ? OR LOWER(value) LIKE ?)" for _ in keywords ) params: list[str] = [] for kw in keywords: params.extend([f"%{kw}%", f"%{kw}%"]) rows = conn.execute( f"SELECT category, key, value, source FROM context_facts" f" WHERE {conditions} ORDER BY category LIMIT 10", params, ).fetchall() conn.close() return [dict(r) for r in rows] except sqlite3.OperationalError: return [] def _search_chunks(db_path: Path, query: str) -> list[dict[str, str]]: """Keyword search across context_chunks. Fallback when no embeddings.""" try: conn = sqlite3.connect(str(db_path)) conn.execute("PRAGMA journal_mode=WAL") conn.row_factory = sqlite3.Row keywords = [w.lower() for w in query.split() if len(w) > 2][:5] if not keywords: conn.close() return [] conditions = " OR ".join("LOWER(cc.text) LIKE ?" for _ in keywords) params = [f"%{kw}%" for kw in keywords] rows = conn.execute( f"SELECT cc.text, cd.filename FROM context_chunks cc" f" JOIN context_documents cd ON cc.document_id = cd.id" f" WHERE {conditions} LIMIT 3", params, ).fetchall() conn.close() return [{"text": r["text"], "filename": r["filename"]} for r in rows] except sqlite3.OperationalError: return [] def retrieve_context(db_path: Path, query: str) -> RetrievedContext: """Retrieve structured facts and relevant chunks for a query.""" return RetrievedContext( facts=get_relevant_facts(db_path, query), chunks=_search_chunks(db_path, query), ) def format_context_block(ctx: RetrievedContext) -> str | None: """Format context for injection into LLM prompt. Returns None when empty.""" lines: list[str] = [] if ctx.facts: lines.append("Known environment facts:") for f in ctx.facts: lines.append(f" [{f['category']}] {f['key']}: {f['value']}") if ctx.chunks: lines.append("Relevant documentation:") for c in ctx.chunks: lines.append(f" [{c['filename']}] {c['text'][:200]}") return "\n".join(lines) if lines else None