# app/services/synthesizer.py
"""
LLM answer synthesis over retrieved chunks.

BSL 1.1 — requires LLMRouter (Ollama BYOK or cloud tier).
"""
from __future__ import annotations

from dataclasses import dataclass

from app.services.retriever import RetrievedChunk

# Sentence the model is instructed to emit verbatim when the excerpts do not
# contain the answer. Defined once so the system prompt and the user-turn
# reminder cannot drift apart.
_NOT_FOUND_ANSWER = (
    "I could not find an answer to that question in the indexed documents."
)

_SYSTEM_PROMPT = (
    "You are a document assistant. "
    "Answer questions using ONLY the document excerpts provided. "
    "Cite every claim with the source page as [p.N]. "
    "If the excerpts do not contain the answer, respond with exactly: "
    f"'{_NOT_FOUND_ANSWER}' "
    "Do NOT use knowledge from outside the provided excerpts. "
    "Do NOT speculate, infer, or guess beyond what is explicitly stated."
)

# Returned without calling the LLM when retrieval produced no chunks at all.
_NO_RESULTS_ANSWER = (
    "I could not find any relevant passages in the indexed documents for that question. "
    "Try rephrasing, or check that the relevant document has been ingested."
)

# 1500 chars (~300 words) per chunk: enough to capture definitions that
# appear mid-paragraph without blowing past a 32k-context model's limit.
_DEFAULT_MAX_CHUNK_CHARS = 1500

# Length of the chunk-text prefix kept on each Citation.
_DEFAULT_SNIPPET_CHARS = 400

# Separator placed between excerpts inside the prompt.
_EXCERPT_SEPARATOR = "\n\n---\n\n"


@dataclass(frozen=True)
class Citation:
    """Reference to one retrieved chunk that was supplied to the LLM."""

    doc_id: str
    page_number: int
    snippet: str  # leading characters of the chunk text
    bm25_score: float  # copied from the chunk's ``bm25_score``


@dataclass(frozen=True)
class SynthesisResult:
    """Answer text together with a citation for every chunk that was used."""

    answer: str
    citations: tuple[Citation, ...]


class Synthesizer:
    """Build a grounded prompt from retrieved chunks and ask the LLM to answer."""

    def __init__(
        self,
        llm,  # LLMRouter
        *,
        max_chunk_chars: int = _DEFAULT_MAX_CHUNK_CHARS,
        snippet_chars: int = _DEFAULT_SNIPPET_CHARS,
    ) -> None:
        """Store the LLM client and the truncation limits.

        Args:
            llm: Object exposing ``complete(prompt, system=...)``.
            max_chunk_chars: Maximum characters of each chunk's text placed
                in the prompt.
            snippet_chars: Maximum characters of each chunk's text kept as
                the citation snippet.

        Raises:
            ValueError: If either limit is smaller than 1.
        """
        if max_chunk_chars < 1 or snippet_chars < 1:
            raise ValueError("max_chunk_chars and snippet_chars must be positive")
        self._llm = llm
        self._max_chunk_chars = max_chunk_chars
        self._snippet_chars = snippet_chars

    def synthesize(
        self,
        message: str,
        history: list[dict],
        chunks: list[RetrievedChunk],
    ) -> SynthesisResult:
        """Answer ``message`` using only the text of ``chunks``.

        Args:
            message: The user's question; interpolated verbatim into the prompt.
            history: Accepted for interface compatibility; it is not read
                when building the prompt.
                NOTE(review): follow-up questions therefore reach the model
                without earlier turns — confirm this is intended.
            chunks: Retrieved chunks; each one contributes a page-tagged
                excerpt to the prompt and one citation to the result.

        Returns:
            A ``SynthesisResult``. When ``chunks`` is empty the LLM is not
            called and a fixed "no relevant passages" answer is returned
            with no citations.
        """
        if not chunks:
            return SynthesisResult(answer=_NO_RESULTS_ANSWER, citations=())

        limit = self._max_chunk_chars
        context = _EXCERPT_SEPARATOR.join(
            f"[p.{chunk.page_number}]\n{chunk.text[:limit]}" for chunk in chunks
        )

        # Repeat the no-outside-knowledge constraint inside the user turn.
        # Small models (7B) follow user-turn instructions more reliably than
        # system-prompt-only constraints when their training data conflicts.
        prompt = (
            f"Document excerpts:\n\n{context}\n\n"
            f"Question: {message}\n\n"
            "IMPORTANT: Answer using ONLY the excerpts above. "
            "If the answer is not present in the excerpts, respond with exactly: "
            f'"{_NOT_FOUND_ANSWER}"'
        )

        answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)

        # One citation per supplied chunk, in retrieval order; the answer text
        # is not parsed to find out which pages the model actually cited.
        citations = tuple(
            Citation(
                doc_id=chunk.doc_id,
                page_number=chunk.page_number,
                snippet=chunk.text[: self._snippet_chars],
                bm25_score=chunk.bm25_score,
            )
            for chunk in chunks
        )
        return SynthesisResult(answer=answer, citations=citations)