# pagepiper/app/services/synthesizer.py

# app/services/synthesizer.py
"""
LLM answer synthesis over retrieved chunks.

BSL 1.1 — requires LLMRouter (Ollama BYOK or cloud tier).
"""
from __future__ import annotations

from dataclasses import dataclass

from app.services.retriever import RetrievedChunk

# System-role instructions passed to the LLM on every synthesis call:
# answer strictly from the supplied excerpts, cite pages as [p.N], and fall
# back to a fixed refusal sentence when the excerpts hold no answer.
_SYSTEM_PROMPT = " ".join(
    (
        "You are a document assistant.",
        "Answer questions using ONLY the document excerpts provided.",
        "Cite every claim with the source page as [p.N].",
        "If the excerpts do not contain the answer, respond with exactly:",
        "'I could not find an answer to that question in the indexed documents.'",
        "Do NOT use knowledge from outside the provided excerpts.",
        "Do NOT speculate, infer, or guess beyond what is explicitly stated.",
    )
)

# Canned reply returned when retrieval produced no chunks at all; in that
# case the LLM is never called.
_NO_RESULTS_ANSWER = " ".join(
    (
        "I could not find any relevant passages in the indexed documents for that question.",
        "Try rephrasing, or check that the relevant document has been ingested.",
    )
)


@dataclass(frozen=True)
class Citation:
    """Immutable reference to one retrieved chunk that backed an answer."""

    # Identifier of the source document, copied from the chunk's doc_id.
    doc_id: str
    # Page the chunk came from; excerpts are labelled [p.N] in the prompt
    # using this number.
    page_number: int
    # Leading excerpt of the chunk text, truncated by Synthesizer.
    snippet: str
    # Copied from the chunk's bm25_score — presumably the BM25 retrieval
    # score; confirm against the retriever.
    bm25_score: float


@dataclass(frozen=True)
class SynthesisResult:
    """Immutable outcome of Synthesizer.synthesize: answer text plus sources."""

    # The LLM's completion, or the fixed no-results message when no chunks
    # were supplied.
    answer: str
    # One Citation per chunk supplied to synthesize, in the order the chunks
    # were passed in; empty when there were no chunks.
    citations: tuple[Citation, ...]


class Synthesizer:
    """Answer a question from retrieved document chunks via an LLM.

    The prompt sent to the LLM contains only the supplied excerpts, each
    tagged with its page number, plus instructions to stay within them.
    """

    # Default truncation limits, in characters.
    # 1500 chars (~300 words) per chunk: enough to capture definitions that
    # appear mid-paragraph without blowing past a 32k-context model's limit.
    DEFAULT_MAX_CHUNK_CHARS = 1500
    DEFAULT_SNIPPET_CHARS = 400

    def __init__(
        self,
        llm,  # LLMRouter
        *,
        max_chunk_chars: int = DEFAULT_MAX_CHUNK_CHARS,
        snippet_chars: int = DEFAULT_SNIPPET_CHARS,
    ) -> None:
        """Store the LLM client and the truncation limits.

        Args:
            llm: Object exposing ``complete(prompt, system=...)``; its return
                value is used as the answer text.
            max_chunk_chars: Maximum characters of each chunk's text included
                in the prompt.
            snippet_chars: Maximum characters of each chunk's text kept as
                the citation snippet.

        Raises:
            ValueError: If either limit is negative. A negative slice bound
                would silently drop the *end* of the text instead of capping
                its length.
        """
        if max_chunk_chars < 0 or snippet_chars < 0:
            raise ValueError(
                "max_chunk_chars and snippet_chars must be non-negative"
            )
        self._llm = llm
        self._max_chunk_chars = max_chunk_chars
        self._snippet_chars = snippet_chars

    def synthesize(
        self,
        message: str,
        history: list[dict],
        chunks: list[RetrievedChunk],
    ) -> SynthesisResult:
        """Produce an answer to ``message`` grounded in ``chunks``.

        Args:
            message: The user's question, inserted verbatim into the prompt.
            history: Prior conversation turns. NOTE: accepted for interface
                compatibility but not used — the prompt is built from
                ``message`` and ``chunks`` only.
            chunks: Retrieved chunks; each must provide ``doc_id``,
                ``page_number``, ``text`` and ``bm25_score``.

        Returns:
            A SynthesisResult holding the LLM's answer and one Citation per
            input chunk (in input order). When ``chunks`` is empty, the fixed
            no-results answer is returned with no citations and the LLM is
            not called.
        """
        if not chunks:
            return SynthesisResult(answer=_NO_RESULTS_ANSWER, citations=())

        # Each excerpt is prefixed with its page tag so the model can cite it
        # in the [p.N] form the system prompt asks for.
        context = "\n\n---\n\n".join(
            f"[p.{c.page_number}]\n{c.text[: self._max_chunk_chars]}"
            for c in chunks
        )
        # Repeat the no-outside-knowledge constraint inside the user turn.
        # Small models (7B) follow user-turn instructions more reliably than
        # system-prompt-only constraints when their training data conflicts.
        prompt = (
            f"Document excerpts:\n\n{context}\n\n"
            f"Question: {message}\n\n"
            "IMPORTANT: Answer using ONLY the excerpts above. "
            "If the answer is not present in the excerpts, respond with exactly: "
            '"I could not find an answer to that question in the indexed documents."'
        )

        answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)

        # Citations cover every chunk shown to the model, not only the pages
        # the answer text actually references.
        citations = tuple(
            Citation(
                doc_id=c.doc_id,
                page_number=c.page_number,
                snippet=c.text[: self._snippet_chars],
                bm25_score=c.bm25_score,
            )
            for c in chunks
        )
        return SynthesisResult(answer=answer, citations=citations)