# pagepiper/app/services/synthesizer.py

# app/services/synthesizer.py
"""
LLM answer synthesis over retrieved chunks.

BSL 1.1 — requires LLMRouter (Ollama BYOK or cloud tier).
"""
from __future__ import annotations

from dataclasses import dataclass

from app.services.retriever import RetrievedChunk

# System message passed as ``system=`` on every completion request made by
# Synthesizer.synthesize. It pins the model to the supplied excerpts only.
_SYSTEM_PROMPT = (
    "You are a strict document retrieval assistant. Your sole job is to "
    "extract and present information from the document excerpts given to "
    "you. You have no memory of books, stories, or authors. If the excerpts "
    "do not contain the answer, say so and stop. Never guess."
)

# Answer returned without calling the LLM when retrieval produced no chunks.
_NO_RESULTS_ANSWER = (
    "I could not find any relevant passages in the indexed documents for that "
    "question. Try rephrasing, or check that the relevant document has been "
    "ingested."
)

# Tell-tale phrases of a response that left the provided excerpts and drew on
# training data instead. _strip_escape swaps any response containing one of
# them for a canned no-answer message. Entries must be lowercase: they are
# matched as substrings of the lowercased response.
_ESCAPE_PHRASES = [
    # References to the work as a whole rather than to the excerpts.
    "in the series",
    "in the novel",
    "in the book",
    "in the context of the series",
    # Speculation or appeals to outside knowledge.
    "it can be assumed",
    "based on my knowledge",
    "based on the broader",
    "the broader story",
    # Specific author / franchise names.
    "by terry goodkind",
    "sword of truth",
    "legend of the seeker",
    # Sweeping statements about the work as a whole.
    "throughout the series",
    "throughout the novel",
    "throughout the book",
]


def _strip_escape(response: str) -> str:
    """Screen an LLM response for signs that it left the provided excerpts.

    The response is lowercased and searched for each entry of
    ``_ESCAPE_PHRASES``. This targets the 'helpful override' pattern: the
    model admits the excerpts lack the answer, then fills the gap from
    training data anyway.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        A canned "not found in the indexed documents" message if any escape
        phrase occurs in the response; otherwise ``response`` unchanged.
    """
    normalized = response.lower()
    for phrase in _ESCAPE_PHRASES:
        if phrase in normalized:
            # One hit is enough: discard the whole response rather than
            # trying to cut out the leaked part.
            return (
                "I could not find an answer to that question in the indexed documents. "
                "The answer may be in a document that has not been ingested yet."
            )
    return response


@dataclass(frozen=True)
class Citation:
    """Immutable reference to one retrieved chunk that accompanied an answer.

    ``Synthesizer.synthesize`` builds one instance per chunk it was given,
    copying the fields below from the chunk.
    """

    # Copied from the chunk's ``doc_id``.
    doc_id: str
    # Copied from the chunk's ``page_number``; the same number appears as the
    # ``[p.N]`` label in front of the chunk's text in the prompt.
    page_number: int
    # Truncated prefix of the chunk's text.
    snippet: str
    # Copied from the chunk's ``bm25_score``; presumably the BM25 retrieval
    # score computed by the retriever — confirm against app.services.retriever.
    bm25_score: float


@dataclass(frozen=True)
class SynthesisResult:
    """Immutable return value of ``Synthesizer.synthesize``."""

    # Text to present: the LLM response after escape screening, or a canned
    # message when nothing was retrieved or the response was rejected.
    answer: str
    # One Citation per chunk that was passed in; an empty tuple when no
    # chunks were retrieved.
    citations: tuple[Citation, ...]


class Synthesizer:
    """Synthesize a cited answer from retrieved chunks using an LLM.

    Chunks are rendered into a quote-first prompt, the prompt is sent to the
    injected LLM client, and the reply is screened with ``_strip_escape``
    before being returned alongside one citation per chunk.
    """

    # 1500 chars (~300 words) per chunk: enough to capture definitions that
    # appear mid-paragraph without blowing past a 32k-context model's limit.
    DEFAULT_MAX_CHUNK_CHARS = 1500
    # Number of leading characters of chunk text kept on each Citation.
    DEFAULT_SNIPPET_CHARS = 400

    def __init__(
        self,
        llm,  # LLMRouter
        *,
        max_chunk_chars: int = DEFAULT_MAX_CHUNK_CHARS,
        snippet_chars: int = DEFAULT_SNIPPET_CHARS,
    ) -> None:
        """Store the LLM client and the truncation limits.

        Args:
            llm: Client object; only ``complete(prompt, system=...)`` is
                called on it.
            max_chunk_chars: Maximum number of characters of each chunk's
                text placed in the prompt. Lower it for models with a
                smaller context window.
            snippet_chars: Maximum number of characters of each chunk's text
                stored as ``Citation.snippet``.

        Raises:
            ValueError: If ``max_chunk_chars`` is below 1 or
                ``snippet_chars`` is negative. Such values would silently
                produce empty excerpts or slice from the wrong end.
        """
        if max_chunk_chars < 1:
            raise ValueError("max_chunk_chars must be a positive integer")
        if snippet_chars < 0:
            raise ValueError("snippet_chars must be non-negative")
        self._llm = llm
        self._max_chunk_chars = max_chunk_chars
        self._snippet_chars = snippet_chars

    def _build_prompt(self, message: str, chunks) -> str:
        """Render the question and the truncated chunk excerpts into the prompt."""
        context_parts = [
            f"[p.{c.page_number}]\n{c.text[:self._max_chunk_chars]}" for c in chunks
        ]
        context = "\n\n---\n\n".join(context_parts)
        # Quote-first structure: the model must commit to a grounding passage
        # before generating an answer. Forces an explicit "NOT FOUND" admission
        # when the excerpt doesn't contain the answer, rather than the "the excerpt
        # doesn't say... however, in the series..." escape pattern.
        return (
            f"Excerpts from the indexed documents:\n\n{context}\n\n"
            "---\n\n"
            f"Question: {message}\n\n"
            "Step 1 — Find the relevant passage: Quote the exact sentence(s) from "
            "the excerpts above that answer the question, or write NOT FOUND.\n\n"
            "Step 2 — Answer: Based solely on what you quoted in Step 1, answer "
            "the question with page citations [p.N]. If Step 1 is NOT FOUND, "
            "write: \"I could not find an answer to that question in the indexed documents.\""
        )

    def synthesize(
        self,
        message: str,
        history: list[dict],
        chunks: list[RetrievedChunk],
    ) -> SynthesisResult:
        """Answer ``message`` from ``chunks`` and attach one citation per chunk.

        Args:
            message: The user's question, inserted verbatim into the prompt.
            history: Prior conversation turns. Accepted as part of the
                interface but not used when building the prompt.
            chunks: Retrieved chunks; each must provide ``doc_id``,
                ``page_number``, ``text`` and ``bm25_score``.

        Returns:
            A ``SynthesisResult``. With no chunks, the LLM is not called and
            the result carries the canned no-results answer and no
            citations. Otherwise it carries the screened LLM response and a
            citation for every chunk, in the order given.
        """
        if not chunks:
            return SynthesisResult(answer=_NO_RESULTS_ANSWER, citations=())

        prompt = self._build_prompt(message, chunks)
        answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
        # Replace responses that drifted outside the excerpts with a canned
        # no-answer message.
        answer = _strip_escape(answer)

        citations = tuple(
            Citation(
                doc_id=c.doc_id,
                page_number=c.page_number,
                snippet=c.text[:self._snippet_chars],
                bm25_score=c.bm25_score,
            )
            for c in chunks
        )
        return SynthesisResult(answer=answer, citations=citations)