pagepiper/app/services/synthesizer.py
pyr0ball 0e493ab560 feat(api): add retriever, synthesizer, and chat endpoint (BSL — BYOK gate)
- app/services/retriever.py: hybrid BM25 + semantic Retriever with BM25-only fallback when llm=None
- app/services/synthesizer.py: LLM answer synthesis with citation assembly over retrieved chunks
- app/api/chat.py: POST /api/chat endpoint with 402 gate when PAGEPIPER_OLLAMA_URL is unset
- tests/test_synthesizer.py: 3 TDD unit tests (mocked LLM, context building, system prompt)
- tests/test_chat_api.py: 2 integration tests (402 without Ollama, 200 with mocked retriever+LLM)
2026-05-04 17:47:10 -07:00

58 lines
1.5 KiB
Python

# app/services/synthesizer.py
"""
LLM answer synthesis over retrieved chunks.
BSL 1.1 — requires LLMRouter (Ollama BYOK or cloud tier).
"""
from __future__ import annotations
from dataclasses import dataclass
from app.services.retriever import RetrievedChunk
# System prompt sent with every Synthesizer.synthesize() LLM call. It restricts
# the model to the supplied excerpts and asks for page citations in the
# "[p.N]" form, which is the same label format synthesize() puts in front of
# each excerpt. The text is runtime behavior: edit wording deliberately.
_SYSTEM_PROMPT = (
"You are a helpful document assistant. "
"Answer the user's question using ONLY the provided document excerpts. "
"For each claim, cite the source page as [p.N]. "
"If the excerpts are insufficient, say so. Do not invent information."
)
@dataclass(frozen=True)
class Citation:
    """Immutable reference to one retrieved chunk backing an answer.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Copied from the retrieved chunk's ``doc_id`` by Synthesizer.synthesize().
    doc_id: str
    # Copied from the retrieved chunk's ``page_number``.
    page_number: int
    # Leading slice of the chunk text (synthesize() keeps the first 200 chars).
    snippet: str
@dataclass(frozen=True)
class SynthesisResult:
    """Immutable pairing of an LLM answer with the citations behind it.

    The dataclass is frozen, so the two attributes cannot be rebound; the
    ``citations`` list object itself is an ordinary mutable list.
    """

    # Whatever the LLM client's ``complete()`` call returned.
    answer: str
    # One entry per chunk that was supplied to the synthesizer, in input order.
    citations: list[Citation]
class Synthesizer:
    """Turn retrieved chunks plus a user question into a cited LLM answer.

    The LLM client is injected; the only thing this class requires of it is a
    ``complete(prompt, system=...)`` method whose return value is used as the
    answer.
    """

    def __init__(self, llm) -> None:  # LLMRouter
        """Store the LLM client used by :meth:`synthesize`."""
        self._llm = llm

    def synthesize(
        self,
        message: str,
        history: list[dict],
        chunks: list[RetrievedChunk],
    ) -> SynthesisResult:
        """Ask the LLM to answer ``message`` from the given chunks.

        Args:
            message: The user's question; placed verbatim at the end of the
                prompt.
            history: Prior conversation turns.
                NOTE(review): accepted but not read anywhere in this method,
                so earlier turns do not reach the LLM — confirm whether that
                is intentional.
            chunks: Retrieved chunks; each must expose ``doc_id``,
                ``page_number`` and ``text``.

        Returns:
            A SynthesisResult holding the LLM's reply and one Citation per
            input chunk (in input order), regardless of which pages the reply
            actually cites.
        """
        # Label every excerpt with its page in the same "[p.N]" form the
        # system prompt asks the model to cite; cap each excerpt at 500 chars.
        excerpts = []
        for chunk in chunks:
            excerpts.append(f"[p.{chunk.page_number}]\n{chunk.text[:500]}")
        excerpt_block = "\n\n---\n\n".join(excerpts)

        reply = self._llm.complete(
            f"Document excerpts:\n\n{excerpt_block}\n\nQuestion: {message}",
            system=_SYSTEM_PROMPT,
        )

        # Citations are assembled only after the LLM call has returned; each
        # one carries a shorter 200-char snippet of the chunk text.
        cited: list[Citation] = []
        for chunk in chunks:
            cited.append(
                Citation(
                    doc_id=chunk.doc_id,
                    page_number=chunk.page_number,
                    snippet=chunk.text[:200],
                )
            )
        return SynthesisResult(answer=reply, citations=cited)