Three-layer approach to stop the 7B model from supplementing retrieved context
with training-data knowledge:
1. System prompt redesigned: 'no memory of books/stories/authors' eliminates
   the model's self-permission to draw on parametric knowledge.
2. Quote-first prompt structure: the model must commit to a specific quoted
   passage before generating an answer. An explicit NOT FOUND is required when
   the excerpts lack the answer, preventing the 'excerpt doesn't say X...
   however in the series...' escape pattern.
3. _strip_escape() post-processor: catches any residual leakage by scanning for
   known escape phrases ('in the series', 'by terry goodkind', 'it can be
   assumed', etc.) and replacing the response with the canned no-answer message.
119 lines
4 KiB
Python
119 lines
4 KiB
Python
# app/services/synthesizer.py
|
|
"""
LLM answer synthesis over retrieved chunks.

BSL 1.1 — requires LLMRouter (Ollama BYOK or cloud tier).
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from app.services.retriever import RetrievedChunk
|
|
|
|
_SYSTEM_PROMPT = (
|
|
"You are a strict document retrieval assistant. "
|
|
"Your sole job is to extract and present information from the document excerpts given to you. "
|
|
"You have no memory of books, stories, or authors. "
|
|
"If the excerpts do not contain the answer, say so and stop. Never guess."
|
|
)
|
|
|
|
_NO_RESULTS_ANSWER = (
|
|
"I could not find any relevant passages in the indexed documents for that question. "
|
|
"Try rephrasing, or check that the relevant document has been ingested."
|
|
)
|
|
|
|
# Phrases the model uses when it escapes the provided context and pulls from
|
|
# training data. Any response containing one of these is replaced with the
|
|
# canned no-answer message.
|
|
_ESCAPE_PHRASES = [
|
|
"in the series",
|
|
"in the novel",
|
|
"in the book",
|
|
"in the context of the series",
|
|
"it can be assumed",
|
|
"based on my knowledge",
|
|
"based on the broader",
|
|
"the broader story",
|
|
"by terry goodkind",
|
|
"sword of truth",
|
|
"legend of the seeker",
|
|
"throughout the series",
|
|
"throughout the novel",
|
|
"throughout the book",
|
|
]
|
|
|
|
|
|
def _strip_escape(response: str) -> str:
|
|
"""Replace responses that leaked outside the provided context with the canned message.
|
|
|
|
Detects the 'helpful override' pattern where the model acknowledges the
|
|
excerpts lack the answer but supplements from training data anyway.
|
|
"""
|
|
lower = response.lower()
|
|
if any(phrase in lower for phrase in _ESCAPE_PHRASES):
|
|
return (
|
|
"I could not find an answer to that question in the indexed documents. "
|
|
"The answer may be in a document that has not been ingested yet."
|
|
)
|
|
return response
|
|
|
|
|
|
@dataclass(frozen=True)
class Citation:
    """Immutable pointer back to one retrieved chunk that backed an answer.

    Attributes:
        doc_id: Identifier of the document the chunk came from.
        page_number: Page of that document the chunk was taken from.
        snippet: Leading portion of the chunk text, truncated by the
            synthesizer for display.
        bm25_score: Score copied from the retrieved chunk's ``bm25_score``.
    """

    doc_id: str
    page_number: int
    snippet: str
    bm25_score: float
|
|
|
|
|
|
@dataclass(frozen=True)
class SynthesisResult:
    """Immutable outcome of one synthesis call.

    Attributes:
        answer: Text to show the user: either the LLM's answer or one of the
            canned no-answer messages.
        citations: Source references accompanying the answer; an empty tuple
            when nothing was retrieved.
    """

    answer: str
    citations: tuple[Citation, ...]
|
|
|
|
|
|
class Synthesizer:
    """Produce a grounded, cited answer from retrieved chunks using an LLM.

    The prompt is quote-first: the model must commit to a grounding passage
    (or write NOT FOUND) before it answers, and the raw response is then
    passed through ``_strip_escape`` to catch residual training-data leakage.
    """

    # 1500 chars (~300 words) per chunk: enough to capture definitions that
    # appear mid-paragraph without blowing past a 32k-context model's limit.
    DEFAULT_CONTEXT_CHARS = 1500
    # Length of the chunk-text preview stored on each citation.
    DEFAULT_SNIPPET_CHARS = 400

    def __init__(
        self,
        llm,  # LLMRouter
        *,
        context_chars: int = DEFAULT_CONTEXT_CHARS,
        snippet_chars: int = DEFAULT_SNIPPET_CHARS,
    ) -> None:
        """Store the LLM client and the truncation limits.

        Args:
            llm: Object exposing ``complete(prompt, system=...)`` that returns
                the model's response text (an LLMRouter).
            context_chars: Maximum characters of each chunk's text placed in
                the prompt.
            snippet_chars: Maximum characters of each chunk's text kept as the
                citation snippet.

        Raises:
            ValueError: If either limit is negative (a negative slice bound
                would silently drop the *tail* of the text instead).
        """
        if context_chars < 0 or snippet_chars < 0:
            raise ValueError("context_chars and snippet_chars must be non-negative")
        self._llm = llm
        self._context_chars = context_chars
        self._snippet_chars = snippet_chars

    def synthesize(
        self,
        message: str,
        history: list[dict],
        chunks: list[RetrievedChunk],
    ) -> SynthesisResult:
        """Answer ``message`` strictly from ``chunks``.

        Args:
            message: The user's question.
            history: Prior conversation turns. Accepted for interface
                compatibility; it is not currently included in the prompt.
            chunks: Retrieved chunks; each must provide ``text``,
                ``page_number``, ``doc_id`` and ``bm25_score``.

        Returns:
            A ``SynthesisResult`` holding the (possibly canned) answer and one
            citation per supplied chunk. With no chunks the LLM is not called
            and the canned no-results answer is returned with no citations.
        """
        if not chunks:
            return SynthesisResult(answer=_NO_RESULTS_ANSWER, citations=())

        prompt = self._build_prompt(message, chunks)
        answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
        answer = _strip_escape(answer)

        citations = tuple(
            Citation(
                doc_id=c.doc_id,
                page_number=c.page_number,
                snippet=c.text[: self._snippet_chars],
                bm25_score=c.bm25_score,
            )
            for c in chunks
        )
        return SynthesisResult(answer=answer, citations=citations)

    def _build_prompt(self, message: str, chunks: list[RetrievedChunk]) -> str:
        """Assemble the quote-first prompt from the question and excerpts."""
        context = "\n\n---\n\n".join(
            f"[p.{c.page_number}]\n{c.text[: self._context_chars]}" for c in chunks
        )
        # Quote-first structure: the model must commit to a grounding passage
        # before generating an answer. Forces an explicit "NOT FOUND" admission
        # when the excerpt doesn't contain the answer, rather than the "the
        # excerpt doesn't say... however, in the series..." escape pattern.
        return (
            f"Excerpts from the indexed documents:\n\n{context}\n\n"
            "---\n\n"
            f"Question: {message}\n\n"
            "Step 1 — Find the relevant passage: Quote the exact sentence(s) from "
            "the excerpts above that answer the question, or write NOT FOUND.\n\n"
            "Step 2 — Answer: Based solely on what you quoted in Step 1, answer "
            "the question with page citations [p.N]. If Step 1 is NOT FOUND, "
            "write: \"I could not find an answer to that question in the indexed documents.\""
        )
|