From 3765fbc0f93c9f90184694a97874635d69cb789b Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Wed, 6 May 2026 10:30:11 -0700
Subject: [PATCH] fix: quote-first prompt structure + escape phrase post-processing to kill hallucinations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

three-layer approach to stop 7B model from supplementing retrieved
context with training-data knowledge:

1. system prompt redesigned: 'no memory of books/stories/authors'
   eliminates the model's self-permission to draw on parametric
   knowledge
2. quote-first prompt structure: model must commit to a specific quoted
   passage before generating an answer — explicit NOT FOUND required
   when excerpts lack the answer, preventing the 'excerpt doesn't say
   X... however in the series...' escape pattern
3. _strip_escape() post-processor: catches any residual leakage by
   scanning for known escape phrases ('in the series', 'by terry
   goodkind', 'it can be assumed', etc.) and replacing the response
   with the canned no-answer message
---
 app/services/synthesizer.py | 65 +++++++++++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/app/services/synthesizer.py b/app/services/synthesizer.py
index 3a93ec2..80346e5 100644
--- a/app/services/synthesizer.py
+++ b/app/services/synthesizer.py
@@ -11,13 +11,10 @@ from dataclasses import dataclass
 from app.services.retriever import RetrievedChunk
 
 _SYSTEM_PROMPT = (
-    "You are a document assistant. "
-    "Answer questions using ONLY the document excerpts provided. "
-    "Cite every claim with the source page as [p.N]. "
-    "If the excerpts do not contain the answer, respond with exactly: "
-    "'I could not find an answer to that question in the indexed documents.' "
-    "Do NOT use knowledge from outside the provided excerpts. "
-    "Do NOT speculate, infer, or guess beyond what is explicitly stated."
+    "You are a strict document retrieval assistant. "
+    "Your sole job is to extract and present information from the document excerpts given to you. "
+    "You have no memory of books, stories, or authors. "
+    "If the excerpts do not contain the answer, say so and stop. Never guess."
 )
 
 _NO_RESULTS_ANSWER = (
@@ -25,6 +22,41 @@ _NO_RESULTS_ANSWER = (
     "Try rephrasing, or check that the relevant document has been ingested."
 )
 
+# Phrases the model uses when it escapes the provided context and pulls from
+# training data. Any response containing one of these is replaced with the
+# canned no-answer message.
+_ESCAPE_PHRASES = [
+    "in the series",
+    "in the novel",
+    "in the book",
+    "in the context of the series",
+    "it can be assumed",
+    "based on my knowledge",
+    "based on the broader",
+    "the broader story",
+    "by terry goodkind",
+    "sword of truth",
+    "legend of the seeker",
+    "throughout the series",
+    "throughout the novel",
+    "throughout the book",
+]
+
+
+def _strip_escape(response: str) -> str:
+    """Replace responses that leaked outside the provided context with the canned message.
+
+    Detects the 'helpful override' pattern where the model acknowledges the
+    excerpts lack the answer but supplements from training data anyway.
+    """
+    lower = response.lower()
+    if any(phrase in lower for phrase in _ESCAPE_PHRASES):
+        return (
+            "I could not find an answer to that question in the indexed documents. "
+            "The answer may be in a document that has not been ingested yet."
+        )
+    return response
+
 
 @dataclass(frozen=True)
 class Citation:
@@ -57,18 +89,23 @@ class Synthesizer:
         # appear mid-paragraph without blowing past a 32k-context model's limit.
         context_parts = [f"[p.{c.page_number}]\n{c.text[:1500]}" for c in chunks]
         context = "\n\n---\n\n".join(context_parts)
-        # Repeat the no-outside-knowledge constraint inside the user turn.
-        # Small models (7B) follow user-turn instructions more reliably than
-        # system-prompt-only constraints when their training data conflicts.
+        # Quote-first structure: the model must commit to a grounding passage
+        # before generating an answer. Forces an explicit "NOT FOUND" admission
+        # when the excerpt doesn't contain the answer, rather than the "the excerpt
+        # doesn't say... however, in the series..." escape pattern.
         prompt = (
-            f"Document excerpts:\n\n{context}\n\n"
+            f"Excerpts from the indexed documents:\n\n{context}\n\n"
+            f"---\n\n"
             f"Question: {message}\n\n"
-            f"IMPORTANT: Answer using ONLY the excerpts above. "
-            f"If the answer is not present in the excerpts, respond with exactly: "
-            f"\"I could not find an answer to that question in the indexed documents.\""
+            f"Step 1 — Find the relevant passage: Quote the exact sentence(s) from "
+            f"the excerpts above that answer the question, or write NOT FOUND.\n\n"
+            f"Step 2 — Answer: Based solely on what you quoted in Step 1, answer "
+            f"the question with page citations [p.N]. If Step 1 is NOT FOUND, "
+            f"write: \"I could not find an answer to that question in the indexed documents.\""
         )
 
         answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
+        answer = _strip_escape(answer)
 
         citations = tuple(
             Citation(