From 3765fbc0f93c9f90184694a97874635d69cb789b Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Wed, 6 May 2026 10:30:11 -0700
Subject: [PATCH] fix: quote-first prompt structure + escape phrase
 post-processing to kill hallucinations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three-layer approach to stop the 7B model from supplementing retrieved context
with training-data knowledge:

1. system prompt redesigned: 'no memory of books/stories/authors' eliminates
   the model's self-permission to draw on parametric knowledge

2. quote-first prompt structure: model must commit to a specific quoted passage
   before generating an answer — explicit NOT FOUND required when excerpts lack
   the answer, preventing the 'excerpt doesn't say X... however in the series...'
   escape pattern

3. _strip_escape() post-processor: catches residual leakage by scanning the
   lowercased response for known escape phrases ('in the series',
   'by terry goodkind', 'it can be assumed', etc.) and replacing the entire
   response with a canned no-answer message
---
 app/services/synthesizer.py | 65 +++++++++++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/app/services/synthesizer.py b/app/services/synthesizer.py
index 3a93ec2..80346e5 100644
--- a/app/services/synthesizer.py
+++ b/app/services/synthesizer.py
@@ -11,13 +11,10 @@ from dataclasses import dataclass
 from app.services.retriever import RetrievedChunk
 
 _SYSTEM_PROMPT = (
-    "You are a document assistant. "
-    "Answer questions using ONLY the document excerpts provided. "
-    "Cite every claim with the source page as [p.N]. "
-    "If the excerpts do not contain the answer, respond with exactly: "
-    "'I could not find an answer to that question in the indexed documents.' "
-    "Do NOT use knowledge from outside the provided excerpts. "
-    "Do NOT speculate, infer, or guess beyond what is explicitly stated."
+    "You are a strict document retrieval assistant. "
+    "Your sole job is to extract and present information from the document excerpts given to you. "
+    "You have no memory of books, stories, or authors. "
+    "If the excerpts do not contain the answer, say so and stop. Never guess."
 )
 
 _NO_RESULTS_ANSWER = (
@@ -25,6 +22,41 @@ _NO_RESULTS_ANSWER = (
     "Try rephrasing, or check that the relevant document has been ingested."
 )
 
+# Lowercase phrases the model uses when it escapes the provided context and
+# pulls from training data. Matched as case-insensitive substrings; any
+# response containing one is replaced with the canned no-answer message.
+_ESCAPE_PHRASES = [
+    "in the series",
+    "in the novel",
+    "in the book",
+    "in the context of the series",
+    "it can be assumed",
+    "based on my knowledge",
+    "based on the broader",
+    "the broader story",
+    "by terry goodkind",
+    "sword of truth",
+    "legend of the seeker",
+    "throughout the series",
+    "throughout the novel",
+    "throughout the book",
+]
+
+
+def _strip_escape(response: str) -> str:
+    """Replace responses that leaked outside the provided context with the canned message.
+
+    Detects the 'helpful override' pattern where the model acknowledges the
+    excerpts lack the answer but supplements from training data anyway.
+    """
+    lower = response.lower()
+    if any(phrase in lower for phrase in _ESCAPE_PHRASES):
+        return (
+            "I could not find an answer to that question in the indexed documents. "
+            "The answer may be in a document that has not been ingested yet."
+        )
+    return response
+
 
 @dataclass(frozen=True)
 class Citation:
@@ -57,18 +89,23 @@ class Synthesizer:
         # appear mid-paragraph without blowing past a 32k-context model's limit.
         context_parts = [f"[p.{c.page_number}]\n{c.text[:1500]}" for c in chunks]
         context = "\n\n---\n\n".join(context_parts)
-        # Repeat the no-outside-knowledge constraint inside the user turn.
-        # Small models (7B) follow user-turn instructions more reliably than
-        # system-prompt-only constraints when their training data conflicts.
+        # Quote-first structure: the model must commit to a grounding passage
+        # before generating an answer. Forces an explicit "NOT FOUND" admission
+        # when the excerpt doesn't contain the answer, rather than the "the excerpt
+        # doesn't say... however, in the series..." escape pattern.
         prompt = (
-            f"Document excerpts:\n\n{context}\n\n"
+            f"Excerpts from the indexed documents:\n\n{context}\n\n"
+            f"---\n\n"
             f"Question: {message}\n\n"
-            f"IMPORTANT: Answer using ONLY the excerpts above. "
-            f"If the answer is not present in the excerpts, respond with exactly: "
-            f"\"I could not find an answer to that question in the indexed documents.\""
+            f"Step 1 — Find the relevant passage: Quote the exact sentence(s) from "
+            f"the excerpts above that answer the question, or write NOT FOUND.\n\n"
+            f"Step 2 — Answer: Based solely on what you quoted in Step 1, answer "
+            f"the question with page citations [p.N]. If Step 1 is NOT FOUND, "
+            f"write: \"I could not find an answer to that question in the indexed documents.\""
         )
 
         answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
+        answer = _strip_escape(answer)
 
         citations = tuple(
             Citation(