fix: quote-first prompt structure + escape phrase post-processing to kill hallucinations
three-layer approach to stop 7B model from supplementing retrieved context
with training-data knowledge:
1. system prompt redesigned: 'no memory of books/stories/authors' eliminates
the model's self-permission to draw on parametric knowledge
2. quote-first prompt structure: model must commit to a specific quoted passage
before generating an answer — explicit NOT FOUND required when excerpts lack
the answer, preventing the 'excerpt doesn't say X... however in the series...'
escape pattern
3. _strip_escape() post-processor: catches any residual leakage by scanning for
known escape phrases ('in the series', 'by terry goodkind', 'it can be assumed',
etc.) and replacing the response with the canned no-answer message
This commit is contained in:
parent
32cb21e2cd
commit
3765fbc0f9
1 changed file with 51 additions and 14 deletions
|
|
@ -11,13 +11,10 @@ from dataclasses import dataclass
|
|||
from app.services.retriever import RetrievedChunk
|
||||
|
||||
_SYSTEM_PROMPT = (
|
||||
"You are a document assistant. "
|
||||
"Answer questions using ONLY the document excerpts provided. "
|
||||
"Cite every claim with the source page as [p.N]. "
|
||||
"If the excerpts do not contain the answer, respond with exactly: "
|
||||
"'I could not find an answer to that question in the indexed documents.' "
|
||||
"Do NOT use knowledge from outside the provided excerpts. "
|
||||
"Do NOT speculate, infer, or guess beyond what is explicitly stated."
|
||||
"You are a strict document retrieval assistant. "
|
||||
"Your sole job is to extract and present information from the document excerpts given to you. "
|
||||
"You have no memory of books, stories, or authors. "
|
||||
"If the excerpts do not contain the answer, say so and stop. Never guess."
|
||||
)
|
||||
|
||||
_NO_RESULTS_ANSWER = (
|
||||
|
|
@ -25,6 +22,41 @@ _NO_RESULTS_ANSWER = (
|
|||
"Try rephrasing, or check that the relevant document has been ingested."
|
||||
)
|
||||
|
||||
# Phrases the model uses when it escapes the provided context and pulls from
|
||||
# training data. Any response containing one of these is replaced with the
|
||||
# canned no-answer message.
|
||||
_ESCAPE_PHRASES = [
|
||||
"in the series",
|
||||
"in the novel",
|
||||
"in the book",
|
||||
"in the context of the series",
|
||||
"it can be assumed",
|
||||
"based on my knowledge",
|
||||
"based on the broader",
|
||||
"the broader story",
|
||||
"by terry goodkind",
|
||||
"sword of truth",
|
||||
"legend of the seeker",
|
||||
"throughout the series",
|
||||
"throughout the novel",
|
||||
"throughout the book",
|
||||
]
|
||||
|
||||
|
||||
def _strip_escape(response: str) -> str:
|
||||
"""Replace responses that leaked outside the provided context with the canned message.
|
||||
|
||||
Detects the 'helpful override' pattern where the model acknowledges the
|
||||
excerpts lack the answer but supplements from training data anyway.
|
||||
"""
|
||||
lower = response.lower()
|
||||
if any(phrase in lower for phrase in _ESCAPE_PHRASES):
|
||||
return (
|
||||
"I could not find an answer to that question in the indexed documents. "
|
||||
"The answer may be in a document that has not been ingested yet."
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Citation:
|
||||
|
|
@ -57,18 +89,23 @@ class Synthesizer:
|
|||
# appear mid-paragraph without blowing past a 32k-context model's limit.
|
||||
context_parts = [f"[p.{c.page_number}]\n{c.text[:1500]}" for c in chunks]
|
||||
context = "\n\n---\n\n".join(context_parts)
|
||||
# Repeat the no-outside-knowledge constraint inside the user turn.
|
||||
# Small models (7B) follow user-turn instructions more reliably than
|
||||
# system-prompt-only constraints when their training data conflicts.
|
||||
# Quote-first structure: the model must commit to a grounding passage
|
||||
# before generating an answer. Forces an explicit "NOT FOUND" admission
|
||||
# when the excerpt doesn't contain the answer, rather than the "the excerpt
|
||||
# doesn't say... however, in the series..." escape pattern.
|
||||
prompt = (
|
||||
f"Document excerpts:\n\n{context}\n\n"
|
||||
f"Excerpts from the indexed documents:\n\n{context}\n\n"
|
||||
f"---\n\n"
|
||||
f"Question: {message}\n\n"
|
||||
f"IMPORTANT: Answer using ONLY the excerpts above. "
|
||||
f"If the answer is not present in the excerpts, respond with exactly: "
|
||||
f"\"I could not find an answer to that question in the indexed documents.\""
|
||||
f"Step 1 — Find the relevant passage: Quote the exact sentence(s) from "
|
||||
f"the excerpts above that answer the question, or write NOT FOUND.\n\n"
|
||||
f"Step 2 — Answer: Based solely on what you quoted in Step 1, answer "
|
||||
f"the question with page citations [p.N]. If Step 1 is NOT FOUND, "
|
||||
f"write: \"I could not find an answer to that question in the indexed documents.\""
|
||||
)
|
||||
|
||||
answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
|
||||
answer = _strip_escape(answer)
|
||||
|
||||
citations = tuple(
|
||||
Citation(
|
||||
|
|
|
|||
Loading…
Reference in a new issue