diff --git a/app/services/retriever.py b/app/services/retriever.py index 20f0895..9616e3c 100644 --- a/app/services/retriever.py +++ b/app/services/retriever.py @@ -170,14 +170,30 @@ class Retriever: vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0 return bm25 * 0.5 + vec * 0.5 - ranked = sorted(merged.values(), key=_combined, reverse=True)[:top_k] + all_ranked = sorted(merged.values(), key=_combined, reverse=True) + # Discard results where the best match is pure noise (neither BM25 term # overlap nor vector similarity exceeded the minimum signal threshold). # This lets the caller's empty-result guard fire instead of sending # low-confidence chunks to the LLM where it fills gaps with training data. MIN_SIGNAL = 0.01 - if ranked and _combined(ranked[0]) < MIN_SIGNAL: + if all_ranked and _combined(all_ranked[0]) < MIN_SIGNAL: return [] + + # Cap per-document contribution to max_per_doc of top_k so that one book + # does not crowd out all slots when the query matches it heavily by name + # alone (e.g. a character name that appears in every chapter). + max_per_doc = max(2, top_k // 3) + ranked: list[RetrievedChunk] = [] + doc_counts: dict[str, int] = {} + for r in all_ranked: + if len(ranked) >= top_k: + break + count = doc_counts.get(r.doc_id, 0) + if count < max_per_doc: + ranked.append(r) + doc_counts[r.doc_id] = count + 1 + adjacent = _fetch_adjacent(ranked, db_path) return ranked + adjacent diff --git a/app/services/synthesizer.py b/app/services/synthesizer.py index befba3e..3a93ec2 100644 --- a/app/services/synthesizer.py +++ b/app/services/synthesizer.py @@ -57,7 +57,16 @@ class Synthesizer: # appear mid-paragraph without blowing past a 32k-context model's limit. context_parts = [f"[p.{c.page_number}]\n{c.text[:1500]}" for c in chunks] context = "\n\n---\n\n".join(context_parts) - prompt = f"Document excerpts:\n\n{context}\n\nQuestion: {message}" + # Repeat the no-outside-knowledge constraint inside the user turn. + # Small models (7B) follow user-turn instructions more reliably than + # system-prompt-only constraints when their training data conflicts. + prompt = ( + f"Document excerpts:\n\n{context}\n\n" + f"Question: {message}\n\n" + f"IMPORTANT: Answer using ONLY the excerpts above. " + f"If the answer is not present in the excerpts, respond with exactly: " + f"\"I could not find an answer to that question in the indexed documents.\"" + ) answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)