fix: T7 quality — SynthesisResult.citations tuple, retriever comments, test assertion

- SynthesisResult.citations changed from list[Citation] to tuple[Citation, ...] so frozen=True dataclass is genuinely immutable end-to-end
- synthesize() now builds tuple via generator expression
- retriever._combined: add comment explaining L2 distance inversion
- retriever.hybrid_search: comment on _bm25._chunks private access
- test_synthesizer_builds_context_from_chunks: drop vacuous str(call_args) fallback; assert directly on call_args.args[0]
2026-05-04 17:51:22 -07:00 · 2026-05-04 17:51:22 -07:00 · 17cdb552a3
commit 17cdb552a3
parent 0e493ab560
3 changed files with 6 additions and 5 deletions
--- a/app/services/retriever.py
+++ b/app/services/retriever.py
@ -75,6 +75,7 @@ class Retriever:
                vector_score=None,
            )
        for vh in vec_hits:
+            # _chunks is the loaded list of dicts from BM25Index; no public accessor exists
            text = next((c["text"] for c in self._bm25._chunks if c["id"] == vh.id), "")
            if vh.id in merged:
                existing = merged[vh.id]
@ -98,6 +99,7 @@ class Retriever:
        def _combined(r: RetrievedChunk) -> float:
            bm25 = r.bm25_score
+            # sqlite-vec returns L2 distance (lower=better); invert to [0,1] higher-is-better
            vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0
            return bm25 * 0.5 + vec * 0.5
--- a/app/services/synthesizer.py
+++ b/app/services/synthesizer.py
@ -28,7 +28,7 @@ class Citation:
@dataclass(frozen=True)
 class SynthesisResult:
    answer: str
-    citations: list[Citation]
+    citations: tuple[Citation, ...]
 class Synthesizer:
@ -47,12 +47,12 @@ class Synthesizer:
        answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
-        citations = [
+        citations = tuple(
            Citation(
                doc_id=c.doc_id,
                page_number=c.page_number,
                snippet=c.text[:200],
            )
            for c in chunks
-        ]
+        )
        return SynthesisResult(answer=answer, citations=citations)
--- a/tests/test_synthesizer.py
+++ b/tests/test_synthesizer.py
@ -40,8 +40,7 @@ def test_synthesizer_builds_context_from_chunks():
    synth = Synthesizer(mock_llm)
    synth.synthesize("Q?", [], [_chunk(text="Detailed rule text here.")])
-    call_args = mock_llm.complete.call_args
+    assert "Detailed rule text here." in mock_llm.complete.call_args.args[0]
-    assert "Detailed rule text here." in call_args[0][0] or "Detailed rule text here." in str(call_args)
 def test_synthesizer_uses_system_prompt():