From 17cdb552a393e50efc17784bf682d24959d111cf Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 4 May 2026 17:51:22 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20T7=20quality=20=E2=80=94=20SynthesisResu?= =?UTF-8?q?lt.citations=20tuple,=20retriever=20comments,=20test=20assertio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SynthesisResult.citations changed from list[Citation] to tuple[Citation, ...] so frozen=True dataclass is genuinely immutable end-to-end - synthesize() now builds tuple via generator expression - retriever._combined: add comment explaining L2 distance inversion - retriever.hybrid_search: comment on _bm25._chunks private access - test_synthesizer_builds_context_from_chunks: drop vacuous str(call_args) fallback; assert directly on call_args.args[0] --- app/services/retriever.py | 2 ++ app/services/synthesizer.py | 6 +++--- tests/test_synthesizer.py | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/app/services/retriever.py b/app/services/retriever.py index b2d09a5..b335171 100644 --- a/app/services/retriever.py +++ b/app/services/retriever.py @@ -75,6 +75,7 @@ class Retriever: vector_score=None, ) for vh in vec_hits: + # _chunks is the loaded list of dicts from BM25Index; no public accessor exists text = next((c["text"] for c in self._bm25._chunks if c["id"] == vh.id), "") if vh.id in merged: existing = merged[vh.id] @@ -98,6 +99,7 @@ class Retriever: def _combined(r: RetrievedChunk) -> float: bm25 = r.bm25_score + # sqlite-vec returns L2 distance (lower=better); invert to [0,1] higher-is-better vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0 return bm25 * 0.5 + vec * 0.5 diff --git a/app/services/synthesizer.py b/app/services/synthesizer.py index d11640b..d1273dc 100644 --- a/app/services/synthesizer.py +++ b/app/services/synthesizer.py @@ -28,7 +28,7 @@ class Citation: @dataclass(frozen=True) class SynthesisResult: answer: str - citations: list[Citation] + citations: tuple[Citation, ...] class Synthesizer: @@ -47,12 +47,12 @@ class Synthesizer: answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT) - citations = [ + citations = tuple( Citation( doc_id=c.doc_id, page_number=c.page_number, snippet=c.text[:200], ) for c in chunks - ] + ) return SynthesisResult(answer=answer, citations=citations) diff --git a/tests/test_synthesizer.py b/tests/test_synthesizer.py index b173960..2931ade 100644 --- a/tests/test_synthesizer.py +++ b/tests/test_synthesizer.py @@ -40,8 +40,7 @@ def test_synthesizer_builds_context_from_chunks(): synth = Synthesizer(mock_llm) synth.synthesize("Q?", [], [_chunk(text="Detailed rule text here.")]) - call_args = mock_llm.complete.call_args - assert "Detailed rule text here." in call_args[0][0] or "Detailed rule text here." in str(call_args) + assert "Detailed rule text here." in mock_llm.complete.call_args.args[0] def test_synthesizer_uses_system_prompt():