diff --git a/app/services/retriever.py b/app/services/retriever.py index b2d09a5..b335171 100644 --- a/app/services/retriever.py +++ b/app/services/retriever.py @@ -75,6 +75,7 @@ class Retriever: vector_score=None, ) for vh in vec_hits: + # _chunks is the loaded list of dicts from BM25Index; no public accessor exists text = next((c["text"] for c in self._bm25._chunks if c["id"] == vh.id), "") if vh.id in merged: existing = merged[vh.id] @@ -98,6 +99,7 @@ class Retriever: def _combined(r: RetrievedChunk) -> float: bm25 = r.bm25_score + # sqlite-vec returns L2 distance (lower=better); invert to [0,1] higher-is-better vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0 return bm25 * 0.5 + vec * 0.5 diff --git a/app/services/synthesizer.py b/app/services/synthesizer.py index d11640b..d1273dc 100644 --- a/app/services/synthesizer.py +++ b/app/services/synthesizer.py @@ -28,7 +28,7 @@ class Citation: @dataclass(frozen=True) class SynthesisResult: answer: str - citations: list[Citation] + citations: tuple[Citation, ...] class Synthesizer: @@ -47,12 +47,12 @@ class Synthesizer: answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT) - citations = [ + citations = tuple( Citation( doc_id=c.doc_id, page_number=c.page_number, snippet=c.text[:200], ) for c in chunks - ] + ) return SynthesisResult(answer=answer, citations=citations) diff --git a/tests/test_synthesizer.py b/tests/test_synthesizer.py index b173960..2931ade 100644 --- a/tests/test_synthesizer.py +++ b/tests/test_synthesizer.py @@ -40,8 +40,7 @@ def test_synthesizer_builds_context_from_chunks(): synth = Synthesizer(mock_llm) synth.synthesize("Q?", [], [_chunk(text="Detailed rule text here.")]) - call_args = mock_llm.complete.call_args - assert "Detailed rule text here." in call_args[0][0] or "Detailed rule text here." in str(call_args) + assert "Detailed rule text here." in mock_llm.complete.call_args.args[0] def test_synthesizer_uses_system_prompt():