fix: T7 quality — SynthesisResult.citations tuple, retriever comments, test assertion

- SynthesisResult.citations changed from list[Citation] to tuple[Citation, ...]
  so that the frozen=True dataclass is genuinely immutable end-to-end
- synthesize() now builds the tuple via a generator expression
- retriever._combined: add comment explaining L2 distance inversion
- retriever.hybrid_search: add comment on the private _bm25._chunks access
- test_synthesizer_builds_context_from_chunks: drop vacuous str(call_args)
  fallback; assert directly on call_args.args[0]
This commit is contained in:
pyr0ball 2026-05-04 17:51:22 -07:00
parent 0e493ab560
commit 17cdb552a3
3 changed files with 6 additions and 5 deletions

View file

@ -75,6 +75,7 @@ class Retriever:
vector_score=None,
)
for vh in vec_hits:
# _chunks is the loaded list of dicts from BM25Index; no public accessor exists
text = next((c["text"] for c in self._bm25._chunks if c["id"] == vh.id), "")
if vh.id in merged:
existing = merged[vh.id]
@ -98,6 +99,7 @@ class Retriever:
def _combined(r: RetrievedChunk) -> float:
    """Return one ranking score for *r*: BM25 and vector similarity, weighted 50/50.

    A chunk with no vector score (``vector_score is None``) contributes 0.0
    on the vector side, so it is ranked by half of its BM25 score alone.
    """
    lexical = r.bm25_score
    if r.vector_score is None:
        similarity = 0.0
    else:
        # sqlite-vec returns L2 distance (lower=better); 1/(1+d) flips it to a
        # higher-is-better value that stays within (0, 1] for non-negative d.
        similarity = 1.0 / (1.0 + r.vector_score)
    return lexical * 0.5 + similarity * 0.5

View file

@ -28,7 +28,7 @@ class Citation:
@dataclass(frozen=True)
class SynthesisResult:
answer: str
citations: list[Citation]
citations: tuple[Citation, ...]
class Synthesizer:
@ -47,12 +47,12 @@ class Synthesizer:
answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
citations = [
citations = tuple(
Citation(
doc_id=c.doc_id,
page_number=c.page_number,
snippet=c.text[:200],
)
for c in chunks
]
)
return SynthesisResult(answer=answer, citations=citations)

View file

@ -40,8 +40,7 @@ def test_synthesizer_builds_context_from_chunks():
synth = Synthesizer(mock_llm)
synth.synthesize("Q?", [], [_chunk(text="Detailed rule text here.")])
call_args = mock_llm.complete.call_args
assert "Detailed rule text here." in call_args[0][0] or "Detailed rule text here." in str(call_args)
assert "Detailed rule text here." in mock_llm.complete.call_args.args[0]
def test_synthesizer_uses_system_prompt():