fix: T7 quality — SynthesisResult.citations tuple, retriever comments, test assertion
- SynthesisResult.citations changed from list[Citation] to tuple[Citation, ...] so the frozen=True dataclass is genuinely immutable end-to-end
- synthesize() now builds the tuple via a generator expression
- retriever._combined: add comment explaining L2 distance inversion
- retriever.hybrid_search: add comment on _bm25._chunks private access
- test_synthesizer_builds_context_from_chunks: drop vacuous str(call_args) fallback; assert directly on call_args.args[0]
This commit is contained in:
parent
0e493ab560
commit
17cdb552a3
3 changed files with 6 additions and 5 deletions
|
|
@ -75,6 +75,7 @@ class Retriever:
|
|||
vector_score=None,
|
||||
)
|
||||
for vh in vec_hits:
|
||||
# _chunks is the loaded list of dicts from BM25Index; no public accessor exists
|
||||
text = next((c["text"] for c in self._bm25._chunks if c["id"] == vh.id), "")
|
||||
if vh.id in merged:
|
||||
existing = merged[vh.id]
|
||||
|
|
@ -98,6 +99,7 @@ class Retriever:
|
|||
|
||||
def _combined(r: RetrievedChunk) -> float:
|
||||
bm25 = r.bm25_score
|
||||
# sqlite-vec returns L2 distance (lower=better); invert to [0,1] higher-is-better
|
||||
vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0
|
||||
return bm25 * 0.5 + vec * 0.5
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ class Citation:
|
|||
@dataclass(frozen=True)
|
||||
class SynthesisResult:
|
||||
answer: str
|
||||
citations: list[Citation]
|
||||
citations: tuple[Citation, ...]
|
||||
|
||||
|
||||
class Synthesizer:
|
||||
|
|
@ -47,12 +47,12 @@ class Synthesizer:
|
|||
|
||||
answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
|
||||
|
||||
citations = [
|
||||
citations = tuple(
|
||||
Citation(
|
||||
doc_id=c.doc_id,
|
||||
page_number=c.page_number,
|
||||
snippet=c.text[:200],
|
||||
)
|
||||
for c in chunks
|
||||
]
|
||||
)
|
||||
return SynthesisResult(answer=answer, citations=citations)
|
||||
|
|
|
|||
|
|
@ -40,8 +40,7 @@ def test_synthesizer_builds_context_from_chunks():
|
|||
synth = Synthesizer(mock_llm)
|
||||
synth.synthesize("Q?", [], [_chunk(text="Detailed rule text here.")])
|
||||
|
||||
call_args = mock_llm.complete.call_args
|
||||
assert "Detailed rule text here." in call_args[0][0] or "Detailed rule text here." in str(call_args)
|
||||
assert "Detailed rule text here." in mock_llm.complete.call_args.args[0]
|
||||
|
||||
|
||||
def test_synthesizer_uses_system_prompt():
|
||||
|
|
|
|||
Loading…
Reference in a new issue