fix: T7 quality — SynthesisResult.citations tuple, retriever comments, test assertion
- SynthesisResult.citations changed from list[Citation] to tuple[Citation, ...] so the frozen=True dataclass is genuinely immutable end-to-end
- synthesize() now builds the tuple via a generator expression
- retriever._combined: add a comment explaining the L2 distance inversion
- retriever.hybrid_search: add a comment on the _bm25._chunks private access
- test_synthesizer_builds_context_from_chunks: drop the vacuous str(call_args) fallback; assert directly on call_args.args[0]
This commit is contained in:
parent
0e493ab560
commit
17cdb552a3
3 changed files with 6 additions and 5 deletions
|
|
@ -75,6 +75,7 @@ class Retriever:
|
||||||
vector_score=None,
|
vector_score=None,
|
||||||
)
|
)
|
||||||
for vh in vec_hits:
|
for vh in vec_hits:
|
||||||
|
# _chunks is the loaded list of dicts from BM25Index; no public accessor exists
|
||||||
text = next((c["text"] for c in self._bm25._chunks if c["id"] == vh.id), "")
|
text = next((c["text"] for c in self._bm25._chunks if c["id"] == vh.id), "")
|
||||||
if vh.id in merged:
|
if vh.id in merged:
|
||||||
existing = merged[vh.id]
|
existing = merged[vh.id]
|
||||||
|
|
@ -98,6 +99,7 @@ class Retriever:
|
||||||
|
|
||||||
def _combined(r: RetrievedChunk) -> float:
|
def _combined(r: RetrievedChunk) -> float:
|
||||||
bm25 = r.bm25_score
|
bm25 = r.bm25_score
|
||||||
|
# sqlite-vec returns L2 distance (lower=better); invert to [0,1] higher-is-better
|
||||||
vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0
|
vec = (1.0 / (1.0 + r.vector_score)) if r.vector_score is not None else 0.0
|
||||||
return bm25 * 0.5 + vec * 0.5
|
return bm25 * 0.5 + vec * 0.5
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ class Citation:
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class SynthesisResult:
|
class SynthesisResult:
|
||||||
answer: str
|
answer: str
|
||||||
citations: list[Citation]
|
citations: tuple[Citation, ...]
|
||||||
|
|
||||||
|
|
||||||
class Synthesizer:
|
class Synthesizer:
|
||||||
|
|
@ -47,12 +47,12 @@ class Synthesizer:
|
||||||
|
|
||||||
answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
|
answer = self._llm.complete(prompt, system=_SYSTEM_PROMPT)
|
||||||
|
|
||||||
citations = [
|
citations = tuple(
|
||||||
Citation(
|
Citation(
|
||||||
doc_id=c.doc_id,
|
doc_id=c.doc_id,
|
||||||
page_number=c.page_number,
|
page_number=c.page_number,
|
||||||
snippet=c.text[:200],
|
snippet=c.text[:200],
|
||||||
)
|
)
|
||||||
for c in chunks
|
for c in chunks
|
||||||
]
|
)
|
||||||
return SynthesisResult(answer=answer, citations=citations)
|
return SynthesisResult(answer=answer, citations=citations)
|
||||||
|
|
|
||||||
|
|
@ -40,8 +40,7 @@ def test_synthesizer_builds_context_from_chunks():
|
||||||
synth = Synthesizer(mock_llm)
|
synth = Synthesizer(mock_llm)
|
||||||
synth.synthesize("Q?", [], [_chunk(text="Detailed rule text here.")])
|
synth.synthesize("Q?", [], [_chunk(text="Detailed rule text here.")])
|
||||||
|
|
||||||
call_args = mock_llm.complete.call_args
|
assert "Detailed rule text here." in mock_llm.complete.call_args.args[0]
|
||||||
assert "Detailed rule text here." in call_args[0][0] or "Detailed rule text here." in str(call_args)
|
|
||||||
|
|
||||||
|
|
||||||
def test_synthesizer_uses_system_prompt():
|
def test_synthesizer_uses_system_prompt():
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue