feat(classifier): add _cosine() helper for embedding similarity
This commit is contained in:
parent
bce932461a
commit
78b64d007d
2 changed files with 34 additions and 0 deletions
|
|
@ -7,6 +7,7 @@ from __future__ import annotations
|
|||
|
||||
import abc
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
__all__ = [
|
||||
|
|
@ -117,6 +118,14 @@ def compute_metrics(
|
|||
return result
|
||||
|
||||
|
||||
|
||||
def _cosine(a: list[float], b: list[float]) -> float:
|
||||
dot = sum(x * y for x, y in zip(a, b))
|
||||
norm_a = sum(x * x for x in a) ** 0.5
|
||||
norm_b = sum(x * x for x in b) ** 0.5
|
||||
return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0
|
||||
|
||||
|
||||
class ClassifierAdapter(abc.ABC):
|
||||
"""Abstract base for all email classifier adapters."""
|
||||
|
||||
|
|
|
|||
|
|
@ -268,3 +268,28 @@ def test_finetuned_adapter_unload_clears_pipeline():
|
|||
assert adapter._pipeline is not None
|
||||
adapter.unload()
|
||||
assert adapter._pipeline is None
|
||||
|
||||
# ---- _cosine() tests ----
|
||||
|
||||
def test_cosine_identical_unit_vectors():
    """Two identical unit vectors have a cosine similarity of 1."""
    # Imported inside the test, matching the other _cosine tests.
    from scripts.classifier_adapters import _cosine

    assert _cosine([1.0, 0.0], [1.0, 0.0]) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_cosine_orthogonal_vectors():
    """Perpendicular vectors have a cosine similarity of 0."""
    from scripts.classifier_adapters import _cosine

    x_axis = [1.0, 0.0]
    y_axis = [0.0, 1.0]
    similarity = _cosine(x_axis, y_axis)
    assert similarity == pytest.approx(0.0)
|
||||
|
||||
|
||||
def test_cosine_known_value():
    """A 45-degree angle between unit vectors yields 1/sqrt(2)."""
    import math

    from scripts.classifier_adapters import _cosine

    # [1, 0] against the unit diagonal [1/sqrt(2), 1/sqrt(2)]:
    # dot = 1/sqrt(2) and both norms are 1, so the similarity is 1/sqrt(2).
    component = 1.0 / math.sqrt(2)
    diagonal = [component, component]
    similarity = _cosine([1.0, 0.0], diagonal)
    assert similarity == pytest.approx(1.0 / math.sqrt(2))
|
||||
|
||||
|
||||
def test_cosine_zero_vector_returns_zero():
    """A zero-norm operand yields 0 rather than a division error."""
    from scripts.classifier_adapters import _cosine

    zero_vector = [0.0, 0.0]
    unit_vector = [1.0, 0.0]
    similarity = _cosine(zero_vector, unit_vector)
    assert similarity == pytest.approx(0.0)
|
||||
|
|
|
|||
Loading…
Reference in a new issue