feat(classifier): add _cosine() helper for embedding similarity
This commit is contained in:
parent
bce932461a
commit
78b64d007d
2 changed files with 34 additions and 0 deletions
|
|
@@ -7,6 +7,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
|
|
@@ -117,6 +118,14 @@ def compute_metrics(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _cosine(a: list[float], b: list[float]) -> float:
|
||||||
|
dot = sum(x * y for x, y in zip(a, b))
|
||||||
|
norm_a = sum(x * x for x in a) ** 0.5
|
||||||
|
norm_b = sum(x * x for x in b) ** 0.5
|
||||||
|
return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0
|
||||||
|
|
||||||
|
|
||||||
class ClassifierAdapter(abc.ABC):
|
class ClassifierAdapter(abc.ABC):
|
||||||
"""Abstract base for all email classifier adapters."""
|
"""Abstract base for all email classifier adapters."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -268,3 +268,28 @@ def test_finetuned_adapter_unload_clears_pipeline():
|
||||||
assert adapter._pipeline is not None
|
assert adapter._pipeline is not None
|
||||||
adapter.unload()
|
adapter.unload()
|
||||||
assert adapter._pipeline is None
|
assert adapter._pipeline is None
|
||||||
|
|
||||||
|
# ---- _cosine() tests ----
|
||||||
|
|
||||||
|
def test_cosine_identical_unit_vectors():
    """Identical unit vectors have a cosine similarity of 1."""
    from scripts.classifier_adapters import _cosine

    assert _cosine([1.0, 0.0], [1.0, 0.0]) == pytest.approx(1.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cosine_orthogonal_vectors():
    """Perpendicular vectors have a cosine similarity of 0."""
    from scripts.classifier_adapters import _cosine

    x_axis = [1.0, 0.0]
    y_axis = [0.0, 1.0]
    similarity = _cosine(x_axis, y_axis)
    assert similarity == pytest.approx(0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cosine_known_value():
    """A unit vector at 45 degrees to the x-axis scores 1/sqrt(2)."""
    import math
    from scripts.classifier_adapters import _cosine

    # Both inputs are unit length, so the similarity equals the dot product,
    # which is just the diagonal's x component: 1/sqrt(2).
    component = 1.0 / math.sqrt(2)
    diagonal = [component, component]
    assert _cosine([1.0, 0.0], diagonal) == pytest.approx(component)
|
||||||
|
|
||||||
|
|
||||||
|
def test_cosine_zero_vector_returns_zero():
    """A zero-magnitude input yields 0.0 rather than a division error."""
    from scripts.classifier_adapters import _cosine

    zero_vector = [0.0, 0.0]
    similarity = _cosine(zero_vector, [1.0, 0.0])
    assert similarity == pytest.approx(0.0)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue