feat(classifier): add _cosine() helper for embedding similarity

This commit is contained in:
pyr0ball 2026-05-04 17:41:45 -07:00
parent bce932461a
commit 78b64d007d
2 changed files with 34 additions and 0 deletions

View file

@ -7,6 +7,7 @@ from __future__ import annotations
import abc
from collections import defaultdict
from pathlib import Path
from typing import Any
__all__ = [
@ -117,6 +118,14 @@ def compute_metrics(
return result
def _cosine(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    Args:
        a: First vector.
        b: Second vector.

    Returns:
        The cosine of the angle between the vectors, clamped to
        ``[-1.0, 1.0]``. Returns ``0.0`` when either vector has zero
        magnitude (including the empty vector), since the angle is
        undefined there.

    Note:
        If the vectors differ in length, the dot product only covers the
        overlapping prefix (``zip`` truncation) while each norm is computed
        over the full vector.
    """
    # Function-scope import keeps this helper self-contained.
    import math

    # hypot() rescales internally, so very small or very large components
    # do not underflow to 0 or overflow to inf the way sum(x * x) can.
    norm_a = math.hypot(*a)
    norm_b = math.hypot(*b)
    if not norm_a or not norm_b:
        return 0.0

    # Normalise each component before multiplying so the intermediate
    # products stay within [-1, 1] regardless of the input magnitudes.
    similarity = sum((x / norm_a) * (y / norm_b) for x, y in zip(a, b))

    # Rounding can land a hair outside the valid range; clamp with explicit
    # comparisons so a NaN result is passed through rather than masked.
    if similarity > 1.0:
        return 1.0
    if similarity < -1.0:
        return -1.0
    return similarity
class ClassifierAdapter(abc.ABC):
"""Abstract base for all email classifier adapters."""

View file

@ -268,3 +268,28 @@ def test_finetuned_adapter_unload_clears_pipeline():
assert adapter._pipeline is not None
adapter.unload()
assert adapter._pipeline is None
# ---- _cosine() tests ----
def test_cosine_identical_unit_vectors():
    """Identical unit vectors have a cosine similarity of 1."""
    from scripts.classifier_adapters import _cosine

    assert _cosine([1.0, 0.0], [1.0, 0.0]) == pytest.approx(1.0)
def test_cosine_orthogonal_vectors():
    """Perpendicular axis-aligned vectors have a cosine similarity of 0."""
    from scripts.classifier_adapters import _cosine

    x_axis = [1.0, 0.0]
    y_axis = [0.0, 1.0]
    similarity = _cosine(x_axis, y_axis)
    assert similarity == pytest.approx(0.0)
def test_cosine_known_value():
    """A unit vector 45 degrees off the x-axis scores 1/sqrt(2) against it."""
    import math
    from scripts.classifier_adapters import _cosine

    # Both vectors have unit norm, so the similarity equals the dot product:
    # [1, 0] . [1/sqrt(2), 1/sqrt(2)] = 1/sqrt(2).
    inv_root_two = 1.0 / math.sqrt(2)
    diagonal = [inv_root_two, inv_root_two]
    assert _cosine([1.0, 0.0], diagonal) == pytest.approx(inv_root_two)
def test_cosine_zero_vector_returns_zero():
    """A zero-magnitude vector in either position yields 0.0, not an error."""
    from scripts.classifier_adapters import _cosine

    zero = [0.0, 0.0]
    unit = [1.0, 0.0]
    # The zero-norm guard covers both operands, so exercise each position
    # as well as the case where both vectors are zero.
    assert _cosine(zero, unit) == pytest.approx(0.0)
    assert _cosine(unit, zero) == pytest.approx(0.0)
    assert _cosine(zero, zero) == pytest.approx(0.0)