diff --git a/scripts/classifier_adapters.py b/scripts/classifier_adapters.py
index f2b4fff..775e2d6 100644
--- a/scripts/classifier_adapters.py
+++ b/scripts/classifier_adapters.py
@@ -7,6 +7,7 @@ from __future__ import annotations
 
 import abc
 from collections import defaultdict
+from pathlib import Path
 from typing import Any
 
 __all__ = [
@@ -117,6 +118,21 @@ def compute_metrics(
     return result
 
 
+
+def _cosine(a: list[float], b: list[float]) -> float:
+    """Return the cosine similarity of vectors *a* and *b*.
+
+    Returns 0.0 when either vector has zero magnitude, so callers never
+    hit a division by zero.  Components are paired with ``zip``; if the
+    inputs differ in length, the extra components of the longer vector
+    are left out of the dot product (but still count toward its norm).
+    """
+    dot = sum(x * y for x, y in zip(a, b))
+    norm_a = sum(x * x for x in a) ** 0.5
+    norm_b = sum(x * x for x in b) ** 0.5
+    return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0
+
+
 class ClassifierAdapter(abc.ABC):
     """Abstract base for all email classifier adapters."""
 
diff --git a/tests/test_classifier_adapters.py b/tests/test_classifier_adapters.py
index 13f8a94..2d429e2 100644
--- a/tests/test_classifier_adapters.py
+++ b/tests/test_classifier_adapters.py
@@ -268,3 +268,27 @@ def test_finetuned_adapter_unload_clears_pipeline():
     assert adapter._pipeline is not None
     adapter.unload()
     assert adapter._pipeline is None
+
+# ---- _cosine() tests ----
+
+def test_cosine_identical_unit_vectors():
+    from scripts.classifier_adapters import _cosine
+    assert _cosine([1.0, 0.0], [1.0, 0.0]) == pytest.approx(1.0)
+
+
+def test_cosine_orthogonal_vectors():
+    from scripts.classifier_adapters import _cosine
+    assert _cosine([1.0, 0.0], [0.0, 1.0]) == pytest.approx(0.0)
+
+
+def test_cosine_known_value():
+    import math
+    from scripts.classifier_adapters import _cosine
+    # [1,0] vs [1/sqrt(2), 1/sqrt(2)] → dot = 1/sqrt(2), both norms = 1 → 1/sqrt(2)
+    v = [1.0 / math.sqrt(2), 1.0 / math.sqrt(2)]
+    assert _cosine([1.0, 0.0], v) == pytest.approx(1.0 / math.sqrt(2))
+
+
+def test_cosine_zero_vector_returns_zero():
+    from scripts.classifier_adapters import _cosine
+    assert _cosine([0.0, 0.0], [1.0, 0.0]) == pytest.approx(0.0)