From 276bdadb92528a82efea39b31fa31a31cb6ff43c Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Thu, 7 May 2026 08:37:18 -0700
Subject: [PATCH] feat: add embed_bench module scaffold and _cosine() helper

---
 app/eval/embed_bench.py   | 81 +++++++++++++++++++++++++++++++++++++++
 tests/test_embed_bench.py | 50 ++++++++++++++++++++++++
 2 files changed, 131 insertions(+)
 create mode 100644 app/eval/embed_bench.py
 create mode 100644 tests/test_embed_bench.py

diff --git a/app/eval/embed_bench.py b/app/eval/embed_bench.py
new file mode 100644
index 0000000..7ca4ccb
--- /dev/null
+++ b/app/eval/embed_bench.py
@@ -0,0 +1,81 @@
+"""Avocet — embedding model comparison harness.
+
+Exposes FastAPI routes under /api/embed-bench (mounted via app/eval/cforch.py).
+All computation is local: no LLM inference, Ollama only. MIT tier throughout.
+"""
+from __future__ import annotations
+
+import csv
+import io
+import json
+import logging
+import math
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import httpx
+import yaml
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel, field_validator
+
+logger = logging.getLogger(__name__)
+
+_ROOT = Path(__file__).parent.parent.parent  # three levels above this file (app/eval/embed_bench.py)
+_CONFIG_DIR: Path | None = None   # override via set_config_dir() in tests
+_RUN_ACTIVE: bool = False  # not read anywhere in this module yet; the test fixture resets it
+_RATINGS_FILE = _ROOT / "data" / "embed_bench_ratings.jsonl"  # default returned by _ratings_path()
+
+router = APIRouter()  # no routes are registered on it in this scaffold
+
+
+# ── Testability seam ──────────────────────────────────────────────────────────
+
+def set_config_dir(path: Path | None) -> None:  # None restores the _ROOT-based default paths
+    global _CONFIG_DIR
+    _CONFIG_DIR = path  # consulted by _config_file() and _ratings_path()
+
+
+# ── Internal helpers ──────────────────────────────────────────────────────────
+
+def _config_file() -> Path:
+    """Return the label_tool.yaml path, honouring the set_config_dir() override."""
+    base_dir = _ROOT / "config" if _CONFIG_DIR is None else _CONFIG_DIR
+    return base_dir / "label_tool.yaml"
+
+
+def _load_config() -> dict:
+    """Load label_tool.yaml; return {} if missing, unparsable, or not a mapping."""
+    cfg_path = _config_file()
+    try:
+        data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) if cfg_path.exists() else {}
+    except yaml.YAMLError as exc:
+        logger.warning("Failed to parse embed_bench config %s: %s", cfg_path, exc)
+        data = {}
+    return data if isinstance(data, dict) else {}  # a list/scalar/empty root would break .get()
+
+
+def _ollama_url() -> str:
+    """Return the Ollama base URL: embed_bench's, else cforch's, else the localhost default."""
+    cfg = _load_config()
+    for section in ("embed_bench", "cforch"):
+        url = (cfg.get(section) or {}).get("ollama_url")
+        if url:  # skip missing AND empty/null values so the default still applies
+            return url
+    return "http://localhost:11434"
+
+
+def _ratings_path() -> Path:
+    """Return the ratings JSONL path, relocated into the override dir when one is set."""
+    override = _CONFIG_DIR
+    return _RATINGS_FILE if override is None else override / "embed_bench_ratings.jsonl"
+
+
+def _cosine(a: list[float], b: list[float]) -> float:
+    """Cosine similarity of *a* and *b*; 0.0 when either vector has zero magnitude."""
+    numerator = sum(p * q for p, q in zip(a, b))  # zip stops at the shorter vector
+    norm_a, norm_b = (math.sqrt(sum(v * v for v in vec)) for vec in (a, b))
+    if not (norm_a and norm_b):
+        return 0.0
+    return numerator / (norm_a * norm_b)
diff --git a/tests/test_embed_bench.py b/tests/test_embed_bench.py
new file mode 100644
index 0000000..38601e7
--- /dev/null
+++ b/tests/test_embed_bench.py
@@ -0,0 +1,50 @@
+"""Tests for app/eval/embed_bench.py."""
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+# ── Fixtures ──────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def reset_embed_bench_globals(tmp_path):
+    """Isolate each test: config dir -> tmp_path, run flag cleared, both restored afterwards."""
+    from app.eval import embed_bench as mod
+
+    saved = (mod._CONFIG_DIR, mod._RUN_ACTIVE)
+    mod.set_config_dir(tmp_path)
+    mod._RUN_ACTIVE = False
+
+    yield tmp_path
+
+    # Teardown: put the module globals back exactly as they were found.
+    original_dir, original_flag = saved
+    mod.set_config_dir(original_dir)
+    mod._RUN_ACTIVE = original_flag
+
+
+@pytest.fixture
+def client():  # not requested by any test visible in this patch
+    from app.api import app  # imported inside the fixture, not at module import time
+    return TestClient(app)
+
+
+# ── cosine helper ──────────────────────────────────────────────────────────────
+
+def test_cosine_identical():  # same vector twice -> similarity 1.0
+    from app.eval.embed_bench import _cosine
+    assert _cosine([1.0, 0.0], [1.0, 0.0]) == pytest.approx(1.0)
+
+
+def test_cosine_orthogonal():  # perpendicular unit vectors -> similarity 0.0
+    from app.eval.embed_bench import _cosine
+    assert _cosine([1.0, 0.0], [0.0, 1.0]) == pytest.approx(0.0)
+
+
+def test_cosine_opposite():  # negated vector -> similarity -1.0
+    from app.eval.embed_bench import _cosine
+    assert _cosine([1.0, 0.0], [-1.0, 0.0]) == pytest.approx(-1.0)