From 276bdadb92528a82efea39b31fa31a31cb6ff43c Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Thu, 7 May 2026 08:37:18 -0700
Subject: [PATCH] feat: add embed_bench module scaffold and _cosine() helper

---
 app/eval/embed_bench.py   | 117 ++++++++++++++++++++++++++++++++++++++
 tests/test_embed_bench.py |  77 +++++++++++++++++++++++++
 2 files changed, 194 insertions(+)
 create mode 100644 app/eval/embed_bench.py
 create mode 100644 tests/test_embed_bench.py

diff --git a/app/eval/embed_bench.py b/app/eval/embed_bench.py
new file mode 100644
index 0000000..7ca4ccb
--- /dev/null
+++ b/app/eval/embed_bench.py
@@ -0,0 +1,117 @@
+"""Avocet — embedding model comparison harness.
+
+Exposes FastAPI routes under /api/embed-bench (mounted via app/eval/cforch.py).
+All computation is local: no LLM inference, Ollama only. MIT tier throughout.
+"""
+from __future__ import annotations
+
+import csv
+import io
+import json
+import logging
+import math
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import httpx
+import yaml
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel, field_validator
+
+logger = logging.getLogger(__name__)
+
+_ROOT = Path(__file__).parent.parent.parent
+_CONFIG_DIR: Path | None = None  # override via set_config_dir() in tests
+_RUN_ACTIVE: bool = False
+_RATINGS_FILE = _ROOT / "data" / "embed_bench_ratings.jsonl"
+_DEFAULT_OLLAMA_URL = "http://localhost:11434"
+
+router = APIRouter()
+
+
+# ── Testability seam ──────────────────────────────────────────────────────────
+
+def set_config_dir(path: Path | None) -> None:
+    """Point config/ratings lookups at *path*; None restores defaults."""
+    global _CONFIG_DIR
+    _CONFIG_DIR = path
+
+
+# ── Internal helpers ──────────────────────────────────────────────────────────
+
+def _config_file() -> Path:
+    """Return the label_tool.yaml path, honouring the test override."""
+    if _CONFIG_DIR is not None:
+        return _CONFIG_DIR / "label_tool.yaml"
+    return _ROOT / "config" / "label_tool.yaml"
+
+
+def _load_config() -> dict:
+    """Load label_tool.yaml as a dict.
+
+    Returns {} when the file is missing, is not valid YAML, or does not hold
+    a mapping at the top level, so callers can always call .get() on it.
+    """
+    f = _config_file()
+    if not f.exists():
+        return {}
+    try:
+        data = yaml.safe_load(f.read_text(encoding="utf-8"))
+    except yaml.YAMLError as exc:
+        logger.warning("Failed to parse embed_bench config %s: %s", f, exc)
+        return {}
+    if data is None:  # empty file
+        return {}
+    if not isinstance(data, dict):
+        logger.warning(
+            "Ignoring embed_bench config %s: top level is not a mapping", f
+        )
+        return {}
+    return data
+
+
+def _ollama_url() -> str:
+    """Return the Ollama base URL.
+
+    Precedence: embed_bench.ollama_url, then cforch.ollama_url, then the
+    localhost default. Missing, null, or empty values fall through.
+    """
+    cfg = _load_config()
+    for section in ("embed_bench", "cforch"):
+        section_cfg = cfg.get(section)
+        # A section can be absent, null, or mistakenly a list/scalar.
+        if isinstance(section_cfg, dict) and section_cfg.get("ollama_url"):
+            return section_cfg["ollama_url"]
+    return _DEFAULT_OLLAMA_URL
+
+
+def _ratings_path() -> Path:
+    """Return the ratings JSONL path, honouring the test override."""
+    if _CONFIG_DIR is not None:
+        return _CONFIG_DIR / "embed_bench_ratings.jsonl"
+    return _RATINGS_FILE
+
+
+def _cosine(a: list[float], b: list[float]) -> float:
+    """Return the cosine similarity of two equal-length vectors.
+
+    Returns 0.0 when either vector has zero magnitude (this includes empty
+    vectors), where the angle is undefined.
+
+    Raises:
+        ValueError: if the vectors differ in length. zip() would otherwise
+            truncate to the shorter one and yield a meaningless score.
+    """
+    if len(a) != len(b):
+        raise ValueError(f"vector length mismatch: {len(a)} != {len(b)}")
+    dot = sum(x * y for x, y in zip(a, b))
+    mag_a = math.sqrt(sum(x * x for x in a))
+    mag_b = math.sqrt(sum(x * x for x in b))
+    denom = mag_a * mag_b
+    # Also covers the product underflowing to 0.0 for tiny magnitudes.
+    if denom == 0.0:
+        return 0.0
+    # Rounding can push the ratio a hair outside [-1, 1]; clamp it.
+    return max(-1.0, min(1.0, dot / denom))
diff --git a/tests/test_embed_bench.py b/tests/test_embed_bench.py
new file mode 100644
index 0000000..38601e7
--- /dev/null
+++ b/tests/test_embed_bench.py
@@ -0,0 +1,77 @@
+"""Tests for app/eval/embed_bench.py."""
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+
+# ── Fixtures ──────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def reset_embed_bench_globals(tmp_path):
+    """Redirect config dir to tmp_path and reset running flag."""
+    from app.eval import embed_bench as mod
+
+    prev_config_dir = mod._CONFIG_DIR
+    prev_running = mod._RUN_ACTIVE
+
+    mod.set_config_dir(tmp_path)
+    mod._RUN_ACTIVE = False
+
+    yield tmp_path
+
+    mod.set_config_dir(prev_config_dir)
+    mod._RUN_ACTIVE = prev_running
+
+
+@pytest.fixture
+def client():
+    from app.api import app
+    return TestClient(app)
+
+
+# ── cosine helper ──────────────────────────────────────────────────────────────
+
+def test_cosine_identical():
+    from app.eval.embed_bench import _cosine
+    assert _cosine([1.0, 0.0], [1.0, 0.0]) == pytest.approx(1.0)
+
+
+def test_cosine_orthogonal():
+    from app.eval.embed_bench import _cosine
+    assert _cosine([1.0, 0.0], [0.0, 1.0]) == pytest.approx(0.0)
+
+
+def test_cosine_opposite():
+    from app.eval.embed_bench import _cosine
+    assert _cosine([1.0, 0.0], [-1.0, 0.0]) == pytest.approx(-1.0)
+
+
+def test_cosine_zero_vector():
+    from app.eval.embed_bench import _cosine
+    assert _cosine([0.0, 0.0], [1.0, 0.0]) == 0.0
+
+
+def test_cosine_length_mismatch():
+    from app.eval.embed_bench import _cosine
+    with pytest.raises(ValueError):
+        _cosine([1.0, 0.0], [1.0])
+
+
+# ── config helpers ─────────────────────────────────────────────────────────────
+
+def test_ollama_url_default_without_config():
+    from app.eval.embed_bench import _ollama_url
+    assert _ollama_url() == "http://localhost:11434"
+
+
+def test_ollama_url_skips_null_and_non_mapping(reset_embed_bench_globals):
+    from app.eval.embed_bench import _ollama_url
+    cfg_file = reset_embed_bench_globals / "label_tool.yaml"
+    cfg_file.write_text(
+        "embed_bench: []\ncforch:\n  ollama_url:\n", encoding="utf-8"
+    )
+    assert _ollama_url() == "http://localhost:11434"