diff --git a/app/api.py b/app/api.py index 1ad4129..f71ab50 100644 --- a/app/api.py +++ b/app/api.py @@ -147,15 +147,12 @@ from app.models import router as models_router import app.models as _models_module app.include_router(models_router, prefix="/api/models") -from app.cforch import router as cforch_router -app.include_router(cforch_router, prefix="/api/cforch") +from app.eval.cforch import router as eval_router +app.include_router(eval_router, prefix="/api") from app.imitate import router as imitate_router app.include_router(imitate_router, prefix="/api/imitate") -from app.style import router as style_router -app.include_router(style_router, prefix="/api/style") - from app.data.fetch import router as fetch_router app.include_router(fetch_router, prefix="/api") @@ -163,99 +160,6 @@ app.include_router(fetch_router, prefix="/api") from fastapi.responses import StreamingResponse -# --------------------------------------------------------------------------- -# Benchmark endpoints -# --------------------------------------------------------------------------- - -@app.get("/api/benchmark/models") -def get_benchmark_models() -> dict: - """Return installed models grouped by adapter_type category.""" - models_dir: Path = _models_module._MODELS_DIR - categories: dict[str, list[dict]] = { - "ZeroShotAdapter": [], - "RerankerAdapter": [], - "GenerationAdapter": [], - "Unknown": [], - } - if models_dir.exists(): - for sub in models_dir.iterdir(): - if not sub.is_dir(): - continue - info_path = sub / "model_info.json" - adapter_type = "Unknown" - repo_id: str | None = None - if info_path.exists(): - try: - info = json.loads(info_path.read_text(encoding="utf-8")) - adapter_type = info.get("adapter_type") or info.get("adapter_recommendation") or "Unknown" - repo_id = info.get("repo_id") - except Exception: - pass - bucket = adapter_type if adapter_type in categories else "Unknown" - entry: dict = {"name": sub.name, "repo_id": repo_id, "adapter_type": adapter_type} - 
categories[bucket].append(entry) - return {"categories": categories} - - -@app.get("/api/benchmark/results") -def get_benchmark_results(): - """Return the most recently saved benchmark results, or an empty envelope.""" - path = _DATA_DIR / "benchmark_results.json" - if not path.exists(): - return {"models": {}, "sample_count": 0, "timestamp": None} - return json.loads(path.read_text()) - - -@app.get("/api/benchmark/run") -def run_benchmark(include_slow: bool = False, model_names: str = ""): - """Spawn the benchmark script and stream stdout as SSE progress events.""" - python_bin = "/devl/miniconda3/envs/job-seeker-classifiers/bin/python" - script = str(_ROOT / "scripts" / "benchmark_classifier.py") - cmd = [python_bin, script, "--score", "--save"] - if include_slow: - cmd.append("--include-slow") - if model_names: - names = [n.strip() for n in model_names.split(",") if n.strip()] - if names: - cmd.extend(["--models"] + names) - - def generate(): - try: - proc = _subprocess.Popen( - cmd, - stdout=_subprocess.PIPE, - stderr=_subprocess.STDOUT, - text=True, - bufsize=1, - cwd=str(_ROOT), - ) - _running_procs["benchmark"] = proc - _cancelled_jobs.discard("benchmark") # clear any stale flag from a prior run - try: - for line in proc.stdout: - line = line.rstrip() - if line: - yield f"data: {json.dumps({'type': 'progress', 'message': line})}\n\n" - proc.wait() - if proc.returncode == 0: - yield f"data: {json.dumps({'type': 'complete'})}\n\n" - elif "benchmark" in _cancelled_jobs: - _cancelled_jobs.discard("benchmark") - yield f"data: {json.dumps({'type': 'cancelled'})}\n\n" - else: - yield f"data: {json.dumps({'type': 'error', 'message': f'Process exited with code {proc.returncode}'})}\n\n" - finally: - _running_procs.pop("benchmark", None) - except Exception as exc: - yield f"data: {json.dumps({'type': 'error', 'message': str(exc)})}\n\n" - - return StreamingResponse( - generate(), - media_type="text/event-stream", - headers={"Cache-Control": "no-cache", 
"X-Accel-Buffering": "no"}, - ) - - # --------------------------------------------------------------------------- # Finetune endpoints # --------------------------------------------------------------------------- @@ -347,20 +251,6 @@ def run_finetune_endpoint( ) -@app.post("/api/benchmark/cancel") -def cancel_benchmark(): - """Kill the running benchmark subprocess. 404 if none is running.""" - proc = _running_procs.get("benchmark") - if proc is None: - raise HTTPException(404, "No benchmark is running") - _cancelled_jobs.add("benchmark") - proc.terminate() - try: - proc.wait(timeout=3) - except _subprocess.TimeoutExpired: - proc.kill() - return {"status": "cancelled"} - @app.post("/api/finetune/cancel") def cancel_finetune(): diff --git a/app/eval/__init__.py b/app/eval/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/eval/cforch.py b/app/eval/cforch.py new file mode 100644 index 0000000..62d28e0 --- /dev/null +++ b/app/eval/cforch.py @@ -0,0 +1,38 @@ +"""Avocet -- eval router aggregator. + +Collects benchmark sub-routers into a single importable `router` +for the api.py factory. Each sub-router retains its established prefix +so no frontend URL changes are needed. 
def set_config_dir(path) -> None:
    """Forward a config-directory override to every benchmark sub-module.

    Test helper. ``path`` is handed unchanged to the ``set_config_dir``
    hook of ``app.cforch``, ``app.style``, ``app.voice`` and
    ``app.plans_bench``, in that order.
    """
    # Resolve all four modules first, then apply the override to each one.
    import app.cforch as cforch_module
    import app.style as style_module
    import app.voice as voice_module
    import app.plans_bench as plans_module

    for target in (cforch_module, style_module, voice_module, plans_module):
        target.set_config_dir(path)
def set_config_dir(path: Path | str | None) -> None:
    """Override the config directory -- used by tests.

    Args:
        path: Directory to use, or ``None`` to clear the override. A value
            that is not already a ``Path`` (for example a ``str``) is
            converted with ``Path(...)`` so the stored value always matches
            the ``Path | None`` annotation on ``_CONFIG_DIR``. A ``Path``
            argument is stored as the very same object.
    """
    global _CONFIG_DIR
    # The eval aggregator's set_config_dir forwards its untyped argument
    # as-is, so normalise here instead of trusting every caller.
    if path is None or isinstance(path, Path):
        _CONFIG_DIR = path
    else:
        _CONFIG_DIR = Path(path)
"""POST /api/finetune/cancel must call terminate() on the running process.""" @@ -497,24 +472,6 @@ def test_finetune_cancel_terminates_running_process(client): api_module._cancelled_jobs.discard("finetune") -def test_benchmark_cancel_kills_process_on_timeout(client): - """POST /api/benchmark/cancel must call kill() if the process does not exit within 3 s.""" - import subprocess - from unittest.mock import MagicMock - from app import api as api_module - - mock_proc = MagicMock() - mock_proc.wait.side_effect = subprocess.TimeoutExpired(cmd="benchmark", timeout=3) - api_module._running_procs["benchmark"] = mock_proc - - try: - r = client.post("/api/benchmark/cancel") - assert r.status_code == 200 - mock_proc.kill.assert_called_once() - finally: - api_module._running_procs.pop("benchmark", None) - api_module._cancelled_jobs.discard("benchmark") - def test_finetune_run_emits_cancelled_event(client): """GET /api/finetune/run must emit cancelled (not error) when job was cancelled.""" @@ -542,29 +499,3 @@ def test_finetune_run_emits_cancelled_event(client): finally: api_module._cancelled_jobs.discard("finetune") - -def test_benchmark_run_emits_cancelled_event(client): - """GET /api/benchmark/run must emit cancelled (not error) when job was cancelled.""" - from unittest.mock import patch, MagicMock - from app import api as api_module - - mock_proc = MagicMock() - mock_proc.stdout = iter([]) - mock_proc.returncode = -15 - - def mock_wait(): - # Simulate cancel being called while the process is running (after discard clears stale flag) - api_module._cancelled_jobs.add("benchmark") - - mock_proc.wait = mock_wait - - def mock_popen(cmd, **kwargs): - return mock_proc - - try: - with patch("app.api._subprocess.Popen",side_effect=mock_popen): - r = client.get("/api/benchmark/run") - assert '{"type": "cancelled"}' in r.text - assert '"type": "error"' not in r.text - finally: - api_module._cancelled_jobs.discard("benchmark") diff --git a/tests/test_cforch.py b/tests/test_cforch.py 
def test_eval_cforch_router_includes_all_sub_routers():
    """The aggregated eval router exposes at least one route per sub-router."""
    from app.eval.cforch import router

    paths = {route.path for route in router.routes}
    # Checked in the same order as the sub-routers are mounted, so the first
    # missing prefix is the one reported.
    for segment in ("/cforch/", "/style/", "/voice/", "/plans-bench/"):
        matching = [p for p in paths if segment in p]
        assert matching, f"no {segment} routes found in {paths}"