feat: build app/eval/cforch.py aggregating eval benchmark routers
This commit is contained in:
parent
d74ad3f972
commit
bccb385f61
6 changed files with 80 additions and 181 deletions
114
app/api.py
114
app/api.py
|
|
@ -147,15 +147,12 @@ from app.models import router as models_router
|
||||||
import app.models as _models_module
|
import app.models as _models_module
|
||||||
app.include_router(models_router, prefix="/api/models")
|
app.include_router(models_router, prefix="/api/models")
|
||||||
|
|
||||||
from app.cforch import router as cforch_router
|
from app.eval.cforch import router as eval_router
|
||||||
app.include_router(cforch_router, prefix="/api/cforch")
|
app.include_router(eval_router, prefix="/api")
|
||||||
|
|
||||||
from app.imitate import router as imitate_router
|
from app.imitate import router as imitate_router
|
||||||
app.include_router(imitate_router, prefix="/api/imitate")
|
app.include_router(imitate_router, prefix="/api/imitate")
|
||||||
|
|
||||||
from app.style import router as style_router
|
|
||||||
app.include_router(style_router, prefix="/api/style")
|
|
||||||
|
|
||||||
from app.data.fetch import router as fetch_router
|
from app.data.fetch import router as fetch_router
|
||||||
app.include_router(fetch_router, prefix="/api")
|
app.include_router(fetch_router, prefix="/api")
|
||||||
|
|
||||||
|
|
@ -163,99 +160,6 @@ app.include_router(fetch_router, prefix="/api")
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Benchmark endpoints
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@app.get("/api/benchmark/models")
|
|
||||||
def get_benchmark_models() -> dict:
|
|
||||||
"""Return installed models grouped by adapter_type category."""
|
|
||||||
models_dir: Path = _models_module._MODELS_DIR
|
|
||||||
categories: dict[str, list[dict]] = {
|
|
||||||
"ZeroShotAdapter": [],
|
|
||||||
"RerankerAdapter": [],
|
|
||||||
"GenerationAdapter": [],
|
|
||||||
"Unknown": [],
|
|
||||||
}
|
|
||||||
if models_dir.exists():
|
|
||||||
for sub in models_dir.iterdir():
|
|
||||||
if not sub.is_dir():
|
|
||||||
continue
|
|
||||||
info_path = sub / "model_info.json"
|
|
||||||
adapter_type = "Unknown"
|
|
||||||
repo_id: str | None = None
|
|
||||||
if info_path.exists():
|
|
||||||
try:
|
|
||||||
info = json.loads(info_path.read_text(encoding="utf-8"))
|
|
||||||
adapter_type = info.get("adapter_type") or info.get("adapter_recommendation") or "Unknown"
|
|
||||||
repo_id = info.get("repo_id")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
bucket = adapter_type if adapter_type in categories else "Unknown"
|
|
||||||
entry: dict = {"name": sub.name, "repo_id": repo_id, "adapter_type": adapter_type}
|
|
||||||
categories[bucket].append(entry)
|
|
||||||
return {"categories": categories}
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/benchmark/results")
|
|
||||||
def get_benchmark_results():
|
|
||||||
"""Return the most recently saved benchmark results, or an empty envelope."""
|
|
||||||
path = _DATA_DIR / "benchmark_results.json"
|
|
||||||
if not path.exists():
|
|
||||||
return {"models": {}, "sample_count": 0, "timestamp": None}
|
|
||||||
return json.loads(path.read_text())
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/benchmark/run")
|
|
||||||
def run_benchmark(include_slow: bool = False, model_names: str = ""):
|
|
||||||
"""Spawn the benchmark script and stream stdout as SSE progress events."""
|
|
||||||
python_bin = "/devl/miniconda3/envs/job-seeker-classifiers/bin/python"
|
|
||||||
script = str(_ROOT / "scripts" / "benchmark_classifier.py")
|
|
||||||
cmd = [python_bin, script, "--score", "--save"]
|
|
||||||
if include_slow:
|
|
||||||
cmd.append("--include-slow")
|
|
||||||
if model_names:
|
|
||||||
names = [n.strip() for n in model_names.split(",") if n.strip()]
|
|
||||||
if names:
|
|
||||||
cmd.extend(["--models"] + names)
|
|
||||||
|
|
||||||
def generate():
|
|
||||||
try:
|
|
||||||
proc = _subprocess.Popen(
|
|
||||||
cmd,
|
|
||||||
stdout=_subprocess.PIPE,
|
|
||||||
stderr=_subprocess.STDOUT,
|
|
||||||
text=True,
|
|
||||||
bufsize=1,
|
|
||||||
cwd=str(_ROOT),
|
|
||||||
)
|
|
||||||
_running_procs["benchmark"] = proc
|
|
||||||
_cancelled_jobs.discard("benchmark") # clear any stale flag from a prior run
|
|
||||||
try:
|
|
||||||
for line in proc.stdout:
|
|
||||||
line = line.rstrip()
|
|
||||||
if line:
|
|
||||||
yield f"data: {json.dumps({'type': 'progress', 'message': line})}\n\n"
|
|
||||||
proc.wait()
|
|
||||||
if proc.returncode == 0:
|
|
||||||
yield f"data: {json.dumps({'type': 'complete'})}\n\n"
|
|
||||||
elif "benchmark" in _cancelled_jobs:
|
|
||||||
_cancelled_jobs.discard("benchmark")
|
|
||||||
yield f"data: {json.dumps({'type': 'cancelled'})}\n\n"
|
|
||||||
else:
|
|
||||||
yield f"data: {json.dumps({'type': 'error', 'message': f'Process exited with code {proc.returncode}'})}\n\n"
|
|
||||||
finally:
|
|
||||||
_running_procs.pop("benchmark", None)
|
|
||||||
except Exception as exc:
|
|
||||||
yield f"data: {json.dumps({'type': 'error', 'message': str(exc)})}\n\n"
|
|
||||||
|
|
||||||
return StreamingResponse(
|
|
||||||
generate(),
|
|
||||||
media_type="text/event-stream",
|
|
||||||
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Finetune endpoints
|
# Finetune endpoints
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -347,20 +251,6 @@ def run_finetune_endpoint(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/benchmark/cancel")
|
|
||||||
def cancel_benchmark():
|
|
||||||
"""Kill the running benchmark subprocess. 404 if none is running."""
|
|
||||||
proc = _running_procs.get("benchmark")
|
|
||||||
if proc is None:
|
|
||||||
raise HTTPException(404, "No benchmark is running")
|
|
||||||
_cancelled_jobs.add("benchmark")
|
|
||||||
proc.terminate()
|
|
||||||
try:
|
|
||||||
proc.wait(timeout=3)
|
|
||||||
except _subprocess.TimeoutExpired:
|
|
||||||
proc.kill()
|
|
||||||
return {"status": "cancelled"}
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/finetune/cancel")
|
@app.post("/api/finetune/cancel")
|
||||||
def cancel_finetune():
|
def cancel_finetune():
|
||||||
|
|
|
||||||
0
app/eval/__init__.py
Normal file
0
app/eval/__init__.py
Normal file
38
app/eval/cforch.py
Normal file
38
app/eval/cforch.py
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
"""Avocet -- eval router aggregator.
|
||||||
|
|
||||||
|
Collects benchmark sub-routers into a single importable `router`
|
||||||
|
for the api.py factory. Each sub-router retains its established prefix
|
||||||
|
so no frontend URL changes are needed.
|
||||||
|
|
||||||
|
Route prefixes when mounted at /api in api.py:
|
||||||
|
/api/cforch/* -- cf-orch benchmark routes
|
||||||
|
/api/style/* -- writing style benchmark routes
|
||||||
|
/api/voice/* -- voice benchmark routes
|
||||||
|
/api/plans-bench/* -- plans benchmark routes
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
from app.cforch import router as _cforch_router
|
||||||
|
from app.style import router as _style_router
|
||||||
|
from app.voice import router as _voice_router
|
||||||
|
from app.plans_bench import router as _plans_router
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
router.include_router(_cforch_router, prefix="/cforch")
|
||||||
|
router.include_router(_style_router, prefix="/style")
|
||||||
|
router.include_router(_voice_router, prefix="/voice")
|
||||||
|
router.include_router(_plans_router, prefix="/plans-bench")
|
||||||
|
|
||||||
|
|
||||||
|
def set_config_dir(path) -> None:
|
||||||
|
"""Propagate config dir override to all sub-modules -- used by tests."""
|
||||||
|
import app.cforch as _cforch_mod
|
||||||
|
import app.style as _style_mod
|
||||||
|
import app.voice as _voice_mod
|
||||||
|
import app.plans_bench as _plans_mod
|
||||||
|
_cforch_mod.set_config_dir(path)
|
||||||
|
_style_mod.set_config_dir(path)
|
||||||
|
_voice_mod.set_config_dir(path)
|
||||||
|
_plans_mod.set_config_dir(path)
|
||||||
30
app/plans_bench.py
Normal file
30
app/plans_bench.py
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
"""Avocet -- Plans benchmark integration API (stub).
|
||||||
|
|
||||||
|
Placeholder module so that app/eval/cforch.py can import and include
|
||||||
|
this router. Full implementation follows in a subsequent task.
|
||||||
|
|
||||||
|
All endpoints are registered on `router` (a FastAPI APIRouter).
|
||||||
|
api.py (via the eval aggregator) includes this router at
|
||||||
|
prefix="/api/plans-bench".
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import APIRouter
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
_CONFIG_DIR: Path | None = None # override in tests via set_config_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def set_config_dir(path: Path | None) -> None:
|
||||||
|
"""Override config directory -- used by tests."""
|
||||||
|
global _CONFIG_DIR
|
||||||
|
_CONFIG_DIR = path
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/status")
|
||||||
|
def get_plans_bench_status() -> dict:
|
||||||
|
"""Return placeholder status for the plans benchmark module."""
|
||||||
|
return {"status": "not_implemented"}
|
||||||
|
|
@ -443,13 +443,6 @@ def test_finetune_run_passes_score_files_to_subprocess(client):
|
||||||
|
|
||||||
# ---- Cancel endpoint tests ----
|
# ---- Cancel endpoint tests ----
|
||||||
|
|
||||||
def test_benchmark_cancel_returns_404_when_not_running(client):
|
|
||||||
"""POST /api/benchmark/cancel must return 404 if no benchmark is running."""
|
|
||||||
from app import api as api_module
|
|
||||||
api_module._running_procs.pop("benchmark", None)
|
|
||||||
r = client.post("/api/benchmark/cancel")
|
|
||||||
assert r.status_code == 404
|
|
||||||
|
|
||||||
|
|
||||||
def test_finetune_cancel_returns_404_when_not_running(client):
|
def test_finetune_cancel_returns_404_when_not_running(client):
|
||||||
"""POST /api/finetune/cancel must return 404 if no finetune is running."""
|
"""POST /api/finetune/cancel must return 404 if no finetune is running."""
|
||||||
|
|
@ -459,24 +452,6 @@ def test_finetune_cancel_returns_404_when_not_running(client):
|
||||||
assert r.status_code == 404
|
assert r.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
def test_benchmark_cancel_terminates_running_process(client):
|
|
||||||
"""POST /api/benchmark/cancel must call terminate() on the running process."""
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
from app import api as api_module
|
|
||||||
|
|
||||||
mock_proc = MagicMock()
|
|
||||||
mock_proc.wait = MagicMock()
|
|
||||||
api_module._running_procs["benchmark"] = mock_proc
|
|
||||||
|
|
||||||
try:
|
|
||||||
r = client.post("/api/benchmark/cancel")
|
|
||||||
assert r.status_code == 200
|
|
||||||
assert r.json()["status"] == "cancelled"
|
|
||||||
mock_proc.terminate.assert_called_once()
|
|
||||||
finally:
|
|
||||||
api_module._running_procs.pop("benchmark", None)
|
|
||||||
api_module._cancelled_jobs.discard("benchmark")
|
|
||||||
|
|
||||||
|
|
||||||
def test_finetune_cancel_terminates_running_process(client):
|
def test_finetune_cancel_terminates_running_process(client):
|
||||||
"""POST /api/finetune/cancel must call terminate() on the running process."""
|
"""POST /api/finetune/cancel must call terminate() on the running process."""
|
||||||
|
|
@ -497,24 +472,6 @@ def test_finetune_cancel_terminates_running_process(client):
|
||||||
api_module._cancelled_jobs.discard("finetune")
|
api_module._cancelled_jobs.discard("finetune")
|
||||||
|
|
||||||
|
|
||||||
def test_benchmark_cancel_kills_process_on_timeout(client):
|
|
||||||
"""POST /api/benchmark/cancel must call kill() if the process does not exit within 3 s."""
|
|
||||||
import subprocess
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
from app import api as api_module
|
|
||||||
|
|
||||||
mock_proc = MagicMock()
|
|
||||||
mock_proc.wait.side_effect = subprocess.TimeoutExpired(cmd="benchmark", timeout=3)
|
|
||||||
api_module._running_procs["benchmark"] = mock_proc
|
|
||||||
|
|
||||||
try:
|
|
||||||
r = client.post("/api/benchmark/cancel")
|
|
||||||
assert r.status_code == 200
|
|
||||||
mock_proc.kill.assert_called_once()
|
|
||||||
finally:
|
|
||||||
api_module._running_procs.pop("benchmark", None)
|
|
||||||
api_module._cancelled_jobs.discard("benchmark")
|
|
||||||
|
|
||||||
|
|
||||||
def test_finetune_run_emits_cancelled_event(client):
|
def test_finetune_run_emits_cancelled_event(client):
|
||||||
"""GET /api/finetune/run must emit cancelled (not error) when job was cancelled."""
|
"""GET /api/finetune/run must emit cancelled (not error) when job was cancelled."""
|
||||||
|
|
@ -542,29 +499,3 @@ def test_finetune_run_emits_cancelled_event(client):
|
||||||
finally:
|
finally:
|
||||||
api_module._cancelled_jobs.discard("finetune")
|
api_module._cancelled_jobs.discard("finetune")
|
||||||
|
|
||||||
|
|
||||||
def test_benchmark_run_emits_cancelled_event(client):
|
|
||||||
"""GET /api/benchmark/run must emit cancelled (not error) when job was cancelled."""
|
|
||||||
from unittest.mock import patch, MagicMock
|
|
||||||
from app import api as api_module
|
|
||||||
|
|
||||||
mock_proc = MagicMock()
|
|
||||||
mock_proc.stdout = iter([])
|
|
||||||
mock_proc.returncode = -15
|
|
||||||
|
|
||||||
def mock_wait():
|
|
||||||
# Simulate cancel being called while the process is running (after discard clears stale flag)
|
|
||||||
api_module._cancelled_jobs.add("benchmark")
|
|
||||||
|
|
||||||
mock_proc.wait = mock_wait
|
|
||||||
|
|
||||||
def mock_popen(cmd, **kwargs):
|
|
||||||
return mock_proc
|
|
||||||
|
|
||||||
try:
|
|
||||||
with patch("app.api._subprocess.Popen",side_effect=mock_popen):
|
|
||||||
r = client.get("/api/benchmark/run")
|
|
||||||
assert '{"type": "cancelled"}' in r.text
|
|
||||||
assert '"type": "error"' not in r.text
|
|
||||||
finally:
|
|
||||||
api_module._cancelled_jobs.discard("benchmark")
|
|
||||||
|
|
|
||||||
|
|
@ -367,3 +367,13 @@ def test_run_passes_license_key_env_to_subprocess(client, config_dir, tmp_path,
|
||||||
client.get("/api/cforch/run")
|
client.get("/api/cforch/run")
|
||||||
|
|
||||||
assert captured_env.get("CF_LICENSE_KEY") == "CFG-AVCT-ENV-ONLY-KEY"
|
assert captured_env.get("CF_LICENSE_KEY") == "CFG-AVCT-ENV-ONLY-KEY"
|
||||||
|
|
||||||
|
|
||||||
|
def test_eval_cforch_router_includes_all_sub_routers():
|
||||||
|
"""eval/cforch.py router must include routes from all four sub-routers."""
|
||||||
|
from app.eval.cforch import router
|
||||||
|
paths = {r.path for r in router.routes}
|
||||||
|
assert any("/cforch/" in p for p in paths), f"no /cforch/ routes found in {paths}"
|
||||||
|
assert any("/style/" in p for p in paths), f"no /style/ routes found in {paths}"
|
||||||
|
assert any("/voice/" in p for p in paths), f"no /voice/ routes found in {paths}"
|
||||||
|
assert any("/plans-bench/" in p for p in paths), f"no /plans-bench/ routes found in {paths}"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue