Compare commits

..

No commits in common. "main" and "feat/benchmark-model-picker" have entirely different histories.

19 changed files with 27 additions and 3372 deletions

View file

@ -1,19 +0,0 @@
# Avocet — environment variable configuration
# Copy to .env and fill in values. All keys are optional.
# label_tool.yaml takes precedence over env vars where both exist.
# ── Local inference (Ollama) ───────────────────────────────────────────────────
# OLLAMA_HOST defaults to http://localhost:11434 if unset.
OLLAMA_HOST=http://localhost:11434
OLLAMA_MODEL=llama3.2:3b
# ── cf-orch coordinator (paid/premium tiers) ───────────────────────────────────
# Required for multi-GPU LLM benchmarking via the cf-orch benchmark harness.
# Free-tier users can leave these unset and use Ollama only.
CF_ORCH_URL=http://localhost:7700
CF_LICENSE_KEY=CFG-AVCT-xxxx-xxxx-xxxx
# ── Cloud LLM backends (optional — paid/premium) ──────────────────────────────
# Set one of these to use a cloud LLM instead of a local model.
# ANTHROPIC_API_KEY=sk-ant-...
# OPENAI_API_KEY=sk-...

View file

@ -149,12 +149,6 @@ from app.models import router as models_router
import app.models as _models_module
app.include_router(models_router, prefix="/api/models")
from app.cforch import router as cforch_router
app.include_router(cforch_router, prefix="/api/cforch")
from app.imitate import router as imitate_router
app.include_router(imitate_router, prefix="/api/imitate")
# In-memory last-action store (single user, local tool — in-memory is fine)
_last_action: dict | None = None

View file

@ -1,337 +0,0 @@
"""Avocet — cf-orch benchmark integration API.
Wraps cf-orch's benchmark.py script and exposes it via the Avocet API.
Config is read from label_tool.yaml under the `cforch:` key.
All endpoints are registered on `router` (a FastAPI APIRouter).
api.py includes this router with prefix="/api/cforch".
Module-level globals (_CONFIG_DIR, _BENCH_RUNNING, _bench_proc) follow the
same testability pattern as sft.py — override _CONFIG_DIR via set_config_dir()
in test fixtures.
"""
from __future__ import annotations
import json
import logging
import os
import re
import subprocess as _subprocess
from pathlib import Path
from typing import Any
import yaml
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
logger = logging.getLogger(__name__)
_ROOT = Path(__file__).parent.parent
_CONFIG_DIR: Path | None = None # override in tests
_BENCH_RUNNING: bool = False
_bench_proc: Any = None # live Popen object while benchmark runs
router = APIRouter()
# ── Testability seams ──────────────────────────────────────────────────────────
def set_config_dir(path: Path | None) -> None:
global _CONFIG_DIR
_CONFIG_DIR = path
# ── Internal helpers ───────────────────────────────────────────────────────────
def _config_file() -> Path:
    """Resolve the label_tool.yaml path, honoring the test override dir."""
    base = _CONFIG_DIR if _CONFIG_DIR is not None else _ROOT / "config"
    return base / "label_tool.yaml"
def _load_cforch_config() -> dict:
    """Return the merged cf-orch configuration.

    Priority (highest to lowest):
      1. label_tool.yaml `cforch:` key
      2. Environment variables (CF_ORCH_URL, CF_LICENSE_KEY, OLLAMA_HOST,
         OLLAMA_MODEL)

    A missing or unparseable yaml file degrades to env-var-only config.
    """
    cfg_path = _config_file()
    file_cfg: dict = {}
    if cfg_path.exists():
        try:
            parsed = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
            file_cfg = parsed.get("cforch", {}) or {}
        except yaml.YAMLError as exc:
            logger.warning("Failed to parse cforch config %s: %s", cfg_path, exc)
    # Env vars only fill keys the yaml left absent or empty.
    merged = dict(file_cfg)
    for key, env_key in (
        ("coordinator_url", "CF_ORCH_URL"),
        ("license_key", "CF_LICENSE_KEY"),
        ("ollama_url", "OLLAMA_HOST"),
        ("ollama_model", "OLLAMA_MODEL"),
    ):
        merged[key] = file_cfg.get(key, "") or os.environ.get(env_key, "")
    return merged
def _strip_ansi(text: str) -> str:
"""Remove ANSI escape codes from a string."""
return re.sub(r'\x1b\[[0-9;]*m', '', text)
def _find_latest_summary(results_dir: str | None) -> Path | None:
"""Find the newest summary.json under results_dir, or None if not found."""
if not results_dir:
return None
rdir = Path(results_dir)
if not rdir.exists():
return None
# Subdirs are named YYYY-MM-DD-HHMMSS; sort lexicographically for chronological order
subdirs = sorted(
[d for d in rdir.iterdir() if d.is_dir()],
key=lambda d: d.name,
)
for subdir in reversed(subdirs):
summary = subdir / "summary.json"
if summary.exists():
return summary
return None
# ── GET /tasks ─────────────────────────────────────────────────────────────────
@router.get("/tasks")
def get_tasks() -> dict:
    """Return the benchmark task list parsed from bench_tasks.yaml.

    Unconfigured, missing, or unparseable task files all yield empty lists;
    the UI treats that as "no tasks available". `types` preserves first-seen
    order and is de-duplicated.
    """
    cfg = _load_cforch_config()
    tasks_path = cfg.get("bench_tasks", "")
    if not tasks_path:
        return {"tasks": [], "types": []}
    path = Path(tasks_path)
    if not path.exists():
        return {"tasks": [], "types": []}
    try:
        raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    except yaml.YAMLError as exc:
        logger.warning("Failed to parse bench_tasks.yaml %s: %s", path, exc)
        return {"tasks": [], "types": []}
    tasks: list[dict] = []
    type_order: dict[str, None] = {}  # dict keys give ordered de-duplication
    for entry in raw.get("tasks", []) or []:
        if not isinstance(entry, dict):
            continue
        tasks.append({
            "id": entry.get("id", ""),
            "name": entry.get("name", ""),
            "type": entry.get("type", ""),
            "prompt": (entry.get("prompt") or "").strip(),
            "system": (entry.get("system") or "").strip(),
        })
        task_type = entry.get("type", "")
        if task_type:
            type_order.setdefault(task_type, None)
    return {"tasks": tasks, "types": list(type_order)}
# ── GET /models ────────────────────────────────────────────────────────────────
@router.get("/models")
def get_models() -> dict:
    """Return the model list parsed from bench_models.yaml.

    Unconfigured, missing, or unparseable model files all yield an empty
    model list; `service` defaults to "ollama".
    """
    cfg = _load_cforch_config()
    models_path = cfg.get("bench_models", "")
    if not models_path:
        return {"models": []}
    path = Path(models_path)
    if not path.exists():
        return {"models": []}
    try:
        raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    except yaml.YAMLError as exc:
        logger.warning("Failed to parse bench_models.yaml %s: %s", path, exc)
        return {"models": []}
    models = [
        {
            "name": entry.get("name", ""),
            "id": entry.get("id", ""),
            "service": entry.get("service", "ollama"),
            "tags": entry.get("tags", []) or [],
            "vram_estimate_mb": entry.get("vram_estimate_mb", 0),
        }
        for entry in (raw.get("models", []) or [])
        if isinstance(entry, dict)
    ]
    return {"models": models}
# ── GET /run ───────────────────────────────────────────────────────────────────
@router.get("/run")
def run_benchmark(
    task_ids: str = "",       # comma-separated task ids to run (empty = all)
    model_tags: str = "",     # comma-separated model tags to run (empty = all)
    coordinator_url: str = "",  # per-request override of configured coordinator
    ollama_url: str = "",       # per-request override of configured ollama URL
) -> StreamingResponse:
    """Spawn cf-orch benchmark.py and stream stdout as SSE progress events.

    Events emitted on the stream: `progress` (one per stdout line, ANSI
    stripped), then on exit code 0 optionally `result` (latest summary.json)
    followed by `complete`; any failure yields an `error` event. Only one
    benchmark may run at a time, guarded by the module global _BENCH_RUNNING.

    Raises 409 when a benchmark is already running.
    """
    global _BENCH_RUNNING, _bench_proc
    if _BENCH_RUNNING:
        raise HTTPException(409, "A benchmark is already running")
    # Resolve config once, outside the generator, so a config-file change
    # mid-run cannot affect an in-flight benchmark.
    cfg = _load_cforch_config()
    bench_script = cfg.get("bench_script", "")
    bench_tasks = cfg.get("bench_tasks", "")
    bench_models = cfg.get("bench_models", "")
    results_dir = cfg.get("results_dir", "")
    python_bin = cfg.get("python_bin", "/devl/miniconda3/envs/cf/bin/python")
    cfg_coordinator = cfg.get("coordinator_url", "")
    cfg_ollama = cfg.get("ollama_url", "")
    cfg_license_key = cfg.get("license_key", "")
    def generate():
        global _BENCH_RUNNING, _bench_proc
        if not bench_script or not Path(bench_script).exists():
            # HTTP 200 has already been sent, so config errors are reported
            # inside the SSE stream rather than as an HTTP status.
            yield f"data: {json.dumps({'type': 'error', 'message': 'bench_script not configured or not found'})}\n\n"
            return
        cmd = [
            python_bin,
            bench_script,
            "--tasks", bench_tasks,
            "--models", bench_models,
            "--output", results_dir,
        ]
        if task_ids:
            cmd.extend(["--filter-tasks"] + task_ids.split(","))
        if model_tags:
            cmd.extend(["--filter-tags"] + model_tags.split(","))
        # query param overrides config, config overrides env var (already resolved by _load_cforch_config)
        effective_coordinator = coordinator_url if coordinator_url else cfg_coordinator
        effective_ollama = ollama_url if ollama_url else cfg_ollama
        if effective_coordinator:
            cmd.extend(["--coordinator", effective_coordinator])
        if effective_ollama:
            cmd.extend(["--ollama-url", effective_ollama])
        # Pass license key as env var so subprocess can authenticate with cf-orch
        proc_env = {**os.environ}
        if cfg_license_key:
            proc_env["CF_LICENSE_KEY"] = cfg_license_key
        _BENCH_RUNNING = True
        try:
            proc = _subprocess.Popen(
                cmd,
                stdout=_subprocess.PIPE,
                stderr=_subprocess.STDOUT,  # interleave stderr into one stream
                text=True,
                bufsize=1,  # line-buffered so progress arrives line by line
                env=proc_env,
            )
            _bench_proc = proc  # expose live Popen for POST /cancel
            try:
                for line in proc.stdout:
                    line = _strip_ansi(line.rstrip())
                    if line:
                        yield f"data: {json.dumps({'type': 'progress', 'message': line})}\n\n"
                proc.wait()
                if proc.returncode == 0:
                    summary_path = _find_latest_summary(results_dir)
                    if summary_path is not None:
                        try:
                            summary = json.loads(summary_path.read_text(encoding="utf-8"))
                            yield f"data: {json.dumps({'type': 'result', 'summary': summary})}\n\n"
                        except Exception as exc:
                            # Corrupt/missing summary degrades to complete-only.
                            logger.warning("Failed to read summary.json: %s", exc)
                    yield f"data: {json.dumps({'type': 'complete'})}\n\n"
                else:
                    yield f"data: {json.dumps({'type': 'error', 'message': f'Process exited with code {proc.returncode}'})}\n\n"
            finally:
                _bench_proc = None
        except Exception as exc:
            yield f"data: {json.dumps({'type': 'error', 'message': str(exc)})}\n\n"
        finally:
            # Always clear the lock, even when the client disconnects mid-stream.
            _BENCH_RUNNING = False
    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        # X-Accel-Buffering: no disables proxy buffering so events flush.
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
# ── GET /config ────────────────────────────────────────────────────────────────
@router.get("/config")
def get_cforch_config() -> dict:
    """Return the resolved cf-orch connection config (yaml merged with env).

    The license key itself is redacted — only a boolean "is it set" flag is
    returned. Consumed by the Settings UI to show connection state.
    """
    cfg = _load_cforch_config()
    yaml_present = _config_file().exists()
    return {
        "coordinator_url": cfg.get("coordinator_url", ""),
        "ollama_url": cfg.get("ollama_url", ""),
        "ollama_model": cfg.get("ollama_model", ""),
        "license_key_set": bool(cfg.get("license_key", "")),
        "source": "yaml+env" if yaml_present else "env",
    }
# ── GET /results ───────────────────────────────────────────────────────────────
@router.get("/results")
def get_results() -> dict:
    """Return the most recent benchmark summary.json from results_dir.

    Raises 404 when no results exist and 500 when the newest summary file
    cannot be read or parsed.
    """
    cfg = _load_cforch_config()
    summary_path = _find_latest_summary(cfg.get("results_dir", ""))
    if summary_path is None:
        raise HTTPException(404, "No benchmark results found")
    try:
        return json.loads(summary_path.read_text(encoding="utf-8"))
    except Exception as exc:
        raise HTTPException(500, f"Failed to read summary.json: {exc}") from exc
# ── POST /cancel ───────────────────────────────────────────────────────────────
@router.post("/cancel")
def cancel_benchmark() -> dict:
    """Terminate the running benchmark subprocess, if any.

    Raises 404 when nothing is running. A failed terminate() is logged but
    the running state is cleared anyway so the UI can recover.
    """
    global _BENCH_RUNNING, _bench_proc
    if not _BENCH_RUNNING:
        raise HTTPException(404, "No benchmark is currently running")
    proc = _bench_proc
    if proc is not None:
        try:
            proc.terminate()
        except Exception as exc:
            logger.warning("Failed to terminate benchmark process: %s", exc)
    _BENCH_RUNNING = False
    _bench_proc = None
    return {"status": "cancelled"}

View file

@ -1,352 +0,0 @@
"""Avocet — Imitate tab API.
Fetches real samples from sibling CF product APIs, sends them through selected
local LLMs (ollama), and streams responses back to the UI. Results can be
pushed into the SFT corrections queue for human review.
All endpoints registered on `router`. api.py includes this with prefix="/api/imitate".
Module-level globals follow the same testability pattern as cforch.py and sft.py:
override _CONFIG_DIR and _DATA_DIR via set_config_dir() / set_data_dir() in tests.
"""
from __future__ import annotations
import json
import logging
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.error import URLError
from urllib.request import Request, urlopen
import yaml
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from app.utils import append_jsonl
logger = logging.getLogger(__name__)
_ROOT = Path(__file__).parent.parent
_CONFIG_DIR: Path | None = None
_DATA_DIR: Path = _ROOT / "data"
router = APIRouter()
# ── Testability seams ──────────────────────────────────────────────────────────
def set_config_dir(path: Path | None) -> None:
global _CONFIG_DIR
_CONFIG_DIR = path
def set_data_dir(path: Path) -> None:
    """Test seam: redirect the data directory (sft_candidates.jsonl lives here)."""
    global _DATA_DIR
    _DATA_DIR = path
# ── Internal helpers ───────────────────────────────────────────────────────────
def _config_file() -> Path:
    """Resolve the label_tool.yaml path, honoring the test override dir."""
    base = _CONFIG_DIR if _CONFIG_DIR is not None else _ROOT / "config"
    return base / "label_tool.yaml"
def _load_imitate_config() -> dict:
    """Return the `imitate:` section of label_tool.yaml ({} when absent).

    A missing or unparseable file degrades to an empty config.
    """
    cfg_path = _config_file()
    if not cfg_path.exists():
        return {}
    try:
        parsed = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
    except yaml.YAMLError as exc:
        logger.warning("Failed to parse imitate config %s: %s", cfg_path, exc)
        return {}
    return parsed.get("imitate", {}) or {}
def _load_cforch_config() -> dict:
    """Return the `cforch:` section of label_tool.yaml ({} when absent).

    Only used for the ollama_url fallback in _ollama_url(). A missing or
    unparseable file degrades to an empty config.
    """
    f = _config_file()
    if not f.exists():
        return {}
    try:
        raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
    except yaml.YAMLError as exc:
        # Was silently swallowed (exc bound but unused); now logged for
        # consistency with _load_imitate_config and cforch.py's loader.
        logger.warning("Failed to parse cforch config %s: %s", f, exc)
        return {}
    return raw.get("cforch", {}) or {}
def _ollama_url(cfg: dict) -> str:
    """Resolve the ollama base URL: imitate cfg → cforch cfg → localhost default."""
    fallback = _load_cforch_config().get("ollama_url")
    return cfg.get("ollama_url") or fallback or "http://localhost:11434"
def _http_get_json(url: str, timeout: int = 5) -> Any:
    """Fetch JSON from url; raise URLError on failure.

    Stdlib urllib only (no external HTTP dependency); JSON decoding errors
    propagate as-is — callers map failures to HTTP responses themselves.
    """
    req = Request(url, headers={"Accept": "application/json"})
    with urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))
def _is_online(base_url: str, health_path: str = "/api/health") -> bool:
    """Probe a product's health endpoint; True when it answers with truthy JSON."""
    url = f"{base_url.rstrip('/')}{health_path}"
    try:
        return bool(_http_get_json(url, timeout=2))
    except Exception:
        # Any failure (refused, timeout, bad JSON) just means "offline".
        return False
def _extract_sample(
raw: Any, text_fields: list[str], sample_index: int = 0
) -> dict[str, Any]:
"""Pull one item from a list or dict response and extract text_fields."""
item: dict[str, Any]
if isinstance(raw, list):
if not raw:
return {}
item = raw[min(sample_index, len(raw) - 1)]
elif isinstance(raw, dict):
# may be {items: [...]} or the item itself
for key in ("items", "results", "data", "jobs", "listings", "pantry",
"saved_searches", "entries", "calls", "records"):
if key in raw and isinstance(raw[key], list):
lst = raw[key]
item = lst[min(sample_index, len(lst) - 1)] if lst else {}
break
else:
item = raw
else:
return {}
parts = []
for field in text_fields:
val = item.get(field)
if val and str(val).strip():
parts.append(f"**{field}**: {val}")
return {"item": item, "text": "\n\n".join(parts)}
def _candidates_file() -> Path:
    """Path of the JSONL queue that feeds the SFT corrections review."""
    return _DATA_DIR.joinpath("sft_candidates.jsonl")
def _sse(data: dict) -> str:
return f"data: {json.dumps(data)}\n\n"
def _run_ollama_streaming(
    ollama_base: str,
    model_id: str,
    prompt: str,
    temperature: float,
) -> tuple[str, int]:
    """Call ollama /api/generate and return (full_response, elapsed_ms).

    Despite the name, the HTTP request itself is non-streaming
    ("stream": False): this blocks until the model finishes. Streaming to
    the browser is handled by the SSE generator in run_imitate(), which
    emits one event per completed model.
    (Fixes the old docstring, which wrongly claimed stream=True.)

    Raises RuntimeError on any network/HTTP/JSON failure.
    """
    url = f"{ollama_base.rstrip('/')}/api/generate"
    payload = json.dumps({
        "model": model_id,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature},
    }).encode("utf-8")
    req = Request(url, data=payload, method="POST",
                  headers={"Content-Type": "application/json"})
    t0 = time.time()
    try:
        # Generous timeout: large local models can take minutes per prompt.
        with urlopen(req, timeout=120) as resp:
            body = json.loads(resp.read().decode("utf-8"))
        elapsed = int((time.time() - t0) * 1000)
        return body.get("response", ""), elapsed
    except Exception as exc:
        # The old code computed `elapsed` here but never used it — removed.
        raise RuntimeError(str(exc)) from exc
# ── GET /products ──────────────────────────────────────────────────────────────
@router.get("/products")
def get_products() -> dict:
    """List configured CF products, each with a live online/offline probe."""
    cfg = _load_imitate_config()
    products: list[dict] = []
    for entry in cfg.get("products", []) or []:
        if not isinstance(entry, dict):
            continue
        base_url = entry.get("base_url", "")
        health = entry.get("health_path", "/api/health")
        products.append({
            "id": entry.get("id", ""),
            "name": entry.get("name", ""),
            "icon": entry.get("icon", "📦"),
            "description": entry.get("description", ""),
            "base_url": base_url,
            # Only probe when a base_url is configured; otherwise offline.
            "online": bool(base_url) and _is_online(base_url, health),
        })
    return {"products": products}
# ── GET /products/{product_id}/sample ─────────────────────────────────────────
@router.get("/products/{product_id}/sample")
def get_sample(product_id: str, index: int = 0) -> dict:
    """Fetch a real sample from the given product's API.

    Looks the product up in the imitate config, calls its sample_endpoint,
    extracts the configured text_fields, and renders prompt_template.

    Raises:
        404: product_id not configured, or the API returned no items.
        422: product entry is missing base_url or sample_endpoint.
        503: product API unreachable.
        502: product API returned an unparseable response.
    """
    cfg = _load_imitate_config()
    products_raw = cfg.get("products", []) or []
    product: dict | None = None
    for p in products_raw:
        if isinstance(p, dict) and p.get("id") == product_id:
            product = p
            break
    if product is None:
        raise HTTPException(404, f"Product '{product_id}' not in config")
    base_url = product.get("base_url", "").rstrip("/")
    endpoint = product.get("sample_endpoint", "")
    if not base_url or not endpoint:
        raise HTTPException(422, "Product missing base_url or sample_endpoint")
    url = f"{base_url}{endpoint}"
    try:
        raw = _http_get_json(url, timeout=5)
    except URLError as exc:
        raise HTTPException(503, f"Product API unreachable: {exc}") from exc
    except Exception as exc:
        raise HTTPException(502, f"Bad response from product API: {exc}") from exc
    text_fields = product.get("text_fields", []) or []
    extracted = _extract_sample(raw, text_fields, index)
    if not extracted:
        raise HTTPException(404, "No sample items returned by product API")
    # {text} is the only placeholder substituted; the template comes from config.
    prompt_template = product.get("prompt_template", "{text}")
    prompt = prompt_template.replace("{text}", extracted["text"])
    return {
        "product_id": product_id,
        "sample_index": index,
        "text": extracted["text"],
        "prompt": prompt,
        "raw_item": extracted.get("item", {}),
    }
# ── GET /run (SSE) ─────────────────────────────────────────────────────────────
@router.get("/run")
def run_imitate(
    prompt: str = "",
    model_ids: str = "",  # comma-separated ollama model IDs
    temperature: float = 0.7,
    product_id: str = "",
) -> StreamingResponse:
    """Run a prompt through selected ollama models and stream results as SSE.

    Event sequence: one `start`, then per model a `model_start` followed by
    a `model_done` (carrying the response or an error string), and finally
    a `complete` event with all results. Models run sequentially.

    Raises 422 when prompt or model_ids is empty.
    """
    # NOTE(review): product_id is accepted but not used in this handler.
    if not prompt.strip():
        raise HTTPException(422, "prompt is required")
    ids = [m.strip() for m in model_ids.split(",") if m.strip()]
    if not ids:
        raise HTTPException(422, "model_ids is required")
    cfg = _load_imitate_config()
    ollama_base = _ollama_url(cfg)
    def generate():
        results: list[dict] = []
        yield _sse({"type": "start", "total_models": len(ids)})
        for model_id in ids:
            yield _sse({"type": "model_start", "model": model_id})
            try:
                # Blocks until this model finishes; a per-model failure is
                # reported as an event rather than aborting the stream.
                response, elapsed_ms = _run_ollama_streaming(
                    ollama_base, model_id, prompt, temperature
                )
                result = {
                    "model": model_id,
                    "response": response,
                    "elapsed_ms": elapsed_ms,
                    "error": None,
                }
            except Exception as exc:
                result = {
                    "model": model_id,
                    "response": "",
                    "elapsed_ms": 0,
                    "error": str(exc),
                }
            results.append(result)
            yield _sse({"type": "model_done", **result})
        yield _sse({"type": "complete", "results": results})
    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            # Disable caching/proxy buffering so events reach the browser live.
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
        },
    )
# ── POST /push-corrections ─────────────────────────────────────────────────────
class ImitateResult(BaseModel):
    """One model's output for a single imitate run."""
    model: str                # ollama model id
    response: str             # model output text ("" on error)
    elapsed_ms: int           # wall-clock generation time in ms (0 on error)
    error: str | None = None  # error message, or None on success
class PushCorrectionsRequest(BaseModel):
    """Request body for POST /push-corrections."""
    product_id: str              # which CF product the sample came from
    prompt: str                  # the prompt that was sent to every model
    results: list[ImitateResult]  # per-model outcomes to enqueue
@router.post("/push-corrections")
def push_corrections(req: PushCorrectionsRequest) -> dict:
    """Append imitate results to sft_candidates.jsonl for human review.

    Results with an error or an empty response are skipped; at least one
    usable result must remain. Returns {"pushed": <count>}.

    Raises 422 on empty prompt, empty results, or no usable results.
    """
    if not req.prompt.strip():
        raise HTTPException(422, "prompt is required")
    if not req.results:
        raise HTTPException(422, "results list is empty")
    ts = datetime.now(timezone.utc).isoformat()  # one shared timestamp per push
    records = []
    for r in req.results:
        if r.error or not r.response.strip():
            continue  # only successful, non-empty responses are queued
        records.append({
            "id": str(uuid.uuid4()),
            "source": "imitate",
            "product_id": req.product_id,
            "prompt_messages": [{"role": "user", "content": req.prompt}],
            "model_response": r.response,
            "model_id": r.model,
            "elapsed_ms": r.elapsed_ms,
            "status": "pending",  # stays pending until a human reviews it
            "created_at": ts,
        })
    if not records:
        raise HTTPException(422, "No non-error results to push")
    dest = _candidates_file()
    dest.parent.mkdir(parents=True, exist_ok=True)
    for record in records:
        append_jsonl(dest, record)
    return {"pushed": len(records)}

View file

@ -200,26 +200,8 @@ def lookup_model(repo_id: str) -> dict:
data = resp.json()
pipeline_tag = data.get("pipeline_tag")
adapter_recommendation = _TAG_TO_ADAPTER.get(pipeline_tag) if pipeline_tag else None
# Determine compatibility and surface a human-readable warning
_supported = ", ".join(sorted(_TAG_TO_ADAPTER.keys()))
if adapter_recommendation is not None:
compatible = True
warning: str | None = None
elif pipeline_tag is None:
compatible = False
warning = (
"This model has no task tag on HuggingFace — adapter type is unknown. "
"It may not work with Avocet's email classification pipeline."
)
logger.warning("No pipeline_tag for %s — no adapter recommendation", repo_id)
else:
compatible = False
warning = (
f"\"{pipeline_tag}\" models are not supported by Avocet's email classification adapters. "
f"Supported task types: {_supported}."
)
logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id)
if pipeline_tag and adapter_recommendation is None:
logger.warning("Unknown pipeline_tag %r for %s — no adapter recommendation", pipeline_tag, repo_id)
# Estimate model size from siblings list
siblings = data.get("siblings") or []
@ -234,8 +216,6 @@ def lookup_model(repo_id: str) -> dict:
"repo_id": repo_id,
"pipeline_tag": pipeline_tag,
"adapter_recommendation": adapter_recommendation,
"compatible": compatible,
"warning": warning,
"model_size_bytes": model_size_bytes,
"description": description,
"tags": data.get("tags") or [],

View file

@ -51,26 +51,17 @@ def _config_file() -> Path:
return _ROOT / "config" / "label_tool.yaml"
_DEFAULT_BENCH_RESULTS_DIR = "/Library/Development/CircuitForge/circuitforge-orch/scripts/bench_results"
def set_default_bench_results_dir(path: str) -> None:
"""Override the default bench_results_dir — used by tests to avoid real filesystem."""
global _DEFAULT_BENCH_RESULTS_DIR
_DEFAULT_BENCH_RESULTS_DIR = path
def _get_bench_results_dir() -> Path:
f = _config_file()
if f.exists():
try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
d = raw.get("sft", {}).get("bench_results_dir", "")
if d:
return Path(d)
except yaml.YAMLError as exc:
logger.warning("Failed to parse SFT config %s: %s", f, exc)
return Path(_DEFAULT_BENCH_RESULTS_DIR)
if not f.exists():
return Path("/nonexistent-bench-results")
try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
except yaml.YAMLError as exc:
logger.warning("Failed to parse SFT config %s: %s", f, exc)
return Path("/nonexistent-bench-results")
d = raw.get("sft", {}).get("bench_results_dir", "")
return Path(d) if d else Path("/nonexistent-bench-results")
def _candidates_file() -> Path:

View file

@ -26,66 +26,3 @@ max_per_account: 500
# produced by circuitforge-orch's benchmark harness.
sft:
bench_results_dir: /path/to/circuitforge-orch/scripts/bench_results
# cf-orch integration — LLM benchmark harness via cf-orch coordinator.
# All keys here override the corresponding environment variables.
# Omit any key to fall back to the env var (see .env.example).
cforch:
# Path to cf-orch's benchmark.py script
bench_script: /path/to/circuitforge-orch/scripts/benchmark.py
# Task and model definition files (yaml)
bench_tasks: /path/to/circuitforge-orch/scripts/bench_tasks.yaml
bench_models: /path/to/circuitforge-orch/scripts/bench_models.yaml
# Where benchmark results are written (also used for SFT candidate discovery)
results_dir: /path/to/circuitforge-orch/scripts/bench_results
# Python interpreter with cf-orch installed
python_bin: /devl/miniconda3/envs/cf/bin/python
# Connection config — override env vars CF_ORCH_URL / CF_LICENSE_KEY / OLLAMA_HOST
# coordinator_url: http://localhost:7700
# license_key: CFG-AVCT-xxxx-xxxx-xxxx
# ollama_url: http://localhost:11434
# ollama_model: llama3.2:3b
# Imitate tab — pull real samples from sibling CF product APIs and run them
# through local LLMs to build a corrections dataset.
# ollama_url defaults to cforch.ollama_url if omitted here.
imitate:
ollama_url: http://localhost:11434 # optional — falls back to cforch.ollama_url
products:
- id: peregrine
name: Peregrine
icon: "🦅"
description: Job search assistant
base_url: http://localhost:8502
sample_endpoint: /api/jobs
text_fields: [title, description]
prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}"
- id: kiwi
name: Kiwi
icon: "🥝"
description: Pantry tracker
base_url: http://localhost:8511
sample_endpoint: /api/inventory
text_fields: [name, category, notes]
prompt_template: "Describe this pantry item and estimate how best to use it:\n\n{text}"
- id: snipe
name: Snipe
icon: "🎯"
description: eBay trust scoring
base_url: http://localhost:8509
sample_endpoint: /api/listings
text_fields: [title, description, seller_info]
prompt_template: "Evaluate the trustworthiness of this listing and flag any red flags:\n\n{text}"
- id: osprey
name: Osprey
icon: "📞"
description: Gov't hold-line automation
base_url: http://localhost:8520
sample_endpoint: /api/calls/recent
text_fields: [agency, issue, notes]
prompt_template: "Draft a concise summary of this government call record:\n\n{text}"

View file

@ -22,8 +22,5 @@ dependencies:
# Optional: BGE reranker adapter
# - FlagEmbedding
# CircuitForge shared core (LLM router, tier system, config)
- circuitforge-core>=0.9.0
# Dev
- pytest>=8.0

View file

@ -1,369 +0,0 @@
"""Tests for app/cforch.py — /api/cforch/* endpoints."""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
import yaml
from fastapi.testclient import TestClient
# ── Fixtures ───────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def reset_cforch_globals(tmp_path):
    """Redirect _CONFIG_DIR to tmp_path and reset running-state globals.

    autouse=True so every test in this module starts from an isolated,
    empty config dir and a not-running benchmark; the previous module
    state is restored on teardown.
    """
    from app import cforch as cforch_module
    prev_config_dir = cforch_module._CONFIG_DIR
    prev_running = cforch_module._BENCH_RUNNING
    prev_proc = cforch_module._bench_proc
    cforch_module.set_config_dir(tmp_path)
    cforch_module._BENCH_RUNNING = False
    cforch_module._bench_proc = None
    yield tmp_path
    # Teardown: restore whatever state other test modules may rely on.
    cforch_module.set_config_dir(prev_config_dir)
    cforch_module._BENCH_RUNNING = prev_running
    cforch_module._bench_proc = prev_proc
@pytest.fixture
def client():
    """FastAPI TestClient bound to the full Avocet app (all routers mounted)."""
    from app.api import app
    return TestClient(app)
@pytest.fixture
def config_dir(reset_cforch_globals):
    """Return the tmp config dir (already set as _CONFIG_DIR)."""
    # Readability alias: the autouse fixture yields tmp_path.
    return reset_cforch_globals
def _write_config(config_dir: Path, cforch_cfg: dict) -> None:
    """Write a label_tool.yaml containing only the given cforch block."""
    target = config_dir / "label_tool.yaml"
    target.write_text(yaml.dump({"cforch": cforch_cfg}), encoding="utf-8")
def _write_tasks_yaml(path: Path, tasks: list[dict]) -> None:
    """Dump a bench_tasks.yaml fixture with the given task list."""
    content = yaml.dump({"tasks": tasks})
    path.write_text(content, encoding="utf-8")
def _write_models_yaml(path: Path, models: list[dict]) -> None:
    """Dump a bench_models.yaml fixture with the given model list."""
    content = yaml.dump({"models": models})
    path.write_text(content, encoding="utf-8")
# ── GET /tasks ─────────────────────────────────────────────────────────────────
def test_tasks_returns_empty_when_not_configured(client):
    """No config file present — endpoint returns empty lists."""
    # The autouse fixture pointed _CONFIG_DIR at an empty tmp dir, so the
    # endpoint takes its "not configured" fallback path.
    r = client.get("/api/cforch/tasks")
    assert r.status_code == 200
    data = r.json()
    assert data == {"tasks": [], "types": []}
def test_tasks_parses_yaml(client, config_dir, tmp_path):
    """Happy path: tasks and their types are read from bench_tasks.yaml."""
    tasks_file = tmp_path / "bench_tasks.yaml"
    _write_tasks_yaml(tasks_file, [
        {"id": "t1", "name": "Task One", "type": "instruction"},
        {"id": "t2", "name": "Task Two", "type": "reasoning"},
    ])
    _write_config(config_dir, {"bench_tasks": str(tasks_file)})
    r = client.get("/api/cforch/tasks")
    assert r.status_code == 200
    data = r.json()
    assert len(data["tasks"]) == 2
    # TaskEntry now includes optional prompt/system fields (default "")
    t1 = data["tasks"][0]
    assert t1["id"] == "t1" and t1["name"] == "Task One" and t1["type"] == "instruction"
    t2 = data["tasks"][1]
    assert t2["id"] == "t2" and t2["name"] == "Task Two" and t2["type"] == "reasoning"
    assert "instruction" in data["types"]
    assert "reasoning" in data["types"]
def test_tasks_returns_types_deduplicated(client, config_dir, tmp_path):
    """Multiple tasks sharing a type — types list must not duplicate."""
    tasks_file = tmp_path / "bench_tasks.yaml"
    _write_tasks_yaml(tasks_file, [
        {"id": "t1", "name": "A", "type": "instruction"},
        {"id": "t2", "name": "B", "type": "instruction"},
        {"id": "t3", "name": "C", "type": "reasoning"},
    ])
    _write_config(config_dir, {"bench_tasks": str(tasks_file)})
    r = client.get("/api/cforch/tasks")
    data = r.json()
    # "instruction" appears in two tasks but only once in types.
    assert data["types"].count("instruction") == 1
    assert len(data["types"]) == 2
# ── GET /models ────────────────────────────────────────────────────────────────
def test_models_returns_empty_when_not_configured(client):
    """No config file present — endpoint returns empty model list."""
    # Empty tmp config dir from the autouse fixture → fallback path.
    r = client.get("/api/cforch/models")
    assert r.status_code == 200
    assert r.json() == {"models": []}
def test_models_parses_bench_models_yaml(client, config_dir, tmp_path):
    """Happy path: every field of a model entry survives the round trip."""
    models_file = tmp_path / "bench_models.yaml"
    _write_models_yaml(models_file, [
        {
            "name": "llama3",
            "id": "llama3:8b",
            "service": "ollama",
            "tags": ["fast", "small"],
            "vram_estimate_mb": 6000,
        }
    ])
    _write_config(config_dir, {"bench_models": str(models_file)})
    r = client.get("/api/cforch/models")
    assert r.status_code == 200
    data = r.json()
    assert len(data["models"]) == 1
    m = data["models"][0]
    assert m["name"] == "llama3"
    assert m["id"] == "llama3:8b"
    assert m["service"] == "ollama"
    assert m["tags"] == ["fast", "small"]
    assert m["vram_estimate_mb"] == 6000
# ── GET /run ───────────────────────────────────────────────────────────────────
def test_run_returns_409_when_already_running(client):
    """If _BENCH_RUNNING is True, GET /run returns 409."""
    from app import cforch as cforch_module
    # The autouse fixture restores this global on teardown.
    cforch_module._BENCH_RUNNING = True
    r = client.get("/api/cforch/run")
    assert r.status_code == 409
def test_run_returns_error_when_bench_script_not_configured(client):
    """No config at all — SSE stream contains an error event."""
    r = client.get("/api/cforch/run")
    # SSE always answers 200; the failure is signalled inside the stream.
    assert r.status_code == 200
    assert '"type": "error"' in r.text
    assert "bench_script not configured" in r.text
def test_run_streams_progress_events(client, config_dir, tmp_path):
    """Mock subprocess — SSE stream emits progress events from stdout."""
    bench_script = tmp_path / "fake_benchmark.py"
    bench_script.write_text("# fake", encoding="utf-8")
    tasks_file = tmp_path / "bench_tasks.yaml"
    tasks_file.write_text(yaml.dump({"tasks": []}), encoding="utf-8")
    models_file = tmp_path / "bench_models.yaml"
    models_file.write_text(yaml.dump({"models": []}), encoding="utf-8")
    results_dir = tmp_path / "results"
    results_dir.mkdir()
    _write_config(config_dir, {
        "bench_script": str(bench_script),
        "bench_tasks": str(tasks_file),
        "bench_models": str(models_file),
        "results_dir": str(results_dir),
        "python_bin": "/usr/bin/python3",
    })
    mock_proc = MagicMock()
    # stdout must be an iterable of lines — the endpoint iterates it directly.
    mock_proc.stdout = iter(["Running task 1\n", "Running task 2\n"])
    mock_proc.returncode = 1  # non-zero so we don't need summary.json
    def mock_wait():
        pass
    mock_proc.wait = mock_wait
    with patch("app.cforch._subprocess.Popen", return_value=mock_proc):
        r = client.get("/api/cforch/run")
    assert r.status_code == 200
    assert '"type": "progress"' in r.text
    assert "Running task 1" in r.text
    assert "Running task 2" in r.text
def test_run_emits_result_on_success(client, config_dir, tmp_path):
    """On subprocess exit 0 the stream must emit result and complete events."""
    script = tmp_path / "fake_benchmark.py"
    script.write_text("# fake", encoding="utf-8")
    tasks = tmp_path / "bench_tasks.yaml"
    tasks.write_text(yaml.dump({"tasks": []}), encoding="utf-8")
    models = tmp_path / "bench_models.yaml"
    models.write_text(yaml.dump({"models": []}), encoding="utf-8")
    results_root = tmp_path / "results"
    run_dir = results_root / "2026-04-08-120000"
    run_dir.mkdir(parents=True)
    (run_dir / "summary.json").write_text(
        json.dumps({"score": 0.92, "models_evaluated": 3}), encoding="utf-8"
    )
    _write_config(config_dir, {
        "bench_script": str(script),
        "bench_tasks": str(tasks),
        "bench_models": str(models),
        "results_dir": str(results_root),
        "python_bin": "/usr/bin/python3",
    })
    proc = MagicMock()
    proc.stdout = iter([])   # no progress output needed for this test
    proc.returncode = 0      # success path reads summary.json
    proc.wait = MagicMock()
    with patch("app.cforch._subprocess.Popen", return_value=proc):
        resp = client.get("/api/cforch/run")
    assert resp.status_code == 200
    assert '"type": "result"' in resp.text
    assert '"score": 0.92' in resp.text
    assert '"type": "complete"' in resp.text
# ── GET /results ───────────────────────────────────────────────────────────────
def test_results_returns_404_when_no_results(client):
    """GET /results is a 404 when no results_dir has been configured."""
    assert client.get("/api/cforch/results").status_code == 404
def test_results_returns_latest_summary(client, config_dir, tmp_path):
    """GET /results returns the summary.json from the latest run directory."""
    results_root = tmp_path / "results"
    run_dir = results_root / "2026-04-08-150000"
    run_dir.mkdir(parents=True)
    (run_dir / "summary.json").write_text(
        json.dumps({"score": 0.88, "run": "test"}), encoding="utf-8"
    )
    _write_config(config_dir, {"results_dir": str(results_root)})
    resp = client.get("/api/cforch/results")
    assert resp.status_code == 200
    payload = resp.json()
    assert payload["score"] == 0.88
    assert payload["run"] == "test"
# ── POST /cancel ───────────────────────────────────────────────────────────────
def test_cancel_returns_404_when_not_running(client):
    """POST /cancel without an active benchmark yields 404."""
    assert client.post("/api/cforch/cancel").status_code == 404
def test_cancel_terminates_running_benchmark(client):
    """POST /cancel terminates the live process and clears module state."""
    from app import cforch as cforch_module

    fake_proc = MagicMock()
    cforch_module._BENCH_RUNNING = True
    cforch_module._bench_proc = fake_proc
    resp = client.post("/api/cforch/cancel")
    assert resp.status_code == 200
    assert resp.json() == {"status": "cancelled"}
    fake_proc.terminate.assert_called_once()
    # Globals must be reset so a subsequent run can start cleanly.
    assert cforch_module._BENCH_RUNNING is False
    assert cforch_module._bench_proc is None
# ── GET /config ────────────────────────────────────────────────────────────────
def test_config_returns_empty_when_no_yaml_no_env(client, monkeypatch):
    """Without yaml or env vars, all fields are blank and no key is set."""
    for var in ("CF_ORCH_URL", "CF_LICENSE_KEY", "OLLAMA_HOST", "OLLAMA_MODEL"):
        monkeypatch.delenv(var, raising=False)
    resp = client.get("/api/cforch/config")
    assert resp.status_code == 200
    body = resp.json()
    assert body["coordinator_url"] == ""
    assert body["ollama_url"] == ""
    assert body["license_key_set"] is False
def test_config_reads_env_vars_when_no_yaml(client, monkeypatch):
    """Env vars populate fields when label_tool.yaml has no cforch section."""
    env = {
        "CF_ORCH_URL": "http://orch.example.com:7700",
        "CF_LICENSE_KEY": "CFG-AVCT-TEST-TEST-TEST",
        "OLLAMA_HOST": "http://ollama.local:11434",
        "OLLAMA_MODEL": "mistral:7b",
    }
    for key, value in env.items():
        monkeypatch.setenv(key, value)
    resp = client.get("/api/cforch/config")
    assert resp.status_code == 200
    body = resp.json()
    assert body["coordinator_url"] == env["CF_ORCH_URL"]
    assert body["ollama_url"] == env["OLLAMA_HOST"]
    assert body["ollama_model"] == env["OLLAMA_MODEL"]
    # Only the key's presence is reported; its value is never echoed back.
    assert body["license_key_set"] is True
def test_config_yaml_overrides_env(client, config_dir, monkeypatch):
    """Values in label_tool.yaml win over the corresponding env vars."""
    monkeypatch.setenv("CF_ORCH_URL", "http://env-orch:7700")
    monkeypatch.setenv("OLLAMA_HOST", "http://env-ollama:11434")
    _write_config(config_dir, {
        "coordinator_url": "http://yaml-orch:7700",
        "ollama_url": "http://yaml-ollama:11434",
    })
    resp = client.get("/api/cforch/config")
    assert resp.status_code == 200
    body = resp.json()
    assert body["coordinator_url"] == "http://yaml-orch:7700"
    assert body["ollama_url"] == "http://yaml-ollama:11434"
    # Source field records that both layers contributed.
    assert body["source"] == "yaml+env"
def test_run_passes_license_key_env_to_subprocess(client, config_dir, tmp_path, monkeypatch):
    """The benchmark subprocess must inherit CF_LICENSE_KEY from the env."""
    monkeypatch.setenv("CF_LICENSE_KEY", "CFG-AVCT-ENV-ONLY-KEY")
    script = tmp_path / "benchmark.py"
    script.write_text("# stub", encoding="utf-8")
    tasks = tmp_path / "bench_tasks.yaml"
    tasks.write_text(yaml.dump({"tasks": []}), encoding="utf-8")
    models = tmp_path / "bench_models.yaml"
    models.write_text(yaml.dump({"models": []}), encoding="utf-8")
    _write_config(config_dir, {
        "bench_script": str(script),
        "bench_tasks": str(tasks),
        "bench_models": str(models),
        "results_dir": str(tmp_path / "results"),
        "python_bin": "/usr/bin/python3",
    })
    captured_env: dict = {}

    def fake_popen(cmd, **kwargs):
        # Record the env handed to Popen and return a no-op process stub.
        captured_env.update(kwargs.get("env", {}))
        stub = MagicMock()
        stub.stdout = iter([])
        stub.returncode = 0
        stub.wait = MagicMock()
        return stub

    with patch("app.cforch._subprocess.Popen", side_effect=fake_popen):
        client.get("/api/cforch/run")
    assert captured_env.get("CF_LICENSE_KEY") == "CFG-AVCT-ENV-ONLY-KEY"

View file

@ -1,242 +0,0 @@
"""Tests for app/imitate.py — product registry, sample extraction, corrections push."""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from app.api import app
from app import imitate as _imitate_module
# ── Fixtures ───────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def reset_module_globals():
    """Snapshot and restore imitate module config/data dir globals per test.

    The previous signature requested ``tmp_path`` but never used it; the
    unused fixture request has been dropped.
    """
    orig_cfg = _imitate_module._CONFIG_DIR
    orig_data = _imitate_module._DATA_DIR
    yield
    _imitate_module._CONFIG_DIR = orig_cfg
    _imitate_module._DATA_DIR = orig_data
@pytest.fixture()
def config_dir(tmp_path) -> Path:
    """Point the imitate module's config dir at a temp directory."""
    _imitate_module.set_config_dir(tmp_path)
    return tmp_path
@pytest.fixture()
def data_dir(tmp_path) -> Path:
    """Point the imitate module's data dir at a temp directory."""
    _imitate_module.set_data_dir(tmp_path)
    return tmp_path
@pytest.fixture()
def cfg_with_products(config_dir: Path) -> Path:
    """Write a label_tool.yaml with two products.

    Returns the config directory so tests can layer further edits on top.
    """
    # NOTE(review): the diff rendering stripped leading whitespace from this
    # YAML literal; nesting below is reconstructed conventionally (products
    # under `imitate:`, fields under each list item) — verify against git.
    (config_dir / "label_tool.yaml").write_text(
        """
imitate:
  ollama_url: http://localhost:11434
  products:
    - id: peregrine
      name: Peregrine
      icon: "🦅"
      description: Job search assistant
      base_url: http://peregrine.local
      sample_endpoint: /api/jobs
      text_fields: [title, description]
      prompt_template: "Analyze: {text}"
    - id: kiwi
      name: Kiwi
      icon: "🥝"
      description: Pantry tracker
      base_url: http://kiwi.local
      sample_endpoint: /api/inventory
      text_fields: [name, notes]
      prompt_template: "Describe: {text}"
"""
    )
    return config_dir
@pytest.fixture()
def client() -> TestClient:
    """FastAPI test client that propagates server-side exceptions."""
    return TestClient(app, raise_server_exceptions=True)
# ── GET /products ──────────────────────────────────────────────────────────────
def test_products_empty_when_no_config(config_dir, client):
    """No imitate section in label_tool.yaml -> empty product list."""
    (config_dir / "label_tool.yaml").write_text("accounts: []\n")
    response = client.get("/api/imitate/products")
    assert response.status_code == 200
    assert response.json()["products"] == []
def test_products_listed(cfg_with_products, client):
    """Every configured product is returned with its expected fields."""
    with patch.object(_imitate_module, "_is_online", return_value=True):
        response = client.get("/api/imitate/products")
    assert response.status_code == 200
    products = response.json()["products"]
    assert len(products) == 2
    assert {p["id"] for p in products} == {"peregrine", "kiwi"}
    by_id = {p["id"]: p for p in products}
    peregrine = by_id["peregrine"]
    assert peregrine["name"] == "Peregrine"
    assert peregrine["icon"] == "🦅"
    assert peregrine["online"] is True
def test_products_offline_when_unreachable(cfg_with_products, client):
    """Unreachable base_url -> every product reports itself offline."""
    with patch.object(_imitate_module, "_is_online", return_value=False):
        response = client.get("/api/imitate/products")
    for product in response.json()["products"]:
        assert not product["online"]
# ── GET /products/{id}/sample ─────────────────────────────────────────────────
def test_sample_unknown_product(cfg_with_products, client):
    """An unknown product id yields 404."""
    assert client.get("/api/imitate/products/nonexistent/sample").status_code == 404
def test_sample_fetched_from_list(cfg_with_products, client):
    """The first item of a list API response is used as the sample."""
    payload = [
        {"title": "Engineer", "description": "Build things"},
        {"title": "Other", "description": "Ignore me"},
    ]
    with patch.object(_imitate_module, "_http_get_json", return_value=payload):
        response = client.get("/api/imitate/products/peregrine/sample")
    assert response.status_code == 200
    body = response.json()
    # Both configured text_fields of the first item appear in the text.
    assert "Engineer" in body["text"]
    assert "Build things" in body["text"]
    assert "Analyze:" in body["prompt"]
def test_sample_fetched_from_dict_with_items_key(cfg_with_products, client):
    """A wrapper dict with a recognised list key is unwrapped."""
    wrapped = {"items": [{"title": "Wrapped Job", "description": "In a wrapper"}]}
    with patch.object(_imitate_module, "_http_get_json", return_value=wrapped):
        response = client.get("/api/imitate/products/peregrine/sample")
    assert response.status_code == 200
    assert "Wrapped Job" in response.json()["text"]
def test_sample_503_when_api_unreachable(cfg_with_products, client):
    """An unreachable product API maps to a 503 response."""
    from urllib.error import URLError

    failure = URLError("refused")
    with patch.object(_imitate_module, "_http_get_json", side_effect=failure):
        response = client.get("/api/imitate/products/peregrine/sample")
    assert response.status_code == 503
def test_sample_404_on_empty_list(cfg_with_products, client):
    """An empty list from the product API maps to a 404 response."""
    with patch.object(_imitate_module, "_http_get_json", return_value=[]):
        response = client.get("/api/imitate/products/peregrine/sample")
    assert response.status_code == 404
# ── POST /push-corrections ─────────────────────────────────────────────────────
def test_push_corrections_appends_jsonl(cfg_with_products, data_dir, client):
    """A successful push appends one JSONL record per error-free result."""
    results = [
        {"model": "qwen2.5:0.5b", "response": "It's a good job.", "elapsed_ms": 800, "error": None},
        {"model": "llama3.1:8b", "response": "Strong candidate.", "elapsed_ms": 1500, "error": None},
    ]
    response = client.post(
        "/api/imitate/push-corrections",
        json={"product_id": "peregrine", "prompt": "Analyze this job:", "results": results},
    )
    assert response.status_code == 200
    assert response.json()["pushed"] == 2
    lines = (data_dir / "sft_candidates.jsonl").read_text().splitlines()
    assert len(lines) == 2
    for raw in lines:
        record = json.loads(raw)
        assert record["source"] == "imitate"
        assert record["product_id"] == "peregrine"
        assert record["status"] == "pending"
        # Each record stores the prompt as a user-role chat message.
        assert record["prompt_messages"][0]["role"] == "user"
def test_push_corrections_skips_errors(cfg_with_products, data_dir, client):
    """Errored results are excluded from the corrections file."""
    body = {
        "product_id": "peregrine",
        "prompt": "Analyze:",
        "results": [
            {"model": "good-model", "response": "Good answer.", "elapsed_ms": 500, "error": None},
            {"model": "bad-model", "response": "", "elapsed_ms": 0, "error": "connection refused"},
        ],
    }
    response = client.post("/api/imitate/push-corrections", json=body)
    assert response.status_code == 200
    # Only the error-free result is counted.
    assert response.json()["pushed"] == 1
def test_push_corrections_empty_prompt_422(cfg_with_products, data_dir, client):
    """A whitespace-only prompt is rejected with 422."""
    body = {
        "product_id": "peregrine",
        "prompt": " ",
        "results": [{"model": "m", "response": "r", "elapsed_ms": 1, "error": None}],
    }
    assert client.post("/api/imitate/push-corrections", json=body).status_code == 422
def test_push_corrections_all_errors_422(cfg_with_products, data_dir, client):
    """If every result carries an error there is nothing to push -> 422."""
    body = {
        "product_id": "peregrine",
        "prompt": "Analyze:",
        "results": [
            {"model": "m", "response": "", "elapsed_ms": 0, "error": "timed out"},
        ],
    }
    assert client.post("/api/imitate/push-corrections", json=body).status_code == 422
# ── _extract_sample helper ─────────────────────────────────────────────────────
def test_extract_sample_list():
    """_extract_sample folds the configured text fields into the text."""
    sample = _imitate_module._extract_sample(
        [{"title": "A", "description": "B"}],
        text_fields=["title", "description"],
    )
    assert "A" in sample["text"]
    assert "B" in sample["text"]
def test_extract_sample_empty_list():
    """An empty item list produces an empty dict."""
    assert _imitate_module._extract_sample([], text_fields=["title"]) == {}
def test_extract_sample_respects_index():
    """sample_index selects which item gets extracted."""
    items = [{"title": "First"}, {"title": "Second"}]
    sample = _imitate_module._extract_sample(items, ["title"], sample_index=1)
    assert "Second" in sample["text"]
def test_extract_sample_clamps_index():
    """An out-of-range sample_index clamps into the valid range."""
    only = [{"title": "Only"}]
    sample = _imitate_module._extract_sample(only, ["title"], sample_index=99)
    assert "Only" in sample["text"]

View file

@ -371,18 +371,15 @@ def test_delete_installed_not_found_returns_404(client):
def test_delete_installed_path_traversal_blocked(client):
"""DELETE /installed/../../etc must be blocked.
Path traversal normalises to a different URL (/api/etc); if web/dist exists
the StaticFiles mount intercepts it and returns 405 (GET/HEAD only).
"""
"""DELETE /installed/../../etc must be blocked (400 or 422)."""
r = client.delete("/api/models/installed/../../etc")
assert r.status_code in (400, 404, 405, 422)
assert r.status_code in (400, 404, 422)
def test_delete_installed_dotdot_name_blocked(client):
"""A name containing '..' in any form must be rejected."""
r = client.delete("/api/models/installed/..%2F..%2Fetc")
assert r.status_code in (400, 404, 405, 422)
assert r.status_code in (400, 404, 422)
def test_delete_installed_name_with_slash_blocked(client):

View file

@ -8,16 +8,13 @@ from pathlib import Path
@pytest.fixture(autouse=True)
def reset_sft_globals(tmp_path):
from app import sft as sft_module
_prev_data = sft_module._SFT_DATA_DIR
_prev_cfg = sft_module._SFT_CONFIG_DIR
_prev_default = sft_module._DEFAULT_BENCH_RESULTS_DIR
_prev_data = sft_module._SFT_DATA_DIR
_prev_cfg = sft_module._SFT_CONFIG_DIR
sft_module.set_sft_data_dir(tmp_path)
sft_module.set_sft_config_dir(tmp_path)
sft_module.set_default_bench_results_dir(str(tmp_path / "bench_results"))
yield
sft_module.set_sft_data_dir(_prev_data)
sft_module.set_sft_config_dir(_prev_cfg)
sft_module.set_default_bench_results_dir(_prev_default)
@pytest.fixture

View file

@ -67,7 +67,6 @@ const navItems = [
{ path: '/stats', icon: '📊', label: 'Stats' },
{ path: '/benchmark', icon: '🏁', label: 'Benchmark' },
{ path: '/models', icon: '🤗', label: 'Models' },
{ path: '/imitate', icon: '🪞', label: 'Imitate' },
{ path: '/corrections', icon: '✍️', label: 'Corrections' },
{ path: '/settings', icon: '⚙️', label: 'Settings' },
]

View file

@ -8,7 +8,6 @@ const BenchmarkView = () => import('../views/BenchmarkView.vue')
const SettingsView = () => import('../views/SettingsView.vue')
const CorrectionsView = () => import('../views/CorrectionsView.vue')
const ModelsView = () => import('../views/ModelsView.vue')
const ImitateView = () => import('../views/ImitateView.vue')
export const router = createRouter({
history: createWebHashHistory(),
@ -18,7 +17,6 @@ export const router = createRouter({
{ path: '/stats', component: StatsView, meta: { title: 'Stats' } },
{ path: '/benchmark', component: BenchmarkView, meta: { title: 'Benchmark' } },
{ path: '/models', component: ModelsView, meta: { title: 'Models' } },
{ path: '/imitate', component: ImitateView, meta: { title: 'Imitate' } },
{ path: '/corrections', component: CorrectionsView, meta: { title: 'Corrections' } },
{ path: '/settings', component: SettingsView, meta: { title: 'Settings' } },
],

View file

@ -3,339 +3,26 @@
<header class="bench-header">
<h1 class="page-title">🏁 Benchmark</h1>
<div class="header-actions">
<label class="slow-toggle" :class="{ disabled: running }" v-if="benchMode === 'classifier'">
<label class="slow-toggle" :class="{ disabled: running }">
<input type="checkbox" v-model="includeSlow" :disabled="running" />
Include slow models
</label>
<template v-if="benchMode === 'classifier'">
<button
class="btn-run"
:disabled="running"
@click="startBenchmark"
>
{{ running ? '⏳ Running…' : results ? '🔄 Re-run' : '▶ Run Benchmark' }}
</button>
<button
v-if="running"
class="btn-cancel"
@click="cancelBenchmark"
>
Cancel
</button>
</template>
</div>
</header>
<!-- Mode toggle -->
<div class="mode-toggle" role="group" aria-label="Benchmark mode">
<button
class="mode-btn"
:class="{ active: benchMode === 'classifier' }"
@click="benchMode = 'classifier'"
>Classifier</button>
<button
class="mode-btn"
:class="{ active: benchMode === 'llm' }"
@click="benchMode = 'llm'"
>🤖 LLM Eval</button>
<button
class="mode-btn"
:class="{ active: benchMode === 'compare' }"
@click="benchMode = 'compare'; ensureCompareReady()"
> Compare</button>
</div>
<!-- LLM Eval panel -->
<template v-if="benchMode === 'llm'">
<!-- Task Selection -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">📋 Task Selection</span>
<span class="picker-badge">{{ llmTaskBadge }}</span>
</summary>
<div class="picker-body">
<div v-if="llmTasksLoading" class="picker-loading">Loading tasks</div>
<div v-else-if="Object.keys(llmTasksByType).length === 0" class="picker-empty">
No tasks found check API connection.
</div>
<template v-else>
<div
v-for="(tasks, type) in llmTasksByType"
:key="type"
class="picker-category"
>
<label class="picker-cat-header">
<input
type="checkbox"
:checked="isTaskTypeAllSelected(tasks)"
:indeterminate="isTaskTypeIndeterminate(tasks)"
@change="toggleTaskType(tasks, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-cat-name">{{ type }}</span>
<span class="picker-cat-count">({{ tasks.length }})</span>
</label>
<div class="picker-model-list">
<label
v-for="t in tasks"
:key="t.id"
class="picker-model-row"
>
<input
type="checkbox"
:checked="selectedLlmTasks.has(t.id)"
@change="toggleLlmTask(t.id, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-model-name" :title="t.name">{{ t.name }}</span>
</label>
</div>
</div>
</template>
</div>
</details>
<!-- Model Selection -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">🎯 Model Selection</span>
<span class="picker-badge">{{ llmModelBadge }}</span>
</summary>
<div class="picker-body">
<div v-if="llmModelsLoading" class="picker-loading">Loading models</div>
<div v-else-if="Object.keys(llmModelsByService).length === 0" class="picker-empty">
No models found check cf-orch connection.
</div>
<template v-else>
<div
v-for="(models, service) in llmModelsByService"
:key="service"
class="picker-category"
>
<label class="picker-cat-header">
<input
type="checkbox"
:checked="isServiceAllSelected(models)"
:indeterminate="isServiceIndeterminate(models)"
@change="toggleService(models, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-cat-name">{{ service }}</span>
<span class="picker-cat-count">({{ models.length }})</span>
</label>
<div class="picker-model-list">
<label
v-for="m in models"
:key="m.id"
class="picker-model-row"
>
<input
type="checkbox"
:checked="selectedLlmModels.has(m.id)"
@change="toggleLlmModel(m.id, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-model-name" :title="m.name">{{ m.name }}</span>
<span class="picker-adapter-type" v-if="m.tags.length">{{ m.tags.join(', ') }}</span>
</label>
</div>
</div>
</template>
</div>
</details>
<!-- Run Controls -->
<div class="llm-run-controls">
<button
class="btn-run"
:disabled="llmRunning || selectedLlmTasks.size === 0 || selectedLlmModels.size === 0"
@click="startLlmBenchmark"
:disabled="running"
@click="startBenchmark"
>
{{ llmRunning ? '⏳ Running…' : '▶ Run LLM Eval' }}
{{ running ? '⏳ Running…' : results ? '🔄 Re-run' : '▶ Run Benchmark' }}
</button>
<button
v-if="llmRunning"
v-if="running"
class="btn-cancel"
@click="cancelLlmBenchmark"
@click="cancelBenchmark"
>
Cancel
</button>
<span v-if="selectedLlmTasks.size === 0 || selectedLlmModels.size === 0" class="llm-run-hint">
Select at least one task and one model to run.
</span>
</div>
<!-- Progress log -->
<div v-if="llmRunning || llmRunLog.length" class="run-log">
<div class="run-log-title">
<span>{{ llmRunning ? '⏳ Running LLM eval…' : llmError ? '❌ Failed' : '✅ Done' }}</span>
<button class="btn-ghost" @click="llmRunLog = []; llmError = ''">Clear</button>
</div>
<div class="log-lines" ref="llmLogEl">
<div
v-for="(line, i) in llmRunLog"
:key="i"
class="log-line"
:class="{ 'log-error': line.startsWith('ERROR') || line.startsWith('[error]') }"
>{{ line }}</div>
</div>
<p v-if="llmError" class="run-error">{{ llmError }}</p>
</div>
<!-- LLM Results table -->
<template v-if="llmResults.length > 0">
<h2 class="chart-title">LLM Eval Results</h2>
<div class="heatmap-scroll">
<table class="heatmap llm-results-table">
<thead>
<tr>
<th class="hm-label-col">Model</th>
<th class="hm-model-col">overall</th>
<th
v-for="col in llmTaskTypeCols"
:key="col"
class="hm-model-col"
>{{ col }}</th>
<th class="hm-model-col">tok/s</th>
</tr>
</thead>
<tbody>
<tr v-for="row in llmResults" :key="row.model_id">
<td class="hm-label-cell llm-model-name-cell" :title="row.model_id">{{ row.model_name }}</td>
<td
class="hm-value-cell"
:class="{ 'bt-best': llmBestByCol['overall'] === row.model_id }"
>{{ pct(row.avg_quality_score) }}</td>
<td
v-for="col in llmTaskTypeCols"
:key="col"
class="hm-value-cell"
:class="{ 'bt-best': llmBestByCol[col] === row.model_id }"
>{{ row.quality_by_task_type[col] != null ? pct(row.quality_by_task_type[col]) : '—' }}</td>
<td class="hm-value-cell llm-tps-cell">{{ row.avg_tokens_per_sec.toFixed(1) }}</td>
</tr>
</tbody>
</table>
</div>
<p class="heatmap-hint">Run LLM Eval on the Benchmark tab to refresh. Green = best per column.</p>
</template>
</template>
<!-- Compare panel -->
<template v-if="benchMode === 'compare'">
<!-- Task selector (radio one at a time) -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">📋 Pick a Task</span>
<span class="picker-badge">{{ cmpSelectedTask ? cmpSelectedTask.name : 'None selected' }}</span>
</summary>
<div class="picker-body">
<div v-if="llmTasksLoading" class="picker-loading">Loading tasks</div>
<div v-else-if="llmTasks.length === 0" class="picker-empty">No tasks found check cforch config.</div>
<template v-else>
<div v-for="(tasks, type) in llmTasksByType" :key="type" class="picker-category">
<span class="picker-cat-name" style="font-weight:600; padding: 0.35rem 0; display:block">{{ type }}</span>
<div class="picker-model-list">
<label v-for="t in tasks" :key="t.id" class="picker-model-row">
<input
type="radio"
name="cmp-task"
:checked="cmpSelectedTask?.id === t.id"
@change="selectCmpTask(t)"
/>
<span class="picker-model-name" :title="t.name">{{ t.name }}</span>
</label>
</div>
</div>
</template>
</div>
</details>
<!-- Prompt editor -->
<template v-if="cmpSelectedTask">
<label class="prompt-label" for="cmp-prompt">Prompt</label>
<textarea
id="cmp-prompt"
class="cmp-prompt-editor"
v-model="cmpPrompt"
rows="6"
/>
<!-- Model picker (ollama only) -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">🤖 Ollama Models</span>
<span class="picker-badge">{{ cmpSelectedModels.size }} / {{ ollamaLlmModels.length }}</span>
</summary>
<div class="picker-body">
<label class="picker-cat-header">
<input
type="checkbox"
:checked="cmpSelectedModels.size === ollamaLlmModels.length"
:indeterminate="cmpSelectedModels.size > 0 && cmpSelectedModels.size < ollamaLlmModels.length"
@change="toggleAllCmpModels(($event.target as HTMLInputElement).checked)"
/>
<span class="picker-cat-name">All ollama models</span>
</label>
<div class="picker-model-list">
<label v-for="m in ollamaLlmModels" :key="m.id" class="picker-model-row">
<input
type="checkbox"
:checked="cmpSelectedModels.has(m.id)"
@change="toggleCmpModel(m.id, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-model-name">{{ m.name }}</span>
<span class="picker-adapter-type">{{ m.tags.slice(0,3).join(', ') }}</span>
</label>
</div>
</div>
</details>
<!-- Run controls -->
<div class="llm-run-controls">
<button
class="btn-run"
:disabled="cmpRunning || cmpSelectedModels.size === 0"
@click="startCompare"
>{{ cmpRunning ? '⏳ Running…' : '⚖️ Compare Models' }}</button>
<button v-if="cmpRunning" class="btn-cancel" @click="cancelCompare"> Cancel</button>
</div>
<!-- Progress log -->
<div v-if="cmpLog.length > 0" class="run-log">
<div class="log-lines">
<div v-for="(line, i) in cmpLog" :key="i" class="log-line">{{ line }}</div>
</div>
</div>
<!-- Side-by-side results -->
<template v-if="cmpResults.length > 0">
<h2 class="chart-title">Side-by-Side Responses</h2>
<div class="cmp-results-grid">
<div
v-for="r in cmpResults"
:key="r.model"
class="cmp-result-card"
:class="{ 'cmp-error': !!r.error }"
>
<div class="cmp-result-header">
<span class="cmp-model-name">{{ r.model }}</span>
<span class="cmp-meta">
<template v-if="r.error"><span class="err-badge">error</span></template>
<template v-else>{{ (r.elapsed_ms / 1000).toFixed(1) }}s</template>
</span>
</div>
<pre v-if="r.error" class="cmp-error-text">{{ r.error }}</pre>
<pre v-else class="cmp-response">{{ r.response }}</pre>
</div>
</div>
</template>
</template>
</template>
<!-- /Compare panel -->
<!-- Classifier panel -->
<template v-if="benchMode === 'classifier'">
</header>
<!-- Model Picker -->
<details class="model-picker" ref="pickerEl">
@ -563,10 +250,6 @@
</div>
</div>
</details>
</template>
<!-- /Classifier panel -->
</div>
</template>
@ -595,35 +278,6 @@ interface AvailableModel {
adapter_type: string
}
// cf-orch types — shapes returned by the /api/cforch endpoints.

// One benchmark task as listed by the cforch tasks endpoint.
interface CfOrchTask {
  id: string
  name: string
  type: string       // task category; used to group rows in the picker
  prompt: string
  system: string
}

// One runnable model known to cf-orch (Ollama or cloud service).
interface CfOrchModel {
  name: string
  id: string
  service: string    // e.g. 'ollama'; used to group the model picker
  tags: string[]
  vram_estimate_mb?: number
}

// Aggregated per-model scores from an LLM eval run.
interface LlmModelResult {
  model_name: string
  model_id: string
  node_id: string
  avg_tokens_per_sec: number
  avg_completion_ms: number
  avg_quality_score: number
  finetune_candidates: number
  error_count: number
  quality_by_task_type: Record<string, number>
}

// Response shape of the classifier model-categories endpoint.
interface ModelCategoriesResponse {
  categories: Record<string, AvailableModel[]>
}
@ -675,129 +329,8 @@ const ftError = ref('')
const ftLogEl = ref<HTMLElement | null>(null)
const runCancelled = ref(false)

// Mode toggle — which of the three benchmark panels is visible.
const benchMode = ref<'classifier' | 'llm' | 'compare'>('classifier')

// LLM Eval state — tasks/models fetched from cf-orch plus run progress.
const llmTasks = ref<CfOrchTask[]>([])
const llmTasksLoading = ref(false)
const llmModels = ref<CfOrchModel[]>([])
const llmModelsLoading = ref(false)
const selectedLlmTasks = ref<Set<string>>(new Set())     // checked task ids
const selectedLlmModels = ref<Set<string>>(new Set())    // checked model ids
const llmRunning = ref(false)
const llmRunLog = ref<string[]>([])
const llmError = ref('')
const llmResults = ref<LlmModelResult[]>([])
const llmEventSource = ref<EventSource | null>(null)     // live SSE stream handle
const llmLogEl = ref<HTMLElement | null>(null)
const ftCancelled = ref(false)
// Compare mode state — one prompt fanned out to several Ollama models.

// Per-model outcome of a compare run (response or error, plus latency).
interface CmpResult {
  model: string
  response: string
  elapsed_ms: number
  error: string | null
}

const cmpSelectedTask = ref<CfOrchTask & { prompt: string; system: string } | null>(null)
const cmpPrompt = ref('')                                // editable prompt text
const cmpSelectedModels = ref<Set<string>>(new Set())    // checked model ids
const cmpRunning = ref(false)
const cmpLog = ref<string[]>([])
const cmpResults = ref<CmpResult[]>([])
const cmpEventSource = ref<EventSource | null>(null)     // live SSE stream handle

// Compare mode only targets locally-served (ollama) models.
const ollamaLlmModels = computed(() =>
  llmModels.value.filter(m => m.service === 'ollama')
)
// Pick a task for compare mode; clears any previous results and log.
function selectCmpTask(t: CfOrchTask & { prompt: string; system: string }) {
  cmpResults.value = []
  cmpLog.value = []
  cmpSelectedTask.value = t
  cmpPrompt.value = t.prompt || ''
}
// Toggle one ollama model in the compare selection (immutable Set update).
function toggleCmpModel(id: string, checked: boolean) {
  const updated = new Set(cmpSelectedModels.value)
  if (checked) {
    updated.add(id)
  } else {
    updated.delete(id)
  }
  cmpSelectedModels.value = updated
}
// Select every ollama model for compare mode, or none.
function toggleAllCmpModels(checked: boolean) {
  if (checked) {
    cmpSelectedModels.value = new Set(ollamaLlmModels.value.map(m => m.id))
  } else {
    cmpSelectedModels.value = new Set()
  }
}
// Lazily fetch the task/model lists shared with LLM eval, and default-select
// every ollama model the first time compare mode is opened.
function ensureCompareReady() {
  if (llmTasks.value.length === 0) loadLlmTasks()
  if (llmModels.value.length === 0) loadLlmModels()
  const pool = ollamaLlmModels.value
  if (cmpSelectedModels.value.size === 0 && pool.length > 0) {
    cmpSelectedModels.value = new Set(pool.map(m => m.id))
  }
}
// Fan the current prompt out to every selected ollama model via SSE on
// /api/imitate/run, appending per-model progress to the log and collecting
// each finished response into cmpResults.
function startCompare() {
  // Guard: need a non-blank prompt and at least one model.
  if (!cmpPrompt.value.trim() || cmpSelectedModels.value.size === 0) return
  cmpRunning.value = true
  cmpResults.value = []
  cmpLog.value = []
  const params = new URLSearchParams({
    prompt: cmpPrompt.value,
    model_ids: [...cmpSelectedModels.value].join(','),
  })
  const es = new EventSource(`/api/imitate/run?${params}`)
  // Keep the handle so cancelCompare() can close the stream.
  cmpEventSource.value = es
  es.onmessage = (event: MessageEvent) => {
    try {
      const msg = JSON.parse(event.data)
      if (msg.type === 'start') {
        cmpLog.value.push(`Comparing ${msg.total_models} models…`)
      } else if (msg.type === 'model_start') {
        cmpLog.value.push(`${msg.model}`)
      } else if (msg.type === 'model_done') {
        // Log either the error text or the wall-clock seconds taken.
        const status = msg.error
          ? `${msg.error}`
          : `${(msg.elapsed_ms / 1000).toFixed(1)}s`
        cmpLog.value.push(` ${msg.model}: ${status}`)
        cmpResults.value.push({
          model: msg.model,
          response: msg.response,
          elapsed_ms: msg.elapsed_ms,
          error: msg.error ?? null,
        })
      } else if (msg.type === 'complete') {
        cmpRunning.value = false
        es.close()
      }
    } catch { /* ignore malformed frames */ }
  }
  es.onerror = () => {
    cmpLog.value.push('Connection error.')
    cmpRunning.value = false
    es.close()
  }
}
// Abort a running compare: close the SSE stream and note it in the log.
function cancelCompare() {
  const stream = cmpEventSource.value
  if (stream) stream.close()
  cmpEventSource.value = null
  cmpRunning.value = false
  cmpLog.value.push('Cancelled.')
}
// Ask the backend to cancel the classifier benchmark; network errors are
// ignored on purpose (best effort).
async function cancelBenchmark() {
  try {
    await fetch('/api/benchmark/cancel', { method: 'POST' })
  } catch {
    /* best effort */
  }
}
@ -806,197 +339,6 @@ async function cancelFinetune() {
await fetch('/api/finetune/cancel', { method: 'POST' }).catch(() => {})
}
// LLM Eval computed
// Group eval tasks by their type for the task-picker UI.
const llmTasksByType = computed((): Record<string, CfOrchTask[]> => {
  const grouped: Record<string, CfOrchTask[]> = {}
  for (const task of llmTasks.value) {
    if (!grouped[task.type]) grouped[task.type] = []
    grouped[task.type].push(task)
  }
  return grouped
})
// Group models by their hosting service for the model-picker UI.
const llmModelsByService = computed((): Record<string, CfOrchModel[]> => {
  const grouped: Record<string, CfOrchModel[]> = {}
  for (const model of llmModels.value) {
    if (!grouped[model.service]) grouped[model.service] = []
    grouped[model.service].push(model)
  }
  return grouped
})
// Human-readable summary of how many tasks are ticked.
const llmTaskBadge = computed(() => {
  const total = llmTasks.value.length
  if (total === 0) return 'No tasks available'
  const picked = selectedLlmTasks.value.size
  return picked === total
    ? `All tasks (${total})`
    : `${picked} of ${total} tasks selected`
})
// Human-readable summary of how many models are ticked.
const llmModelBadge = computed(() => {
  const total = llmModels.value.length
  if (total === 0) return 'No models available'
  const picked = selectedLlmModels.value.size
  return picked === total
    ? `All models (${total})`
    : `${picked} of ${total} selected`
})
// Sorted union of task-type keys present in any result row — these become
// the per-type columns of the results table.
const llmTaskTypeCols = computed(() => {
  const seen = new Set<string>()
  for (const row of llmResults.value) {
    Object.keys(row.quality_by_task_type).forEach(key => seen.add(key))
  }
  return Array.from(seen).sort()
})
// Best model id per column (overall + each task type col)
const llmBestByCol = computed((): Record<string, string> => {
const best: Record<string, string> = {}
if (llmResults.value.length === 0) return best
// overall
let bestId = '', bestVal = -Infinity
for (const r of llmResults.value) {
if (r.avg_quality_score > bestVal) { bestVal = r.avg_quality_score; bestId = r.model_id }
}
best['overall'] = bestId
for (const col of llmTaskTypeCols.value) {
bestId = ''; bestVal = -Infinity
for (const r of llmResults.value) {
const v = r.quality_by_task_type[col]
if (v != null && v > bestVal) { bestVal = v; bestId = r.model_id }
}
best[col] = bestId
}
return best
})
// Format a 0–1 fraction as a percentage with one decimal, e.g. 0.5 -> "50.0%".
function pct(v: number): string {
  const scaled = v * 100
  return `${scaled.toFixed(1)}%`
}
// Task picker helpers
// True when every task in the group is ticked (empty groups count as unselected).
function isTaskTypeAllSelected(tasks: CfOrchTask[]): boolean {
  if (tasks.length === 0) return false
  return !tasks.some(task => !selectedLlmTasks.value.has(task.id))
}
// True when the group is partially ticked: at least one, but not all.
function isTaskTypeIndeterminate(tasks: CfOrchTask[]): boolean {
  const picked = tasks.filter(task => selectedLlmTasks.value.has(task.id)).length
  return picked > 0 && picked < tasks.length
}
// Tick or untick one task id, replacing the Set so Vue sees the change.
function toggleLlmTask(id: string, checked: boolean) {
  const updated = new Set(selectedLlmTasks.value)
  if (!checked) updated.delete(id)
  else updated.add(id)
  selectedLlmTasks.value = updated
}
// Tick or untick every task in the group at once (immutable Set replace).
function toggleTaskType(tasks: CfOrchTask[], checked: boolean) {
  const updated = new Set(selectedLlmTasks.value)
  if (checked) tasks.forEach(task => updated.add(task.id))
  else tasks.forEach(task => updated.delete(task.id))
  selectedLlmTasks.value = updated
}
// Model picker helpers
// True when every model offered by the service is ticked (empty = false).
function isServiceAllSelected(models: CfOrchModel[]): boolean {
  if (models.length === 0) return false
  return !models.some(model => !selectedLlmModels.value.has(model.id))
}
// True when the service group is partially ticked.
function isServiceIndeterminate(models: CfOrchModel[]): boolean {
  const picked = models.filter(model => selectedLlmModels.value.has(model.id)).length
  return picked > 0 && picked < models.length
}
// Tick or untick one model id, replacing the Set so Vue sees the change.
function toggleLlmModel(id: string, checked: boolean) {
  const updated = new Set(selectedLlmModels.value)
  if (!checked) updated.delete(id)
  else updated.add(id)
  selectedLlmModels.value = updated
}
// Tick or untick every model offered by a service (immutable Set replace).
function toggleService(models: CfOrchModel[], checked: boolean) {
  const updated = new Set(selectedLlmModels.value)
  if (checked) models.forEach(model => updated.add(model.id))
  else models.forEach(model => updated.delete(model.id))
  selectedLlmModels.value = updated
}
// Data loaders
// Fetch the cf-orch task catalog; on success, every task starts selected.
async function loadLlmTasks() {
  llmTasksLoading.value = true
  const { data } = await useApiFetch<{ tasks: CfOrchTask[]; types: string[] }>('/api/cforch/tasks')
  llmTasksLoading.value = false
  const tasks = data?.tasks
  if (tasks) {
    llmTasks.value = tasks
    selectedLlmTasks.value = new Set(tasks.map(t => t.id))
  }
}
async function loadLlmModels() {
llmModelsLoading.value = true
const { data } = await useApiFetch<{ models: CfOrchModel[] }>('/api/cforch/models')
llmModelsLoading.value = false
if (data?.models) {
llmModels.value = data.models
selectedLlmModels.value = new Set(data.models.map(m => m.id))
}
}
// Load previously-saved benchmark results; empty/absent payloads are ignored.
async function loadLlmResults() {
  const { data } = await useApiFetch<LlmModelResult[]>('/api/cforch/results')
  if (Array.isArray(data) && data.length > 0) llmResults.value = data
}
// Close the SSE stream locally, then ask the backend to stop the run.
async function cancelLlmBenchmark() {
  const stream = llmEventSource.value
  if (stream) stream.close()
  llmEventSource.value = null
  llmRunning.value = false
  try {
    await fetch('/api/cforch/cancel', { method: 'POST' })
  } catch {
    // best-effort cancel
  }
}
// Kick off an LLM benchmark run over SSE and stream progress into the log.
// Frames: 'progress' (log line), 'result' (summary table), 'complete', 'error'.
function startLlmBenchmark() {
  llmRunning.value = true
  llmRunLog.value = []
  llmError.value = ''
  const params = new URLSearchParams()
  const taskIds = [...selectedLlmTasks.value].join(',')
  if (taskIds) params.set('task_ids', taskIds)
  const es = new EventSource(`/api/cforch/run?${params}`)
  llmEventSource.value = es
  es.onmessage = async (e: MessageEvent) => {
    // Guard JSON.parse: a malformed frame in this async handler would
    // otherwise become an unhandled rejection. The compare-mode handler
    // ignores bad frames the same way.
    let msg: any
    try {
      msg = JSON.parse(e.data)
    } catch {
      return // ignore malformed frames
    }
    if (msg.type === 'progress' && typeof msg.message === 'string') {
      llmRunLog.value.push(msg.message)
      await nextTick()
      // Keep the newest log line in view.
      llmLogEl.value?.scrollTo({ top: llmLogEl.value.scrollHeight, behavior: 'smooth' })
    } else if (msg.type === 'result' && Array.isArray(msg.summary)) {
      llmResults.value = msg.summary
    } else if (msg.type === 'complete') {
      llmRunning.value = false
      es.close()
      llmEventSource.value = null
    } else if (msg.type === 'error' && typeof msg.message === 'string') {
      llmError.value = msg.message
      llmRunning.value = false
      es.close()
      llmEventSource.value = null
    }
  }
  es.onerror = () => {
    if (llmRunning.value) llmError.value = 'Connection lost'
    llmRunning.value = false
    es.close()
    llmEventSource.value = null
  }
}
// Model picker computed
const pickerSummaryText = computed(() => {
const total = allModels.value.length
@ -1206,9 +548,6 @@ onMounted(() => {
loadResults()
loadFineTunedModels()
loadModelCategories()
loadLlmTasks()
loadLlmModels()
loadLlmResults()
})
</script>
@ -1753,173 +1092,4 @@ details[open] .ft-summary::before { content: '▼ '; }
.ft-controls { flex-direction: column; align-items: stretch; }
.ft-select { min-width: 0; width: 100%; }
}
/* ── Mode toggle (segmented control / pill) ─────── */
.mode-toggle {
display: inline-flex;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
align-self: flex-start;
}
.mode-btn {
padding: 0.4rem 1.1rem;
font-size: 0.85rem;
font-family: var(--font-body, sans-serif);
font-weight: 500;
border: none;
background: var(--color-surface, #fff);
color: var(--color-text-secondary, #6b7a99);
cursor: pointer;
transition: background 0.15s, color 0.15s;
}
.mode-btn:not(:last-child) {
border-right: 1px solid var(--color-border, #d0d7e8);
}
.mode-btn.active {
background: var(--app-primary, #2A6080);
color: #fff;
}
.mode-btn:not(.active):hover {
background: var(--color-surface-raised, #e4ebf5);
}
/* ── LLM run controls ───────────────────────────── */
.llm-run-controls {
display: flex;
align-items: center;
gap: 0.75rem;
flex-wrap: wrap;
}
.llm-run-hint {
font-size: 0.8rem;
color: var(--color-text-secondary, #6b7a99);
}
/* ── LLM results table tweaks ───────────────────── */
.llm-results-table .bt-best {
color: var(--color-success, #3a7a32);
font-weight: 700;
background: color-mix(in srgb, var(--color-success, #3a7a32) 8%, transparent);
}
.llm-model-name-cell {
font-family: var(--font-mono, monospace);
font-size: 0.75rem;
white-space: nowrap;
max-width: 16rem;
overflow: hidden;
text-overflow: ellipsis;
background: var(--color-surface, #fff);
border-top: 1px solid var(--color-border, #d0d7e8);
padding: 0.35rem 0.6rem;
position: sticky;
left: 0;
}
.llm-tps-cell {
font-family: var(--font-mono, monospace);
font-variant-numeric: tabular-nums;
white-space: nowrap;
}
/* ── Compare mode ─────────────────────────────────────────────────────────── */
.prompt-label {
font-size: 0.85rem;
font-weight: 600;
color: var(--color-text-secondary, #6b7a99);
margin-top: 0.5rem;
}
.cmp-prompt-editor {
width: 100%;
font-family: var(--font-mono, monospace);
font-size: 0.85rem;
padding: 0.75rem;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
background: var(--color-surface, #f0f4fc);
color: var(--color-text, #1a2338);
resize: vertical;
line-height: 1.5;
}
.cmp-prompt-editor:focus {
outline: 2px solid var(--app-primary, #2A6080);
outline-offset: -1px;
}
.cmp-results-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
gap: 1rem;
margin-top: 0.5rem;
}
.cmp-result-card {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
background: var(--color-surface, #f0f4fc);
display: flex;
flex-direction: column;
}
.cmp-result-card.cmp-error {
border-color: #fca5a5;
}
.cmp-result-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem 0.75rem;
background: var(--color-surface-raised, #e4ebf5);
border-bottom: 1px solid var(--color-border, #d0d7e8);
}
.cmp-model-name {
font-size: 0.82rem;
font-weight: 600;
color: var(--color-text, #1a2338);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.cmp-meta {
font-size: 0.75rem;
color: var(--color-text-secondary, #6b7a99);
flex-shrink: 0;
margin-left: 0.5rem;
}
.err-badge {
background: #fee2e2;
color: #991b1b;
border-radius: 9999px;
padding: 0.1rem 0.45rem;
font-size: 0.7rem;
font-weight: 600;
}
.cmp-response, .cmp-error-text {
padding: 0.75rem;
font-size: 0.82rem;
white-space: pre-wrap;
word-break: break-word;
max-height: 300px;
overflow-y: auto;
margin: 0;
flex: 1;
color: var(--color-text, #1a2338);
}
.cmp-error-text { color: #b91c1c; }
</style>

View file

@ -1,898 +0,0 @@
<template>
<div class="imitate-view">
<header class="bench-header">
<h1 class="page-title">🪞 Imitate</h1>
<p class="page-subtitle">Pull real samples from CF product APIs and compare LLM responses</p>
</header>
<!-- Step 1: Product selection -->
<section class="step-section">
<h2 class="step-heading">1. Select Product</h2>
<div v-if="productsLoading" class="picker-loading">Loading products</div>
<div v-else-if="products.length === 0" class="picker-empty">
No products configured — add an <code>imitate:</code> section to
<code>config/label_tool.yaml</code>.
</div>
<div v-else class="product-grid">
<button
v-for="p in products"
:key="p.id"
class="product-card"
:class="{
selected: selectedProduct?.id === p.id,
offline: !p.online,
}"
:disabled="!p.online"
:title="p.online ? p.description : `${p.name} is offline`"
@click="selectProduct(p)"
>
<span class="product-icon">{{ p.icon }}</span>
<span class="product-name">{{ p.name }}</span>
<span class="product-status" :class="p.online ? 'status-on' : 'status-off'">
{{ p.online ? 'online' : 'offline' }}
</span>
</button>
</div>
</section>
<!-- Step 2: Sample + Prompt -->
<section v-if="selectedProduct" class="step-section">
<h2 class="step-heading">2. Sample &amp; Prompt</h2>
<div class="sample-toolbar">
<span class="sample-product-label">{{ selectedProduct.icon }} {{ selectedProduct.name }}</span>
<button class="btn-refresh" :disabled="sampleLoading" @click="fetchSample">
{{ sampleLoading ? '⏳ Fetching…' : '🔄 Refresh Sample' }}
</button>
<span v-if="sampleError" class="sample-error">{{ sampleError }}</span>
</div>
<div v-if="sampleLoading" class="picker-loading">Fetching sample from API</div>
<template v-else-if="rawSample">
<!-- Fetched text preview -->
<details class="sample-preview" open>
<summary class="sample-preview-toggle">Raw sample text</summary>
<pre class="sample-text">{{ rawSample.text }}</pre>
</details>
<!-- Prompt editor -->
<label class="prompt-label" for="prompt-editor">Prompt sent to models</label>
<textarea
id="prompt-editor"
class="prompt-editor"
v-model="editedPrompt"
rows="8"
/>
</template>
<div v-else-if="!sampleLoading && selectedProduct" class="picker-empty">
Click "Refresh Sample" to fetch a real sample from {{ selectedProduct.name }}.
</div>
</section>
<!-- Step 3: Models + Run -->
<section v-if="editedPrompt" class="step-section">
<h2 class="step-heading">3. Models &amp; Run</h2>
<!-- Ollama model picker -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">🤖 Ollama Models</span>
<span class="picker-badge">{{ selectedModels.size }} / {{ ollamaModels.length }}</span>
</summary>
<div class="picker-body">
<div v-if="modelsLoading" class="picker-loading">Loading models</div>
<div v-else-if="ollamaModels.length === 0" class="picker-empty">
No ollama models in bench_models.yaml — add models with <code>service: ollama</code>.
</div>
<template v-else>
<label class="picker-cat-header">
<input
type="checkbox"
:checked="selectedModels.size === ollamaModels.length"
:indeterminate="selectedModels.size > 0 && selectedModels.size < ollamaModels.length"
@change="toggleAllModels(($event.target as HTMLInputElement).checked)"
/>
<span class="picker-cat-name">All ollama models</span>
</label>
<div class="picker-model-list">
<label v-for="m in ollamaModels" :key="m.id" class="picker-model-row">
<input
type="checkbox"
:checked="selectedModels.has(m.id)"
@change="toggleModel(m.id, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-model-name" :title="m.name">{{ m.name }}</span>
<span class="picker-model-tags">
<span v-for="tag in m.tags.slice(0, 3)" :key="tag" class="tag">{{ tag }}</span>
</span>
</label>
</div>
</template>
</div>
</details>
<!-- Temperature -->
<div class="temp-row">
<label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label>
<input
id="temp-slider"
type="range" min="0" max="1" step="0.1"
:value="temperature"
@input="temperature = parseFloat(($event.target as HTMLInputElement).value)"
class="temp-slider"
/>
</div>
<!-- Run controls -->
<div class="run-row">
<button
class="btn-run"
:disabled="running || selectedModels.size === 0"
@click="startRun"
>
{{ running ? '⏳ Running…' : '▶ Run' }}
</button>
<button v-if="running" class="btn-cancel" @click="cancelRun"> Cancel</button>
</div>
<!-- Progress log -->
<div v-if="runLog.length > 0" class="run-log" aria-live="polite">
<div v-for="(line, i) in runLog" :key="i" class="log-line">{{ line }}</div>
</div>
</section>
<!-- Step 4: Results -->
<section v-if="results.length > 0" class="step-section">
<h2 class="step-heading">4. Results</h2>
<div class="results-grid">
<div
v-for="r in results"
:key="r.model"
class="result-card"
:class="{ 'result-error': !!r.error }"
>
<div class="result-header">
<span class="result-model">{{ r.model }}</span>
<span class="result-meta">
<template v-if="r.error">
<span class="result-err-badge">error</span>
</template>
<template v-else>
{{ (r.elapsed_ms / 1000).toFixed(1) }}s
</template>
</span>
</div>
<pre v-if="r.error" class="result-error-text">{{ r.error }}</pre>
<pre v-else class="result-response">{{ r.response }}</pre>
</div>
</div>
<div class="corrections-row">
<button
class="btn-corrections"
:disabled="pushingCorrections || !selectedProduct || successfulResults.length === 0"
@click="pushCorrections"
>
{{ pushingCorrections ? '⏳ Pushing…' : `✍ Send ${successfulResults.length} to Corrections` }}
</button>
<span v-if="correctionsPushMsg" class="corrections-msg" :class="correctionsPushOk ? 'msg-ok' : 'msg-err'">
{{ correctionsPushMsg }}
</span>
</div>
</section>
</div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
// Types
// A CF product entry exposed by /api/imitate/products.
interface Product {
  id: string
  name: string
  icon: string
  description: string
  base_url: string
  online: boolean // backend-reported liveness; offline products are not selectable
}
// One sampled item pulled from a product API (/api/imitate/products/:id/sample).
interface Sample {
  product_id: string
  sample_index: number
  text: string
  prompt: string // pre-built prompt; the user may edit it before running
  raw_item: Record<string, unknown>
}
// A benchmark model entry from /api/cforch/models.
interface ModelEntry {
  id: string
  name: string
  service: string // hosting service, e.g. 'ollama'; this view offers ollama only
  tags: string[]
  vram_estimate_mb: number
}
// Per-model outcome of a run; `error` is non-null when the model failed.
interface RunResult {
  model: string
  response: string
  elapsed_ms: number
  error: string | null
}
// State
const productsLoading = ref(false)
const products = ref<Product[]>([])
const selectedProduct = ref<Product | null>(null)
const sampleLoading = ref(false)
const sampleError = ref<string | null>(null)
const rawSample = ref<Sample | null>(null)
const editedPrompt = ref('')
const modelsLoading = ref(false)
const allModels = ref<ModelEntry[]>([])
const selectedModels = ref<Set<string>>(new Set())
const temperature = ref(0.7)
const running = ref(false)
const eventSource = ref<EventSource | null>(null)
const runLog = ref<string[]>([])
const results = ref<RunResult[]>([])
const pushingCorrections = ref(false)
const correctionsPushMsg = ref<string | null>(null)
const correctionsPushOk = ref(false)
// Computed
// Only ollama-served models are offered in this view's picker.
const ollamaModels = computed(() => {
  return allModels.value.filter(entry => entry.service === 'ollama')
})
// Results with no error and a non-blank response (eligible for Corrections).
const successfulResults = computed(() => {
  return results.value.filter(r => !r.error && r.response.trim().length > 0)
})
// Lifecycle
// Load products and the model catalog in parallel on mount.
onMounted(() => {
  void Promise.all([loadProducts(), loadModels()])
})
// Methods
// Fetch the configured products; any failure leaves an empty list.
async function loadProducts() {
  productsLoading.value = true
  try {
    const resp = await fetch('/api/imitate/products')
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`)
    const payload = await resp.json()
    products.value = payload.products ?? []
  } catch {
    products.value = []
  } finally {
    productsLoading.value = false
  }
}
// Fetch the benchmark model catalog and reset the selection to all
// ollama models. Building a fresh Set — instead of add()-ing into the
// existing one as before — keeps stale ids from a previous load out of
// the selection and matches how toggleModel/toggleAllModels update it.
async function loadModels() {
  modelsLoading.value = true
  try {
    const resp = await fetch('/api/cforch/models')
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`)
    const data = await resp.json()
    allModels.value = data.models ?? []
    // Select all ollama models by default.
    selectedModels.value = new Set(
      allModels.value.filter(m => m.service === 'ollama').map(m => m.id)
    )
  } catch {
    allModels.value = []
  } finally {
    modelsLoading.value = false
  }
}
// Switch products: reset all per-product state, then pull a fresh sample.
async function selectProduct(p: Product) {
  selectedProduct.value = p
  sampleError.value = null
  rawSample.value = null
  editedPrompt.value = ''
  runLog.value = []
  results.value = []
  await fetchSample()
}
async function fetchSample() {
if (!selectedProduct.value) return
sampleLoading.value = true
sampleError.value = null
try {
const resp = await fetch(`/api/imitate/products/${selectedProduct.value.id}/sample`)
if (!resp.ok) {
const body = await resp.json().catch(() => ({ detail: 'Unknown error' }))
throw new Error(body.detail ?? `HTTP ${resp.status}`)
}
const data: Sample = await resp.json()
rawSample.value = data
editedPrompt.value = data.prompt
} catch (err: unknown) {
sampleError.value = err instanceof Error ? err.message : String(err)
} finally {
sampleLoading.value = false
}
}
// Tick or untick one model id, replacing the Set so Vue sees the change.
// Uses if/else rather than a bare side-effect ternary expression —
// lint-clean (no-unused-expressions) and consistent with the other
// toggle helpers in this app.
function toggleModel(id: string, checked: boolean) {
  const next = new Set(selectedModels.value)
  if (checked) next.add(id)
  else next.delete(id)
  selectedModels.value = next
}
// Select every ollama model, or clear the selection entirely.
function toggleAllModels(checked: boolean) {
  if (!checked) {
    selectedModels.value = new Set()
    return
  }
  selectedModels.value = new Set(ollamaModels.value.map(m => m.id))
}
// Launch an SSE run of the edited prompt against every selected model.
// Frames: 'start' / 'model_start' / 'model_done' / 'complete'; each
// finished model is appended to `results` as it completes.
function startRun() {
  // Guard: no concurrent run, non-blank prompt, at least one model.
  if (running.value || !editedPrompt.value.trim() || selectedModels.value.size === 0) return
  running.value = true
  results.value = []
  runLog.value = []
  correctionsPushMsg.value = null
  // EventSource only supports GET, so all inputs travel as query params.
  const params = new URLSearchParams({
    prompt: editedPrompt.value,
    model_ids: [...selectedModels.value].join(','),
    temperature: temperature.value.toString(),
    product_id: selectedProduct.value?.id ?? '',
  })
  const es = new EventSource(`/api/imitate/run?${params}`)
  eventSource.value = es
  es.onmessage = (event: MessageEvent) => {
    try {
      const msg = JSON.parse(event.data)
      if (msg.type === 'start') {
        runLog.value.push(`Running ${msg.total_models} model(s)…`)
      } else if (msg.type === 'model_start') {
        runLog.value.push(`${msg.model}`)
      } else if (msg.type === 'model_done') {
        const status = msg.error
          ? `✕ error: ${msg.error}`
          : `✓ done (${(msg.elapsed_ms / 1000).toFixed(1)}s)`
        runLog.value.push(` ${msg.model}: ${status}`)
        results.value.push({
          model: msg.model,
          response: msg.response,
          elapsed_ms: msg.elapsed_ms,
          error: msg.error ?? null,
        })
      } else if (msg.type === 'complete') {
        runLog.value.push(`Complete. ${results.value.length} responses.`)
        running.value = false
        es.close()
      }
    } catch {
      // ignore malformed SSE frames
    }
  }
  es.onerror = () => {
    runLog.value.push('Connection error — run may be incomplete.')
    running.value = false
    es.close()
  }
}
// Abort the current run client-side and note the cancellation in the log.
function cancelRun() {
  const stream = eventSource.value
  if (stream) stream.close()
  eventSource.value = null
  running.value = false
  runLog.value.push('Cancelled.')
}
// POST the successful model responses to the Corrections queue for the
// selected product, then surface a success or error message inline.
async function pushCorrections() {
  if (!selectedProduct.value || successfulResults.value.length === 0) return
  pushingCorrections.value = true
  correctionsPushMsg.value = null
  try {
    const resp = await fetch('/api/imitate/push-corrections', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        product_id: selectedProduct.value.id,
        prompt: editedPrompt.value,
        results: successfulResults.value,
      }),
    })
    if (!resp.ok) {
      // Prefer the backend's `detail` message when its body parses.
      const body = await resp.json().catch(() => ({ detail: 'Unknown error' }))
      throw new Error(body.detail ?? `HTTP ${resp.status}`)
    }
    const data = await resp.json()
    correctionsPushMsg.value = `${data.pushed} record(s) added to Corrections queue.`
    correctionsPushOk.value = true
  } catch (err: unknown) {
    correctionsPushMsg.value = err instanceof Error ? err.message : String(err)
    correctionsPushOk.value = false
  } finally {
    pushingCorrections.value = false
  }
}
</script>
<style scoped>
.imitate-view {
max-width: 1100px;
margin: 0 auto;
padding: 1.5rem;
display: flex;
flex-direction: column;
gap: 1.5rem;
}
.bench-header {
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.page-title {
font-size: 1.6rem;
font-weight: 700;
color: var(--color-text, #1a2338);
}
.page-subtitle {
font-size: 0.9rem;
color: var(--color-text-secondary, #6b7a99);
}
/* Steps */
.step-section {
background: var(--color-surface-raised, #e4ebf5);
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
padding: 1.25rem;
display: flex;
flex-direction: column;
gap: 1rem;
}
.step-heading {
font-size: 1rem;
font-weight: 600;
color: var(--color-text-secondary, #6b7a99);
text-transform: uppercase;
letter-spacing: 0.05em;
border-bottom: 1px solid var(--color-border, #d0d7e8);
padding-bottom: 0.5rem;
}
/* Product grid */
.product-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
gap: 0.75rem;
}
.product-card {
display: flex;
flex-direction: column;
align-items: center;
gap: 0.35rem;
padding: 1rem 0.75rem;
border: 2px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
background: var(--color-surface, #f0f4fc);
cursor: pointer;
transition: border-color 0.15s, background 0.15s;
font-size: 0.9rem;
}
.product-card:hover:not(:disabled) {
border-color: var(--app-primary, #2A6080);
background: color-mix(in srgb, var(--app-primary, #2A6080) 6%, var(--color-surface, #f0f4fc));
}
.product-card.selected {
border-color: var(--app-primary, #2A6080);
background: color-mix(in srgb, var(--app-primary, #2A6080) 12%, var(--color-surface, #f0f4fc));
}
.product-card.offline {
opacity: 0.45;
cursor: not-allowed;
}
.product-icon {
font-size: 2rem;
}
.product-name {
font-weight: 600;
color: var(--color-text, #1a2338);
}
.product-status {
font-size: 0.72rem;
padding: 0.1rem 0.45rem;
border-radius: 9999px;
font-weight: 600;
}
.status-on {
background: #d1fae5;
color: #065f46;
}
.status-off {
background: #fee2e2;
color: #991b1b;
}
/* Sample panel */
.sample-toolbar {
display: flex;
align-items: center;
gap: 0.75rem;
flex-wrap: wrap;
}
.sample-product-label {
font-weight: 600;
color: var(--app-primary, #2A6080);
}
.sample-error {
color: #b91c1c;
font-size: 0.85rem;
}
.sample-preview {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
overflow: hidden;
}
.sample-preview-toggle {
padding: 0.5rem 0.75rem;
cursor: pointer;
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
background: var(--color-surface, #f0f4fc);
user-select: none;
}
.sample-text {
padding: 0.75rem;
font-size: 0.82rem;
white-space: pre-wrap;
word-break: break-word;
max-height: 180px;
overflow-y: auto;
background: var(--color-bg, #f0f4fc);
margin: 0;
color: var(--color-text, #1a2338);
}
.prompt-label {
font-size: 0.85rem;
font-weight: 600;
color: var(--color-text-secondary, #6b7a99);
}
.prompt-editor {
width: 100%;
font-family: var(--font-mono, monospace);
font-size: 0.85rem;
padding: 0.75rem;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
background: var(--color-surface, #f0f4fc);
color: var(--color-text, #1a2338);
resize: vertical;
line-height: 1.5;
}
.prompt-editor:focus {
outline: 2px solid var(--app-primary, #2A6080);
outline-offset: -1px;
}
/* Model picker — reuse bench-view classes */
.model-picker {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
}
.picker-summary {
display: flex;
align-items: center;
justify-content: space-between;
padding: 0.75rem 1rem;
background: var(--color-surface, #f0f4fc);
cursor: pointer;
font-size: 0.95rem;
font-weight: 600;
user-select: none;
list-style: none;
}
.picker-title { flex: 1; }
.picker-badge {
font-size: 0.8rem;
background: var(--app-primary, #2A6080);
color: #fff;
border-radius: 9999px;
padding: 0.15rem 0.6rem;
}
.picker-body {
padding: 0.75rem 1rem;
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.picker-loading, .picker-empty {
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
padding: 0.5rem 0;
}
.picker-cat-header {
display: flex;
align-items: center;
gap: 0.5rem;
font-weight: 600;
font-size: 0.9rem;
padding: 0.35rem 0;
cursor: pointer;
}
.picker-model-list {
display: flex;
flex-wrap: wrap;
gap: 0.25rem;
padding-left: 1.25rem;
padding-bottom: 0.5rem;
}
.picker-model-row {
display: flex;
align-items: center;
gap: 0.4rem;
font-size: 0.85rem;
cursor: pointer;
padding: 0.2rem 0.5rem;
border-radius: 0.25rem;
min-width: 220px;
}
.picker-model-row:hover {
background: color-mix(in srgb, var(--app-primary, #2A6080) 8%, transparent);
}
.picker-model-name {
flex: 1;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.picker-model-tags {
display: flex;
gap: 0.2rem;
flex-shrink: 0;
}
.tag {
font-size: 0.68rem;
background: var(--color-border, #d0d7e8);
border-radius: 9999px;
padding: 0.05rem 0.4rem;
color: var(--color-text-secondary, #6b7a99);
white-space: nowrap;
}
/* Temperature */
.temp-row {
display: flex;
align-items: center;
gap: 0.75rem;
}
.temp-label {
font-size: 0.85rem;
white-space: nowrap;
min-width: 160px;
}
.temp-slider {
flex: 1;
accent-color: var(--app-primary, #2A6080);
}
/* Run controls */
.run-row {
display: flex;
align-items: center;
gap: 0.75rem;
}
.btn-run {
background: var(--app-primary, #2A6080);
color: #fff;
border: none;
border-radius: 0.375rem;
padding: 0.55rem 1.25rem;
font-size: 0.9rem;
font-weight: 600;
cursor: pointer;
transition: opacity 0.15s;
}
.btn-run:disabled {
opacity: 0.4;
cursor: not-allowed;
}
.btn-cancel {
background: transparent;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
padding: 0.5rem 0.9rem;
font-size: 0.85rem;
cursor: pointer;
color: var(--color-text-secondary, #6b7a99);
}
.btn-refresh {
background: transparent;
border: 1px solid var(--app-primary, #2A6080);
border-radius: 0.375rem;
padding: 0.35rem 0.8rem;
font-size: 0.85rem;
color: var(--app-primary, #2A6080);
cursor: pointer;
transition: background 0.15s;
}
.btn-refresh:hover:not(:disabled) {
background: color-mix(in srgb, var(--app-primary, #2A6080) 10%, transparent);
}
.btn-refresh:disabled { opacity: 0.5; cursor: not-allowed; }
/* Run log */
.run-log {
background: var(--color-bg, #f0f4fc);
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
padding: 0.75rem;
font-family: var(--font-mono, monospace);
font-size: 0.8rem;
max-height: 140px;
overflow-y: auto;
}
.log-line {
padding: 0.05rem 0;
color: var(--color-text, #1a2338);
}
/* Results */
.results-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
gap: 1rem;
}
.result-card {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
background: var(--color-surface, #f0f4fc);
display: flex;
flex-direction: column;
}
.result-card.result-error {
border-color: #fca5a5;
}
.result-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem 0.75rem;
background: var(--color-surface-raised, #e4ebf5);
border-bottom: 1px solid var(--color-border, #d0d7e8);
}
.result-model {
font-size: 0.82rem;
font-weight: 600;
color: var(--color-text, #1a2338);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.result-meta {
font-size: 0.75rem;
color: var(--color-text-secondary, #6b7a99);
flex-shrink: 0;
margin-left: 0.5rem;
}
.result-err-badge {
background: #fee2e2;
color: #991b1b;
border-radius: 9999px;
padding: 0.1rem 0.45rem;
font-size: 0.7rem;
font-weight: 600;
}
.result-response, .result-error-text {
padding: 0.75rem;
font-size: 0.82rem;
white-space: pre-wrap;
word-break: break-word;
max-height: 280px;
overflow-y: auto;
margin: 0;
flex: 1;
color: var(--color-text, #1a2338);
}
.result-error-text {
color: #b91c1c;
}
/* Corrections */
.corrections-row {
display: flex;
align-items: center;
gap: 0.75rem;
flex-wrap: wrap;
}
.btn-corrections {
background: var(--color-accent-warm, #b45309);
color: #fff;
border: none;
border-radius: 0.375rem;
padding: 0.55rem 1.25rem;
font-size: 0.9rem;
font-weight: 600;
cursor: pointer;
transition: opacity 0.15s;
}
.btn-corrections:disabled {
opacity: 0.4;
cursor: not-allowed;
}
.corrections-msg {
font-size: 0.85rem;
}
.msg-ok { color: #065f46; }
.msg-err { color: #b91c1c; }
</style>

View file

@ -54,18 +54,12 @@
{{ lookupResult.description }}
</p>
<div v-if="lookupResult.warning" class="compat-warning" role="alert">
<span class="compat-warning-icon"></span>
<span>{{ lookupResult.warning }}</span>
</div>
<button
class="btn-primary btn-add-queue"
:class="{ 'btn-add-queue-warn': !lookupResult.compatible }"
:disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue"
@click="addToQueue"
>
{{ addingToQueue ? 'Adding…' : lookupResult.compatible ? 'Add to queue' : 'Add anyway' }}
{{ addingToQueue ? 'Adding…' : 'Add to queue' }}
</button>
</div>
</section>
@ -194,8 +188,6 @@ interface LookupResult {
repo_id: string
pipeline_tag: string | null
adapter_recommendation: string | null
compatible: boolean
warning: string | null
size: number | null
description: string | null
already_installed: boolean
@ -573,34 +565,10 @@ onUnmounted(() => {
overflow: hidden;
}
.compat-warning {
display: flex;
align-items: flex-start;
gap: 0.5rem;
padding: 0.6rem 0.75rem;
border-radius: var(--radius-sm, 0.25rem);
background: color-mix(in srgb, var(--color-warning, #f59e0b) 12%, transparent);
border: 1px solid color-mix(in srgb, var(--color-warning, #f59e0b) 40%, transparent);
font-size: 0.82rem;
color: var(--color-text, #1a2338);
line-height: 1.45;
}
.compat-warning-icon {
flex-shrink: 0;
line-height: 1.45;
}
.btn-add-queue {
align-self: flex-start;
}
.btn-add-queue-warn {
background: var(--color-surface-raised, #e4ebf5);
color: var(--color-text-secondary, #6b7a99);
border: 1px solid var(--color-border, #d0d7e8);
}
/* ── Model cards (queue + downloads) ── */
.model-card {
border: 1px solid var(--color-border, #a8b8d0);

View file

@ -115,18 +115,8 @@
<h2 class="section-title">cf-orch Integration</h2>
<p class="section-desc">
Import SFT (supervised fine-tuning) candidates from cf-orch benchmark runs.
Connection settings fall back to environment variables
(<code>CF_ORCH_URL</code>, <code>CF_LICENSE_KEY</code>, <code>OLLAMA_HOST</code>)
when not set here.
</p>
<!-- Connection status pill -->
<div v-if="orchConfig" class="orch-status-row">
<span class="orch-status-pill" :class="orchStatusClass">{{ orchStatusLabel }}</span>
<span v-if="orchConfig.source === 'env'" class="orch-source-note">via env vars</span>
<span v-else class="orch-source-note">via label_tool.yaml</span>
</div>
<div class="field-row">
<label class="field field-grow">
<span>bench_results_dir</span>
@ -191,7 +181,7 @@
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
import { ref, onMounted } from 'vue'
import { useApiFetch } from '../composables/useApi'
interface Account {
@ -209,27 +199,12 @@ const saveOk = ref(true)
const richMotion = ref(localStorage.getItem('cf-avocet-rich-motion') !== 'false')
const keyHints = ref(localStorage.getItem('cf-avocet-key-hints') !== 'false')
// SFT / cf-orch integration state
// SFT integration state
const benchResultsDir = ref('')
const runs = ref<Array<{ run_id: string; timestamp: string; candidate_count: number; already_imported: boolean }>>([])
const importingRunId = ref<string | null>(null)
const importResult = ref<{ imported: number; skipped: number } | null>(null)
const saveStatus = ref('')
const orchConfig = ref<{ coordinator_url: string; ollama_url: string; ollama_model: string; license_key_set: boolean; source: string } | null>(null)
// CSS class for the connection pill: coordinator > local ollama > unconfigured.
const orchStatusClass = computed(() => {
  const cfg = orchConfig.value
  if (!cfg) return 'status-unknown'
  if (cfg.coordinator_url) return 'status-connected'
  return cfg.ollama_url ? 'status-local' : 'status-unconfigured'
})
// Pill label text; mirrors the branch order of orchStatusClass.
const orchStatusLabel = computed(() => {
  const cfg = orchConfig.value
  if (!cfg) return 'Unknown'
  if (cfg.coordinator_url) return '● cf-orch coordinator'
  return cfg.ollama_url ? '● Ollama (local)' : '○ Not configured'
})
async function loadSftConfig() {
try {
@ -243,15 +218,6 @@ async function loadSftConfig() {
}
}
// Fetch cf-orch connection info for the status pill; failures are non-fatal.
async function loadOrchConfig() {
  try {
    const res = await fetch('/api/cforch/config')
    if (!res.ok) return
    orchConfig.value = await res.json()
  } catch {
    // non-fatal — the pill simply stays in its unknown state
  }
}
async function saveSftConfig() {
saveStatus.value = 'Saving…'
try {
@ -371,7 +337,6 @@ function onKeyHintsChange() {
// Initial data load on mount: accounts, SFT config, and cf-orch status.
onMounted(() => {
  reload()
  loadSftConfig()
  loadOrchConfig()
})
</script>
@ -599,31 +564,6 @@ onMounted(() => {
width: 100%;
}
/* Row holding the cf-orch status pill and its source note. */
.orch-status-row {
  display: flex;
  align-items: center;
  gap: var(--space-2);
  margin-bottom: var(--space-3);
}
/* Base pill shape; one of the status-* modifiers below supplies colors. */
.orch-status-pill {
  font-size: 0.8rem;
  font-weight: 600;
  padding: var(--space-1) var(--space-3);
  border-radius: var(--radius-full);
}
/* Status modifiers, matching orchStatusClass in the script. */
.status-connected { background: color-mix(in srgb, var(--color-success, #3a7a32) 12%, transparent); color: var(--color-success, #3a7a32); }
.status-local { background: color-mix(in srgb, var(--color-primary) 12%, transparent); color: var(--color-primary); }
.status-unconfigured { background: var(--color-surface-alt); color: var(--color-text-muted); }
.status-unknown { background: var(--color-surface-alt); color: var(--color-text-muted); }
/* "via env vars" / "via label_tool.yaml" annotation next to the pill. */
.orch-source-note {
  font-size: 0.75rem;
  color: var(--color-text-muted);
  font-style: italic;
}
.runs-table {
width: 100%;
border-collapse: collapse;

View file

@ -68,44 +68,6 @@
<p class="bench-hint">Highlighted cells are the best-scoring model per metric.</p>
</template>
<!-- LLM Benchmark Results -->
<!-- Rendered only when /api/cforch/results returned rows (see loadLlmResults). -->
<template v-if="llmResults.length > 0">
  <h2 class="section-title">🤖 LLM Benchmark</h2>
  <div class="bench-table-wrap">
    <table class="bench-table">
      <thead>
        <tr>
          <th class="bt-model-col">Model</th>
          <th class="bt-metric-col">overall</th>
          <!-- One column per task type, derived from llmTaskTypeCols -->
          <th
            v-for="col in llmTaskTypeCols"
            :key="col"
            class="bt-metric-col"
          >{{ col }}</th>
          <th class="bt-metric-col">tok/s</th>
        </tr>
      </thead>
      <tbody>
        <tr v-for="row in llmResults" :key="row.model_id">
          <td class="bt-model-cell" :title="row.model_id">{{ row.model_name }}</td>
          <!-- Best model per column (llmBestByCol) gets the bt-best highlight -->
          <td
            class="bt-metric-cell"
            :class="{ 'bt-best': llmBestByCol['overall'] === row.model_id }"
          >{{ llmPct(row.avg_quality_score) }}</td>
          <td
            v-for="col in llmTaskTypeCols"
            :key="col"
            class="bt-metric-cell"
            :class="{ 'bt-best': llmBestByCol[col] === row.model_id }"
          >{{ row.quality_by_task_type[col] != null ? llmPct(row.quality_by_task_type[col]) : '—' }}</td>
          <td class="bt-metric-cell">{{ row.avg_tokens_per_sec.toFixed(1) }}</td>
        </tr>
      </tbody>
    </table>
  </div>
  <p class="bench-hint">Run LLM Eval on the Benchmark tab to refresh. Highlighted = best per column.</p>
</template>
<div class="file-info">
<span class="file-path">Score file: <code>data/email_score.jsonl</code></span>
<span class="file-size">{{ fileSizeLabel }}</span>
@ -132,18 +94,6 @@ interface BenchmarkModelResult {
[key: string]: number | undefined
}
// One model's aggregated LLM benchmark result, as returned by
// GET /api/cforch/results.
interface LlmModelResult {
  model_name: string            // display name shown in the table
  model_id: string              // unique id; used as row key and for best-per-column matching
  node_id: string               // presumably the cf-orch node that ran the benchmark — confirm with backend
  avg_tokens_per_sec: number    // mean generation throughput
  avg_completion_ms: number     // mean completion latency in milliseconds
  avg_quality_score: number     // overall quality as a 0–1 fraction (rendered via llmPct)
  finetune_candidates: number   // NOTE(review): looks like a count of SFT-candidate outputs — verify
  error_count: number           // failed requests during the run
  quality_by_task_type: Record<string, number>  // per-task-type quality, 0–1 fractions
}
interface StatsResponse {
total: number
counts: Record<string, number>
@ -235,49 +185,6 @@ function formatMetric(v: number | undefined): string {
return `${v.toFixed(1)}%`
}
// LLM Benchmark results
const llmResults = ref<LlmModelResult[]>([])
// Union of task-type keys across all LLM results, sorted so the table
// columns are stable regardless of per-model key order.
const llmTaskTypeCols = computed(() => {
  const keys = llmResults.value.flatMap((r) => Object.keys(r.quality_by_task_type))
  return Array.from(new Set(keys)).sort()
})
const llmBestByCol = computed((): Record<string, string> => {
const best: Record<string, string> = {}
if (llmResults.value.length === 0) return best
let bestId = '', bestVal = -Infinity
for (const r of llmResults.value) {
if (r.avg_quality_score > bestVal) { bestVal = r.avg_quality_score; bestId = r.model_id }
}
best['overall'] = bestId
for (const col of llmTaskTypeCols.value) {
bestId = ''; bestVal = -Infinity
for (const r of llmResults.value) {
const v = r.quality_by_task_type[col]
if (v != null && v > bestVal) { bestVal = v; bestId = r.model_id }
}
best[col] = bestId
}
return best
})
// Format a 0–1 fraction as a percentage with one decimal, e.g. 0.5 → "50.0%".
function llmPct(v: number): string {
  const pct = v * 100
  return pct.toFixed(1) + '%'
}
// Pull LLM benchmark rows from the cf-orch API. State is only overwritten
// when the backend returned a non-empty array, so stale-but-valid data is
// kept on an empty or malformed response.
async function loadLlmResults() {
  const { data } = await useApiFetch<LlmModelResult[]>('/api/cforch/results')
  if (!Array.isArray(data) || data.length === 0) return
  llmResults.value = data
}
async function load() {
loading.value = true
error.value = ''
@ -290,10 +197,7 @@ async function load() {
}
}
onMounted(() => {
load()
loadLlmResults()
})
onMounted(load)
</script>
<style scoped>