feat: Imitate tab — pull CF product samples, compare LLM responses

Backend (app/imitate.py):
- GET /api/imitate/products — reads imitate: config, checks online status
- GET /api/imitate/products/{id}/sample — fetches real item from product API
- GET /api/imitate/run (SSE) — streams ollama responses for selected models
- POST /api/imitate/push-corrections — queues results in SFT corrections JSONL

Frontend (ImitateView.vue):
- Step 1: product picker grid (online/offline status, icon from config)
- Step 2: raw sample preview + editable prompt textarea
- Step 3: ollama model multi-select, temperature slider, SSE run with live log
- Step 4: response cards side by side, push to Corrections button

Wiring:
- app/api.py: include imitate_router at /api/imitate
- web/src/router: /imitate route + lazy import
- AppSidebar: Imitate nav entry (mirror icon)
- config/label_tool.yaml.example: imitate: section with peregrine example
- 16 unit tests (100% passing)

Also: BenchmarkView.vue Compare panel — side-by-side run diff for bench results
2026-04-09 20:04:45 -07:00 · 2026-04-09 20:04:45 -07:00 · 3299c0e23a
commit 3299c0e23a
parent dc246df42d
9 changed files with 1865 additions and 4 deletions
--- a/app/api.py
+++ b/app/api.py
@ -152,6 +152,9 @@ app.include_router(models_router, prefix="/api/models")
 from app.cforch import router as cforch_router
 app.include_router(cforch_router, prefix="/api/cforch")

+from app.imitate import router as imitate_router
+app.include_router(imitate_router, prefix="/api/imitate")
+
 # In-memory last-action store (single user, local tool — in-memory is fine)
 _last_action: dict | None = None

--- a/app/cforch.py
+++ b/app/cforch.py
@ -134,6 +134,8 @@ def get_tasks() -> dict:
            "id":     t.get("id", ""),
            "name":   t.get("name", ""),
            "type":   t.get("type", ""),
+            "prompt": (t.get("prompt") or "").strip(),
+            "system": (t.get("system") or "").strip(),
        })
        task_type = t.get("type", "")
        if task_type and task_type not in types_set:
--- a/app/imitate.py
+++ b/app/imitate.py
@ -0,0 +1,351 @@
+"""Avocet — Imitate tab API.
+
+Fetches real samples from sibling CF product APIs, sends them through selected
+local LLMs (ollama), and streams responses back to the UI. Results can be
+pushed into the SFT corrections queue for human review.
+
+All endpoints registered on `router`. api.py includes this with prefix="/api/imitate".
+
+Module-level globals follow the same testability pattern as cforch.py and sft.py:
+override _CONFIG_DIR and _DATA_DIR via set_config_dir() / set_data_dir() in tests.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import time
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+from urllib.error import URLError
+from urllib.request import Request, urlopen
+
+import yaml
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+
+from app.utils import append_jsonl
+
+logger = logging.getLogger(__name__)
+
+_ROOT = Path(__file__).parent.parent
+_CONFIG_DIR: Path | None = None
+_DATA_DIR: Path = _ROOT / "data"
+
+router = APIRouter()
+
+
+# ── Testability seams ──────────────────────────────────────────────────────────
+
+def set_config_dir(path: Path | None) -> None:
+    """Override the directory that holds label_tool.yaml (test seam).
+
+    Passing None restores the default lookup under ``<repo root>/config``.
+    """
+    global _CONFIG_DIR
+    _CONFIG_DIR = path
+
+
+def set_data_dir(path: Path) -> None:
+    """Override the directory where sft_candidates.jsonl is written (test seam)."""
+    global _DATA_DIR
+    _DATA_DIR = path
+
+
+# ── Internal helpers ───────────────────────────────────────────────────────────
+
+def _config_file() -> Path:
+    if _CONFIG_DIR is not None:
+        return _CONFIG_DIR / "label_tool.yaml"
+    return _ROOT / "config" / "label_tool.yaml"
+
+
+def _load_imitate_config() -> dict:
+    """Read label_tool.yaml and return the imitate sub-dict (or {} if absent)."""
+    f = _config_file()
+    if not f.exists():
+        return {}
+    try:
+        raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
+    except yaml.YAMLError as exc:
+        logger.warning("Failed to parse imitate config %s: %s", f, exc)
+        return {}
+    return raw.get("imitate", {}) or {}
+
+
+def _load_cforch_config() -> dict:
+    """Read cforch section for ollama_url fallback."""
+    f = _config_file()
+    if not f.exists():
+        return {}
+    try:
+        raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
+    except yaml.YAMLError as exc:
+        return {}
+    return raw.get("cforch", {}) or {}
+
+
+def _ollama_url(cfg: dict) -> str:
+    cforch = _load_cforch_config()
+    return cfg.get("ollama_url") or cforch.get("ollama_url") or "http://localhost:11434"
+
+
+def _http_get_json(url: str, timeout: int = 5) -> Any:
+    """Fetch JSON from url; raise URLError on failure."""
+    req = Request(url, headers={"Accept": "application/json"})
+    with urlopen(req, timeout=timeout) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+def _is_online(base_url: str) -> bool:
+    """Return True if the product's /api/health endpoint responds OK."""
+    try:
+        data = _http_get_json(f"{base_url.rstrip('/')}/api/health", timeout=2)
+        return bool(data)
+    except Exception:
+        return False
+
+
+def _extract_sample(
+    raw: Any, text_fields: list[str], sample_index: int = 0
+) -> dict[str, Any]:
+    """Pull one item from a list or dict response and extract text_fields."""
+    item: dict[str, Any]
+    if isinstance(raw, list):
+        if not raw:
+            return {}
+        item = raw[min(sample_index, len(raw) - 1)]
+    elif isinstance(raw, dict):
+        # may be {items: [...]} or the item itself
+        for key in ("items", "results", "data", "jobs", "listings", "pantry"):
+            if key in raw and isinstance(raw[key], list):
+                lst = raw[key]
+                item = lst[min(sample_index, len(lst) - 1)] if lst else {}
+                break
+        else:
+            item = raw
+    else:
+        return {}
+
+    parts = []
+    for field in text_fields:
+        val = item.get(field)
+        if val and str(val).strip():
+            parts.append(f"**{field}**: {val}")
+    return {"item": item, "text": "\n\n".join(parts)}
+
+
+def _candidates_file() -> Path:
+    """Return the path of sft_candidates.jsonl inside the current data dir."""
+    return _DATA_DIR / "sft_candidates.jsonl"
+
+
+def _sse(data: dict) -> str:
+    """Format *data* as one SSE frame: a ``data:`` line of JSON plus a blank line."""
+    return f"data: {json.dumps(data)}\n\n"
+
+
+def _run_ollama_streaming(
+    ollama_base: str,
+    model_id: str,
+    prompt: str,
+    temperature: float,
+) -> tuple[str, int]:
+    """Call ollama /api/generate with stream=True; return (full_response, elapsed_ms).
+
+    Blocks until the model finishes; yields nothing — streaming is handled by
+    the SSE generator in run_imitate().
+    """
+    url = f"{ollama_base.rstrip('/')}/api/generate"
+    payload = json.dumps({
+        "model": model_id,
+        "prompt": prompt,
+        "stream": False,
+        "options": {"temperature": temperature},
+    }).encode("utf-8")
+    req = Request(url, data=payload, method="POST",
+                  headers={"Content-Type": "application/json"})
+    t0 = time.time()
+    try:
+        with urlopen(req, timeout=120) as resp:
+            body = json.loads(resp.read().decode("utf-8"))
+        elapsed = int((time.time() - t0) * 1000)
+        return body.get("response", ""), elapsed
+    except Exception as exc:
+        elapsed = int((time.time() - t0) * 1000)
+        raise RuntimeError(str(exc)) from exc
+
+
+# ── GET /products ──────────────────────────────────────────────────────────────
+
+@router.get("/products")
+def get_products() -> dict:
+    """List configured CF products with live online status."""
+    cfg = _load_imitate_config()
+    products_raw = cfg.get("products", []) or []
+    products = []
+    for p in products_raw:
+        if not isinstance(p, dict):
+            continue
+        base_url = p.get("base_url", "")
+        products.append({
+            "id":          p.get("id", ""),
+            "name":        p.get("name", ""),
+            "icon":        p.get("icon", "📦"),
+            "description": p.get("description", ""),
+            "base_url":    base_url,
+            "online":      _is_online(base_url) if base_url else False,
+        })
+    return {"products": products}
+
+
+# ── GET /products/{product_id}/sample ─────────────────────────────────────────
+
+@router.get("/products/{product_id}/sample")
+def get_sample(product_id: str, index: int = 0) -> dict:
+    """Fetch a real sample from the given product's API."""
+    cfg = _load_imitate_config()
+    products_raw = cfg.get("products", []) or []
+
+    product: dict | None = None
+    for p in products_raw:
+        if isinstance(p, dict) and p.get("id") == product_id:
+            product = p
+            break
+
+    if product is None:
+        raise HTTPException(404, f"Product '{product_id}' not in config")
+
+    base_url = product.get("base_url", "").rstrip("/")
+    endpoint = product.get("sample_endpoint", "")
+    if not base_url or not endpoint:
+        raise HTTPException(422, "Product missing base_url or sample_endpoint")
+
+    url = f"{base_url}{endpoint}"
+    try:
+        raw = _http_get_json(url, timeout=5)
+    except URLError as exc:
+        raise HTTPException(503, f"Product API unreachable: {exc}") from exc
+    except Exception as exc:
+        raise HTTPException(502, f"Bad response from product API: {exc}") from exc
+
+    text_fields = product.get("text_fields", []) or []
+    extracted = _extract_sample(raw, text_fields, index)
+    if not extracted:
+        raise HTTPException(404, "No sample items returned by product API")
+
+    prompt_template = product.get("prompt_template", "{text}")
+    prompt = prompt_template.replace("{text}", extracted["text"])
+
+    return {
+        "product_id":    product_id,
+        "sample_index":  index,
+        "text":          extracted["text"],
+        "prompt":        prompt,
+        "raw_item":      extracted.get("item", {}),
+    }
+
+
+# ── GET /run (SSE) ─────────────────────────────────────────────────────────────
+
+@router.get("/run")
+def run_imitate(
+    prompt: str = "",
+    model_ids: str = "",      # comma-separated ollama model IDs
+    temperature: float = 0.7,
+    product_id: str = "",
+) -> StreamingResponse:
+    """Run a prompt through selected ollama models and stream results as SSE."""
+
+    if not prompt.strip():
+        raise HTTPException(422, "prompt is required")
+
+    ids = [m.strip() for m in model_ids.split(",") if m.strip()]
+    if not ids:
+        raise HTTPException(422, "model_ids is required")
+
+    cfg = _load_imitate_config()
+    ollama_base = _ollama_url(cfg)
+
+    def generate():
+        results: list[dict] = []
+        yield _sse({"type": "start", "total_models": len(ids)})
+
+        for model_id in ids:
+            yield _sse({"type": "model_start", "model": model_id})
+            try:
+                response, elapsed_ms = _run_ollama_streaming(
+                    ollama_base, model_id, prompt, temperature
+                )
+                result = {
+                    "model":      model_id,
+                    "response":   response,
+                    "elapsed_ms": elapsed_ms,
+                    "error":      None,
+                }
+            except Exception as exc:
+                result = {
+                    "model":      model_id,
+                    "response":   "",
+                    "elapsed_ms": 0,
+                    "error":      str(exc),
+                }
+            results.append(result)
+            yield _sse({"type": "model_done", **result})
+
+        yield _sse({"type": "complete", "results": results})
+
+    return StreamingResponse(
+        generate(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "X-Accel-Buffering": "no",
+        },
+    )
+
+
+# ── POST /push-corrections ─────────────────────────────────────────────────────
+
+class ImitateResult(BaseModel):
+    """One model's outcome inside a push-corrections request.
+
+    Field names match the per-model result dicts emitted by the /run SSE stream.
+    """
+    # ollama model ID that produced (or failed to produce) the response
+    model: str
+    # full response text; push_corrections() skips blank responses
+    response: str
+    # elapsed time of the model call in milliseconds
+    elapsed_ms: int
+    # error message; push_corrections() skips results where this is set
+    error: str | None = None
+
+
+class PushCorrectionsRequest(BaseModel):
+    """Request body for POST /push-corrections."""
+    # stored verbatim on every queued record as "product_id"
+    product_id: str
+    # prompt text; stored as a single user message on every record
+    prompt: str
+    # per-model results to queue; errored or blank ones are skipped
+    results: list[ImitateResult]
+
+
+@router.post("/push-corrections")
+def push_corrections(req: PushCorrectionsRequest) -> dict:
+    """Append imitate results to sft_candidates.jsonl for human review."""
+    if not req.prompt.strip():
+        raise HTTPException(422, "prompt is required")
+    if not req.results:
+        raise HTTPException(422, "results list is empty")
+
+    ts = datetime.now(timezone.utc).isoformat()
+    records = []
+    for r in req.results:
+        if r.error or not r.response.strip():
+            continue
+        records.append({
+            "id":             str(uuid.uuid4()),
+            "source":         "imitate",
+            "product_id":     req.product_id,
+            "prompt_messages": [{"role": "user", "content": req.prompt}],
+            "model_response": r.response,
+            "model_id":       r.model,
+            "elapsed_ms":     r.elapsed_ms,
+            "status":         "pending",
+            "created_at":     ts,
+        })
+
+    if not records:
+        raise HTTPException(422, "No non-error results to push")
+
+    dest = _candidates_file()
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    for record in records:
+        append_jsonl(dest, record)
+
+    return {"pushed": len(records)}
--- a/config/label_tool.yaml.example
+++ b/config/label_tool.yaml.example
@ -46,3 +46,46 @@ cforch:
  # license_key:     CFG-AVCT-xxxx-xxxx-xxxx
  # ollama_url:      http://localhost:11434
  # ollama_model:    llama3.2:3b
+
+# Imitate tab — pull real samples from sibling CF product APIs and run them
+# through local LLMs to build a corrections dataset.
+# ollama_url defaults to cforch.ollama_url if omitted here.
+imitate:
+  ollama_url: http://localhost:11434   # optional — falls back to cforch.ollama_url
+
+  products:
+    - id: peregrine
+      name: Peregrine
+      icon: "🦅"
+      description: Job search assistant
+      base_url: http://localhost:8502
+      sample_endpoint: /api/jobs
+      text_fields: [title, description]
+      prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}"
+
+    - id: kiwi
+      name: Kiwi
+      icon: "🥝"
+      description: Pantry tracker
+      base_url: http://localhost:8511
+      sample_endpoint: /api/inventory
+      text_fields: [name, category, notes]
+      prompt_template: "Describe this pantry item and estimate how best to use it:\n\n{text}"
+
+    - id: snipe
+      name: Snipe
+      icon: "🎯"
+      description: eBay trust scoring
+      base_url: http://localhost:8509
+      sample_endpoint: /api/listings
+      text_fields: [title, description, seller_info]
+      prompt_template: "Evaluate the trustworthiness of this listing and flag any red flags:\n\n{text}"
+
+    - id: osprey
+      name: Osprey
+      icon: "📞"
+      description: Gov't hold-line automation
+      base_url: http://localhost:8520
+      sample_endpoint: /api/calls/recent
+      text_fields: [agency, issue, notes]
+      prompt_template: "Draft a concise summary of this government call record:\n\n{text}"
--- a/tests/test_imitate.py
+++ b/tests/test_imitate.py
@ -0,0 +1,242 @@
+"""Tests for app/imitate.py — product registry, sample extraction, corrections push."""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+from app.api import app
+from app import imitate as _imitate_module
+
+
+# ── Fixtures ───────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def reset_module_globals(tmp_path):
+    """Reset module-level config + data dir globals after each test."""
+    orig_cfg  = _imitate_module._CONFIG_DIR
+    orig_data = _imitate_module._DATA_DIR
+    yield
+    _imitate_module._CONFIG_DIR = orig_cfg
+    _imitate_module._DATA_DIR   = orig_data
+
+
+@pytest.fixture()
+def config_dir(tmp_path) -> Path:
+    _imitate_module.set_config_dir(tmp_path)
+    return tmp_path
+
+
+@pytest.fixture()
+def data_dir(tmp_path) -> Path:
+    _imitate_module.set_data_dir(tmp_path)
+    return tmp_path
+
+
+@pytest.fixture()
+def cfg_with_products(config_dir: Path) -> Path:
+    """Write a label_tool.yaml with two products."""
+    (config_dir / "label_tool.yaml").write_text(
+        """
+imitate:
+  ollama_url: http://localhost:11434
+  products:
+    - id: peregrine
+      name: Peregrine
+      icon: "🦅"
+      description: Job search assistant
+      base_url: http://peregrine.local
+      sample_endpoint: /api/jobs
+      text_fields: [title, description]
+      prompt_template: "Analyze: {text}"
+    - id: kiwi
+      name: Kiwi
+      icon: "🥝"
+      description: Pantry tracker
+      base_url: http://kiwi.local
+      sample_endpoint: /api/inventory
+      text_fields: [name, notes]
+      prompt_template: "Describe: {text}"
+"""
+    )
+    return config_dir
+
+
+@pytest.fixture()
+def client() -> TestClient:
+    return TestClient(app, raise_server_exceptions=True)
+
+
+# ── GET /products ──────────────────────────────────────────────────────────────
+
+def test_products_empty_when_no_config(config_dir, client):
+    """Returns empty list when label_tool.yaml has no imitate section."""
+    (config_dir / "label_tool.yaml").write_text("accounts: []\n")
+    resp = client.get("/api/imitate/products")
+    assert resp.status_code == 200
+    assert resp.json()["products"] == []
+
+
+def test_products_listed(cfg_with_products, client):
+    """All configured products are returned with expected fields."""
+    with patch.object(_imitate_module, "_is_online", return_value=True):
+        resp = client.get("/api/imitate/products")
+    assert resp.status_code == 200
+    products = resp.json()["products"]
+    assert len(products) == 2
+    ids = {p["id"] for p in products}
+    assert ids == {"peregrine", "kiwi"}
+    peregrine = next(p for p in products if p["id"] == "peregrine")
+    assert peregrine["name"] == "Peregrine"
+    assert peregrine["icon"] == "🦅"
+    assert peregrine["online"] is True
+
+
+def test_products_offline_when_unreachable(cfg_with_products, client):
+    """Products with unreachable base_url are marked offline."""
+    with patch.object(_imitate_module, "_is_online", return_value=False):
+        resp = client.get("/api/imitate/products")
+    assert all(not p["online"] for p in resp.json()["products"])
+
+
+# ── GET /products/{id}/sample ─────────────────────────────────────────────────
+
+def test_sample_unknown_product(cfg_with_products, client):
+    """Returns 404 for a product id not in config."""
+    resp = client.get("/api/imitate/products/nonexistent/sample")
+    assert resp.status_code == 404
+
+
+def test_sample_fetched_from_list(cfg_with_products, client):
+    """Extracts first item from a list API response."""
+    fake_api = [
+        {"title": "Engineer", "description": "Build things"},
+        {"title": "Other",    "description": "Ignore me"},
+    ]
+    with patch.object(_imitate_module, "_http_get_json", return_value=fake_api):
+        resp = client.get("/api/imitate/products/peregrine/sample")
+    assert resp.status_code == 200
+    body = resp.json()
+    assert "Engineer" in body["text"]
+    assert "Build things" in body["text"]
+    assert "Analyze:" in body["prompt"]
+
+
+def test_sample_fetched_from_dict_with_items_key(cfg_with_products, client):
+    """Extracts from a wrapper dict with a recognised list key."""
+    fake_api = {"items": [{"title": "Wrapped Job", "description": "In a wrapper"}]}
+    with patch.object(_imitate_module, "_http_get_json", return_value=fake_api):
+        resp = client.get("/api/imitate/products/peregrine/sample")
+    assert resp.status_code == 200
+    assert "Wrapped Job" in resp.json()["text"]
+
+
+def test_sample_503_when_api_unreachable(cfg_with_products, client):
+    """Returns 503 when the product API is not reachable."""
+    from urllib.error import URLError
+    with patch.object(_imitate_module, "_http_get_json", side_effect=URLError("refused")):
+        resp = client.get("/api/imitate/products/peregrine/sample")
+    assert resp.status_code == 503
+
+
+def test_sample_404_on_empty_list(cfg_with_products, client):
+    """Returns 404 when product API returns an empty list."""
+    with patch.object(_imitate_module, "_http_get_json", return_value=[]):
+        resp = client.get("/api/imitate/products/peregrine/sample")
+    assert resp.status_code == 404
+
+
+# ── POST /push-corrections ─────────────────────────────────────────────────────
+
+def test_push_corrections_appends_jsonl(cfg_with_products, data_dir, client):
+    """Successful push writes records to sft_candidates.jsonl."""
+    payload = {
+        "product_id": "peregrine",
+        "prompt":     "Analyze this job:",
+        "results": [
+            {"model": "qwen2.5:0.5b", "response": "It's a good job.", "elapsed_ms": 800, "error": None},
+            {"model": "llama3.1:8b",  "response": "Strong candidate.", "elapsed_ms": 1500, "error": None},
+        ],
+    }
+    resp = client.post("/api/imitate/push-corrections", json=payload)
+    assert resp.status_code == 200
+    assert resp.json()["pushed"] == 2
+
+    candidates = (data_dir / "sft_candidates.jsonl").read_text().splitlines()
+    assert len(candidates) == 2
+    for line in candidates:
+        record = json.loads(line)
+        assert record["source"] == "imitate"
+        assert record["product_id"] == "peregrine"
+        assert record["status"] == "pending"
+        assert record["prompt_messages"][0]["role"] == "user"
+
+
+def test_push_corrections_skips_errors(cfg_with_products, data_dir, client):
+    """Results with errors are not written to the corrections file."""
+    payload = {
+        "product_id": "peregrine",
+        "prompt":     "Analyze:",
+        "results": [
+            {"model": "good-model",  "response": "Good answer.", "elapsed_ms": 500, "error": None},
+            {"model": "bad-model",   "response": "",             "elapsed_ms": 0,   "error": "connection refused"},
+        ],
+    }
+    resp = client.post("/api/imitate/push-corrections", json=payload)
+    assert resp.status_code == 200
+    assert resp.json()["pushed"] == 1
+
+
+def test_push_corrections_empty_prompt_422(cfg_with_products, data_dir, client):
+    """Empty prompt returns 422."""
+    payload = {
+        "product_id": "peregrine",
+        "prompt":     "   ",
+        "results": [{"model": "m", "response": "r", "elapsed_ms": 1, "error": None}],
+    }
+    resp = client.post("/api/imitate/push-corrections", json=payload)
+    assert resp.status_code == 422
+
+
+def test_push_corrections_all_errors_422(cfg_with_products, data_dir, client):
+    """422 when every result has an error (nothing to push)."""
+    payload = {
+        "product_id": "peregrine",
+        "prompt":     "Analyze:",
+        "results": [
+            {"model": "m", "response": "", "elapsed_ms": 0, "error": "timed out"},
+        ],
+    }
+    resp = client.post("/api/imitate/push-corrections", json=payload)
+    assert resp.status_code == 422
+
+
+# ── _extract_sample helper ─────────────────────────────────────────────────────
+
+def test_extract_sample_list():
+    result = _imitate_module._extract_sample(
+        [{"title": "A", "description": "B"}],
+        text_fields=["title", "description"],
+    )
+    assert "A" in result["text"]
+    assert "B" in result["text"]
+
+
+def test_extract_sample_empty_list():
+    result = _imitate_module._extract_sample([], text_fields=["title"])
+    assert result == {}
+
+
+def test_extract_sample_respects_index():
+    items = [{"title": "First"}, {"title": "Second"}]
+    result = _imitate_module._extract_sample(items, ["title"], sample_index=1)
+    assert "Second" in result["text"]
+
+
+def test_extract_sample_clamps_index():
+    items = [{"title": "Only"}]
+    result = _imitate_module._extract_sample(items, ["title"], sample_index=99)
+    assert "Only" in result["text"]
--- a/web/src/components/AppSidebar.vue
+++ b/web/src/components/AppSidebar.vue
@ -67,6 +67,7 @@ const navItems = [
  { path: '/stats',     icon: '📊', label: 'Stats'     },
  { path: '/benchmark',   icon: '🏁', label: 'Benchmark'   },
  { path: '/models',      icon: '🤗', label: 'Models'      },
+  { path: '/imitate',     icon: '🪞', label: 'Imitate'     },
  { path: '/corrections', icon: '✍️', label: 'Corrections' },
  { path: '/settings',    icon: '⚙️', label: 'Settings'    },
 ]
--- a/web/src/router/index.ts
+++ b/web/src/router/index.ts
@ -8,6 +8,7 @@ const BenchmarkView = () => import('../views/BenchmarkView.vue')
 const SettingsView    = () => import('../views/SettingsView.vue')
 const CorrectionsView = () => import('../views/CorrectionsView.vue')
 const ModelsView      = () => import('../views/ModelsView.vue')
+const ImitateView     = () => import('../views/ImitateView.vue')

 export const router = createRouter({
  history: createWebHashHistory(),
@ -17,6 +18,7 @@ export const router = createRouter({
    { path: '/stats',       component: StatsView,       meta: { title: 'Stats' } },
    { path: '/benchmark',   component: BenchmarkView,   meta: { title: 'Benchmark' } },
    { path: '/models',      component: ModelsView,      meta: { title: 'Models' } },
+    { path: '/imitate',     component: ImitateView,     meta: { title: 'Imitate' }     },
    { path: '/corrections', component: CorrectionsView, meta: { title: 'Corrections' } },
    { path: '/settings',    component: SettingsView,    meta: { title: 'Settings' } },
  ],
--- a/web/src/views/BenchmarkView.vue
+++ b/web/src/views/BenchmarkView.vue
@ -38,6 +38,11 @@
        :class="{ active: benchMode === 'llm' }"
        @click="benchMode = 'llm'"
      >🤖 LLM Eval</button>
+      <button
+        class="mode-btn"
+        :class="{ active: benchMode === 'compare' }"
+        @click="benchMode = 'compare'; ensureCompareReady()"
+      >⚖️ Compare</button>
    </div>

    <!-- ── LLM Eval panel ─────────────────────────────────────── -->
@ -214,6 +219,121 @@

    </template>

+    <!-- ── Compare panel ─────────────────────────────────────── -->
+    <template v-if="benchMode === 'compare'">
+
+      <!-- Task selector (radio — one at a time) -->
+      <details class="model-picker" open>
+        <summary class="picker-summary">
+          <span class="picker-title">📋 Pick a Task</span>
+          <span class="picker-badge">{{ cmpSelectedTask ? cmpSelectedTask.name : 'None selected' }}</span>
+        </summary>
+        <div class="picker-body">
+          <div v-if="llmTasksLoading" class="picker-loading">Loading tasks…</div>
+          <div v-else-if="llmTasks.length === 0" class="picker-empty">No tasks found — check cforch config.</div>
+          <template v-else>
+            <div v-for="(tasks, type) in llmTasksByType" :key="type" class="picker-category">
+              <span class="picker-cat-name" style="font-weight:600; padding: 0.35rem 0; display:block">{{ type }}</span>
+              <div class="picker-model-list">
+                <label v-for="t in tasks" :key="t.id" class="picker-model-row">
+                  <input
+                    type="radio"
+                    name="cmp-task"
+                    :checked="cmpSelectedTask?.id === t.id"
+                    @change="selectCmpTask(t)"
+                  />
+                  <span class="picker-model-name" :title="t.name">{{ t.name }}</span>
+                </label>
+              </div>
+            </div>
+          </template>
+        </div>
+      </details>
+
+      <!-- Prompt editor -->
+      <template v-if="cmpSelectedTask">
+        <label class="prompt-label" for="cmp-prompt">Prompt</label>
+        <textarea
+          id="cmp-prompt"
+          class="cmp-prompt-editor"
+          v-model="cmpPrompt"
+          rows="6"
+        />
+
+        <!-- Model picker (ollama only) -->
+        <details class="model-picker" open>
+          <summary class="picker-summary">
+            <span class="picker-title">🤖 Ollama Models</span>
+            <span class="picker-badge">{{ cmpSelectedModels.size }} / {{ ollamaLlmModels.length }}</span>
+          </summary>
+          <div class="picker-body">
+            <label class="picker-cat-header">
+              <input
+                type="checkbox"
+                :checked="cmpSelectedModels.size === ollamaLlmModels.length"
+                :indeterminate="cmpSelectedModels.size > 0 && cmpSelectedModels.size < ollamaLlmModels.length"
+                @change="toggleAllCmpModels(($event.target as HTMLInputElement).checked)"
+              />
+              <span class="picker-cat-name">All ollama models</span>
+            </label>
+            <div class="picker-model-list">
+              <label v-for="m in ollamaLlmModels" :key="m.id" class="picker-model-row">
+                <input
+                  type="checkbox"
+                  :checked="cmpSelectedModels.has(m.id)"
+                  @change="toggleCmpModel(m.id, ($event.target as HTMLInputElement).checked)"
+                />
+                <span class="picker-model-name">{{ m.name }}</span>
+                <span class="picker-adapter-type">{{ m.tags.slice(0,3).join(', ') }}</span>
+              </label>
+            </div>
+          </div>
+        </details>
+
+        <!-- Run controls -->
+        <div class="llm-run-controls">
+          <button
+            class="btn-run"
+            :disabled="cmpRunning || cmpSelectedModels.size === 0"
+            @click="startCompare"
+          >{{ cmpRunning ? '⏳ Running…' : '⚖️ Compare Models' }}</button>
+          <button v-if="cmpRunning" class="btn-cancel" @click="cancelCompare">✕ Cancel</button>
+        </div>
+
+        <!-- Progress log -->
+        <div v-if="cmpLog.length > 0" class="run-log">
+          <div class="log-lines">
+            <div v-for="(line, i) in cmpLog" :key="i" class="log-line">{{ line }}</div>
+          </div>
+        </div>
+
+        <!-- Side-by-side results -->
+        <template v-if="cmpResults.length > 0">
+          <h2 class="chart-title">Side-by-Side Responses</h2>
+          <div class="cmp-results-grid">
+            <div
+              v-for="r in cmpResults"
+              :key="r.model"
+              class="cmp-result-card"
+              :class="{ 'cmp-error': !!r.error }"
+            >
+              <div class="cmp-result-header">
+                <span class="cmp-model-name">{{ r.model }}</span>
+                <span class="cmp-meta">
+                  <template v-if="r.error"><span class="err-badge">error</span></template>
+                  <template v-else>{{ (r.elapsed_ms / 1000).toFixed(1) }}s</template>
+                </span>
+              </div>
+              <pre v-if="r.error" class="cmp-error-text">{{ r.error }}</pre>
+              <pre v-else class="cmp-response">{{ r.response }}</pre>
+            </div>
+          </div>
+        </template>
+      </template>
+
+    </template>
+    <!-- ── /Compare panel ─────────────────────────────────────── -->
+
    <!-- ── Classifier panel ──────────────────────────────────── -->
    <template v-if="benchMode === 'classifier'">

@ -480,6 +600,8 @@ interface CfOrchTask {
  id: string
  name: string
  type: string
+  prompt: string
+  system: string
 }

 interface CfOrchModel {
@ -555,7 +677,7 @@ const ftLogEl          = ref<HTMLElement | null>(null)
 const runCancelled = ref(false)

 // ── Mode toggle ───────────────────────────────────────────────────────────────
-const benchMode = ref<'classifier' | 'llm'>('classifier')
+const benchMode = ref<'classifier' | 'llm' | 'compare'>('classifier')

 // ── LLM Eval state ───────────────────────────────────────────────────────────
 const llmTasks        = ref<CfOrchTask[]>([])
@ -574,6 +696,108 @@ const llmEventSource  = ref<EventSource | null>(null)
 const llmLogEl        = ref<HTMLElement | null>(null)
 const ftCancelled  = ref(false)

+// ── Compare mode state ────────────────────────────────────────────────────────
+/** One model's outcome in a compare run, built from an SSE `model_done` frame. */
+interface CmpResult {
+  // Model identifier as reported by the frame; also used as the card's v-for key.
+  model: string
+  // Response text shown in the card body when `error` is falsy.
+  response: string
+  // Elapsed time in milliseconds; the card divides by 1000 to display seconds.
+  elapsed_ms: number
+  // Error text shown instead of the response; null when the frame carried no error.
+  error: string | null
+}
+
+// Task chosen via selectCmpTask(); null until one is picked.
+const cmpSelectedTask    = ref<CfOrchTask & { prompt: string; system: string } | null>(null)
+// Prompt text sent as the `prompt` query parameter of a compare run.
+const cmpPrompt          = ref('')
+// Ids of the models to compare; joined with commas into `model_ids`.
+const cmpSelectedModels  = ref<Set<string>>(new Set())
+// True from startCompare() until the stream completes, errors, or is cancelled.
+const cmpRunning         = ref(false)
+// Human-readable progress lines rendered in the run log.
+const cmpLog             = ref<string[]>([])
+// One entry per `model_done` frame, rendered as side-by-side cards.
+const cmpResults         = ref<CmpResult[]>([])
+// Stream handle of the current run, kept so cancelCompare() can close it.
+const cmpEventSource     = ref<EventSource | null>(null)
+
+// Entries of the shared LLM model list whose `service` is 'ollama'.
+const ollamaLlmModels = computed(() => {
+  return llmModels.value.filter(({ service }) => service === 'ollama')
+})
+
+/** Make `t` the active compare task, seed the prompt editor from it and drop prior run output. */
+function selectCmpTask(t: CfOrchTask & { prompt: string; system: string }) {
+  // Output from an earlier task no longer applies once the task changes.
+  cmpLog.value = []
+  cmpResults.value = []
+
+  cmpSelectedTask.value = t
+  cmpPrompt.value = t.prompt ? t.prompt : ''
+}
+
+/** Add or remove one model id from the compare selection, replacing the Set wholesale. */
+function toggleCmpModel(id: string, checked: boolean) {
+  const updated = new Set(cmpSelectedModels.value)
+  if (checked) {
+    updated.add(id)
+  } else {
+    updated.delete(id)
+  }
+  cmpSelectedModels.value = updated
+}
+
+/** Select every ollama model (checked) or clear the compare selection (unchecked). */
+function toggleAllCmpModels(checked: boolean) {
+  const ids = checked ? ollamaLlmModels.value.map(entry => entry.id) : []
+  cmpSelectedModels.value = new Set(ids)
+}
+
+/**
+ * Prepare compare mode: load tasks and models if they are not loaded yet
+ * (compare shares llmTasks/llmModels with LLM Eval), then pre-select every
+ * ollama model when nothing is selected.
+ *
+ * The loaders are awaited before the pre-selection check. Previously the
+ * check ran immediately after kicking them off, so on first entry the model
+ * list was still empty and nothing was ever pre-selected.
+ * NOTE(review): assumes loadLlmTasks/loadLlmModels return promises; they are
+ * defined outside this hunk. Awaiting a non-promise is harmless.
+ */
+async function ensureCompareReady() {
+  const pending: unknown[] = []
+  if (llmTasks.value.length === 0) pending.push(loadLlmTasks())
+  if (llmModels.value.length === 0) pending.push(loadLlmModels())
+  await Promise.all(pending)
+
+  // Pre-select all ollama models, but never override a selection the user already made.
+  if (cmpSelectedModels.value.size === 0 && ollamaLlmModels.value.length > 0) {
+    cmpSelectedModels.value = new Set(ollamaLlmModels.value.map(m => m.id))
+  }
+}
+
+/**
+ * Run the current prompt against every selected model and stream the
+ * responses into `cmpResults`.
+ *
+ * Opens an EventSource on `/api/imitate/run` and handles four frame types:
+ * `start`, `model_start`, `model_done` (adds a result card) and `complete`.
+ * Does nothing while a run is already active, when the prompt is blank, or
+ * when no model is selected.
+ */
+function startCompare() {
+  // A second start on top of a live run would orphan the first stream, which
+  // would then keep appending results to the new run.
+  if (cmpRunning.value) return
+  if (!cmpPrompt.value.trim() || cmpSelectedModels.value.size === 0) return
+
+  // Defensive: release any handle left over from an earlier run.
+  cmpEventSource.value?.close()
+
+  cmpRunning.value = true
+  cmpResults.value = []
+  cmpLog.value = []
+
+  const params = new URLSearchParams({
+    prompt:    cmpPrompt.value,
+    model_ids: [...cmpSelectedModels.value].join(','),
+  })
+
+  const es = new EventSource(`/api/imitate/run?${params}`)
+  cmpEventSource.value = es
+
+  // Close this stream and release run state; only clears the shared handle
+  // if it still points at this run's stream.
+  const finish = () => {
+    es.close()
+    if (cmpEventSource.value === es) cmpEventSource.value = null
+    cmpRunning.value = false
+  }
+
+  es.onmessage = (event: MessageEvent) => {
+    try {
+      const msg = JSON.parse(event.data)
+      if (msg.type === 'start') {
+        cmpLog.value.push(`Comparing ${msg.total_models} models…`)
+      } else if (msg.type === 'model_start') {
+        cmpLog.value.push(`→ ${msg.model}…`)
+      } else if (msg.type === 'model_done') {
+        const status = msg.error
+          ? `✕ ${msg.error}`
+          : `✓ ${(msg.elapsed_ms / 1000).toFixed(1)}s`
+        cmpLog.value.push(`  ${msg.model}: ${status}`)
+        cmpResults.value.push({
+          model:      msg.model,
+          // Keep the declared `string` type even if a frame omits the response.
+          response:   msg.response ?? '',
+          elapsed_ms: msg.elapsed_ms,
+          error:      msg.error ?? null,
+        })
+      } else if (msg.type === 'complete') {
+        finish()
+      }
+    } catch { /* ignore malformed frames */ }
+  }
+
+  // EventSource reconnects automatically after an error; closing it here
+  // prevents the run from silently restarting.
+  es.onerror = () => {
+    cmpLog.value.push('Connection error.')
+    finish()
+  }
+}
+
+/** Abort the compare stream (if any), clear the running flag and log the cancellation. */
+function cancelCompare() {
+  const stream = cmpEventSource.value
+  if (stream) stream.close()
+  cmpEventSource.value = null
+  cmpRunning.value = false
+  cmpLog.value.push('Cancelled.')
+}
+
 // POST to the benchmark cancel endpoint; a failed request is deliberately ignored.
 async function cancelBenchmark() {
   await fetch('/api/benchmark/cancel', { method: 'POST' }).catch(() => {})
 }
@ -1603,4 +1827,99 @@ details[open] .ft-summary::before { content: '▼  '; }
  font-variant-numeric: tabular-nums;
  white-space: nowrap;
 }
+
+/* ── Compare mode ─────────────────────────────────────────────────────────── */
+
+.prompt-label {
+  font-size: 0.85rem;
+  font-weight: 600;
+  color: var(--color-text-secondary, #6b7a99);
+  margin-top: 0.5rem;
+}
+
+.cmp-prompt-editor {
+  width: 100%;
+  font-family: var(--font-mono, monospace);
+  font-size: 0.85rem;
+  padding: 0.75rem;
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.375rem;
+  background: var(--color-surface, #f0f4fc);
+  color: var(--color-text, #1a2338);
+  resize: vertical;
+  line-height: 1.5;
+}
+
+.cmp-prompt-editor:focus {
+  outline: 2px solid var(--app-primary, #2A6080);
+  outline-offset: -1px;
+}
+
+.cmp-results-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
+  gap: 1rem;
+  margin-top: 0.5rem;
+}
+
+.cmp-result-card {
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.5rem;
+  overflow: hidden;
+  background: var(--color-surface, #f0f4fc);
+  display: flex;
+  flex-direction: column;
+}
+
+.cmp-result-card.cmp-error {
+  border-color: #fca5a5;
+}
+
+.cmp-result-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  padding: 0.5rem 0.75rem;
+  background: var(--color-surface-raised, #e4ebf5);
+  border-bottom: 1px solid var(--color-border, #d0d7e8);
+}
+
+.cmp-model-name {
+  font-size: 0.82rem;
+  font-weight: 600;
+  color: var(--color-text, #1a2338);
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.cmp-meta {
+  font-size: 0.75rem;
+  color: var(--color-text-secondary, #6b7a99);
+  flex-shrink: 0;
+  margin-left: 0.5rem;
+}
+
+.err-badge {
+  background: #fee2e2;
+  color: #991b1b;
+  border-radius: 9999px;
+  padding: 0.1rem 0.45rem;
+  font-size: 0.7rem;
+  font-weight: 600;
+}
+
+.cmp-response, .cmp-error-text {
+  padding: 0.75rem;
+  font-size: 0.82rem;
+  white-space: pre-wrap;
+  word-break: break-word;
+  max-height: 300px;
+  overflow-y: auto;
+  margin: 0;
+  flex: 1;
+  color: var(--color-text, #1a2338);
+}
+
+.cmp-error-text { color: #b91c1c; }
 </style>
--- a/web/src/views/ImitateView.vue
+++ b/web/src/views/ImitateView.vue
@ -0,0 +1,898 @@
+<template>
+  <div class="imitate-view">
+    <header class="bench-header">
+      <h1 class="page-title">🪞 Imitate</h1>
+      <p class="page-subtitle">Pull real samples from CF product APIs and compare LLM responses</p>
+    </header>
+
+    <!-- ── Step 1: Product selection ──────────────────────────────── -->
+    <section class="step-section">
+      <h2 class="step-heading">1. Select Product</h2>
+      <div v-if="productsLoading" class="picker-loading">Loading products…</div>
+      <div v-else-if="products.length === 0" class="picker-empty">
+        No products configured — add an <code>imitate:</code> section to
+        <code>config/label_tool.yaml</code>.
+      </div>
+      <div v-else class="product-grid">
+        <button
+          v-for="p in products"
+          :key="p.id"
+          class="product-card"
+          :class="{
+            selected: selectedProduct?.id === p.id,
+            offline: !p.online,
+          }"
+          :disabled="!p.online"
+          :title="p.online ? p.description : `${p.name} is offline`"
+          @click="selectProduct(p)"
+        >
+          <span class="product-icon">{{ p.icon }}</span>
+          <span class="product-name">{{ p.name }}</span>
+          <span class="product-status" :class="p.online ? 'status-on' : 'status-off'">
+            {{ p.online ? 'online' : 'offline' }}
+          </span>
+        </button>
+      </div>
+    </section>
+
+    <!-- ── Step 2: Sample + Prompt ────────────────────────────────── -->
+    <section v-if="selectedProduct" class="step-section">
+      <h2 class="step-heading">2. Sample &amp; Prompt</h2>
+      <div class="sample-toolbar">
+        <span class="sample-product-label">{{ selectedProduct.icon }} {{ selectedProduct.name }}</span>
+        <button class="btn-refresh" :disabled="sampleLoading" @click="fetchSample">
+          {{ sampleLoading ? '⏳ Fetching…' : '🔄 Refresh Sample' }}
+        </button>
+        <span v-if="sampleError" class="sample-error">{{ sampleError }}</span>
+      </div>
+
+      <div v-if="sampleLoading" class="picker-loading">Fetching sample from API…</div>
+
+      <template v-else-if="rawSample">
+        <!-- Fetched text preview -->
+        <details class="sample-preview" open>
+          <summary class="sample-preview-toggle">Raw sample text</summary>
+          <pre class="sample-text">{{ rawSample.text }}</pre>
+        </details>
+
+        <!-- Prompt editor -->
+        <label class="prompt-label" for="prompt-editor">Prompt sent to models</label>
+        <textarea
+          id="prompt-editor"
+          class="prompt-editor"
+          v-model="editedPrompt"
+          rows="8"
+        />
+      </template>
+
+      <div v-else-if="!sampleLoading && selectedProduct" class="picker-empty">
+        Click "Refresh Sample" to fetch a real sample from {{ selectedProduct.name }}.
+      </div>
+    </section>
+
+    <!-- ── Step 3: Models + Run ───────────────────────────────────── -->
+    <section v-if="editedPrompt" class="step-section">
+      <h2 class="step-heading">3. Models &amp; Run</h2>
+
+      <!-- Ollama model picker -->
+      <details class="model-picker" open>
+        <summary class="picker-summary">
+          <span class="picker-title">🤖 Ollama Models</span>
+          <span class="picker-badge">{{ selectedModels.size }} / {{ ollamaModels.length }}</span>
+        </summary>
+        <div class="picker-body">
+          <div v-if="modelsLoading" class="picker-loading">Loading models…</div>
+          <div v-else-if="ollamaModels.length === 0" class="picker-empty">
+            No ollama models in bench_models.yaml — add models with <code>service: ollama</code>.
+          </div>
+          <template v-else>
+            <label class="picker-cat-header">
+              <input
+                type="checkbox"
+                :checked="selectedModels.size === ollamaModels.length"
+                :indeterminate="selectedModels.size > 0 && selectedModels.size < ollamaModels.length"
+                @change="toggleAllModels(($event.target as HTMLInputElement).checked)"
+              />
+              <span class="picker-cat-name">All ollama models</span>
+            </label>
+            <div class="picker-model-list">
+              <label v-for="m in ollamaModels" :key="m.id" class="picker-model-row">
+                <input
+                  type="checkbox"
+                  :checked="selectedModels.has(m.id)"
+                  @change="toggleModel(m.id, ($event.target as HTMLInputElement).checked)"
+                />
+                <span class="picker-model-name" :title="m.name">{{ m.name }}</span>
+                <span class="picker-model-tags">
+                  <span v-for="tag in m.tags.slice(0, 3)" :key="tag" class="tag">{{ tag }}</span>
+                </span>
+              </label>
+            </div>
+          </template>
+        </div>
+      </details>
+
+      <!-- Temperature -->
+      <div class="temp-row">
+        <label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label>
+        <input
+          id="temp-slider"
+          type="range" min="0" max="1" step="0.1"
+          :value="temperature"
+          @input="temperature = parseFloat(($event.target as HTMLInputElement).value)"
+          class="temp-slider"
+        />
+      </div>
+
+      <!-- Run controls -->
+      <div class="run-row">
+        <button
+          class="btn-run"
+          :disabled="running || selectedModels.size === 0"
+          @click="startRun"
+        >
+          {{ running ? '⏳ Running…' : '▶ Run' }}
+        </button>
+        <button v-if="running" class="btn-cancel" @click="cancelRun">✕ Cancel</button>
+      </div>
+
+      <!-- Progress log -->
+      <div v-if="runLog.length > 0" class="run-log" aria-live="polite">
+        <div v-for="(line, i) in runLog" :key="i" class="log-line">{{ line }}</div>
+      </div>
+    </section>
+
+    <!-- ── Step 4: Results ────────────────────────────────────────── -->
+    <section v-if="results.length > 0" class="step-section">
+      <h2 class="step-heading">4. Results</h2>
+
+      <div class="results-grid">
+        <div
+          v-for="r in results"
+          :key="r.model"
+          class="result-card"
+          :class="{ 'result-error': !!r.error }"
+        >
+          <div class="result-header">
+            <span class="result-model">{{ r.model }}</span>
+            <span class="result-meta">
+              <template v-if="r.error">
+                <span class="result-err-badge">error</span>
+              </template>
+              <template v-else>
+                {{ (r.elapsed_ms / 1000).toFixed(1) }}s
+              </template>
+            </span>
+          </div>
+          <pre v-if="r.error" class="result-error-text">{{ r.error }}</pre>
+          <pre v-else class="result-response">{{ r.response }}</pre>
+        </div>
+      </div>
+
+      <div class="corrections-row">
+        <button
+          class="btn-corrections"
+          :disabled="pushingCorrections || !selectedProduct || successfulResults.length === 0"
+          @click="pushCorrections"
+        >
+          {{ pushingCorrections ? '⏳ Pushing…' : `✍ Send ${successfulResults.length} to Corrections` }}
+        </button>
+        <span v-if="correctionsPushMsg" class="corrections-msg" :class="correctionsPushOk ? 'msg-ok' : 'msg-err'">
+          {{ correctionsPushMsg }}
+        </span>
+      </div>
+    </section>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { ref, computed, onMounted } from 'vue'
+
+// ── Types ──────────────────────────────────────────────────────────────────────
+
+/** A product entry as returned in the `products` array of `/api/imitate/products`. */
+interface Product {
+  // Identifier used in the sample URL and sent as `product_id`.
+  id: string
+  // Display name on the product card.
+  name: string
+  // Text rendered as the card's icon.
+  icon: string
+  // Card tooltip while the product is online.
+  description: string
+  // Not read by this view.
+  base_url: string
+  // Offline products render dimmed and their card is disabled.
+  online: boolean
+}
+
+/** Payload of `/api/imitate/products/{id}/sample`. */
+interface Sample {
+  // Not read by this view.
+  product_id: string
+  // Not read by this view.
+  sample_index: number
+  // Shown in the "Raw sample text" preview.
+  text: string
+  // Initial content of the editable prompt.
+  prompt: string
+  // Not read by this view.
+  raw_item: Record<string, unknown>
+}
+
+/** A model entry as returned in the `models` array of `/api/cforch/models`. */
+interface ModelEntry {
+  // Selection key; selected ids are sent as `model_ids`.
+  id: string
+  // Label shown in the model picker.
+  name: string
+  // Only entries with service 'ollama' are offered in this view.
+  service: string
+  // The first three tags are rendered next to the model name.
+  tags: string[]
+  // Not read by this view.
+  vram_estimate_mb: number
+}
+
+/** One model's outcome in a run, built from an SSE `model_done` frame. */
+interface RunResult {
+  // Model identifier as reported by the frame; also the result card's v-for key.
+  model: string
+  // Response text shown in the card body when `error` is falsy.
+  response: string
+  // Elapsed time in milliseconds; the card divides by 1000 to display seconds.
+  elapsed_ms: number
+  // Error text shown instead of the response; null when the frame carried no error.
+  error: string | null
+}
+
+// ── State ──────────────────────────────────────────────────────────────────────
+
+// Step 1: product list, its loading flag, and the currently chosen product.
+const productsLoading  = ref(false)
+const products         = ref<Product[]>([])
+const selectedProduct  = ref<Product | null>(null)
+
+// Step 2: fetched sample, fetch status, and the prompt text the user may edit.
+const sampleLoading    = ref(false)
+const sampleError      = ref<string | null>(null)
+const rawSample        = ref<Sample | null>(null)
+const editedPrompt     = ref('')
+
+// Step 3: all models from the API and the ids ticked for the run.
+const modelsLoading    = ref(false)
+const allModels        = ref<ModelEntry[]>([])
+const selectedModels   = ref<Set<string>>(new Set())
+
+// Sampling temperature sent with the run; the slider ranges 0–1 in steps of 0.1.
+const temperature      = ref(0.7)
+
+// Run state: active flag, stream handle (for cancelRun), progress lines, result cards.
+const running          = ref(false)
+const eventSource      = ref<EventSource | null>(null)
+const runLog           = ref<string[]>([])
+const results          = ref<RunResult[]>([])
+
+// Step 4: status of the "send to Corrections" request and its feedback message.
+const pushingCorrections = ref(false)
+const correctionsPushMsg = ref<string | null>(null)
+const correctionsPushOk  = ref(false)
+
+// ── Computed ───────────────────────────────────────────────────────────────────
+
+// Models offered in the picker: only entries whose `service` is 'ollama'.
+const ollamaModels = computed(() => {
+  return allModels.value.filter(({ service }) => service === 'ollama')
+})
+
+// Results eligible for Corrections: no error and a response that is not blank.
+const successfulResults = computed(() =>
+  results.value.filter(entry => (entry.error ? false : entry.response.trim().length > 0))
+)
+
+// ── Lifecycle ─────────────────────────────────────────────────────────────────
+
+// Load products and models in parallel when the view mounts.
+onMounted(async () => {
+  const initialLoads = [loadProducts(), loadModels()]
+  await Promise.all(initialLoads)
+})
+
+// ── Methods ────────────────────────────────────────────────────────────────────
+
+/** Fetch the configured products; any failure leaves the list empty. */
+async function loadProducts() {
+  productsLoading.value = true
+  let loaded: Product[] = []
+  try {
+    const response = await fetch('/api/imitate/products')
+    if (!response.ok) throw new Error(`HTTP ${response.status}`)
+    const payload = await response.json()
+    loaded = payload.products ?? []
+  } catch {
+    // Network, HTTP and parse errors all degrade to "no products".
+    loaded = []
+  }
+  products.value = loaded
+  productsLoading.value = false
+}
+
+/** Fetch the model list and tick every ollama model by default; failures leave the list empty. */
+async function loadModels() {
+  modelsLoading.value = true
+  try {
+    const response = await fetch('/api/cforch/models')
+    if (!response.ok) throw new Error(`HTTP ${response.status}`)
+    const payload = await response.json()
+    allModels.value = payload.models ?? []
+    // Default selection: every ollama-backed model starts selected.
+    for (const { id, service } of allModels.value) {
+      if (service !== 'ollama') continue
+      selectedModels.value.add(id)
+    }
+  } catch {
+    allModels.value = []
+  } finally {
+    modelsLoading.value = false
+  }
+}
+
+/**
+ * Switch the active product, reset everything derived from the previous one
+ * (sample, prompt, error, results, log) and fetch a fresh sample.
+ */
+async function selectProduct(p: Product) {
+  // A run still streaming for the previous product would keep appending to
+  // `results` after the reset below, and those responses could then be pushed
+  // to Corrections under the new product's id. Stop it first.
+  if (running.value) cancelRun()
+
+  selectedProduct.value = p
+  rawSample.value = null
+  editedPrompt.value = ''
+  sampleError.value = null
+  results.value = []
+  runLog.value = []
+  await fetchSample()
+}
+
+// Token of the most recent fetchSample() call; older calls compare against it
+// to detect that they have been superseded.
+let sampleRequestSeq = 0
+
+/**
+ * Fetch a sample for the selected product and seed the prompt editor from it.
+ *
+ * Product cards stay clickable while a fetch is in flight, so requests can
+ * overlap. Only the latest request may update the sample, the error message
+ * or the loading flag; results of superseded requests are discarded.
+ * On failure `sampleError` holds the backend `detail` or the error message.
+ */
+async function fetchSample() {
+  const product = selectedProduct.value
+  if (!product) return
+
+  const requestId = ++sampleRequestSeq
+  const isLatest = () => requestId === sampleRequestSeq
+
+  sampleLoading.value = true
+  sampleError.value = null
+  try {
+    // Encode the id so characters such as '/', '?' or '#' cannot alter the URL.
+    const resp = await fetch(`/api/imitate/products/${encodeURIComponent(product.id)}/sample`)
+    if (!resp.ok) {
+      const body = await resp.json().catch(() => ({ detail: 'Unknown error' }))
+      throw new Error(body.detail ?? `HTTP ${resp.status}`)
+    }
+    const data: Sample = await resp.json()
+    if (!isLatest()) return
+    rawSample.value = data
+    // Keep the prompt a string even if the payload omits it.
+    editedPrompt.value = data.prompt ?? ''
+  } catch (err: unknown) {
+    if (isLatest()) {
+      sampleError.value = err instanceof Error ? err.message : String(err)
+    }
+  } finally {
+    // A superseded request must not clear the spinner of the one that replaced it.
+    if (isLatest()) sampleLoading.value = false
+  }
+}
+
+/** Add or remove one model id from the selection, replacing the Set wholesale. */
+function toggleModel(id: string, checked: boolean) {
+  const updated = new Set(selectedModels.value)
+  if (checked) {
+    updated.add(id)
+  } else {
+    updated.delete(id)
+  }
+  selectedModels.value = updated
+}
+
+/** Select every ollama model (checked) or clear the selection (unchecked). */
+function toggleAllModels(checked: boolean) {
+  const ids = checked ? ollamaModels.value.map(entry => entry.id) : []
+  selectedModels.value = new Set(ids)
+}
+
+/**
+ * Run the edited prompt against every selected model and stream the
+ * responses into `results`.
+ *
+ * Opens an EventSource on `/api/imitate/run` and handles four frame types:
+ * `start`, `model_start`, `model_done` (adds a result card) and `complete`.
+ * Does nothing while a run is active, when the prompt is blank, or when no
+ * model is selected.
+ */
+function startRun() {
+  if (running.value || !editedPrompt.value.trim() || selectedModels.value.size === 0) return
+
+  // Defensive: release any handle left over from an earlier run.
+  eventSource.value?.close()
+
+  running.value = true
+  results.value = []
+  runLog.value = []
+  correctionsPushMsg.value = null
+
+  const params = new URLSearchParams({
+    prompt:     editedPrompt.value,
+    model_ids:  [...selectedModels.value].join(','),
+    temperature: temperature.value.toString(),
+    product_id: selectedProduct.value?.id ?? '',
+  })
+
+  const es = new EventSource(`/api/imitate/run?${params}`)
+  eventSource.value = es
+
+  // Close this stream and release run state; only clears the shared handle
+  // if it still points at this run's stream.
+  const finish = () => {
+    es.close()
+    if (eventSource.value === es) eventSource.value = null
+    running.value = false
+  }
+
+  es.onmessage = (event: MessageEvent) => {
+    try {
+      const msg = JSON.parse(event.data)
+      if (msg.type === 'start') {
+        runLog.value.push(`Running ${msg.total_models} model(s)…`)
+      } else if (msg.type === 'model_start') {
+        runLog.value.push(`→ ${msg.model}…`)
+      } else if (msg.type === 'model_done') {
+        const status = msg.error
+          ? `✕ error: ${msg.error}`
+          : `✓ done (${(msg.elapsed_ms / 1000).toFixed(1)}s)`
+        runLog.value.push(`  ${msg.model}: ${status}`)
+        results.value.push({
+          model:      msg.model,
+          // successfulResults calls .trim() on this, so it must always be a string.
+          response:   msg.response ?? '',
+          elapsed_ms: msg.elapsed_ms,
+          error:      msg.error ?? null,
+        })
+      } else if (msg.type === 'complete') {
+        runLog.value.push(`Complete. ${results.value.length} responses.`)
+        finish()
+      }
+    } catch {
+      // ignore malformed SSE frames
+    }
+  }
+
+  // EventSource reconnects automatically after an error; closing it here
+  // prevents the run from silently restarting.
+  es.onerror = () => {
+    runLog.value.push('Connection error — run may be incomplete.')
+    finish()
+  }
+}
+
+/** Abort the run's stream (if any), clear the running flag and log the cancellation. */
+function cancelRun() {
+  const stream = eventSource.value
+  if (stream) stream.close()
+  eventSource.value = null
+  running.value = false
+  runLog.value.push('Cancelled.')
+}
+
+/**
+ * POST the successful results, with the product id and the prompt that
+ * produced them, to `/api/imitate/push-corrections`, then report the outcome
+ * through `correctionsPushMsg` / `correctionsPushOk`.
+ */
+async function pushCorrections() {
+  const product = selectedProduct.value
+  const accepted = successfulResults.value
+  if (!product || accepted.length === 0) return
+
+  pushingCorrections.value = true
+  correctionsPushMsg.value = null
+  try {
+    const payload = JSON.stringify({
+      product_id: product.id,
+      prompt:     editedPrompt.value,
+      results:    accepted,
+    })
+    const response = await fetch('/api/imitate/push-corrections', {
+      method:  'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body:    payload,
+    })
+    if (!response.ok) {
+      // Prefer the backend's `detail`; fall back when the body is not JSON.
+      const failure = await response.json().catch(() => ({ detail: 'Unknown error' }))
+      throw new Error(failure.detail ?? `HTTP ${response.status}`)
+    }
+    const summary = await response.json()
+    correctionsPushMsg.value = `${summary.pushed} record(s) added to Corrections queue.`
+    correctionsPushOk.value = true
+  } catch (err: unknown) {
+    correctionsPushOk.value = false
+    correctionsPushMsg.value = err instanceof Error ? err.message : String(err)
+  } finally {
+    pushingCorrections.value = false
+  }
+}
+</script>
+
+<style scoped>
+.imitate-view {
+  max-width: 1100px;
+  margin: 0 auto;
+  padding: 1.5rem;
+  display: flex;
+  flex-direction: column;
+  gap: 1.5rem;
+}
+
+.bench-header {
+  display: flex;
+  flex-direction: column;
+  gap: 0.25rem;
+}
+
+.page-title {
+  font-size: 1.6rem;
+  font-weight: 700;
+  color: var(--color-text, #1a2338);
+}
+
+.page-subtitle {
+  font-size: 0.9rem;
+  color: var(--color-text-secondary, #6b7a99);
+}
+
+/* Steps */
+.step-section {
+  background: var(--color-surface-raised, #e4ebf5);
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.5rem;
+  padding: 1.25rem;
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+}
+
+.step-heading {
+  font-size: 1rem;
+  font-weight: 600;
+  color: var(--color-text-secondary, #6b7a99);
+  text-transform: uppercase;
+  letter-spacing: 0.05em;
+  border-bottom: 1px solid var(--color-border, #d0d7e8);
+  padding-bottom: 0.5rem;
+}
+
+/* Product grid */
+.product-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
+  gap: 0.75rem;
+}
+
+.product-card {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 0.35rem;
+  padding: 1rem 0.75rem;
+  border: 2px solid var(--color-border, #d0d7e8);
+  border-radius: 0.5rem;
+  background: var(--color-surface, #f0f4fc);
+  cursor: pointer;
+  transition: border-color 0.15s, background 0.15s;
+  font-size: 0.9rem;
+}
+
+.product-card:hover:not(:disabled) {
+  border-color: var(--app-primary, #2A6080);
+  background: color-mix(in srgb, var(--app-primary, #2A6080) 6%, var(--color-surface, #f0f4fc));
+}
+
+.product-card.selected {
+  border-color: var(--app-primary, #2A6080);
+  background: color-mix(in srgb, var(--app-primary, #2A6080) 12%, var(--color-surface, #f0f4fc));
+}
+
+.product-card.offline {
+  opacity: 0.45;
+  cursor: not-allowed;
+}
+
+.product-icon {
+  font-size: 2rem;
+}
+
+.product-name {
+  font-weight: 600;
+  color: var(--color-text, #1a2338);
+}
+
+.product-status {
+  font-size: 0.72rem;
+  padding: 0.1rem 0.45rem;
+  border-radius: 9999px;
+  font-weight: 600;
+}
+
+.status-on {
+  background: #d1fae5;
+  color: #065f46;
+}
+
+.status-off {
+  background: #fee2e2;
+  color: #991b1b;
+}
+
+/* Sample panel */
+.sample-toolbar {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+  flex-wrap: wrap;
+}
+
+.sample-product-label {
+  font-weight: 600;
+  color: var(--app-primary, #2A6080);
+}
+
+.sample-error {
+  color: #b91c1c;
+  font-size: 0.85rem;
+}
+
+.sample-preview {
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.375rem;
+  overflow: hidden;
+}
+
+.sample-preview-toggle {
+  padding: 0.5rem 0.75rem;
+  cursor: pointer;
+  font-size: 0.85rem;
+  color: var(--color-text-secondary, #6b7a99);
+  background: var(--color-surface, #f0f4fc);
+  user-select: none;
+}
+
+.sample-text {
+  padding: 0.75rem;
+  font-size: 0.82rem;
+  white-space: pre-wrap;
+  word-break: break-word;
+  max-height: 180px;
+  overflow-y: auto;
+  background: var(--color-bg, #f0f4fc);
+  margin: 0;
+  color: var(--color-text, #1a2338);
+}
+
+.prompt-label {
+  font-size: 0.85rem;
+  font-weight: 600;
+  color: var(--color-text-secondary, #6b7a99);
+}
+
+.prompt-editor {
+  width: 100%;
+  font-family: var(--font-mono, monospace);
+  font-size: 0.85rem;
+  padding: 0.75rem;
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.375rem;
+  background: var(--color-surface, #f0f4fc);
+  color: var(--color-text, #1a2338);
+  resize: vertical;
+  line-height: 1.5;
+}
+
+.prompt-editor:focus {
+  outline: 2px solid var(--app-primary, #2A6080);
+  outline-offset: -1px;
+}
+
+/* Model picker — reuse bench-view classes */
+.model-picker {
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.5rem;
+  overflow: hidden;
+}
+
+.picker-summary {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 0.75rem 1rem;
+  background: var(--color-surface, #f0f4fc);
+  cursor: pointer;
+  font-size: 0.95rem;
+  font-weight: 600;
+  user-select: none;
+  list-style: none;
+}
+
+.picker-title { flex: 1; }
+
+.picker-badge {
+  font-size: 0.8rem;
+  background: var(--app-primary, #2A6080);
+  color: #fff;
+  border-radius: 9999px;
+  padding: 0.15rem 0.6rem;
+}
+
+.picker-body {
+  padding: 0.75rem 1rem;
+  display: flex;
+  flex-direction: column;
+  gap: 0.25rem;
+}
+
+.picker-loading, .picker-empty {
+  font-size: 0.85rem;
+  color: var(--color-text-secondary, #6b7a99);
+  padding: 0.5rem 0;
+}
+
+.picker-cat-header {
+  display: flex;
+  align-items: center;
+  gap: 0.5rem;
+  font-weight: 600;
+  font-size: 0.9rem;
+  padding: 0.35rem 0;
+  cursor: pointer;
+}
+
+.picker-model-list {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 0.25rem;
+  padding-left: 1.25rem;
+  padding-bottom: 0.5rem;
+}
+
+.picker-model-row {
+  display: flex;
+  align-items: center;
+  gap: 0.4rem;
+  font-size: 0.85rem;
+  cursor: pointer;
+  padding: 0.2rem 0.5rem;
+  border-radius: 0.25rem;
+  min-width: 220px;
+}
+
+.picker-model-row:hover {
+  background: color-mix(in srgb, var(--app-primary, #2A6080) 8%, transparent);
+}
+
+.picker-model-name {
+  flex: 1;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.picker-model-tags {
+  display: flex;
+  gap: 0.2rem;
+  flex-shrink: 0;
+}
+
+.tag {
+  font-size: 0.68rem;
+  background: var(--color-border, #d0d7e8);
+  border-radius: 9999px;
+  padding: 0.05rem 0.4rem;
+  color: var(--color-text-secondary, #6b7a99);
+  white-space: nowrap;
+}
+
+/* Temperature */
+.temp-row {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+}
+
+.temp-label {
+  font-size: 0.85rem;
+  white-space: nowrap;
+  min-width: 160px;
+}
+
+.temp-slider {
+  flex: 1;
+  accent-color: var(--app-primary, #2A6080);
+}
+
+/* Run controls */
+.run-row {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+}
+
+.btn-run {
+  background: var(--app-primary, #2A6080);
+  color: #fff;
+  border: none;
+  border-radius: 0.375rem;
+  padding: 0.55rem 1.25rem;
+  font-size: 0.9rem;
+  font-weight: 600;
+  cursor: pointer;
+  transition: opacity 0.15s;
+}
+
+.btn-run:disabled {
+  opacity: 0.4;
+  cursor: not-allowed;
+}
+
+.btn-cancel {
+  background: transparent;
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.375rem;
+  padding: 0.5rem 0.9rem;
+  font-size: 0.85rem;
+  cursor: pointer;
+  color: var(--color-text-secondary, #6b7a99);
+}
+
+.btn-refresh {
+  background: transparent;
+  border: 1px solid var(--app-primary, #2A6080);
+  border-radius: 0.375rem;
+  padding: 0.35rem 0.8rem;
+  font-size: 0.85rem;
+  color: var(--app-primary, #2A6080);
+  cursor: pointer;
+  transition: background 0.15s;
+}
+
+.btn-refresh:hover:not(:disabled) {
+  background: color-mix(in srgb, var(--app-primary, #2A6080) 10%, transparent);
+}
+
+.btn-refresh:disabled { opacity: 0.5; cursor: not-allowed; }
+
+/* Run log */
+.run-log {
+  background: var(--color-bg, #f0f4fc);
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.375rem;
+  padding: 0.75rem;
+  font-family: var(--font-mono, monospace);
+  font-size: 0.8rem;
+  max-height: 140px;
+  overflow-y: auto;
+}
+
+.log-line {
+  padding: 0.05rem 0;
+  color: var(--color-text, #1a2338);
+}
+
+/* Results */
+.results-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
+  gap: 1rem;
+}
+
+.result-card {
+  border: 1px solid var(--color-border, #d0d7e8);
+  border-radius: 0.5rem;
+  overflow: hidden;
+  background: var(--color-surface, #f0f4fc);
+  display: flex;
+  flex-direction: column;
+}
+
+.result-card.result-error {
+  border-color: #fca5a5;
+}
+
+.result-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  padding: 0.5rem 0.75rem;
+  background: var(--color-surface-raised, #e4ebf5);
+  border-bottom: 1px solid var(--color-border, #d0d7e8);
+}
+
+.result-model {
+  font-size: 0.82rem;
+  font-weight: 600;
+  color: var(--color-text, #1a2338);
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.result-meta {
+  font-size: 0.75rem;
+  color: var(--color-text-secondary, #6b7a99);
+  flex-shrink: 0;
+  margin-left: 0.5rem;
+}
+
+.result-err-badge {
+  background: #fee2e2;
+  color: #991b1b;
+  border-radius: 9999px;
+  padding: 0.1rem 0.45rem;
+  font-size: 0.7rem;
+  font-weight: 600;
+}
+
+.result-response, .result-error-text {
+  padding: 0.75rem;
+  font-size: 0.82rem;
+  white-space: pre-wrap;
+  word-break: break-word;
+  max-height: 280px;
+  overflow-y: auto;
+  margin: 0;
+  flex: 1;
+  color: var(--color-text, #1a2338);
+}
+
+.result-error-text {
+  color: #b91c1c;
+}
+
+/* Corrections */
+.corrections-row {
+  display: flex;
+  align-items: center;
+  gap: 0.75rem;
+  flex-wrap: wrap;
+}
+
+.btn-corrections {
+  background: var(--color-accent-warm, #b45309);
+  color: #fff;
+  border: none;
+  border-radius: 0.375rem;
+  padding: 0.55rem 1.25rem;
+  font-size: 0.9rem;
+  font-weight: 600;
+  cursor: pointer;
+  transition: opacity 0.15s;
+}
+
+.btn-corrections:disabled {
+  opacity: 0.4;
+  cursor: not-allowed;
+}
+
+.corrections-msg {
+  font-size: 0.85rem;
+}
+
+.msg-ok { color: #065f46; }
+.msg-err { color: #b91c1c; }
+</style>