feat: Imitate tab — pull CF product samples, compare LLM responses

Backend (app/imitate.py):
- GET /api/imitate/products — reads imitate: config, checks online status
- GET /api/imitate/products/{id}/sample — fetches real item from product API
- GET /api/imitate/run (SSE) — streams ollama responses for selected models
- POST /api/imitate/push-corrections — queues results in SFT corrections JSONL

Frontend (ImitateView.vue):
- Step 1: product picker grid (online/offline status, icon from config)
- Step 2: raw sample preview + editable prompt textarea
- Step 3: ollama model multi-select, temperature slider, SSE run with live log
- Step 4: response cards side by side, push to Corrections button

Wiring:
- app/api.py: include imitate_router at /api/imitate
- web/src/router: /imitate route + lazy import
- AppSidebar: Imitate nav entry (mirror icon)
- config/label_tool.yaml.example: imitate: section with peregrine example
- 16 unit tests (100% passing)

Also: BenchmarkView.vue Compare panel — side-by-side run diff for bench results
This commit is contained in:
pyr0ball 2026-04-09 20:04:45 -07:00
parent dc246df42d
commit 3299c0e23a
9 changed files with 1865 additions and 4 deletions

View file

@ -152,6 +152,9 @@ app.include_router(models_router, prefix="/api/models")
from app.cforch import router as cforch_router
app.include_router(cforch_router, prefix="/api/cforch")
from app.imitate import router as imitate_router
app.include_router(imitate_router, prefix="/api/imitate")
# In-memory last-action store (single user, local tool — in-memory is fine)
_last_action: dict | None = None

View file

@ -134,6 +134,8 @@ def get_tasks() -> dict:
"id": t.get("id", ""),
"name": t.get("name", ""),
"type": t.get("type", ""),
"prompt": (t.get("prompt") or "").strip(),
"system": (t.get("system") or "").strip(),
})
task_type = t.get("type", "")
if task_type and task_type not in types_set:

351
app/imitate.py Normal file
View file

@ -0,0 +1,351 @@
"""Avocet — Imitate tab API.
Fetches real samples from sibling CF product APIs, sends them through selected
local LLMs (ollama), and streams responses back to the UI. Results can be
pushed into the SFT corrections queue for human review.
All endpoints registered on `router`. api.py includes this with prefix="/api/imitate".
Module-level globals follow the same testability pattern as cforch.py and sft.py:
override _CONFIG_DIR and _DATA_DIR via set_config_dir() / set_data_dir() in tests.
"""
from __future__ import annotations
import json
import logging
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.error import URLError
from urllib.request import Request, urlopen
import yaml
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from app.utils import append_jsonl
logger = logging.getLogger(__name__)
_ROOT = Path(__file__).parent.parent
_CONFIG_DIR: Path | None = None
_DATA_DIR: Path = _ROOT / "data"
router = APIRouter()
# ── Testability seams ──────────────────────────────────────────────────────────
def set_config_dir(path: Path | None) -> None:
global _CONFIG_DIR
_CONFIG_DIR = path
def set_data_dir(path: Path) -> None:
global _DATA_DIR
_DATA_DIR = path
# ── Internal helpers ───────────────────────────────────────────────────────────
def _config_file() -> Path:
if _CONFIG_DIR is not None:
return _CONFIG_DIR / "label_tool.yaml"
return _ROOT / "config" / "label_tool.yaml"
def _load_imitate_config() -> dict:
"""Read label_tool.yaml and return the imitate sub-dict (or {} if absent)."""
f = _config_file()
if not f.exists():
return {}
try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
except yaml.YAMLError as exc:
logger.warning("Failed to parse imitate config %s: %s", f, exc)
return {}
return raw.get("imitate", {}) or {}
def _load_cforch_config() -> dict:
"""Read cforch section for ollama_url fallback."""
f = _config_file()
if not f.exists():
return {}
try:
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
except yaml.YAMLError as exc:
return {}
return raw.get("cforch", {}) or {}
def _ollama_url(cfg: dict) -> str:
cforch = _load_cforch_config()
return cfg.get("ollama_url") or cforch.get("ollama_url") or "http://localhost:11434"
def _http_get_json(url: str, timeout: int = 5) -> Any:
"""Fetch JSON from url; raise URLError on failure."""
req = Request(url, headers={"Accept": "application/json"})
with urlopen(req, timeout=timeout) as resp:
return json.loads(resp.read().decode("utf-8"))
def _is_online(base_url: str) -> bool:
"""Return True if the product's /api/health endpoint responds OK."""
try:
data = _http_get_json(f"{base_url.rstrip('/')}/api/health", timeout=2)
return bool(data)
except Exception:
return False
def _extract_sample(
raw: Any, text_fields: list[str], sample_index: int = 0
) -> dict[str, Any]:
"""Pull one item from a list or dict response and extract text_fields."""
item: dict[str, Any]
if isinstance(raw, list):
if not raw:
return {}
item = raw[min(sample_index, len(raw) - 1)]
elif isinstance(raw, dict):
# may be {items: [...]} or the item itself
for key in ("items", "results", "data", "jobs", "listings", "pantry"):
if key in raw and isinstance(raw[key], list):
lst = raw[key]
item = lst[min(sample_index, len(lst) - 1)] if lst else {}
break
else:
item = raw
else:
return {}
parts = []
for field in text_fields:
val = item.get(field)
if val and str(val).strip():
parts.append(f"**{field}**: {val}")
return {"item": item, "text": "\n\n".join(parts)}
def _candidates_file() -> Path:
return _DATA_DIR / "sft_candidates.jsonl"
def _sse(data: dict) -> str:
return f"data: {json.dumps(data)}\n\n"
def _run_ollama_streaming(
ollama_base: str,
model_id: str,
prompt: str,
temperature: float,
) -> tuple[str, int]:
"""Call ollama /api/generate with stream=True; return (full_response, elapsed_ms).
Blocks until the model finishes; yields nothing streaming is handled by
the SSE generator in run_imitate().
"""
url = f"{ollama_base.rstrip('/')}/api/generate"
payload = json.dumps({
"model": model_id,
"prompt": prompt,
"stream": False,
"options": {"temperature": temperature},
}).encode("utf-8")
req = Request(url, data=payload, method="POST",
headers={"Content-Type": "application/json"})
t0 = time.time()
try:
with urlopen(req, timeout=120) as resp:
body = json.loads(resp.read().decode("utf-8"))
elapsed = int((time.time() - t0) * 1000)
return body.get("response", ""), elapsed
except Exception as exc:
elapsed = int((time.time() - t0) * 1000)
raise RuntimeError(str(exc)) from exc
# ── GET /products ──────────────────────────────────────────────────────────────
@router.get("/products")
def get_products() -> dict:
"""List configured CF products with live online status."""
cfg = _load_imitate_config()
products_raw = cfg.get("products", []) or []
products = []
for p in products_raw:
if not isinstance(p, dict):
continue
base_url = p.get("base_url", "")
products.append({
"id": p.get("id", ""),
"name": p.get("name", ""),
"icon": p.get("icon", "📦"),
"description": p.get("description", ""),
"base_url": base_url,
"online": _is_online(base_url) if base_url else False,
})
return {"products": products}
# ── GET /products/{product_id}/sample ─────────────────────────────────────────
@router.get("/products/{product_id}/sample")
def get_sample(product_id: str, index: int = 0) -> dict:
"""Fetch a real sample from the given product's API."""
cfg = _load_imitate_config()
products_raw = cfg.get("products", []) or []
product: dict | None = None
for p in products_raw:
if isinstance(p, dict) and p.get("id") == product_id:
product = p
break
if product is None:
raise HTTPException(404, f"Product '{product_id}' not in config")
base_url = product.get("base_url", "").rstrip("/")
endpoint = product.get("sample_endpoint", "")
if not base_url or not endpoint:
raise HTTPException(422, "Product missing base_url or sample_endpoint")
url = f"{base_url}{endpoint}"
try:
raw = _http_get_json(url, timeout=5)
except URLError as exc:
raise HTTPException(503, f"Product API unreachable: {exc}") from exc
except Exception as exc:
raise HTTPException(502, f"Bad response from product API: {exc}") from exc
text_fields = product.get("text_fields", []) or []
extracted = _extract_sample(raw, text_fields, index)
if not extracted:
raise HTTPException(404, "No sample items returned by product API")
prompt_template = product.get("prompt_template", "{text}")
prompt = prompt_template.replace("{text}", extracted["text"])
return {
"product_id": product_id,
"sample_index": index,
"text": extracted["text"],
"prompt": prompt,
"raw_item": extracted.get("item", {}),
}
# ── GET /run (SSE) ─────────────────────────────────────────────────────────────
@router.get("/run")
def run_imitate(
prompt: str = "",
model_ids: str = "", # comma-separated ollama model IDs
temperature: float = 0.7,
product_id: str = "",
) -> StreamingResponse:
"""Run a prompt through selected ollama models and stream results as SSE."""
if not prompt.strip():
raise HTTPException(422, "prompt is required")
ids = [m.strip() for m in model_ids.split(",") if m.strip()]
if not ids:
raise HTTPException(422, "model_ids is required")
cfg = _load_imitate_config()
ollama_base = _ollama_url(cfg)
def generate():
results: list[dict] = []
yield _sse({"type": "start", "total_models": len(ids)})
for model_id in ids:
yield _sse({"type": "model_start", "model": model_id})
try:
response, elapsed_ms = _run_ollama_streaming(
ollama_base, model_id, prompt, temperature
)
result = {
"model": model_id,
"response": response,
"elapsed_ms": elapsed_ms,
"error": None,
}
except Exception as exc:
result = {
"model": model_id,
"response": "",
"elapsed_ms": 0,
"error": str(exc),
}
results.append(result)
yield _sse({"type": "model_done", **result})
yield _sse({"type": "complete", "results": results})
return StreamingResponse(
generate(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"X-Accel-Buffering": "no",
},
)
# ── POST /push-corrections ─────────────────────────────────────────────────────
class ImitateResult(BaseModel):
model: str
response: str
elapsed_ms: int
error: str | None = None
class PushCorrectionsRequest(BaseModel):
product_id: str
prompt: str
results: list[ImitateResult]
@router.post("/push-corrections")
def push_corrections(req: PushCorrectionsRequest) -> dict:
"""Append imitate results to sft_candidates.jsonl for human review."""
if not req.prompt.strip():
raise HTTPException(422, "prompt is required")
if not req.results:
raise HTTPException(422, "results list is empty")
ts = datetime.now(timezone.utc).isoformat()
records = []
for r in req.results:
if r.error or not r.response.strip():
continue
records.append({
"id": str(uuid.uuid4()),
"source": "imitate",
"product_id": req.product_id,
"prompt_messages": [{"role": "user", "content": req.prompt}],
"model_response": r.response,
"model_id": r.model,
"elapsed_ms": r.elapsed_ms,
"status": "pending",
"created_at": ts,
})
if not records:
raise HTTPException(422, "No non-error results to push")
dest = _candidates_file()
dest.parent.mkdir(parents=True, exist_ok=True)
for record in records:
append_jsonl(dest, record)
return {"pushed": len(records)}

View file

@ -46,3 +46,46 @@ cforch:
# license_key: CFG-AVCT-xxxx-xxxx-xxxx
# ollama_url: http://localhost:11434
# ollama_model: llama3.2:3b
# Imitate tab — pull real samples from sibling CF product APIs and run them
# through local LLMs to build a corrections dataset.
# ollama_url defaults to cforch.ollama_url if omitted here.
imitate:
ollama_url: http://localhost:11434 # optional — falls back to cforch.ollama_url
products:
- id: peregrine
name: Peregrine
icon: "🦅"
description: Job search assistant
base_url: http://localhost:8502
sample_endpoint: /api/jobs
text_fields: [title, description]
prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}"
- id: kiwi
name: Kiwi
icon: "🥝"
description: Pantry tracker
base_url: http://localhost:8511
sample_endpoint: /api/inventory
text_fields: [name, category, notes]
prompt_template: "Describe this pantry item and estimate how best to use it:\n\n{text}"
- id: snipe
name: Snipe
icon: "🎯"
description: eBay trust scoring
base_url: http://localhost:8509
sample_endpoint: /api/listings
text_fields: [title, description, seller_info]
prompt_template: "Evaluate the trustworthiness of this listing and flag any red flags:\n\n{text}"
- id: osprey
name: Osprey
icon: "📞"
description: Gov't hold-line automation
base_url: http://localhost:8520
sample_endpoint: /api/calls/recent
text_fields: [agency, issue, notes]
prompt_template: "Draft a concise summary of this government call record:\n\n{text}"

242
tests/test_imitate.py Normal file
View file

@ -0,0 +1,242 @@
"""Tests for app/imitate.py — product registry, sample extraction, corrections push."""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from app.api import app
from app import imitate as _imitate_module
# ── Fixtures ───────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def reset_module_globals(tmp_path):
"""Reset module-level config + data dir globals after each test."""
orig_cfg = _imitate_module._CONFIG_DIR
orig_data = _imitate_module._DATA_DIR
yield
_imitate_module._CONFIG_DIR = orig_cfg
_imitate_module._DATA_DIR = orig_data
@pytest.fixture()
def config_dir(tmp_path) -> Path:
_imitate_module.set_config_dir(tmp_path)
return tmp_path
@pytest.fixture()
def data_dir(tmp_path) -> Path:
_imitate_module.set_data_dir(tmp_path)
return tmp_path
@pytest.fixture()
def cfg_with_products(config_dir: Path) -> Path:
"""Write a label_tool.yaml with two products."""
(config_dir / "label_tool.yaml").write_text(
"""
imitate:
ollama_url: http://localhost:11434
products:
- id: peregrine
name: Peregrine
icon: "🦅"
description: Job search assistant
base_url: http://peregrine.local
sample_endpoint: /api/jobs
text_fields: [title, description]
prompt_template: "Analyze: {text}"
- id: kiwi
name: Kiwi
icon: "🥝"
description: Pantry tracker
base_url: http://kiwi.local
sample_endpoint: /api/inventory
text_fields: [name, notes]
prompt_template: "Describe: {text}"
"""
)
return config_dir
@pytest.fixture()
def client() -> TestClient:
return TestClient(app, raise_server_exceptions=True)
# ── GET /products ──────────────────────────────────────────────────────────────
def test_products_empty_when_no_config(config_dir, client):
"""Returns empty list when label_tool.yaml has no imitate section."""
(config_dir / "label_tool.yaml").write_text("accounts: []\n")
resp = client.get("/api/imitate/products")
assert resp.status_code == 200
assert resp.json()["products"] == []
def test_products_listed(cfg_with_products, client):
"""All configured products are returned with expected fields."""
with patch.object(_imitate_module, "_is_online", return_value=True):
resp = client.get("/api/imitate/products")
assert resp.status_code == 200
products = resp.json()["products"]
assert len(products) == 2
ids = {p["id"] for p in products}
assert ids == {"peregrine", "kiwi"}
peregrine = next(p for p in products if p["id"] == "peregrine")
assert peregrine["name"] == "Peregrine"
assert peregrine["icon"] == "🦅"
assert peregrine["online"] is True
def test_products_offline_when_unreachable(cfg_with_products, client):
"""Products with unreachable base_url are marked offline."""
with patch.object(_imitate_module, "_is_online", return_value=False):
resp = client.get("/api/imitate/products")
assert all(not p["online"] for p in resp.json()["products"])
# ── GET /products/{id}/sample ─────────────────────────────────────────────────
def test_sample_unknown_product(cfg_with_products, client):
"""Returns 404 for a product id not in config."""
resp = client.get("/api/imitate/products/nonexistent/sample")
assert resp.status_code == 404
def test_sample_fetched_from_list(cfg_with_products, client):
"""Extracts first item from a list API response."""
fake_api = [
{"title": "Engineer", "description": "Build things"},
{"title": "Other", "description": "Ignore me"},
]
with patch.object(_imitate_module, "_http_get_json", return_value=fake_api):
resp = client.get("/api/imitate/products/peregrine/sample")
assert resp.status_code == 200
body = resp.json()
assert "Engineer" in body["text"]
assert "Build things" in body["text"]
assert "Analyze:" in body["prompt"]
def test_sample_fetched_from_dict_with_items_key(cfg_with_products, client):
"""Extracts from a wrapper dict with a recognised list key."""
fake_api = {"items": [{"title": "Wrapped Job", "description": "In a wrapper"}]}
with patch.object(_imitate_module, "_http_get_json", return_value=fake_api):
resp = client.get("/api/imitate/products/peregrine/sample")
assert resp.status_code == 200
assert "Wrapped Job" in resp.json()["text"]
def test_sample_503_when_api_unreachable(cfg_with_products, client):
"""Returns 503 when the product API is not reachable."""
from urllib.error import URLError
with patch.object(_imitate_module, "_http_get_json", side_effect=URLError("refused")):
resp = client.get("/api/imitate/products/peregrine/sample")
assert resp.status_code == 503
def test_sample_404_on_empty_list(cfg_with_products, client):
"""Returns 404 when product API returns an empty list."""
with patch.object(_imitate_module, "_http_get_json", return_value=[]):
resp = client.get("/api/imitate/products/peregrine/sample")
assert resp.status_code == 404
# ── POST /push-corrections ─────────────────────────────────────────────────────
def test_push_corrections_appends_jsonl(cfg_with_products, data_dir, client):
"""Successful push writes records to sft_candidates.jsonl."""
payload = {
"product_id": "peregrine",
"prompt": "Analyze this job:",
"results": [
{"model": "qwen2.5:0.5b", "response": "It's a good job.", "elapsed_ms": 800, "error": None},
{"model": "llama3.1:8b", "response": "Strong candidate.", "elapsed_ms": 1500, "error": None},
],
}
resp = client.post("/api/imitate/push-corrections", json=payload)
assert resp.status_code == 200
assert resp.json()["pushed"] == 2
candidates = (data_dir / "sft_candidates.jsonl").read_text().splitlines()
assert len(candidates) == 2
for line in candidates:
record = json.loads(line)
assert record["source"] == "imitate"
assert record["product_id"] == "peregrine"
assert record["status"] == "pending"
assert record["prompt_messages"][0]["role"] == "user"
def test_push_corrections_skips_errors(cfg_with_products, data_dir, client):
"""Results with errors are not written to the corrections file."""
payload = {
"product_id": "peregrine",
"prompt": "Analyze:",
"results": [
{"model": "good-model", "response": "Good answer.", "elapsed_ms": 500, "error": None},
{"model": "bad-model", "response": "", "elapsed_ms": 0, "error": "connection refused"},
],
}
resp = client.post("/api/imitate/push-corrections", json=payload)
assert resp.status_code == 200
assert resp.json()["pushed"] == 1
def test_push_corrections_empty_prompt_422(cfg_with_products, data_dir, client):
"""Empty prompt returns 422."""
payload = {
"product_id": "peregrine",
"prompt": " ",
"results": [{"model": "m", "response": "r", "elapsed_ms": 1, "error": None}],
}
resp = client.post("/api/imitate/push-corrections", json=payload)
assert resp.status_code == 422
def test_push_corrections_all_errors_422(cfg_with_products, data_dir, client):
"""422 when every result has an error (nothing to push)."""
payload = {
"product_id": "peregrine",
"prompt": "Analyze:",
"results": [
{"model": "m", "response": "", "elapsed_ms": 0, "error": "timed out"},
],
}
resp = client.post("/api/imitate/push-corrections", json=payload)
assert resp.status_code == 422
# ── _extract_sample helper ─────────────────────────────────────────────────────
def test_extract_sample_list():
result = _imitate_module._extract_sample(
[{"title": "A", "description": "B"}],
text_fields=["title", "description"],
)
assert "A" in result["text"]
assert "B" in result["text"]
def test_extract_sample_empty_list():
result = _imitate_module._extract_sample([], text_fields=["title"])
assert result == {}
def test_extract_sample_respects_index():
items = [{"title": "First"}, {"title": "Second"}]
result = _imitate_module._extract_sample(items, ["title"], sample_index=1)
assert "Second" in result["text"]
def test_extract_sample_clamps_index():
items = [{"title": "Only"}]
result = _imitate_module._extract_sample(items, ["title"], sample_index=99)
assert "Only" in result["text"]

View file

@ -67,6 +67,7 @@ const navItems = [
{ path: '/stats', icon: '📊', label: 'Stats' },
{ path: '/benchmark', icon: '🏁', label: 'Benchmark' },
{ path: '/models', icon: '🤗', label: 'Models' },
{ path: '/imitate', icon: '🪞', label: 'Imitate' },
{ path: '/corrections', icon: '✍️', label: 'Corrections' },
{ path: '/settings', icon: '⚙️', label: 'Settings' },
]

View file

@ -8,6 +8,7 @@ const BenchmarkView = () => import('../views/BenchmarkView.vue')
const SettingsView = () => import('../views/SettingsView.vue')
const CorrectionsView = () => import('../views/CorrectionsView.vue')
const ModelsView = () => import('../views/ModelsView.vue')
const ImitateView = () => import('../views/ImitateView.vue')
export const router = createRouter({
history: createWebHashHistory(),
@ -17,6 +18,7 @@ export const router = createRouter({
{ path: '/stats', component: StatsView, meta: { title: 'Stats' } },
{ path: '/benchmark', component: BenchmarkView, meta: { title: 'Benchmark' } },
{ path: '/models', component: ModelsView, meta: { title: 'Models' } },
{ path: '/imitate', component: ImitateView, meta: { title: 'Imitate' } },
{ path: '/corrections', component: CorrectionsView, meta: { title: 'Corrections' } },
{ path: '/settings', component: SettingsView, meta: { title: 'Settings' } },
],

View file

@ -38,6 +38,11 @@
:class="{ active: benchMode === 'llm' }"
@click="benchMode = 'llm'"
>🤖 LLM Eval</button>
<button
class="mode-btn"
:class="{ active: benchMode === 'compare' }"
@click="benchMode = 'compare'; ensureCompareReady()"
> Compare</button>
</div>
<!-- LLM Eval panel -->
@ -214,6 +219,121 @@
</template>
<!-- Compare panel -->
<template v-if="benchMode === 'compare'">
<!-- Task selector (radio one at a time) -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">📋 Pick a Task</span>
<span class="picker-badge">{{ cmpSelectedTask ? cmpSelectedTask.name : 'None selected' }}</span>
</summary>
<div class="picker-body">
<div v-if="llmTasksLoading" class="picker-loading">Loading tasks</div>
<div v-else-if="llmTasks.length === 0" class="picker-empty">No tasks found check cforch config.</div>
<template v-else>
<div v-for="(tasks, type) in llmTasksByType" :key="type" class="picker-category">
<span class="picker-cat-name" style="font-weight:600; padding: 0.35rem 0; display:block">{{ type }}</span>
<div class="picker-model-list">
<label v-for="t in tasks" :key="t.id" class="picker-model-row">
<input
type="radio"
name="cmp-task"
:checked="cmpSelectedTask?.id === t.id"
@change="selectCmpTask(t)"
/>
<span class="picker-model-name" :title="t.name">{{ t.name }}</span>
</label>
</div>
</div>
</template>
</div>
</details>
<!-- Prompt editor -->
<template v-if="cmpSelectedTask">
<label class="prompt-label" for="cmp-prompt">Prompt</label>
<textarea
id="cmp-prompt"
class="cmp-prompt-editor"
v-model="cmpPrompt"
rows="6"
/>
<!-- Model picker (ollama only) -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">🤖 Ollama Models</span>
<span class="picker-badge">{{ cmpSelectedModels.size }} / {{ ollamaLlmModels.length }}</span>
</summary>
<div class="picker-body">
<label class="picker-cat-header">
<input
type="checkbox"
:checked="cmpSelectedModels.size === ollamaLlmModels.length"
:indeterminate="cmpSelectedModels.size > 0 && cmpSelectedModels.size < ollamaLlmModels.length"
@change="toggleAllCmpModels(($event.target as HTMLInputElement).checked)"
/>
<span class="picker-cat-name">All ollama models</span>
</label>
<div class="picker-model-list">
<label v-for="m in ollamaLlmModels" :key="m.id" class="picker-model-row">
<input
type="checkbox"
:checked="cmpSelectedModels.has(m.id)"
@change="toggleCmpModel(m.id, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-model-name">{{ m.name }}</span>
<span class="picker-adapter-type">{{ m.tags.slice(0,3).join(', ') }}</span>
</label>
</div>
</div>
</details>
<!-- Run controls -->
<div class="llm-run-controls">
<button
class="btn-run"
:disabled="cmpRunning || cmpSelectedModels.size === 0"
@click="startCompare"
>{{ cmpRunning ? '⏳ Running…' : '⚖️ Compare Models' }}</button>
<button v-if="cmpRunning" class="btn-cancel" @click="cancelCompare"> Cancel</button>
</div>
<!-- Progress log -->
<div v-if="cmpLog.length > 0" class="run-log">
<div class="log-lines">
<div v-for="(line, i) in cmpLog" :key="i" class="log-line">{{ line }}</div>
</div>
</div>
<!-- Side-by-side results -->
<template v-if="cmpResults.length > 0">
<h2 class="chart-title">Side-by-Side Responses</h2>
<div class="cmp-results-grid">
<div
v-for="r in cmpResults"
:key="r.model"
class="cmp-result-card"
:class="{ 'cmp-error': !!r.error }"
>
<div class="cmp-result-header">
<span class="cmp-model-name">{{ r.model }}</span>
<span class="cmp-meta">
<template v-if="r.error"><span class="err-badge">error</span></template>
<template v-else>{{ (r.elapsed_ms / 1000).toFixed(1) }}s</template>
</span>
</div>
<pre v-if="r.error" class="cmp-error-text">{{ r.error }}</pre>
<pre v-else class="cmp-response">{{ r.response }}</pre>
</div>
</div>
</template>
</template>
</template>
<!-- /Compare panel -->
<!-- Classifier panel -->
<template v-if="benchMode === 'classifier'">
@ -480,6 +600,8 @@ interface CfOrchTask {
id: string
name: string
type: string
prompt: string
system: string
}
interface CfOrchModel {
@ -555,7 +677,7 @@ const ftLogEl = ref<HTMLElement | null>(null)
const runCancelled = ref(false)
// Mode toggle
const benchMode = ref<'classifier' | 'llm'>('classifier')
const benchMode = ref<'classifier' | 'llm' | 'compare'>('classifier')
// LLM Eval state
const llmTasks = ref<CfOrchTask[]>([])
@ -574,6 +696,108 @@ const llmEventSource = ref<EventSource | null>(null)
const llmLogEl = ref<HTMLElement | null>(null)
const ftCancelled = ref(false)
// Compare mode state
interface CmpResult {
model: string
response: string
elapsed_ms: number
error: string | null
}
const cmpSelectedTask = ref<CfOrchTask & { prompt: string; system: string } | null>(null)
const cmpPrompt = ref('')
const cmpSelectedModels = ref<Set<string>>(new Set())
const cmpRunning = ref(false)
const cmpLog = ref<string[]>([])
const cmpResults = ref<CmpResult[]>([])
const cmpEventSource = ref<EventSource | null>(null)
const ollamaLlmModels = computed(() =>
llmModels.value.filter(m => m.service === 'ollama')
)
function selectCmpTask(t: CfOrchTask & { prompt: string; system: string }) {
cmpSelectedTask.value = t
cmpPrompt.value = t.prompt || ''
cmpResults.value = []
cmpLog.value = []
}
function toggleCmpModel(id: string, checked: boolean) {
const next = new Set(cmpSelectedModels.value)
checked ? next.add(id) : next.delete(id)
cmpSelectedModels.value = next
}
function toggleAllCmpModels(checked: boolean) {
cmpSelectedModels.value = checked
? new Set(ollamaLlmModels.value.map(m => m.id))
: new Set()
}
function ensureCompareReady() {
// Trigger task + model loads if not already done (shares llmTasks/llmModels)
if (llmTasks.value.length === 0) loadLlmTasks()
if (llmModels.value.length === 0) loadLlmModels()
// Pre-select all ollama models for compare mode
if (cmpSelectedModels.value.size === 0 && ollamaLlmModels.value.length > 0) {
cmpSelectedModels.value = new Set(ollamaLlmModels.value.map(m => m.id))
}
}
function startCompare() {
if (!cmpPrompt.value.trim() || cmpSelectedModels.value.size === 0) return
cmpRunning.value = true
cmpResults.value = []
cmpLog.value = []
const params = new URLSearchParams({
prompt: cmpPrompt.value,
model_ids: [...cmpSelectedModels.value].join(','),
})
const es = new EventSource(`/api/imitate/run?${params}`)
cmpEventSource.value = es
es.onmessage = (event: MessageEvent) => {
try {
const msg = JSON.parse(event.data)
if (msg.type === 'start') {
cmpLog.value.push(`Comparing ${msg.total_models} models…`)
} else if (msg.type === 'model_start') {
cmpLog.value.push(`${msg.model}`)
} else if (msg.type === 'model_done') {
const status = msg.error
? `${msg.error}`
: `${(msg.elapsed_ms / 1000).toFixed(1)}s`
cmpLog.value.push(` ${msg.model}: ${status}`)
cmpResults.value.push({
model: msg.model,
response: msg.response,
elapsed_ms: msg.elapsed_ms,
error: msg.error ?? null,
})
} else if (msg.type === 'complete') {
cmpRunning.value = false
es.close()
}
} catch { /* ignore malformed frames */ }
}
es.onerror = () => {
cmpLog.value.push('Connection error.')
cmpRunning.value = false
es.close()
}
}
function cancelCompare() {
cmpEventSource.value?.close()
cmpEventSource.value = null
cmpRunning.value = false
cmpLog.value.push('Cancelled.')
}
async function cancelBenchmark() {
await fetch('/api/benchmark/cancel', { method: 'POST' }).catch(() => {})
}
@ -1603,4 +1827,99 @@ details[open] .ft-summary::before { content: '▼ '; }
font-variant-numeric: tabular-nums;
white-space: nowrap;
}
/* ── Compare mode ─────────────────────────────────────────────────────────── */
.prompt-label {
font-size: 0.85rem;
font-weight: 600;
color: var(--color-text-secondary, #6b7a99);
margin-top: 0.5rem;
}
.cmp-prompt-editor {
width: 100%;
font-family: var(--font-mono, monospace);
font-size: 0.85rem;
padding: 0.75rem;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
background: var(--color-surface, #f0f4fc);
color: var(--color-text, #1a2338);
resize: vertical;
line-height: 1.5;
}
.cmp-prompt-editor:focus {
outline: 2px solid var(--app-primary, #2A6080);
outline-offset: -1px;
}
.cmp-results-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
gap: 1rem;
margin-top: 0.5rem;
}
.cmp-result-card {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
background: var(--color-surface, #f0f4fc);
display: flex;
flex-direction: column;
}
.cmp-result-card.cmp-error {
border-color: #fca5a5;
}
.cmp-result-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem 0.75rem;
background: var(--color-surface-raised, #e4ebf5);
border-bottom: 1px solid var(--color-border, #d0d7e8);
}
.cmp-model-name {
font-size: 0.82rem;
font-weight: 600;
color: var(--color-text, #1a2338);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.cmp-meta {
font-size: 0.75rem;
color: var(--color-text-secondary, #6b7a99);
flex-shrink: 0;
margin-left: 0.5rem;
}
.err-badge {
background: #fee2e2;
color: #991b1b;
border-radius: 9999px;
padding: 0.1rem 0.45rem;
font-size: 0.7rem;
font-weight: 600;
}
.cmp-response, .cmp-error-text {
padding: 0.75rem;
font-size: 0.82rem;
white-space: pre-wrap;
word-break: break-word;
max-height: 300px;
overflow-y: auto;
margin: 0;
flex: 1;
color: var(--color-text, #1a2338);
}
.cmp-error-text { color: #b91c1c; }
</style>

View file

@ -0,0 +1,898 @@
<template>
<div class="imitate-view">
<header class="bench-header">
<h1 class="page-title">🪞 Imitate</h1>
<p class="page-subtitle">Pull real samples from CF product APIs and compare LLM responses</p>
</header>
<!-- Step 1: Product selection -->
<section class="step-section">
<h2 class="step-heading">1. Select Product</h2>
<div v-if="productsLoading" class="picker-loading">Loading products</div>
<div v-else-if="products.length === 0" class="picker-empty">
No products configured add an <code>imitate:</code> section to
<code>config/label_tool.yaml</code>.
</div>
<div v-else class="product-grid">
<button
v-for="p in products"
:key="p.id"
class="product-card"
:class="{
selected: selectedProduct?.id === p.id,
offline: !p.online,
}"
:disabled="!p.online"
:title="p.online ? p.description : `${p.name} is offline`"
@click="selectProduct(p)"
>
<span class="product-icon">{{ p.icon }}</span>
<span class="product-name">{{ p.name }}</span>
<span class="product-status" :class="p.online ? 'status-on' : 'status-off'">
{{ p.online ? 'online' : 'offline' }}
</span>
</button>
</div>
</section>
<!-- Step 2: Sample + Prompt -->
<section v-if="selectedProduct" class="step-section">
<h2 class="step-heading">2. Sample &amp; Prompt</h2>
<div class="sample-toolbar">
<span class="sample-product-label">{{ selectedProduct.icon }} {{ selectedProduct.name }}</span>
<button class="btn-refresh" :disabled="sampleLoading" @click="fetchSample">
{{ sampleLoading ? '⏳ Fetching…' : '🔄 Refresh Sample' }}
</button>
<span v-if="sampleError" class="sample-error">{{ sampleError }}</span>
</div>
<div v-if="sampleLoading" class="picker-loading">Fetching sample from API</div>
<template v-else-if="rawSample">
<!-- Fetched text preview -->
<details class="sample-preview" open>
<summary class="sample-preview-toggle">Raw sample text</summary>
<pre class="sample-text">{{ rawSample.text }}</pre>
</details>
<!-- Prompt editor -->
<label class="prompt-label" for="prompt-editor">Prompt sent to models</label>
<textarea
id="prompt-editor"
class="prompt-editor"
v-model="editedPrompt"
rows="8"
/>
</template>
<div v-else-if="!sampleLoading && selectedProduct" class="picker-empty">
Click "Refresh Sample" to fetch a real sample from {{ selectedProduct.name }}.
</div>
</section>
<!-- Step 3: Models + Run -->
<section v-if="editedPrompt" class="step-section">
<h2 class="step-heading">3. Models &amp; Run</h2>
<!-- Ollama model picker -->
<details class="model-picker" open>
<summary class="picker-summary">
<span class="picker-title">🤖 Ollama Models</span>
<span class="picker-badge">{{ selectedModels.size }} / {{ ollamaModels.length }}</span>
</summary>
<div class="picker-body">
<div v-if="modelsLoading" class="picker-loading">Loading models</div>
<div v-else-if="ollamaModels.length === 0" class="picker-empty">
No ollama models in bench_models.yaml add models with <code>service: ollama</code>.
</div>
<template v-else>
<label class="picker-cat-header">
<input
type="checkbox"
:checked="selectedModels.size === ollamaModels.length"
:indeterminate="selectedModels.size > 0 && selectedModels.size < ollamaModels.length"
@change="toggleAllModels(($event.target as HTMLInputElement).checked)"
/>
<span class="picker-cat-name">All ollama models</span>
</label>
<div class="picker-model-list">
<label v-for="m in ollamaModels" :key="m.id" class="picker-model-row">
<input
type="checkbox"
:checked="selectedModels.has(m.id)"
@change="toggleModel(m.id, ($event.target as HTMLInputElement).checked)"
/>
<span class="picker-model-name" :title="m.name">{{ m.name }}</span>
<span class="picker-model-tags">
<span v-for="tag in m.tags.slice(0, 3)" :key="tag" class="tag">{{ tag }}</span>
</span>
</label>
</div>
</template>
</div>
</details>
<!-- Temperature -->
<div class="temp-row">
<label for="temp-slider" class="temp-label">Temperature: <strong>{{ temperature.toFixed(1) }}</strong></label>
<input
id="temp-slider"
type="range" min="0" max="1" step="0.1"
:value="temperature"
@input="temperature = parseFloat(($event.target as HTMLInputElement).value)"
class="temp-slider"
/>
</div>
<!-- Run controls -->
<div class="run-row">
<button
class="btn-run"
:disabled="running || selectedModels.size === 0"
@click="startRun"
>
{{ running ? '⏳ Running…' : '▶ Run' }}
</button>
<button v-if="running" class="btn-cancel" @click="cancelRun"> Cancel</button>
</div>
<!-- Progress log -->
<div v-if="runLog.length > 0" class="run-log" aria-live="polite">
<div v-for="(line, i) in runLog" :key="i" class="log-line">{{ line }}</div>
</div>
</section>
<!-- Step 4: Results -->
<section v-if="results.length > 0" class="step-section">
<h2 class="step-heading">4. Results</h2>
<div class="results-grid">
<div
v-for="r in results"
:key="r.model"
class="result-card"
:class="{ 'result-error': !!r.error }"
>
<div class="result-header">
<span class="result-model">{{ r.model }}</span>
<span class="result-meta">
<template v-if="r.error">
<span class="result-err-badge">error</span>
</template>
<template v-else>
{{ (r.elapsed_ms / 1000).toFixed(1) }}s
</template>
</span>
</div>
<pre v-if="r.error" class="result-error-text">{{ r.error }}</pre>
<pre v-else class="result-response">{{ r.response }}</pre>
</div>
</div>
<div class="corrections-row">
<button
class="btn-corrections"
:disabled="pushingCorrections || !selectedProduct || successfulResults.length === 0"
@click="pushCorrections"
>
{{ pushingCorrections ? '⏳ Pushing…' : `✍ Send ${successfulResults.length} to Corrections` }}
</button>
<span v-if="correctionsPushMsg" class="corrections-msg" :class="correctionsPushOk ? 'msg-ok' : 'msg-err'">
{{ correctionsPushMsg }}
</span>
</div>
</section>
</div>
</template>
<script setup lang="ts">
import { ref, computed, onMounted } from 'vue'
// Types
interface Product {
id: string
name: string
icon: string
description: string
base_url: string
online: boolean
}
interface Sample {
product_id: string
sample_index: number
text: string
prompt: string
raw_item: Record<string, unknown>
}
interface ModelEntry {
id: string
name: string
service: string
tags: string[]
vram_estimate_mb: number
}
interface RunResult {
model: string
response: string
elapsed_ms: number
error: string | null
}
// State
const productsLoading = ref(false)
const products = ref<Product[]>([])
const selectedProduct = ref<Product | null>(null)
const sampleLoading = ref(false)
const sampleError = ref<string | null>(null)
const rawSample = ref<Sample | null>(null)
const editedPrompt = ref('')
const modelsLoading = ref(false)
const allModels = ref<ModelEntry[]>([])
const selectedModels = ref<Set<string>>(new Set())
const temperature = ref(0.7)
const running = ref(false)
const eventSource = ref<EventSource | null>(null)
const runLog = ref<string[]>([])
const results = ref<RunResult[]>([])
const pushingCorrections = ref(false)
const correctionsPushMsg = ref<string | null>(null)
const correctionsPushOk = ref(false)
// Computed
const ollamaModels = computed(() =>
allModels.value.filter(m => m.service === 'ollama')
)
const successfulResults = computed(() =>
results.value.filter(r => !r.error && r.response.trim())
)
// Lifecycle
onMounted(async () => {
await Promise.all([loadProducts(), loadModels()])
})
// Methods
async function loadProducts() {
productsLoading.value = true
try {
const resp = await fetch('/api/imitate/products')
if (!resp.ok) throw new Error(`HTTP ${resp.status}`)
const data = await resp.json()
products.value = data.products ?? []
} catch {
products.value = []
} finally {
productsLoading.value = false
}
}
async function loadModels() {
modelsLoading.value = true
try {
const resp = await fetch('/api/cforch/models')
if (!resp.ok) throw new Error(`HTTP ${resp.status}`)
const data = await resp.json()
allModels.value = data.models ?? []
// Select all ollama models by default
for (const m of allModels.value) {
if (m.service === 'ollama') selectedModels.value.add(m.id)
}
} catch {
allModels.value = []
} finally {
modelsLoading.value = false
}
}
async function selectProduct(p: Product) {
selectedProduct.value = p
rawSample.value = null
editedPrompt.value = ''
sampleError.value = null
results.value = []
runLog.value = []
await fetchSample()
}
async function fetchSample() {
if (!selectedProduct.value) return
sampleLoading.value = true
sampleError.value = null
try {
const resp = await fetch(`/api/imitate/products/${selectedProduct.value.id}/sample`)
if (!resp.ok) {
const body = await resp.json().catch(() => ({ detail: 'Unknown error' }))
throw new Error(body.detail ?? `HTTP ${resp.status}`)
}
const data: Sample = await resp.json()
rawSample.value = data
editedPrompt.value = data.prompt
} catch (err: unknown) {
sampleError.value = err instanceof Error ? err.message : String(err)
} finally {
sampleLoading.value = false
}
}
function toggleModel(id: string, checked: boolean) {
const next = new Set(selectedModels.value)
checked ? next.add(id) : next.delete(id)
selectedModels.value = next
}
function toggleAllModels(checked: boolean) {
selectedModels.value = checked
? new Set(ollamaModels.value.map(m => m.id))
: new Set()
}
function startRun() {
if (running.value || !editedPrompt.value.trim() || selectedModels.value.size === 0) return
running.value = true
results.value = []
runLog.value = []
correctionsPushMsg.value = null
const params = new URLSearchParams({
prompt: editedPrompt.value,
model_ids: [...selectedModels.value].join(','),
temperature: temperature.value.toString(),
product_id: selectedProduct.value?.id ?? '',
})
const es = new EventSource(`/api/imitate/run?${params}`)
eventSource.value = es
es.onmessage = (event: MessageEvent) => {
try {
const msg = JSON.parse(event.data)
if (msg.type === 'start') {
runLog.value.push(`Running ${msg.total_models} model(s)…`)
} else if (msg.type === 'model_start') {
runLog.value.push(`${msg.model}`)
} else if (msg.type === 'model_done') {
const status = msg.error
? `✕ error: ${msg.error}`
: `✓ done (${(msg.elapsed_ms / 1000).toFixed(1)}s)`
runLog.value.push(` ${msg.model}: ${status}`)
results.value.push({
model: msg.model,
response: msg.response,
elapsed_ms: msg.elapsed_ms,
error: msg.error ?? null,
})
} else if (msg.type === 'complete') {
runLog.value.push(`Complete. ${results.value.length} responses.`)
running.value = false
es.close()
}
} catch {
// ignore malformed SSE frames
}
}
es.onerror = () => {
runLog.value.push('Connection error — run may be incomplete.')
running.value = false
es.close()
}
}
function cancelRun() {
eventSource.value?.close()
eventSource.value = null
running.value = false
runLog.value.push('Cancelled.')
}
async function pushCorrections() {
if (!selectedProduct.value || successfulResults.value.length === 0) return
pushingCorrections.value = true
correctionsPushMsg.value = null
try {
const resp = await fetch('/api/imitate/push-corrections', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
product_id: selectedProduct.value.id,
prompt: editedPrompt.value,
results: successfulResults.value,
}),
})
if (!resp.ok) {
const body = await resp.json().catch(() => ({ detail: 'Unknown error' }))
throw new Error(body.detail ?? `HTTP ${resp.status}`)
}
const data = await resp.json()
correctionsPushMsg.value = `${data.pushed} record(s) added to Corrections queue.`
correctionsPushOk.value = true
} catch (err: unknown) {
correctionsPushMsg.value = err instanceof Error ? err.message : String(err)
correctionsPushOk.value = false
} finally {
pushingCorrections.value = false
}
}
</script>
<style scoped>
.imitate-view {
max-width: 1100px;
margin: 0 auto;
padding: 1.5rem;
display: flex;
flex-direction: column;
gap: 1.5rem;
}
.bench-header {
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.page-title {
font-size: 1.6rem;
font-weight: 700;
color: var(--color-text, #1a2338);
}
.page-subtitle {
font-size: 0.9rem;
color: var(--color-text-secondary, #6b7a99);
}
/* Steps */
.step-section {
background: var(--color-surface-raised, #e4ebf5);
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
padding: 1.25rem;
display: flex;
flex-direction: column;
gap: 1rem;
}
.step-heading {
font-size: 1rem;
font-weight: 600;
color: var(--color-text-secondary, #6b7a99);
text-transform: uppercase;
letter-spacing: 0.05em;
border-bottom: 1px solid var(--color-border, #d0d7e8);
padding-bottom: 0.5rem;
}
/* Product grid */
.product-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
gap: 0.75rem;
}
.product-card {
display: flex;
flex-direction: column;
align-items: center;
gap: 0.35rem;
padding: 1rem 0.75rem;
border: 2px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
background: var(--color-surface, #f0f4fc);
cursor: pointer;
transition: border-color 0.15s, background 0.15s;
font-size: 0.9rem;
}
.product-card:hover:not(:disabled) {
border-color: var(--app-primary, #2A6080);
background: color-mix(in srgb, var(--app-primary, #2A6080) 6%, var(--color-surface, #f0f4fc));
}
.product-card.selected {
border-color: var(--app-primary, #2A6080);
background: color-mix(in srgb, var(--app-primary, #2A6080) 12%, var(--color-surface, #f0f4fc));
}
.product-card.offline {
opacity: 0.45;
cursor: not-allowed;
}
.product-icon {
font-size: 2rem;
}
.product-name {
font-weight: 600;
color: var(--color-text, #1a2338);
}
.product-status {
font-size: 0.72rem;
padding: 0.1rem 0.45rem;
border-radius: 9999px;
font-weight: 600;
}
.status-on {
background: #d1fae5;
color: #065f46;
}
.status-off {
background: #fee2e2;
color: #991b1b;
}
/* Sample panel */
.sample-toolbar {
display: flex;
align-items: center;
gap: 0.75rem;
flex-wrap: wrap;
}
.sample-product-label {
font-weight: 600;
color: var(--app-primary, #2A6080);
}
.sample-error {
color: #b91c1c;
font-size: 0.85rem;
}
.sample-preview {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
overflow: hidden;
}
.sample-preview-toggle {
padding: 0.5rem 0.75rem;
cursor: pointer;
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
background: var(--color-surface, #f0f4fc);
user-select: none;
}
.sample-text {
padding: 0.75rem;
font-size: 0.82rem;
white-space: pre-wrap;
word-break: break-word;
max-height: 180px;
overflow-y: auto;
background: var(--color-bg, #f0f4fc);
margin: 0;
color: var(--color-text, #1a2338);
}
.prompt-label {
font-size: 0.85rem;
font-weight: 600;
color: var(--color-text-secondary, #6b7a99);
}
.prompt-editor {
width: 100%;
font-family: var(--font-mono, monospace);
font-size: 0.85rem;
padding: 0.75rem;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
background: var(--color-surface, #f0f4fc);
color: var(--color-text, #1a2338);
resize: vertical;
line-height: 1.5;
}
.prompt-editor:focus {
outline: 2px solid var(--app-primary, #2A6080);
outline-offset: -1px;
}
/* Model picker — reuse bench-view classes */
.model-picker {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
}
.picker-summary {
display: flex;
align-items: center;
justify-content: space-between;
padding: 0.75rem 1rem;
background: var(--color-surface, #f0f4fc);
cursor: pointer;
font-size: 0.95rem;
font-weight: 600;
user-select: none;
list-style: none;
}
.picker-title { flex: 1; }
.picker-badge {
font-size: 0.8rem;
background: var(--app-primary, #2A6080);
color: #fff;
border-radius: 9999px;
padding: 0.15rem 0.6rem;
}
.picker-body {
padding: 0.75rem 1rem;
display: flex;
flex-direction: column;
gap: 0.25rem;
}
.picker-loading, .picker-empty {
font-size: 0.85rem;
color: var(--color-text-secondary, #6b7a99);
padding: 0.5rem 0;
}
.picker-cat-header {
display: flex;
align-items: center;
gap: 0.5rem;
font-weight: 600;
font-size: 0.9rem;
padding: 0.35rem 0;
cursor: pointer;
}
.picker-model-list {
display: flex;
flex-wrap: wrap;
gap: 0.25rem;
padding-left: 1.25rem;
padding-bottom: 0.5rem;
}
.picker-model-row {
display: flex;
align-items: center;
gap: 0.4rem;
font-size: 0.85rem;
cursor: pointer;
padding: 0.2rem 0.5rem;
border-radius: 0.25rem;
min-width: 220px;
}
.picker-model-row:hover {
background: color-mix(in srgb, var(--app-primary, #2A6080) 8%, transparent);
}
.picker-model-name {
flex: 1;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.picker-model-tags {
display: flex;
gap: 0.2rem;
flex-shrink: 0;
}
.tag {
font-size: 0.68rem;
background: var(--color-border, #d0d7e8);
border-radius: 9999px;
padding: 0.05rem 0.4rem;
color: var(--color-text-secondary, #6b7a99);
white-space: nowrap;
}
/* Temperature */
.temp-row {
display: flex;
align-items: center;
gap: 0.75rem;
}
.temp-label {
font-size: 0.85rem;
white-space: nowrap;
min-width: 160px;
}
.temp-slider {
flex: 1;
accent-color: var(--app-primary, #2A6080);
}
/* Run controls */
.run-row {
display: flex;
align-items: center;
gap: 0.75rem;
}
.btn-run {
background: var(--app-primary, #2A6080);
color: #fff;
border: none;
border-radius: 0.375rem;
padding: 0.55rem 1.25rem;
font-size: 0.9rem;
font-weight: 600;
cursor: pointer;
transition: opacity 0.15s;
}
.btn-run:disabled {
opacity: 0.4;
cursor: not-allowed;
}
.btn-cancel {
background: transparent;
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
padding: 0.5rem 0.9rem;
font-size: 0.85rem;
cursor: pointer;
color: var(--color-text-secondary, #6b7a99);
}
.btn-refresh {
background: transparent;
border: 1px solid var(--app-primary, #2A6080);
border-radius: 0.375rem;
padding: 0.35rem 0.8rem;
font-size: 0.85rem;
color: var(--app-primary, #2A6080);
cursor: pointer;
transition: background 0.15s;
}
.btn-refresh:hover:not(:disabled) {
background: color-mix(in srgb, var(--app-primary, #2A6080) 10%, transparent);
}
.btn-refresh:disabled { opacity: 0.5; cursor: not-allowed; }
/* Run log */
.run-log {
background: var(--color-bg, #f0f4fc);
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.375rem;
padding: 0.75rem;
font-family: var(--font-mono, monospace);
font-size: 0.8rem;
max-height: 140px;
overflow-y: auto;
}
.log-line {
padding: 0.05rem 0;
color: var(--color-text, #1a2338);
}
/* Results */
.results-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
gap: 1rem;
}
.result-card {
border: 1px solid var(--color-border, #d0d7e8);
border-radius: 0.5rem;
overflow: hidden;
background: var(--color-surface, #f0f4fc);
display: flex;
flex-direction: column;
}
.result-card.result-error {
border-color: #fca5a5;
}
.result-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem 0.75rem;
background: var(--color-surface-raised, #e4ebf5);
border-bottom: 1px solid var(--color-border, #d0d7e8);
}
.result-model {
font-size: 0.82rem;
font-weight: 600;
color: var(--color-text, #1a2338);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.result-meta {
font-size: 0.75rem;
color: var(--color-text-secondary, #6b7a99);
flex-shrink: 0;
margin-left: 0.5rem;
}
.result-err-badge {
background: #fee2e2;
color: #991b1b;
border-radius: 9999px;
padding: 0.1rem 0.45rem;
font-size: 0.7rem;
font-weight: 600;
}
.result-response, .result-error-text {
padding: 0.75rem;
font-size: 0.82rem;
white-space: pre-wrap;
word-break: break-word;
max-height: 280px;
overflow-y: auto;
margin: 0;
flex: 1;
color: var(--color-text, #1a2338);
}
.result-error-text {
color: #b91c1c;
}
/* Corrections */
.corrections-row {
display: flex;
align-items: center;
gap: 0.75rem;
flex-wrap: wrap;
}
.btn-corrections {
background: var(--color-accent-warm, #b45309);
color: #fff;
border: none;
border-radius: 0.375rem;
padding: 0.55rem 1.25rem;
font-size: 0.9rem;
font-weight: 600;
cursor: pointer;
transition: opacity 0.15s;
}
.btn-corrections:disabled {
opacity: 0.4;
cursor: not-allowed;
}
.corrections-msg {
font-size: 0.85rem;
}
.msg-ok { color: #065f46; }
.msg-err { color: #b91c1c; }
</style>