feat(models): extended model registry + manage.sh benchmark subcommands

- app/models.py: add StyleModel and VoiceModel entries; expand cf-text and benchmark model metadata (vram_mb, description, tags)
- tests/test_models.py: coverage for new model types and registry helpers
- ModelsView.vue: updated model browser with style/voice filter tabs
- manage.sh: add benchmark-style and benchmark-voice subcommands
- config/label_tool.yaml.example: add style + voice benchmark config stubs
- web/.gitignore: add node_modules and dist entries
2026-04-24 14:56:24 -07:00 · 2026-04-24 14:56:24 -07:00 · ea3da701c6
commit ea3da701c6
parent ddb56efb89
6 changed files with 1150 additions and 152 deletions
--- a/app/models.py
+++ b/app/models.py
@ -14,11 +14,12 @@ from __future__ import annotations
 import json
 import logging
 import os
 import shutil
 import threading
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, TypedDict
 from uuid import uuid4
 import httpx
@ -39,21 +40,67 @@ _ROOT = Path(__file__).parent.parent
 _MODELS_DIR: Path = _ROOT / "models"
 _QUEUE_DIR: Path = _ROOT / "data"
 # Service-specific model destinations.
 # cf-text models land on the NFS-mounted shared asset store so every cluster
 # node can reach them without a separate download. Avocet classifiers stay local
 # because they are fine-tuned in-place and are only consumed by avocet itself.
 # Override via CF_TEXT_MODELS_DIR env var (useful for dev / non-NFS setups).
 _CF_TEXT_MODELS_DIR: Path = Path(
    os.environ.get("CF_TEXT_MODELS_DIR", "/Library/Assets/LLM/cf-text/models")
 )
 # Directory containing per-node YAML profiles for cf-orch.
 # Auto-registration writes new catalog entries here on model download.
 _CF_ORCH_PROFILES_DIR: Path = Path(
    os.environ.get(
        "CF_ORCH_PROFILES_DIR",
        "/Library/Development/CircuitForge/circuitforge-orch/circuitforge_orch/profiles/nodes",
    )
 )
 router = APIRouter()
 # ── Download progress shared state ────────────────────────────────────────────
 # Updated by the background download thread; read by GET /download/stream.
 _download_progress: dict[str, Any] = {}
-# ── HF pipeline_tag → adapter recommendation ──────────────────────────────────
+# ── HF pipeline_tag → CF service info ────────────────────────────────────────
-_TAG_TO_ADAPTER: dict[str, str] = {
+
-    "zero-shot-classification": "ZeroShotAdapter",
+
-    "text-classification": "ZeroShotAdapter",
+class _TagInfo(TypedDict):
-    "natural-language-inference": "ZeroShotAdapter",
+    adapter: str | None   # Avocet adapter class, or None if handled by another service
-    "sentence-similarity": "RerankerAdapter",
+    role: str             # Human-readable model role (classifier, stt, tts, vision, …)
-    "text-ranking": "RerankerAdapter",
+    service: str          # CF service that consumes this model type
-    "text-generation": "GenerationAdapter",
+
-    "text2text-generation": "GenerationAdapter",
+
 # Registry keyed by HuggingFace pipeline_tag. Each value names the Avocet
 # adapter class (None when the tag is handled by another service), the model's
 # role, and the CF service that consumes it. lookup_model() treats any tag
 # present here as compatible and lists these keys as the supported task types.
 _TAG_TO_INFO: dict[str, _TagInfo] = {
    # Avocet email classifiers and rerankers
    "zero-shot-classification":       {"adapter": "ZeroShotAdapter",   "role": "classifier",  "service": "avocet"},
    "text-classification":            {"adapter": "ZeroShotAdapter",   "role": "classifier",  "service": "avocet"},
    "natural-language-inference":     {"adapter": "ZeroShotAdapter",   "role": "classifier",  "service": "avocet"},
    "sentence-similarity":            {"adapter": "RerankerAdapter",   "role": "reranker",    "service": "avocet"},
    "text-ranking":                   {"adapter": "RerankerAdapter",   "role": "reranker",    "service": "avocet"},
    # Text generation / summarization — served by cf-text, but still mapped to
    # an Avocet adapter class (GenerationAdapter)
    "text-generation":                {"adapter": "GenerationAdapter", "role": "generator",   "service": "cf-text"},
    "text2text-generation":           {"adapter": "GenerationAdapter", "role": "generator",   "service": "cf-text"},
    "summarization":                  {"adapter": "GenerationAdapter", "role": "generator",   "service": "cf-text"},
    # STT — cf-stt speech recognition service
    "automatic-speech-recognition":   {"adapter": None, "role": "stt",       "service": "cf-stt"},
    # Audio language models — audio + text → text (understanding, QA, captioning)
    "audio-text-to-text":             {"adapter": None, "role": "alm",       "service": "cf-stt"},
    # Audio classification — cf-voice sidecar context stream
    "audio-classification":           {"adapter": None, "role": "classifier", "service": "cf-voice"},
    # TTS — cf-tts text-to-speech service
    "text-to-speech":                 {"adapter": None, "role": "tts",       "service": "cf-tts"},
    # Vision — cf-vision image classification / embedding / VLM service
    "image-classification":           {"adapter": None, "role": "vision",    "service": "cf-vision"},
    "zero-shot-image-classification": {"adapter": None, "role": "vision",    "service": "cf-vision"},
    "image-feature-extraction":       {"adapter": None, "role": "embedding", "service": "cf-vision"},
    "image-text-to-text":             {"adapter": None, "role": "vlm",       "service": "cf-vision"},
    "visual-question-answering":      {"adapter": None, "role": "vlm",       "service": "cf-vision"},
    # Image generation — cf-image (text → image; distinct from cf-vision image understanding)
    "text-to-image":                  {"adapter": None, "role": "image-gen", "service": "cf-image"},
    # Embedding — cf-core shared embedding layer
    "feature-extraction":             {"adapter": None, "role": "embedding", "service": "cf-core"},
 }
@ -84,14 +131,31 @@ def _write_queue(records: list[dict]) -> None:
 def _safe_model_name(repo_id: str) -> str:
-    """Convert repo_id to a filesystem-safe directory name (HF convention)."""
+    """Convert repo_id to a filesystem-safe directory name.
    Uses the HuggingFace Hub convention: owner/model-name → owner--model-name.
    This matches what snapshot_download produces under local_dir and what
    cf-orch uses when constructing model paths for cf-text allocations.
    """
    return repo_id.replace("/", "--")
-def _is_installed(repo_id: str) -> bool:
+def _model_dir_for(repo_id: str, service: str | None) -> Path:
-    """Check if a model is already downloaded in _MODELS_DIR."""
+    """Return the download destination directory for a model.
    cf-text models → NFS shared asset store (_CF_TEXT_MODELS_DIR) so every
    cluster node can load them without a separate download.
    All other services (avocet classifiers, fine-tunes) → local _MODELS_DIR.
    """
    safe_name = _safe_model_name(repo_id)
-    model_dir = _MODELS_DIR / safe_name
+    if service == "cf-text":
        return _CF_TEXT_MODELS_DIR / safe_name
    return _MODELS_DIR / safe_name
 def _is_installed(repo_id: str, service: str | None = None) -> bool:
    """Check if a model is already downloaded in the appropriate destination."""
    model_dir = _model_dir_for(repo_id, service)
    return model_dir.exists() and (
        (model_dir / "config.json").exists()
        or (model_dir / "training_info.json").exists()
@ -125,48 +189,289 @@ def _get_queue_entry(entry_id: str) -> dict | None:
    return None
 # ── cf-orch catalog auto-registration ─────────────────────────────────────────
 def _catalog_key(repo_id: str) -> str:
    """Return the lowercase catalog key for a HuggingFace repo id.

    The key is everything after the first "/" (the owner prefix is dropped);
    an id that contains no "/" is used whole.

        ibm-granite/granite-4.1-8b  →  granite-4.1-8b
        facebook/bart-large-cnn     →  bart-large-cnn
    """
    _owner, slash, model_part = repo_id.partition("/")
    # partition() leaves the tail empty when there is no separator, so fall
    # back to the full id in that case.
    key_source = model_part if slash else repo_id
    return key_source.lower()
 def _insert_catalog_entry(content: str, entry_lines: str) -> str:
    """Insert entry_lines at the end of the cf-text.catalog section.

    Scans line by line (no YAML round-trip) so that all comments and original
    formatting are preserved. The section is located purely by indentation:
    ``cf-text:`` at indent 2, ``catalog:`` at indent 4, and catalog content at
    indent >= 6.

    The new entry is placed directly after the last real (non-blank,
    non-comment) catalog line — or after the ``catalog:`` header when the
    catalog has no entries — and is preceded by one blank line. Blank lines
    and comments that trail the catalog therefore stay attached to whatever
    follows them (typically the next service section).

    Args:
        content: Full text of a node profile YAML file.
        entry_lines: Pre-indented YAML text for the new entry. A trailing
            newline is added if missing.

    Returns:
        The updated text, or ``content`` unchanged if the catalog section
        cannot be located.
    """
    lines = content.splitlines(keepends=True)
    in_cf_text = False
    in_catalog = False
    # Index of the line the new entry is inserted after.
    anchor = -1
    for i, line in enumerate(lines):
        stripped = line.lstrip()
        indent = len(line) - len(stripped)
        blank_or_comment = not stripped or stripped.startswith("#")
        if not in_cf_text:
            if indent == 2 and stripped.startswith("cf-text:"):
                in_cf_text = True
            continue
        if not in_catalog:
            if indent == 4 and stripped.startswith("catalog:"):
                in_catalog = True
                anchor = i
            elif not blank_or_comment and indent <= 2:
                # Left cf-text section without finding a catalog
                return content
            continue
        # Inside catalog: blank lines and comments never move the anchor, so
        # trailing ones are left where they are.
        if blank_or_comment:
            continue
        # First real line with indent < 6 ends the catalog.
        if indent < 6:
            break
        anchor = i
    if not in_catalog:
        return content
    # The anchor may be the file's last line and lack a newline terminator.
    if not lines[anchor].endswith(("\n", "\r")):
        lines[anchor] += "\n"
    # Guarantee the entry cannot glue onto whatever line follows it.
    entry = entry_lines if entry_lines.endswith("\n") else entry_lines + "\n"
    lines.insert(anchor + 1, "\n" + entry)
    return "".join(lines)
 def _register_in_node_catalogs(
    repo_id: str,
    local_path: Path,
    vram_mb_fp16: int,
    role: str,
 ) -> list[str]:
    """Insert a cf-text catalog entry into every eligible node YAML.

    A node is eligible when:
    - It has a ``cf-text.catalog`` section
    - The model fits within the node's ``cf-text.max_mb`` at FP16 *or* 4-bit
    - Neither the model key nor the local path is already in the catalog

    Args:
        repo_id: HuggingFace repo id; the catalog key is derived from it.
        local_path: Directory the model was downloaded to; written as the
            entry's ``path``.
        vram_mb_fp16: Estimated FP16 footprint in MB. The 4-bit estimate is a
            quarter of this plus 10%.
        role: Model role, used only in the entry's description text.

    Returns:
        The list of node names (YAML file stems) that were updated. Empty when
        PyYAML is unavailable or the profiles directory does not exist.
        Per-file failures are logged and do not stop the remaining files.
    """
    try:
        import yaml  # lazy — not in the critical import path
    except ImportError:
        logger.warning("PyYAML not available — skipping catalog registration for %s", repo_id)
        return []
    profiles_dir = _CF_ORCH_PROFILES_DIR
    if not profiles_dir.exists():
        logger.warning(
            "cf-orch profiles dir not found: %s — skipping catalog registration", profiles_dir
        )
        return []
    model_key = _catalog_key(repo_id)
    local_path_str = str(local_path)
    vram_4bit = round(vram_mb_fp16 / 4 * 1.1)
    updated: list[str] = []
    for yaml_file in sorted(profiles_dir.glob("*.yaml")):
        try:
            content = yaml_file.read_text(encoding="utf-8")
            data = yaml.safe_load(content)
            # safe_load returns None for an empty document and may return a
            # scalar or list; none of those can hold a services mapping.
            if not isinstance(data, dict):
                logger.debug("%s is not a YAML mapping — skipping", yaml_file.name)
                continue
            cf_text = (data.get("services") or {}).get("cf-text")
            if not cf_text or not isinstance(cf_text, dict):
                continue
            # ``max_mb:`` present but null must behave like a missing budget.
            max_mb: int = cf_text.get("max_mb") or 0
            catalog: dict = cf_text.get("catalog") or {}
            # Skip if key already exists
            if model_key in catalog:
                logger.debug("Key %r already in %s — skipping", model_key, yaml_file.name)
                continue
            # Skip if any existing entry already points at this path (or a file within it)
            registered_paths = {
                str(entry.get("path", ""))
                for entry in catalog.values()
                if isinstance(entry, dict)
            }
            if local_path_str in registered_paths or any(
                p.startswith(local_path_str + "/") for p in registered_paths
            ):
                logger.debug("Path %s already registered in %s — skipping", local_path_str, yaml_file.name)
                continue
            # Determine whether model fits at FP16 or needs 4-bit
            if vram_mb_fp16 <= max_mb:
                vram_for_node = vram_mb_fp16
                needs_4bit = False
            elif vram_4bit <= max_mb:
                vram_for_node = vram_4bit
                needs_4bit = True
            else:
                logger.debug(
                    "%s too large for %s (fp16=%d MB, 4bit=%d MB, max=%d MB)",
                    repo_id, yaml_file.name, vram_mb_fp16, vram_4bit, max_mb,
                )
                continue
            desc = f"{repo_id} ({role}, downloaded via avocet)"
            if needs_4bit:
                desc += " — CF_TEXT_4BIT=1 required"
            vram_comment = (
                f"  # 4-bit estimate; FP16 footprint is {vram_mb_fp16} MB"
                if needs_4bit
                else "  # FP16 file-size estimate"
            )
            # Hand-built text (not yaml.dump) so the file's existing comments
            # and layout survive the edit.
            entry_block = (
                "      # auto-registered by avocet on download\n"
                f"      {model_key}:\n"
                f"        path: {local_path_str}\n"
                f"        vram_mb: {vram_for_node}{vram_comment}\n"
                f"        description: \"{desc}\"\n"
            )
            new_content = _insert_catalog_entry(content, entry_block)
            if new_content == content:
                logger.warning("Could not find catalog insertion point in %s", yaml_file.name)
                continue
            yaml_file.write_text(new_content, encoding="utf-8")
            updated.append(yaml_file.stem)
            logger.info(
                "Registered %s in %s (vram_mb=%d, 4bit=%s)",
                model_key, yaml_file.name, vram_for_node, needs_4bit,
            )
        except Exception as exc:
            # Best-effort per node: one unreadable/unwritable profile must not
            # block registration on the others.
            logger.warning("Could not update %s: %s", yaml_file.name, exc)
    return updated
 # ── Background download ────────────────────────────────────────────────────────
-def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter_recommendation: str | None) -> None:
+def _poll_disk_progress(local_dir: Path, total_bytes: int, stop_event: threading.Event) -> None:
-    """Background thread: download model via huggingface_hub.snapshot_download."""
+    """Side-thread: poll local_dir size every 2s and update _download_progress.
    snapshot_download is a blocking call with no progress callback, so we watch
    the destination directory grow on disk as a proxy for download progress.
    total_bytes=0 means we don't know the target size; pct stays 0 until done.
    """
    import time
    while not stop_event.is_set():
        try:
            downloaded = sum(
                f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
            )
            _download_progress["downloaded_bytes"] = downloaded
            if total_bytes > 0:
                _download_progress["total_bytes"] = total_bytes
                _download_progress["pct"] = min(downloaded / total_bytes * 100, 99.0)
        except Exception:
            pass
        time.sleep(2)
 def _run_download(
    entry_id: str,
    repo_id: str,
    pipeline_tag: str | None,
    adapter_recommendation: str | None,
    role: str | None = None,
    service: str | None = None,
    model_size_bytes: int = 0,
 ) -> None:
    """Background thread: download model via huggingface_hub.snapshot_download.
    model_size_bytes is the sum of file sizes reported by the HF API (siblings).
    It is used to estimate vram_mb and written to model_info.json so cf-orch can
    budget VRAM when allocating a cf-text instance for this model.
    """
    global _download_progress
-    safe_name = _safe_model_name(repo_id)
+    local_dir = _model_dir_for(repo_id, service)
    local_dir = _MODELS_DIR / safe_name
    _download_progress = {
        "active": True,
        "repo_id": repo_id,
        "downloaded_bytes": 0,
-        "total_bytes": 0,
+        "total_bytes": model_size_bytes,
        "pct": 0.0,
        "done": False,
        "error": None,
    }
    stop_poll = threading.Event()
    poll_thread = threading.Thread(
        target=_poll_disk_progress,
        args=(local_dir, model_size_bytes, stop_poll),
        daemon=True,
        name=f"model-poll-{entry_id}",
    )
    try:
        if snapshot_download is None:
            raise RuntimeError("huggingface_hub is not installed")
        local_dir.mkdir(parents=True, exist_ok=True)
        poll_thread.start()
        snapshot_download(
            repo_id=repo_id,
            local_dir=str(local_dir),
        )
-        # Write model_info.json alongside downloaded files
+        # Estimate VRAM from reported file size.
        # HF siblings sizes are pre-quantisation file sizes; add 10% for KV cache
        # and runtime overhead. Falls back to a stat of the local dir if 0.
        if model_size_bytes == 0:
            model_size_bytes = sum(
                f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
            )
        vram_mb = int(model_size_bytes / (1024 * 1024) * 1.1)
        # Write model_info.json alongside downloaded files.
        # local_path + vram_mb are read by cf-orch at allocation time to resolve
        # the full model path and grant the correct VRAM lease.
        model_info = {
            "repo_id": repo_id,
            "pipeline_tag": pipeline_tag,
            "adapter_recommendation": adapter_recommendation,
            "role": role,
            "service": service,
            "model_size_bytes": model_size_bytes,
            "vram_mb": vram_mb,
            "local_path": str(local_dir),
            "downloaded_at": datetime.now(timezone.utc).isoformat(),
        }
        local_dir.mkdir(parents=True, exist_ok=True)
        (local_dir / "model_info.json").write_text(
            json.dumps(model_info, indent=2), encoding="utf-8"
        )
        # Auto-register cf-text models in the cf-orch node YAML catalogs so they
        # appear in the benchmark model list without a manual YAML edit.
        if service == "cf-text":
            registered_on = _register_in_node_catalogs(
                repo_id=repo_id,
                local_path=local_dir,
                vram_mb_fp16=vram_mb,
                role=role or "generator",
            )
            if registered_on:
                logger.info(
                    "Auto-registered %s in node catalogs: %s",
                    repo_id, ", ".join(registered_on),
                )
        _download_progress["done"] = True
        _download_progress["pct"] = 100.0
-        _update_queue_entry(entry_id, {"status": "ready"})
+        _update_queue_entry(entry_id, {"status": "ready", "local_path": str(local_dir)})
    except Exception as exc:
        logger.exception("Download failed for %s: %s", repo_id, exc)
@ -174,6 +479,7 @@ def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter
        _download_progress["done"] = True
        _update_queue_entry(entry_id, {"status": "failed", "error": str(exc)})
    finally:
        stop_poll.set()
        _download_progress["active"] = False
@ -199,11 +505,15 @@ def lookup_model(repo_id: str) -> dict:
    data = resp.json()
    pipeline_tag = data.get("pipeline_tag")
-    adapter_recommendation = _TAG_TO_ADAPTER.get(pipeline_tag) if pipeline_tag else None
+    tag_info = _TAG_TO_INFO.get(pipeline_tag) if pipeline_tag else None
    adapter_recommendation = tag_info["adapter"] if tag_info else None
    role = tag_info["role"] if tag_info else None
    service = tag_info["service"] if tag_info else None
    # Determine compatibility and surface a human-readable warning
-    _supported = ", ".join(sorted(_TAG_TO_ADAPTER.keys()))
+    _supported = ", ".join(sorted(_TAG_TO_INFO.keys()))
-    if adapter_recommendation is not None:
+    if tag_info is not None:
        # Any recognized tag is compatible — avocet adapters or another CF service
        compatible = True
        warning: str | None = None
    elif pipeline_tag is None:
@ -216,7 +526,7 @@ def lookup_model(repo_id: str) -> dict:
    else:
        compatible = False
        warning = (
-            f"\"{pipeline_tag}\" models are not supported by Avocet's email classification adapters. "
+            f"\"{pipeline_tag}\" models are not yet supported by the CircuitForge model ecosystem. "
            f"Supported task types: {_supported}."
        )
        logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id)
@ -234,6 +544,8 @@ def lookup_model(repo_id: str) -> dict:
        "repo_id": repo_id,
        "pipeline_tag": pipeline_tag,
        "adapter_recommendation": adapter_recommendation,
        "role": role,
        "service": service,
        "compatible": compatible,
        "warning": warning,
        "model_size_bytes": model_size_bytes,
@ -261,12 +573,18 @@ class QueueAddRequest(BaseModel):
    repo_id: str
    pipeline_tag: str | None = None
    adapter_recommendation: str | None = None
    role: str | None = None
    service: str | None = None
    # Sum of file sizes from HF API siblings list; 0 if unknown.
    # Stored in the queue entry so approve can pass it to _run_download
    # without a second HF API round-trip.
    model_size_bytes: int = 0
@router.post("/queue", status_code=201)
 def add_to_queue(req: QueueAddRequest) -> dict:
    """Add a model to the approval queue with status 'pending'."""
-    if _is_installed(req.repo_id):
+    if _is_installed(req.repo_id, service=req.service):
        raise HTTPException(409, f"{req.repo_id!r} is already installed")
    if _is_queued(req.repo_id):
        raise HTTPException(409, f"{req.repo_id!r} is already in the queue")
@ -276,6 +594,9 @@ def add_to_queue(req: QueueAddRequest) -> dict:
        "repo_id": req.repo_id,
        "pipeline_tag": req.pipeline_tag,
        "adapter_recommendation": req.adapter_recommendation,
        "role": req.role,
        "service": req.service,
        "model_size_bytes": req.model_size_bytes,
        "status": "pending",
        "queued_at": datetime.now(timezone.utc).isoformat(),
    }
@ -300,7 +621,15 @@ def approve_queue_entry(entry_id: str) -> dict:
    thread = threading.Thread(
        target=_run_download,
-        args=(entry_id, entry["repo_id"], entry.get("pipeline_tag"), entry.get("adapter_recommendation")),
+        args=(
            entry_id,
            entry["repo_id"],
            entry.get("pipeline_tag"),
            entry.get("adapter_recommendation"),
            entry.get("role"),
            entry.get("service"),
            entry.get("model_size_bytes", 0),
        ),
        daemon=True,
        name=f"model-download-{entry_id}",
    )
@ -368,18 +697,104 @@ def download_stream() -> StreamingResponse:
    )
 # ── POST /sync-catalogs ────────────────────────────────────────────────────────
@router.post("/sync-catalogs")
def sync_catalogs() -> dict:
    """Scan all installed cf-text models and register any missing from node YAMLs.

    Reads model_info.json from each directory in the cf-text models dir and calls
    _register_in_node_catalogs() for each. Idempotent — skips models already
    present by key or path.

    Returns:
        A summary dict with ``registered`` (repo_id → list of updated node
        names), ``skipped`` (directory names or repo ids for which nothing was
        registered) and a human-readable ``message``.
    """
    if not _CF_TEXT_MODELS_DIR.exists():
        return {"registered": {}, "skipped": [], "message": "cf-text models dir not found"}
    registered: dict[str, list[str]] = {}
    skipped: list[str] = []
    for model_dir in sorted(_CF_TEXT_MODELS_DIR.iterdir()):
        if not model_dir.is_dir():
            continue
        info_file = model_dir / "model_info.json"
        if not info_file.exists():
            skipped.append(model_dir.name)
            continue
        try:
            info = json.loads(info_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSON and UTF-8 decode failures.
            logger.warning("Could not read model_info.json for %s: %s", model_dir.name, exc)
            skipped.append(model_dir.name)
            continue
        # A JSON document that is not an object has no service to check.
        if not isinstance(info, dict) or info.get("service") != "cf-text":
            skipped.append(model_dir.name)
            continue
        # model_info.json may hold explicit nulls for these keys, so a
        # dict.get() default is not enough — ``or`` also covers None.
        repo_id = info.get("repo_id") or model_dir.name
        vram_mb = info.get("vram_mb") or 0
        role = info.get("role") or "generator"
        updated_nodes = _register_in_node_catalogs(
            repo_id=repo_id,
            local_path=model_dir,
            vram_mb_fp16=vram_mb,
            role=role,
        )
        if updated_nodes:
            registered[repo_id] = updated_nodes
        else:
            skipped.append(repo_id)
    return {
        "registered": registered,
        "skipped": skipped,
        "message": (
            f"Registered {len(registered)} model(s) on "
            f"{sum(len(v) for v in registered.values())} node(s)"
            if registered
            else "All models already registered (or no eligible nodes found)"
        ),
    }
 # ── GET /installed ─────────────────────────────────────────────────────────────
@router.get("/installed")
 def list_installed() -> list[dict]:
-    """Scan _MODELS_DIR and return info on each installed model."""
+    """Scan all model directories and return info on each installed model.
-    if not _MODELS_DIR.exists():
+
-        return []
+    Scans both the local avocet models dir (classifiers, fine-tunes) and the
    shared NFS cf-text models dir, deduplicating by directory path.
    Falls back to queue entry data when model_info.json has null service/role,
    so models downloaded before the pipeline_tag registry existed still group
    correctly in the UI.
    """
    scan_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR and _CF_TEXT_MODELS_DIR.exists():
        scan_dirs.append(_CF_TEXT_MODELS_DIR)
    # Build a lookup from safe directory name → queue entry for fallback enrichment.
    queue_by_safe_name: dict[str, dict] = {
        _safe_model_name(r["repo_id"]): r
        for r in _read_queue()
        if r.get("repo_id") and r.get("status") not in ("dismissed",)
    }
    results: list[dict] = []
-    for sub in _MODELS_DIR.iterdir():
+    seen: set[Path] = set()
-        if not sub.is_dir():
+
    for scan_dir in scan_dirs:
        if not scan_dir.exists():
            continue
        for sub in scan_dir.iterdir():
            if not sub.is_dir() or sub in seen:
                continue
            seen.add(sub)
            has_training_info = (sub / "training_info.json").exists()
            has_config = (sub / "config.json").exists()
@ -393,15 +808,20 @@ def list_installed() -> list[dict]:
            # Compute directory size
            size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())
        # Load adapter/model_id from model_info.json or training_info.json
            adapter: str | None = None
            model_id: str | None = None
            role: str | None = None
            service: str | None = None
            vram_mb: int | None = None
            if has_model_info:
                try:
                    info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
                    adapter = info.get("adapter_recommendation")
                    model_id = info.get("repo_id")
                    role = info.get("role")
                    service = info.get("service")
                    vram_mb = info.get("vram_mb")
                except Exception:
                    pass
            elif has_training_info:
@ -409,40 +829,154 @@ def list_installed() -> list[dict]:
                    info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
                    adapter = info.get("adapter")
                    model_id = info.get("base_model") or info.get("model_id")
                    role = info.get("role", "classifier")
                    service = info.get("service", "avocet")
                except Exception:
                    pass
            # Fall back to queue entry when model_info.json has null service/role.
            # This covers models downloaded before the pipeline_tag registry existed.
            if (role is None or service is None) and sub.name in queue_by_safe_name:
                q = queue_by_safe_name[sub.name]
                role = role or q.get("role")
                service = service or q.get("service")
                model_id = model_id or q.get("repo_id")
            # Last resort: re-derive from pipeline_tag if we still have no service.
            if service is None and model_id:
                hf_url = f"https://huggingface.co/api/models/{model_id}"
                # Only attempt if we have a pipeline_tag cached somewhere.
                for q in queue_by_safe_name.values():
                    if q.get("repo_id") == model_id and q.get("pipeline_tag"):
                        tag_info = _TAG_TO_INFO.get(q["pipeline_tag"])
                        if tag_info:
                            role = role or tag_info["role"]
                            service = service or tag_info["service"]
                        break
            results.append({
                "name": sub.name,
                "path": str(sub),
                "type": model_type,
                "adapter": adapter,
                "role": role,
                "service": service,
                "size_bytes": size_bytes,
                "vram_mb": vram_mb,
                "model_id": model_id,
            })
    return results
 # ── PATCH /installed/{name} ────────────────────────────────────────────────────
 # Request body for PATCH /installed/{name}. Both fields are required and are
 # written verbatim into the model's model_info.json by patch_installed().
 class InstalledModelPatch(BaseModel):
    service: str  # CF service to assign to the model
    role: str  # model role to assign
@router.patch("/installed/{name}")
def patch_installed(name: str, body: InstalledModelPatch) -> dict:
    """Manually assign service and role to an installed model.

    Writes the updated values back to model_info.json so they survive restarts,
    and updates any matching queue entry so the UI shows the correct chip.

    Args:
        name: Directory name of the installed model (a single path component).
        body: The service and role to assign.

    Returns:
        ``{"ok": True, "service": ..., "role": ...}`` echoing the new values.

    Raises:
        HTTPException: 400 if ``name`` is not a plain directory name or
            resolves outside a models directory; 404 if no such model
            directory exists.
    """
    if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
        raise HTTPException(400, f"Invalid model name {name!r}")
    candidate_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
        candidate_dirs.append(_CF_TEXT_MODELS_DIR)
    model_path: Path | None = None
    for base in candidate_dirs:
        candidate = base / name
        # Extra safety: the resolved path must stay inside its base directory.
        try:
            candidate.resolve().relative_to(base.resolve())
        except ValueError:
            raise HTTPException(400, f"Path traversal detected for name {name!r}") from None
        # Only a directory can hold model_info.json; a stray file with the
        # same name must not be treated as an installed model.
        if candidate.is_dir():
            model_path = candidate
            break
    if model_path is None:
        raise HTTPException(404, f"Installed model {name!r} not found")
    info_path = model_path / "model_info.json"
    info: dict = {}
    if info_path.exists():
        try:
            loaded = json.loads(info_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            loaded = None
        if isinstance(loaded, dict):
            info = loaded
        else:
            # Unreadable or not a JSON object: start from an empty record, but
            # say so rather than silently dropping the old contents.
            logger.warning(
                "model_info.json for %s is unreadable or not a JSON object — rewriting", name
            )
    info["service"] = body.service
    info["role"] = body.role
    info_path.write_text(json.dumps(info, indent=2), encoding="utf-8")
    # Mirror the update into any matching queue entry.
    records = _read_queue()
    updated = False
    for r in records:
        if r.get("status") == "dismissed":
            continue
        # Queue records may carry explicit nulls for these keys.
        local = r.get("local_path") or ""
        repo = r.get("repo_id") or ""
        if (local and Path(local).name == name) or _safe_model_name(repo) == name:
            r["service"] = body.service
            r["role"] = body.role
            updated = True
    if updated:
        _write_queue(records)
    return {"ok": True, "service": body.service, "role": body.role}
 # ── DELETE /installed/{name} ───────────────────────────────────────────────────
@router.delete("/installed/{name}")
 def delete_installed(name: str) -> dict:
-    """Remove an installed model directory by name. Blocks path traversal."""
+    """Remove an installed model directory by name. Blocks path traversal.
-    # Validate: single path component, no slashes or '..'
+
    Searches both the local avocet models dir and the shared cf-text models dir.
    Also dismisses any matching queue entry so the UI doesn't show a stale "ready" card.
    """
    if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
        raise HTTPException(400, f"Invalid model name {name!r}: must be a single directory name with no path separators or '..'")
-    model_path = _MODELS_DIR / name
+    # Search both model directories
    candidate_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
        candidate_dirs.append(_CF_TEXT_MODELS_DIR)
-    # Extra safety: confirm resolved path is inside _MODELS_DIR
+    model_path: Path | None = None
    for base in candidate_dirs:
        candidate = base / name
        try:
-        model_path.resolve().relative_to(_MODELS_DIR.resolve())
+            candidate.resolve().relative_to(base.resolve())
        except ValueError:
            raise HTTPException(400, f"Path traversal detected for name {name!r}")
        if candidate.exists():
            model_path = candidate
            break
-    if not model_path.exists():
+    if model_path is None:
-        raise HTTPException(404, f"Installed model {name!r} not found")
+        raise HTTPException(404, f"Installed model {name!r} not found in any model directory")
    shutil.rmtree(model_path)
    # Dismiss any queue entries whose local_path matches, or whose repo_id maps to this dir name.
    records = _read_queue()
    updated = False
    for r in records:
        local = r.get("local_path", "")
        matches_path = local and Path(local).name == name
        matches_name = _safe_model_name(r.get("repo_id", "")) == name
        if (matches_path or matches_name) and r.get("status") != "dismissed":
            r["status"] = "dismissed"
            updated = True
    if updated:
        _write_queue(records)
    return {"ok": True}
--- a/config/label_tool.yaml.example
+++ b/config/label_tool.yaml.example
@ -57,11 +57,32 @@ imitate:
    - id: peregrine
      name: Peregrine
      icon: "🦅"
-      description: Job search assistant
+      description: Job search assistant — live job listings
-      base_url: http://localhost:8502
+      base_url: http://localhost:8601
-      sample_endpoint: /api/jobs
+      health_path: /api/jobs/counts
-      text_fields: [title, description]
+      sample_endpoint: /api/jobs?status=pending&limit=5
-      prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}"
+      text_fields: [title, company, description]
      prompt_template: "Analyze this job listing and identify the key requirements, must-have skills, and any culture signals that would help tailor an application:\n\n{text}"
    - id: osprey
      name: Osprey
      icon: "📞"
      description: Gov't hold-line automation — recent call records
      base_url: http://localhost:8520
      health_path: /api/health
      sample_endpoint: /api/calls/recent
      text_fields: [agency, issue, notes]
      prompt_template: "Draft a clear, professional follow-up letter for this government hold-line call. Include what was discussed, what action the agency committed to, and a polite deadline for response:\n\n{text}"
    - id: linnet
      name: Linnet
      icon: "🐦"
      description: Real-time tone annotation — Elcor-style subtext for ND users
      base_url: http://localhost:8522
      health_path: /health
      sample_endpoint: /samples
      text_fields: [text, context]
      prompt_template: "Annotate the emotional tone and subtext of the following text using explicit Elcor-style markers (e.g. [SINCERELY], [UNCERTAIN], [FRUSTRATED]). Identify implied emotions, potential sarcasm, and any ambiguity that might be misread by neurodivergent readers:\n\n{text}"
    - id: kiwi
      name: Kiwi
--- a/manage.sh
+++ b/manage.sh
@ -90,6 +90,12 @@ usage() {
    echo -e "    ${GREEN}score [args]${NC}             Shortcut: --score [args]"
    echo -e "    ${GREEN}compare [args]${NC}           Shortcut: --compare [args]"
    echo ""
    echo "  Writing Style Benchmark:"
    echo -e "    ${GREEN}style-bench [args]${NC}       Run benchmark_style.py (args passed through)"
    echo -e "    ${GREEN}style-list${NC}               List available ollama models for style bench"
    echo -e "    ${GREEN}style-run [args]${NC}         Run writing style benchmark (--models, --samples, --include-large, --scan-disk PATH, --cforch)"
    echo -e "    ${GREEN}style-last${NC}               Print most recent writing style benchmark report"
    echo ""
    echo "  Dev:"
    echo -e "    ${GREEN}dev${NC}                      Hot-reload: uvicorn --reload (:8503) + Vite HMR (:5173)"
    echo -e "    ${GREEN}test${NC}                     Run pytest suite"
@ -249,6 +255,26 @@ case "$CMD" in
        exec "$0" benchmark --compare "$@"
        ;;
    style-bench)
        info "Running writing style benchmark (${ENV_BM})…"
        if [[ ! -x "$PYTHON_BM" ]]; then
            error "Python not found in ${ENV_BM} env at ${PYTHON_BM}"
        fi
        "$PYTHON_BM" scripts/benchmark_style.py "$@"
        ;;
    style-list)
        exec "$0" style-bench --list-models
        ;;
    style-run)
        exec "$0" style-bench --run "$@"
        ;;
    style-last)
        exec "$0" style-bench --show-last
        ;;
    help|--help|-h)
        usage
        ;;
--- a/tests/test_models.py
+++ b/tests/test_models.py
@ -122,17 +122,88 @@ def test_lookup_returns_correct_shape(client):
    assert data["already_queued"] is False
-def test_lookup_unknown_pipeline_tag_returns_null_adapter(client):
+def test_lookup_unknown_pipeline_tag_returns_null_adapter_and_incompatible(client):
-    """An unrecognised pipeline_tag yields adapter_recommendation=null."""
+    """An unrecognised pipeline_tag yields adapter_recommendation=null and compatible=False."""
    mock_resp = MagicMock()
    mock_resp.status_code = 200
-    mock_resp.json.return_value = _make_hf_response("org/m", "audio-classification")
+    mock_resp.json.return_value = _make_hf_response("org/m", "reinforcement-learning")
    with patch("app.models.httpx.get", return_value=mock_resp):
        r = client.get("/api/models/lookup", params={"repo_id": "org/m"})
    assert r.status_code == 200
-    assert r.json()["adapter_recommendation"] is None
+    data = r.json()
    assert data["adapter_recommendation"] is None
    assert data["compatible"] is False
    assert data["role"] is None
    assert data["service"] is None
    assert "CircuitForge model ecosystem" in data["warning"]
 def test_lookup_stt_tag_returns_compatible_with_cf_stt_service(client):
    """Lookup of an automatic-speech-recognition repo resolves to role=stt on cf-stt.

    The outbound ``httpx.get`` call is patched with a canned Hugging Face
    response. The lookup result must be compatible, carry no adapter
    recommendation, and carry no warning.
    """
    repo = "openai/whisper-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "automatic-speech-recognition")
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo})
    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["adapter_recommendation"] is None
    assert payload["role"] == "stt"
    assert payload["service"] == "cf-stt"
    assert payload["warning"] is None
 def test_lookup_vision_tag_returns_compatible_with_cf_vision_service(client):
    """Lookup of an image-classification repo resolves to role=vision on cf-vision.

    The outbound ``httpx.get`` call is patched with a canned Hugging Face
    response so no network access happens.
    """
    repo = "google/siglip-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "image-classification")
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo})
    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "vision"
    assert payload["service"] == "cf-vision"
 def test_lookup_audio_classification_tag_returns_cf_voice_service(client):
    """Lookup of an audio-classification repo resolves to role=classifier on cf-voice.

    The outbound ``httpx.get`` call is patched with a canned Hugging Face
    response so no network access happens.
    """
    repo = "org/audio-model"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "audio-classification")
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo})
    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "classifier"
    assert payload["service"] == "cf-voice"
 def test_lookup_embedding_tag_returns_compatible_with_cf_core_service(client):
    """Lookup of a feature-extraction repo resolves to role=embedding on cf-core.

    The outbound ``httpx.get`` call is patched with a canned Hugging Face
    response so no network access happens.
    """
    repo = "BAAI/bge-small-en"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "feature-extraction")
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo})
    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "embedding"
    assert payload["service"] == "cf-core"
 def test_lookup_already_queued_flag(client):
@ -181,6 +252,26 @@ def test_queue_add_returns_entry_fields(client):
    assert entry["adapter_recommendation"] == "ZeroShotAdapter"
 def test_queue_preserves_role_and_service(client):
    """POST /queue with role/service fields round-trips them through GET /queue.

    The entry returned by the POST and the first entry returned by the
    subsequent GET must both carry the submitted ``role`` and ``service``.
    """
    submitted = {
        "repo_id": "openai/whisper-base",
        "pipeline_tag": "automatic-speech-recognition",
        "adapter_recommendation": None,
        "role": "stt",
        "service": "cf-stt",
    }
    created = client.post("/api/models/queue", json=submitted)
    assert created.status_code == 201
    entry = created.json()
    assert entry["role"] == "stt"
    assert entry["service"] == "cf-stt"

    listing = client.get("/api/models/queue")
    # Check the listing itself before indexing into it, so a failing GET or
    # an empty queue is reported as a clear assertion rather than an opaque
    # KeyError / IndexError.
    assert listing.status_code == 200
    items = listing.json()
    assert items, "queue listing is empty after a successful POST"
    assert items[0]["role"] == "stt"
    assert items[0]["service"] == "cf-stt"
 # ── POST /queue — 409 duplicate ────────────────────────────────────────────────
 def test_queue_duplicate_returns_409(client):
@ -317,7 +408,12 @@ def test_installed_detects_downloaded_model(client, tmp_path):
    model_dir.mkdir()
    (model_dir / "config.json").write_text(json.dumps({"model_type": "bert"}), encoding="utf-8")
    (model_dir / "model_info.json").write_text(
-        json.dumps({"repo_id": "org/mymodel", "adapter_recommendation": "ZeroShotAdapter"}),
+        json.dumps({
            "repo_id": "org/mymodel",
            "adapter_recommendation": "ZeroShotAdapter",
            "role": "classifier",
            "service": "avocet",
        }),
        encoding="utf-8",
    )
@ -329,6 +425,51 @@ def test_installed_detects_downloaded_model(client, tmp_path):
    assert items[0]["name"] == "org--mymodel"
    assert items[0]["adapter"] == "ZeroShotAdapter"
    assert items[0]["model_id"] == "org/mymodel"
    assert items[0]["role"] == "classifier"
    assert items[0]["service"] == "avocet"
 def test_installed_stt_model_surfaces_role_and_service(client):
    """GET /installed reports the role/service stored in a downloaded STT model's model_info.json.

    A model directory holding ``config.json`` and ``model_info.json`` is
    created under the module's models dir. The listing must echo role=stt,
    service=cf-stt and a null adapter.
    """
    from app import models as models_module
    install_dir = models_module._MODELS_DIR / "openai--whisper-base"
    install_dir.mkdir()
    metadata_files = {
        "config.json": {"model_type": "whisper"},
        "model_info.json": {
            "repo_id": "openai/whisper-base",
            "adapter_recommendation": None,
            "role": "stt",
            "service": "cf-stt",
        },
    }
    for filename, content in metadata_files.items():
        (install_dir / filename).write_text(json.dumps(content), encoding="utf-8")
    response = client.get("/api/models/installed")
    assert response.status_code == 200
    listed = response.json()
    assert listed[0]["role"] == "stt"
    assert listed[0]["service"] == "cf-stt"
    assert listed[0]["adapter"] is None
 def test_installed_finetuned_model_defaults_to_avocet_service(client):
    """A fine-tuned model whose training_info.json lacks role/service is listed as classifier/avocet."""
    from app import models as models_module
    install_dir = models_module._MODELS_DIR / "my-finetuned-v2"
    install_dir.mkdir()
    # Deliberately no "role" or "service" keys: the listing must supply defaults.
    training_info = {"base_model": "microsoft/deberta-v3-base", "epochs": 3}
    (install_dir / "training_info.json").write_text(json.dumps(training_info), encoding="utf-8")
    response = client.get("/api/models/installed")
    assert response.status_code == 200
    listed = response.json()
    assert listed[0]["role"] == "classifier"
    assert listed[0]["service"] == "avocet"
 def test_installed_detects_finetuned_model(client):
--- a/web/.gitignore
+++ b/web/.gitignore
@ -22,3 +22,7 @@ dist-ssr
 *.njsproj
 *.sln
 *.sw?
 # Local environment overrides
 .env
--- a/web/src/views/ModelsView.vue
+++ b/web/src/views/ModelsView.vue
@ -42,6 +42,12 @@
          <span v-if="lookupResult.pipeline_tag" class="chip chip-pipeline">
            {{ lookupResult.pipeline_tag }}
          </span>
          <span v-if="lookupResult.role" class="chip chip-role">
            {{ lookupResult.role }}
          </span>
          <span v-if="lookupResult.service" class="chip" :class="serviceChipClass(lookupResult.service)">
            {{ lookupResult.service }}
          </span>
          <span v-if="lookupResult.adapter_recommendation" class="chip chip-adapter">
            {{ lookupResult.adapter_recommendation }}
          </span>
@ -61,11 +67,10 @@
        <button
          class="btn-primary btn-add-queue"
          :class="{ 'btn-add-queue-warn': !lookupResult.compatible }"
          :disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue"
          @click="addToQueue"
        >
-          {{ addingToQueue ? 'Adding…' : lookupResult.compatible ? 'Add to queue' : 'Add anyway' }}
+          {{ addingToQueue ? 'Adding…' : 'Add to queue' }}
        </button>
      </div>
    </section>
@ -91,6 +96,8 @@
        </div>
        <div class="model-meta">
          <span v-if="model.pipeline_tag"          class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
          <span v-if="model.role"                  class="chip chip-role">{{ model.role }}</span>
          <span v-if="model.service"               class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
          <span v-if="model.adapter_recommendation" class="chip chip-adapter">{{ model.adapter_recommendation }}</span>
        </div>
        <div class="model-card-actions">
@ -116,6 +123,8 @@
        </div>
        <div class="model-meta">
          <span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
          <span v-if="model.role"         class="chip chip-role">{{ model.role }}</span>
          <span v-if="model.service"      class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
        </div>
        <div v-if="downloadErrors[model.id]" class="download-error" role="alert">
@ -124,14 +133,19 @@
        <div v-else class="progress-wrap" :aria-label="`Download progress for ${model.repo_id}`">
          <div
            class="progress-bar"
-            :style="{ width: `${downloadProgress[model.id] ?? 0}%` }"
+            :style="{ width: `${downloadProgress[model.repo_id]?.pct ?? 0}%` }"
            role="progressbar"
-            :aria-valuenow="downloadProgress[model.id] ?? 0"
+            :aria-valuenow="downloadProgress[model.repo_id]?.pct ?? 0"
            aria-valuemin="0"
            aria-valuemax="100"
          />
          <span class="progress-label">
-            {{ downloadProgress[model.id] == null ? 'Preparing…' : `${downloadProgress[model.id]}%` }}
+            {{
              !downloadProgress[model.repo_id] ? 'Preparing…'
              : downloadProgress[model.repo_id].pct != null ? `${Math.round(downloadProgress[model.repo_id].pct!)}%`
              : downloadProgress[model.repo_id].bytes > 0 ? `${(downloadProgress[model.repo_id].bytes / 1024 / 1024).toFixed(0)} MB downloaded…`
              : 'Preparing…'
            }}
          </span>
        </div>
      </div>
@ -145,20 +159,33 @@
        No models installed yet.
      </div>
-      <div v-else class="installed-table-wrap">
+      <template v-else>
        <div
          v-for="group in installedByService"
          :key="group.service"
          class="installed-group"
        >
          <div class="installed-group-header">
            <span class="chip" :class="serviceChipClass(group.service)">
              {{ serviceLabel(group.service) }}
            </span>
            <span class="installed-group-count">{{ group.models.length }} model{{ group.models.length !== 1 ? 's' : '' }}</span>
          </div>
          <div class="installed-table-wrap">
            <table class="installed-table">
              <thead>
                <tr>
                  <th>Name</th>
                  <th>Type</th>
-              <th>Adapter</th>
+                  <th>Role</th>
                  <th>Size</th>
                  <th></th>
                </tr>
              </thead>
              <tbody>
-            <tr v-for="model in installedModels" :key="model.name">
+                <tr v-for="model in group.models" :key="model.name">
-              <td class="td-name">{{ model.name }}</td>
+                  <td class="td-name">{{ model.model_id ?? model.name }}</td>
                  <td>
                    <span
                      class="badge"
@ -167,9 +194,42 @@
                      {{ model.type }}
                    </span>
                  </td>
              <td>{{ model.adapter ?? '—' }}</td>
              <td>{{ humanBytes(model.size) }}</td>
                  <td>
                    <span v-if="model.role" class="chip chip-role chip-sm">{{ model.role }}</span>
                    <span v-else>—</span>
                  </td>
                  <td>{{ humanBytes(model.size_bytes) }}</td>
                  <td class="td-actions">
                    <div v-if="!model.service" class="classify-row">
                      <select
                        class="classify-select"
                        :value="classifyDraft[model.name]?.service ?? ''"
                        @change="onServiceChange(model.name, ($event.target as HTMLSelectElement).value)"
                        aria-label="Assign service"
                      >
                        <option value="" disabled>Service…</option>
                        <option v-for="svc in CLASSIFIABLE_SERVICES" :key="svc.value" :value="svc.value">{{ svc.label }}</option>
                      </select>
                      <select
                        class="classify-select"
                        :value="classifyDraft[model.name]?.role ?? ''"
                        :disabled="!classifyDraft[model.name]?.service"
                        @change="(e) => setClassifyRole(model.name, (e.target as HTMLSelectElement).value)"
                        aria-label="Assign role"
                      >
                        <option value="" disabled>Role…</option>
                        <option
                          v-for="role in rolesForService(classifyDraft[model.name]?.service ?? '')"
                          :key="role"
                          :value="role"
                        >{{ role }}</option>
                      </select>
                      <button
                        class="btn-primary btn-sm"
                        :disabled="!classifyDraft[model.name]?.service || !classifyDraft[model.name]?.role"
                        @click="saveClassify(model.name)"
                      >Save</button>
                    </div>
                    <button
                      class="btn-danger btn-sm"
                      @click="deleteInstalled(model.name)"
@ -181,6 +241,8 @@
              </tbody>
            </table>
          </div>
        </div>
      </template>
    </section>
  </div>
 </template>
@ -194,6 +256,8 @@ interface LookupResult {
  repo_id: string
  pipeline_tag: string | null
  adapter_recommendation: string | null
  role: string | null
  service: string | null
  compatible: boolean
  warning: string | null
  size: number | null
@ -208,20 +272,27 @@ interface QueuedModel {
  status: 'pending' | 'downloading' | 'done' | 'error'
  pipeline_tag: string | null
  adapter_recommendation: string | null
  role: string | null
  service: string | null
 }
 interface InstalledModel {
  name: string
  type: 'finetuned' | 'downloaded'
  adapter: string | null
-  size: number
+  role: string | null
  service: string | null
  size_bytes: number
  model_id: string | null
 }
 interface SseProgressEvent {
-  model_id: string
+  type: 'progress' | 'done' | 'error' | 'idle'
-  pct: number | null
+  repo_id?: string
-  status: 'progress' | 'done' | 'error'
+  pct?: number
-  message?: string
+  downloaded_bytes?: number
  total_bytes?: number
  error?: string
 }
 // ── State ─────────────────────────────────────────────
@ -235,7 +306,8 @@ const addingToQueue = ref(false)
 const queuedModels    = ref<QueuedModel[]>([])
 const installedModels = ref<InstalledModel[]>([])
-const downloadProgress = ref<Record<string, number>>({})
+const downloadProgress  = ref<Record<string, { pct: number | null; bytes: number }>>({})
 const classifyDraft     = ref<Record<string, { service: string; role: string }>>({})
 const downloadErrors   = ref<Record<string, string>>({})
 let pollInterval: ReturnType<typeof setInterval> | null = null
@ -251,8 +323,69 @@ const downloadingModels = computed(() =>
  queuedModels.value.filter(m => m.status === 'downloading')
 )
 // Display order of the per-service sections in the installed list.
 // installedByService emits these keys first (when they have models) and appends
 // any service not listed here afterwards. 'other' is the bucket used for models
 // whose service is null.
 const SERVICE_ORDER = ['avocet', 'cf-text', 'cf-stt', 'cf-tts', 'cf-vision', 'cf-image', 'cf-core', 'cf-voice', 'other']
 // Options offered by the "Assign service" dropdown on installed models that
 // have no service yet. There is intentionally no 'other' entry here.
 const CLASSIFIABLE_SERVICES = [
  { value: 'avocet',    label: 'Avocet — Email Classifiers' },
  { value: 'cf-text',   label: 'cf-text — Language Models' },
  { value: 'cf-stt',    label: 'cf-stt — Speech Recognition' },
  { value: 'cf-tts',    label: 'cf-tts — Text to Speech' },
  { value: 'cf-vision', label: 'cf-vision — Vision / VLM' },
  { value: 'cf-image',  label: 'cf-image — Image Generation' },
  { value: 'cf-core',   label: 'cf-core — Embeddings' },
  { value: 'cf-voice',  label: 'cf-voice — Audio Classification' },
 ]
 // Roles selectable for each service in the "Assign role" dropdown.
 // When a service maps to exactly one role, onServiceChange preselects it.
 const SERVICE_ROLES: Record<string, string[]> = {
  'avocet':    ['classifier', 'reranker'],
  'cf-text':   ['generator'],
  'cf-stt':    ['stt', 'alm'],
  'cf-tts':    ['tts'],
  'cf-vision': ['vision', 'vlm', 'embedding'],
  'cf-image':  ['image-gen'],
  'cf-core':   ['embedding'],
  'cf-voice':  ['classifier'],
 }
 // Roles that can be assigned under `service`; an empty list when the service
 // has no entry in SERVICE_ROLES (including the '' placeholder value).
 function rolesForService(service: string): string[] {
  const roles = SERVICE_ROLES[service]
  if (roles == null) return []
  return roles
 }
 // Installed models bucketed by service, as an ordered list of
 // { service, models } sections. Models with a null service land in 'other'.
 const installedByService = computed(() => {
  const buckets: Record<string, InstalledModel[]> = {}
  installedModels.value.forEach((entry) => {
    const bucketKey = entry.service ?? 'other'
    const bucket = buckets[bucketKey] || (buckets[bucketKey] = [])
    bucket.push(entry)
  })
  // Sections follow SERVICE_ORDER; services missing from that list come last,
  // in the order they were first encountered.
  const known = SERVICE_ORDER.filter((svc) => buckets[svc])
  const extras = Object.keys(buckets).filter((svc) => !SERVICE_ORDER.includes(svc))
  return [...known, ...extras].map((svc) => ({ service: svc, models: buckets[svc] }))
 })
 // ── Helpers ───────────────────────────────────────────
 // Heading text for each service section in the installed list, keyed by the
 // service identifier. The first eight labels repeat the CLASSIFIABLE_SERVICES
 // labels verbatim; 'other' labels the bucket for models with no service.
 const SERVICE_LABELS: Record<string, string> = {
  'avocet':    'Avocet — Email Classifiers',
  'cf-text':   'cf-text — Language Models',
  'cf-stt':    'cf-stt — Speech Recognition',
  'cf-tts':    'cf-tts — Text to Speech',
  'cf-vision': 'cf-vision — Vision / VLM',
  'cf-image':  'cf-image — Image Generation',
  'cf-core':   'cf-core — Embeddings',
  'cf-voice':  'cf-voice — Audio Classification',
  'other':     'Other — Unclassified',
 }
 // Human-readable label for a service key; unknown keys are shown as-is.
 function serviceLabel(service: string): string {
  const label = SERVICE_LABELS[service]
  if (label == null) return service
  return label
 }
 // CSS class for a service chip: `chip-service-<slug>`, where the slug is the
 // service name with every character outside [a-z0-9] replaced by '-'.
 // A null or empty service maps to `chip-service-other`.
 function serviceChipClass(service: string | null): string {
  const slug = service ? service.replace(/[^a-z0-9]/g, '-') : 'other'
  return 'chip-service-' + slug
 }
 function humanBytes(bytes: number | null): string {
  if (bytes == null) return '—'
  const units = ['B', 'KB', 'MB', 'GB', 'TB']
@ -305,10 +438,11 @@ async function addToQueue() {
  if (!lookupResult.value) return
  addingToQueue.value = true
  try {
    const { repo_id, pipeline_tag, adapter_recommendation, role, service } = lookupResult.value
    const res = await fetch('/api/models/queue', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ repo_id: lookupResult.value.repo_id }),
+      body: JSON.stringify({ repo_id, pipeline_tag, adapter_recommendation, role, service }),
    })
    if (res.ok) {
      lookupResult.value = { ...lookupResult.value, already_queued: true }
@ -339,12 +473,50 @@ async function dismissModel(id: string) {
  } catch { /* ignore */ }
 }
 // Record the service chosen for model `name` in classifyDraft and reset its
 // role. A service with exactly one possible role gets that role preselected;
 // otherwise the role is cleared to ''.
 function onServiceChange(name: string, service: string) {
  const candidates = SERVICE_ROLES[service] ?? []
  let role = ''
  if (candidates.length === 1) {
    role = candidates[0]
  }
  // Replace the whole record rather than mutating it in place.
  classifyDraft.value = Object.assign({}, classifyDraft.value, { [name]: { service, role } })
 }
 // Set the role on the classify draft for model `name`, keeping whatever else
 // the existing draft entry holds.
 function setClassifyRole(name: string, role: string) {
  const previous = classifyDraft.value[name]
  const next = { ...previous, role }
  classifyDraft.value = { ...classifyDraft.value, [name]: next }
 }
 // Persist the drafted service/role for installed model `name` via
 // PATCH /api/models/installed/<name>. Does nothing unless both fields are set.
 // On success the local model entry is updated, the draft is discarded and the
 // queue is reloaded; a non-OK response or a thrown error is silently ignored.
 async function saveClassify(name: string) {
  const pending = classifyDraft.value[name]
  if (!(pending?.service && pending?.role)) return
  try {
    const response = await fetch(`/api/models/installed/${encodeURIComponent(name)}`, {
      method: 'PATCH',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ service: pending.service, role: pending.role }),
    })
    if (!response.ok) return
    // Swap in an updated copy so the model moves to the correct service group
    installedModels.value = installedModels.value.map((m) => {
      if (m.name !== name) return m
      return { ...m, service: pending.service, role: pending.role }
    })
    const remaining = Object.assign({}, classifyDraft.value)
    delete remaining[name]
    classifyDraft.value = remaining
    await loadQueue()
  } catch { /* non-fatal */ }
 }
 // Ask for confirmation, then DELETE /api/models/installed/<name>.
 // On success the model is dropped from the local list and the queue is
 // reloaded; a non-OK response or a thrown error is silently ignored.
 async function deleteInstalled(name: string) {
  const confirmed = window.confirm(`Delete installed model "${name}"? This cannot be undone.`)
  if (!confirmed) return
  try {
    const url = `/api/models/installed/${encodeURIComponent(name)}`
    const response = await fetch(url, { method: 'DELETE' })
    if (!response.ok) return
    installedModels.value = installedModels.value.filter((m) => m.name !== name)
    await loadQueue()
  } catch { /* ignore */ }
 }
@ -378,21 +550,28 @@ function startSse() {
      return
    }
-    const { model_id, pct, status, message } = event
+    const { type, repo_id, pct, downloaded_bytes, error } = event
    if (!repo_id) return
-    if (status === 'progress' && pct != null) {
+    if (type === 'progress') {
-      downloadProgress.value = { ...downloadProgress.value, [model_id]: pct }
+      const bytes = downloaded_bytes ?? 0
-    } else if (status === 'done') {
+      // pct stays null when total_bytes is unknown so we can show "X MB" instead
      const progress = (pct != null && pct > 0) ? pct : (bytes > 0 ? null : undefined)
      downloadProgress.value = { ...downloadProgress.value, [repo_id]: { pct: progress ?? null, bytes } }
    } else if (type === 'done') {
      const updated = { ...downloadProgress.value }
-      delete updated[model_id]
+      delete updated[repo_id]
      downloadProgress.value = updated
-      queuedModels.value = queuedModels.value.filter(m => m.id !== model_id)
+      queuedModels.value = queuedModels.value.filter(m => m.repo_id !== repo_id)
      loadInstalled()
-    } else if (status === 'error') {
+    } else if (type === 'error') {
      const entry = queuedModels.value.find(m => m.repo_id === repo_id)
      if (entry) {
        downloadErrors.value = {
          ...downloadErrors.value,
-        [model_id]: message ?? 'Download failed.',
+          [entry.id]: error ?? 'Download failed.',
        }
      }
    }
  })
@ -595,12 +774,6 @@ onUnmounted(() => {
  align-self: flex-start;
 }
 .btn-add-queue-warn {
  background: var(--color-surface-raised, #e4ebf5);
  color: var(--color-text-secondary, #6b7a99);
  border: 1px solid var(--color-border, #d0d7e8);
 }
 /* ── Model cards (queue + downloads) ── */
 .model-card {
  border: 1px solid var(--color-border, #a8b8d0);
@ -715,6 +888,35 @@ onUnmounted(() => {
  word-break: break-all;
 }
 .td-actions {
  display: flex;
  flex-direction: column;
  gap: 0.4rem;
  align-items: flex-start;
 }
 .classify-row {
  display: flex;
  gap: 0.35rem;
  align-items: center;
  flex-wrap: wrap;
 }
 .classify-select {
  font-size: 0.78rem;
  padding: 0.2rem 0.4rem;
  border-radius: 4px;
  border: 1px solid var(--color-border, #444);
  background: var(--color-surface, #1e1e2e);
  color: var(--color-text, #cdd6f4);
  cursor: pointer;
 }
 .classify-select:disabled {
  opacity: 0.4;
  cursor: not-allowed;
 }
 /* ── Badges ── */
 .badge-group {
  display: flex;
@ -777,6 +979,76 @@ onUnmounted(() => {
  background: color-mix(in srgb, var(--color-accent, #c4732a) 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-role {
  color: var(--color-info, #1e6091);
  background: color-mix(in srgb, var(--color-info, #1e6091) 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-sm {
  font-size: 0.68rem;
  padding: 0.1rem 0.4rem;
 }
 /* Service chips — one colour per CF service */
 .chip-service-avocet {
  color: var(--color-primary, #2d5a27);
  background: color-mix(in srgb, var(--color-primary, #2d5a27) 15%, var(--color-surface-alt, #dde4f0));
 }
 .chip-service-cf-text {
  color: #c2410c;
  background: color-mix(in srgb, #c2410c 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-service-cf-stt {
  color: #5e35b1;
  background: color-mix(in srgb, #5e35b1 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-service-cf-tts {
  color: #0277bd;
  background: color-mix(in srgb, #0277bd 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-service-cf-vision {
  color: #00695c;
  background: color-mix(in srgb, #00695c 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-service-cf-core {
  color: #6d4c41;
  background: color-mix(in srgb, #6d4c41 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-service-cf-voice {
  color: #ad1457;
  background: color-mix(in srgb, #ad1457 12%, var(--color-surface-alt, #dde4f0));
 }
 .chip-service-other {
  color: var(--color-text-muted, #4a5c7a);
  background: var(--color-surface-alt, #dde4f0);
 }
 /* ── Installed group ── */
 .installed-group {
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
 }
 .installed-group-header {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  padding: 0.25rem 0;
 }
 .installed-group-count {
  font-size: 0.78rem;
  color: var(--color-text-muted, #4a5c7a);
 }
 /* ── Buttons ── */
 .btn-primary, .btn-danger {
  padding: 0.4rem 0.9rem;
@ -852,7 +1124,7 @@ onUnmounted(() => {
  .installed-table th:nth-child(3),
  .installed-table td:nth-child(3) {
-    display: none;  /* hide Adapter column on very narrow screens */
+    display: none;  /* hide Role column on very narrow screens */
  }
 }
 </style>