feat(models): extended model registry + manage.sh benchmark subcommands

- app/models.py: add StyleModel and VoiceModel entries; expand cf-text and benchmark model metadata (vram_mb, description, tags)
- tests/test_models.py: coverage for new model types and registry helpers
- ModelsView.vue: updated model browser with style/voice filter tabs
- manage.sh: add benchmark-style and benchmark-voice subcommands
- config/label_tool.yaml.example: add style + voice benchmark config stubs
- web/.gitignore: add node_modules and dist entries
2026-04-24 14:56:24 -07:00 · 2026-04-24 14:56:24 -07:00 · ea3da701c6
commit ea3da701c6
parent ddb56efb89
6 changed files with 1150 additions and 152 deletions
--- a/app/models.py
+++ b/app/models.py
@ -14,11 +14,12 @@ from __future__ import annotations

 import json
 import logging
+import os
 import shutil
 import threading
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, TypedDict
 from uuid import uuid4

 import httpx
@ -39,21 +40,67 @@ _ROOT = Path(__file__).parent.parent
 _MODELS_DIR: Path = _ROOT / "models"
 _QUEUE_DIR: Path = _ROOT / "data"

+# Service-specific model destinations.
+# cf-text models land on the NFS-mounted shared asset store so every cluster
+# node can reach them without a separate download. Avocet classifiers stay local
+# because they are fine-tuned in-place and are only consumed by avocet itself.
+# Override via CF_TEXT_MODELS_DIR env var (useful for dev / non-NFS setups).
+# The env var is read once, when this module is imported; changing it later
+# in the process has no effect on this constant.
+_CF_TEXT_MODELS_DIR: Path = Path(
+    os.environ.get("CF_TEXT_MODELS_DIR", "/Library/Assets/LLM/cf-text/models")
+)
+
+# Directory containing per-node YAML profiles for cf-orch.
+# Auto-registration writes new catalog entries here on model download.
+# Override via CF_ORCH_PROFILES_DIR env var (also read once, at import time).
+_CF_ORCH_PROFILES_DIR: Path = Path(
+    os.environ.get(
+        "CF_ORCH_PROFILES_DIR",
+        "/Library/Development/CircuitForge/circuitforge-orch/circuitforge_orch/profiles/nodes",
+    )
+)
+
 router = APIRouter()

 # ── Download progress shared state ────────────────────────────────────────────
 # Updated by the background download thread; read by GET /download/stream.
 _download_progress: dict[str, Any] = {}

-# ── HF pipeline_tag → adapter recommendation ──────────────────────────────────
-_TAG_TO_ADAPTER: dict[str, str] = {
-    "zero-shot-classification": "ZeroShotAdapter",
-    "text-classification": "ZeroShotAdapter",
-    "natural-language-inference": "ZeroShotAdapter",
-    "sentence-similarity": "RerankerAdapter",
-    "text-ranking": "RerankerAdapter",
-    "text-generation": "GenerationAdapter",
-    "text2text-generation": "GenerationAdapter",
+# ── HF pipeline_tag → CF service info ────────────────────────────────────────
+
+
+class _TagInfo(TypedDict):
+    """Routing metadata recorded for one HuggingFace ``pipeline_tag``."""
+
+    adapter: str | None   # Avocet adapter class, or None if handled by another service
+    role: str             # Human-readable model role (classifier, stt, tts, vision, …)
+    service: str          # CF service that consumes this model type
+
+
+_TAG_TO_INFO: dict[str, _TagInfo] = {
+    # Avocet email classifiers
+    "zero-shot-classification":       {"adapter": "ZeroShotAdapter",   "role": "classifier",  "service": "avocet"},
+    "text-classification":            {"adapter": "ZeroShotAdapter",   "role": "classifier",  "service": "avocet"},
+    "natural-language-inference":     {"adapter": "ZeroShotAdapter",   "role": "classifier",  "service": "avocet"},
+    "sentence-similarity":            {"adapter": "RerankerAdapter",   "role": "reranker",    "service": "avocet"},
+    "text-ranking":                   {"adapter": "RerankerAdapter",   "role": "reranker",    "service": "avocet"},
+    "text-generation":                {"adapter": "GenerationAdapter", "role": "generator",   "service": "cf-text"},
+    "text2text-generation":           {"adapter": "GenerationAdapter", "role": "generator",   "service": "cf-text"},
+    "summarization":                  {"adapter": "GenerationAdapter", "role": "generator",   "service": "cf-text"},
+    # STT — cf-stt speech recognition service
+    "automatic-speech-recognition":   {"adapter": None, "role": "stt",       "service": "cf-stt"},
+    # Audio language models — audio + text → text (understanding, QA, captioning)
+    "audio-text-to-text":             {"adapter": None, "role": "alm",       "service": "cf-stt"},
+    # Audio classification — cf-voice sidecar context stream
+    "audio-classification":           {"adapter": None, "role": "classifier", "service": "cf-voice"},
+    # TTS — cf-tts text-to-speech service
+    "text-to-speech":                 {"adapter": None, "role": "tts",       "service": "cf-tts"},
+    # Vision — cf-vision image classification / embedding / VLM service
+    "image-classification":           {"adapter": None, "role": "vision",    "service": "cf-vision"},
+    "zero-shot-image-classification": {"adapter": None, "role": "vision",    "service": "cf-vision"},
+    "image-feature-extraction":       {"adapter": None, "role": "embedding", "service": "cf-vision"},
+    "image-text-to-text":             {"adapter": None, "role": "vlm",       "service": "cf-vision"},
+    "visual-question-answering":      {"adapter": None, "role": "vlm",       "service": "cf-vision"},
+    # Image generation — cf-image (text → image; distinct from cf-vision image understanding)
+    "text-to-image":                  {"adapter": None, "role": "image-gen", "service": "cf-image"},
+    # Embedding — cf-core shared embedding layer
+    "feature-extraction":             {"adapter": None, "role": "embedding", "service": "cf-core"},
 }


@ -84,14 +131,31 @@ def _write_queue(records: list[dict]) -> None:


 def _safe_model_name(repo_id: str) -> str:
-    """Convert repo_id to a filesystem-safe directory name (HF convention)."""
+    """Convert repo_id to a filesystem-safe directory name.
+
+    Uses the HuggingFace Hub convention: owner/model-name → owner--model-name.
+    This matches what snapshot_download produces under local_dir and what
+    cf-orch uses when constructing model paths for cf-text allocations.
+    """
    return repo_id.replace("/", "--")


-def _is_installed(repo_id: str) -> bool:
-    """Check if a model is already downloaded in _MODELS_DIR."""
+def _model_dir_for(repo_id: str, service: str | None) -> Path:
+    """Return the download destination directory for a model.
+
+    cf-text models → NFS shared asset store (_CF_TEXT_MODELS_DIR) so every
+    cluster node can load them without a separate download.
+    All other services (avocet classifiers, fine-tunes) → local _MODELS_DIR.
+    """
    safe_name = _safe_model_name(repo_id)
-    model_dir = _MODELS_DIR / safe_name
+    if service == "cf-text":
+        return _CF_TEXT_MODELS_DIR / safe_name
+    return _MODELS_DIR / safe_name
+
+
+def _is_installed(repo_id: str, service: str | None = None) -> bool:
+    """Check if a model is already downloaded in the appropriate destination."""
+    model_dir = _model_dir_for(repo_id, service)
    return model_dir.exists() and (
        (model_dir / "config.json").exists()
        or (model_dir / "training_info.json").exists()
@ -125,48 +189,289 @@ def _get_queue_entry(entry_id: str) -> dict | None:
    return None


+# ── cf-orch catalog auto-registration ─────────────────────────────────────────
+
+
+def _catalog_key(repo_id: str) -> str:
+    """Build the cf-orch catalog key for a HuggingFace repo id.
+
+    Everything up to and including the first ``/`` (the owner prefix) is
+    dropped and the remainder is lower-cased. An id without a ``/`` is only
+    lower-cased.
+
+    ibm-granite/granite-4.1-8b  →  granite-4.1-8b
+    facebook/bart-large-cnn     →  bart-large-cnn
+    """
+    _owner, slash, model_name = repo_id.partition("/")
+    if not slash:
+        # No owner prefix present: the whole id is the model name.
+        model_name = repo_id
+    return model_name.lower()
+
+
+def _insert_catalog_entry(content: str, entry_lines: str) -> str:
+    """Insert entry_lines at the end of the cf-text.catalog section.
+
+    Scans line by line (no YAML round-trip) to preserve all comments and
+    original formatting. The layout is matched purely by indentation:
+    ``cf-text:`` at indent 2, ``catalog:`` at indent 4, catalog content at
+    indent 6 or deeper.
+
+    The new entry is placed directly after the last real (non-blank,
+    non-comment) catalog line, so blank lines and comments that introduce the
+    *following* section stay attached to that section. A blank separator line
+    is added before the entry unless the catalog is empty.
+
+    Args:
+        content: Full text of a node profile YAML file.
+        entry_lines: Pre-indented, newline-terminated YAML block to insert.
+
+    Returns:
+        The updated text, or ``content`` unchanged if the catalog section
+        cannot be located (callers treat "unchanged" as failure).
+    """
+    lines = content.splitlines(keepends=True)
+
+    in_cf_text = False
+    header_idx: int | None = None  # index of the "catalog:" line
+    anchor_idx: int | None = None  # last line known to belong to the catalog
+
+    for i, line in enumerate(lines):
+        stripped = line.lstrip()
+        indent = len(line) - len(stripped)
+        blank_or_comment = not stripped or stripped.startswith("#")
+
+        if not in_cf_text:
+            if indent == 2 and stripped.startswith("cf-text:"):
+                in_cf_text = True
+            continue
+
+        if header_idx is None:
+            if indent == 4 and stripped.startswith("catalog:"):
+                header_idx = anchor_idx = i
+            elif not blank_or_comment and indent <= 2:
+                # Left cf-text section without finding a catalog
+                return content
+            continue
+
+        # Inside catalog: blank/comment lines never move the anchor, because
+        # they may belong to whatever section comes next.
+        if blank_or_comment:
+            continue
+        if indent < 6:
+            # First real line shallower than catalog content ends the section.
+            break
+        anchor_idx = i
+
+    if header_idx is None or anchor_idx is None:
+        return content
+
+    # The anchor can be the last line of a file with no trailing newline;
+    # terminate it so the entry starts on its own line.
+    if not lines[anchor_idx].endswith(("\n", "\r")):
+        lines[anchor_idx] += "\n"
+
+    # Separate from the previous entry with a blank line; an empty catalog
+    # gets its first entry directly under the "catalog:" header.
+    separator = "" if anchor_idx == header_idx else "\n"
+    lines.insert(anchor_idx + 1, separator + entry_lines)
+    return "".join(lines)
+
+
+def _register_in_node_catalogs(
+    repo_id: str,
+    local_path: Path,
+    vram_mb_fp16: int,
+    role: str,
+) -> list[str]:
+    """Insert a cf-text catalog entry into every eligible node YAML.
+
+    A node is eligible when:
+    - It has a ``cf-text.catalog`` section
+    - The model fits within the node's ``cf-text.max_mb`` at FP16 *or* 4-bit
+    - Neither the model key nor the local path is already in the catalog
+
+    The 4-bit footprint is estimated as a quarter of the FP16 figure plus 10%.
+    Each profile is handled best-effort: a file that cannot be read, parsed or
+    written is logged and skipped without affecting the others.
+
+    Args:
+        repo_id: HuggingFace repo id; its catalog key comes from ``_catalog_key``.
+        local_path: Directory the model was downloaded to.
+        vram_mb_fp16: Estimated FP16 VRAM footprint in MB.
+        role: Model role, used only in the human-readable description.
+
+    Returns:
+        The list of node names (YAML file stems) that were updated. Empty when
+        PyYAML is missing or the profiles directory does not exist.
+    """
+    try:
+        import yaml  # lazy — not in the critical import path
+    except ImportError:
+        logger.warning("PyYAML not available — skipping catalog registration for %s", repo_id)
+        return []
+
+    profiles_dir = _CF_ORCH_PROFILES_DIR
+    if not profiles_dir.exists():
+        logger.warning(
+            "cf-orch profiles dir not found: %s — skipping catalog registration", profiles_dir
+        )
+        return []
+
+    model_key = _catalog_key(repo_id)
+    local_path_str = str(local_path)
+    vram_4bit = round(vram_mb_fp16 / 4 * 1.1)
+    updated: list[str] = []
+
+    for yaml_file in sorted(profiles_dir.glob("*.yaml")):
+        try:
+            content = yaml_file.read_text(encoding="utf-8")
+            data = yaml.safe_load(content)
+
+            # An empty file parses to None and a scalar/list document has no
+            # services mapping; neither can host a cf-text catalog.
+            if not isinstance(data, dict):
+                logger.debug("%s is not a YAML mapping — skipping", yaml_file.name)
+                continue
+
+            services = data.get("services")
+            cf_text = services.get("cf-text") if isinstance(services, dict) else None
+            if not isinstance(cf_text, dict) or not cf_text:
+                continue
+
+            # "max_mb:" present but null must behave like a missing key.
+            max_mb: int = cf_text.get("max_mb") or 0
+            catalog: dict = cf_text.get("catalog") or {}
+
+            # Skip if key already exists
+            if model_key in catalog:
+                logger.debug("Key %r already in %s — skipping", model_key, yaml_file.name)
+                continue
+
+            # Skip if any existing entry already points at this path (or a file within it)
+            registered_paths = {
+                str(entry.get("path", ""))
+                for entry in catalog.values()
+                if isinstance(entry, dict)
+            }
+            if local_path_str in registered_paths or any(
+                p.startswith(local_path_str + "/") for p in registered_paths
+            ):
+                logger.debug("Path %s already registered in %s — skipping", local_path_str, yaml_file.name)
+                continue
+
+            # Determine whether model fits at FP16 or needs 4-bit
+            if vram_mb_fp16 <= max_mb:
+                vram_for_node = vram_mb_fp16
+                needs_4bit = False
+            elif vram_4bit <= max_mb:
+                vram_for_node = vram_4bit
+                needs_4bit = True
+            else:
+                logger.debug(
+                    "%s too large for %s (fp16=%d MB, 4bit=%d MB, max=%d MB)",
+                    repo_id, yaml_file.name, vram_mb_fp16, vram_4bit, max_mb,
+                )
+                continue
+
+            desc = f"{repo_id} ({role}, downloaded via avocet)"
+            if needs_4bit:
+                desc += " — CF_TEXT_4BIT=1 required"
+
+            vram_comment = (
+                f"  # 4-bit estimate; FP16 footprint is {vram_mb_fp16} MB"
+                if needs_4bit
+                else "  # FP16 file-size estimate"
+            )
+            # role comes from request/queue data, so the description is
+            # emitted as a JSON string: a valid YAML double-quoted scalar with
+            # quotes, backslashes and newlines escaped. For ordinary text the
+            # output is identical to wrapping it in plain double quotes.
+            desc_yaml = json.dumps(desc, ensure_ascii=False)
+            entry_block = (
+                f"      # auto-registered by avocet on download\n"
+                f"      {model_key}:\n"
+                f"        path: {local_path_str}\n"
+                f"        vram_mb: {vram_for_node}{vram_comment}\n"
+                f"        description: {desc_yaml}\n"
+            )
+
+            new_content = _insert_catalog_entry(content, entry_block)
+            if new_content == content:
+                logger.warning("Could not find catalog insertion point in %s", yaml_file.name)
+                continue
+
+            yaml_file.write_text(new_content, encoding="utf-8")
+            updated.append(yaml_file.stem)
+            logger.info(
+                "Registered %s in %s (vram_mb=%d, 4bit=%s)",
+                model_key, yaml_file.name, vram_for_node, needs_4bit,
+            )
+
+        except Exception as exc:
+            # Deliberately broad: one unreadable/malformed profile must not
+            # stop registration on the remaining nodes.
+            logger.warning("Could not update %s: %s", yaml_file.name, exc)
+
+    return updated
+
+
 # ── Background download ────────────────────────────────────────────────────────

-def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter_recommendation: str | None) -> None:
-    """Background thread: download model via huggingface_hub.snapshot_download."""
+def _poll_disk_progress(local_dir: Path, total_bytes: int, stop_event: threading.Event) -> None:
+    """Side-thread: poll local_dir size every 2s and update _download_progress.
+
+    snapshot_download is a blocking call with no progress callback, so we watch
+    the destination directory grow on disk as a proxy for download progress.
+    total_bytes=0 means we don't know the target size; pct stays 0 until done.
+
+    The reported percentage is capped at 99 so that only the download thread
+    ever reports 100. Nothing is published once ``stop_event`` is set or the
+    progress dict is marked done, so a slow directory scan cannot overwrite
+    the final state.
+
+    Args:
+        local_dir: Download destination whose file sizes are summed.
+        total_bytes: Expected total size in bytes, or 0 if unknown.
+        stop_event: Set by the download thread to end polling.
+    """
+    while not stop_event.is_set():
+        try:
+            downloaded = sum(
+                f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
+            )
+        except OSError as exc:
+            # Files appear, get renamed and disappear while the download is
+            # running; a failed scan just means we retry on the next tick.
+            logger.debug("Progress scan of %s failed: %s", local_dir, exc)
+        else:
+            # The scan may have taken a while — re-check before publishing so
+            # a finished download is never knocked back from 100% to <=99%.
+            if stop_event.is_set() or _download_progress.get("done"):
+                break
+            _download_progress["downloaded_bytes"] = downloaded
+            if total_bytes > 0:
+                _download_progress["total_bytes"] = total_bytes
+                _download_progress["pct"] = min(downloaded / total_bytes * 100, 99.0)
+        # Event.wait returns as soon as the event is set, so the thread exits
+        # promptly instead of sleeping out the full interval.
+        stop_event.wait(2)
+
+
+def _run_download(
+    entry_id: str,
+    repo_id: str,
+    pipeline_tag: str | None,
+    adapter_recommendation: str | None,
+    role: str | None = None,
+    service: str | None = None,
+    model_size_bytes: int = 0,
+) -> None:
+    """Background thread: download model via huggingface_hub.snapshot_download.
+
+    model_size_bytes is the sum of file sizes reported by the HF API (siblings).
+    It is used to estimate vram_mb and written to model_info.json so cf-orch can
+    budget VRAM when allocating a cf-text instance for this model.
+    """
    global _download_progress
-    safe_name = _safe_model_name(repo_id)
-    local_dir = _MODELS_DIR / safe_name
+    local_dir = _model_dir_for(repo_id, service)

    _download_progress = {
        "active": True,
        "repo_id": repo_id,
        "downloaded_bytes": 0,
-        "total_bytes": 0,
+        "total_bytes": model_size_bytes,
        "pct": 0.0,
        "done": False,
        "error": None,
    }

+    stop_poll = threading.Event()
+    poll_thread = threading.Thread(
+        target=_poll_disk_progress,
+        args=(local_dir, model_size_bytes, stop_poll),
+        daemon=True,
+        name=f"model-poll-{entry_id}",
+    )
+
    try:
        if snapshot_download is None:
            raise RuntimeError("huggingface_hub is not installed")

+        local_dir.mkdir(parents=True, exist_ok=True)
+        poll_thread.start()
        snapshot_download(
            repo_id=repo_id,
            local_dir=str(local_dir),
        )

-        # Write model_info.json alongside downloaded files
+        # Estimate VRAM from reported file size.
+        # HF siblings sizes are pre-quantisation file sizes; add 10% for KV cache
+        # and runtime overhead. Falls back to a stat of the local dir if 0.
+        if model_size_bytes == 0:
+            model_size_bytes = sum(
+                f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
+            )
+        vram_mb = int(model_size_bytes / (1024 * 1024) * 1.1)
+
+        # Write model_info.json alongside downloaded files.
+        # local_path + vram_mb are read by cf-orch at allocation time to resolve
+        # the full model path and grant the correct VRAM lease.
        model_info = {
            "repo_id": repo_id,
            "pipeline_tag": pipeline_tag,
            "adapter_recommendation": adapter_recommendation,
+            "role": role,
+            "service": service,
+            "model_size_bytes": model_size_bytes,
+            "vram_mb": vram_mb,
+            "local_path": str(local_dir),
            "downloaded_at": datetime.now(timezone.utc).isoformat(),
        }
-        local_dir.mkdir(parents=True, exist_ok=True)
        (local_dir / "model_info.json").write_text(
            json.dumps(model_info, indent=2), encoding="utf-8"
        )

+        # Auto-register cf-text models in the cf-orch node YAML catalogs so they
+        # appear in the benchmark model list without a manual YAML edit.
+        if service == "cf-text":
+            registered_on = _register_in_node_catalogs(
+                repo_id=repo_id,
+                local_path=local_dir,
+                vram_mb_fp16=vram_mb,
+                role=role or "generator",
+            )
+            if registered_on:
+                logger.info(
+                    "Auto-registered %s in node catalogs: %s",
+                    repo_id, ", ".join(registered_on),
+                )
+
        _download_progress["done"] = True
        _download_progress["pct"] = 100.0
-        _update_queue_entry(entry_id, {"status": "ready"})
+        _update_queue_entry(entry_id, {"status": "ready", "local_path": str(local_dir)})

    except Exception as exc:
        logger.exception("Download failed for %s: %s", repo_id, exc)
@ -174,6 +479,7 @@ def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter
        _download_progress["done"] = True
        _update_queue_entry(entry_id, {"status": "failed", "error": str(exc)})
    finally:
+        stop_poll.set()
        _download_progress["active"] = False


@ -199,11 +505,15 @@ def lookup_model(repo_id: str) -> dict:

    data = resp.json()
    pipeline_tag = data.get("pipeline_tag")
-    adapter_recommendation = _TAG_TO_ADAPTER.get(pipeline_tag) if pipeline_tag else None
+    tag_info = _TAG_TO_INFO.get(pipeline_tag) if pipeline_tag else None
+    adapter_recommendation = tag_info["adapter"] if tag_info else None
+    role = tag_info["role"] if tag_info else None
+    service = tag_info["service"] if tag_info else None

    # Determine compatibility and surface a human-readable warning
-    _supported = ", ".join(sorted(_TAG_TO_ADAPTER.keys()))
-    if adapter_recommendation is not None:
+    _supported = ", ".join(sorted(_TAG_TO_INFO.keys()))
+    if tag_info is not None:
+        # Any recognized tag is compatible — avocet adapters or another CF service
        compatible = True
        warning: str | None = None
    elif pipeline_tag is None:
@ -216,7 +526,7 @@ def lookup_model(repo_id: str) -> dict:
    else:
        compatible = False
        warning = (
-            f"\"{pipeline_tag}\" models are not supported by Avocet's email classification adapters. "
+            f"\"{pipeline_tag}\" models are not yet supported by the CircuitForge model ecosystem. "
            f"Supported task types: {_supported}."
        )
        logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id)
@ -234,6 +544,8 @@ def lookup_model(repo_id: str) -> dict:
        "repo_id": repo_id,
        "pipeline_tag": pipeline_tag,
        "adapter_recommendation": adapter_recommendation,
+        "role": role,
+        "service": service,
        "compatible": compatible,
        "warning": warning,
        "model_size_bytes": model_size_bytes,
@ -261,12 +573,18 @@ class QueueAddRequest(BaseModel):
    repo_id: str
    pipeline_tag: str | None = None
    adapter_recommendation: str | None = None
+    role: str | None = None
+    service: str | None = None
+    # Sum of file sizes from HF API siblings list; 0 if unknown.
+    # Stored in the queue entry so approve can pass it to _run_download
+    # without a second HF API round-trip.
+    model_size_bytes: int = 0


@router.post("/queue", status_code=201)
 def add_to_queue(req: QueueAddRequest) -> dict:
    """Add a model to the approval queue with status 'pending'."""
-    if _is_installed(req.repo_id):
+    if _is_installed(req.repo_id, service=req.service):
        raise HTTPException(409, f"{req.repo_id!r} is already installed")
    if _is_queued(req.repo_id):
        raise HTTPException(409, f"{req.repo_id!r} is already in the queue")
@ -276,6 +594,9 @@ def add_to_queue(req: QueueAddRequest) -> dict:
        "repo_id": req.repo_id,
        "pipeline_tag": req.pipeline_tag,
        "adapter_recommendation": req.adapter_recommendation,
+        "role": req.role,
+        "service": req.service,
+        "model_size_bytes": req.model_size_bytes,
        "status": "pending",
        "queued_at": datetime.now(timezone.utc).isoformat(),
    }
@ -300,7 +621,15 @@ def approve_queue_entry(entry_id: str) -> dict:

    thread = threading.Thread(
        target=_run_download,
-        args=(entry_id, entry["repo_id"], entry.get("pipeline_tag"), entry.get("adapter_recommendation")),
+        args=(
+            entry_id,
+            entry["repo_id"],
+            entry.get("pipeline_tag"),
+            entry.get("adapter_recommendation"),
+            entry.get("role"),
+            entry.get("service"),
+            entry.get("model_size_bytes", 0),
+        ),
        daemon=True,
        name=f"model-download-{entry_id}",
    )
@ -368,81 +697,286 @@ def download_stream() -> StreamingResponse:
    )


+# ── POST /sync-catalogs ────────────────────────────────────────────────────────
+
+@router.post("/sync-catalogs")
+def sync_catalogs() -> dict:
+    """Scan all installed cf-text models and register any missing from node YAMLs.
+
+    Reads model_info.json from each directory in the cf-text models dir and calls
+    _register_in_node_catalogs() for each. Idempotent — skips models already
+    present by key or path.
+
+    A directory is reported under "skipped" when it has no readable
+    model_info.json, when that file is not a JSON object, when its "service"
+    is not "cf-text", when its "vram_mb" is not numeric, or when no node
+    profile was updated for it.
+
+    Returns:
+        A summary dict with "registered" (repo id → list of updated node
+        names), "skipped" (directory names or repo ids) and a human-readable
+        "message".
+    """
+    if not _CF_TEXT_MODELS_DIR.exists():
+        return {"registered": {}, "skipped": [], "message": "cf-text models dir not found"}
+
+    registered: dict[str, list[str]] = {}
+    skipped: list[str] = []
+
+    for model_dir in sorted(_CF_TEXT_MODELS_DIR.iterdir()):
+        if not model_dir.is_dir():
+            continue
+        info_file = model_dir / "model_info.json"
+        if not info_file.exists():
+            skipped.append(model_dir.name)
+            continue
+
+        try:
+            info = json.loads(info_file.read_text(encoding="utf-8"))
+        except (OSError, ValueError) as exc:
+            # ValueError covers both invalid JSON and undecodable bytes.
+            logger.warning("Could not read model_info.json for %s: %s", model_dir.name, exc)
+            skipped.append(model_dir.name)
+            continue
+
+        # Valid JSON that is not an object (e.g. a list) carries no metadata.
+        if not isinstance(info, dict) or info.get("service") != "cf-text":
+            skipped.append(model_dir.name)
+            continue
+
+        # Keys may be present with a null value, so fall back on falsy values
+        # rather than relying on dict.get defaults alone.
+        repo_id = info.get("repo_id") or model_dir.name
+        role = info.get("role") or "generator"
+        try:
+            vram_mb = int(info.get("vram_mb") or 0)
+        except (TypeError, ValueError):
+            logger.warning("Non-numeric vram_mb in model_info.json for %s", model_dir.name)
+            skipped.append(repo_id)
+            continue
+
+        updated_nodes = _register_in_node_catalogs(
+            repo_id=repo_id,
+            local_path=model_dir,
+            vram_mb_fp16=vram_mb,
+            role=role,
+        )
+        if updated_nodes:
+            registered[repo_id] = updated_nodes
+        else:
+            skipped.append(repo_id)
+
+    return {
+        "registered": registered,
+        "skipped": skipped,
+        "message": (
+            f"Registered {len(registered)} model(s) on "
+            f"{sum(len(v) for v in registered.values())} node(s)"
+            if registered
+            else "All models already registered (or no eligible nodes found)"
+        ),
+    }
+
+
 # ── GET /installed ─────────────────────────────────────────────────────────────

@router.get("/installed")
 def list_installed() -> list[dict]:
-    """Scan _MODELS_DIR and return info on each installed model."""
-    if not _MODELS_DIR.exists():
-        return []
+    """Scan all model directories and return info on each installed model.
+
+    Scans both the local avocet models dir (classifiers, fine-tunes) and the
+    shared NFS cf-text models dir, deduplicating by directory path.
+
+    Falls back to queue entry data when model_info.json has null service/role,
+    so models downloaded before the pipeline_tag registry existed still group
+    correctly in the UI.
+    """
+    scan_dirs = [_MODELS_DIR]
+    if _CF_TEXT_MODELS_DIR != _MODELS_DIR and _CF_TEXT_MODELS_DIR.exists():
+        scan_dirs.append(_CF_TEXT_MODELS_DIR)
+
+    # Build a lookup from safe directory name → queue entry for fallback enrichment.
+    queue_by_safe_name: dict[str, dict] = {
+        _safe_model_name(r["repo_id"]): r
+        for r in _read_queue()
+        if r.get("repo_id") and r.get("status") not in ("dismissed",)
+    }

    results: list[dict] = []
-    for sub in _MODELS_DIR.iterdir():
-        if not sub.is_dir():
+    seen: set[Path] = set()
+
+    for scan_dir in scan_dirs:
+        if not scan_dir.exists():
            continue
+        for sub in scan_dir.iterdir():
+            if not sub.is_dir() or sub in seen:
+                continue
+            seen.add(sub)

-        has_training_info = (sub / "training_info.json").exists()
-        has_config = (sub / "config.json").exists()
-        has_model_info = (sub / "model_info.json").exists()
+            has_training_info = (sub / "training_info.json").exists()
+            has_config = (sub / "config.json").exists()
+            has_model_info = (sub / "model_info.json").exists()

-        if not (has_training_info or has_config or has_model_info):
-            continue
+            if not (has_training_info or has_config or has_model_info):
+                continue

-        model_type = "finetuned" if has_training_info else "downloaded"
+            model_type = "finetuned" if has_training_info else "downloaded"

-        # Compute directory size
-        size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())
+            # Compute directory size
+            size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())

-        # Load adapter/model_id from model_info.json or training_info.json
-        adapter: str | None = None
-        model_id: str | None = None
+            adapter: str | None = None
+            model_id: str | None = None
+            role: str | None = None
+            service: str | None = None
+            vram_mb: int | None = None

-        if has_model_info:
-            try:
-                info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
-                adapter = info.get("adapter_recommendation")
-                model_id = info.get("repo_id")
-            except Exception:
-                pass
-        elif has_training_info:
-            try:
-                info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
-                adapter = info.get("adapter")
-                model_id = info.get("base_model") or info.get("model_id")
-            except Exception:
-                pass
+            if has_model_info:
+                try:
+                    info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
+                    adapter = info.get("adapter_recommendation")
+                    model_id = info.get("repo_id")
+                    role = info.get("role")
+                    service = info.get("service")
+                    vram_mb = info.get("vram_mb")
+                except Exception:
+                    pass
+            elif has_training_info:
+                try:
+                    info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
+                    adapter = info.get("adapter")
+                    model_id = info.get("base_model") or info.get("model_id")
+                    role = info.get("role", "classifier")
+                    service = info.get("service", "avocet")
+                except Exception:
+                    pass

-        results.append({
-            "name": sub.name,
-            "path": str(sub),
-            "type": model_type,
-            "adapter": adapter,
-            "size_bytes": size_bytes,
-            "model_id": model_id,
-        })
+            # Fall back to queue entry when model_info.json has null service/role.
+            # This covers models downloaded before the pipeline_tag registry existed.
+            if (role is None or service is None) and sub.name in queue_by_safe_name:
+                q = queue_by_safe_name[sub.name]
+                role = role or q.get("role")
+                service = service or q.get("service")
+                model_id = model_id or q.get("repo_id")
+
+            # Last resort: re-derive from pipeline_tag if we still have no service.
+            if service is None and model_id:
+                hf_url = f"https://huggingface.co/api/models/{model_id}"
+                # Only attempt if we have a pipeline_tag cached somewhere.
+                for q in queue_by_safe_name.values():
+                    if q.get("repo_id") == model_id and q.get("pipeline_tag"):
+                        tag_info = _TAG_TO_INFO.get(q["pipeline_tag"])
+                        if tag_info:
+                            role = role or tag_info["role"]
+                            service = service or tag_info["service"]
+                        break
+
+            results.append({
+                "name": sub.name,
+                "path": str(sub),
+                "type": model_type,
+                "adapter": adapter,
+                "role": role,
+                "service": service,
+                "size_bytes": size_bytes,
+                "vram_mb": vram_mb,
+                "model_id": model_id,
+            })

    return results


+# ── PATCH /installed/{name} ────────────────────────────────────────────────────
+
+class InstalledModelPatch(BaseModel):
+    # Request body for PATCH /installed/{name}; both fields are required.
+    service: str  # written to the "service" key of model_info.json
+    role: str  # written to the "role" key of model_info.json
+
+
+@router.patch("/installed/{name}")
+def patch_installed(name: str, body: InstalledModelPatch) -> dict:
+    """Manually assign service and role to an installed model.
+
+    Writes the updated values back to model_info.json so they survive restarts,
+    and updates any matching queue entry so the UI shows the correct chip.
+
+    The model directory is looked up by name in the local models dir first and
+    then in the cf-text models dir. Existing model_info.json content is kept
+    when it is a JSON object; unreadable or non-object content is replaced.
+
+    Args:
+        name: Directory name of the installed model (a single path component).
+        body: The service and role to assign.
+
+    Returns:
+        ``{"ok": True, "service": ..., "role": ...}`` echoing the new values.
+
+    Raises:
+        HTTPException: 400 for an invalid or traversing name, 404 when no
+            model directory with that name exists.
+    """
+    if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
+        raise HTTPException(400, f"Invalid model name {name!r}")
+
+    candidate_dirs = [_MODELS_DIR]
+    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
+        candidate_dirs.append(_CF_TEXT_MODELS_DIR)
+
+    model_path: Path | None = None
+    for base in candidate_dirs:
+        candidate = base / name
+        try:
+            # Defence in depth: the resolved path must stay inside the base dir.
+            candidate.resolve().relative_to(base.resolve())
+        except ValueError:
+            raise HTTPException(400, f"Path traversal detected for name {name!r}") from None
+        # Only a directory can hold model_info.json; a stray file with the
+        # same name must not shadow a real model dir in the other location.
+        if candidate.is_dir():
+            model_path = candidate
+            break
+
+    if model_path is None:
+        raise HTTPException(404, f"Installed model {name!r} not found")
+
+    info_path = model_path / "model_info.json"
+    info: dict = {}
+    if info_path.exists():
+        try:
+            loaded = json.loads(info_path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # Unreadable or corrupt metadata: start from an empty record.
+            loaded = None
+        # Valid JSON that is not an object (list, string, …) cannot be patched.
+        if isinstance(loaded, dict):
+            info = loaded
+
+    info["service"] = body.service
+    info["role"] = body.role
+    info_path.write_text(json.dumps(info, indent=2), encoding="utf-8")
+
+    # Mirror the update into any matching queue entry.
+    records = _read_queue()
+    updated = False
+    for r in records:
+        # Queue fields may be missing or null; normalise both to "".
+        local = r.get("local_path") or ""
+        repo_id = r.get("repo_id") or ""
+        matches = (local and Path(local).name == name) or _safe_model_name(repo_id) == name
+        if matches and r.get("status") not in ("dismissed",):
+            r["service"] = body.service
+            r["role"] = body.role
+            updated = True
+    if updated:
+        _write_queue(records)
+
+    return {"ok": True, "service": body.service, "role": body.role}
+
+
 # ── DELETE /installed/{name} ───────────────────────────────────────────────────

@router.delete("/installed/{name}")
 def delete_installed(name: str) -> dict:
-    """Remove an installed model directory by name. Blocks path traversal."""
-    # Validate: single path component, no slashes or '..'
+    """Remove an installed model directory by name. Blocks path traversal.
+
+    Searches both the local avocet models dir and the shared cf-text models dir.
+    Also dismisses any matching queue entry so the UI doesn't show a stale "ready" card.
+
+    Raises:
+        HTTPException: 400 if ``name`` is not a single plain directory name or
+            resolves outside its base dir; 404 if no such model directory exists.
+    """
    if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
        raise HTTPException(400, f"Invalid model name {name!r}: must be a single directory name with no path separators or '..'")

-    model_path = _MODELS_DIR / name
+    # Search both model directories
+    candidate_dirs = [_MODELS_DIR]
+    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
+        candidate_dirs.append(_CF_TEXT_MODELS_DIR)

-    # Extra safety: confirm resolved path is inside _MODELS_DIR
-    try:
-        model_path.resolve().relative_to(_MODELS_DIR.resolve())
-    except ValueError:
-        raise HTTPException(400, f"Path traversal detected for name {name!r}")
+    model_path: Path | None = None
+    for base in candidate_dirs:
+        candidate = base / name
+        # Extra safety: confirm the resolved path is still inside this base dir.
+        try:
+            candidate.resolve().relative_to(base.resolve())
+        except ValueError:
+            raise HTTPException(400, f"Path traversal detected for name {name!r}")
+        # Only directories count as installed models: shutil.rmtree() raises on a
+        # plain file, which would surface as a 500 instead of a clean 404.
+        if candidate.is_dir():
+            model_path = candidate
+            break

-    if not model_path.exists():
-        raise HTTPException(404, f"Installed model {name!r} not found")
+    if model_path is None:
+        raise HTTPException(404, f"Installed model {name!r} not found in any model directory")

    shutil.rmtree(model_path)
+
+    # Dismiss any queue entries whose local_path matches, or whose repo_id maps to this dir name.
+    records = _read_queue()
+    updated = False
+    for r in records:
+        local = r.get("local_path", "")
+        matches_path = local and Path(local).name == name
+        matches_name = _safe_model_name(r.get("repo_id", "")) == name
+        if (matches_path or matches_name) and r.get("status") != "dismissed":
+            r["status"] = "dismissed"
+            updated = True
+    # Rewrite the queue file only when something actually changed.
+    if updated:
+        _write_queue(records)
+
    return {"ok": True}
--- a/config/label_tool.yaml.example
+++ b/config/label_tool.yaml.example
@ -57,11 +57,32 @@ imitate:
    - id: peregrine
      name: Peregrine
      icon: "🦅"
-      description: Job search assistant
-      base_url: http://localhost:8502
-      sample_endpoint: /api/jobs
-      text_fields: [title, description]
-      prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}"
+      description: Job search assistant — live job listings
+      base_url: http://localhost:8601
+      health_path: /api/jobs/counts
+      sample_endpoint: /api/jobs?status=pending&limit=5
+      text_fields: [title, company, description]
+      prompt_template: "Analyze this job listing and identify the key requirements, must-have skills, and any culture signals that would help tailor an application:\n\n{text}"
+
+    - id: osprey
+      name: Osprey
+      icon: "📞"
+      description: Gov't hold-line automation — recent call records
+      base_url: http://localhost:8520
+      health_path: /api/health
+      sample_endpoint: /api/calls/recent
+      text_fields: [agency, issue, notes]
+      prompt_template: "Draft a clear, professional follow-up letter for this government hold-line call. Include what was discussed, what action the agency committed to, and a polite deadline for response:\n\n{text}"
+
+    - id: linnet
+      name: Linnet
+      icon: "🐦"
+      description: Real-time tone annotation — Elcor-style subtext for ND users
+      base_url: http://localhost:8522
+      health_path: /health
+      sample_endpoint: /samples
+      text_fields: [text, context]
+      prompt_template: "Annotate the emotional tone and subtext of the following text using explicit Elcor-style markers (e.g. [SINCERELY], [UNCERTAIN], [FRUSTRATED]). Identify implied emotions, potential sarcasm, and any ambiguity that might be misread by neurodivergent readers:\n\n{text}"

    - id: kiwi
      name: Kiwi
--- a/manage.sh
+++ b/manage.sh
@ -90,6 +90,12 @@ usage() {
    echo -e "    ${GREEN}score [args]${NC}             Shortcut: --score [args]"
    echo -e "    ${GREEN}compare [args]${NC}           Shortcut: --compare [args]"
    echo ""
+    echo "  Writing Style Benchmark:"
+    echo -e "    ${GREEN}style-bench [args]${NC}       Run benchmark_style.py (args passed through)"
+    echo -e "    ${GREEN}style-list${NC}               List available ollama models for style bench"
+    echo -e "    ${GREEN}style-run [args]${NC}         Run writing style benchmark (--models, --samples, --include-large, --scan-disk PATH, --cforch)"
+    echo -e "    ${GREEN}style-last${NC}               Print most recent writing style benchmark report"
+    echo ""
    echo "  Dev:"
    echo -e "    ${GREEN}dev${NC}                      Hot-reload: uvicorn --reload (:8503) + Vite HMR (:5173)"
    echo -e "    ${GREEN}test${NC}                     Run pytest suite"
@ -249,6 +255,26 @@ case "$CMD" in
        exec "$0" benchmark --compare "$@"
        ;;

+    style-bench)
+        info "Running writing style benchmark (${ENV_BM})…"
+        if [[ ! -x "$PYTHON_BM" ]]; then
+            error "Python not found in ${ENV_BM} env at ${PYTHON_BM}"
+        fi
+        "$PYTHON_BM" scripts/benchmark_style.py "$@"
+        ;;
+
+    style-list)
+        exec "$0" style-bench --list-models
+        ;;
+
+    style-run)
+        exec "$0" style-bench --run "$@"
+        ;;
+
+    style-last)
+        exec "$0" style-bench --show-last
+        ;;
+
    help|--help|-h)
        usage
        ;;
--- a/tests/test_models.py
+++ b/tests/test_models.py
@ -122,17 +122,88 @@ def test_lookup_returns_correct_shape(client):
    assert data["already_queued"] is False


-def test_lookup_unknown_pipeline_tag_returns_null_adapter(client):
-    """An unrecognised pipeline_tag yields adapter_recommendation=null."""
+def test_lookup_unknown_pipeline_tag_returns_null_adapter_and_incompatible(client):
+    """An unrecognised pipeline_tag yields adapter_recommendation=null and compatible=False."""
    mock_resp = MagicMock()
    mock_resp.status_code = 200
-    mock_resp.json.return_value = _make_hf_response("org/m", "audio-classification")
+    # "audio-classification" is expected to be compatible (see
+    # test_lookup_audio_classification_tag_returns_cf_voice_service), so an
+    # unmapped tag is used here instead.
+    mock_resp.json.return_value = _make_hf_response("org/m", "reinforcement-learning")

    with patch("app.models.httpx.get", return_value=mock_resp):
        r = client.get("/api/models/lookup", params={"repo_id": "org/m"})

    assert r.status_code == 200
-    assert r.json()["adapter_recommendation"] is None
+    data = r.json()
+    assert data["adapter_recommendation"] is None
+    assert data["compatible"] is False
+    assert data["role"] is None
+    assert data["service"] is None
+    assert "CircuitForge model ecosystem" in data["warning"]
+
+
+def test_lookup_stt_tag_returns_compatible_with_cf_stt_service(client):
+    """Looking up an automatic-speech-recognition repo reports role=stt on cf-stt."""
+    repo = "openai/whisper-base"
+    hf_reply = MagicMock(status_code=200)
+    hf_reply.json.return_value = _make_hf_response(repo, "automatic-speech-recognition")
+
+    with patch("app.models.httpx.get", return_value=hf_reply):
+        resp = client.get("/api/models/lookup", params={"repo_id": repo})
+
+    assert resp.status_code == 200
+    payload = resp.json()
+    # Compatible, routed to cf-stt, with no adapter and no warning.
+    assert payload["compatible"] is True
+    assert payload["adapter_recommendation"] is None
+    assert payload["role"] == "stt"
+    assert payload["service"] == "cf-stt"
+    assert payload["warning"] is None
+
+
+def test_lookup_vision_tag_returns_compatible_with_cf_vision_service(client):
+    """Looking up an image-classification repo reports role=vision on cf-vision."""
+    repo = "google/siglip-base"
+    hf_reply = MagicMock(status_code=200)
+    hf_reply.json.return_value = _make_hf_response(repo, "image-classification")
+
+    with patch("app.models.httpx.get", return_value=hf_reply):
+        resp = client.get("/api/models/lookup", params={"repo_id": repo})
+
+    assert resp.status_code == 200
+    payload = resp.json()
+    assert payload["compatible"] is True
+    assert payload["role"] == "vision"
+    assert payload["service"] == "cf-vision"
+
+
+def test_lookup_audio_classification_tag_returns_cf_voice_service(client):
+    """Looking up an audio-classification repo reports role=classifier on cf-voice."""
+    repo = "org/audio-model"
+    hf_reply = MagicMock(status_code=200)
+    hf_reply.json.return_value = _make_hf_response(repo, "audio-classification")
+
+    with patch("app.models.httpx.get", return_value=hf_reply):
+        resp = client.get("/api/models/lookup", params={"repo_id": repo})
+
+    assert resp.status_code == 200
+    payload = resp.json()
+    assert payload["compatible"] is True
+    assert payload["role"] == "classifier"
+    assert payload["service"] == "cf-voice"
+
+
+def test_lookup_embedding_tag_returns_compatible_with_cf_core_service(client):
+    """Looking up a feature-extraction repo reports role=embedding on cf-core."""
+    repo = "BAAI/bge-small-en"
+    hf_reply = MagicMock(status_code=200)
+    hf_reply.json.return_value = _make_hf_response(repo, "feature-extraction")
+
+    with patch("app.models.httpx.get", return_value=hf_reply):
+        resp = client.get("/api/models/lookup", params={"repo_id": repo})
+
+    assert resp.status_code == 200
+    payload = resp.json()
+    assert payload["compatible"] is True
+    assert payload["role"] == "embedding"
+    assert payload["service"] == "cf-core"


 def test_lookup_already_queued_flag(client):
@ -181,6 +252,26 @@ def test_queue_add_returns_entry_fields(client):
    assert entry["adapter_recommendation"] == "ZeroShotAdapter"


+def test_queue_preserves_role_and_service(client):
+    """role/service sent to POST /queue come back from both the POST and GET /queue."""
+    request_body = {
+        "repo_id": "openai/whisper-base",
+        "pipeline_tag": "automatic-speech-recognition",
+        "adapter_recommendation": None,
+        "role": "stt",
+        "service": "cf-stt",
+    }
+    created = client.post("/api/models/queue", json=request_body)
+    assert created.status_code == 201
+    new_entry = created.json()
+    assert new_entry["role"] == "stt"
+    assert new_entry["service"] == "cf-stt"
+
+    # The same values must be visible when the queue is listed afterwards.
+    listing = client.get("/api/models/queue")
+    queued = listing.json()
+    assert queued[0]["role"] == "stt"
+    assert queued[0]["service"] == "cf-stt"
+
+
 # ── POST /queue — 409 duplicate ────────────────────────────────────────────────

 def test_queue_duplicate_returns_409(client):
@ -317,7 +408,12 @@ def test_installed_detects_downloaded_model(client, tmp_path):
    model_dir.mkdir()
    (model_dir / "config.json").write_text(json.dumps({"model_type": "bert"}), encoding="utf-8")
    (model_dir / "model_info.json").write_text(
-        json.dumps({"repo_id": "org/mymodel", "adapter_recommendation": "ZeroShotAdapter"}),
+        json.dumps({
+            "repo_id": "org/mymodel",
+            "adapter_recommendation": "ZeroShotAdapter",
+            "role": "classifier",
+            "service": "avocet",
+        }),
        encoding="utf-8",
    )

@ -329,6 +425,51 @@ def test_installed_detects_downloaded_model(client, tmp_path):
    assert items[0]["name"] == "org--mymodel"
    assert items[0]["adapter"] == "ZeroShotAdapter"
    assert items[0]["model_id"] == "org/mymodel"
+    assert items[0]["role"] == "classifier"
+    assert items[0]["service"] == "avocet"
+
+
+def test_installed_stt_model_surfaces_role_and_service(client):
+    """GET /installed reports the role/service stored in an STT model's model_info.json."""
+    from app import models as models_module
+
+    info = {
+        "repo_id": "openai/whisper-base",
+        "adapter_recommendation": None,
+        "role": "stt",
+        "service": "cf-stt",
+    }
+    whisper_dir = models_module._MODELS_DIR / "openai--whisper-base"
+    whisper_dir.mkdir()
+    (whisper_dir / "config.json").write_text(json.dumps({"model_type": "whisper"}), encoding="utf-8")
+    (whisper_dir / "model_info.json").write_text(json.dumps(info), encoding="utf-8")
+
+    resp = client.get("/api/models/installed")
+    assert resp.status_code == 200
+    installed = resp.json()
+    assert installed[0]["role"] == "stt"
+    assert installed[0]["service"] == "cf-stt"
+    assert installed[0]["adapter"] is None
+
+
+def test_installed_finetuned_model_defaults_to_avocet_service(client):
+    """A fine-tuned model whose training_info.json has no role/service is listed as avocet/classifier."""
+    from app import models as models_module
+
+    training_info = {"base_model": "microsoft/deberta-v3-base", "epochs": 3}
+    finetuned_dir = models_module._MODELS_DIR / "my-finetuned-v2"
+    finetuned_dir.mkdir()
+    (finetuned_dir / "training_info.json").write_text(json.dumps(training_info), encoding="utf-8")
+
+    resp = client.get("/api/models/installed")
+    assert resp.status_code == 200
+    installed = resp.json()
+    assert installed[0]["role"] == "classifier"
+    assert installed[0]["service"] == "avocet"


 def test_installed_detects_finetuned_model(client):
--- a/web/.gitignore
+++ b/web/.gitignore
@ -22,3 +22,7 @@ dist-ssr
 *.njsproj
 *.sln
 *.sw?
+
+# Local environment overrides
+.env
+
--- a/web/src/views/ModelsView.vue
+++ b/web/src/views/ModelsView.vue
@ -42,6 +42,12 @@
          <span v-if="lookupResult.pipeline_tag" class="chip chip-pipeline">
            {{ lookupResult.pipeline_tag }}
          </span>
+          <span v-if="lookupResult.role" class="chip chip-role">
+            {{ lookupResult.role }}
+          </span>
+          <span v-if="lookupResult.service" class="chip" :class="serviceChipClass(lookupResult.service)">
+            {{ lookupResult.service }}
+          </span>
          <span v-if="lookupResult.adapter_recommendation" class="chip chip-adapter">
            {{ lookupResult.adapter_recommendation }}
          </span>
@ -61,11 +67,10 @@

        <button
          class="btn-primary btn-add-queue"
-          :class="{ 'btn-add-queue-warn': !lookupResult.compatible }"
          :disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue"
          @click="addToQueue"
        >
-          {{ addingToQueue ? 'Adding…' : lookupResult.compatible ? 'Add to queue' : 'Add anyway' }}
+          {{ addingToQueue ? 'Adding…' : 'Add to queue' }}
        </button>
      </div>
    </section>
@ -90,7 +95,9 @@
          </button>
        </div>
        <div class="model-meta">
-          <span v-if="model.pipeline_tag"         class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
+          <span v-if="model.pipeline_tag"          class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
+          <span v-if="model.role"                  class="chip chip-role">{{ model.role }}</span>
+          <span v-if="model.service"               class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
          <span v-if="model.adapter_recommendation" class="chip chip-adapter">{{ model.adapter_recommendation }}</span>
        </div>
        <div class="model-card-actions">
@ -116,6 +123,8 @@
        </div>
        <div class="model-meta">
          <span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
+          <span v-if="model.role"         class="chip chip-role">{{ model.role }}</span>
+          <span v-if="model.service"      class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
        </div>

        <div v-if="downloadErrors[model.id]" class="download-error" role="alert">
@ -124,14 +133,19 @@
        <div v-else class="progress-wrap" :aria-label="`Download progress for ${model.repo_id}`">
          <div
            class="progress-bar"
-            :style="{ width: `${downloadProgress[model.id] ?? 0}%` }"
+            :style="{ width: `${downloadProgress[model.repo_id]?.pct ?? 0}%` }"
            role="progressbar"
-            :aria-valuenow="downloadProgress[model.id] ?? 0"
+            :aria-valuenow="downloadProgress[model.repo_id]?.pct ?? 0"
            aria-valuemin="0"
            aria-valuemax="100"
          />
          <span class="progress-label">
-            {{ downloadProgress[model.id] == null ? 'Preparing…' : `${downloadProgress[model.id]}%` }}
+            {{
+              !downloadProgress[model.repo_id] ? 'Preparing…'
+              : downloadProgress[model.repo_id].pct != null ? `${Math.round(downloadProgress[model.repo_id].pct!)}%`
+              : downloadProgress[model.repo_id].bytes > 0 ? `${(downloadProgress[model.repo_id].bytes / 1024 / 1024).toFixed(0)} MB downloaded…`
+              : 'Preparing…'
+            }}
          </span>
        </div>
      </div>
@ -145,42 +159,90 @@
        No models installed yet.
      </div>

-      <div v-else class="installed-table-wrap">
-        <table class="installed-table">
-          <thead>
-            <tr>
-              <th>Name</th>
-              <th>Type</th>
-              <th>Adapter</th>
-              <th>Size</th>
-              <th></th>
-            </tr>
-          </thead>
-          <tbody>
-            <tr v-for="model in installedModels" :key="model.name">
-              <td class="td-name">{{ model.name }}</td>
-              <td>
-                <span
-                  class="badge"
-                  :class="model.type === 'finetuned' ? 'badge-accent' : 'badge-info'"
-                >
-                  {{ model.type }}
-                </span>
-              </td>
-              <td>{{ model.adapter ?? '—' }}</td>
-              <td>{{ humanBytes(model.size) }}</td>
-              <td>
-                <button
-                  class="btn-danger btn-sm"
-                  @click="deleteInstalled(model.name)"
-                >
-                  Delete
-                </button>
-              </td>
-            </tr>
-          </tbody>
-        </table>
-      </div>
+      <template v-else>
+        <div
+          v-for="group in installedByService"
+          :key="group.service"
+          class="installed-group"
+        >
+          <div class="installed-group-header">
+            <span class="chip" :class="serviceChipClass(group.service)">
+              {{ serviceLabel(group.service) }}
+            </span>
+            <span class="installed-group-count">{{ group.models.length }} model{{ group.models.length !== 1 ? 's' : '' }}</span>
+          </div>
+
+          <div class="installed-table-wrap">
+            <table class="installed-table">
+              <thead>
+                <tr>
+                  <th>Name</th>
+                  <th>Type</th>
+                  <th>Role</th>
+                  <th>Size</th>
+                  <th></th>
+                </tr>
+              </thead>
+              <tbody>
+                <tr v-for="model in group.models" :key="model.name">
+                  <td class="td-name">{{ model.model_id ?? model.name }}</td>
+                  <td>
+                    <span
+                      class="badge"
+                      :class="model.type === 'finetuned' ? 'badge-accent' : 'badge-info'"
+                    >
+                      {{ model.type }}
+                    </span>
+                  </td>
+                  <td>
+                    <span v-if="model.role" class="chip chip-role chip-sm">{{ model.role }}</span>
+                    <span v-else>—</span>
+                  </td>
+                  <td>{{ humanBytes(model.size_bytes) }}</td>
+                  <td class="td-actions">
+                    <div v-if="!model.service" class="classify-row">
+                      <select
+                        class="classify-select"
+                        :value="classifyDraft[model.name]?.service ?? ''"
+                        @change="onServiceChange(model.name, ($event.target as HTMLSelectElement).value)"
+                        aria-label="Assign service"
+                      >
+                        <option value="" disabled>Service…</option>
+                        <option v-for="svc in CLASSIFIABLE_SERVICES" :key="svc.value" :value="svc.value">{{ svc.label }}</option>
+                      </select>
+                      <select
+                        class="classify-select"
+                        :value="classifyDraft[model.name]?.role ?? ''"
+                        :disabled="!classifyDraft[model.name]?.service"
+                        @change="(e) => setClassifyRole(model.name, (e.target as HTMLSelectElement).value)"
+                        aria-label="Assign role"
+                      >
+                        <option value="" disabled>Role…</option>
+                        <option
+                          v-for="role in rolesForService(classifyDraft[model.name]?.service ?? '')"
+                          :key="role"
+                          :value="role"
+                        >{{ role }}</option>
+                      </select>
+                      <button
+                        class="btn-primary btn-sm"
+                        :disabled="!classifyDraft[model.name]?.service || !classifyDraft[model.name]?.role"
+                        @click="saveClassify(model.name)"
+                      >Save</button>
+                    </div>
+                    <button
+                      class="btn-danger btn-sm"
+                      @click="deleteInstalled(model.name)"
+                    >
+                      Delete
+                    </button>
+                  </td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+        </div>
+      </template>
    </section>
  </div>
 </template>
@ -194,6 +256,8 @@ interface LookupResult {
  repo_id: string
  pipeline_tag: string | null
  adapter_recommendation: string | null
+  role: string | null
+  service: string | null
  compatible: boolean
  warning: string | null
  size: number | null
@ -208,20 +272,27 @@ interface QueuedModel {
  status: 'pending' | 'downloading' | 'done' | 'error'
  pipeline_tag: string | null
  adapter_recommendation: string | null
+  role: string | null
+  service: string | null
 }

 interface InstalledModel {
  name: string
  type: 'finetuned' | 'downloaded'
  adapter: string | null
-  size: number
+  role: string | null
+  service: string | null
+  size_bytes: number
+  model_id: string | null
 }

 interface SseProgressEvent {
-  model_id: string
-  pct: number | null
-  status: 'progress' | 'done' | 'error'
-  message?: string
+  type: 'progress' | 'done' | 'error' | 'idle'
+  repo_id?: string
+  pct?: number
+  downloaded_bytes?: number
+  total_bytes?: number
+  error?: string
 }

 // ── State ─────────────────────────────────────────────
@ -235,7 +306,8 @@ const addingToQueue = ref(false)
 const queuedModels    = ref<QueuedModel[]>([])
 const installedModels = ref<InstalledModel[]>([])

-const downloadProgress = ref<Record<string, number>>({})
+const downloadProgress  = ref<Record<string, { pct: number | null; bytes: number }>>({})
+const classifyDraft     = ref<Record<string, { service: string; role: string }>>({})
 const downloadErrors   = ref<Record<string, string>>({})

 let pollInterval: ReturnType<typeof setInterval> | null = null
@ -251,8 +323,69 @@ const downloadingModels = computed(() =>
  queuedModels.value.filter(m => m.status === 'downloading')
 )

+// Display order of the service groups in the installed-models list; services
+// not listed here are appended after these.
+const SERVICE_ORDER = ['avocet', 'cf-text', 'cf-stt', 'cf-tts', 'cf-vision', 'cf-image', 'cf-core', 'cf-voice', 'other']
+
+// Options of the "Assign service" dropdown shown for installed models that
+// have no service yet.
+const CLASSIFIABLE_SERVICES = [
+  { value: 'avocet',    label: 'Avocet — Email Classifiers' },
+  { value: 'cf-text',   label: 'cf-text — Language Models' },
+  { value: 'cf-stt',    label: 'cf-stt — Speech Recognition' },
+  { value: 'cf-tts',    label: 'cf-tts — Text to Speech' },
+  { value: 'cf-vision', label: 'cf-vision — Vision / VLM' },
+  { value: 'cf-image',  label: 'cf-image — Image Generation' },
+  { value: 'cf-core',   label: 'cf-core — Embeddings' },
+  { value: 'cf-voice',  label: 'cf-voice — Audio Classification' },
+]
+
+// Roles offered in the "Assign role" dropdown for each service. A service with
+// a single role gets that role pre-selected when the service is chosen.
+const SERVICE_ROLES: Record<string, string[]> = {
+  'avocet':    ['classifier', 'reranker'],
+  'cf-text':   ['generator'],
+  'cf-stt':    ['stt', 'alm'],
+  'cf-tts':    ['tts'],
+  'cf-vision': ['vision', 'vlm', 'embedding'],
+  'cf-image':  ['image-gen'],
+  'cf-core':   ['embedding'],
+  'cf-voice':  ['classifier'],
+}
+
+// Roles selectable for a service; an unknown service yields no roles.
+function rolesForService(service: string): string[] {
+  const roles = SERVICE_ROLES[service]
+  return roles ?? []
+}
+
+// Installed models bucketed by service, as an ordered list of
+// { service, models } sections for the template.
+const installedByService = computed(() => {
+  const buckets: Record<string, InstalledModel[]> = {}
+  installedModels.value.forEach((entry) => {
+    // Models without a service are collected under 'other'.
+    const service = entry.service ?? 'other'
+    if (!buckets[service]) {
+      buckets[service] = []
+    }
+    buckets[service].push(entry)
+  })
+
+  // Known services come first in SERVICE_ORDER order, then any remaining keys.
+  const known = SERVICE_ORDER.filter(service => buckets[service])
+  const extra = Object.keys(buckets).filter(service => !SERVICE_ORDER.includes(service))
+  return [...known, ...extra].map(service => ({ service, models: buckets[service] }))
+})
+
 // ── Helpers ───────────────────────────────────────────

+// Group-header label per service key; 'other' is the bucket for installed
+// models that have no service assigned.
+const SERVICE_LABELS: Record<string, string> = {
+  'avocet':    'Avocet — Email Classifiers',
+  'cf-text':   'cf-text — Language Models',
+  'cf-stt':    'cf-stt — Speech Recognition',
+  'cf-tts':    'cf-tts — Text to Speech',
+  'cf-vision': 'cf-vision — Vision / VLM',
+  'cf-image':  'cf-image — Image Generation',
+  'cf-core':   'cf-core — Embeddings',
+  'cf-voice':  'cf-voice — Audio Classification',
+  'other':     'Other — Unclassified',
+}
+
+// Human-readable label for a service key; unknown keys are shown verbatim.
+function serviceLabel(service: string): string {
+  const label = SERVICE_LABELS[service]
+  return label ?? service
+}
+
+// CSS class for a service chip: `chip-service-<key>`, where every character of
+// the key outside [a-z0-9] becomes '-'. A null or empty service gets the
+// "other" style.
+function serviceChipClass(service: string | null): string {
+  if (service) {
+    const slug = service.replace(/[^a-z0-9]/g, '-')
+    return `chip-service-${slug}`
+  }
+  return 'chip-service-other'
+}
+
 function humanBytes(bytes: number | null): string {
  if (bytes == null) return '—'
  const units = ['B', 'KB', 'MB', 'GB', 'TB']
@ -305,10 +438,11 @@ async function addToQueue() {
  if (!lookupResult.value) return
  addingToQueue.value = true
  try {
+    const { repo_id, pipeline_tag, adapter_recommendation, role, service } = lookupResult.value
    const res = await fetch('/api/models/queue', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ repo_id: lookupResult.value.repo_id }),
+      body: JSON.stringify({ repo_id, pipeline_tag, adapter_recommendation, role, service }),
    })
    if (res.ok) {
      lookupResult.value = { ...lookupResult.value, already_queued: true }
@ -339,12 +473,50 @@ async function dismissModel(id: string) {
  } catch { /* ignore */ }
 }

+// Store the chosen service in the model's classify draft. The role is reset,
+// or pre-selected when the service offers exactly one role.
+function onServiceChange(name: string, service: string) {
+  const options = SERVICE_ROLES[service] ?? []
+  let role = ''
+  if (options.length === 1) {
+    role = options[0]
+  }
+  // Assign a fresh object rather than mutating the current one in place.
+  const drafts = { ...classifyDraft.value }
+  drafts[name] = { service, role }
+  classifyDraft.value = drafts
+}
+
+// Set the role in the model's classify draft, keeping its other fields.
+function setClassifyRole(name: string, role: string) {
+  const current = classifyDraft.value[name]
+  const drafts = { ...classifyDraft.value }
+  drafts[name] = { ...current, role }
+  classifyDraft.value = drafts
+}
+
+// PATCH the drafted service/role for an installed model. On success the local
+// list is updated, the draft is dropped and the queue is reloaded; a failed or
+// rejected request leaves everything as it was.
+async function saveClassify(name: string) {
+  const draft = classifyDraft.value[name]
+  if (!draft?.service || !draft?.role) return
+  const { service, role } = draft
+  try {
+    const endpoint = `/api/models/installed/${encodeURIComponent(name)}`
+    const res = await fetch(endpoint, {
+      method: 'PATCH',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ service, role }),
+    })
+    if (!res.ok) return
+
+    // Update in-place so the model moves to the correct service group
+    installedModels.value = installedModels.value.map(m =>
+      m.name !== name ? m : { ...m, service, role }
+    )
+    const remaining = { ...classifyDraft.value }
+    delete remaining[name]
+    classifyDraft.value = remaining
+    await loadQueue()
+  } catch { /* non-fatal */ }
+}
+
 async function deleteInstalled(name: string) {
  if (!window.confirm(`Delete installed model "${name}"? This cannot be undone.`)) return
  try {
    const res = await fetch(`/api/models/installed/${encodeURIComponent(name)}`, { method: 'DELETE' })
    if (res.ok) {
      installedModels.value = installedModels.value.filter(m => m.name !== name)
+      // The backend DELETE also dismisses matching queue entries, so re-fetch the queue.
+      await loadQueue()
    }
  } catch { /* ignore */ }
 }
@ -378,21 +550,28 @@ function startSse() {
      return
    }

-    const { model_id, pct, status, message } = event
+    const { type, repo_id, pct, downloaded_bytes, error } = event
+    if (!repo_id) return

-    if (status === 'progress' && pct != null) {
-      downloadProgress.value = { ...downloadProgress.value, [model_id]: pct }
-    } else if (status === 'done') {
+    if (type === 'progress') {
+      const bytes = downloaded_bytes ?? 0
+      // pct stays null when total_bytes is unknown so we can show "X MB" instead
+      const progress = (pct != null && pct > 0) ? pct : (bytes > 0 ? null : undefined)
+      downloadProgress.value = { ...downloadProgress.value, [repo_id]: { pct: progress ?? null, bytes } }
+    } else if (type === 'done') {
      const updated = { ...downloadProgress.value }
-      delete updated[model_id]
+      delete updated[repo_id]
      downloadProgress.value = updated

-      queuedModels.value = queuedModels.value.filter(m => m.id !== model_id)
+      queuedModels.value = queuedModels.value.filter(m => m.repo_id !== repo_id)
      loadInstalled()
-    } else if (status === 'error') {
-      downloadErrors.value = {
-        ...downloadErrors.value,
-        [model_id]: message ?? 'Download failed.',
+    } else if (type === 'error') {
+      const entry = queuedModels.value.find(m => m.repo_id === repo_id)
+      if (entry) {
+        downloadErrors.value = {
+          ...downloadErrors.value,
+          [entry.id]: error ?? 'Download failed.',
+        }
      }
    }
  })
@ -595,12 +774,6 @@ onUnmounted(() => {
  align-self: flex-start;
 }

-.btn-add-queue-warn {
-  background: var(--color-surface-raised, #e4ebf5);
-  color: var(--color-text-secondary, #6b7a99);
-  border: 1px solid var(--color-border, #d0d7e8);
-}
-
 /* ── Model cards (queue + downloads) ── */
 .model-card {
  border: 1px solid var(--color-border, #a8b8d0);
@ -715,6 +888,35 @@ onUnmounted(() => {
  word-break: break-all;
 }

+.td-actions {
+  display: flex;
+  flex-direction: column;
+  gap: 0.4rem;
+  align-items: flex-start;
+}
+
+.classify-row {
+  display: flex;
+  gap: 0.35rem;
+  align-items: center;
+  flex-wrap: wrap;
+}
+
+.classify-select {
+  font-size: 0.78rem;
+  padding: 0.2rem 0.4rem;
+  border-radius: 4px;
+  border: 1px solid var(--color-border, #444);
+  background: var(--color-surface, #1e1e2e);
+  color: var(--color-text, #cdd6f4);
+  cursor: pointer;
+}
+
+.classify-select:disabled {
+  opacity: 0.4;
+  cursor: not-allowed;
+}
+
 /* ── Badges ── */
 .badge-group {
  display: flex;
@ -777,6 +979,76 @@ onUnmounted(() => {
  background: color-mix(in srgb, var(--color-accent, #c4732a) 12%, var(--color-surface-alt, #dde4f0));
 }

+/* Role chip: info-coloured text on a background made of 12% info colour
+   mixed into the alt surface colour. */
+.chip-role {
+  background: color-mix(in srgb, var(--color-info, #1e6091) 12%, var(--color-surface-alt, #dde4f0));
+  color: var(--color-info, #1e6091);
+}
+
+/* Size modifier: smaller text and tighter padding. */
+.chip-sm {
+  padding: 0.1rem 0.4rem;
+  font-size: 0.68rem;
+}
+
+/* Service chips — one colour per CF service.
+   Each rule sets the text colour and tints the background by mixing that
+   same colour into the alt surface colour. */
+
+/* avocet: theme primary colour, mixed at 15%. */
+.chip-service-avocet {
+  background: color-mix(in srgb, var(--color-primary, #2d5a27) 15%, var(--color-surface-alt, #dde4f0));
+  color: var(--color-primary, #2d5a27);
+}
+
+/* The cf-* services below use fixed hex colours, each mixed at 12%. */
+.chip-service-cf-text {
+  background: color-mix(in srgb, #c2410c 12%, var(--color-surface-alt, #dde4f0));
+  color: #c2410c;
+}
+
+.chip-service-cf-stt {
+  background: color-mix(in srgb, #5e35b1 12%, var(--color-surface-alt, #dde4f0));
+  color: #5e35b1;
+}
+
+.chip-service-cf-tts {
+  background: color-mix(in srgb, #0277bd 12%, var(--color-surface-alt, #dde4f0));
+  color: #0277bd;
+}
+
+.chip-service-cf-vision {
+  background: color-mix(in srgb, #00695c 12%, var(--color-surface-alt, #dde4f0));
+  color: #00695c;
+}
+
+.chip-service-cf-core {
+  background: color-mix(in srgb, #6d4c41 12%, var(--color-surface-alt, #dde4f0));
+  color: #6d4c41;
+}
+
+.chip-service-cf-voice {
+  background: color-mix(in srgb, #ad1457 12%, var(--color-surface-alt, #dde4f0));
+  color: #ad1457;
+}
+
+/* "other": muted text on the untinted alt surface. */
+.chip-service-other {
+  background: var(--color-surface-alt, #dde4f0);
+  color: var(--color-text-muted, #4a5c7a);
+}
+
+/* ── Installed group ── */
+/* Container: flex column with a 0.5rem gap between children. */
+.installed-group {
+  gap: 0.5rem;
+  flex-direction: column;
+  display: flex;
+}
+
+/* Header: flex row, children vertically centred and spaced 0.5rem apart,
+   with 0.25rem of vertical padding and no horizontal padding. */
+.installed-group-header {
+  padding: 0.25rem 0;
+  gap: 0.5rem;
+  align-items: center;
+  display: flex;
+}
+
+/* Count: small, muted text. */
+.installed-group-count {
+  color: var(--color-text-muted, #4a5c7a);
+  font-size: 0.78rem;
+}
+
 /* ── Buttons ── */
 .btn-primary, .btn-danger {
  padding: 0.4rem 0.9rem;
@ -852,7 +1124,7 @@ onUnmounted(() => {

  .installed-table th:nth-child(3),
  .installed-table td:nth-child(3) {
-    display: none;  /* hide Adapter column on very narrow screens */
+    display: none;  /* hide Role column on very narrow screens */
  }
 }
 </style>