feat(models): extended model registry + manage.sh benchmark subcommands

- app/models.py: add StyleModel and VoiceModel entries; expand cf-text and
  benchmark model metadata (vram_mb, description, tags)
- tests/test_models.py: coverage for new model types and registry helpers
- ModelsView.vue: updated model browser with style/voice filter tabs
- manage.sh: add benchmark-style and benchmark-voice subcommands
- config/label_tool.yaml.example: add style + voice benchmark config stubs
- web/.gitignore: add node_modules and dist entries
This commit is contained in:
pyr0ball 2026-04-24 14:56:24 -07:00
parent ddb56efb89
commit ea3da701c6
6 changed files with 1150 additions and 152 deletions

View file

@ -14,11 +14,12 @@ from __future__ import annotations
import json import json
import logging import logging
import os
import shutil import shutil
import threading import threading
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any, TypedDict
from uuid import uuid4 from uuid import uuid4
import httpx import httpx
@ -39,21 +40,67 @@ _ROOT = Path(__file__).parent.parent
_MODELS_DIR: Path = _ROOT / "models" _MODELS_DIR: Path = _ROOT / "models"
_QUEUE_DIR: Path = _ROOT / "data" _QUEUE_DIR: Path = _ROOT / "data"
# Service-specific model destinations.
# cf-text models land on the NFS-mounted shared asset store so every cluster
# node can reach them without a separate download. Avocet classifiers stay local
# because they are fine-tuned in-place and are only consumed by avocet itself.
# Override via CF_TEXT_MODELS_DIR env var (useful for dev / non-NFS setups).
_CF_TEXT_MODELS_DIR: Path = Path(
os.environ.get("CF_TEXT_MODELS_DIR", "/Library/Assets/LLM/cf-text/models")
)
# Directory containing per-node YAML profiles for cf-orch.
# Auto-registration writes new catalog entries here on model download.
_CF_ORCH_PROFILES_DIR: Path = Path(
os.environ.get(
"CF_ORCH_PROFILES_DIR",
"/Library/Development/CircuitForge/circuitforge-orch/circuitforge_orch/profiles/nodes",
)
)
router = APIRouter() router = APIRouter()
# ── Download progress shared state ──────────────────────────────────────────── # ── Download progress shared state ────────────────────────────────────────────
# Updated by the background download thread; read by GET /download/stream. # Updated by the background download thread; read by GET /download/stream.
_download_progress: dict[str, Any] = {} _download_progress: dict[str, Any] = {}
# ── HF pipeline_tag → adapter recommendation ────────────────────────────────── # ── HF pipeline_tag → CF service info ────────────────────────────────────────
_TAG_TO_ADAPTER: dict[str, str] = {
"zero-shot-classification": "ZeroShotAdapter",
"text-classification": "ZeroShotAdapter", class _TagInfo(TypedDict):
"natural-language-inference": "ZeroShotAdapter", adapter: str | None # Avocet adapter class, or None if handled by another service
"sentence-similarity": "RerankerAdapter", role: str # Human-readable model role (classifier, stt, tts, vision, …)
"text-ranking": "RerankerAdapter", service: str # CF service that consumes this model type
"text-generation": "GenerationAdapter",
"text2text-generation": "GenerationAdapter",
_TAG_TO_INFO: dict[str, _TagInfo] = {
# Avocet email classifiers
"zero-shot-classification": {"adapter": "ZeroShotAdapter", "role": "classifier", "service": "avocet"},
"text-classification": {"adapter": "ZeroShotAdapter", "role": "classifier", "service": "avocet"},
"natural-language-inference": {"adapter": "ZeroShotAdapter", "role": "classifier", "service": "avocet"},
"sentence-similarity": {"adapter": "RerankerAdapter", "role": "reranker", "service": "avocet"},
"text-ranking": {"adapter": "RerankerAdapter", "role": "reranker", "service": "avocet"},
"text-generation": {"adapter": "GenerationAdapter", "role": "generator", "service": "cf-text"},
"text2text-generation": {"adapter": "GenerationAdapter", "role": "generator", "service": "cf-text"},
"summarization": {"adapter": "GenerationAdapter", "role": "generator", "service": "cf-text"},
# STT — cf-stt speech recognition service
"automatic-speech-recognition": {"adapter": None, "role": "stt", "service": "cf-stt"},
# Audio language models — audio + text → text (understanding, QA, captioning)
"audio-text-to-text": {"adapter": None, "role": "alm", "service": "cf-stt"},
# Audio classification — cf-voice sidecar context stream
"audio-classification": {"adapter": None, "role": "classifier", "service": "cf-voice"},
# TTS — cf-tts text-to-speech service
"text-to-speech": {"adapter": None, "role": "tts", "service": "cf-tts"},
# Vision — cf-vision image classification / embedding / VLM service
"image-classification": {"adapter": None, "role": "vision", "service": "cf-vision"},
"zero-shot-image-classification": {"adapter": None, "role": "vision", "service": "cf-vision"},
"image-feature-extraction": {"adapter": None, "role": "embedding", "service": "cf-vision"},
"image-text-to-text": {"adapter": None, "role": "vlm", "service": "cf-vision"},
"visual-question-answering": {"adapter": None, "role": "vlm", "service": "cf-vision"},
# Image generation — cf-image (text → image; distinct from cf-vision image understanding)
"text-to-image": {"adapter": None, "role": "image-gen", "service": "cf-image"},
# Embedding — cf-core shared embedding layer
"feature-extraction": {"adapter": None, "role": "embedding", "service": "cf-core"},
} }
@ -84,14 +131,31 @@ def _write_queue(records: list[dict]) -> None:
def _safe_model_name(repo_id: str) -> str:
    """Return a filesystem-safe directory name for a HuggingFace repo id.

    Every "/" in ``repo_id`` becomes "--" (the HuggingFace Hub convention):
    owner/model-name → owner--model-name.  The same name is used for the
    download directory of every service.
    """
    segments = repo_id.split("/")
    return "--".join(segments)
def _model_dir_for(repo_id: str, service: str | None) -> Path:
    """Return the directory a model is (or will be) downloaded into.

    Models whose ``service`` is exactly "cf-text" live under
    ``_CF_TEXT_MODELS_DIR``; every other service value, including ``None``,
    maps to the local ``_MODELS_DIR``.  The leaf directory name is always
    ``_safe_model_name(repo_id)``.
    """
    base_dir = _CF_TEXT_MODELS_DIR if service == "cf-text" else _MODELS_DIR
    return base_dir / _safe_model_name(repo_id)
def _is_installed(repo_id: str, service: str | None = None) -> bool:
"""Check if a model is already downloaded in the appropriate destination."""
model_dir = _model_dir_for(repo_id, service)
return model_dir.exists() and ( return model_dir.exists() and (
(model_dir / "config.json").exists() (model_dir / "config.json").exists()
or (model_dir / "training_info.json").exists() or (model_dir / "training_info.json").exists()
@ -125,48 +189,289 @@ def _get_queue_entry(entry_id: str) -> dict | None:
return None return None
# ── cf-orch catalog auto-registration ─────────────────────────────────────────
def _catalog_key(repo_id: str) -> str:
    """Derive a readable, lower-cased catalog key from a repo id.

    The owner prefix (everything up to and including the first "/") is
    dropped; a repo id without "/" is used whole.

        ibm-granite/granite-4.1-8b → granite-4.1-8b
        facebook/bart-large-cnn    → bart-large-cnn
    """
    owner, slash, model_name = repo_id.partition("/")
    # Without a separator, partition() leaves the whole id in the first slot.
    key = model_name if slash else owner
    return key.lower()
def _insert_catalog_entry(content: str, entry_lines: str) -> str:
    """Splice ``entry_lines`` onto the end of the ``cf-text`` → ``catalog`` section.

    The YAML text is walked line by line (never re-serialised) so comments
    and formatting survive.  The walk relies on fixed indentation: the
    ``cf-text:`` key at 2 spaces, its ``catalog:`` key at 4, and catalog
    entries at 6 or deeper.

    Returns ``content`` unchanged when no ``cf-text`` section with a
    ``catalog`` key is found.
    """
    rows = content.splitlines(keepends=True)
    phase = "seek-service"  # → "seek-catalog" → "in-catalog"

    for idx, row in enumerate(rows):
        text = row.lstrip()
        depth = len(row) - len(text)
        has_content = bool(text) and not text.startswith("#")

        if phase == "seek-service":
            if depth == 2 and text.startswith("cf-text:"):
                phase = "seek-catalog"
        elif phase == "seek-catalog":
            if depth == 4 and text.startswith("catalog:"):
                phase = "in-catalog"
            elif has_content and depth <= 2:
                # Walked out of the cf-text section without seeing a catalog.
                return content
        elif has_content and depth < 6:
            # First real line shallower than a catalog entry: the catalog
            # ended just above, so the new entry goes right before this line.
            spacer = "\n" if rows[idx - 1].strip() else ""
            rows.insert(idx, spacer + entry_lines)
            return "".join(rows)

    if phase != "in-catalog":
        return content

    # The catalog is the last thing in the file — append at EOF.
    spacer = "\n" if rows and rows[-1].strip() else ""
    rows.append(spacer + entry_lines)
    return "".join(rows)
def _register_in_node_catalogs(
    repo_id: str,
    local_path: Path,
    vram_mb_fp16: int,
    role: str,
) -> list[str]:
    """Insert a cf-text catalog entry into every eligible node YAML.

    A node (one ``*.yaml`` file in ``_CF_ORCH_PROFILES_DIR``) is eligible when:
      - It has a ``services.cf-text`` mapping
      - The model fits within the node's ``cf-text.max_mb`` at FP16 *or* at
        the 4-bit estimate (``vram_mb_fp16 / 4 * 1.1``)
      - Neither the model key nor the local path is already in the catalog

    The entry is spliced into the file text by ``_insert_catalog_entry`` so
    comments are preserved.  The edited text is re-parsed before it is written
    back; a file is left untouched if the edit would not yield a catalog that
    contains the new key.

    Args:
        repo_id: HuggingFace repo id; its catalog key comes from ``_catalog_key``.
        local_path: Directory the model was downloaded into.
        vram_mb_fp16: Estimated FP16 VRAM footprint in MB.
        role: Human-readable model role, used in the entry description.

    Returns:
        Stems of the node YAML files that were updated.  Empty when PyYAML or
        the profiles directory is unavailable.  Per-file failures are logged
        and skipped, never raised.
    """
    try:
        import yaml  # lazy — not in the critical import path
    except ImportError:
        logger.warning("PyYAML not available — skipping catalog registration for %s", repo_id)
        return []

    profiles_dir = _CF_ORCH_PROFILES_DIR
    if not profiles_dir.exists():
        logger.warning(
            "cf-orch profiles dir not found: %s — skipping catalog registration", profiles_dir
        )
        return []

    model_key = _catalog_key(repo_id)
    local_path_str = str(local_path)
    vram_4bit = round(vram_mb_fp16 / 4 * 1.1)
    updated: list[str] = []

    for yaml_file in sorted(profiles_dir.glob("*.yaml")):
        try:
            content = yaml_file.read_text(encoding="utf-8")
            data = yaml.safe_load(content)
            if not isinstance(data, dict):
                # Empty file or a non-mapping document: nothing to register in.
                continue

            cf_text = (data.get("services") or {}).get("cf-text")
            if not cf_text or not isinstance(cf_text, dict):
                continue

            # A bare ``max_mb:`` parses as None; treat it like a missing key.
            max_mb: int = cf_text.get("max_mb") or 0
            catalog = cf_text.get("catalog") or {}
            if not isinstance(catalog, dict):
                logger.warning("cf-text.catalog in %s is not a mapping — skipping", yaml_file.name)
                continue

            # Skip if key already exists.  Keys are compared as strings because
            # YAML may parse a purely numeric key as an int.
            if model_key in {str(existing) for existing in catalog}:
                logger.debug("Key %r already in %s — skipping", model_key, yaml_file.name)
                continue

            # Skip if any existing entry already points at this path (or a file within it)
            registered_paths = {
                str(entry.get("path", ""))
                for entry in catalog.values()
                if isinstance(entry, dict)
            }
            if local_path_str in registered_paths or any(
                p.startswith(local_path_str + "/") for p in registered_paths
            ):
                logger.debug("Path %s already registered in %s — skipping", local_path_str, yaml_file.name)
                continue

            # Determine whether model fits at FP16 or needs 4-bit
            if vram_mb_fp16 <= max_mb:
                vram_for_node = vram_mb_fp16
                needs_4bit = False
            elif vram_4bit <= max_mb:
                vram_for_node = vram_4bit
                needs_4bit = True
            else:
                logger.debug(
                    "%s too large for %s (fp16=%d MB, 4bit=%d MB, max=%d MB)",
                    repo_id, yaml_file.name, vram_mb_fp16, vram_4bit, max_mb,
                )
                continue

            desc = f"{repo_id} ({role}, downloaded via avocet)"
            if needs_4bit:
                desc += " — CF_TEXT_4BIT=1 required"

            vram_comment = (
                f"  # 4-bit estimate; FP16 footprint is {vram_mb_fp16} MB"
                if needs_4bit
                else "  # FP16 file-size estimate"
            )
            # Entry key at 6 spaces and its fields at 8, matching the layout
            # _insert_catalog_entry scans for (catalog: at 4, entries at >= 6).
            entry_block = (
                f"      # auto-registered by avocet on download\n"
                f"      {model_key}:\n"
                f"        path: {local_path_str}\n"
                f"        vram_mb: {vram_for_node}{vram_comment}\n"
                f"        description: \"{desc}\"\n"
            )

            new_content = _insert_catalog_entry(content, entry_block)
            if new_content == content:
                logger.warning("Could not find catalog insertion point in %s", yaml_file.name)
                continue

            # The insertion is purely textual and cannot see YAML structure
            # (e.g. an inline ``catalog: {}``), so confirm the result still
            # parses and actually contains the new key before overwriting.
            try:
                reparsed = yaml.safe_load(new_content)
                new_catalog = reparsed["services"]["cf-text"]["catalog"]
                inserted_ok = isinstance(new_catalog, dict) and model_key in {
                    str(existing) for existing in new_catalog
                }
            except (yaml.YAMLError, KeyError, TypeError):
                inserted_ok = False
            if not inserted_ok:
                logger.warning(
                    "Inserting %s would corrupt %s — leaving file untouched",
                    model_key, yaml_file.name,
                )
                continue

            yaml_file.write_text(new_content, encoding="utf-8")
            updated.append(yaml_file.stem)
            logger.info(
                "Registered %s in %s (vram_mb=%d, 4bit=%s)",
                model_key, yaml_file.name, vram_for_node, needs_4bit,
            )

        except Exception as exc:
            # Best-effort per file: one unreadable profile must not block the rest.
            logger.warning("Could not update %s: %s", yaml_file.name, exc)

    return updated
# ── Background download ──────────────────────────────────────────────────────── # ── Background download ────────────────────────────────────────────────────────
def _poll_disk_progress(local_dir: Path, total_bytes: int, stop_event: threading.Event) -> None:
    """Side-thread: sample the size of ``local_dir`` every 2s into ``_download_progress``.

    The download call gives this module no progress callback, so the growth
    of the destination directory is used as a proxy for download progress.

    Args:
        local_dir: Directory being downloaded into; all files under it are summed.
        total_bytes: Expected final size.  0 means unknown, in which case only
            ``downloaded_bytes`` is published and ``pct`` is left alone.
        stop_event: Set by the download thread to end polling.

    ``pct`` is capped at 99.0 here; the download thread itself writes 100.0.
    """
    while not stop_event.is_set():
        try:
            downloaded = sum(
                f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
            )
        except OSError:
            # Files can appear, move or vanish between listing and stat while
            # the download is writing; drop this sample and try again.
            downloaded = None

        # Never publish once the download has finished or been told to stop:
        # a late sample would overwrite the final pct=100.0 with <= 99.0.
        finished = stop_event.is_set() or _download_progress.get("done")
        if downloaded is not None and not finished:
            _download_progress["downloaded_bytes"] = downloaded
            if total_bytes > 0:
                _download_progress["total_bytes"] = total_bytes
                _download_progress["pct"] = min(downloaded / total_bytes * 100, 99.0)

        # Event.wait returns as soon as the event is set, so the thread exits
        # promptly instead of sleeping out the full interval.
        stop_event.wait(2)
def _run_download(
entry_id: str,
repo_id: str,
pipeline_tag: str | None,
adapter_recommendation: str | None,
role: str | None = None,
service: str | None = None,
model_size_bytes: int = 0,
) -> None:
"""Background thread: download model via huggingface_hub.snapshot_download.
model_size_bytes is the sum of file sizes reported by the HF API (siblings).
It is used to estimate vram_mb and written to model_info.json so cf-orch can
budget VRAM when allocating a cf-text instance for this model.
"""
global _download_progress global _download_progress
safe_name = _safe_model_name(repo_id) local_dir = _model_dir_for(repo_id, service)
local_dir = _MODELS_DIR / safe_name
_download_progress = { _download_progress = {
"active": True, "active": True,
"repo_id": repo_id, "repo_id": repo_id,
"downloaded_bytes": 0, "downloaded_bytes": 0,
"total_bytes": 0, "total_bytes": model_size_bytes,
"pct": 0.0, "pct": 0.0,
"done": False, "done": False,
"error": None, "error": None,
} }
stop_poll = threading.Event()
poll_thread = threading.Thread(
target=_poll_disk_progress,
args=(local_dir, model_size_bytes, stop_poll),
daemon=True,
name=f"model-poll-{entry_id}",
)
try: try:
if snapshot_download is None: if snapshot_download is None:
raise RuntimeError("huggingface_hub is not installed") raise RuntimeError("huggingface_hub is not installed")
local_dir.mkdir(parents=True, exist_ok=True)
poll_thread.start()
snapshot_download( snapshot_download(
repo_id=repo_id, repo_id=repo_id,
local_dir=str(local_dir), local_dir=str(local_dir),
) )
# Write model_info.json alongside downloaded files # Estimate VRAM from reported file size.
# HF siblings sizes are pre-quantisation file sizes; add 10% for KV cache
# and runtime overhead. Falls back to a stat of the local dir if 0.
if model_size_bytes == 0:
model_size_bytes = sum(
f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
)
vram_mb = int(model_size_bytes / (1024 * 1024) * 1.1)
# Write model_info.json alongside downloaded files.
# local_path + vram_mb are read by cf-orch at allocation time to resolve
# the full model path and grant the correct VRAM lease.
model_info = { model_info = {
"repo_id": repo_id, "repo_id": repo_id,
"pipeline_tag": pipeline_tag, "pipeline_tag": pipeline_tag,
"adapter_recommendation": adapter_recommendation, "adapter_recommendation": adapter_recommendation,
"role": role,
"service": service,
"model_size_bytes": model_size_bytes,
"vram_mb": vram_mb,
"local_path": str(local_dir),
"downloaded_at": datetime.now(timezone.utc).isoformat(), "downloaded_at": datetime.now(timezone.utc).isoformat(),
} }
local_dir.mkdir(parents=True, exist_ok=True)
(local_dir / "model_info.json").write_text( (local_dir / "model_info.json").write_text(
json.dumps(model_info, indent=2), encoding="utf-8" json.dumps(model_info, indent=2), encoding="utf-8"
) )
# Auto-register cf-text models in the cf-orch node YAML catalogs so they
# appear in the benchmark model list without a manual YAML edit.
if service == "cf-text":
registered_on = _register_in_node_catalogs(
repo_id=repo_id,
local_path=local_dir,
vram_mb_fp16=vram_mb,
role=role or "generator",
)
if registered_on:
logger.info(
"Auto-registered %s in node catalogs: %s",
repo_id, ", ".join(registered_on),
)
_download_progress["done"] = True _download_progress["done"] = True
_download_progress["pct"] = 100.0 _download_progress["pct"] = 100.0
_update_queue_entry(entry_id, {"status": "ready"}) _update_queue_entry(entry_id, {"status": "ready", "local_path": str(local_dir)})
except Exception as exc: except Exception as exc:
logger.exception("Download failed for %s: %s", repo_id, exc) logger.exception("Download failed for %s: %s", repo_id, exc)
@ -174,6 +479,7 @@ def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter
_download_progress["done"] = True _download_progress["done"] = True
_update_queue_entry(entry_id, {"status": "failed", "error": str(exc)}) _update_queue_entry(entry_id, {"status": "failed", "error": str(exc)})
finally: finally:
stop_poll.set()
_download_progress["active"] = False _download_progress["active"] = False
@ -199,11 +505,15 @@ def lookup_model(repo_id: str) -> dict:
data = resp.json() data = resp.json()
pipeline_tag = data.get("pipeline_tag") pipeline_tag = data.get("pipeline_tag")
adapter_recommendation = _TAG_TO_ADAPTER.get(pipeline_tag) if pipeline_tag else None tag_info = _TAG_TO_INFO.get(pipeline_tag) if pipeline_tag else None
adapter_recommendation = tag_info["adapter"] if tag_info else None
role = tag_info["role"] if tag_info else None
service = tag_info["service"] if tag_info else None
# Determine compatibility and surface a human-readable warning # Determine compatibility and surface a human-readable warning
_supported = ", ".join(sorted(_TAG_TO_ADAPTER.keys())) _supported = ", ".join(sorted(_TAG_TO_INFO.keys()))
if adapter_recommendation is not None: if tag_info is not None:
# Any recognized tag is compatible — avocet adapters or another CF service
compatible = True compatible = True
warning: str | None = None warning: str | None = None
elif pipeline_tag is None: elif pipeline_tag is None:
@ -216,7 +526,7 @@ def lookup_model(repo_id: str) -> dict:
else: else:
compatible = False compatible = False
warning = ( warning = (
f"\"{pipeline_tag}\" models are not supported by Avocet's email classification adapters. " f"\"{pipeline_tag}\" models are not yet supported by the CircuitForge model ecosystem. "
f"Supported task types: {_supported}." f"Supported task types: {_supported}."
) )
logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id) logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id)
@ -234,6 +544,8 @@ def lookup_model(repo_id: str) -> dict:
"repo_id": repo_id, "repo_id": repo_id,
"pipeline_tag": pipeline_tag, "pipeline_tag": pipeline_tag,
"adapter_recommendation": adapter_recommendation, "adapter_recommendation": adapter_recommendation,
"role": role,
"service": service,
"compatible": compatible, "compatible": compatible,
"warning": warning, "warning": warning,
"model_size_bytes": model_size_bytes, "model_size_bytes": model_size_bytes,
@ -261,12 +573,18 @@ class QueueAddRequest(BaseModel):
repo_id: str repo_id: str
pipeline_tag: str | None = None pipeline_tag: str | None = None
adapter_recommendation: str | None = None adapter_recommendation: str | None = None
role: str | None = None
service: str | None = None
# Sum of file sizes from HF API siblings list; 0 if unknown.
# Stored in the queue entry so approve can pass it to _run_download
# without a second HF API round-trip.
model_size_bytes: int = 0
@router.post("/queue", status_code=201) @router.post("/queue", status_code=201)
def add_to_queue(req: QueueAddRequest) -> dict: def add_to_queue(req: QueueAddRequest) -> dict:
"""Add a model to the approval queue with status 'pending'.""" """Add a model to the approval queue with status 'pending'."""
if _is_installed(req.repo_id): if _is_installed(req.repo_id, service=req.service):
raise HTTPException(409, f"{req.repo_id!r} is already installed") raise HTTPException(409, f"{req.repo_id!r} is already installed")
if _is_queued(req.repo_id): if _is_queued(req.repo_id):
raise HTTPException(409, f"{req.repo_id!r} is already in the queue") raise HTTPException(409, f"{req.repo_id!r} is already in the queue")
@ -276,6 +594,9 @@ def add_to_queue(req: QueueAddRequest) -> dict:
"repo_id": req.repo_id, "repo_id": req.repo_id,
"pipeline_tag": req.pipeline_tag, "pipeline_tag": req.pipeline_tag,
"adapter_recommendation": req.adapter_recommendation, "adapter_recommendation": req.adapter_recommendation,
"role": req.role,
"service": req.service,
"model_size_bytes": req.model_size_bytes,
"status": "pending", "status": "pending",
"queued_at": datetime.now(timezone.utc).isoformat(), "queued_at": datetime.now(timezone.utc).isoformat(),
} }
@ -300,7 +621,15 @@ def approve_queue_entry(entry_id: str) -> dict:
thread = threading.Thread( thread = threading.Thread(
target=_run_download, target=_run_download,
args=(entry_id, entry["repo_id"], entry.get("pipeline_tag"), entry.get("adapter_recommendation")), args=(
entry_id,
entry["repo_id"],
entry.get("pipeline_tag"),
entry.get("adapter_recommendation"),
entry.get("role"),
entry.get("service"),
entry.get("model_size_bytes", 0),
),
daemon=True, daemon=True,
name=f"model-download-{entry_id}", name=f"model-download-{entry_id}",
) )
@ -368,18 +697,104 @@ def download_stream() -> StreamingResponse:
) )
# ── POST /sync-catalogs ────────────────────────────────────────────────────────
@router.post("/sync-catalogs")
def sync_catalogs() -> dict:
    """Scan all installed cf-text models and register any missing from node YAMLs.

    Reads model_info.json from each directory in ``_CF_TEXT_MODELS_DIR`` and
    calls ``_register_in_node_catalogs()`` for each one whose ``service`` is
    "cf-text".  Idempotent — models already present by key or path are
    skipped by the registration helper.

    A null or missing ``role`` falls back to "generator".  A missing or
    non-positive ``vram_mb`` is re-estimated from the on-disk size (+10%),
    the same fallback the download path uses, so a model is never registered
    with a 0 MB footprint that would fit every node.

    Returns:
        ``{"registered": {repo_id: [node, ...]}, "skipped": [...], "message": str}``.
        ``skipped`` holds directory names for unreadable or non-cf-text models
        and repo ids for models that no node file was updated for.
    """
    if not _CF_TEXT_MODELS_DIR.exists():
        return {"registered": {}, "skipped": [], "message": "cf-text models dir not found"}

    registered: dict[str, list[str]] = {}
    skipped: list[str] = []

    for model_dir in sorted(_CF_TEXT_MODELS_DIR.iterdir()):
        if not model_dir.is_dir():
            continue
        info_file = model_dir / "model_info.json"
        if not info_file.exists():
            skipped.append(model_dir.name)
            continue
        try:
            info = json.loads(info_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            logger.warning("Could not read model_info.json for %s: %s", model_dir.name, exc)
            skipped.append(model_dir.name)
            continue

        if not isinstance(info, dict) or info.get("service") != "cf-text":
            skipped.append(model_dir.name)
            continue

        # ``or`` rather than a .get() default: the keys may be present but null.
        repo_id = info.get("repo_id") or model_dir.name
        role = info.get("role") or "generator"

        vram_mb = info.get("vram_mb")
        vram_known = (
            isinstance(vram_mb, (int, float))
            and not isinstance(vram_mb, bool)
            and vram_mb > 0
        )
        if not vram_known:
            size_bytes = sum(
                f.stat().st_size for f in model_dir.rglob("*") if f.is_file()
            )
            vram_mb = size_bytes / (1024 * 1024) * 1.1

        updated_nodes = _register_in_node_catalogs(
            repo_id=repo_id,
            local_path=model_dir,
            vram_mb_fp16=int(vram_mb),
            role=role,
        )
        if updated_nodes:
            registered[repo_id] = updated_nodes
        else:
            skipped.append(repo_id)

    return {
        "registered": registered,
        "skipped": skipped,
        "message": (
            f"Registered {len(registered)} model(s) on "
            f"{sum(len(v) for v in registered.values())} node(s)"
            if registered
            else "All models already registered (or no eligible nodes found)"
        ),
    }
# ── GET /installed ───────────────────────────────────────────────────────────── # ── GET /installed ─────────────────────────────────────────────────────────────
@router.get("/installed") @router.get("/installed")
def list_installed() -> list[dict]: def list_installed() -> list[dict]:
"""Scan _MODELS_DIR and return info on each installed model.""" """Scan all model directories and return info on each installed model.
if not _MODELS_DIR.exists():
return [] Scans both the local avocet models dir (classifiers, fine-tunes) and the
shared NFS cf-text models dir, deduplicating by directory path.
Falls back to queue entry data when model_info.json has null service/role,
so models downloaded before the pipeline_tag registry existed still group
correctly in the UI.
"""
scan_dirs = [_MODELS_DIR]
if _CF_TEXT_MODELS_DIR != _MODELS_DIR and _CF_TEXT_MODELS_DIR.exists():
scan_dirs.append(_CF_TEXT_MODELS_DIR)
# Build a lookup from safe directory name → queue entry for fallback enrichment.
queue_by_safe_name: dict[str, dict] = {
_safe_model_name(r["repo_id"]): r
for r in _read_queue()
if r.get("repo_id") and r.get("status") not in ("dismissed",)
}
results: list[dict] = [] results: list[dict] = []
for sub in _MODELS_DIR.iterdir(): seen: set[Path] = set()
if not sub.is_dir():
for scan_dir in scan_dirs:
if not scan_dir.exists():
continue continue
for sub in scan_dir.iterdir():
if not sub.is_dir() or sub in seen:
continue
seen.add(sub)
has_training_info = (sub / "training_info.json").exists() has_training_info = (sub / "training_info.json").exists()
has_config = (sub / "config.json").exists() has_config = (sub / "config.json").exists()
@ -393,15 +808,20 @@ def list_installed() -> list[dict]:
# Compute directory size # Compute directory size
size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file()) size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())
# Load adapter/model_id from model_info.json or training_info.json
adapter: str | None = None adapter: str | None = None
model_id: str | None = None model_id: str | None = None
role: str | None = None
service: str | None = None
vram_mb: int | None = None
if has_model_info: if has_model_info:
try: try:
info = json.loads((sub / "model_info.json").read_text(encoding="utf-8")) info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
adapter = info.get("adapter_recommendation") adapter = info.get("adapter_recommendation")
model_id = info.get("repo_id") model_id = info.get("repo_id")
role = info.get("role")
service = info.get("service")
vram_mb = info.get("vram_mb")
except Exception: except Exception:
pass pass
elif has_training_info: elif has_training_info:
@ -409,40 +829,154 @@ def list_installed() -> list[dict]:
info = json.loads((sub / "training_info.json").read_text(encoding="utf-8")) info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
adapter = info.get("adapter") adapter = info.get("adapter")
model_id = info.get("base_model") or info.get("model_id") model_id = info.get("base_model") or info.get("model_id")
role = info.get("role", "classifier")
service = info.get("service", "avocet")
except Exception: except Exception:
pass pass
# Fall back to queue entry when model_info.json has null service/role.
# This covers models downloaded before the pipeline_tag registry existed.
if (role is None or service is None) and sub.name in queue_by_safe_name:
q = queue_by_safe_name[sub.name]
role = role or q.get("role")
service = service or q.get("service")
model_id = model_id or q.get("repo_id")
# Last resort: re-derive from pipeline_tag if we still have no service.
if service is None and model_id:
hf_url = f"https://huggingface.co/api/models/{model_id}"
# Only attempt if we have a pipeline_tag cached somewhere.
for q in queue_by_safe_name.values():
if q.get("repo_id") == model_id and q.get("pipeline_tag"):
tag_info = _TAG_TO_INFO.get(q["pipeline_tag"])
if tag_info:
role = role or tag_info["role"]
service = service or tag_info["service"]
break
results.append({ results.append({
"name": sub.name, "name": sub.name,
"path": str(sub), "path": str(sub),
"type": model_type, "type": model_type,
"adapter": adapter, "adapter": adapter,
"role": role,
"service": service,
"size_bytes": size_bytes, "size_bytes": size_bytes,
"vram_mb": vram_mb,
"model_id": model_id, "model_id": model_id,
}) })
return results return results
# ── PATCH /installed/{name} ────────────────────────────────────────────────────
class InstalledModelPatch(BaseModel):
    # Request body for PATCH /installed/{name}.  Both fields are required and
    # are written verbatim to the model's model_info.json and to any matching
    # queue entry by patch_installed().
    service: str  # value stored under the "service" key
    role: str  # value stored under the "role" key
@router.patch("/installed/{name}")
def patch_installed(name: str, body: InstalledModelPatch) -> dict:
    """Manually assign service and role to an installed model.

    Writes the updated values back to model_info.json so they survive restarts,
    and updates any matching queue entry so the UI shows the correct chip.

    Args:
        name: Directory name of the installed model (a single path component).
        body: The ``service`` and ``role`` values to store.

    Returns:
        ``{"ok": True, "service": ..., "role": ...}``.

    Raises:
        HTTPException: 400 for an invalid name or a path that resolves outside
            its models directory; 404 when no model directory of that name
            exists in either models directory.
    """
    if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
        raise HTTPException(400, f"Invalid model name {name!r}")

    candidate_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
        candidate_dirs.append(_CF_TEXT_MODELS_DIR)

    model_path: Path | None = None
    for base in candidate_dirs:
        candidate = base / name
        try:
            candidate.resolve().relative_to(base.resolve())
        except ValueError:
            raise HTTPException(400, f"Path traversal detected for name {name!r}") from None
        # Must be a directory: model_info.json is written *inside* it, so a
        # stray file with the same name is not an installed model.
        if candidate.is_dir():
            model_path = candidate
            break

    if model_path is None:
        raise HTTPException(404, f"Installed model {name!r} not found")

    info_path = model_path / "model_info.json"
    info: dict = {}
    if info_path.exists():
        try:
            loaded = json.loads(info_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # Unreadable or corrupt metadata is replaced rather than blocking
            # the manual assignment, but leave a trace of what happened.
            logger.warning("Could not read %s (%s) — rewriting it", info_path, exc)
        else:
            # Valid JSON that is not an object cannot take key assignments.
            if isinstance(loaded, dict):
                info = loaded

    info["service"] = body.service
    info["role"] = body.role
    info_path.write_text(json.dumps(info, indent=2), encoding="utf-8")

    # Mirror the update into any matching queue entry.
    records = _read_queue()
    updated = False
    for r in records:
        local = r.get("local_path") or ""
        # ``or ""`` guards entries whose repo_id key is present but null.
        repo_dir_name = _safe_model_name(r.get("repo_id") or "")
        matches = (local and Path(local).name == name) or repo_dir_name == name
        if matches and r.get("status") not in ("dismissed",):
            r["service"] = body.service
            r["role"] = body.role
            updated = True
    if updated:
        _write_queue(records)

    return {"ok": True, "service": body.service, "role": body.role}
# ── DELETE /installed/{name} ─────────────────────────────────────────────────── # ── DELETE /installed/{name} ───────────────────────────────────────────────────
@router.delete("/installed/{name}")
def delete_installed(name: str) -> dict:
    """Remove an installed model directory by name. Blocks path traversal.

    Searches both the local avocet models dir and the shared cf-text models dir.
    Also dismisses any matching queue entry so the UI doesn't show a stale "ready" card.

    Args:
        name: Directory name of the installed model (a single path component).

    Returns:
        ``{"ok": True}`` once the directory has been removed.

    Raises:
        HTTPException: 400 for an invalid name or a path that resolves outside
            its models directory; 404 when no model directory of that name
            exists in either models directory.
    """
    if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
        raise HTTPException(400, f"Invalid model name {name!r}: must be a single directory name with no path separators or '..'")

    # Search both model directories
    candidate_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
        candidate_dirs.append(_CF_TEXT_MODELS_DIR)

    model_path: Path | None = None
    for base in candidate_dirs:
        candidate = base / name
        try:
            candidate.resolve().relative_to(base.resolve())
        except ValueError:
            raise HTTPException(400, f"Path traversal detected for name {name!r}") from None
        # Only directories are installed models; shutil.rmtree would raise on
        # a plain file and surface as a 500 instead of a 404.
        if candidate.is_dir():
            model_path = candidate
            break

    if model_path is None:
        raise HTTPException(404, f"Installed model {name!r} not found in any model directory")

    shutil.rmtree(model_path)

    # Dismiss any queue entries whose local_path matches, or whose repo_id maps to this dir name.
    records = _read_queue()
    updated = False
    for r in records:
        local = r.get("local_path") or ""
        matches_path = local and Path(local).name == name
        # ``or ""`` guards entries whose repo_id key is present but null.
        matches_name = _safe_model_name(r.get("repo_id") or "") == name
        if (matches_path or matches_name) and r.get("status") != "dismissed":
            r["status"] = "dismissed"
            updated = True
    if updated:
        _write_queue(records)

    return {"ok": True}

View file

@ -57,11 +57,32 @@ imitate:
- id: peregrine - id: peregrine
name: Peregrine name: Peregrine
icon: "🦅" icon: "🦅"
description: Job search assistant description: Job search assistant — live job listings
base_url: http://localhost:8502 base_url: http://localhost:8601
sample_endpoint: /api/jobs health_path: /api/jobs/counts
text_fields: [title, description] sample_endpoint: /api/jobs?status=pending&limit=5
prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}" text_fields: [title, company, description]
prompt_template: "Analyze this job listing and identify the key requirements, must-have skills, and any culture signals that would help tailor an application:\n\n{text}"
- id: osprey
name: Osprey
icon: "📞"
description: Gov't hold-line automation — recent call records
base_url: http://localhost:8520
health_path: /api/health
sample_endpoint: /api/calls/recent
text_fields: [agency, issue, notes]
prompt_template: "Draft a clear, professional follow-up letter for this government hold-line call. Include what was discussed, what action the agency committed to, and a polite deadline for response:\n\n{text}"
- id: linnet
name: Linnet
icon: "🐦"
description: Real-time tone annotation — Elcor-style subtext for ND users
base_url: http://localhost:8522
health_path: /health
sample_endpoint: /samples
text_fields: [text, context]
prompt_template: "Annotate the emotional tone and subtext of the following text using explicit Elcor-style markers (e.g. [SINCERELY], [UNCERTAIN], [FRUSTRATED]). Identify implied emotions, potential sarcasm, and any ambiguity that might be misread by neurodivergent readers:\n\n{text}"
- id: kiwi - id: kiwi
name: Kiwi name: Kiwi

View file

@ -90,6 +90,12 @@ usage() {
echo -e " ${GREEN}score [args]${NC} Shortcut: --score [args]" echo -e " ${GREEN}score [args]${NC} Shortcut: --score [args]"
echo -e " ${GREEN}compare [args]${NC} Shortcut: --compare [args]" echo -e " ${GREEN}compare [args]${NC} Shortcut: --compare [args]"
echo "" echo ""
echo " Writing Style Benchmark:"
echo -e " ${GREEN}style-bench [args]${NC} Run benchmark_style.py (args passed through)"
echo -e " ${GREEN}style-list${NC} List available ollama models for style bench"
echo -e " ${GREEN}style-run [args]${NC} Run writing style benchmark (--models, --samples, --include-large, --scan-disk PATH, --cforch)"
echo -e " ${GREEN}style-last${NC} Print most recent writing style benchmark report"
echo ""
echo " Dev:" echo " Dev:"
echo -e " ${GREEN}dev${NC} Hot-reload: uvicorn --reload (:8503) + Vite HMR (:5173)" echo -e " ${GREEN}dev${NC} Hot-reload: uvicorn --reload (:8503) + Vite HMR (:5173)"
echo -e " ${GREEN}test${NC} Run pytest suite" echo -e " ${GREEN}test${NC} Run pytest suite"
@ -249,6 +255,26 @@ case "$CMD" in
exec "$0" benchmark --compare "$@" exec "$0" benchmark --compare "$@"
;; ;;
# style-bench: run scripts/benchmark_style.py with the ${ENV_BM} env's Python,
# forwarding all arguments in "$@" unchanged.
style-bench)
info "Running writing style benchmark (${ENV_BM})…"
# Report a missing or non-executable interpreter before trying to launch it.
# NOTE(review): presumably error() exits the script — confirm; otherwise the
# benchmark invocation below still runs after the message.
if [[ ! -x "$PYTHON_BM" ]]; then
error "Python not found in ${ENV_BM} env at ${PYTHON_BM}"
fi
"$PYTHON_BM" scripts/benchmark_style.py "$@"
;;
# style-list / style-run / style-last: shortcuts that re-exec this script as
# `style-bench` with a fixed leading flag. Only style-run forwards extra args.
style-list)
exec "$0" style-bench --list-models
;;
style-run)
exec "$0" style-bench --run "$@"
;;
style-last)
exec "$0" style-bench --show-last
;;
help|--help|-h) help|--help|-h)
usage usage
;; ;;

View file

@ -122,17 +122,88 @@ def test_lookup_returns_correct_shape(client):
assert data["already_queued"] is False assert data["already_queued"] is False
def test_lookup_unknown_pipeline_tag_returns_null_adapter(client): def test_lookup_unknown_pipeline_tag_returns_null_adapter_and_incompatible(client):
"""An unrecognised pipeline_tag yields adapter_recommendation=null.""" """An unrecognised pipeline_tag yields adapter_recommendation=null and compatible=False."""
mock_resp = MagicMock() mock_resp = MagicMock()
mock_resp.status_code = 200 mock_resp.status_code = 200
mock_resp.json.return_value = _make_hf_response("org/m", "audio-classification") mock_resp.json.return_value = _make_hf_response("org/m", "reinforcement-learning")
with patch("app.models.httpx.get", return_value=mock_resp): with patch("app.models.httpx.get", return_value=mock_resp):
r = client.get("/api/models/lookup", params={"repo_id": "org/m"}) r = client.get("/api/models/lookup", params={"repo_id": "org/m"})
assert r.status_code == 200 assert r.status_code == 200
assert r.json()["adapter_recommendation"] is None data = r.json()
assert data["adapter_recommendation"] is None
assert data["compatible"] is False
assert data["role"] is None
assert data["service"] is None
assert "CircuitForge model ecosystem" in data["warning"]
def test_lookup_stt_tag_returns_compatible_with_cf_stt_service(client):
    """An automatic-speech-recognition model is compatible and routed to cf-stt as "stt"."""
    repo_id = "openai/whisper-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "automatic-speech-recognition")

    # Stub the outbound HTTP call so the lookup never leaves the process.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    # This tag has no adapter recommendation and produces no warning.
    assert payload["adapter_recommendation"] is None
    assert payload["role"] == "stt"
    assert payload["service"] == "cf-stt"
    assert payload["warning"] is None
def test_lookup_vision_tag_returns_compatible_with_cf_vision_service(client):
    """An image-classification model is compatible and routed to cf-vision as "vision"."""
    repo_id = "google/siglip-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "image-classification")

    # Stub the outbound HTTP call so the lookup never leaves the process.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "vision"
    assert payload["service"] == "cf-vision"
def test_lookup_audio_classification_tag_returns_cf_voice_service(client):
    """An audio-classification model is compatible and routed to cf-voice as "classifier"."""
    repo_id = "org/audio-model"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "audio-classification")

    # Stub the outbound HTTP call so the lookup never leaves the process.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "classifier"
    assert payload["service"] == "cf-voice"
def test_lookup_embedding_tag_returns_compatible_with_cf_core_service(client):
    """A feature-extraction model is compatible and routed to cf-core as "embedding"."""
    repo_id = "BAAI/bge-small-en"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "feature-extraction")

    # Stub the outbound HTTP call so the lookup never leaves the process.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "embedding"
    assert payload["service"] == "cf-core"
def test_lookup_already_queued_flag(client): def test_lookup_already_queued_flag(client):
@ -181,6 +252,26 @@ def test_queue_add_returns_entry_fields(client):
assert entry["adapter_recommendation"] == "ZeroShotAdapter" assert entry["adapter_recommendation"] == "ZeroShotAdapter"
def test_queue_preserves_role_and_service(client):
    """Role and service sent to POST /queue come back from both the POST reply and GET /queue."""
    submission = {
        "repo_id": "openai/whisper-base",
        "pipeline_tag": "automatic-speech-recognition",
        "adapter_recommendation": None,
        "role": "stt",
        "service": "cf-stt",
    }

    created = client.post("/api/models/queue", json=submission)
    assert created.status_code == 201
    created_entry = created.json()
    assert created_entry["role"] == "stt"
    assert created_entry["service"] == "cf-stt"

    # The same values must be visible when the queue is listed afterwards.
    listing = client.get("/api/models/queue")
    first_item = listing.json()[0]
    assert first_item["role"] == "stt"
    assert first_item["service"] == "cf-stt"
# ── POST /queue — 409 duplicate ──────────────────────────────────────────────── # ── POST /queue — 409 duplicate ────────────────────────────────────────────────
def test_queue_duplicate_returns_409(client): def test_queue_duplicate_returns_409(client):
@ -317,7 +408,12 @@ def test_installed_detects_downloaded_model(client, tmp_path):
model_dir.mkdir() model_dir.mkdir()
(model_dir / "config.json").write_text(json.dumps({"model_type": "bert"}), encoding="utf-8") (model_dir / "config.json").write_text(json.dumps({"model_type": "bert"}), encoding="utf-8")
(model_dir / "model_info.json").write_text( (model_dir / "model_info.json").write_text(
json.dumps({"repo_id": "org/mymodel", "adapter_recommendation": "ZeroShotAdapter"}), json.dumps({
"repo_id": "org/mymodel",
"adapter_recommendation": "ZeroShotAdapter",
"role": "classifier",
"service": "avocet",
}),
encoding="utf-8", encoding="utf-8",
) )
@ -329,6 +425,51 @@ def test_installed_detects_downloaded_model(client, tmp_path):
assert items[0]["name"] == "org--mymodel" assert items[0]["name"] == "org--mymodel"
assert items[0]["adapter"] == "ZeroShotAdapter" assert items[0]["adapter"] == "ZeroShotAdapter"
assert items[0]["model_id"] == "org/mymodel" assert items[0]["model_id"] == "org/mymodel"
assert items[0]["role"] == "classifier"
assert items[0]["service"] == "avocet"
def test_installed_stt_model_surfaces_role_and_service(client):
    """GET /installed reports the role/service stored in a downloaded STT model's model_info.json."""
    from app import models as models_module

    # Lay out a downloaded-model directory: config.json plus model_info.json.
    install_dir = models_module._MODELS_DIR / "openai--whisper-base"
    install_dir.mkdir()
    config_text = json.dumps({"model_type": "whisper"})
    (install_dir / "config.json").write_text(config_text, encoding="utf-8")
    info_text = json.dumps({
        "repo_id": "openai/whisper-base",
        "adapter_recommendation": None,
        "role": "stt",
        "service": "cf-stt",
    })
    (install_dir / "model_info.json").write_text(info_text, encoding="utf-8")

    response = client.get("/api/models/installed")
    assert response.status_code == 200
    first = response.json()[0]
    assert first["role"] == "stt"
    assert first["service"] == "cf-stt"
    assert first["adapter"] is None
def test_installed_finetuned_model_defaults_to_avocet_service(client):
    """A fine-tuned model whose training_info.json has no role/service is listed as avocet/classifier."""
    from app import models as models_module

    # Only training_info.json is written, and it carries neither role nor service.
    finetuned_dir = models_module._MODELS_DIR / "my-finetuned-v2"
    finetuned_dir.mkdir()
    training_text = json.dumps({"base_model": "microsoft/deberta-v3-base", "epochs": 3})
    (finetuned_dir / "training_info.json").write_text(training_text, encoding="utf-8")

    response = client.get("/api/models/installed")
    assert response.status_code == 200
    first = response.json()[0]
    assert first["role"] == "classifier"
    assert first["service"] == "avocet"
def test_installed_detects_finetuned_model(client): def test_installed_detects_finetuned_model(client):

4
web/.gitignore vendored
View file

@ -22,3 +22,7 @@ dist-ssr
*.njsproj *.njsproj
*.sln *.sln
*.sw? *.sw?
# Local environment overrides
.env

View file

@ -42,6 +42,12 @@
<span v-if="lookupResult.pipeline_tag" class="chip chip-pipeline"> <span v-if="lookupResult.pipeline_tag" class="chip chip-pipeline">
{{ lookupResult.pipeline_tag }} {{ lookupResult.pipeline_tag }}
</span> </span>
<span v-if="lookupResult.role" class="chip chip-role">
{{ lookupResult.role }}
</span>
<span v-if="lookupResult.service" class="chip" :class="serviceChipClass(lookupResult.service)">
{{ lookupResult.service }}
</span>
<span v-if="lookupResult.adapter_recommendation" class="chip chip-adapter"> <span v-if="lookupResult.adapter_recommendation" class="chip chip-adapter">
{{ lookupResult.adapter_recommendation }} {{ lookupResult.adapter_recommendation }}
</span> </span>
@ -61,11 +67,10 @@
<button <button
class="btn-primary btn-add-queue" class="btn-primary btn-add-queue"
:class="{ 'btn-add-queue-warn': !lookupResult.compatible }"
:disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue" :disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue"
@click="addToQueue" @click="addToQueue"
> >
{{ addingToQueue ? 'Adding…' : lookupResult.compatible ? 'Add to queue' : 'Add anyway' }} {{ addingToQueue ? 'Adding…' : 'Add to queue' }}
</button> </button>
</div> </div>
</section> </section>
@ -91,6 +96,8 @@
</div> </div>
<div class="model-meta"> <div class="model-meta">
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span> <span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
<span v-if="model.role" class="chip chip-role">{{ model.role }}</span>
<span v-if="model.service" class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
<span v-if="model.adapter_recommendation" class="chip chip-adapter">{{ model.adapter_recommendation }}</span> <span v-if="model.adapter_recommendation" class="chip chip-adapter">{{ model.adapter_recommendation }}</span>
</div> </div>
<div class="model-card-actions"> <div class="model-card-actions">
@ -116,6 +123,8 @@
</div> </div>
<div class="model-meta"> <div class="model-meta">
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span> <span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
<span v-if="model.role" class="chip chip-role">{{ model.role }}</span>
<span v-if="model.service" class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
</div> </div>
<div v-if="downloadErrors[model.id]" class="download-error" role="alert"> <div v-if="downloadErrors[model.id]" class="download-error" role="alert">
@ -124,14 +133,19 @@
<div v-else class="progress-wrap" :aria-label="`Download progress for ${model.repo_id}`"> <div v-else class="progress-wrap" :aria-label="`Download progress for ${model.repo_id}`">
<div <div
class="progress-bar" class="progress-bar"
:style="{ width: `${downloadProgress[model.id] ?? 0}%` }" :style="{ width: `${downloadProgress[model.repo_id]?.pct ?? 0}%` }"
role="progressbar" role="progressbar"
:aria-valuenow="downloadProgress[model.id] ?? 0" :aria-valuenow="downloadProgress[model.repo_id]?.pct ?? 0"
aria-valuemin="0" aria-valuemin="0"
aria-valuemax="100" aria-valuemax="100"
/> />
<span class="progress-label"> <span class="progress-label">
{{ downloadProgress[model.id] == null ? 'Preparing…' : `${downloadProgress[model.id]}%` }} {{
!downloadProgress[model.repo_id] ? 'Preparing…'
: downloadProgress[model.repo_id].pct != null ? `${Math.round(downloadProgress[model.repo_id].pct!)}%`
: downloadProgress[model.repo_id].bytes > 0 ? `${(downloadProgress[model.repo_id].bytes / 1024 / 1024).toFixed(0)} MB downloaded…`
: 'Preparing…'
}}
</span> </span>
</div> </div>
</div> </div>
@ -145,20 +159,33 @@
No models installed yet. No models installed yet.
</div> </div>
<div v-else class="installed-table-wrap"> <template v-else>
<div
v-for="group in installedByService"
:key="group.service"
class="installed-group"
>
<div class="installed-group-header">
<span class="chip" :class="serviceChipClass(group.service)">
{{ serviceLabel(group.service) }}
</span>
<span class="installed-group-count">{{ group.models.length }} model{{ group.models.length !== 1 ? 's' : '' }}</span>
</div>
<div class="installed-table-wrap">
<table class="installed-table"> <table class="installed-table">
<thead> <thead>
<tr> <tr>
<th>Name</th> <th>Name</th>
<th>Type</th> <th>Type</th>
<th>Adapter</th> <th>Role</th>
<th>Size</th> <th>Size</th>
<th></th> <th></th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
<tr v-for="model in installedModels" :key="model.name"> <tr v-for="model in group.models" :key="model.name">
<td class="td-name">{{ model.name }}</td> <td class="td-name">{{ model.model_id ?? model.name }}</td>
<td> <td>
<span <span
class="badge" class="badge"
@ -167,9 +194,42 @@
{{ model.type }} {{ model.type }}
</span> </span>
</td> </td>
<td>{{ model.adapter ?? '—' }}</td>
<td>{{ humanBytes(model.size) }}</td>
<td> <td>
<span v-if="model.role" class="chip chip-role chip-sm">{{ model.role }}</span>
<span v-else></span>
</td>
<td>{{ humanBytes(model.size_bytes) }}</td>
<td class="td-actions">
<div v-if="!model.service" class="classify-row">
<select
class="classify-select"
:value="classifyDraft[model.name]?.service ?? ''"
@change="onServiceChange(model.name, ($event.target as HTMLSelectElement).value)"
aria-label="Assign service"
>
<option value="" disabled>Service</option>
<option v-for="svc in CLASSIFIABLE_SERVICES" :key="svc.value" :value="svc.value">{{ svc.label }}</option>
</select>
<select
class="classify-select"
:value="classifyDraft[model.name]?.role ?? ''"
:disabled="!classifyDraft[model.name]?.service"
@change="(e) => setClassifyRole(model.name, (e.target as HTMLSelectElement).value)"
aria-label="Assign role"
>
<option value="" disabled>Role</option>
<option
v-for="role in rolesForService(classifyDraft[model.name]?.service ?? '')"
:key="role"
:value="role"
>{{ role }}</option>
</select>
<button
class="btn-primary btn-sm"
:disabled="!classifyDraft[model.name]?.service || !classifyDraft[model.name]?.role"
@click="saveClassify(model.name)"
>Save</button>
</div>
<button <button
class="btn-danger btn-sm" class="btn-danger btn-sm"
@click="deleteInstalled(model.name)" @click="deleteInstalled(model.name)"
@ -181,6 +241,8 @@
</tbody> </tbody>
</table> </table>
</div> </div>
</div>
</template>
</section> </section>
</div> </div>
</template> </template>
@ -194,6 +256,8 @@ interface LookupResult {
repo_id: string repo_id: string
pipeline_tag: string | null pipeline_tag: string | null
adapter_recommendation: string | null adapter_recommendation: string | null
role: string | null
service: string | null
compatible: boolean compatible: boolean
warning: string | null warning: string | null
size: number | null size: number | null
@ -208,20 +272,27 @@ interface QueuedModel {
status: 'pending' | 'downloading' | 'done' | 'error' status: 'pending' | 'downloading' | 'done' | 'error'
pipeline_tag: string | null pipeline_tag: string | null
adapter_recommendation: string | null adapter_recommendation: string | null
role: string | null
service: string | null
} }
interface InstalledModel { interface InstalledModel {
name: string name: string
type: 'finetuned' | 'downloaded' type: 'finetuned' | 'downloaded'
adapter: string | null adapter: string | null
size: number role: string | null
service: string | null
size_bytes: number
model_id: string | null
} }
interface SseProgressEvent { interface SseProgressEvent {
model_id: string type: 'progress' | 'done' | 'error' | 'idle'
pct: number | null repo_id?: string
status: 'progress' | 'done' | 'error' pct?: number
message?: string downloaded_bytes?: number
total_bytes?: number
error?: string
} }
// State // State
@ -235,7 +306,8 @@ const addingToQueue = ref(false)
const queuedModels = ref<QueuedModel[]>([]) const queuedModels = ref<QueuedModel[]>([])
const installedModels = ref<InstalledModel[]>([]) const installedModels = ref<InstalledModel[]>([])
const downloadProgress = ref<Record<string, number>>({}) const downloadProgress = ref<Record<string, { pct: number | null; bytes: number }>>({})
const classifyDraft = ref<Record<string, { service: string; role: string }>>({})
const downloadErrors = ref<Record<string, string>>({}) const downloadErrors = ref<Record<string, string>>({})
let pollInterval: ReturnType<typeof setInterval> | null = null let pollInterval: ReturnType<typeof setInterval> | null = null
@ -251,8 +323,69 @@ const downloadingModels = computed(() =>
queuedModels.value.filter(m => m.status === 'downloading') queuedModels.value.filter(m => m.status === 'downloading')
) )
// Order in which service groups are listed in the Installed section; groups
// whose key is not in this list are appended after these.
const SERVICE_ORDER = ['avocet', 'cf-text', 'cf-stt', 'cf-tts', 'cf-vision', 'cf-image', 'cf-core', 'cf-voice', 'other']
// Options offered by the "Assign service" dropdown on models that have no service yet.
const CLASSIFIABLE_SERVICES = [
{ value: 'avocet', label: 'Avocet — Email Classifiers' },
{ value: 'cf-text', label: 'cf-text — Language Models' },
{ value: 'cf-stt', label: 'cf-stt — Speech Recognition' },
{ value: 'cf-tts', label: 'cf-tts — Text to Speech' },
{ value: 'cf-vision', label: 'cf-vision — Vision / VLM' },
{ value: 'cf-image', label: 'cf-image — Image Generation' },
{ value: 'cf-core', label: 'cf-core — Embeddings' },
{ value: 'cf-voice', label: 'cf-voice — Audio Classification' },
]
// Roles selectable for each service in the "Assign role" dropdown. A service
// with exactly one role gets that role pre-selected when the service is chosen.
const SERVICE_ROLES: Record<string, string[]> = {
'avocet': ['classifier', 'reranker'],
'cf-text': ['generator'],
'cf-stt': ['stt', 'alm'],
'cf-tts': ['tts'],
'cf-vision': ['vision', 'vlm', 'embedding'],
'cf-image': ['image-gen'],
'cf-core': ['embedding'],
'cf-voice': ['classifier'],
}
// Roles that can be assigned under `service`; an empty list when the service has no entry.
function rolesForService(service: string): string[] {
  const roles = SERVICE_ROLES[service]
  return roles ?? []
}
// Installed models bucketed by service (models without one go under 'other'),
// returned as an ordered list of { service, models } sections.
const installedByService = computed(() => {
  const byService: Record<string, InstalledModel[]> = {}
  installedModels.value.forEach((entry) => {
    const bucket = entry.service ?? 'other'
    if (byService[bucket]) {
      byService[bucket].push(entry)
    } else {
      byService[bucket] = [entry]
    }
  })

  // Known services keep their SERVICE_ORDER position; anything else follows
  // in the order it was first encountered.
  const known = SERVICE_ORDER.filter(svc => byService[svc])
  const extras = Object.keys(byService).filter(svc => !SERVICE_ORDER.includes(svc))
  return [...known, ...extras].map(svc => ({ service: svc, models: byService[svc] }))
})
// Helpers // Helpers
// Human-readable heading for each service group; 'other' is the bucket used
// for installed models that have no service assigned.
const SERVICE_LABELS: Record<string, string> = {
'avocet': 'Avocet — Email Classifiers',
'cf-text': 'cf-text — Language Models',
'cf-stt': 'cf-stt — Speech Recognition',
'cf-tts': 'cf-tts — Text to Speech',
'cf-vision': 'cf-vision — Vision / VLM',
'cf-image': 'cf-image — Image Generation',
'cf-core': 'cf-core — Embeddings',
'cf-voice': 'cf-voice — Audio Classification',
'other': 'Other — Unclassified',
}
// Display label for a service key; falls back to the raw key when no label is registered.
function serviceLabel(service: string): string {
  const label = SERVICE_LABELS[service]
  return label ?? service
}
// CSS class for a service chip: `chip-service-<slug>`, where every character
// outside [a-z0-9] becomes '-'. A null or empty service maps to the 'other' chip.
function serviceChipClass(service: string | null): string {
  const slug = service ? service.replace(/[^a-z0-9]/g, '-') : 'other'
  return `chip-service-${slug}`
}
function humanBytes(bytes: number | null): string { function humanBytes(bytes: number | null): string {
if (bytes == null) return '—' if (bytes == null) return '—'
const units = ['B', 'KB', 'MB', 'GB', 'TB'] const units = ['B', 'KB', 'MB', 'GB', 'TB']
@ -305,10 +438,11 @@ async function addToQueue() {
if (!lookupResult.value) return if (!lookupResult.value) return
addingToQueue.value = true addingToQueue.value = true
try { try {
const { repo_id, pipeline_tag, adapter_recommendation, role, service } = lookupResult.value
const res = await fetch('/api/models/queue', { const res = await fetch('/api/models/queue', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ repo_id: lookupResult.value.repo_id }), body: JSON.stringify({ repo_id, pipeline_tag, adapter_recommendation, role, service }),
}) })
if (res.ok) { if (res.ok) {
lookupResult.value = { ...lookupResult.value, already_queued: true } lookupResult.value = { ...lookupResult.value, already_queued: true }
@ -339,12 +473,50 @@ async function dismissModel(id: string) {
} catch { /* ignore */ } } catch { /* ignore */ }
} }
// Record the service picked for model `name`, replacing any earlier draft.
// The role is pre-filled only when the service offers exactly one role.
function onServiceChange(name: string, service: string) {
  const candidates = SERVICE_ROLES[service] ?? []
  const role = candidates.length === 1 ? candidates[0] : ''
  classifyDraft.value = { ...classifyDraft.value, [name]: { service, role } }
}
// Record the role picked for model `name`, keeping the rest of its draft as-is.
function setClassifyRole(name: string, role: string) {
  const current = classifyDraft.value[name]
  classifyDraft.value = { ...classifyDraft.value, [name]: { ...current, role } }
}
// Persist the drafted service/role for installed model `name` via PATCH.
// On success the local model list is patched, the draft is discarded and the
// queue is reloaded. A non-OK response or a thrown error leaves state untouched.
async function saveClassify(name: string) {
  const draft = classifyDraft.value[name]
  if (!draft?.service || !draft?.role) return

  const url = `/api/models/installed/${encodeURIComponent(name)}`
  try {
    const res = await fetch(url, {
      method: 'PATCH',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ service: draft.service, role: draft.role }),
    })
    if (!res.ok) return

    // Replace the matching entry so the model moves to the correct service group
    installedModels.value = installedModels.value.map((m) => {
      if (m.name !== name) return m
      return { ...m, service: draft.service, role: draft.role }
    })
    // Drop this model's draft now that it has been saved
    classifyDraft.value = Object.fromEntries(
      Object.entries(classifyDraft.value).filter(([key]) => key !== name),
    )
    await loadQueue()
  } catch { /* non-fatal */ }
}
async function deleteInstalled(name: string) { async function deleteInstalled(name: string) {
if (!window.confirm(`Delete installed model "${name}"? This cannot be undone.`)) return if (!window.confirm(`Delete installed model "${name}"? This cannot be undone.`)) return
try { try {
const res = await fetch(`/api/models/installed/${encodeURIComponent(name)}`, { method: 'DELETE' }) const res = await fetch(`/api/models/installed/${encodeURIComponent(name)}`, { method: 'DELETE' })
if (res.ok) { if (res.ok) {
installedModels.value = installedModels.value.filter(m => m.name !== name) installedModels.value = installedModels.value.filter(m => m.name !== name)
await loadQueue()
} }
} catch { /* ignore */ } } catch { /* ignore */ }
} }
@ -378,21 +550,28 @@ function startSse() {
return return
} }
const { model_id, pct, status, message } = event const { type, repo_id, pct, downloaded_bytes, error } = event
if (!repo_id) return
if (status === 'progress' && pct != null) { if (type === 'progress') {
downloadProgress.value = { ...downloadProgress.value, [model_id]: pct } const bytes = downloaded_bytes ?? 0
} else if (status === 'done') { // pct stays null when total_bytes is unknown so we can show "X MB" instead
const progress = (pct != null && pct > 0) ? pct : (bytes > 0 ? null : undefined)
downloadProgress.value = { ...downloadProgress.value, [repo_id]: { pct: progress ?? null, bytes } }
} else if (type === 'done') {
const updated = { ...downloadProgress.value } const updated = { ...downloadProgress.value }
delete updated[model_id] delete updated[repo_id]
downloadProgress.value = updated downloadProgress.value = updated
queuedModels.value = queuedModels.value.filter(m => m.id !== model_id) queuedModels.value = queuedModels.value.filter(m => m.repo_id !== repo_id)
loadInstalled() loadInstalled()
} else if (status === 'error') { } else if (type === 'error') {
const entry = queuedModels.value.find(m => m.repo_id === repo_id)
if (entry) {
downloadErrors.value = { downloadErrors.value = {
...downloadErrors.value, ...downloadErrors.value,
[model_id]: message ?? 'Download failed.', [entry.id]: error ?? 'Download failed.',
}
} }
} }
}) })
@ -595,12 +774,6 @@ onUnmounted(() => {
align-self: flex-start; align-self: flex-start;
} }
.btn-add-queue-warn {
background: var(--color-surface-raised, #e4ebf5);
color: var(--color-text-secondary, #6b7a99);
border: 1px solid var(--color-border, #d0d7e8);
}
/* ── Model cards (queue + downloads) ── */ /* ── Model cards (queue + downloads) ── */
.model-card { .model-card {
border: 1px solid var(--color-border, #a8b8d0); border: 1px solid var(--color-border, #a8b8d0);
@ -715,6 +888,35 @@ onUnmounted(() => {
word-break: break-all; word-break: break-all;
} }
.td-actions {
display: flex;
flex-direction: column;
gap: 0.4rem;
align-items: flex-start;
}
.classify-row {
display: flex;
gap: 0.35rem;
align-items: center;
flex-wrap: wrap;
}
.classify-select {
font-size: 0.78rem;
padding: 0.2rem 0.4rem;
border-radius: 4px;
border: 1px solid var(--color-border, #444);
background: var(--color-surface, #1e1e2e);
color: var(--color-text, #cdd6f4);
cursor: pointer;
}
.classify-select:disabled {
opacity: 0.4;
cursor: not-allowed;
}
/* ── Badges ── */ /* ── Badges ── */
.badge-group { .badge-group {
display: flex; display: flex;
@ -777,6 +979,76 @@ onUnmounted(() => {
background: color-mix(in srgb, var(--color-accent, #c4732a) 12%, var(--color-surface-alt, #dde4f0)); background: color-mix(in srgb, var(--color-accent, #c4732a) 12%, var(--color-surface-alt, #dde4f0));
} }
.chip-role {
color: var(--color-info, #1e6091);
background: color-mix(in srgb, var(--color-info, #1e6091) 12%, var(--color-surface-alt, #dde4f0));
}
.chip-sm {
font-size: 0.68rem;
padding: 0.1rem 0.4rem;
}
/* Service chips — one colour per CF service */
.chip-service-avocet {
color: var(--color-primary, #2d5a27);
background: color-mix(in srgb, var(--color-primary, #2d5a27) 15%, var(--color-surface-alt, #dde4f0));
}
.chip-service-cf-text {
color: #c2410c;
background: color-mix(in srgb, #c2410c 12%, var(--color-surface-alt, #dde4f0));
}
.chip-service-cf-stt {
color: #5e35b1;
background: color-mix(in srgb, #5e35b1 12%, var(--color-surface-alt, #dde4f0));
}
.chip-service-cf-tts {
color: #0277bd;
background: color-mix(in srgb, #0277bd 12%, var(--color-surface-alt, #dde4f0));
}
.chip-service-cf-vision {
  color: #00695c;
  background: color-mix(in srgb, #00695c 12%, var(--color-surface-alt, #dde4f0));
}
/* cf-image is a selectable service and serviceChipClass() emits this class for
   it; without a rule its chip would be the only service chip left uncoloured. */
.chip-service-cf-image {
  color: #a16207;
  background: color-mix(in srgb, #a16207 12%, var(--color-surface-alt, #dde4f0));
}
.chip-service-cf-core {
color: #6d4c41;
background: color-mix(in srgb, #6d4c41 12%, var(--color-surface-alt, #dde4f0));
}
.chip-service-cf-voice {
color: #ad1457;
background: color-mix(in srgb, #ad1457 12%, var(--color-surface-alt, #dde4f0));
}
.chip-service-other {
color: var(--color-text-muted, #4a5c7a);
background: var(--color-surface-alt, #dde4f0);
}
/* ── Installed group ── */
.installed-group {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.installed-group-header {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.25rem 0;
}
.installed-group-count {
font-size: 0.78rem;
color: var(--color-text-muted, #4a5c7a);
}
/* ── Buttons ── */ /* ── Buttons ── */
.btn-primary, .btn-danger { .btn-primary, .btn-danger {
padding: 0.4rem 0.9rem; padding: 0.4rem 0.9rem;
@ -852,7 +1124,7 @@ onUnmounted(() => {
.installed-table th:nth-child(3), .installed-table th:nth-child(3),
.installed-table td:nth-child(3) { .installed-table td:nth-child(3) {
display: none; /* hide Adapter column on very narrow screens */ display: none; /* hide Role column on very narrow screens */
} }
} }
</style> </style>