feat(models): extended model registry + manage.sh benchmark subcommands

- app/models.py: add StyleModel and VoiceModel entries; expand cf-text and
  benchmark model metadata (vram_mb, description, tags)
- tests/test_models.py: coverage for new model types and registry helpers
- ModelsView.vue: updated model browser with style/voice filter tabs
- manage.sh: add benchmark-style and benchmark-voice subcommands
- config/label_tool.yaml.example: add style + voice benchmark config stubs
- web/.gitignore: add node_modules and dist entries
This commit is contained in:
pyr0ball 2026-04-24 14:56:24 -07:00
parent ddb56efb89
commit ea3da701c6
6 changed files with 1150 additions and 152 deletions

View file

@ -14,11 +14,12 @@ from __future__ import annotations
import json
import logging
import os
import shutil
import threading
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from typing import Any, TypedDict
from uuid import uuid4
import httpx
@ -39,21 +40,67 @@ _ROOT = Path(__file__).parent.parent
_MODELS_DIR: Path = _ROOT / "models"
_QUEUE_DIR: Path = _ROOT / "data"
# Service-specific model destinations.
# cf-text models land on the NFS-mounted shared asset store so every cluster
# node can reach them without a separate download. Avocet classifiers stay local
# because they are fine-tuned in-place and are only consumed by avocet itself.
# Override via CF_TEXT_MODELS_DIR env var (useful for dev / non-NFS setups).
_CF_TEXT_MODELS_DIR: Path = Path(
os.environ.get("CF_TEXT_MODELS_DIR", "/Library/Assets/LLM/cf-text/models")
)
# Directory containing per-node YAML profiles for cf-orch.
# Auto-registration writes new catalog entries here on model download.
_CF_ORCH_PROFILES_DIR: Path = Path(
os.environ.get(
"CF_ORCH_PROFILES_DIR",
"/Library/Development/CircuitForge/circuitforge-orch/circuitforge_orch/profiles/nodes",
)
)
router = APIRouter()
# ── Download progress shared state ────────────────────────────────────────────
# Updated by the background download thread; read by GET /download/stream.
_download_progress: dict[str, Any] = {}
# ── HF pipeline_tag → adapter recommendation ──────────────────────────────────
_TAG_TO_ADAPTER: dict[str, str] = {
"zero-shot-classification": "ZeroShotAdapter",
"text-classification": "ZeroShotAdapter",
"natural-language-inference": "ZeroShotAdapter",
"sentence-similarity": "RerankerAdapter",
"text-ranking": "RerankerAdapter",
"text-generation": "GenerationAdapter",
"text2text-generation": "GenerationAdapter",
# ── HF pipeline_tag → CF service info ────────────────────────────────────────
class _TagInfo(TypedDict):
    """Registry record describing how one HF ``pipeline_tag`` is handled.

    Every value of ``_TAG_TO_INFO`` has this shape.
    """

    adapter: str | None  # Avocet adapter class, or None if handled by another service
    role: str  # Human-readable model role (classifier, stt, tts, vision, …)
    service: str  # CF service that consumes this model type
_TAG_TO_INFO: dict[str, _TagInfo] = {
# Avocet email classifiers
"zero-shot-classification": {"adapter": "ZeroShotAdapter", "role": "classifier", "service": "avocet"},
"text-classification": {"adapter": "ZeroShotAdapter", "role": "classifier", "service": "avocet"},
"natural-language-inference": {"adapter": "ZeroShotAdapter", "role": "classifier", "service": "avocet"},
"sentence-similarity": {"adapter": "RerankerAdapter", "role": "reranker", "service": "avocet"},
"text-ranking": {"adapter": "RerankerAdapter", "role": "reranker", "service": "avocet"},
"text-generation": {"adapter": "GenerationAdapter", "role": "generator", "service": "cf-text"},
"text2text-generation": {"adapter": "GenerationAdapter", "role": "generator", "service": "cf-text"},
"summarization": {"adapter": "GenerationAdapter", "role": "generator", "service": "cf-text"},
# STT — cf-stt speech recognition service
"automatic-speech-recognition": {"adapter": None, "role": "stt", "service": "cf-stt"},
# Audio language models — audio + text → text (understanding, QA, captioning)
"audio-text-to-text": {"adapter": None, "role": "alm", "service": "cf-stt"},
# Audio classification — cf-voice sidecar context stream
"audio-classification": {"adapter": None, "role": "classifier", "service": "cf-voice"},
# TTS — cf-tts text-to-speech service
"text-to-speech": {"adapter": None, "role": "tts", "service": "cf-tts"},
# Vision — cf-vision image classification / embedding / VLM service
"image-classification": {"adapter": None, "role": "vision", "service": "cf-vision"},
"zero-shot-image-classification": {"adapter": None, "role": "vision", "service": "cf-vision"},
"image-feature-extraction": {"adapter": None, "role": "embedding", "service": "cf-vision"},
"image-text-to-text": {"adapter": None, "role": "vlm", "service": "cf-vision"},
"visual-question-answering": {"adapter": None, "role": "vlm", "service": "cf-vision"},
# Image generation — cf-image (text → image; distinct from cf-vision image understanding)
"text-to-image": {"adapter": None, "role": "image-gen", "service": "cf-image"},
# Embedding — cf-core shared embedding layer
"feature-extraction": {"adapter": None, "role": "embedding", "service": "cf-core"},
}
@ -84,14 +131,31 @@ def _write_queue(records: list[dict]) -> None:
def _safe_model_name(repo_id: str) -> str:
"""Convert repo_id to a filesystem-safe directory name (HF convention)."""
"""Convert repo_id to a filesystem-safe directory name.
Uses the HuggingFace Hub convention: owner/model-name → owner--model-name.
This matches what snapshot_download produces under local_dir and what
cf-orch uses when constructing model paths for cf-text allocations.
"""
return repo_id.replace("/", "--")
def _is_installed(repo_id: str) -> bool:
"""Check if a model is already downloaded in _MODELS_DIR."""
def _model_dir_for(repo_id: str, service: str | None) -> Path:
    """Return the download destination directory for a model.

    cf-text models go to the shared asset store (``_CF_TEXT_MODELS_DIR``) so
    every cluster node can load them without a separate download. All other
    services (avocet classifiers, fine-tunes) use the local ``_MODELS_DIR``.

    Args:
        repo_id: HuggingFace repo id, e.g. ``owner/model-name``.
        service: CF service that consumes the model, or ``None`` if unknown.

    Returns:
        ``<base dir>/<safe name>``, where the safe name is produced by
        ``_safe_model_name``.
    """
    base_dir = _CF_TEXT_MODELS_DIR if service == "cf-text" else _MODELS_DIR
    return base_dir / _safe_model_name(repo_id)
def _is_installed(repo_id: str, service: str | None = None) -> bool:
"""Check if a model is already downloaded in the appropriate destination."""
model_dir = _model_dir_for(repo_id, service)
return model_dir.exists() and (
(model_dir / "config.json").exists()
or (model_dir / "training_info.json").exists()
@ -125,48 +189,289 @@ def _get_queue_entry(entry_id: str) -> dict | None:
return None
# ── cf-orch catalog auto-registration ─────────────────────────────────────────
def _catalog_key(repo_id: str) -> str:
    """Build the catalog key for a model from its HuggingFace repo id.

    Everything up to and including the first ``/`` is dropped and the rest is
    lower-cased; an id without ``/`` is lower-cased as a whole.

        ibm-granite/granite-4.1-8b -> granite-4.1-8b
        facebook/bart-large-cnn    -> bart-large-cnn
    """
    owner, slash, model_name = repo_id.partition("/")
    # Without a slash, partition() puts the whole id in the first slot.
    key_source = model_name if slash else owner
    return key_source.lower()
def _insert_catalog_entry(content: str, entry_lines: str) -> str:
    """Append ``entry_lines`` to the ``cf-text`` → ``catalog`` section of a YAML text.

    The text is walked line by line (never re-serialised), so comments and
    formatting are preserved. The scan looks for ``cf-text:`` at indent 2,
    then ``catalog:`` at indent 4, and inserts the entry in front of the first
    following non-blank, non-comment line indented less than 6 columns, or at
    the end of the text when the catalog is the last section.

    Returns:
        The new text, or ``content`` unchanged when no catalog section exists.
    """
    rows = content.splitlines(keepends=True)
    # Scanner state: "seek-service" -> "seek-catalog" -> "in-catalog".
    state = "seek-service"

    for pos, row in enumerate(rows):
        body = row.lstrip()
        depth = len(row) - len(body)
        is_payload = bool(body) and not body.startswith("#")

        if state == "seek-service":
            if depth == 2 and body.startswith("cf-text:"):
                state = "seek-catalog"
        elif state == "seek-catalog":
            if depth == 4 and body.startswith("catalog:"):
                state = "in-catalog"
            elif is_payload and depth <= 2:
                # cf-text ended before any catalog key showed up.
                return content
        elif is_payload and depth < 6:
            # First line belonging to whatever follows the catalog.
            spacer = "\n" if rows[pos - 1].strip() else ""
            rows.insert(pos, spacer + entry_lines)
            return "".join(rows)

    if state != "in-catalog":
        return content

    # The catalog is the last section of the file: append after it.
    spacer = "\n" if rows and rows[-1].strip() else ""
    rows.append(spacer + entry_lines)
    return "".join(rows)
def _register_in_node_catalogs(
    repo_id: str,
    local_path: Path,
    vram_mb_fp16: int,
    role: str,
) -> list[str]:
    """Insert a cf-text catalog entry into every eligible node YAML.

    Every ``*.yaml`` file in ``_CF_ORCH_PROFILES_DIR`` is examined. A node is
    updated when:
      - It has a non-empty ``services.cf-text`` section
      - The model fits within the node's ``cf-text.max_mb`` at FP16 *or* 4-bit
      - Neither the model key nor the local path is already in the catalog
      - ``_insert_catalog_entry`` can locate the ``catalog:`` section

    Failures on a single file are logged and do not stop the remaining files.

    Args:
        repo_id: HuggingFace repo id; the catalog key is derived from it.
        local_path: Directory the model was downloaded to.
        vram_mb_fp16: Estimated FP16 VRAM footprint in MB.
        role: Model role, used only in the entry description.

    Returns:
        The file stems of the node profiles that were rewritten; empty when
        PyYAML or the profiles directory is unavailable.
    """
    try:
        import yaml  # lazy — not in the critical import path
    except ImportError:
        logger.warning("PyYAML not available — skipping catalog registration for %s", repo_id)
        return []

    profiles_dir = _CF_ORCH_PROFILES_DIR
    if not profiles_dir.exists():
        logger.warning(
            "cf-orch profiles dir not found: %s — skipping catalog registration", profiles_dir
        )
        return []

    model_key = _catalog_key(repo_id)
    local_path_str = str(local_path)
    # 4-bit estimate: a quarter of the FP16 footprint plus 10% headroom.
    vram_4bit = round(vram_mb_fp16 / 4 * 1.1)
    updated: list[str] = []

    for yaml_file in sorted(profiles_dir.glob("*.yaml")):
        try:
            content = yaml_file.read_text(encoding="utf-8")
            data = yaml.safe_load(content)
            # safe_load returns None for an empty file and may return a
            # scalar or list; such profiles have no services to update.
            if not isinstance(data, dict):
                logger.debug("%s is not a mapping — skipping", yaml_file.name)
                continue
            cf_text = (data.get("services") or {}).get("cf-text")
            if not cf_text:
                continue

            # "or" also covers an explicit null in the YAML.
            max_mb: int = cf_text.get("max_mb") or 0
            catalog: dict = cf_text.get("catalog") or {}

            # Skip if key already exists
            if model_key in catalog:
                logger.debug("Key %r already in %s — skipping", model_key, yaml_file.name)
                continue

            # Skip if any existing entry already points at this path (or a file within it)
            registered_paths = {
                str(entry.get("path", ""))
                for entry in catalog.values()
                if isinstance(entry, dict)
            }
            if local_path_str in registered_paths or any(
                p.startswith(local_path_str + "/") for p in registered_paths
            ):
                logger.debug("Path %s already registered in %s — skipping", local_path_str, yaml_file.name)
                continue

            # Determine whether model fits at FP16 or needs 4-bit
            if vram_mb_fp16 <= max_mb:
                vram_for_node = vram_mb_fp16
                needs_4bit = False
            elif vram_4bit <= max_mb:
                vram_for_node = vram_4bit
                needs_4bit = True
            else:
                logger.debug(
                    "%s too large for %s (fp16=%d MB, 4bit=%d MB, max=%d MB)",
                    repo_id, yaml_file.name, vram_mb_fp16, vram_4bit, max_mb,
                )
                continue

            desc = f"{repo_id} ({role}, downloaded via avocet)"
            if needs_4bit:
                desc += " — CF_TEXT_4BIT=1 required"

            vram_comment = (
                f" # 4-bit estimate; FP16 footprint is {vram_mb_fp16} MB"
                if needs_4bit
                else " # FP16 file-size estimate"
            )
            # Catalog entries live one level below ``catalog:`` (indent 4).
            # _insert_catalog_entry treats any payload line indented less than
            # 6 columns as the end of the catalog, so the key must sit at
            # indent 6 and its fields at indent 8 to stay inside the section.
            entry_block = (
                "      # auto-registered by avocet on download\n"
                f"      {model_key}:\n"
                f"        path: {local_path_str}\n"
                f"        vram_mb: {vram_for_node}{vram_comment}\n"
                f"        description: \"{desc}\"\n"
            )

            new_content = _insert_catalog_entry(content, entry_block)
            if new_content == content:
                logger.warning("Could not find catalog insertion point in %s", yaml_file.name)
                continue

            yaml_file.write_text(new_content, encoding="utf-8")
            updated.append(yaml_file.stem)
            logger.info(
                "Registered %s in %s (vram_mb=%d, 4bit=%s)",
                model_key, yaml_file.name, vram_for_node, needs_4bit,
            )

        except Exception as exc:
            # Best-effort: one unreadable or malformed profile must not block the rest.
            logger.warning("Could not update %s: %s", yaml_file.name, exc)

    return updated
# ── Background download ────────────────────────────────────────────────────────
def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter_recommendation: str | None) -> None:
"""Background thread: download model via huggingface_hub.snapshot_download."""
def _poll_disk_progress(local_dir: Path, total_bytes: int, stop_event: threading.Event) -> None:
    """Side-thread: poll local_dir size every 2s and update _download_progress.

    snapshot_download is a blocking call with no progress callback, so the
    size of the destination directory on disk is used as a proxy for progress.

    Args:
        local_dir: Directory being downloaded into.
        total_bytes: Expected final size; 0 means unknown, in which case only
            ``downloaded_bytes`` is updated and ``pct`` is left alone.
        stop_event: Set by the download thread to end polling.

    ``pct`` is capped at 99.0 here; reporting 100 is left to the download
    thread once the download has actually finished.
    """
    while not stop_event.is_set():
        try:
            downloaded = 0
            for entry in local_dir.rglob("*"):
                try:
                    if entry.is_file():
                        downloaded += entry.stat().st_size
                except OSError:
                    # A file can disappear between listing and stat() while
                    # the download is writing; skip it, keep the sample.
                    continue

            # The scan can be slow. _run_download sets "done" (and pct=100)
            # before it sets stop_event, so re-check both here to avoid
            # overwriting the final state with a stale <=99% sample.
            if stop_event.is_set() or _download_progress.get("done"):
                break

            _download_progress["downloaded_bytes"] = downloaded
            if total_bytes > 0:
                _download_progress["total_bytes"] = total_bytes
                _download_progress["pct"] = min(downloaded / total_bytes * 100, 99.0)
        except Exception as exc:
            # Progress reporting is best-effort and must never kill the thread.
            logger.debug("Progress poll of %s failed: %s", local_dir, exc)

        # wait() returns as soon as the event is set, so shutdown is not
        # delayed by a full polling interval the way time.sleep(2) would be.
        stop_event.wait(2)
def _run_download(
entry_id: str,
repo_id: str,
pipeline_tag: str | None,
adapter_recommendation: str | None,
role: str | None = None,
service: str | None = None,
model_size_bytes: int = 0,
) -> None:
"""Background thread: download model via huggingface_hub.snapshot_download.
model_size_bytes is the sum of file sizes reported by the HF API (siblings).
It is used to estimate vram_mb and written to model_info.json so cf-orch can
budget VRAM when allocating a cf-text instance for this model.
"""
global _download_progress
safe_name = _safe_model_name(repo_id)
local_dir = _MODELS_DIR / safe_name
local_dir = _model_dir_for(repo_id, service)
_download_progress = {
"active": True,
"repo_id": repo_id,
"downloaded_bytes": 0,
"total_bytes": 0,
"total_bytes": model_size_bytes,
"pct": 0.0,
"done": False,
"error": None,
}
stop_poll = threading.Event()
poll_thread = threading.Thread(
target=_poll_disk_progress,
args=(local_dir, model_size_bytes, stop_poll),
daemon=True,
name=f"model-poll-{entry_id}",
)
try:
if snapshot_download is None:
raise RuntimeError("huggingface_hub is not installed")
local_dir.mkdir(parents=True, exist_ok=True)
poll_thread.start()
snapshot_download(
repo_id=repo_id,
local_dir=str(local_dir),
)
# Write model_info.json alongside downloaded files
# Estimate VRAM from reported file size.
# HF siblings sizes are pre-quantisation file sizes; add 10% for KV cache
# and runtime overhead. Falls back to a stat of the local dir if 0.
if model_size_bytes == 0:
model_size_bytes = sum(
f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
)
vram_mb = int(model_size_bytes / (1024 * 1024) * 1.1)
# Write model_info.json alongside downloaded files.
# local_path + vram_mb are read by cf-orch at allocation time to resolve
# the full model path and grant the correct VRAM lease.
model_info = {
"repo_id": repo_id,
"pipeline_tag": pipeline_tag,
"adapter_recommendation": adapter_recommendation,
"role": role,
"service": service,
"model_size_bytes": model_size_bytes,
"vram_mb": vram_mb,
"local_path": str(local_dir),
"downloaded_at": datetime.now(timezone.utc).isoformat(),
}
local_dir.mkdir(parents=True, exist_ok=True)
(local_dir / "model_info.json").write_text(
json.dumps(model_info, indent=2), encoding="utf-8"
)
# Auto-register cf-text models in the cf-orch node YAML catalogs so they
# appear in the benchmark model list without a manual YAML edit.
if service == "cf-text":
registered_on = _register_in_node_catalogs(
repo_id=repo_id,
local_path=local_dir,
vram_mb_fp16=vram_mb,
role=role or "generator",
)
if registered_on:
logger.info(
"Auto-registered %s in node catalogs: %s",
repo_id, ", ".join(registered_on),
)
_download_progress["done"] = True
_download_progress["pct"] = 100.0
_update_queue_entry(entry_id, {"status": "ready"})
_update_queue_entry(entry_id, {"status": "ready", "local_path": str(local_dir)})
except Exception as exc:
logger.exception("Download failed for %s: %s", repo_id, exc)
@ -174,6 +479,7 @@ def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter
_download_progress["done"] = True
_update_queue_entry(entry_id, {"status": "failed", "error": str(exc)})
finally:
stop_poll.set()
_download_progress["active"] = False
@ -199,11 +505,15 @@ def lookup_model(repo_id: str) -> dict:
data = resp.json()
pipeline_tag = data.get("pipeline_tag")
adapter_recommendation = _TAG_TO_ADAPTER.get(pipeline_tag) if pipeline_tag else None
tag_info = _TAG_TO_INFO.get(pipeline_tag) if pipeline_tag else None
adapter_recommendation = tag_info["adapter"] if tag_info else None
role = tag_info["role"] if tag_info else None
service = tag_info["service"] if tag_info else None
# Determine compatibility and surface a human-readable warning
_supported = ", ".join(sorted(_TAG_TO_ADAPTER.keys()))
if adapter_recommendation is not None:
_supported = ", ".join(sorted(_TAG_TO_INFO.keys()))
if tag_info is not None:
# Any recognized tag is compatible — avocet adapters or another CF service
compatible = True
warning: str | None = None
elif pipeline_tag is None:
@ -216,7 +526,7 @@ def lookup_model(repo_id: str) -> dict:
else:
compatible = False
warning = (
f"\"{pipeline_tag}\" models are not supported by Avocet's email classification adapters. "
f"\"{pipeline_tag}\" models are not yet supported by the CircuitForge model ecosystem. "
f"Supported task types: {_supported}."
)
logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id)
@ -234,6 +544,8 @@ def lookup_model(repo_id: str) -> dict:
"repo_id": repo_id,
"pipeline_tag": pipeline_tag,
"adapter_recommendation": adapter_recommendation,
"role": role,
"service": service,
"compatible": compatible,
"warning": warning,
"model_size_bytes": model_size_bytes,
@ -261,12 +573,18 @@ class QueueAddRequest(BaseModel):
repo_id: str
pipeline_tag: str | None = None
adapter_recommendation: str | None = None
role: str | None = None
service: str | None = None
# Sum of file sizes from HF API siblings list; 0 if unknown.
# Stored in the queue entry so approve can pass it to _run_download
# without a second HF API round-trip.
model_size_bytes: int = 0
@router.post("/queue", status_code=201)
def add_to_queue(req: QueueAddRequest) -> dict:
"""Add a model to the approval queue with status 'pending'."""
if _is_installed(req.repo_id):
if _is_installed(req.repo_id, service=req.service):
raise HTTPException(409, f"{req.repo_id!r} is already installed")
if _is_queued(req.repo_id):
raise HTTPException(409, f"{req.repo_id!r} is already in the queue")
@ -276,6 +594,9 @@ def add_to_queue(req: QueueAddRequest) -> dict:
"repo_id": req.repo_id,
"pipeline_tag": req.pipeline_tag,
"adapter_recommendation": req.adapter_recommendation,
"role": req.role,
"service": req.service,
"model_size_bytes": req.model_size_bytes,
"status": "pending",
"queued_at": datetime.now(timezone.utc).isoformat(),
}
@ -300,7 +621,15 @@ def approve_queue_entry(entry_id: str) -> dict:
thread = threading.Thread(
target=_run_download,
args=(entry_id, entry["repo_id"], entry.get("pipeline_tag"), entry.get("adapter_recommendation")),
args=(
entry_id,
entry["repo_id"],
entry.get("pipeline_tag"),
entry.get("adapter_recommendation"),
entry.get("role"),
entry.get("service"),
entry.get("model_size_bytes", 0),
),
daemon=True,
name=f"model-download-{entry_id}",
)
@ -368,81 +697,286 @@ def download_stream() -> StreamingResponse:
)
# ── POST /sync-catalogs ────────────────────────────────────────────────────────
@router.post("/sync-catalogs")
def sync_catalogs() -> dict:
    """Scan all installed cf-text models and register any missing from node YAMLs.

    Reads model_info.json from each directory in the cf-text models dir and
    calls _register_in_node_catalogs() for each one whose ``service`` is
    ``cf-text``. Idempotent — models already present by key or path are
    skipped by that helper.

    Returns:
        A dict with ``registered`` (repo_id -> list of updated node names),
        ``skipped`` (directory names or repo ids that produced no update) and
        a human-readable ``message``.
    """
    if not _CF_TEXT_MODELS_DIR.exists():
        return {"registered": {}, "skipped": [], "message": "cf-text models dir not found"}

    registered: dict[str, list[str]] = {}
    skipped: list[str] = []

    for model_dir in sorted(_CF_TEXT_MODELS_DIR.iterdir()):
        if not model_dir.is_dir():
            continue

        info_file = model_dir / "model_info.json"
        if not info_file.exists():
            skipped.append(model_dir.name)
            continue

        try:
            info = json.loads(info_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both invalid JSON and undecodable bytes.
            logger.warning("Could not read model_info.json for %s: %s", model_dir.name, exc)
            skipped.append(model_dir.name)
            continue

        # A valid JSON document that is not an object carries no metadata.
        if not isinstance(info, dict) or info.get("service") != "cf-text":
            skipped.append(model_dir.name)
            continue

        # "or" rather than a .get() default: the keys may be present with a
        # null value, which would otherwise reach the helper as None.
        repo_id = info.get("repo_id") or model_dir.name
        vram_mb = info.get("vram_mb") or 0
        role = info.get("role") or "generator"

        updated_nodes = _register_in_node_catalogs(
            repo_id=repo_id,
            local_path=model_dir,
            vram_mb_fp16=vram_mb,
            role=role,
        )
        if updated_nodes:
            registered[repo_id] = updated_nodes
        else:
            skipped.append(repo_id)

    return {
        "registered": registered,
        "skipped": skipped,
        "message": (
            f"Registered {len(registered)} model(s) on "
            f"{sum(len(v) for v in registered.values())} node(s)"
            if registered
            else "All models already registered (or no eligible nodes found)"
        ),
    }
# ── GET /installed ─────────────────────────────────────────────────────────────
@router.get("/installed")
def list_installed() -> list[dict]:
"""Scan _MODELS_DIR and return info on each installed model."""
if not _MODELS_DIR.exists():
return []
"""Scan all model directories and return info on each installed model.
Scans both the local avocet models dir (classifiers, fine-tunes) and the
shared NFS cf-text models dir, deduplicating by directory path.
Falls back to queue entry data when model_info.json has null service/role,
so models downloaded before the pipeline_tag registry existed still group
correctly in the UI.
"""
scan_dirs = [_MODELS_DIR]
if _CF_TEXT_MODELS_DIR != _MODELS_DIR and _CF_TEXT_MODELS_DIR.exists():
scan_dirs.append(_CF_TEXT_MODELS_DIR)
# Build a lookup from safe directory name → queue entry for fallback enrichment.
queue_by_safe_name: dict[str, dict] = {
_safe_model_name(r["repo_id"]): r
for r in _read_queue()
if r.get("repo_id") and r.get("status") not in ("dismissed",)
}
results: list[dict] = []
for sub in _MODELS_DIR.iterdir():
if not sub.is_dir():
seen: set[Path] = set()
for scan_dir in scan_dirs:
if not scan_dir.exists():
continue
for sub in scan_dir.iterdir():
if not sub.is_dir() or sub in seen:
continue
seen.add(sub)
has_training_info = (sub / "training_info.json").exists()
has_config = (sub / "config.json").exists()
has_model_info = (sub / "model_info.json").exists()
has_training_info = (sub / "training_info.json").exists()
has_config = (sub / "config.json").exists()
has_model_info = (sub / "model_info.json").exists()
if not (has_training_info or has_config or has_model_info):
continue
if not (has_training_info or has_config or has_model_info):
continue
model_type = "finetuned" if has_training_info else "downloaded"
model_type = "finetuned" if has_training_info else "downloaded"
# Compute directory size
size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())
# Compute directory size
size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())
# Load adapter/model_id from model_info.json or training_info.json
adapter: str | None = None
model_id: str | None = None
adapter: str | None = None
model_id: str | None = None
role: str | None = None
service: str | None = None
vram_mb: int | None = None
if has_model_info:
try:
info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
adapter = info.get("adapter_recommendation")
model_id = info.get("repo_id")
except Exception:
pass
elif has_training_info:
try:
info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
adapter = info.get("adapter")
model_id = info.get("base_model") or info.get("model_id")
except Exception:
pass
if has_model_info:
try:
info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
adapter = info.get("adapter_recommendation")
model_id = info.get("repo_id")
role = info.get("role")
service = info.get("service")
vram_mb = info.get("vram_mb")
except Exception:
pass
elif has_training_info:
try:
info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
adapter = info.get("adapter")
model_id = info.get("base_model") or info.get("model_id")
role = info.get("role", "classifier")
service = info.get("service", "avocet")
except Exception:
pass
results.append({
"name": sub.name,
"path": str(sub),
"type": model_type,
"adapter": adapter,
"size_bytes": size_bytes,
"model_id": model_id,
})
# Fall back to queue entry when model_info.json has null service/role.
# This covers models downloaded before the pipeline_tag registry existed.
if (role is None or service is None) and sub.name in queue_by_safe_name:
q = queue_by_safe_name[sub.name]
role = role or q.get("role")
service = service or q.get("service")
model_id = model_id or q.get("repo_id")
# Last resort: re-derive from pipeline_tag if we still have no service.
if service is None and model_id:
hf_url = f"https://huggingface.co/api/models/{model_id}"
# Only attempt if we have a pipeline_tag cached somewhere.
for q in queue_by_safe_name.values():
if q.get("repo_id") == model_id and q.get("pipeline_tag"):
tag_info = _TAG_TO_INFO.get(q["pipeline_tag"])
if tag_info:
role = role or tag_info["role"]
service = service or tag_info["service"]
break
results.append({
"name": sub.name,
"path": str(sub),
"type": model_type,
"adapter": adapter,
"role": role,
"service": service,
"size_bytes": size_bytes,
"vram_mb": vram_mb,
"model_id": model_id,
})
return results
# ── PATCH /installed/{name} ────────────────────────────────────────────────────
class InstalledModelPatch(BaseModel):
    """Request body for ``PATCH /installed/{name}``.

    Both fields are required; the handler stores them in the model's
    ``model_info.json`` and in any matching queue entry.
    """

    service: str  # written to the "service" key
    role: str  # written to the "role" key
@router.patch("/installed/{name}")
def patch_installed(name: str, body: InstalledModelPatch) -> dict:
    """Manually assign service and role to an installed model.

    Writes the updated values back to model_info.json so they survive restarts,
    and updates any matching queue entry so the UI shows the correct chip.

    Args:
        name: Directory name of the installed model (a single path component).
        body: New ``service`` and ``role`` values.

    Returns:
        ``{"ok": True, "service": ..., "role": ...}`` echoing the stored values.

    Raises:
        HTTPException: 400 for an invalid or traversing name, 404 when no
            model directory with that name exists in either models dir.
    """
    if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
        raise HTTPException(400, f"Invalid model name {name!r}")

    candidate_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
        candidate_dirs.append(_CF_TEXT_MODELS_DIR)

    model_path: Path | None = None
    for base in candidate_dirs:
        candidate = base / name
        # Extra safety: the resolved path must stay inside its base directory.
        try:
            candidate.resolve().relative_to(base.resolve())
        except ValueError:
            raise HTTPException(400, f"Path traversal detected for name {name!r}") from None
        # Must be a directory: model_info.json is written inside it below.
        if candidate.is_dir():
            model_path = candidate
            break

    if model_path is None:
        raise HTTPException(404, f"Installed model {name!r} not found")

    # Start from the existing metadata when it is a readable JSON object;
    # anything else (missing, corrupt, or a non-object document) starts fresh.
    info: dict = {}
    info_path = model_path / "model_info.json"
    if info_path.exists():
        try:
            loaded = json.loads(info_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            loaded = None
        if isinstance(loaded, dict):
            info = loaded

    info["service"] = body.service
    info["role"] = body.role
    info_path.write_text(json.dumps(info, indent=2), encoding="utf-8")

    # Mirror the update into any matching queue entry.
    records = _read_queue()
    updated = False
    for r in records:
        if r.get("status") == "dismissed":
            continue
        local = r.get("local_path") or ""
        # "or" guards against a null repo_id stored in the queue file.
        repo_id = r.get("repo_id") or ""
        matches = (local and Path(local).name == name) or _safe_model_name(repo_id) == name
        if matches:
            r["service"] = body.service
            r["role"] = body.role
            updated = True
    if updated:
        _write_queue(records)

    return {"ok": True, "service": body.service, "role": body.role}
# ── DELETE /installed/{name} ───────────────────────────────────────────────────
@router.delete("/installed/{name}")
def delete_installed(name: str) -> dict:
"""Remove an installed model directory by name. Blocks path traversal."""
# Validate: single path component, no slashes or '..'
"""Remove an installed model directory by name. Blocks path traversal.
Searches both the local avocet models dir and the shared cf-text models dir.
Also dismisses any matching queue entry so the UI doesn't show a stale "ready" card.
"""
if "/" in name or "\\" in name or ".." in name or not name or name.startswith("."):
raise HTTPException(400, f"Invalid model name {name!r}: must be a single directory name with no path separators or '..'")
model_path = _MODELS_DIR / name
# Search both model directories
candidate_dirs = [_MODELS_DIR]
if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
candidate_dirs.append(_CF_TEXT_MODELS_DIR)
# Extra safety: confirm resolved path is inside _MODELS_DIR
try:
model_path.resolve().relative_to(_MODELS_DIR.resolve())
except ValueError:
raise HTTPException(400, f"Path traversal detected for name {name!r}")
model_path: Path | None = None
for base in candidate_dirs:
candidate = base / name
try:
candidate.resolve().relative_to(base.resolve())
except ValueError:
raise HTTPException(400, f"Path traversal detected for name {name!r}")
if candidate.exists():
model_path = candidate
break
if not model_path.exists():
raise HTTPException(404, f"Installed model {name!r} not found")
if model_path is None:
raise HTTPException(404, f"Installed model {name!r} not found in any model directory")
shutil.rmtree(model_path)
# Dismiss any queue entries whose local_path matches, or whose repo_id maps to this dir name.
records = _read_queue()
updated = False
for r in records:
local = r.get("local_path", "")
matches_path = local and Path(local).name == name
matches_name = _safe_model_name(r.get("repo_id", "")) == name
if (matches_path or matches_name) and r.get("status") != "dismissed":
r["status"] = "dismissed"
updated = True
if updated:
_write_queue(records)
return {"ok": True}

View file

@ -57,11 +57,32 @@ imitate:
- id: peregrine
name: Peregrine
icon: "🦅"
description: Job search assistant
base_url: http://localhost:8502
sample_endpoint: /api/jobs
text_fields: [title, description]
prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}"
description: Job search assistant — live job listings
base_url: http://localhost:8601
health_path: /api/jobs/counts
sample_endpoint: /api/jobs?status=pending&limit=5
text_fields: [title, company, description]
prompt_template: "Analyze this job listing and identify the key requirements, must-have skills, and any culture signals that would help tailor an application:\n\n{text}"
- id: osprey
name: Osprey
icon: "📞"
description: Gov't hold-line automation — recent call records
base_url: http://localhost:8520
health_path: /api/health
sample_endpoint: /api/calls/recent
text_fields: [agency, issue, notes]
prompt_template: "Draft a clear, professional follow-up letter for this government hold-line call. Include what was discussed, what action the agency committed to, and a polite deadline for response:\n\n{text}"
- id: linnet
name: Linnet
icon: "🐦"
description: Real-time tone annotation — Elcor-style subtext for ND users
base_url: http://localhost:8522
health_path: /health
sample_endpoint: /samples
text_fields: [text, context]
prompt_template: "Annotate the emotional tone and subtext of the following text using explicit Elcor-style markers (e.g. [SINCERELY], [UNCERTAIN], [FRUSTRATED]). Identify implied emotions, potential sarcasm, and any ambiguity that might be misread by neurodivergent readers:\n\n{text}"
- id: kiwi
name: Kiwi

View file

@ -90,6 +90,12 @@ usage() {
echo -e " ${GREEN}score [args]${NC} Shortcut: --score [args]"
echo -e " ${GREEN}compare [args]${NC} Shortcut: --compare [args]"
echo ""
echo " Writing Style Benchmark:"
echo -e " ${GREEN}style-bench [args]${NC} Run benchmark_style.py (args passed through)"
echo -e " ${GREEN}style-list${NC} List available ollama models for style bench"
echo -e " ${GREEN}style-run [args]${NC} Run writing style benchmark (--models, --samples, --include-large, --scan-disk PATH, --cforch)"
echo -e " ${GREEN}style-last${NC} Print most recent writing style benchmark report"
echo ""
echo " Dev:"
echo -e " ${GREEN}dev${NC} Hot-reload: uvicorn --reload (:8503) + Vite HMR (:5173)"
echo -e " ${GREEN}test${NC} Run pytest suite"
@ -249,6 +255,26 @@ case "$CMD" in
exec "$0" benchmark --compare "$@"
;;
style-bench)
info "Running writing style benchmark (${ENV_BM})…"
if [[ ! -x "$PYTHON_BM" ]]; then
error "Python not found in ${ENV_BM} env at ${PYTHON_BM}"
fi
"$PYTHON_BM" scripts/benchmark_style.py "$@"
;;
style-list)
exec "$0" style-bench --list-models
;;
style-run)
exec "$0" style-bench --run "$@"
;;
style-last)
exec "$0" style-bench --show-last
;;
help|--help|-h)
usage
;;

View file

@ -122,17 +122,88 @@ def test_lookup_returns_correct_shape(client):
assert data["already_queued"] is False
def test_lookup_unknown_pipeline_tag_returns_null_adapter(client):
"""An unrecognised pipeline_tag yields adapter_recommendation=null."""
def test_lookup_unknown_pipeline_tag_returns_null_adapter_and_incompatible(client):
"""An unrecognised pipeline_tag yields adapter_recommendation=null and compatible=False."""
mock_resp = MagicMock()
mock_resp.status_code = 200
mock_resp.json.return_value = _make_hf_response("org/m", "audio-classification")
mock_resp.json.return_value = _make_hf_response("org/m", "reinforcement-learning")
with patch("app.models.httpx.get", return_value=mock_resp):
r = client.get("/api/models/lookup", params={"repo_id": "org/m"})
assert r.status_code == 200
assert r.json()["adapter_recommendation"] is None
data = r.json()
assert data["adapter_recommendation"] is None
assert data["compatible"] is False
assert data["role"] is None
assert data["service"] is None
assert "CircuitForge model ecosystem" in data["warning"]
def test_lookup_stt_tag_returns_compatible_with_cf_stt_service(client):
    """An automatic-speech-recognition model is compatible and routed to cf-stt.

    The lookup must report role "stt", service "cf-stt", no adapter
    recommendation and no warning.
    """
    repo_id = "openai/whisper-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "automatic-speech-recognition")

    # Stub the outbound HF request so the endpoint sees our canned payload.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["adapter_recommendation"] is None
    assert payload["role"] == "stt"
    assert payload["service"] == "cf-stt"
    assert payload["warning"] is None
def test_lookup_vision_tag_returns_compatible_with_cf_vision_service(client):
    """An image-classification model is compatible, with role "vision" on cf-vision."""
    repo_id = "google/siglip-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "image-classification")

    # Stub the outbound HF request so the endpoint sees our canned payload.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "vision"
    assert payload["service"] == "cf-vision"
def test_lookup_audio_classification_tag_returns_cf_voice_service(client):
    """An audio-classification model is compatible, with role "classifier" on cf-voice."""
    repo_id = "org/audio-model"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "audio-classification")

    # Stub the outbound HF request so the endpoint sees our canned payload.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "classifier"
    assert payload["service"] == "cf-voice"
def test_lookup_embedding_tag_returns_compatible_with_cf_core_service(client):
    """A feature-extraction model is compatible, with role "embedding" on cf-core."""
    repo_id = "BAAI/bge-small-en"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo_id, "feature-extraction")

    # Stub the outbound HF request so the endpoint sees our canned payload.
    with patch("app.models.httpx.get", return_value=hf_reply):
        response = client.get("/api/models/lookup", params={"repo_id": repo_id})

    assert response.status_code == 200
    payload = response.json()
    assert payload["compatible"] is True
    assert payload["role"] == "embedding"
    assert payload["service"] == "cf-core"
def test_lookup_already_queued_flag(client):
@ -181,6 +252,26 @@ def test_queue_add_returns_entry_fields(client):
assert entry["adapter_recommendation"] == "ZeroShotAdapter"
def test_queue_preserves_role_and_service(client):
    """role/service sent to POST /queue come back in the 201 body and in GET /queue."""
    submission = {
        "repo_id": "openai/whisper-base",
        "pipeline_tag": "automatic-speech-recognition",
        "adapter_recommendation": None,
        "role": "stt",
        "service": "cf-stt",
    }

    created = client.post("/api/models/queue", json=submission)
    assert created.status_code == 201
    entry = created.json()
    assert entry["role"] == "stt"
    assert entry["service"] == "cf-stt"

    # The same values must survive a read back through the listing endpoint.
    listing = client.get("/api/models/queue")
    queued = listing.json()
    assert queued[0]["role"] == "stt"
    assert queued[0]["service"] == "cf-stt"
# ── POST /queue — 409 duplicate ────────────────────────────────────────────────
def test_queue_duplicate_returns_409(client):
@ -317,7 +408,12 @@ def test_installed_detects_downloaded_model(client, tmp_path):
model_dir.mkdir()
(model_dir / "config.json").write_text(json.dumps({"model_type": "bert"}), encoding="utf-8")
(model_dir / "model_info.json").write_text(
json.dumps({"repo_id": "org/mymodel", "adapter_recommendation": "ZeroShotAdapter"}),
json.dumps({
"repo_id": "org/mymodel",
"adapter_recommendation": "ZeroShotAdapter",
"role": "classifier",
"service": "avocet",
}),
encoding="utf-8",
)
@ -329,6 +425,51 @@ def test_installed_detects_downloaded_model(client, tmp_path):
assert items[0]["name"] == "org--mymodel"
assert items[0]["adapter"] == "ZeroShotAdapter"
assert items[0]["model_id"] == "org/mymodel"
assert items[0]["role"] == "classifier"
assert items[0]["service"] == "avocet"
def test_installed_stt_model_surfaces_role_and_service(client):
    """GET /installed echoes role/service from model_info.json, with a null adapter."""
    from app import models as models_module

    model_info = {
        "repo_id": "openai/whisper-base",
        "adapter_recommendation": None,
        "role": "stt",
        "service": "cf-stt",
    }

    # Lay out a downloaded-model directory: config.json plus model_info.json.
    install_dir = models_module._MODELS_DIR / "openai--whisper-base"
    install_dir.mkdir()
    (install_dir / "config.json").write_text(
        json.dumps({"model_type": "whisper"}), encoding="utf-8"
    )
    (install_dir / "model_info.json").write_text(json.dumps(model_info), encoding="utf-8")

    response = client.get("/api/models/installed")
    assert response.status_code == 200
    listed = response.json()
    assert listed[0]["role"] == "stt"
    assert listed[0]["service"] == "cf-stt"
    assert listed[0]["adapter"] is None
def test_installed_finetuned_model_defaults_to_avocet_service(client):
    """A model dir with only training_info.json (no role/service keys) is listed
    with role "classifier" and service "avocet"."""
    from app import models as models_module

    training_info = {"base_model": "microsoft/deberta-v3-base", "epochs": 3}

    finetuned_dir = models_module._MODELS_DIR / "my-finetuned-v2"
    finetuned_dir.mkdir()
    (finetuned_dir / "training_info.json").write_text(
        json.dumps(training_info), encoding="utf-8"
    )

    response = client.get("/api/models/installed")
    assert response.status_code == 200
    listed = response.json()
    assert listed[0]["role"] == "classifier"
    assert listed[0]["service"] == "avocet"
def test_installed_detects_finetuned_model(client):

4
web/.gitignore vendored
View file

@ -22,3 +22,7 @@ dist-ssr
*.njsproj
*.sln
*.sw?
# Local environment overrides
.env

View file

@ -42,6 +42,12 @@
<span v-if="lookupResult.pipeline_tag" class="chip chip-pipeline">
{{ lookupResult.pipeline_tag }}
</span>
<span v-if="lookupResult.role" class="chip chip-role">
{{ lookupResult.role }}
</span>
<span v-if="lookupResult.service" class="chip" :class="serviceChipClass(lookupResult.service)">
{{ lookupResult.service }}
</span>
<span v-if="lookupResult.adapter_recommendation" class="chip chip-adapter">
{{ lookupResult.adapter_recommendation }}
</span>
@ -61,11 +67,10 @@
<button
class="btn-primary btn-add-queue"
:class="{ 'btn-add-queue-warn': !lookupResult.compatible }"
:disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue"
@click="addToQueue"
>
{{ addingToQueue ? 'Adding…' : lookupResult.compatible ? 'Add to queue' : 'Add anyway' }}
{{ addingToQueue ? 'Adding…' : 'Add to queue' }}
</button>
</div>
</section>
@ -90,7 +95,9 @@
</button>
</div>
<div class="model-meta">
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
<span v-if="model.role" class="chip chip-role">{{ model.role }}</span>
<span v-if="model.service" class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
<span v-if="model.adapter_recommendation" class="chip chip-adapter">{{ model.adapter_recommendation }}</span>
</div>
<div class="model-card-actions">
@ -116,6 +123,8 @@
</div>
<div class="model-meta">
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
<span v-if="model.role" class="chip chip-role">{{ model.role }}</span>
<span v-if="model.service" class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
</div>
<div v-if="downloadErrors[model.id]" class="download-error" role="alert">
@ -124,14 +133,19 @@
<div v-else class="progress-wrap" :aria-label="`Download progress for ${model.repo_id}`">
<div
class="progress-bar"
:style="{ width: `${downloadProgress[model.id] ?? 0}%` }"
:style="{ width: `${downloadProgress[model.repo_id]?.pct ?? 0}%` }"
role="progressbar"
:aria-valuenow="downloadProgress[model.id] ?? 0"
:aria-valuenow="downloadProgress[model.repo_id]?.pct ?? 0"
aria-valuemin="0"
aria-valuemax="100"
/>
<span class="progress-label">
{{ downloadProgress[model.id] == null ? 'Preparing…' : `${downloadProgress[model.id]}%` }}
{{
!downloadProgress[model.repo_id] ? 'Preparing…'
: downloadProgress[model.repo_id].pct != null ? `${Math.round(downloadProgress[model.repo_id].pct!)}%`
: downloadProgress[model.repo_id].bytes > 0 ? `${(downloadProgress[model.repo_id].bytes / 1024 / 1024).toFixed(0)} MB downloaded…`
: 'Preparing…'
}}
</span>
</div>
</div>
@ -145,42 +159,90 @@
No models installed yet.
</div>
<div v-else class="installed-table-wrap">
<table class="installed-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Adapter</th>
<th>Size</th>
<th></th>
</tr>
</thead>
<tbody>
<tr v-for="model in installedModels" :key="model.name">
<td class="td-name">{{ model.name }}</td>
<td>
<span
class="badge"
:class="model.type === 'finetuned' ? 'badge-accent' : 'badge-info'"
>
{{ model.type }}
</span>
</td>
<td>{{ model.adapter ?? '—' }}</td>
<td>{{ humanBytes(model.size) }}</td>
<td>
<button
class="btn-danger btn-sm"
@click="deleteInstalled(model.name)"
>
Delete
</button>
</td>
</tr>
</tbody>
</table>
</div>
<template v-else>
<div
v-for="group in installedByService"
:key="group.service"
class="installed-group"
>
<div class="installed-group-header">
<span class="chip" :class="serviceChipClass(group.service)">
{{ serviceLabel(group.service) }}
</span>
<span class="installed-group-count">{{ group.models.length }} model{{ group.models.length !== 1 ? 's' : '' }}</span>
</div>
<div class="installed-table-wrap">
<table class="installed-table">
<thead>
<tr>
<th>Name</th>
<th>Type</th>
<th>Role</th>
<th>Size</th>
<th></th>
</tr>
</thead>
<tbody>
<tr v-for="model in group.models" :key="model.name">
<td class="td-name">{{ model.model_id ?? model.name }}</td>
<td>
<span
class="badge"
:class="model.type === 'finetuned' ? 'badge-accent' : 'badge-info'"
>
{{ model.type }}
</span>
</td>
<td>
<span v-if="model.role" class="chip chip-role chip-sm">{{ model.role }}</span>
<span v-else></span>
</td>
<td>{{ humanBytes(model.size_bytes) }}</td>
<td class="td-actions">
<div v-if="!model.service" class="classify-row">
<select
class="classify-select"
:value="classifyDraft[model.name]?.service ?? ''"
@change="onServiceChange(model.name, ($event.target as HTMLSelectElement).value)"
aria-label="Assign service"
>
<option value="" disabled>Service</option>
<option v-for="svc in CLASSIFIABLE_SERVICES" :key="svc.value" :value="svc.value">{{ svc.label }}</option>
</select>
<select
class="classify-select"
:value="classifyDraft[model.name]?.role ?? ''"
:disabled="!classifyDraft[model.name]?.service"
@change="(e) => setClassifyRole(model.name, (e.target as HTMLSelectElement).value)"
aria-label="Assign role"
>
<option value="" disabled>Role</option>
<option
v-for="role in rolesForService(classifyDraft[model.name]?.service ?? '')"
:key="role"
:value="role"
>{{ role }}</option>
</select>
<button
class="btn-primary btn-sm"
:disabled="!classifyDraft[model.name]?.service || !classifyDraft[model.name]?.role"
@click="saveClassify(model.name)"
>Save</button>
</div>
<button
class="btn-danger btn-sm"
@click="deleteInstalled(model.name)"
>
Delete
</button>
</td>
</tr>
</tbody>
</table>
</div>
</div>
</template>
</section>
</div>
</template>
@ -194,6 +256,8 @@ interface LookupResult {
repo_id: string
pipeline_tag: string | null
adapter_recommendation: string | null
role: string | null
service: string | null
compatible: boolean
warning: string | null
size: number | null
@ -208,20 +272,27 @@ interface QueuedModel {
status: 'pending' | 'downloading' | 'done' | 'error'
pipeline_tag: string | null
adapter_recommendation: string | null
role: string | null
service: string | null
}
interface InstalledModel {
name: string
type: 'finetuned' | 'downloaded'
adapter: string | null
size: number
role: string | null
service: string | null
size_bytes: number
model_id: string | null
}
interface SseProgressEvent {
model_id: string
pct: number | null
status: 'progress' | 'done' | 'error'
message?: string
type: 'progress' | 'done' | 'error' | 'idle'
repo_id?: string
pct?: number
downloaded_bytes?: number
total_bytes?: number
error?: string
}
// State
@ -235,7 +306,8 @@ const addingToQueue = ref(false)
const queuedModels = ref<QueuedModel[]>([])
const installedModels = ref<InstalledModel[]>([])
const downloadProgress = ref<Record<string, number>>({})
const downloadProgress = ref<Record<string, { pct: number | null; bytes: number }>>({})
const classifyDraft = ref<Record<string, { service: string; role: string }>>({})
const downloadErrors = ref<Record<string, string>>({})
let pollInterval: ReturnType<typeof setInterval> | null = null
@ -251,8 +323,69 @@ const downloadingModels = computed(() =>
queuedModels.value.filter(m => m.status === 'downloading')
)
// Display order of the installed-model groups; services not listed here are
// appended after these by installedByService.
const SERVICE_ORDER = ['avocet', 'cf-text', 'cf-stt', 'cf-tts', 'cf-vision', 'cf-image', 'cf-core', 'cf-voice', 'other']
// Options offered by the "Assign service" dropdown for installed models that
// have no service yet. 'other' is intentionally absent: it is the bucket for
// unclassified models, not something a model can be assigned to.
const CLASSIFIABLE_SERVICES = [
{ value: 'avocet', label: 'Avocet — Email Classifiers' },
{ value: 'cf-text', label: 'cf-text — Language Models' },
{ value: 'cf-stt', label: 'cf-stt — Speech Recognition' },
{ value: 'cf-tts', label: 'cf-tts — Text to Speech' },
{ value: 'cf-vision', label: 'cf-vision — Vision / VLM' },
{ value: 'cf-image', label: 'cf-image — Image Generation' },
{ value: 'cf-core', label: 'cf-core — Embeddings' },
{ value: 'cf-voice', label: 'cf-voice — Audio Classification' },
]
// Roles selectable for each service in the "Assign role" dropdown. When a
// service has exactly one role, onServiceChange pre-selects it.
const SERVICE_ROLES: Record<string, string[]> = {
'avocet': ['classifier', 'reranker'],
'cf-text': ['generator'],
'cf-stt': ['stt', 'alm'],
'cf-tts': ['tts'],
'cf-vision': ['vision', 'vlm', 'embedding'],
'cf-image': ['image-gen'],
'cf-core': ['embedding'],
'cf-voice': ['classifier'],
}
/** Roles that may be assigned under `service`; empty for an unknown or blank service. */
function rolesForService(service: string): string[] {
  const roles = SERVICE_ROLES[service]
  return roles ?? []
}
/**
 * Installed models bucketed by service, as `{ service, models }` sections.
 *
 * Sections for services listed in SERVICE_ORDER come first, in that order;
 * any other service follows in first-seen order. Models without a service
 * land in the 'other' section.
 */
const installedByService = computed(() => {
  // A Map (rather than a plain object) keeps service names such as
  // "constructor" or "__proto__" from colliding with Object.prototype members.
  const grouped = new Map<string, InstalledModel[]>()
  for (const model of installedModels.value) {
    // Any falsy service (null or '') counts as unclassified — the same check
    // the template uses (`!model.service`) to show the classify controls.
    const key = model.service || 'other'
    const bucket = grouped.get(key)
    if (bucket) {
      bucket.push(model)
    } else {
      grouped.set(key, [model])
    }
  }
  // Known services first (in SERVICE_ORDER order), then anything else.
  const known = SERVICE_ORDER.filter(s => grouped.has(s))
  const extra = Array.from(grouped.keys()).filter(k => !SERVICE_ORDER.includes(k))
  return [...known, ...extra].map(service => ({ service, models: grouped.get(service)! }))
})
// Helpers
// Human-readable heading for each service key, used for the installed-model
// group headers. Includes 'other', the bucket for models with no service.
// Keys missing from this table are displayed verbatim by serviceLabel().
const SERVICE_LABELS: Record<string, string> = {
'avocet': 'Avocet — Email Classifiers',
'cf-text': 'cf-text — Language Models',
'cf-stt': 'cf-stt — Speech Recognition',
'cf-tts': 'cf-tts — Text to Speech',
'cf-vision': 'cf-vision — Vision / VLM',
'cf-image': 'cf-image — Image Generation',
'cf-core': 'cf-core — Embeddings',
'cf-voice': 'cf-voice — Audio Classification',
'other': 'Other — Unclassified',
}
/** Display label for a service key; falls back to the key itself when no label is registered. */
function serviceLabel(service: string): string {
  const label = SERVICE_LABELS[service]
  return label ?? service
}
/**
 * CSS class for a service chip: `chip-service-<service>`.
 *
 * Every character other than lowercase a-z and 0-9 is replaced with '-'.
 * A null or empty service maps to the neutral `chip-service-other` class.
 */
function serviceChipClass(service: string | null): string {
  return service
    ? `chip-service-${service.replace(/[^a-z0-9]/g, '-')}`
    : 'chip-service-other'
}
function humanBytes(bytes: number | null): string {
if (bytes == null) return '—'
const units = ['B', 'KB', 'MB', 'GB', 'TB']
@ -305,10 +438,11 @@ async function addToQueue() {
if (!lookupResult.value) return
addingToQueue.value = true
try {
const { repo_id, pipeline_tag, adapter_recommendation, role, service } = lookupResult.value
const res = await fetch('/api/models/queue', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ repo_id: lookupResult.value.repo_id }),
body: JSON.stringify({ repo_id, pipeline_tag, adapter_recommendation, role, service }),
})
if (res.ok) {
lookupResult.value = { ...lookupResult.value, already_queued: true }
@ -339,12 +473,50 @@ async function dismissModel(id: string) {
} catch { /* ignore */ }
}
/**
 * Record the service picked for installed model `name` in the classify draft.
 * Picking a service always resets the role: it is pre-filled when the service
 * offers exactly one role, otherwise cleared so the user must choose.
 */
function onServiceChange(name: string, service: string) {
  const candidates = SERVICE_ROLES[service] ?? []
  const role = candidates.length === 1 ? candidates[0] : ''
  // Assign a fresh object rather than mutating the existing draft map.
  classifyDraft.value = { ...classifyDraft.value, [name]: { service, role } }
}
/** Set the drafted role for installed model `name`, keeping its drafted service. */
function setClassifyRole(name: string, role: string) {
  const current = classifyDraft.value[name]
  // Assign a fresh object rather than mutating the existing draft map.
  classifyDraft.value = {
    ...classifyDraft.value,
    [name]: { ...current, role },
  }
}
/**
 * Persist the drafted service/role for installed model `name` via
 * PATCH /api/models/installed/<name>.
 *
 * Does nothing unless both service and role are drafted. On a successful
 * response the local model entry is updated, the draft is discarded and the
 * queue is reloaded. Non-OK responses and thrown errors are ignored.
 */
async function saveClassify(name: string) {
  const draft = classifyDraft.value[name]
  if (!draft?.service || !draft?.role) return
  const { service, role } = draft
  try {
    const url = `/api/models/installed/${encodeURIComponent(name)}`
    const res = await fetch(url, {
      method: 'PATCH',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ service, role }),
    })
    if (!res.ok) return

    // Patch the local copy so the model moves to the correct service group.
    installedModels.value = installedModels.value.map(m => {
      if (m.name !== name) return m
      return { ...m, service, role }
    })

    // Drop the saved draft.
    const remaining = { ...classifyDraft.value }
    delete remaining[name]
    classifyDraft.value = remaining

    await loadQueue()
  } catch {
    /* non-fatal */
  }
}
/**
 * Delete installed model `name` after a browser confirmation prompt.
 *
 * On a successful DELETE the model is removed from the local list and the
 * queue is reloaded. Non-OK responses and thrown errors are ignored.
 */
async function deleteInstalled(name: string) {
  const confirmed = window.confirm(`Delete installed model "${name}"? This cannot be undone.`)
  if (!confirmed) return
  try {
    const url = `/api/models/installed/${encodeURIComponent(name)}`
    const res = await fetch(url, { method: 'DELETE' })
    if (!res.ok) return
    installedModels.value = installedModels.value.filter(m => m.name !== name)
    await loadQueue()
  } catch {
    /* ignore */
  }
}
@ -378,21 +550,28 @@ function startSse() {
return
}
const { model_id, pct, status, message } = event
const { type, repo_id, pct, downloaded_bytes, error } = event
if (!repo_id) return
if (status === 'progress' && pct != null) {
downloadProgress.value = { ...downloadProgress.value, [model_id]: pct }
} else if (status === 'done') {
if (type === 'progress') {
const bytes = downloaded_bytes ?? 0
// pct stays null when total_bytes is unknown so we can show "X MB" instead
const progress = (pct != null && pct > 0) ? pct : (bytes > 0 ? null : undefined)
downloadProgress.value = { ...downloadProgress.value, [repo_id]: { pct: progress ?? null, bytes } }
} else if (type === 'done') {
const updated = { ...downloadProgress.value }
delete updated[model_id]
delete updated[repo_id]
downloadProgress.value = updated
queuedModels.value = queuedModels.value.filter(m => m.id !== model_id)
queuedModels.value = queuedModels.value.filter(m => m.repo_id !== repo_id)
loadInstalled()
} else if (status === 'error') {
downloadErrors.value = {
...downloadErrors.value,
[model_id]: message ?? 'Download failed.',
} else if (type === 'error') {
const entry = queuedModels.value.find(m => m.repo_id === repo_id)
if (entry) {
downloadErrors.value = {
...downloadErrors.value,
[entry.id]: error ?? 'Download failed.',
}
}
}
})
@ -595,12 +774,6 @@ onUnmounted(() => {
align-self: flex-start;
}
.btn-add-queue-warn {
background: var(--color-surface-raised, #e4ebf5);
color: var(--color-text-secondary, #6b7a99);
border: 1px solid var(--color-border, #d0d7e8);
}
/* ── Model cards (queue + downloads) ── */
.model-card {
border: 1px solid var(--color-border, #a8b8d0);
@ -715,6 +888,35 @@ onUnmounted(() => {
word-break: break-all;
}
/* Actions cell of the installed-models table: stacks the classify controls
   above the Delete button.
   NOTE(review): this class sits on a <td>; display: flex replaces the cell's
   table-cell display — confirm row height and borders still line up. */
.td-actions {
display: flex;
flex-direction: column;
gap: 0.4rem;
align-items: flex-start;
}
/* Service select + role select + Save button on one line, wrapping when narrow. */
.classify-row {
display: flex;
gap: 0.35rem;
align-items: center;
flex-wrap: wrap;
}
/* Compact dropdowns used for assigning service and role. */
.classify-select {
font-size: 0.78rem;
padding: 0.2rem 0.4rem;
border-radius: 4px;
border: 1px solid var(--color-border, #444);
background: var(--color-surface, #1e1e2e);
color: var(--color-text, #cdd6f4);
cursor: pointer;
}
/* The role dropdown is disabled until a service has been chosen. */
.classify-select:disabled {
opacity: 0.4;
cursor: not-allowed;
}
/* ── Badges ── */
.badge-group {
display: flex;
@ -777,6 +979,76 @@ onUnmounted(() => {
background: color-mix(in srgb, var(--color-accent, #c4732a) 12%, var(--color-surface-alt, #dde4f0));
}
/* Role chip (e.g. classifier, stt): info colour on a 12% tint of itself. */
.chip-role {
color: var(--color-info, #1e6091);
background: color-mix(in srgb, var(--color-info, #1e6091) 12%, var(--color-surface-alt, #dde4f0));
}
/* Size modifier for chips placed inside table cells. */
.chip-sm {
font-size: 0.68rem;
padding: 0.1rem 0.4rem;
}
/* Service chips — one colour per CF service.
   Class names are produced by serviceChipClass() as `chip-service-<service>`,
   so every assignable service needs a rule here; models without a service
   get `chip-service-other`. */
.chip-service-avocet {
  color: var(--color-primary, #2d5a27);
  background: color-mix(in srgb, var(--color-primary, #2d5a27) 15%, var(--color-surface-alt, #dde4f0));
}

.chip-service-cf-text {
  color: #c2410c;
  background: color-mix(in srgb, #c2410c 12%, var(--color-surface-alt, #dde4f0));
}

.chip-service-cf-stt {
  color: #5e35b1;
  background: color-mix(in srgb, #5e35b1 12%, var(--color-surface-alt, #dde4f0));
}

.chip-service-cf-tts {
  color: #0277bd;
  background: color-mix(in srgb, #0277bd 12%, var(--color-surface-alt, #dde4f0));
}

.chip-service-cf-vision {
  color: #00695c;
  background: color-mix(in srgb, #00695c 12%, var(--color-surface-alt, #dde4f0));
}

/* cf-image is an assignable service; without this rule its chip had no colours. */
.chip-service-cf-image {
  color: #827717;
  background: color-mix(in srgb, #827717 12%, var(--color-surface-alt, #dde4f0));
}

.chip-service-cf-core {
  color: #6d4c41;
  background: color-mix(in srgb, #6d4c41 12%, var(--color-surface-alt, #dde4f0));
}

.chip-service-cf-voice {
  color: #ad1457;
  background: color-mix(in srgb, #ad1457 12%, var(--color-surface-alt, #dde4f0));
}

/* Neutral fallback for unclassified models. */
.chip-service-other {
  color: var(--color-text-muted, #4a5c7a);
  background: var(--color-surface-alt, #dde4f0);
}
/* ── Installed group ── */
/* One section per service: header row followed by that service's table. */
.installed-group {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
/* Header row: service chip followed by the model count. */
.installed-group-header {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.25rem 0;
}
/* "N model(s)" counter shown next to the service chip. */
.installed-group-count {
font-size: 0.78rem;
color: var(--color-text-muted, #4a5c7a);
}
/* ── Buttons ── */
.btn-primary, .btn-danger {
padding: 0.4rem 0.9rem;
@ -852,7 +1124,7 @@ onUnmounted(() => {
.installed-table th:nth-child(3),
.installed-table td:nth-child(3) {
display: none; /* hide Adapter column on very narrow screens */
display: none; /* hide Role column on very narrow screens */
}
}
</style>