feat(models): extended model registry + manage.sh benchmark subcommands
- app/models.py: add StyleModel and VoiceModel entries; expand cf-text and benchmark model metadata (vram_mb, description, tags) - tests/test_models.py: coverage for new model types and registry helpers - ModelsView.vue: updated model browser with style/voice filter tabs - manage.sh: add benchmark-style and benchmark-voice subcommands - config/label_tool.yaml.example: add style + voice benchmark config stubs - web/.gitignore: add node_modules and dist entries
This commit is contained in:
parent
ddb56efb89
commit
ea3da701c6
6 changed files with 1150 additions and 152 deletions
616
app/models.py
616
app/models.py
|
|
@ -14,11 +14,12 @@ from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import threading
|
import threading
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any, TypedDict
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
@ -39,21 +40,67 @@ _ROOT = Path(__file__).parent.parent
|
||||||
_MODELS_DIR: Path = _ROOT / "models"
|
_MODELS_DIR: Path = _ROOT / "models"
|
||||||
_QUEUE_DIR: Path = _ROOT / "data"
|
_QUEUE_DIR: Path = _ROOT / "data"
|
||||||
|
|
||||||
|
# Where downloaded models are stored, per consuming service.
# Models for cf-text go to the shared asset store (NFS-mounted, per the
# deployment notes) so each cluster node can read them without downloading
# its own copy. Avocet classifiers remain in the local models directory: they
# are fine-tuned in place and only avocet reads them.
# Set CF_TEXT_MODELS_DIR to point somewhere else (dev boxes, non-NFS setups).
_CF_TEXT_MODELS_DIR: Path = Path(
    os.getenv("CF_TEXT_MODELS_DIR", "/Library/Assets/LLM/cf-text/models")
)

# Folder holding one cf-orch YAML profile per node. Catalog auto-registration
# appends entries to the files in this folder after a model download.
# Set CF_ORCH_PROFILES_DIR to override the default location.
_DEFAULT_CF_ORCH_PROFILES = (
    "/Library/Development/CircuitForge/circuitforge-orch/circuitforge_orch/profiles/nodes"
)
_CF_ORCH_PROFILES_DIR: Path = Path(
    os.getenv("CF_ORCH_PROFILES_DIR", _DEFAULT_CF_ORCH_PROFILES)
)
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
# ── Download progress shared state ────────────────────────────────────────────
|
# ── Download progress shared state ────────────────────────────────────────────
|
||||||
# Updated by the background download thread; read by GET /download/stream.
|
# Updated by the background download thread; read by GET /download/stream.
|
||||||
_download_progress: dict[str, Any] = {}
|
_download_progress: dict[str, Any] = {}
|
||||||
|
|
||||||
# ── HF pipeline_tag → adapter recommendation ──────────────────────────────────
|
# ── HF pipeline_tag → CF service info ────────────────────────────────────────
|
||||||
_TAG_TO_ADAPTER: dict[str, str] = {
|
|
||||||
"zero-shot-classification": "ZeroShotAdapter",
|
|
||||||
"text-classification": "ZeroShotAdapter",
|
class _TagInfo(TypedDict):
    """Routing metadata recorded for one HuggingFace ``pipeline_tag``."""

    # Avocet adapter class name, or None when another service handles the model
    adapter: str | None
    # Human-readable model role (classifier, stt, tts, vision, ...)
    role: str
    # CF service that consumes this model type
    service: str


# pipeline_tag → info. Declared as compact (tag, adapter, role, service) rows
# and expanded into the per-tag dictionaries in one pass; row order is the
# dictionary's insertion order.
_TAG_TO_INFO: dict[str, _TagInfo] = {
    tag: {"adapter": adapter, "role": role, "service": service}
    for tag, adapter, role, service in (
        # Avocet email classifiers and rerankers
        ("zero-shot-classification", "ZeroShotAdapter", "classifier", "avocet"),
        ("text-classification", "ZeroShotAdapter", "classifier", "avocet"),
        ("natural-language-inference", "ZeroShotAdapter", "classifier", "avocet"),
        ("sentence-similarity", "RerankerAdapter", "reranker", "avocet"),
        ("text-ranking", "RerankerAdapter", "reranker", "avocet"),
        # Text generators — consumed by cf-text
        ("text-generation", "GenerationAdapter", "generator", "cf-text"),
        ("text2text-generation", "GenerationAdapter", "generator", "cf-text"),
        ("summarization", "GenerationAdapter", "generator", "cf-text"),
        # STT — cf-stt speech recognition service
        ("automatic-speech-recognition", None, "stt", "cf-stt"),
        # Audio language models — audio + text → text (understanding, QA, captioning)
        ("audio-text-to-text", None, "alm", "cf-stt"),
        # Audio classification — cf-voice sidecar context stream
        ("audio-classification", None, "classifier", "cf-voice"),
        # TTS — cf-tts text-to-speech service
        ("text-to-speech", None, "tts", "cf-tts"),
        # Vision — cf-vision image classification / embedding / VLM service
        ("image-classification", None, "vision", "cf-vision"),
        ("zero-shot-image-classification", None, "vision", "cf-vision"),
        ("image-feature-extraction", None, "embedding", "cf-vision"),
        ("image-text-to-text", None, "vlm", "cf-vision"),
        ("visual-question-answering", None, "vlm", "cf-vision"),
        # Image generation — cf-image (text → image; distinct from cf-vision)
        ("text-to-image", None, "image-gen", "cf-image"),
        # Embedding — cf-core shared embedding layer
        ("feature-extraction", None, "embedding", "cf-core"),
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -84,14 +131,31 @@ def _write_queue(records: list[dict]) -> None:
|
||||||
|
|
||||||
|
|
||||||
def _safe_model_name(repo_id: str) -> str:
    """Turn a Hub repo id into a directory name that is safe on disk.

    Every ``/`` is replaced by ``--`` (``owner/model-name`` →
    ``owner--model-name``), the same separator the HuggingFace Hub cache
    layout uses. cf-orch is expected to build cf-text model paths with the
    same rule, so the two must stay in step.
    """
    segments = repo_id.split("/")
    return "--".join(segments)
|
||||||
|
|
||||||
|
|
||||||
def _is_installed(repo_id: str) -> bool:
|
def _model_dir_for(repo_id: str, service: str | None) -> Path:
    """Pick the on-disk destination directory for a model.

    Models whose service is ``"cf-text"`` are placed under
    ``_CF_TEXT_MODELS_DIR`` (the shared asset store) so other nodes can load
    them without downloading again. Any other service value, including
    ``None``, maps to the local ``_MODELS_DIR``.
    """
    base_dir = _CF_TEXT_MODELS_DIR if service == "cf-text" else _MODELS_DIR
    return base_dir / _safe_model_name(repo_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_installed(repo_id: str, service: str | None = None) -> bool:
|
||||||
|
"""Check if a model is already downloaded in the appropriate destination."""
|
||||||
|
model_dir = _model_dir_for(repo_id, service)
|
||||||
return model_dir.exists() and (
|
return model_dir.exists() and (
|
||||||
(model_dir / "config.json").exists()
|
(model_dir / "config.json").exists()
|
||||||
or (model_dir / "training_info.json").exists()
|
or (model_dir / "training_info.json").exists()
|
||||||
|
|
@ -125,48 +189,289 @@ def _get_queue_entry(entry_id: str) -> dict | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── cf-orch catalog auto-registration ─────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _catalog_key(repo_id: str) -> str:
    """Build a short, lower-case catalog key from a repo id.

    The owner prefix (everything up to the first ``/``) is dropped:

        ibm-granite/granite-4.1-8b  →  granite-4.1-8b
        facebook/bart-large-cnn     →  bart-large-cnn

    An id without a slash is used whole.
    """
    _owner, slash, model_name = repo_id.partition("/")
    # Without a slash, partition() leaves the entire id in the first slot.
    bare_name = model_name if slash else repo_id
    return bare_name.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_catalog_entry(content: str, entry_lines: str) -> str:
    """Insert entry_lines at the end of the cf-text.catalog section.

    The YAML text is scanned line by line (never re-serialised) so comments
    and the original formatting survive. The scan relies on a fixed layout:
    ``cf-text:`` at indent 2, ``catalog:`` at indent 4 and catalog entries at
    indent 6 or deeper.

    Args:
        content: Full text of a node profile.
        entry_lines: Pre-indented YAML block for the new entry. A missing
            trailing newline is added so the block cannot fuse with the line
            it is inserted in front of.

    Returns:
        The updated text, or ``content`` unchanged when no block-style
        ``cf-text.catalog`` section can be located. A flow-style catalog such
        as ``catalog: {}`` counts as "not located": appending block entries
        beneath it would produce invalid YAML.
    """
    if entry_lines and not entry_lines.endswith("\n"):
        entry_lines += "\n"

    lines = content.splitlines(keepends=True)

    in_cf_text = False
    in_catalog = False

    for i, line in enumerate(lines):
        stripped = line.lstrip()
        indent = len(line) - len(stripped)
        blank_or_comment = not stripped or stripped.startswith("#")

        if not in_cf_text:
            if indent == 2 and stripped.startswith("cf-text:"):
                in_cf_text = True
            continue

        if not in_catalog:
            if indent == 4 and stripped.startswith("catalog:"):
                inline_value = stripped[len("catalog:"):].strip()
                if inline_value and not inline_value.startswith("#"):
                    # Inline value (e.g. "{}"): not a block mapping we can extend
                    return content
                in_catalog = True
            elif not blank_or_comment and indent <= 2:
                # Left the cf-text section without finding a catalog
                return content
            continue

        # Inside the catalog: the first real line shallower than an entry ends it
        if not blank_or_comment and indent < 6:
            # Separate from the previous entry with a blank line unless one is there
            prefix = "\n" if lines[i - 1].strip() else ""
            lines.insert(i, prefix + entry_lines)
            return "".join(lines)

    # Catalog ran to EOF — append there
    if in_catalog:
        prefix = "\n" if lines and lines[-1].strip() else ""
        lines.append(prefix + entry_lines)
        return "".join(lines)

    return content
|
||||||
|
|
||||||
|
|
||||||
|
def _register_in_node_catalogs(
    repo_id: str,
    local_path: Path,
    vram_mb_fp16: int,
    role: str,
) -> list[str]:
    """Insert a cf-text catalog entry into every eligible node YAML.

    A node is eligible when:
    - It has a ``cf-text.catalog`` section
    - The model fits within the node's ``cf-text.max_mb`` at FP16 *or* 4-bit
      (the 4-bit figure is estimated as a quarter of FP16 plus 10%)
    - Neither the model key nor the local path is already in the catalog

    Each profile is edited as text so comments survive. Before a file is
    written the edited text is parsed again and must contain an entry for
    ``local_path``; otherwise the file is left untouched. Any failure on one
    profile is logged and does not stop the remaining profiles.

    Args:
        repo_id: HuggingFace repo id; its name part becomes the catalog key.
        local_path: Directory the model was downloaded to.
        vram_mb_fp16: Estimated FP16 VRAM footprint in MB.
        role: Model role, used only in the entry's description text.

    Returns:
        The list of node names (profile file stems) that were updated. Empty
        when PyYAML or the profiles directory is unavailable.
    """
    try:
        import yaml  # lazy — not in the critical import path
    except ImportError:
        logger.warning("PyYAML not available — skipping catalog registration for %s", repo_id)
        return []

    profiles_dir = _CF_ORCH_PROFILES_DIR
    if not profiles_dir.exists():
        logger.warning(
            "cf-orch profiles dir not found: %s — skipping catalog registration", profiles_dir
        )
        return []

    model_key = _catalog_key(repo_id)
    local_path_str = str(local_path)
    vram_4bit = round(vram_mb_fp16 / 4 * 1.1)
    updated: list[str] = []

    for yaml_file in sorted(profiles_dir.glob("*.yaml")):
        try:
            content = yaml_file.read_text(encoding="utf-8")
            data = yaml.safe_load(content)
            if not isinstance(data, dict):
                # Empty file or non-mapping document: nothing to register into
                continue

            cf_text = (data.get("services") or {}).get("cf-text")
            if not cf_text:
                continue

            # "or 0" also covers an explicit null in the profile
            max_mb: int = cf_text.get("max_mb") or 0
            catalog: dict = cf_text.get("catalog") or {}

            # Skip if key already exists
            if model_key in catalog:
                logger.debug("Key %r already in %s — skipping", model_key, yaml_file.name)
                continue

            # Skip if any existing entry already points at this path (or a file within it)
            registered_paths = {
                str(entry.get("path", ""))
                for entry in catalog.values()
                if isinstance(entry, dict)
            }
            if local_path_str in registered_paths or any(
                p.startswith(local_path_str + "/") for p in registered_paths
            ):
                logger.debug("Path %s already registered in %s — skipping", local_path_str, yaml_file.name)
                continue

            # Determine whether model fits at FP16 or needs 4-bit
            if vram_mb_fp16 <= max_mb:
                vram_for_node = vram_mb_fp16
                needs_4bit = False
            elif vram_4bit <= max_mb:
                vram_for_node = vram_4bit
                needs_4bit = True
            else:
                logger.debug(
                    "%s too large for %s (fp16=%d MB, 4bit=%d MB, max=%d MB)",
                    repo_id, yaml_file.name, vram_mb_fp16, vram_4bit, max_mb,
                )
                continue

            desc = f"{repo_id} ({role}, downloaded via avocet)"
            if needs_4bit:
                desc += " — CF_TEXT_4BIT=1 required"

            vram_comment = (
                f"  # 4-bit estimate; FP16 footprint is {vram_mb_fp16} MB"
                if needs_4bit
                else "  # FP16 file-size estimate"
            )
            # json.dumps emits a double-quoted string whose escapes are also
            # valid in a YAML double-quoted scalar, so quotes or backslashes in
            # repo_id/role cannot break the document. Plain text is unchanged.
            quoted_desc = json.dumps(desc, ensure_ascii=False)
            # Entries sit at indent 6 and their fields at indent 8: the layout
            # _insert_catalog_entry expects inside cf-text.catalog.
            entry_block = (
                f"      # auto-registered by avocet on download\n"
                f"      {model_key}:\n"
                f"        path: {local_path_str}\n"
                f"        vram_mb: {vram_for_node}{vram_comment}\n"
                f"        description: {quoted_desc}\n"
            )

            new_content = _insert_catalog_entry(content, entry_block)
            if new_content == content:
                logger.warning("Could not find catalog insertion point in %s", yaml_file.name)
                continue

            # Never write a profile that no longer parses or in which the new
            # entry did not land inside services.cf-text.catalog.
            reparsed = yaml.safe_load(new_content)
            services = (reparsed.get("services") or {}) if isinstance(reparsed, dict) else {}
            new_catalog = (services.get("cf-text") or {}).get("catalog") or {}
            round_trips = any(
                isinstance(entry, dict) and str(entry.get("path", "")) == local_path_str
                for entry in new_catalog.values()
            )
            if not round_trips:
                logger.warning(
                    "Catalog entry for %s did not round-trip in %s — file left untouched",
                    model_key, yaml_file.name,
                )
                continue

            yaml_file.write_text(new_content, encoding="utf-8")
            updated.append(yaml_file.stem)
            logger.info(
                "Registered %s in %s (vram_mb=%d, 4bit=%s)",
                model_key, yaml_file.name, vram_for_node, needs_4bit,
            )

        except Exception as exc:
            # Best effort per profile: one bad file must not block the others
            logger.warning("Could not update %s: %s", yaml_file.name, exc)

    return updated
|
||||||
|
|
||||||
|
|
||||||
# ── Background download ────────────────────────────────────────────────────────
|
# ── Background download ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter_recommendation: str | None) -> None:
|
def _poll_disk_progress(local_dir: Path, total_bytes: int, stop_event: threading.Event) -> None:
    """Side-thread: sample local_dir's size about every 2s into _download_progress.

    snapshot_download is called without any progress hook by this module, so
    the growth of the destination directory is used as a proxy for progress.

    Args:
        local_dir: Directory the download writes into.
        total_bytes: Expected final size; 0 means unknown, in which case only
            ``downloaded_bytes`` is updated and ``pct`` is left alone.
        stop_event: Set by the download thread to end polling. The wait
            between samples is interruptible, so the thread exits promptly.

    ``pct`` is capped at 99.0 here; the download thread sets the final 100.
    """
    while not stop_event.is_set():
        try:
            downloaded = sum(
                f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
            )
        except OSError:
            # Files appear, move and vanish mid-download; skip this sample
            downloaded = None

        # The walk can be slow. If the download finished meanwhile, do not
        # overwrite the final state with a stale (<= 99%) sample.
        if stop_event.is_set() or _download_progress.get("done"):
            break

        if downloaded is not None:
            _download_progress["downloaded_bytes"] = downloaded
            if total_bytes > 0:
                _download_progress["total_bytes"] = total_bytes
                _download_progress["pct"] = min(downloaded / total_bytes * 100, 99.0)

        # Interruptible pause: returns as soon as stop_event is set
        stop_event.wait(2.0)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_download(
|
||||||
|
entry_id: str,
|
||||||
|
repo_id: str,
|
||||||
|
pipeline_tag: str | None,
|
||||||
|
adapter_recommendation: str | None,
|
||||||
|
role: str | None = None,
|
||||||
|
service: str | None = None,
|
||||||
|
model_size_bytes: int = 0,
|
||||||
|
) -> None:
|
||||||
|
"""Background thread: download model via huggingface_hub.snapshot_download.
|
||||||
|
|
||||||
|
model_size_bytes is the sum of file sizes reported by the HF API (siblings).
|
||||||
|
It is used to estimate vram_mb and written to model_info.json so cf-orch can
|
||||||
|
budget VRAM when allocating a cf-text instance for this model.
|
||||||
|
"""
|
||||||
global _download_progress
|
global _download_progress
|
||||||
safe_name = _safe_model_name(repo_id)
|
local_dir = _model_dir_for(repo_id, service)
|
||||||
local_dir = _MODELS_DIR / safe_name
|
|
||||||
|
|
||||||
_download_progress = {
|
_download_progress = {
|
||||||
"active": True,
|
"active": True,
|
||||||
"repo_id": repo_id,
|
"repo_id": repo_id,
|
||||||
"downloaded_bytes": 0,
|
"downloaded_bytes": 0,
|
||||||
"total_bytes": 0,
|
"total_bytes": model_size_bytes,
|
||||||
"pct": 0.0,
|
"pct": 0.0,
|
||||||
"done": False,
|
"done": False,
|
||||||
"error": None,
|
"error": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stop_poll = threading.Event()
|
||||||
|
poll_thread = threading.Thread(
|
||||||
|
target=_poll_disk_progress,
|
||||||
|
args=(local_dir, model_size_bytes, stop_poll),
|
||||||
|
daemon=True,
|
||||||
|
name=f"model-poll-{entry_id}",
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if snapshot_download is None:
|
if snapshot_download is None:
|
||||||
raise RuntimeError("huggingface_hub is not installed")
|
raise RuntimeError("huggingface_hub is not installed")
|
||||||
|
|
||||||
|
local_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
poll_thread.start()
|
||||||
snapshot_download(
|
snapshot_download(
|
||||||
repo_id=repo_id,
|
repo_id=repo_id,
|
||||||
local_dir=str(local_dir),
|
local_dir=str(local_dir),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Write model_info.json alongside downloaded files
|
# Estimate VRAM from reported file size.
|
||||||
|
# HF siblings sizes are pre-quantisation file sizes; add 10% for KV cache
|
||||||
|
# and runtime overhead. Falls back to a stat of the local dir if 0.
|
||||||
|
if model_size_bytes == 0:
|
||||||
|
model_size_bytes = sum(
|
||||||
|
f.stat().st_size for f in local_dir.rglob("*") if f.is_file()
|
||||||
|
)
|
||||||
|
vram_mb = int(model_size_bytes / (1024 * 1024) * 1.1)
|
||||||
|
|
||||||
|
# Write model_info.json alongside downloaded files.
|
||||||
|
# local_path + vram_mb are read by cf-orch at allocation time to resolve
|
||||||
|
# the full model path and grant the correct VRAM lease.
|
||||||
model_info = {
|
model_info = {
|
||||||
"repo_id": repo_id,
|
"repo_id": repo_id,
|
||||||
"pipeline_tag": pipeline_tag,
|
"pipeline_tag": pipeline_tag,
|
||||||
"adapter_recommendation": adapter_recommendation,
|
"adapter_recommendation": adapter_recommendation,
|
||||||
|
"role": role,
|
||||||
|
"service": service,
|
||||||
|
"model_size_bytes": model_size_bytes,
|
||||||
|
"vram_mb": vram_mb,
|
||||||
|
"local_path": str(local_dir),
|
||||||
"downloaded_at": datetime.now(timezone.utc).isoformat(),
|
"downloaded_at": datetime.now(timezone.utc).isoformat(),
|
||||||
}
|
}
|
||||||
local_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
(local_dir / "model_info.json").write_text(
|
(local_dir / "model_info.json").write_text(
|
||||||
json.dumps(model_info, indent=2), encoding="utf-8"
|
json.dumps(model_info, indent=2), encoding="utf-8"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Auto-register cf-text models in the cf-orch node YAML catalogs so they
|
||||||
|
# appear in the benchmark model list without a manual YAML edit.
|
||||||
|
if service == "cf-text":
|
||||||
|
registered_on = _register_in_node_catalogs(
|
||||||
|
repo_id=repo_id,
|
||||||
|
local_path=local_dir,
|
||||||
|
vram_mb_fp16=vram_mb,
|
||||||
|
role=role or "generator",
|
||||||
|
)
|
||||||
|
if registered_on:
|
||||||
|
logger.info(
|
||||||
|
"Auto-registered %s in node catalogs: %s",
|
||||||
|
repo_id, ", ".join(registered_on),
|
||||||
|
)
|
||||||
|
|
||||||
_download_progress["done"] = True
|
_download_progress["done"] = True
|
||||||
_download_progress["pct"] = 100.0
|
_download_progress["pct"] = 100.0
|
||||||
_update_queue_entry(entry_id, {"status": "ready"})
|
_update_queue_entry(entry_id, {"status": "ready", "local_path": str(local_dir)})
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.exception("Download failed for %s: %s", repo_id, exc)
|
logger.exception("Download failed for %s: %s", repo_id, exc)
|
||||||
|
|
@ -174,6 +479,7 @@ def _run_download(entry_id: str, repo_id: str, pipeline_tag: str | None, adapter
|
||||||
_download_progress["done"] = True
|
_download_progress["done"] = True
|
||||||
_update_queue_entry(entry_id, {"status": "failed", "error": str(exc)})
|
_update_queue_entry(entry_id, {"status": "failed", "error": str(exc)})
|
||||||
finally:
|
finally:
|
||||||
|
stop_poll.set()
|
||||||
_download_progress["active"] = False
|
_download_progress["active"] = False
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -199,11 +505,15 @@ def lookup_model(repo_id: str) -> dict:
|
||||||
|
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
pipeline_tag = data.get("pipeline_tag")
|
pipeline_tag = data.get("pipeline_tag")
|
||||||
adapter_recommendation = _TAG_TO_ADAPTER.get(pipeline_tag) if pipeline_tag else None
|
tag_info = _TAG_TO_INFO.get(pipeline_tag) if pipeline_tag else None
|
||||||
|
adapter_recommendation = tag_info["adapter"] if tag_info else None
|
||||||
|
role = tag_info["role"] if tag_info else None
|
||||||
|
service = tag_info["service"] if tag_info else None
|
||||||
|
|
||||||
# Determine compatibility and surface a human-readable warning
|
# Determine compatibility and surface a human-readable warning
|
||||||
_supported = ", ".join(sorted(_TAG_TO_ADAPTER.keys()))
|
_supported = ", ".join(sorted(_TAG_TO_INFO.keys()))
|
||||||
if adapter_recommendation is not None:
|
if tag_info is not None:
|
||||||
|
# Any recognized tag is compatible — avocet adapters or another CF service
|
||||||
compatible = True
|
compatible = True
|
||||||
warning: str | None = None
|
warning: str | None = None
|
||||||
elif pipeline_tag is None:
|
elif pipeline_tag is None:
|
||||||
|
|
@ -216,7 +526,7 @@ def lookup_model(repo_id: str) -> dict:
|
||||||
else:
|
else:
|
||||||
compatible = False
|
compatible = False
|
||||||
warning = (
|
warning = (
|
||||||
f"\"{pipeline_tag}\" models are not supported by Avocet's email classification adapters. "
|
f"\"{pipeline_tag}\" models are not yet supported by the CircuitForge model ecosystem. "
|
||||||
f"Supported task types: {_supported}."
|
f"Supported task types: {_supported}."
|
||||||
)
|
)
|
||||||
logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id)
|
logger.warning("Unsupported pipeline_tag %r for %s", pipeline_tag, repo_id)
|
||||||
|
|
@ -234,6 +544,8 @@ def lookup_model(repo_id: str) -> dict:
|
||||||
"repo_id": repo_id,
|
"repo_id": repo_id,
|
||||||
"pipeline_tag": pipeline_tag,
|
"pipeline_tag": pipeline_tag,
|
||||||
"adapter_recommendation": adapter_recommendation,
|
"adapter_recommendation": adapter_recommendation,
|
||||||
|
"role": role,
|
||||||
|
"service": service,
|
||||||
"compatible": compatible,
|
"compatible": compatible,
|
||||||
"warning": warning,
|
"warning": warning,
|
||||||
"model_size_bytes": model_size_bytes,
|
"model_size_bytes": model_size_bytes,
|
||||||
|
|
@ -261,12 +573,18 @@ class QueueAddRequest(BaseModel):
|
||||||
repo_id: str
|
repo_id: str
|
||||||
pipeline_tag: str | None = None
|
pipeline_tag: str | None = None
|
||||||
adapter_recommendation: str | None = None
|
adapter_recommendation: str | None = None
|
||||||
|
role: str | None = None
|
||||||
|
service: str | None = None
|
||||||
|
# Sum of file sizes from HF API siblings list; 0 if unknown.
|
||||||
|
# Stored in the queue entry so approve can pass it to _run_download
|
||||||
|
# without a second HF API round-trip.
|
||||||
|
model_size_bytes: int = 0
|
||||||
|
|
||||||
|
|
||||||
@router.post("/queue", status_code=201)
|
@router.post("/queue", status_code=201)
|
||||||
def add_to_queue(req: QueueAddRequest) -> dict:
|
def add_to_queue(req: QueueAddRequest) -> dict:
|
||||||
"""Add a model to the approval queue with status 'pending'."""
|
"""Add a model to the approval queue with status 'pending'."""
|
||||||
if _is_installed(req.repo_id):
|
if _is_installed(req.repo_id, service=req.service):
|
||||||
raise HTTPException(409, f"{req.repo_id!r} is already installed")
|
raise HTTPException(409, f"{req.repo_id!r} is already installed")
|
||||||
if _is_queued(req.repo_id):
|
if _is_queued(req.repo_id):
|
||||||
raise HTTPException(409, f"{req.repo_id!r} is already in the queue")
|
raise HTTPException(409, f"{req.repo_id!r} is already in the queue")
|
||||||
|
|
@ -276,6 +594,9 @@ def add_to_queue(req: QueueAddRequest) -> dict:
|
||||||
"repo_id": req.repo_id,
|
"repo_id": req.repo_id,
|
||||||
"pipeline_tag": req.pipeline_tag,
|
"pipeline_tag": req.pipeline_tag,
|
||||||
"adapter_recommendation": req.adapter_recommendation,
|
"adapter_recommendation": req.adapter_recommendation,
|
||||||
|
"role": req.role,
|
||||||
|
"service": req.service,
|
||||||
|
"model_size_bytes": req.model_size_bytes,
|
||||||
"status": "pending",
|
"status": "pending",
|
||||||
"queued_at": datetime.now(timezone.utc).isoformat(),
|
"queued_at": datetime.now(timezone.utc).isoformat(),
|
||||||
}
|
}
|
||||||
|
|
@ -300,7 +621,15 @@ def approve_queue_entry(entry_id: str) -> dict:
|
||||||
|
|
||||||
thread = threading.Thread(
|
thread = threading.Thread(
|
||||||
target=_run_download,
|
target=_run_download,
|
||||||
args=(entry_id, entry["repo_id"], entry.get("pipeline_tag"), entry.get("adapter_recommendation")),
|
args=(
|
||||||
|
entry_id,
|
||||||
|
entry["repo_id"],
|
||||||
|
entry.get("pipeline_tag"),
|
||||||
|
entry.get("adapter_recommendation"),
|
||||||
|
entry.get("role"),
|
||||||
|
entry.get("service"),
|
||||||
|
entry.get("model_size_bytes", 0),
|
||||||
|
),
|
||||||
daemon=True,
|
daemon=True,
|
||||||
name=f"model-download-{entry_id}",
|
name=f"model-download-{entry_id}",
|
||||||
)
|
)
|
||||||
|
|
@ -368,18 +697,104 @@ def download_stream() -> StreamingResponse:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── POST /sync-catalogs ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@router.post("/sync-catalogs")
def sync_catalogs() -> dict:
    """Scan all installed cf-text models and register any missing from node YAMLs.

    Reads model_info.json from each directory in the cf-text models dir and
    calls _register_in_node_catalogs() for each one whose ``service`` is
    ``"cf-text"``. Repeat calls are safe: models already present by key or
    path are skipped by the registration helper.

    Fields stored as ``null`` in model_info.json are treated like missing
    ones: ``repo_id`` falls back to the directory name, ``vram_mb`` to 0 and
    ``role`` to ``"generator"``.

    Returns:
        A summary dict with ``registered`` (repo id → updated node names),
        ``skipped`` (directory names or repo ids that were not registered)
        and a human-readable ``message``.
    """
    if not _CF_TEXT_MODELS_DIR.exists():
        return {"registered": {}, "skipped": [], "message": "cf-text models dir not found"}

    registered: dict[str, list[str]] = {}
    skipped: list[str] = []

    for model_dir in sorted(_CF_TEXT_MODELS_DIR.iterdir()):
        if not model_dir.is_dir():
            continue
        info_file = model_dir / "model_info.json"
        if not info_file.exists():
            skipped.append(model_dir.name)
            continue

        try:
            info = json.loads(info_file.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both malformed JSON and undecodable bytes
            logger.warning("Could not read model_info.json for %s: %s", model_dir.name, exc)
            skipped.append(model_dir.name)
            continue

        # A payload that is not a JSON object cannot describe a model
        if not isinstance(info, dict) or info.get("service") != "cf-text":
            skipped.append(model_dir.name)
            continue

        # "or" rather than a .get() default: the key may be present with a
        # null value, which a .get() default would not replace.
        repo_id = info.get("repo_id") or model_dir.name
        vram_mb = info.get("vram_mb") or 0
        role = info.get("role") or "generator"

        updated_nodes = _register_in_node_catalogs(
            repo_id=repo_id,
            local_path=model_dir,
            vram_mb_fp16=vram_mb,
            role=role,
        )
        if updated_nodes:
            registered[repo_id] = updated_nodes
        else:
            skipped.append(repo_id)

    return {
        "registered": registered,
        "skipped": skipped,
        "message": (
            f"Registered {len(registered)} model(s) on "
            f"{sum(len(v) for v in registered.values())} node(s)"
            if registered
            else "All models already registered (or no eligible nodes found)"
        ),
    }
|
||||||
|
|
||||||
|
|
||||||
# ── GET /installed ─────────────────────────────────────────────────────────────
|
# ── GET /installed ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@router.get("/installed")
|
@router.get("/installed")
|
||||||
def list_installed() -> list[dict]:
|
def list_installed() -> list[dict]:
|
||||||
"""Scan _MODELS_DIR and return info on each installed model."""
|
"""Scan all model directories and return info on each installed model.
|
||||||
if not _MODELS_DIR.exists():
|
|
||||||
return []
|
Scans both the local avocet models dir (classifiers, fine-tunes) and the
|
||||||
|
shared NFS cf-text models dir, deduplicating by directory path.
|
||||||
|
|
||||||
|
Falls back to queue entry data when model_info.json has null service/role,
|
||||||
|
so models downloaded before the pipeline_tag registry existed still group
|
||||||
|
correctly in the UI.
|
||||||
|
"""
|
||||||
|
scan_dirs = [_MODELS_DIR]
|
||||||
|
if _CF_TEXT_MODELS_DIR != _MODELS_DIR and _CF_TEXT_MODELS_DIR.exists():
|
||||||
|
scan_dirs.append(_CF_TEXT_MODELS_DIR)
|
||||||
|
|
||||||
|
# Build a lookup from safe directory name → queue entry for fallback enrichment.
|
||||||
|
queue_by_safe_name: dict[str, dict] = {
|
||||||
|
_safe_model_name(r["repo_id"]): r
|
||||||
|
for r in _read_queue()
|
||||||
|
if r.get("repo_id") and r.get("status") not in ("dismissed",)
|
||||||
|
}
|
||||||
|
|
||||||
results: list[dict] = []
|
results: list[dict] = []
|
||||||
for sub in _MODELS_DIR.iterdir():
|
seen: set[Path] = set()
|
||||||
if not sub.is_dir():
|
|
||||||
|
for scan_dir in scan_dirs:
|
||||||
|
if not scan_dir.exists():
|
||||||
continue
|
continue
|
||||||
|
for sub in scan_dir.iterdir():
|
||||||
|
if not sub.is_dir() or sub in seen:
|
||||||
|
continue
|
||||||
|
seen.add(sub)
|
||||||
|
|
||||||
has_training_info = (sub / "training_info.json").exists()
|
has_training_info = (sub / "training_info.json").exists()
|
||||||
has_config = (sub / "config.json").exists()
|
has_config = (sub / "config.json").exists()
|
||||||
|
|
@ -393,15 +808,20 @@ def list_installed() -> list[dict]:
|
||||||
# Compute directory size
|
# Compute directory size
|
||||||
size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())
|
size_bytes = sum(f.stat().st_size for f in sub.rglob("*") if f.is_file())
|
||||||
|
|
||||||
# Load adapter/model_id from model_info.json or training_info.json
|
|
||||||
adapter: str | None = None
|
adapter: str | None = None
|
||||||
model_id: str | None = None
|
model_id: str | None = None
|
||||||
|
role: str | None = None
|
||||||
|
service: str | None = None
|
||||||
|
vram_mb: int | None = None
|
||||||
|
|
||||||
if has_model_info:
|
if has_model_info:
|
||||||
try:
|
try:
|
||||||
info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
|
info = json.loads((sub / "model_info.json").read_text(encoding="utf-8"))
|
||||||
adapter = info.get("adapter_recommendation")
|
adapter = info.get("adapter_recommendation")
|
||||||
model_id = info.get("repo_id")
|
model_id = info.get("repo_id")
|
||||||
|
role = info.get("role")
|
||||||
|
service = info.get("service")
|
||||||
|
vram_mb = info.get("vram_mb")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
elif has_training_info:
|
elif has_training_info:
|
||||||
|
|
@ -409,40 +829,154 @@ def list_installed() -> list[dict]:
|
||||||
info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
|
info = json.loads((sub / "training_info.json").read_text(encoding="utf-8"))
|
||||||
adapter = info.get("adapter")
|
adapter = info.get("adapter")
|
||||||
model_id = info.get("base_model") or info.get("model_id")
|
model_id = info.get("base_model") or info.get("model_id")
|
||||||
|
role = info.get("role", "classifier")
|
||||||
|
service = info.get("service", "avocet")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Fall back to queue entry when model_info.json has null service/role.
|
||||||
|
# This covers models downloaded before the pipeline_tag registry existed.
|
||||||
|
if (role is None or service is None) and sub.name in queue_by_safe_name:
|
||||||
|
q = queue_by_safe_name[sub.name]
|
||||||
|
role = role or q.get("role")
|
||||||
|
service = service or q.get("service")
|
||||||
|
model_id = model_id or q.get("repo_id")
|
||||||
|
|
||||||
|
# Last resort: re-derive from pipeline_tag if we still have no service.
|
||||||
|
if service is None and model_id:
|
||||||
|
hf_url = f"https://huggingface.co/api/models/{model_id}"
|
||||||
|
# Only attempt if we have a pipeline_tag cached somewhere.
|
||||||
|
for q in queue_by_safe_name.values():
|
||||||
|
if q.get("repo_id") == model_id and q.get("pipeline_tag"):
|
||||||
|
tag_info = _TAG_TO_INFO.get(q["pipeline_tag"])
|
||||||
|
if tag_info:
|
||||||
|
role = role or tag_info["role"]
|
||||||
|
service = service or tag_info["service"]
|
||||||
|
break
|
||||||
|
|
||||||
results.append({
|
results.append({
|
||||||
"name": sub.name,
|
"name": sub.name,
|
||||||
"path": str(sub),
|
"path": str(sub),
|
||||||
"type": model_type,
|
"type": model_type,
|
||||||
"adapter": adapter,
|
"adapter": adapter,
|
||||||
|
"role": role,
|
||||||
|
"service": service,
|
||||||
"size_bytes": size_bytes,
|
"size_bytes": size_bytes,
|
||||||
|
"vram_mb": vram_mb,
|
||||||
"model_id": model_id,
|
"model_id": model_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
# ── PATCH /installed/{name} ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class InstalledModelPatch(BaseModel):
    # Request body for PATCH /installed/{name}: the manual classification that
    # patch_installed() stores in model_info.json and mirrors into the queue.

    # Service the model is assigned to (the tests use values such as
    # "avocet" and "cf-stt").
    service: str

    # Role the model plays for that service (the tests use values such as
    # "classifier" and "stt").
    role: str
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/installed/{name}")
def patch_installed(name: str, body: InstalledModelPatch) -> dict:
    """Manually assign service and role to an installed model.

    Writes the updated values back to model_info.json so they survive restarts,
    and updates any matching queue entry so the UI shows the correct chip.

    Args:
        name: Directory name of the installed model (a single path component).
        body: The ``service`` and ``role`` values to store.

    Returns:
        ``{"ok": True, "service": ..., "role": ...}`` echoing the stored values.

    Raises:
        HTTPException: 400 when ``name`` is not a plain directory name or
            resolves outside a models directory; 404 when no model directory
            called ``name`` exists.
    """
    if not name or name.startswith(".") or ".." in name or "/" in name or "\\" in name:
        raise HTTPException(400, f"Invalid model name {name!r}")

    # Look in the local models dir first, then the cf-text dir when it is a
    # different location.
    candidate_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
        candidate_dirs.append(_CF_TEXT_MODELS_DIR)

    model_path: Path | None = None
    for base in candidate_dirs:
        candidate = base / name
        try:
            # Extra safety: the resolved path must still live inside ``base``.
            candidate.resolve().relative_to(base.resolve())
        except ValueError:
            raise HTTPException(400, f"Path traversal detected for name {name!r}") from None
        # Only directories count as installed models (list_installed skips
        # everything else); a stray regular file must not be selected, because
        # model_info.json could not be written beneath it.
        if candidate.is_dir():
            model_path = candidate
            break

    if model_path is None:
        raise HTTPException(404, f"Installed model {name!r} not found")

    info_path = model_path / "model_info.json"
    info: Any = {}
    if info_path.exists():
        try:
            info = json.loads(info_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Best effort: an unreadable or corrupt file is rebuilt from scratch.
            # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            info = {}
    if not isinstance(info, dict):
        # Valid JSON that is not an object (e.g. a list) cannot hold the keys below.
        info = {}

    info["service"] = body.service
    info["role"] = body.role
    info_path.write_text(json.dumps(info, indent=2), encoding="utf-8")

    # Mirror the update into any matching queue entry.
    records = _read_queue()
    updated = False
    for r in records:
        if r.get("status") == "dismissed":
            continue
        # ``or ""`` also covers keys that are present but null.
        local = r.get("local_path") or ""
        repo_id = r.get("repo_id") or ""
        if (local and Path(local).name == name) or _safe_model_name(repo_id) == name:
            r["service"] = body.service
            r["role"] = body.role
            updated = True
    if updated:
        _write_queue(records)

    return {"ok": True, "service": body.service, "role": body.role}
|
||||||
|
|
||||||
|
|
||||||
# ── DELETE /installed/{name} ───────────────────────────────────────────────────
|
# ── DELETE /installed/{name} ───────────────────────────────────────────────────
|
||||||
|
|
||||||
@router.delete("/installed/{name}")
def delete_installed(name: str) -> dict:
    """Remove an installed model directory by name. Blocks path traversal.

    Searches both the local avocet models dir and the shared cf-text models dir.
    Also dismisses any matching queue entry so the UI doesn't show a stale "ready" card.

    Args:
        name: Directory name of the installed model (a single path component).

    Returns:
        ``{"ok": True}`` once the directory has been removed.

    Raises:
        HTTPException: 400 when ``name`` is not a plain directory name or
            resolves outside a models directory; 404 when no model directory
            called ``name`` exists in any searched location.
    """
    if not name or name.startswith(".") or ".." in name or "/" in name or "\\" in name:
        raise HTTPException(400, f"Invalid model name {name!r}: must be a single directory name with no path separators or '..'")

    # Search both model directories
    candidate_dirs = [_MODELS_DIR]
    if _CF_TEXT_MODELS_DIR != _MODELS_DIR:
        candidate_dirs.append(_CF_TEXT_MODELS_DIR)

    model_path: Path | None = None
    for base in candidate_dirs:
        candidate = base / name
        try:
            # Extra safety: the resolved path must still live inside ``base``.
            candidate.resolve().relative_to(base.resolve())
        except ValueError:
            raise HTTPException(400, f"Path traversal detected for name {name!r}") from None
        # Only directories are installed models (list_installed skips anything
        # else). Accepting a regular file here would make shutil.rmtree fail
        # with NotADirectoryError instead of returning a clean 404.
        if candidate.is_dir():
            model_path = candidate
            break

    if model_path is None:
        raise HTTPException(404, f"Installed model {name!r} not found in any model directory")

    shutil.rmtree(model_path)

    # Dismiss any queue entries whose local_path matches, or whose repo_id maps to this dir name.
    records = _read_queue()
    updated = False
    for r in records:
        if r.get("status") == "dismissed":
            continue
        # ``or ""`` also covers keys that are present but null.
        local = r.get("local_path") or ""
        repo_id = r.get("repo_id") or ""
        if (local and Path(local).name == name) or _safe_model_name(repo_id) == name:
            r["status"] = "dismissed"
            updated = True
    if updated:
        _write_queue(records)

    return {"ok": True}
|
||||||
|
|
|
||||||
|
|
@ -57,11 +57,32 @@ imitate:
|
||||||
- id: peregrine
|
- id: peregrine
|
||||||
name: Peregrine
|
name: Peregrine
|
||||||
icon: "🦅"
|
icon: "🦅"
|
||||||
description: Job search assistant
|
description: Job search assistant — live job listings
|
||||||
base_url: http://localhost:8502
|
base_url: http://localhost:8601
|
||||||
sample_endpoint: /api/jobs
|
health_path: /api/jobs/counts
|
||||||
text_fields: [title, description]
|
sample_endpoint: /api/jobs?status=pending&limit=5
|
||||||
prompt_template: "Analyze this job listing and identify key requirements:\n\n{text}"
|
text_fields: [title, company, description]
|
||||||
|
prompt_template: "Analyze this job listing and identify the key requirements, must-have skills, and any culture signals that would help tailor an application:\n\n{text}"
|
||||||
|
|
||||||
|
- id: osprey
|
||||||
|
name: Osprey
|
||||||
|
icon: "📞"
|
||||||
|
description: Gov't hold-line automation — recent call records
|
||||||
|
base_url: http://localhost:8520
|
||||||
|
health_path: /api/health
|
||||||
|
sample_endpoint: /api/calls/recent
|
||||||
|
text_fields: [agency, issue, notes]
|
||||||
|
prompt_template: "Draft a clear, professional follow-up letter for this government hold-line call. Include what was discussed, what action the agency committed to, and a polite deadline for response:\n\n{text}"
|
||||||
|
|
||||||
|
- id: linnet
|
||||||
|
name: Linnet
|
||||||
|
icon: "🐦"
|
||||||
|
description: Real-time tone annotation — Elcor-style subtext for ND users
|
||||||
|
base_url: http://localhost:8522
|
||||||
|
health_path: /health
|
||||||
|
sample_endpoint: /samples
|
||||||
|
text_fields: [text, context]
|
||||||
|
prompt_template: "Annotate the emotional tone and subtext of the following text using explicit Elcor-style markers (e.g. [SINCERELY], [UNCERTAIN], [FRUSTRATED]). Identify implied emotions, potential sarcasm, and any ambiguity that might be misread by neurodivergent readers:\n\n{text}"
|
||||||
|
|
||||||
- id: kiwi
|
- id: kiwi
|
||||||
name: Kiwi
|
name: Kiwi
|
||||||
|
|
|
||||||
26
manage.sh
26
manage.sh
|
|
@ -90,6 +90,12 @@ usage() {
|
||||||
echo -e " ${GREEN}score [args]${NC} Shortcut: --score [args]"
|
echo -e " ${GREEN}score [args]${NC} Shortcut: --score [args]"
|
||||||
echo -e " ${GREEN}compare [args]${NC} Shortcut: --compare [args]"
|
echo -e " ${GREEN}compare [args]${NC} Shortcut: --compare [args]"
|
||||||
echo ""
|
echo ""
|
||||||
|
echo " Writing Style Benchmark:"
|
||||||
|
echo -e " ${GREEN}style-bench [args]${NC} Run benchmark_style.py (args passed through)"
|
||||||
|
echo -e " ${GREEN}style-list${NC} List available ollama models for style bench"
|
||||||
|
echo -e " ${GREEN}style-run [args]${NC} Run writing style benchmark (--models, --samples, --include-large, --scan-disk PATH, --cforch)"
|
||||||
|
echo -e " ${GREEN}style-last${NC} Print most recent writing style benchmark report"
|
||||||
|
echo ""
|
||||||
echo " Dev:"
|
echo " Dev:"
|
||||||
echo -e " ${GREEN}dev${NC} Hot-reload: uvicorn --reload (:8503) + Vite HMR (:5173)"
|
echo -e " ${GREEN}dev${NC} Hot-reload: uvicorn --reload (:8503) + Vite HMR (:5173)"
|
||||||
echo -e " ${GREEN}test${NC} Run pytest suite"
|
echo -e " ${GREEN}test${NC} Run pytest suite"
|
||||||
|
|
@ -249,6 +255,26 @@ case "$CMD" in
|
||||||
exec "$0" benchmark --compare "$@"
|
exec "$0" benchmark --compare "$@"
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
style-bench)
|
||||||
|
info "Running writing style benchmark (${ENV_BM})…"
|
||||||
|
if [[ ! -x "$PYTHON_BM" ]]; then
|
||||||
|
error "Python not found in ${ENV_BM} env at ${PYTHON_BM}"
|
||||||
|
fi
|
||||||
|
"$PYTHON_BM" scripts/benchmark_style.py "$@"
|
||||||
|
;;
|
||||||
|
|
||||||
|
style-list)
|
||||||
|
exec "$0" style-bench --list-models
|
||||||
|
;;
|
||||||
|
|
||||||
|
style-run)
|
||||||
|
exec "$0" style-bench --run "$@"
|
||||||
|
;;
|
||||||
|
|
||||||
|
style-last)
|
||||||
|
exec "$0" style-bench --show-last
|
||||||
|
;;
|
||||||
|
|
||||||
help|--help|-h)
|
help|--help|-h)
|
||||||
usage
|
usage
|
||||||
;;
|
;;
|
||||||
|
|
|
||||||
|
|
@ -122,17 +122,88 @@ def test_lookup_returns_correct_shape(client):
|
||||||
assert data["already_queued"] is False
|
assert data["already_queued"] is False
|
||||||
|
|
||||||
|
|
||||||
def test_lookup_unknown_pipeline_tag_returns_null_adapter_and_incompatible(client):
    """A pipeline_tag outside the registry is reported as incompatible, with no adapter/role/service."""
    repo = "org/m"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "reinforcement-learning")

    with patch("app.models.httpx.get", return_value=hf_reply):
        resp = client.get("/api/models/lookup", params={"repo_id": repo})

    assert resp.status_code == 200
    payload = resp.json()
    # Nothing can be recommended for an unknown tag.
    for field in ("adapter_recommendation", "role", "service"):
        assert payload[field] is None
    assert payload["compatible"] is False
    assert "CircuitForge model ecosystem" in payload["warning"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_stt_tag_returns_compatible_with_cf_stt_service(client):
    """Looking up an automatic-speech-recognition model reports role=stt on service cf-stt."""
    repo = "openai/whisper-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "automatic-speech-recognition")

    with patch("app.models.httpx.get", return_value=hf_reply):
        resp = client.get("/api/models/lookup", params={"repo_id": repo})

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["compatible"] is True
    # Compatible, but no adapter is recommended and no warning is raised.
    assert payload["adapter_recommendation"] is None
    assert payload["role"] == "stt"
    assert payload["service"] == "cf-stt"
    assert payload["warning"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_vision_tag_returns_compatible_with_cf_vision_service(client):
    """Looking up an image-classification model reports role=vision on service cf-vision."""
    repo = "google/siglip-base"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "image-classification")

    with patch("app.models.httpx.get", return_value=hf_reply):
        resp = client.get("/api/models/lookup", params={"repo_id": repo})

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["compatible"] is True
    assert payload["role"] == "vision"
    assert payload["service"] == "cf-vision"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_audio_classification_tag_returns_cf_voice_service(client):
    """Looking up an audio-classification model reports role=classifier on service cf-voice."""
    repo = "org/audio-model"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "audio-classification")

    with patch("app.models.httpx.get", return_value=hf_reply):
        resp = client.get("/api/models/lookup", params={"repo_id": repo})

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["compatible"] is True
    assert payload["role"] == "classifier"
    assert payload["service"] == "cf-voice"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lookup_embedding_tag_returns_compatible_with_cf_core_service(client):
    """Looking up a feature-extraction model reports role=embedding on service cf-core."""
    repo = "BAAI/bge-small-en"
    hf_reply = MagicMock(status_code=200)
    hf_reply.json.return_value = _make_hf_response(repo, "feature-extraction")

    with patch("app.models.httpx.get", return_value=hf_reply):
        resp = client.get("/api/models/lookup", params={"repo_id": repo})

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["compatible"] is True
    assert payload["role"] == "embedding"
    assert payload["service"] == "cf-core"
|
||||||
|
|
||||||
|
|
||||||
def test_lookup_already_queued_flag(client):
|
def test_lookup_already_queued_flag(client):
|
||||||
|
|
@ -181,6 +252,26 @@ def test_queue_add_returns_entry_fields(client):
|
||||||
assert entry["adapter_recommendation"] == "ZeroShotAdapter"
|
assert entry["adapter_recommendation"] == "ZeroShotAdapter"
|
||||||
|
|
||||||
|
|
||||||
|
def test_queue_preserves_role_and_service(client):
    """role/service sent to POST /queue come back from the POST itself and from GET /queue."""
    submission = {
        "repo_id": "openai/whisper-base",
        "pipeline_tag": "automatic-speech-recognition",
        "adapter_recommendation": None,
        "role": "stt",
        "service": "cf-stt",
    }
    created = client.post("/api/models/queue", json=submission)
    assert created.status_code == 201
    entry = created.json()
    assert entry["role"] == "stt"
    assert entry["service"] == "cf-stt"

    # The same values must be readable back through the listing endpoint.
    listing = client.get("/api/models/queue")
    first = listing.json()[0]
    assert first["role"] == "stt"
    assert first["service"] == "cf-stt"
|
||||||
|
|
||||||
|
|
||||||
# ── POST /queue — 409 duplicate ────────────────────────────────────────────────
|
# ── POST /queue — 409 duplicate ────────────────────────────────────────────────
|
||||||
|
|
||||||
def test_queue_duplicate_returns_409(client):
|
def test_queue_duplicate_returns_409(client):
|
||||||
|
|
@ -317,7 +408,12 @@ def test_installed_detects_downloaded_model(client, tmp_path):
|
||||||
model_dir.mkdir()
|
model_dir.mkdir()
|
||||||
(model_dir / "config.json").write_text(json.dumps({"model_type": "bert"}), encoding="utf-8")
|
(model_dir / "config.json").write_text(json.dumps({"model_type": "bert"}), encoding="utf-8")
|
||||||
(model_dir / "model_info.json").write_text(
|
(model_dir / "model_info.json").write_text(
|
||||||
json.dumps({"repo_id": "org/mymodel", "adapter_recommendation": "ZeroShotAdapter"}),
|
json.dumps({
|
||||||
|
"repo_id": "org/mymodel",
|
||||||
|
"adapter_recommendation": "ZeroShotAdapter",
|
||||||
|
"role": "classifier",
|
||||||
|
"service": "avocet",
|
||||||
|
}),
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -329,6 +425,51 @@ def test_installed_detects_downloaded_model(client, tmp_path):
|
||||||
assert items[0]["name"] == "org--mymodel"
|
assert items[0]["name"] == "org--mymodel"
|
||||||
assert items[0]["adapter"] == "ZeroShotAdapter"
|
assert items[0]["adapter"] == "ZeroShotAdapter"
|
||||||
assert items[0]["model_id"] == "org/mymodel"
|
assert items[0]["model_id"] == "org/mymodel"
|
||||||
|
assert items[0]["role"] == "classifier"
|
||||||
|
assert items[0]["service"] == "avocet"
|
||||||
|
|
||||||
|
|
||||||
|
def test_installed_stt_model_surfaces_role_and_service(client):
    """GET /installed echoes the role/service stored in a downloaded STT model's model_info.json."""
    from app import models as models_module

    whisper_dir = models_module._MODELS_DIR / "openai--whisper-base"
    whisper_dir.mkdir()
    config_text = json.dumps({"model_type": "whisper"})
    (whisper_dir / "config.json").write_text(config_text, encoding="utf-8")
    model_info = {
        "repo_id": "openai/whisper-base",
        "adapter_recommendation": None,
        "role": "stt",
        "service": "cf-stt",
    }
    (whisper_dir / "model_info.json").write_text(json.dumps(model_info), encoding="utf-8")

    resp = client.get("/api/models/installed")
    assert resp.status_code == 200
    first = resp.json()[0]
    assert first["role"] == "stt"
    assert first["service"] == "cf-stt"
    assert first["adapter"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_installed_finetuned_model_defaults_to_avocet_service(client):
    """A training_info.json without role/service is listed as role=classifier, service=avocet."""
    from app import models as models_module

    finetuned_dir = models_module._MODELS_DIR / "my-finetuned-v2"
    finetuned_dir.mkdir()
    # Deliberately omit "role" and "service" so the defaults are exercised.
    training_info = {"base_model": "microsoft/deberta-v3-base", "epochs": 3}
    (finetuned_dir / "training_info.json").write_text(json.dumps(training_info), encoding="utf-8")

    resp = client.get("/api/models/installed")
    assert resp.status_code == 200
    first = resp.json()[0]
    assert first["role"] == "classifier"
    assert first["service"] == "avocet"
|
||||||
|
|
||||||
|
|
||||||
def test_installed_detects_finetuned_model(client):
|
def test_installed_detects_finetuned_model(client):
|
||||||
|
|
|
||||||
4
web/.gitignore
vendored
4
web/.gitignore
vendored
|
|
@ -22,3 +22,7 @@ dist-ssr
|
||||||
*.njsproj
|
*.njsproj
|
||||||
*.sln
|
*.sln
|
||||||
*.sw?
|
*.sw?
|
||||||
|
|
||||||
|
# Local environment overrides
|
||||||
|
.env
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,12 @@
|
||||||
<span v-if="lookupResult.pipeline_tag" class="chip chip-pipeline">
|
<span v-if="lookupResult.pipeline_tag" class="chip chip-pipeline">
|
||||||
{{ lookupResult.pipeline_tag }}
|
{{ lookupResult.pipeline_tag }}
|
||||||
</span>
|
</span>
|
||||||
|
<span v-if="lookupResult.role" class="chip chip-role">
|
||||||
|
{{ lookupResult.role }}
|
||||||
|
</span>
|
||||||
|
<span v-if="lookupResult.service" class="chip" :class="serviceChipClass(lookupResult.service)">
|
||||||
|
{{ lookupResult.service }}
|
||||||
|
</span>
|
||||||
<span v-if="lookupResult.adapter_recommendation" class="chip chip-adapter">
|
<span v-if="lookupResult.adapter_recommendation" class="chip chip-adapter">
|
||||||
{{ lookupResult.adapter_recommendation }}
|
{{ lookupResult.adapter_recommendation }}
|
||||||
</span>
|
</span>
|
||||||
|
|
@ -61,11 +67,10 @@
|
||||||
|
|
||||||
<button
|
<button
|
||||||
class="btn-primary btn-add-queue"
|
class="btn-primary btn-add-queue"
|
||||||
:class="{ 'btn-add-queue-warn': !lookupResult.compatible }"
|
|
||||||
:disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue"
|
:disabled="lookupResult.already_installed || lookupResult.already_queued || addingToQueue"
|
||||||
@click="addToQueue"
|
@click="addToQueue"
|
||||||
>
|
>
|
||||||
{{ addingToQueue ? 'Adding…' : lookupResult.compatible ? 'Add to queue' : 'Add anyway' }}
|
{{ addingToQueue ? 'Adding…' : 'Add to queue' }}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
@ -91,6 +96,8 @@
|
||||||
</div>
|
</div>
|
||||||
<div class="model-meta">
|
<div class="model-meta">
|
||||||
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
|
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
|
||||||
|
<span v-if="model.role" class="chip chip-role">{{ model.role }}</span>
|
||||||
|
<span v-if="model.service" class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
|
||||||
<span v-if="model.adapter_recommendation" class="chip chip-adapter">{{ model.adapter_recommendation }}</span>
|
<span v-if="model.adapter_recommendation" class="chip chip-adapter">{{ model.adapter_recommendation }}</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="model-card-actions">
|
<div class="model-card-actions">
|
||||||
|
|
@ -116,6 +123,8 @@
|
||||||
</div>
|
</div>
|
||||||
<div class="model-meta">
|
<div class="model-meta">
|
||||||
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
|
<span v-if="model.pipeline_tag" class="chip chip-pipeline">{{ model.pipeline_tag }}</span>
|
||||||
|
<span v-if="model.role" class="chip chip-role">{{ model.role }}</span>
|
||||||
|
<span v-if="model.service" class="chip" :class="serviceChipClass(model.service)">{{ model.service }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-if="downloadErrors[model.id]" class="download-error" role="alert">
|
<div v-if="downloadErrors[model.id]" class="download-error" role="alert">
|
||||||
|
|
@ -124,14 +133,19 @@
|
||||||
<div v-else class="progress-wrap" :aria-label="`Download progress for ${model.repo_id}`">
|
<div v-else class="progress-wrap" :aria-label="`Download progress for ${model.repo_id}`">
|
||||||
<div
|
<div
|
||||||
class="progress-bar"
|
class="progress-bar"
|
||||||
:style="{ width: `${downloadProgress[model.id] ?? 0}%` }"
|
:style="{ width: `${downloadProgress[model.repo_id]?.pct ?? 0}%` }"
|
||||||
role="progressbar"
|
role="progressbar"
|
||||||
:aria-valuenow="downloadProgress[model.id] ?? 0"
|
:aria-valuenow="downloadProgress[model.repo_id]?.pct ?? 0"
|
||||||
aria-valuemin="0"
|
aria-valuemin="0"
|
||||||
aria-valuemax="100"
|
aria-valuemax="100"
|
||||||
/>
|
/>
|
||||||
<span class="progress-label">
|
<span class="progress-label">
|
||||||
{{ downloadProgress[model.id] == null ? 'Preparing…' : `${downloadProgress[model.id]}%` }}
|
{{
|
||||||
|
!downloadProgress[model.repo_id] ? 'Preparing…'
|
||||||
|
: downloadProgress[model.repo_id].pct != null ? `${Math.round(downloadProgress[model.repo_id].pct!)}%`
|
||||||
|
: downloadProgress[model.repo_id].bytes > 0 ? `${(downloadProgress[model.repo_id].bytes / 1024 / 1024).toFixed(0)} MB downloaded…`
|
||||||
|
: 'Preparing…'
|
||||||
|
}}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -145,20 +159,33 @@
|
||||||
No models installed yet.
|
No models installed yet.
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-else class="installed-table-wrap">
|
<template v-else>
|
||||||
|
<div
|
||||||
|
v-for="group in installedByService"
|
||||||
|
:key="group.service"
|
||||||
|
class="installed-group"
|
||||||
|
>
|
||||||
|
<div class="installed-group-header">
|
||||||
|
<span class="chip" :class="serviceChipClass(group.service)">
|
||||||
|
{{ serviceLabel(group.service) }}
|
||||||
|
</span>
|
||||||
|
<span class="installed-group-count">{{ group.models.length }} model{{ group.models.length !== 1 ? 's' : '' }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="installed-table-wrap">
|
||||||
<table class="installed-table">
|
<table class="installed-table">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th>Name</th>
|
<th>Name</th>
|
||||||
<th>Type</th>
|
<th>Type</th>
|
||||||
<th>Adapter</th>
|
<th>Role</th>
|
||||||
<th>Size</th>
|
<th>Size</th>
|
||||||
<th></th>
|
<th></th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr v-for="model in installedModels" :key="model.name">
|
<tr v-for="model in group.models" :key="model.name">
|
||||||
<td class="td-name">{{ model.name }}</td>
|
<td class="td-name">{{ model.model_id ?? model.name }}</td>
|
||||||
<td>
|
<td>
|
||||||
<span
|
<span
|
||||||
class="badge"
|
class="badge"
|
||||||
|
|
@ -167,9 +194,42 @@
|
||||||
{{ model.type }}
|
{{ model.type }}
|
||||||
</span>
|
</span>
|
||||||
</td>
|
</td>
|
||||||
<td>{{ model.adapter ?? '—' }}</td>
|
|
||||||
<td>{{ humanBytes(model.size) }}</td>
|
|
||||||
<td>
|
<td>
|
||||||
|
<span v-if="model.role" class="chip chip-role chip-sm">{{ model.role }}</span>
|
||||||
|
<span v-else>—</span>
|
||||||
|
</td>
|
||||||
|
<td>{{ humanBytes(model.size_bytes) }}</td>
|
||||||
|
<td class="td-actions">
|
||||||
|
<div v-if="!model.service" class="classify-row">
|
||||||
|
<select
|
||||||
|
class="classify-select"
|
||||||
|
:value="classifyDraft[model.name]?.service ?? ''"
|
||||||
|
@change="onServiceChange(model.name, ($event.target as HTMLSelectElement).value)"
|
||||||
|
aria-label="Assign service"
|
||||||
|
>
|
||||||
|
<option value="" disabled>Service…</option>
|
||||||
|
<option v-for="svc in CLASSIFIABLE_SERVICES" :key="svc.value" :value="svc.value">{{ svc.label }}</option>
|
||||||
|
</select>
|
||||||
|
<select
|
||||||
|
class="classify-select"
|
||||||
|
:value="classifyDraft[model.name]?.role ?? ''"
|
||||||
|
:disabled="!classifyDraft[model.name]?.service"
|
||||||
|
@change="(e) => setClassifyRole(model.name, (e.target as HTMLSelectElement).value)"
|
||||||
|
aria-label="Assign role"
|
||||||
|
>
|
||||||
|
<option value="" disabled>Role…</option>
|
||||||
|
<option
|
||||||
|
v-for="role in rolesForService(classifyDraft[model.name]?.service ?? '')"
|
||||||
|
:key="role"
|
||||||
|
:value="role"
|
||||||
|
>{{ role }}</option>
|
||||||
|
</select>
|
||||||
|
<button
|
||||||
|
class="btn-primary btn-sm"
|
||||||
|
:disabled="!classifyDraft[model.name]?.service || !classifyDraft[model.name]?.role"
|
||||||
|
@click="saveClassify(model.name)"
|
||||||
|
>Save</button>
|
||||||
|
</div>
|
||||||
<button
|
<button
|
||||||
class="btn-danger btn-sm"
|
class="btn-danger btn-sm"
|
||||||
@click="deleteInstalled(model.name)"
|
@click="deleteInstalled(model.name)"
|
||||||
|
|
@ -181,6 +241,8 @@
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
</section>
|
</section>
|
||||||
</div>
|
</div>
|
||||||
</template>
|
</template>
|
||||||
|
|
@ -194,6 +256,8 @@ interface LookupResult {
|
||||||
repo_id: string
|
repo_id: string
|
||||||
pipeline_tag: string | null
|
pipeline_tag: string | null
|
||||||
adapter_recommendation: string | null
|
adapter_recommendation: string | null
|
||||||
|
role: string | null
|
||||||
|
service: string | null
|
||||||
compatible: boolean
|
compatible: boolean
|
||||||
warning: string | null
|
warning: string | null
|
||||||
size: number | null
|
size: number | null
|
||||||
|
|
@ -208,20 +272,27 @@ interface QueuedModel {
|
||||||
status: 'pending' | 'downloading' | 'done' | 'error'
|
status: 'pending' | 'downloading' | 'done' | 'error'
|
||||||
pipeline_tag: string | null
|
pipeline_tag: string | null
|
||||||
adapter_recommendation: string | null
|
adapter_recommendation: string | null
|
||||||
|
role: string | null
|
||||||
|
service: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
interface InstalledModel {
|
interface InstalledModel {
|
||||||
name: string
|
name: string
|
||||||
type: 'finetuned' | 'downloaded'
|
type: 'finetuned' | 'downloaded'
|
||||||
adapter: string | null
|
adapter: string | null
|
||||||
size: number
|
role: string | null
|
||||||
|
service: string | null
|
||||||
|
size_bytes: number
|
||||||
|
model_id: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
interface SseProgressEvent {
|
interface SseProgressEvent {
|
||||||
model_id: string
|
type: 'progress' | 'done' | 'error' | 'idle'
|
||||||
pct: number | null
|
repo_id?: string
|
||||||
status: 'progress' | 'done' | 'error'
|
pct?: number
|
||||||
message?: string
|
downloaded_bytes?: number
|
||||||
|
total_bytes?: number
|
||||||
|
error?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── State ─────────────────────────────────────────────
|
// ── State ─────────────────────────────────────────────
|
||||||
|
|
@ -235,7 +306,8 @@ const addingToQueue = ref(false)
|
||||||
const queuedModels = ref<QueuedModel[]>([])
|
const queuedModels = ref<QueuedModel[]>([])
|
||||||
const installedModels = ref<InstalledModel[]>([])
|
const installedModels = ref<InstalledModel[]>([])
|
||||||
|
|
||||||
const downloadProgress = ref<Record<string, number>>({})
|
const downloadProgress = ref<Record<string, { pct: number | null; bytes: number }>>({})
|
||||||
|
const classifyDraft = ref<Record<string, { service: string; role: string }>>({})
|
||||||
const downloadErrors = ref<Record<string, string>>({})
|
const downloadErrors = ref<Record<string, string>>({})
|
||||||
|
|
||||||
let pollInterval: ReturnType<typeof setInterval> | null = null
|
let pollInterval: ReturnType<typeof setInterval> | null = null
|
||||||
|
|
@ -251,8 +323,69 @@ const downloadingModels = computed(() =>
|
||||||
queuedModels.value.filter(m => m.status === 'downloading')
|
queuedModels.value.filter(m => m.status === 'downloading')
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Display order for the installed-model groups. Services present in the data
// but missing from this list are appended after the listed ones.
const SERVICE_ORDER = ['avocet', 'cf-text', 'cf-stt', 'cf-tts', 'cf-vision', 'cf-image', 'cf-core', 'cf-voice', 'other']
|
||||||
|
|
||||||
|
// Services a model can be assigned to, as { value, label } option entries.
// Written as [value, label] pairs and expanded below to keep the table compact.
const CLASSIFIABLE_SERVICE_PAIRS: [string, string][] = [
  ['avocet', 'Avocet — Email Classifiers'],
  ['cf-text', 'cf-text — Language Models'],
  ['cf-stt', 'cf-stt — Speech Recognition'],
  ['cf-tts', 'cf-tts — Text to Speech'],
  ['cf-vision', 'cf-vision — Vision / VLM'],
  ['cf-image', 'cf-image — Image Generation'],
  ['cf-core', 'cf-core — Embeddings'],
  ['cf-voice', 'cf-voice — Audio Classification'],
]

const CLASSIFIABLE_SERVICES = CLASSIFIABLE_SERVICE_PAIRS.map(
  ([value, label]) => ({ value, label }),
)
|
||||||
|
|
||||||
|
// Roles that may be chosen for each service. A service with exactly one role
// gets that role pre-selected when the service is picked.
const SERVICE_ROLES: Record<string, string[]> = {
  'avocet': ['classifier', 'reranker'],
  'cf-text': ['generator'],
  'cf-stt': ['stt', 'alm'],
  'cf-tts': ['tts'],
  'cf-vision': ['vision', 'vlm', 'embedding'],
  'cf-image': ['image-gen'],
  'cf-core': ['embedding'],
  'cf-voice': ['classifier'],
}
|
||||||
|
|
||||||
|
/**
 * Look up the roles selectable for a service.
 *
 * @param service Service key such as 'cf-text'.
 * @returns The configured role list, or an empty list when the service has no entry.
 */
function rolesForService(service: string): string[] {
  const configured = SERVICE_ROLES[service]
  return configured ?? []
}
|
||||||
|
|
||||||
|
/**
 * Installed models grouped into per-service sections for display.
 *
 * Models without a service go into the 'other' group. Sections are returned
 * with the services listed in SERVICE_ORDER first (in that order), followed by
 * any remaining services in the order they were first encountered.
 */
const installedByService = computed(() => {
  // A Map rather than a plain object: a service named like an Object.prototype
  // member (e.g. "constructor") would otherwise look like an existing bucket.
  const grouped = new Map<string, InstalledModel[]>()
  for (const model of installedModels.value) {
    const key = model.service ?? 'other'
    const bucket = grouped.get(key)
    if (bucket) {
      bucket.push(model)
    } else {
      grouped.set(key, [model])
    }
  }

  const known = SERVICE_ORDER.filter(service => grouped.has(service))
  const extra = Array.from(grouped.keys()).filter(service => !SERVICE_ORDER.includes(service))
  return [...known, ...extra].map(service => ({
    service,
    models: grouped.get(service) ?? [],
  }))
})
|
||||||
|
|
||||||
// ── Helpers ───────────────────────────────────────────
|
// ── Helpers ───────────────────────────────────────────
|
||||||
|
|
||||||
|
// Section heading for each service key. The labels for classifiable services
// are taken from CLASSIFIABLE_SERVICES so the two tables cannot drift apart;
// 'other' is the catch-all group for models without a service.
const SERVICE_LABELS: Record<string, string> = {
  ...Object.fromEntries(CLASSIFIABLE_SERVICES.map(({ value, label }) => [value, label])),
  'other': 'Other — Unclassified',
}
|
||||||
|
|
||||||
|
/**
 * Human-readable heading for a service section.
 *
 * @param service Service key.
 * @returns The label from SERVICE_LABELS, or the key itself when no label is defined.
 */
function serviceLabel(service: string): string {
  const label = SERVICE_LABELS[service]
  return label ?? service
}
|
||||||
|
|
||||||
|
/**
 * CSS class name for a service chip.
 *
 * @param service Service key, or null/empty for an unclassified model.
 * @returns 'chip-service-other' when no service is given; otherwise
 *   'chip-service-' followed by the lower-cased key with every character
 *   outside [a-z0-9] replaced by '-'.
 */
function serviceChipClass(service: string | null): string {
  if (!service) return 'chip-service-other'
  // Lower-case first: the sanitising pattern only keeps a-z and 0-9, so an
  // upper-case key would otherwise be turned into dashes.
  const slug = service.toLowerCase().replace(/[^a-z0-9]/g, '-')
  return `chip-service-${slug}`
}
|
||||||
|
|
||||||
function humanBytes(bytes: number | null): string {
|
function humanBytes(bytes: number | null): string {
|
||||||
if (bytes == null) return '—'
|
if (bytes == null) return '—'
|
||||||
const units = ['B', 'KB', 'MB', 'GB', 'TB']
|
const units = ['B', 'KB', 'MB', 'GB', 'TB']
|
||||||
|
|
@ -305,10 +438,11 @@ async function addToQueue() {
|
||||||
if (!lookupResult.value) return
|
if (!lookupResult.value) return
|
||||||
addingToQueue.value = true
|
addingToQueue.value = true
|
||||||
try {
|
try {
|
||||||
|
const { repo_id, pipeline_tag, adapter_recommendation, role, service } = lookupResult.value
|
||||||
const res = await fetch('/api/models/queue', {
|
const res = await fetch('/api/models/queue', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ repo_id: lookupResult.value.repo_id }),
|
body: JSON.stringify({ repo_id, pipeline_tag, adapter_recommendation, role, service }),
|
||||||
})
|
})
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
lookupResult.value = { ...lookupResult.value, already_queued: true }
|
lookupResult.value = { ...lookupResult.value, already_queued: true }
|
||||||
|
|
@ -339,12 +473,50 @@ async function dismissModel(id: string) {
|
||||||
} catch { /* ignore */ }
|
} catch { /* ignore */ }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Record a newly chosen service in the classification draft for a model.
 *
 * The draft's role is reset: it is pre-filled when the service offers exactly
 * one role and left empty otherwise, so the user must pick one.
 *
 * @param name Installed model name (key of the draft).
 * @param service Selected service key.
 */
function onServiceChange(name: string, service: string) {
  // Go through the shared helper so the role lookup lives in one place.
  const roles = rolesForService(service)
  const role = roles.length === 1 ? roles[0] : ''
  // Replace the whole record rather than mutating it in place.
  classifyDraft.value = {
    ...classifyDraft.value,
    [name]: { service, role },
  }
}
|
||||||
|
|
||||||
|
/**
 * Set the role in a model's classification draft, keeping its other fields.
 *
 * @param name Installed model name (key of the draft).
 * @param role Selected role.
 */
function setClassifyRole(name: string, role: string) {
  const drafts = classifyDraft.value
  const next = { ...drafts[name], role }
  classifyDraft.value = { ...drafts, [name]: next }
}
|
||||||
|
|
||||||
|
/**
 * Persist the drafted service/role classification for an installed model.
 *
 * Does nothing unless the draft has both a service and a role. On success the
 * local model entry is updated, the draft is discarded and the queue is
 * reloaded. Failures are non-fatal: they are logged and the draft is kept so
 * the user can retry.
 *
 * @param name Installed model name.
 */
async function saveClassify(name: string) {
  const draft = classifyDraft.value[name]
  if (!draft?.service || !draft?.role) return

  try {
    const res = await fetch(`/api/models/installed/${encodeURIComponent(name)}`, {
      method: 'PATCH',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ service: draft.service, role: draft.role }),
    })
    if (!res.ok) {
      // Surface the rejection instead of dropping it silently.
      console.warn(`Failed to classify model "${name}": HTTP ${res.status}`)
      return
    }

    // Update the local entry so the model moves to the correct service group.
    installedModels.value = installedModels.value.map(m =>
      m.name === name ? { ...m, service: draft.service, role: draft.role } : m
    )

    const updated = { ...classifyDraft.value }
    delete updated[name]
    classifyDraft.value = updated

    await loadQueue()
  } catch (err) {
    // Non-fatal: keep the draft, but leave a trace for debugging.
    console.warn(`Failed to classify model "${name}"`, err)
  }
}
|
||||||
|
|
||||||
/**
 * Delete an installed model after the user confirms.
 *
 * When the server accepts the deletion, the model is removed from the local
 * list and the queue is reloaded. Any error is deliberately ignored.
 *
 * @param name Installed model name.
 */
async function deleteInstalled(name: string) {
  const confirmed = window.confirm(`Delete installed model "${name}"? This cannot be undone.`)
  if (!confirmed) return

  const url = `/api/models/installed/${encodeURIComponent(name)}`
  try {
    const res = await fetch(url, { method: 'DELETE' })
    if (!res.ok) return
    installedModels.value = installedModels.value.filter(m => m.name !== name)
    await loadQueue()
  } catch { /* ignore */ }
}
|
||||||
|
|
@ -378,21 +550,28 @@ function startSse() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
const { model_id, pct, status, message } = event
|
const { type, repo_id, pct, downloaded_bytes, error } = event
|
||||||
|
if (!repo_id) return
|
||||||
|
|
||||||
if (status === 'progress' && pct != null) {
|
if (type === 'progress') {
|
||||||
downloadProgress.value = { ...downloadProgress.value, [model_id]: pct }
|
const bytes = downloaded_bytes ?? 0
|
||||||
} else if (status === 'done') {
|
// pct stays null when total_bytes is unknown so we can show "X MB" instead
|
||||||
|
const progress = (pct != null && pct > 0) ? pct : (bytes > 0 ? null : undefined)
|
||||||
|
downloadProgress.value = { ...downloadProgress.value, [repo_id]: { pct: progress ?? null, bytes } }
|
||||||
|
} else if (type === 'done') {
|
||||||
const updated = { ...downloadProgress.value }
|
const updated = { ...downloadProgress.value }
|
||||||
delete updated[model_id]
|
delete updated[repo_id]
|
||||||
downloadProgress.value = updated
|
downloadProgress.value = updated
|
||||||
|
|
||||||
queuedModels.value = queuedModels.value.filter(m => m.id !== model_id)
|
queuedModels.value = queuedModels.value.filter(m => m.repo_id !== repo_id)
|
||||||
loadInstalled()
|
loadInstalled()
|
||||||
} else if (status === 'error') {
|
} else if (type === 'error') {
|
||||||
|
const entry = queuedModels.value.find(m => m.repo_id === repo_id)
|
||||||
|
if (entry) {
|
||||||
downloadErrors.value = {
|
downloadErrors.value = {
|
||||||
...downloadErrors.value,
|
...downloadErrors.value,
|
||||||
[model_id]: message ?? 'Download failed.',
|
[entry.id]: error ?? 'Download failed.',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
@ -595,12 +774,6 @@ onUnmounted(() => {
|
||||||
align-self: flex-start;
|
align-self: flex-start;
|
||||||
}
|
}
|
||||||
|
|
||||||
.btn-add-queue-warn {
|
|
||||||
background: var(--color-surface-raised, #e4ebf5);
|
|
||||||
color: var(--color-text-secondary, #6b7a99);
|
|
||||||
border: 1px solid var(--color-border, #d0d7e8);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ── Model cards (queue + downloads) ── */
|
/* ── Model cards (queue + downloads) ── */
|
||||||
.model-card {
|
.model-card {
|
||||||
border: 1px solid var(--color-border, #a8b8d0);
|
border: 1px solid var(--color-border, #a8b8d0);
|
||||||
|
|
@ -715,6 +888,35 @@ onUnmounted(() => {
|
||||||
word-break: break-all;
|
word-break: break-all;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.td-actions {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.4rem;
|
||||||
|
align-items: flex-start;
|
||||||
|
}
|
||||||
|
|
||||||
|
.classify-row {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.35rem;
|
||||||
|
align-items: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.classify-select {
|
||||||
|
font-size: 0.78rem;
|
||||||
|
padding: 0.2rem 0.4rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
border: 1px solid var(--color-border, #444);
|
||||||
|
background: var(--color-surface, #1e1e2e);
|
||||||
|
color: var(--color-text, #cdd6f4);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.classify-select:disabled {
|
||||||
|
opacity: 0.4;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
|
||||||
/* ── Badges ── */
|
/* ── Badges ── */
|
||||||
.badge-group {
|
.badge-group {
|
||||||
display: flex;
|
display: flex;
|
||||||
|
|
@ -777,6 +979,76 @@ onUnmounted(() => {
|
||||||
background: color-mix(in srgb, var(--color-accent, #c4732a) 12%, var(--color-surface-alt, #dde4f0));
|
background: color-mix(in srgb, var(--color-accent, #c4732a) 12%, var(--color-surface-alt, #dde4f0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.chip-role {
|
||||||
|
color: var(--color-info, #1e6091);
|
||||||
|
background: color-mix(in srgb, var(--color-info, #1e6091) 12%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-sm {
|
||||||
|
font-size: 0.68rem;
|
||||||
|
padding: 0.1rem 0.4rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Service chips — one colour per CF service */
|
||||||
|
.chip-service-avocet {
|
||||||
|
color: var(--color-primary, #2d5a27);
|
||||||
|
background: color-mix(in srgb, var(--color-primary, #2d5a27) 15%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-service-cf-text {
|
||||||
|
color: #c2410c;
|
||||||
|
background: color-mix(in srgb, #c2410c 12%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-service-cf-stt {
|
||||||
|
color: #5e35b1;
|
||||||
|
background: color-mix(in srgb, #5e35b1 12%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-service-cf-tts {
|
||||||
|
color: #0277bd;
|
||||||
|
background: color-mix(in srgb, #0277bd 12%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-service-cf-vision {
|
||||||
|
color: #00695c;
|
||||||
|
background: color-mix(in srgb, #00695c 12%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-service-cf-core {
|
||||||
|
color: #6d4c41;
|
||||||
|
background: color-mix(in srgb, #6d4c41 12%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-service-cf-voice {
|
||||||
|
color: #ad1457;
|
||||||
|
background: color-mix(in srgb, #ad1457 12%, var(--color-surface-alt, #dde4f0));
|
||||||
|
}
|
||||||
|
|
||||||
|
.chip-service-other {
|
||||||
|
color: var(--color-text-muted, #4a5c7a);
|
||||||
|
background: var(--color-surface-alt, #dde4f0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ── Installed group ── */
|
||||||
|
.installed-group {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.installed-group-header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
padding: 0.25rem 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.installed-group-count {
|
||||||
|
font-size: 0.78rem;
|
||||||
|
color: var(--color-text-muted, #4a5c7a);
|
||||||
|
}
|
||||||
|
|
||||||
/* ── Buttons ── */
|
/* ── Buttons ── */
|
||||||
.btn-primary, .btn-danger {
|
.btn-primary, .btn-danger {
|
||||||
padding: 0.4rem 0.9rem;
|
padding: 0.4rem 0.9rem;
|
||||||
|
|
@ -852,7 +1124,7 @@ onUnmounted(() => {
|
||||||
|
|
||||||
.installed-table th:nth-child(3),
|
.installed-table th:nth-child(3),
|
||||||
.installed-table td:nth-child(3) {
|
.installed-table td:nth-child(3) {
|
||||||
display: none; /* hide Adapter column on very narrow screens */
|
display: none; /* hide Role column on very narrow screens */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue