chore(models): refresh model registries with current cluster catalog

Replace stale llama/mistral/phi model refs with models active on the
cluster: deepseek-r1 (1.5b, 7b-4bit, 0528-qwen3-8b-gguf),
deepseek-coder-6.7b-4bit, granite-4.1-8b, qwen2.5 (3b, 7b),
capybarahermes-2.5-mistral-7b-gguf, darwin-9b-opus-gguf. Update
benchmark_plans.py doc examples to match.
This commit is contained in:
pyr0ball 2026-05-17 11:24:03 -07:00
parent d416ef8aa4
commit 13ca082a43
2 changed files with 31 additions and 12 deletions

View file

@ -38,11 +38,15 @@ router = APIRouter()
# Kept here so the UI can list them without importing the script. # Kept here so the UI can list them without importing the script.
MODEL_REGISTRY: dict[str, str] = { MODEL_REGISTRY: dict[str, str] = {
"llama3.2-3b": "Llama 3.2 3B Instruct (local via cf-text)", "deepseek-r1-1.5b": "DeepSeek R1 1.5B distill (cf-orch catalog key)",
"llama3.2-1b": "Llama 3.2 1B Instruct (local via cf-text)", "deepseek-r1-7b-4bit": "DeepSeek R1 7B distill, 4-bit (cf-orch catalog key)",
"mistral-7b": "Mistral 7B Instruct (local via cf-text)", "deepseek-r1-0528-qwen3-8b-gguf": "DeepSeek R1 0528 Qwen3 8B GGUF (4 nodes)",
"phi3-mini": "Phi-3 Mini 3.8B (local via cf-text)", "deepseek-coder-6.7b-4bit": "DeepSeek Coder 6.7B instruct, 4-bit (cf-orch catalog key)",
"qwen2.5-3b": "Qwen 2.5 3B Instruct (local via cf-text)", "granite-4.1-8b": "IBM Granite 4.1 8B, 4-bit (cf-orch catalog key)",
"qwen2.5-3b": "Qwen 2.5 3B Q4 GGUF (cf-orch catalog key)",
"qwen2.5-7b": "Qwen 2.5 7B Q4 GGUF (cf-orch catalog key)",
"capybarahermes-2.5-mistral-7b-gguf": "CapybaraHermes 2.5 Mistral 7B GGUF (4 nodes)",
"darwin-9b-opus-gguf": "Darwin 9B Opus GGUF -- long-form writing (3 nodes)",
} }
RUBRIC_LABELS: dict[str, str] = { RUBRIC_LABELS: dict[str, str] = {

View file

@ -23,16 +23,16 @@ Usage
python scripts/benchmark_plans.py --list-models python scripts/benchmark_plans.py --list-models
# Run all held-out prompts against a single model, print report # Run all held-out prompts against a single model, print report
python scripts/benchmark_plans.py --model llama3.2-3b python scripts/benchmark_plans.py --model granite-4.1-8b
# Compare two models side-by-side # Compare two models side-by-side
python scripts/benchmark_plans.py --compare llama3.2-3b mistral-7b python scripts/benchmark_plans.py --compare granite-4.1-8b deepseek-r1-7b-4bit
# Run with a custom API base (cf-text default: http://localhost:8080/v1) # Run with a custom API base (cf-text default: http://localhost:8080/v1)
python scripts/benchmark_plans.py --model llama3.2-3b --api-base http://localhost:8080/v1 python scripts/benchmark_plans.py --model granite-4.1-8b --api-base http://localhost:8080/v1
# Export detailed results JSON # Export detailed results JSON
python scripts/benchmark_plans.py --model llama3.2-3b --output data/bench_results.json python scripts/benchmark_plans.py --model granite-4.1-8b --output data/bench_results.json
""" """
from __future__ import annotations from __future__ import annotations
@ -290,6 +290,11 @@ MODEL_REGISTRY: dict[str, dict[str, str]] = {
"model": "deepseek-r1-7b-4bit", "model": "deepseek-r1-7b-4bit",
"description": "DeepSeek R1 7B distill, 4-bit (cf-orch catalog key)", "description": "DeepSeek R1 7B distill, 4-bit (cf-orch catalog key)",
}, },
"deepseek-r1-0528-qwen3-8b-gguf": {
"api_base": CF_TEXT_BASE,
"model": "deepseek-r1-0528-qwen3-8b-gguf",
"description": "DeepSeek R1 0528 Qwen3 8B GGUF -- current reasoning model (4 nodes)",
},
"deepseek-coder-6.7b-4bit": { "deepseek-coder-6.7b-4bit": {
"api_base": CF_TEXT_BASE, "api_base": CF_TEXT_BASE,
"model": "deepseek-coder-6.7b-4bit", "model": "deepseek-coder-6.7b-4bit",
@ -298,17 +303,27 @@ MODEL_REGISTRY: dict[str, dict[str, str]] = {
"granite-4.1-8b": { "granite-4.1-8b": {
"api_base": CF_TEXT_BASE, "api_base": CF_TEXT_BASE,
"model": "granite-4.1-8b", "model": "granite-4.1-8b",
"description": "IBM Granite 4.1 8B, 4-bit (cf-orch catalog key)", "description": "IBM Granite 4.1 8B, 4-bit -- safety-trained (cf-orch catalog key)",
},
"capybarahermes-2.5-mistral-7b-gguf": {
"api_base": CF_TEXT_BASE,
"model": "capybarahermes-2.5-mistral-7b-gguf",
"description": "CapybaraHermes 2.5 Mistral 7B GGUF -- conversational/creative (4 nodes)",
},
"darwin-9b-opus-gguf": {
"api_base": CF_TEXT_BASE,
"model": "darwin-9b-opus-gguf",
"description": "Darwin 9B Opus GGUF -- high-quality long-form writing (3 nodes)",
}, },
"qwen2.5-3b": { "qwen2.5-3b": {
"api_base": CF_TEXT_BASE, "api_base": CF_TEXT_BASE,
"model": "qwen2.5-3b", "model": "qwen2.5-3b",
"description": "Qwen 2.5 3B Q4 GGUF (cf-orch catalog key, navi only)", "description": "Qwen 2.5 3B Q4 GGUF (cf-orch catalog key)",
}, },
"qwen2.5-7b": { "qwen2.5-7b": {
"api_base": CF_TEXT_BASE, "api_base": CF_TEXT_BASE,
"model": "qwen2.5-7b", "model": "qwen2.5-7b",
"description": "Qwen 2.5 7B Q4 GGUF (cf-orch catalog key, navi only)", "description": "Qwen 2.5 7B Q4 GGUF (cf-orch catalog key)",
}, },
} }