From 13ca082a43fbc30fa4e67ad3ad5e72c3790aa6a6 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 17 May 2026 11:24:03 -0700 Subject: [PATCH] chore(models): refresh model registries with current cluster catalog Replace stale llama/mistral/phi model refs with models active on the cluster: deepseek-r1 (1.5b, 7b-4bit, 0528-qwen3-8b-gguf), granite-4.1-8b, qwen2.5 (3b, 7b), capybarahermes-2.5-mistral-7b, darwin-9b-opus. Update benchmark_plans.py doc examples to match. --- app/plans_bench.py | 14 +++++++++----- scripts/benchmark_plans.py | 29 ++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/app/plans_bench.py b/app/plans_bench.py index c46ddcb..cc9b5ad 100644 --- a/app/plans_bench.py +++ b/app/plans_bench.py @@ -38,11 +38,15 @@ router = APIRouter() # Kept here so the UI can list them without importing the script. MODEL_REGISTRY: dict[str, str] = { - "llama3.2-3b": "Llama 3.2 3B Instruct (local via cf-text)", - "llama3.2-1b": "Llama 3.2 1B Instruct (local via cf-text)", - "mistral-7b": "Mistral 7B Instruct (local via cf-text)", - "phi3-mini": "Phi-3 Mini 3.8B (local via cf-text)", - "qwen2.5-3b": "Qwen 2.5 3B Instruct (local via cf-text)", + "deepseek-r1-1.5b": "DeepSeek R1 1.5B distill (cf-orch catalog key)", + "deepseek-r1-7b-4bit": "DeepSeek R1 7B distill, 4-bit (cf-orch catalog key)", + "deepseek-r1-0528-qwen3-8b-gguf": "DeepSeek R1 0528 Qwen3 8B GGUF (4 nodes)", + "deepseek-coder-6.7b-4bit": "DeepSeek Coder 6.7B instruct, 4-bit (cf-orch catalog key)", + "granite-4.1-8b": "IBM Granite 4.1 8B, 4-bit (cf-orch catalog key)", + "qwen2.5-3b": "Qwen 2.5 3B Q4 GGUF (cf-orch catalog key)", + "qwen2.5-7b": "Qwen 2.5 7B Q4 GGUF (cf-orch catalog key)", + "capybarahermes-2.5-mistral-7b-gguf": "CapybaraHermes 2.5 Mistral 7B GGUF (4 nodes)", + "darwin-9b-opus-gguf": "Darwin 9B Opus GGUF -- long-form writing (3 nodes)", } RUBRIC_LABELS: dict[str, str] = { diff --git a/scripts/benchmark_plans.py b/scripts/benchmark_plans.py index 0b02fa7..98792e9 100644 --- a/scripts/benchmark_plans.py +++ b/scripts/benchmark_plans.py @@ -23,16 +23,16 @@ Usage python scripts/benchmark_plans.py --list-models # Run all held-out prompts against a single model, print report - python scripts/benchmark_plans.py --model llama3.2-3b + python scripts/benchmark_plans.py --model granite-4.1-8b # Compare two models side-by-side - python scripts/benchmark_plans.py --compare llama3.2-3b mistral-7b + python scripts/benchmark_plans.py --compare granite-4.1-8b deepseek-r1-7b-4bit # Run with a custom API base (cf-text default: http://localhost:8080/v1) - python scripts/benchmark_plans.py --model llama3.2-3b --api-base http://localhost:8080/v1 + python scripts/benchmark_plans.py --model granite-4.1-8b --api-base http://localhost:8080/v1 # Export detailed results JSON - python scripts/benchmark_plans.py --model llama3.2-3b --output data/bench_results.json + python scripts/benchmark_plans.py --model granite-4.1-8b --output data/bench_results.json """ from __future__ import annotations @@ -290,6 +290,11 @@ MODEL_REGISTRY: dict[str, dict[str, str]] = { "model": "deepseek-r1-7b-4bit", "description": "DeepSeek R1 7B distill, 4-bit (cf-orch catalog key)", }, + "deepseek-r1-0528-qwen3-8b-gguf": { + "api_base": CF_TEXT_BASE, + "model": "deepseek-r1-0528-qwen3-8b-gguf", + "description": "DeepSeek R1 0528 Qwen3 8B GGUF -- current reasoning model (4 nodes)", + }, "deepseek-coder-6.7b-4bit": { "api_base": CF_TEXT_BASE, "model": "deepseek-coder-6.7b-4bit", @@ -298,17 +303,27 @@ MODEL_REGISTRY: dict[str, dict[str, str]] = { "granite-4.1-8b": { "api_base": CF_TEXT_BASE, "model": "granite-4.1-8b", - "description": "IBM Granite 4.1 8B, 4-bit (cf-orch catalog key)", + "description": "IBM Granite 4.1 8B, 4-bit -- safety-trained (cf-orch catalog key)", + }, + "capybarahermes-2.5-mistral-7b-gguf": { + "api_base": CF_TEXT_BASE, + "model": "capybarahermes-2.5-mistral-7b-gguf", + "description": "CapybaraHermes 2.5 Mistral 7B GGUF -- conversational/creative (4 nodes)", + }, + "darwin-9b-opus-gguf": { + "api_base": CF_TEXT_BASE, + "model": "darwin-9b-opus-gguf", + "description": "Darwin 9B Opus GGUF -- high-quality long-form writing (3 nodes)", }, "qwen2.5-3b": { "api_base": CF_TEXT_BASE, "model": "qwen2.5-3b", - "description": "Qwen 2.5 3B Q4 GGUF (cf-orch catalog key, navi only)", + "description": "Qwen 2.5 3B Q4 GGUF (cf-orch catalog key)", }, "qwen2.5-7b": { "api_base": CF_TEXT_BASE, "model": "qwen2.5-7b", - "description": "Qwen 2.5 7B Q4 GGUF (cf-orch catalog key, navi only)", + "description": "Qwen 2.5 7B Q4 GGUF (cf-orch catalog key)", }, }