From 11fb3a07b4d9c359d74200c561501d7cc9383206 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Thu, 2 Apr 2026 15:34:59 -0700 Subject: [PATCH] chore(llm): switch vllm model_candidates from Ouro to Phi-4-mini + Qwen2.5-3B Ouro models are incompatible with the transformers 5.x bundled in the cf env. Phi-4-mini-instruct is tried first (stronger benchmarks, 7.2GB); Qwen2.5-3B-Instruct is the VRAM-constrained fallback (5.8GB). Also changes the base_url of the ollama_research, vllm, and vllm_research backends from host.docker.internal to the ollama_research and vllm hostnames. --- config/llm.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/llm.yaml b/config/llm.yaml index 0ef6abc..e61e5bf 100644 --- a/config/llm.yaml +++ b/config/llm.yaml @@ -28,7 +28,7 @@ backends: type: openai_compat ollama_research: api_key: ollama - base_url: http://host.docker.internal:11434/v1 + base_url: http://ollama_research:11434/v1 enabled: true model: llama3.1:8b supports_images: false @@ -40,7 +40,7 @@ backends: type: vision_service vllm: api_key: '' - base_url: http://host.docker.internal:8000/v1 + base_url: http://vllm:8000/v1 enabled: true model: __auto__ supports_images: false @@ -48,12 +48,12 @@ backends: cf_orch: service: vllm model_candidates: - - Ouro-2.6B-Thinking - - Ouro-1.4B + - Phi-4-mini-instruct + - Qwen2.5-3B-Instruct ttl_s: 300 vllm_research: api_key: '' - base_url: http://host.docker.internal:8000/v1 + base_url: http://vllm:8000/v1 enabled: true model: __auto__ supports_images: false