From 11fb3a07b4d9c359d74200c561501d7cc9383206 Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Thu, 2 Apr 2026 15:34:59 -0700
Subject: [PATCH] chore(llm): switch vllm model_candidates from Ouro to
 Phi-4-mini + Qwen2.5-3B

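Ouro models are incompatible with the transformers 5.x bundled in the cf env.
Phi-4-mini-instruct is tried first (stronger benchmarks, 7.2GB);
Qwen2.5-3B-Instruct is the VRAM-constrained fallback (5.8GB).

Also repoint the ollama_research, vllm and vllm_research base_url values
from host.docker.internal to the ollama_research and vllm hostnames.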
---
 config/llm.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/config/llm.yaml b/config/llm.yaml
index 0ef6abc..e61e5bf 100644
--- a/config/llm.yaml
+++ b/config/llm.yaml
@@ -28,7 +28,7 @@ backends:
     type: openai_compat
   ollama_research:
     api_key: ollama
-    base_url: http://host.docker.internal:11434/v1
+    base_url: http://ollama_research:11434/v1
     enabled: true
     model: llama3.1:8b
     supports_images: false
@@ -40,7 +40,7 @@ backends:
     type: vision_service
   vllm:
     api_key: ''
-    base_url: http://host.docker.internal:8000/v1
+    base_url: http://vllm:8000/v1
     enabled: true
     model: __auto__
     supports_images: false
@@ -48,12 +48,12 @@ backends:
     cf_orch:
       service: vllm
       model_candidates:
-        - Ouro-2.6B-Thinking
-        - Ouro-1.4B
+        - Phi-4-mini-instruct
+        - Qwen2.5-3B-Instruct
       ttl_s: 300
   vllm_research:
     api_key: ''
-    base_url: http://host.docker.internal:8000/v1
+    base_url: http://vllm:8000/v1
     enabled: true
     model: __auto__
     supports_images: false