From bc80922d61fdee458c65af82c17e75b11ff941ed Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Thu, 2 Apr 2026 16:36:38 -0700 Subject: [PATCH] =?UTF-8?q?chore(llm):=20swap=20model=5Fcandidates=20order?= =?UTF-8?q?=20=E2=80=94=20Qwen2.5-3B=20first,=20Phi-4-mini=20fallback?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phi-4-mini's cached modeling_phi3.py imports SlidingWindowCache, which was removed in transformers 5.x. Qwen2.5-3B uses the built-in qwen2 architecture and works cleanly. Reorder model_candidates so Qwen is tried first and Phi-4-mini is the fallback. --- config/llm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/llm.yaml b/config/llm.yaml index e61e5bf..d033a2e 100644 --- a/config/llm.yaml +++ b/config/llm.yaml @@ -48,8 +48,8 @@ backends: cf_orch: service: vllm model_candidates: - - Phi-4-mini-instruct - Qwen2.5-3B-Instruct + - Phi-4-mini-instruct ttl_s: 300 vllm_research: api_key: ''