peregrine/config/llm.yaml
pyr0ball 11fb3a07b4 chore(llm): switch vllm model_candidates from Ouro to Phi-4-mini + Qwen2.5-3B
Ouro models incompatible with transformers 5.x bundled in cf env.
Phi-4-mini-instruct tried first (stronger benchmarks, 7.2GB);
Qwen2.5-3B-Instruct as VRAM-constrained fallback (5.8GB).
2026-04-02 15:34:59 -07:00

76 lines
1.6 KiB
YAML

# LLM backend definitions. Each key under `backends` is a backend name; the
# *_fallback_order lists elsewhere in this file refer to backends by these names.
# NOTE(review): nesting was reconstructed from a listing that had lost its
# indentation — confirm against the repository copy, especially `cf_orch`.
backends:
  anthropic:
    # presumably the name of the environment variable holding the key, not the
    # key itself — confirm against the loader
    api_key_env: ANTHROPIC_API_KEY
    enabled: false
    model: claude-sonnet-4-6
    supports_images: true
    type: anthropic

  claude_code:
    # `any` looks like a placeholder credential for a local endpoint — TODO confirm
    api_key: any
    base_url: http://localhost:3009/v1
    enabled: false
    model: claude-code-terminal
    supports_images: true
    type: openai_compat

  github_copilot:
    api_key: any
    base_url: http://localhost:3010/v1
    enabled: false
    model: gpt-4o
    supports_images: false
    type: openai_compat

  ollama:
    api_key: ollama
    base_url: http://host.docker.internal:11434/v1
    enabled: true
    model: llama3.2:3b
    supports_images: false
    type: openai_compat

  ollama_research:
    api_key: ollama
    base_url: http://ollama_research:11434/v1
    enabled: true
    model: llama3.1:8b
    supports_images: false
    type: openai_compat

  # Unlike the other backends, this entry has no `model` or `api_key` key.
  vision_service:
    base_url: http://host.docker.internal:8002
    enabled: true
    supports_images: true
    type: vision_service

  vllm:
    api_key: ''
    base_url: http://vllm:8000/v1
    enabled: true
    # presumably a sentinel resolved at runtime rather than a literal model
    # name — verify against the consumer
    model: __auto__
    supports_images: false
    type: openai_compat
    cf_orch:
      service: vllm
      # Preference order per the accompanying commit message:
      # Phi-4-mini-instruct is tried first (7.2GB); Qwen2.5-3B-Instruct is the
      # VRAM-constrained fallback (5.8GB).
      model_candidates:
        - Phi-4-mini-instruct
        - Qwen2.5-3B-Instruct
      # NOTE(review): unit looks like seconds from the `_s` suffix — confirm
      ttl_s: 300

  # Same base_url as `vllm`, but with no `cf_orch` section of its own.
  vllm_research:
    api_key: ''
    base_url: http://vllm:8000/v1
    enabled: true
    model: __auto__
    supports_images: false
    type: openai_compat
# Backend names for the unprefixed chain; each entry is a key under `backends`.
# NOTE(review): presumably tried top to bottom — confirm against the router.
fallback_order:
  - ollama
  - claude_code
  - vllm
  - github_copilot
  - anthropic
# Backend names for the research chain; each entry is a key under `backends`.
# NOTE(review): presumably tried top to bottom — confirm against the router.
research_fallback_order:
  - claude_code
  - vllm_research
  - ollama_research
  - github_copilot
  - anthropic
# Backend names for the vision chain; every entry listed here has
# `supports_images: true` in its `backends` definition.
# NOTE(review): presumably tried top to bottom — confirm against the router.
vision_fallback_order:
  - vision_service
  - claude_code
  - anthropic