peregrine/config/llm.yaml
pyr0ball 80041d1dd9 feat: wire cf-orch allocate flow for LLM routing
- Fix cf_text base_url (was port 8006/cf-musicgen, corrected to 8008/cf-text)
- Add cf_orch blocks to cf_text, ollama, ollama_research, vllm_research backends
- Fix ollama_research base_url to host.docker.internal:11435 (was Docker service name)
- Promote cf_text to top of research_fallback_order
- Add cf_text backend to llm.cloud.yaml with cf_orch block
- Wire _RL_WIZARD rate limit to wizard_ai_interview endpoint (closes TODO from #122)

Closes: #122
2026-06-14 15:21:53 -07:00

98 lines
2 KiB
YAML

# LLM backend definitions, keyed by the backend name that the
# *_fallback_order lists refer to.
# NOTE(review): nesting restored from a copy whose indentation was lost --
# each backend's settings sit under its name and `cf_orch` options sit under
# their backend. Values are unchanged; confirm the shape against the loader.
backends:
  cf_text:
    # NOTE(review): `any` looks like a placeholder key for a local
    # endpoint -- confirm the server ignores it.
    api_key: any
    base_url: http://host.docker.internal:8008/v1
    enabled: true
    model: cf-text
    supports_images: false
    type: openai_compat
    # NOTE(review): presumably allocation settings for the orchestrator;
    # ttl_s is presumably a lifetime in seconds -- confirm with the consumer.
    cf_orch:
      service: cf-text
      ttl_s: 300

  anthropic:
    # Names an environment variable rather than embedding the key itself.
    api_key_env: ANTHROPIC_API_KEY
    enabled: false
    model: claude-sonnet-4-6
    supports_images: true
    type: anthropic

  claude_code:
    api_key: any
    base_url: http://localhost:3009/v1
    enabled: false
    model: claude-code-terminal
    supports_images: true
    type: openai_compat

  github_copilot:
    api_key: any
    base_url: http://localhost:3010/v1
    enabled: false
    model: gpt-4o
    supports_images: false
    type: openai_compat

  ollama:
    api_key: ollama
    base_url: http://host.docker.internal:11434/v1
    enabled: true
    model: llama3.2:3b
    supports_images: false
    type: openai_compat
    cf_orch:
      service: ollama
      ttl_s: 300

  # Same cf_orch service as `ollama`, but a different port (11435) and model.
  ollama_research:
    api_key: ollama
    base_url: http://host.docker.internal:11435/v1
    enabled: true
    model: llama3.1:8b
    supports_images: false
    type: openai_compat
    cf_orch:
      service: ollama
      ttl_s: 300

  # The only backend here without `api_key` or `model` entries.
  vision_service:
    base_url: http://vision:8002
    enabled: true
    supports_images: true
    type: vision_service

  vllm:
    api_key: ''
    base_url: http://host.docker.internal:8000/v1
    enabled: true
    # NOTE(review): `__auto__` looks like a sentinel for automatic model
    # selection, presumably tied to cf_orch.model_candidates -- confirm.
    model: __auto__
    supports_images: false
    type: openai_compat
    cf_orch:
      service: vllm
      model_candidates:
        - Qwen2.5-3B-Instruct
      ttl_s: 300

  # NOTE(review): every setting here is identical to `vllm`, including
  # base_url -- confirm that sharing the endpoint is intended.
  vllm_research:
    api_key: ''
    base_url: http://host.docker.internal:8000/v1
    enabled: true
    model: __auto__
    supports_images: false
    type: openai_compat
    cf_orch:
      service: vllm
      model_candidates:
        - Qwen2.5-3B-Instruct
      ttl_s: 300
# Backend names for the default chain; each entry is a key under `backends`.
# NOTE(review): presumably tried first to last -- confirm with the router.
fallback_order:
  - cf_text
  - ollama
  - claude_code
  - vllm
  - github_copilot
  - anthropic
# Backend names for the research chain; each entry is a key under `backends`.
# Uses the *_research variants of vllm and ollama, after cf_text.
# NOTE(review): presumably tried first to last -- confirm with the router.
research_fallback_order:
  - cf_text
  - vllm_research
  - ollama_research
  - claude_code
  - github_copilot
  - anthropic
# Backend names for the vision chain; each entry is a key under `backends`
# and is declared there with `supports_images: true`.
# NOTE(review): presumably tried first to last -- confirm with the router.
vision_fallback_order:
  - vision_service
  - claude_code
  - anthropic