# - Fix cf_text base_url (was port 8006/cf-musicgen, corrected to 8008/cf-text)
# - Add cf_orch blocks to cf_text, ollama, ollama_research, vllm_research backends
# - Fix ollama_research base_url to host.docker.internal:11435 (was Docker service name)
# - Promote cf_text to top of research_fallback_order
# - Add cf_text backend to llm.cloud.yaml with cf_orch block
# - Wire _RL_WIZARD rate limit to wizard_ai_interview endpoint (closes TODO from #122)
#
# Closes: #122
98 lines
2 KiB
YAML
98 lines
2 KiB
YAML
# LLM backend definitions, keyed by backend name. The *_fallback_order lists
# in this file refer to backends by these names.
# NOTE(review): nesting was reconstructed from key order (indentation had been
# lost); confirm the `cf_orch` sub-mappings against the consumer's schema.
backends:
  # OpenAI-compatible endpoint on the Docker host, port 8008.
  cf_text:
    api_key: any  # presumably a placeholder value; confirm it is not checked
    base_url: http://host.docker.internal:8008/v1
    enabled: true
    model: cf-text
    supports_images: false
    type: openai_compat
    # NOTE(review): `cf_orch` semantics are defined by the consumer; `ttl_s`
    # is presumably a time-to-live in seconds. Confirm before changing.
    cf_orch:
      service: cf-text
      ttl_s: 300

  # Disabled. The only backend configured with `api_key_env` rather than an
  # inline `api_key`.
  anthropic:
    api_key_env: ANTHROPIC_API_KEY
    enabled: false
    model: claude-sonnet-4-6
    supports_images: true
    type: anthropic

  # Disabled. OpenAI-compatible endpoint on localhost:3009.
  claude_code:
    api_key: any
    base_url: http://localhost:3009/v1
    enabled: false
    model: claude-code-terminal
    supports_images: true
    type: openai_compat

  # Disabled. OpenAI-compatible endpoint on localhost:3010.
  github_copilot:
    api_key: any
    base_url: http://localhost:3010/v1
    enabled: false
    model: gpt-4o
    supports_images: false
    type: openai_compat

  # OpenAI-compatible endpoint on the Docker host, port 11434.
  ollama:
    api_key: ollama
    base_url: http://host.docker.internal:11434/v1
    enabled: true
    model: llama3.2:3b
    supports_images: false
    type: openai_compat
    cf_orch:
      service: ollama
      ttl_s: 300

  # Differs from `ollama` in port (11435) and model; it uses the same
  # `cf_orch.service` value.
  ollama_research:
    api_key: ollama
    base_url: http://host.docker.internal:11435/v1
    enabled: true
    model: llama3.1:8b
    supports_images: false
    type: openai_compat
    cf_orch:
      service: ollama
      ttl_s: 300

  # The only backend of type `vision_service`; it has no `model` or `api_key`.
  vision_service:
    base_url: http://vision:8002
    enabled: true
    supports_images: true
    type: vision_service

  # OpenAI-compatible endpoint on the Docker host, port 8000.
  vllm:
    api_key: ''
    base_url: http://host.docker.internal:8000/v1
    enabled: true
    model: __auto__  # presumably a sentinel for model auto-selection; confirm
    supports_images: false
    type: openai_compat
    cf_orch:
      service: vllm
      model_candidates:
        - Qwen2.5-3B-Instruct
      ttl_s: 300

  # NOTE(review): every setting here is currently identical to `vllm`.
  vllm_research:
    api_key: ''
    base_url: http://host.docker.internal:8000/v1
    enabled: true
    model: __auto__
    supports_images: false
    type: openai_compat
    cf_orch:
      service: vllm
      model_candidates:
        - Qwen2.5-3B-Instruct
      ttl_s: 300
# Ordered list of backend names (keys of `backends`).
# NOTE(review): presumably tried first-to-last, with `enabled: false` backends
# skipped; confirm in the consumer.
fallback_order:
  - cf_text
  - ollama
  - claude_code
  - vllm
  - github_copilot
  - anthropic
# Ordered list of backend names (keys of `backends`) for the research path.
# It uses the `*_research` variants of the vllm and ollama backends.
# NOTE(review): presumably tried first-to-last; confirm in the consumer.
research_fallback_order:
  - cf_text
  - vllm_research
  - ollama_research
  - claude_code
  - github_copilot
  - anthropic
# Ordered list of backend names (keys of `backends`) for the vision path.
# Every backend listed here has `supports_images: true`.
# NOTE(review): presumably tried first-to-last; confirm in the consumer.
vision_fallback_order:
  - vision_service
  - claude_code
  - anthropic