Documents the cf-orch allocation pattern (a `cf_text` openai_compat backend carrying a `cf_orch` block). Snipe's trust query builder can route through cf_text when CF_ORCH_URL is set, rather than hitting ollama directly.
(60 lines, 1.7 KiB, text)
# config/llm.yaml.example
#
# Snipe — LLM backend configuration
#
# Copy to config/llm.yaml and edit for your setup.
# The query builder ("Search with AI") uses the text fallback_order.
#
# Backends are tried in fallback_order until one succeeds.
# Set enabled: false to skip a backend without removing it.
#
# CF Orchestrator (cf-orch): when CF_ORCH_URL is set in the environment and a
# backend has a cf_orch block, allocations are routed through cf-orch for
# VRAM-aware scheduling. Omit cf_orch to hit the backend directly.
---
backends:
  anthropic:
    type: anthropic
    api_key_env: ANTHROPIC_API_KEY
    model: claude-haiku-4-5-20251001
    enabled: false
    supports_images: false

  openai:
    type: openai_compat
    base_url: https://api.openai.com/v1
    api_key_env: OPENAI_API_KEY
    model: gpt-4o-mini
    enabled: false
    supports_images: false

  ollama:
    type: openai_compat
    base_url: http://localhost:11434/v1
    # Ollama ignores the key but openai_compat clients require one.
    api_key: ollama
    # Quoted: the value contains a colon, which is safest quoted in YAML.
    model: "llama3.1:8b"
    enabled: true
    supports_images: false
    # Uncomment to route through cf-orch for VRAM-aware scheduling:
    # cf_orch:
    #   service: ollama
    #   ttl_s: 300

  # ── cf-orch trunk services ─────────────────────────────────────────────────
  # Allocate via cf-orch; the router calls the allocated service directly.
  # Set CF_ORCH_URL (env) or url below to activate.
  cf_text:
    type: openai_compat
    enabled: false
    base_url: http://localhost:8008/v1
    # __auto__: model is chosen by cf-orch at allocation time.
    model: __auto__
    api_key: any
    supports_images: false
    cf_orch:
      service: cf-text
      # Empty list: accept whatever model cf-orch allocates.
      model_candidates: []
      ttl_s: 3600

fallback_order:
  - anthropic
  - openai
  - ollama
  # Enable the cf_text backend above and uncomment to try cf-orch routing first:
  # - cf_text