snipe/config/llm.yaml.example
pyr0ball 2e0a49bc12 docs(config): add cf_text trunk service backend to llm.yaml.example
Documents the cf-orch allocation pattern (cf_text openai_compat backend
with cf_orch block). Snipe's trust query builder can route through
cf-text when CF_ORCH_URL is set, rather than hitting ollama directly.
2026-04-20 10:56:23 -07:00

60 lines
1.7 KiB
Text

# config/llm.yaml.example
# Snipe — LLM backend configuration
#
# Copy to config/llm.yaml and edit for your setup.
# The query builder ("Search with AI") uses the text fallback_order.
#
# Backends are tried in fallback_order until one succeeds.
# Set enabled: false to skip a backend without removing it.
#
# CF Orchestrator (cf-orch): when CF_ORCH_URL is set in the environment and a
# backend has a cf_orch block, allocations are routed through cf-orch for
# VRAM-aware scheduling. Omit cf_orch to hit the backend directly.
---
backends:
  anthropic:
    type: anthropic
    api_key_env: ANTHROPIC_API_KEY
    model: claude-haiku-4-5-20251001
    enabled: false
    supports_images: false

  openai:
    type: openai_compat
    base_url: https://api.openai.com/v1
    api_key_env: OPENAI_API_KEY
    model: gpt-4o-mini
    enabled: false
    supports_images: false

  ollama:
    type: openai_compat
    base_url: http://localhost:11434/v1
    api_key: ollama
    # Quoted: plain scalars containing ':' are safest quoted.
    model: "llama3.1:8b"
    enabled: true
    supports_images: false
    # Uncomment to route through cf-orch for VRAM-aware scheduling:
    # cf_orch:
    #   service: ollama
    #   ttl_s: 300

  # ── cf-orch trunk services ─────────────────────────────────────────────────
  # Allocate via cf-orch; the router calls the allocated service directly.
  # Set CF_ORCH_URL (env) or url below to activate.
  cf_text:
    type: openai_compat
    enabled: false
    base_url: http://localhost:8008/v1
    # __auto__: model is chosen by the allocated service, not pinned here.
    model: __auto__
    api_key: any
    supports_images: false
    cf_orch:
      service: cf-text
      model_candidates: []
      ttl_s: 3600

# Tried top-to-bottom until one enabled backend succeeds.
fallback_order:
  - anthropic
  - openai
  - ollama