Documents the cf-orch allocation pattern (a `cf_text` openai_compat backend carrying a `cf_orch` block). Snipe's trust query builder can route through cf_text when CF_ORCH_URL is set, rather than hitting ollama directly.
(60 lines, 1.7 KiB, text)
# config/llm.yaml.example
#
# Snipe — LLM backend configuration
#
# Copy to config/llm.yaml and edit for your setup.
# The query builder ("Search with AI") uses the text fallback_order.
#
# Backends are tried in fallback_order until one succeeds.
# Set enabled: false to skip a backend without removing it.
#
# CF Orchestrator (cf-orch): when CF_ORCH_URL is set in the environment and a
# backend has a cf_orch block, allocations are routed through cf-orch for
# VRAM-aware scheduling. Omit cf_orch to hit the backend directly.
---
backends:
  anthropic:
    type: anthropic
    api_key_env: ANTHROPIC_API_KEY
    model: claude-haiku-4-5-20251001
    enabled: false
    supports_images: false

  openai:
    type: openai_compat
    base_url: https://api.openai.com/v1
    api_key_env: OPENAI_API_KEY
    model: gpt-4o-mini
    enabled: false
    supports_images: false

  ollama:
    type: openai_compat
    base_url: http://localhost:11434/v1
    # Ollama ignores the key but openai_compat clients require one.
    api_key: ollama
    # Quoted: the value contains a colon, which is safest quoted in YAML.
    model: "llama3.1:8b"
    enabled: true
    supports_images: false
    # Uncomment to route through cf-orch for VRAM-aware scheduling:
    # cf_orch:
    #   service: ollama
    #   ttl_s: 300

  # ── cf-orch trunk services ─────────────────────────────────────────────────
  # Allocate via cf-orch; the router calls the allocated service directly.
  # Set CF_ORCH_URL (env) or url below to activate.
  cf_text:
    type: openai_compat
    enabled: false
    base_url: http://localhost:8008/v1
    # __auto__: model is chosen by cf-orch at allocation time.
    model: __auto__
    api_key: any
    supports_images: false
    cf_orch:
      service: cf-text
      # Empty list: accept whatever model cf-orch allocates.
      model_candidates: []
      ttl_s: 3600

fallback_order:
  - anthropic
  - openai
  - ollama
  # Enable the cf_text backend above and uncomment to try cf-orch routing first:
  # - cf_text