- Add app/llm/router.py shim — tri-level config lookup: repo config/llm.yaml → ~/.config/circuitforge/llm.yaml → env vars (see the sketch below)
- Add config/llm.cloud.yaml — ollama via cf-orch, llama3.1:8b
- Add config/llm.yaml.example — self-hosted reference config
- compose.cloud.yml: mount llm.cloud.yaml, set CF_ORCH_URL, add host.docker.internal:host-gateway (required on Linux Docker)
- api/main.py: use app.llm.router.LLMRouter (the shim) instead of core directly
- .env.example: update LLM section to reference config/llm.yaml.example
- .gitignore: exclude config/llm.yaml (keep the example + cloud yaml)

End-to-end tested: 3.2s for "used RTX 3080 under $400, no mining cards" via cloud container → host.docker.internal:11434 → Ollama llama3.1:8b.
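A minimal sketch of what the shim's tri-level lookup could look like. Only the three lookup locations come from this change; the function name, the env-var names, and the PyYAML dependency are assumptions for illustration:

```python
# app/llm/router.py — sketch of the tri-level config lookup. Only the three
# lookup locations are from the change; everything else is illustrative.
import os
from pathlib import Path

import yaml  # PyYAML; assumed to already be a dependency


def _load_config() -> dict:
    """Return the first config found: repo file, then user file, then env vars."""
    candidates = [
        Path("config/llm.yaml"),                                # 1. repo config
        Path.home() / ".config" / "circuitforge" / "llm.yaml",  # 2. user config
    ]
    for path in candidates:
        if path.is_file():
            with path.open() as fh:
                return yaml.safe_load(fh)
    # 3. Env-var fallback: a minimal single-backend config (these variable
    # names are hypothetical, not necessarily the ones in .env.example).
    return {
        "backends": {
            "ollama": {
                "type": "openai_compat",
                "base_url": os.environ.get("LLM_BASE_URL", "http://localhost:11434/v1"),
                "model": os.environ.get("LLM_MODEL", "llama3.1:8b"),
                "enabled": True,
            }
        },
        "fallback_order": ["ollama"],
    }
```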
```yaml
# config/llm.yaml.example
#
# Snipe — LLM backend configuration
#
# Copy to config/llm.yaml and edit for your setup.
# The query builder ("Search with AI") uses the text fallback_order.
#
# Backends are tried in fallback_order until one succeeds.
# Set enabled: false to skip a backend without removing it.
#
# CF Orchestrator (cf-orch): when CF_ORCH_URL is set in the environment and a
# backend has a cf_orch block, allocations are routed through cf-orch for
# VRAM-aware scheduling. Omit cf_orch to hit the backend directly.

backends:
  anthropic:
    type: anthropic
    api_key_env: ANTHROPIC_API_KEY
    model: claude-haiku-4-5-20251001
    enabled: false
    supports_images: false

  openai:
    type: openai_compat
    base_url: https://api.openai.com/v1
    api_key_env: OPENAI_API_KEY
    model: gpt-4o-mini
    enabled: false
    supports_images: false

  ollama:
    type: openai_compat
    base_url: http://localhost:11434/v1
    api_key: ollama
    model: llama3.1:8b
    enabled: true
    supports_images: false
    # Uncomment to route through cf-orch for VRAM-aware scheduling:
    # cf_orch:
    #   service: ollama
    #   ttl_s: 300

fallback_order:
  - anthropic
  - openai
  - ollama
```
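Given the config above, the router's job reduces to walking fallback_order, skipping disabled backends, and (when CF_ORCH_URL is set and a backend has a cf_orch block) detouring through cf-orch first. A rough sketch of that loop for openai_compat backends — the `complete` name, the use of requests, and the error handling are all assumptions, and the anthropic type and the cf-orch allocation call are deliberately stubbed out as comments:

```python
# Sketch of the fallback loop; illustrative, not the real LLMRouter API.
import os

import requests  # assumed available; any HTTP client works


def _resolve_key(backend: dict) -> str:
    """Honor both the api_key and api_key_env config fields."""
    if "api_key" in backend:
        return backend["api_key"]
    return os.environ.get(backend.get("api_key_env", ""), "")


def _call_openai_compat(backend: dict, prompt: str) -> str:
    """POST to an OpenAI-compatible /chat/completions endpoint."""
    resp = requests.post(
        backend["base_url"].rstrip("/") + "/chat/completions",
        headers={"Authorization": f"Bearer {_resolve_key(backend)}"},
        json={"model": backend["model"],
              "messages": [{"role": "user", "content": prompt}]},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]


def complete(config: dict, prompt: str) -> str:
    """Try backends in fallback_order until one succeeds."""
    last_error = None
    for name in config.get("fallback_order", []):
        backend = config["backends"].get(name, {})
        if not backend.get("enabled", False):
            continue  # enabled: false skips a backend without removing it
        if backend.get("type") != "openai_compat":
            continue  # anthropic-type backends need their own client; omitted
        try:
            # When CF_ORCH_URL is set and the backend has a cf_orch block, the
            # real router would request a VRAM-aware allocation from cf-orch
            # first; that call is cf-orch-specific and omitted from this sketch.
            return _call_openai_compat(backend, prompt)
        except Exception as exc:
            last_error = exc  # fall through to the next backend
    raise RuntimeError("all enabled LLM backends failed") from last_error
```

With the example config as shipped, ollama is the only enabled backend, so the loop falls straight through anthropic and openai and hits localhost:11434 directly.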