feat: wire Search with AI to cf-orch → Ollama (llama3.1:8b)
- Add app/llm/router.py shim — tri-level config lookup: repo config/llm.yaml → ~/.config/circuitforge/llm.yaml → env vars - Add config/llm.cloud.yaml — ollama via cf-orch, llama3.1:8b - Add config/llm.yaml.example — self-hosted reference config - compose.cloud.yml: mount llm.cloud.yaml, set CF_ORCH_URL, add host.docker.internal:host-gateway (required on Linux Docker) - api/main.py: use app.llm.router.LLMRouter (shim) not core directly - .env.example: update LLM section to reference config/llm.yaml.example - .gitignore: exclude config/llm.yaml (keep example + cloud yaml) End-to-end tested: 3.2s for "used RTX 3080 under $400, no mining cards" via cloud container → host.docker.internal:11434 → Ollama llama3.1:8b
This commit is contained in:
parent
c0a92315d9
commit
af1ffa1d94
7 changed files with 135 additions and 5 deletions
12
.env.example
12
.env.example
|
|
@ -54,13 +54,17 @@ SNIPE_DB=data/snipe.db
|
||||||
# own ID; the CF cloud instance uses CF's campaign ID (disclosed in the UI).
|
# own ID; the CF cloud instance uses CF's campaign ID (disclosed in the UI).
|
||||||
# EBAY_AFFILIATE_CAMPAIGN_ID=
|
# EBAY_AFFILIATE_CAMPAIGN_ID=
|
||||||
|
|
||||||
# ── LLM inference (vision / photo analysis) ──────────────────────────────────
|
# ── LLM inference (Search with AI / photo analysis) ──────────────────────────
|
||||||
# circuitforge-core LLMRouter auto-detects backends from these env vars
|
# For self-hosted use, create config/llm.yaml from config/llm.yaml.example.
|
||||||
# (no llm.yaml required). Backends are tried in this priority order:
|
# config/llm.yaml is the preferred way to configure backends (supports cf-orch,
|
||||||
|
# multiple fallback backends, per-backend model selection).
|
||||||
|
#
|
||||||
|
# As a quick alternative, circuitforge-core LLMRouter also auto-detects backends
|
||||||
|
# from these env vars when no llm.yaml is present:
|
||||||
# 1. ANTHROPIC_API_KEY → Claude API (cloud; requires Paid tier key)
|
# 1. ANTHROPIC_API_KEY → Claude API (cloud; requires Paid tier key)
|
||||||
# 2. OPENAI_API_KEY → OpenAI-compatible endpoint
|
# 2. OPENAI_API_KEY → OpenAI-compatible endpoint
|
||||||
# 3. OLLAMA_HOST → local Ollama (default: http://localhost:11434)
|
# 3. OLLAMA_HOST → local Ollama (default: http://localhost:11434)
|
||||||
# Leave all unset to disable LLM features (photo analysis won't run).
|
# Leave all unset to disable LLM features (Search with AI won't be available).
|
||||||
|
|
||||||
# ANTHROPIC_API_KEY=
|
# ANTHROPIC_API_KEY=
|
||||||
# ANTHROPIC_MODEL=claude-haiku-4-5-20251001
|
# ANTHROPIC_MODEL=claude-haiku-4-5-20251001
|
||||||
|
|
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -9,3 +9,4 @@ data/
|
||||||
.superpowers/
|
.superpowers/
|
||||||
web/node_modules/
|
web/node_modules/
|
||||||
web/dist/
|
web/dist/
|
||||||
|
config/llm.yaml
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,7 @@ async def _lifespan(app: FastAPI):
|
||||||
_category_cache.refresh(token_manager=None) # bootstrap fallback
|
_category_cache.refresh(token_manager=None) # bootstrap fallback
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from circuitforge_core.llm import LLMRouter
|
from app.llm.router import LLMRouter
|
||||||
_llm_router = LLMRouter()
|
_llm_router = LLMRouter()
|
||||||
_query_translator = QueryTranslator(
|
_query_translator = QueryTranslator(
|
||||||
category_cache=_category_cache,
|
category_cache=_category_cache,
|
||||||
|
|
|
||||||
36
app/llm/router.py
Normal file
36
app/llm/router.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
# app/llm/router.py
# BSL 1.1 License
"""
Snipe LLMRouter shim — tri-level config path priority.

Config lookup order:
1. <repo>/config/llm.yaml — per-install local override
2. ~/.config/circuitforge/llm.yaml — user-level config (circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, CF_ORCH_URL)
"""
from pathlib import Path

from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

# Repo-local config sits two package levels above this file: app/llm/ -> <repo>/config/.
_REPO_CONFIG = Path(__file__).parents[2] / "config" / "llm.yaml"
# circuitforge-core's conventional user-level location.
_USER_CONFIG = Path.home() / ".config" / "circuitforge" / "llm.yaml"


class LLMRouter(_CoreLLMRouter):
    """Snipe-specific LLMRouter with tri-level config resolution.

    Explicit ``config_path`` bypasses the lookup (useful in tests).
    """

    def __init__(self, config_path: Path | None = None) -> None:
        if config_path is not None:
            # Caller pinned a specific config file — skip the lookup entirely.
            super().__init__(config_path)
            return

        # Try repo-local first, then the user-level config; first hit wins.
        for candidate in (_REPO_CONFIG, _USER_CONFIG):
            if candidate.exists():
                super().__init__(candidate)
                return

        # No yaml anywhere — let circuitforge-core's env-var auto-config handle it.
        super().__init__()
|
||||||
|
|
@ -20,9 +20,15 @@ services:
|
||||||
CLOUD_MODE: "true"
|
CLOUD_MODE: "true"
|
||||||
CLOUD_DATA_ROOT: /devl/snipe-cloud-data
|
CLOUD_DATA_ROOT: /devl/snipe-cloud-data
|
||||||
# DIRECTUS_JWT_SECRET, HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN — set in .env (never commit)
|
# DIRECTUS_JWT_SECRET, HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN — set in .env (never commit)
|
||||||
|
# CF_ORCH_URL routes LLM query builder through cf-orch for VRAM-aware scheduling.
|
||||||
|
# Override in .env to use a different coordinator URL.
|
||||||
|
CF_ORCH_URL: "http://host.docker.internal:7700"
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
# No network_mode: host — isolated on snipe-cloud-net; nginx reaches it via 'api:8510'
|
# No network_mode: host — isolated on snipe-cloud-net; nginx reaches it via 'api:8510'
|
||||||
volumes:
|
volumes:
|
||||||
- /devl/snipe-cloud-data:/devl/snipe-cloud-data
|
- /devl/snipe-cloud-data:/devl/snipe-cloud-data
|
||||||
|
- ./config/llm.cloud.yaml:/app/snipe/config/llm.yaml:ro
|
||||||
networks:
|
networks:
|
||||||
- snipe-cloud-net
|
- snipe-cloud-net
|
||||||
|
|
||||||
|
|
|
||||||
38
config/llm.cloud.yaml
Normal file
38
config/llm.cloud.yaml
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
# config/llm.cloud.yaml
|
||||||
|
# Snipe — LLM config for the managed cloud instance (menagerie)
|
||||||
|
#
|
||||||
|
# Mounted read-only into the cloud API container at /app/snipe/config/llm.yaml
|
||||||
|
# (see compose.cloud.yml). Personal fine-tunes and local-only backends
|
||||||
|
# (claude_code, copilot) are intentionally excluded here.
|
||||||
|
#
|
||||||
|
# CF Orchestrator routes both ollama and vllm allocations for VRAM-aware
|
||||||
|
# scheduling. CF_ORCH_URL must be set in .env for allocations to resolve;
|
||||||
|
# if cf-orch is unreachable the backend falls back to its static base_url.
|
||||||
|
#
|
||||||
|
# Model choice for query builder: llama3.1:8b
|
||||||
|
# - Reliable instruction following and JSON output
|
||||||
|
# - No creative fine-tuning drift (unlike writer models in the pool)
|
||||||
|
# - Fits comfortably in 8 GB VRAM alongside other services
|
||||||
|
|
||||||
|
backends:
|
||||||
|
ollama:
|
||||||
|
type: openai_compat
|
||||||
|
base_url: http://host.docker.internal:11434/v1
|
||||||
|
api_key: ollama
|
||||||
|
model: llama3.1:8b
|
||||||
|
enabled: true
|
||||||
|
supports_images: false
|
||||||
|
cf_orch:
|
||||||
|
service: ollama
|
||||||
|
ttl_s: 300
|
||||||
|
|
||||||
|
anthropic:
|
||||||
|
type: anthropic
|
||||||
|
api_key_env: ANTHROPIC_API_KEY
|
||||||
|
model: claude-haiku-4-5-20251001
|
||||||
|
enabled: false
|
||||||
|
supports_images: false
|
||||||
|
|
||||||
|
fallback_order:
|
||||||
|
- ollama
|
||||||
|
- anthropic
|
||||||
45
config/llm.yaml.example
Normal file
45
config/llm.yaml.example
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
# config/llm.yaml.example
|
||||||
|
# Snipe — LLM backend configuration
|
||||||
|
#
|
||||||
|
# Copy to config/llm.yaml and edit for your setup.
|
||||||
|
# The query builder ("Search with AI") uses the text fallback_order.
|
||||||
|
#
|
||||||
|
# Backends are tried in fallback_order until one succeeds.
|
||||||
|
# Set enabled: false to skip a backend without removing it.
|
||||||
|
#
|
||||||
|
# CF Orchestrator (cf-orch): when CF_ORCH_URL is set in the environment and a
|
||||||
|
# backend has a cf_orch block, allocations are routed through cf-orch for
|
||||||
|
# VRAM-aware scheduling. Omit cf_orch to hit the backend directly.
|
||||||
|
|
||||||
|
backends:
|
||||||
|
anthropic:
|
||||||
|
type: anthropic
|
||||||
|
api_key_env: ANTHROPIC_API_KEY
|
||||||
|
model: claude-haiku-4-5-20251001
|
||||||
|
enabled: false
|
||||||
|
supports_images: false
|
||||||
|
|
||||||
|
openai:
|
||||||
|
type: openai_compat
|
||||||
|
base_url: https://api.openai.com/v1
|
||||||
|
api_key_env: OPENAI_API_KEY
|
||||||
|
model: gpt-4o-mini
|
||||||
|
enabled: false
|
||||||
|
supports_images: false
|
||||||
|
|
||||||
|
ollama:
|
||||||
|
type: openai_compat
|
||||||
|
base_url: http://localhost:11434/v1
|
||||||
|
api_key: ollama
|
||||||
|
model: llama3.1:8b
|
||||||
|
enabled: true
|
||||||
|
supports_images: false
|
||||||
|
# Uncomment to route through cf-orch for VRAM-aware scheduling:
|
||||||
|
# cf_orch:
|
||||||
|
# service: ollama
|
||||||
|
# ttl_s: 300
|
||||||
|
|
||||||
|
fallback_order:
|
||||||
|
- anthropic
|
||||||
|
- openai
|
||||||
|
- ollama
|
||||||
Loading…
Reference in a new issue