From af1ffa1d9436f7a39f31ed996d4c3feddec2ac8c Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 14 Apr 2026 13:23:44 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20wire=20Search=20with=20AI=20to=20cf-orc?= =?UTF-8?q?h=20=E2=86=92=20Ollama=20(llama3.1:8b)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add app/llm/router.py shim — tri-level config lookup: repo config/llm.yaml → ~/.config/circuitforge/llm.yaml → env vars - Add config/llm.cloud.yaml — ollama via cf-orch, llama3.1:8b - Add config/llm.yaml.example — self-hosted reference config - compose.cloud.yml: mount llm.cloud.yaml, set CF_ORCH_URL, add host.docker.internal:host-gateway (required on Linux Docker) - api/main.py: use app.llm.router.LLMRouter (shim) not core directly - .env.example: update LLM section to reference config/llm.yaml.example - .gitignore: exclude config/llm.yaml (keep example + cloud yaml) End-to-end tested: 3.2s for "used RTX 3080 under $400, no mining cards" via cloud container → host.docker.internal:11434 → Ollama llama3.1:8b --- .env.example | 12 +++++++---- .gitignore | 1 + api/main.py | 2 +- app/llm/router.py | 36 +++++++++++++++++++++++++++++++++ compose.cloud.yml | 6 ++++++ config/llm.cloud.yaml | 38 ++++++++++++++++++++++++++++++++++ config/llm.yaml.example | 45 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 app/llm/router.py create mode 100644 config/llm.cloud.yaml create mode 100644 config/llm.yaml.example diff --git a/.env.example b/.env.example index e1c674d..9ee65a4 100644 --- a/.env.example +++ b/.env.example @@ -54,13 +54,17 @@ SNIPE_DB=data/snipe.db # own ID; the CF cloud instance uses CF's campaign ID (disclosed in the UI). # EBAY_AFFILIATE_CAMPAIGN_ID= -# ── LLM inference (vision / photo analysis) ────────────────────────────────── -# circuitforge-core LLMRouter auto-detects backends from these env vars -# (no llm.yaml required). 
# app/llm/router.py
# BSL 1.1 License
"""
Snipe LLMRouter shim — tri-level config path priority.

Config lookup order:
  1. <repo_root>/config/llm.yaml      — per-install local override
  2. ~/.config/circuitforge/llm.yaml  — user-level config (circuitforge-core default)
  3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, CF_ORCH_URL)
"""
from pathlib import Path

from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

# app/llm/router.py → three parents up is the repo root, so this resolves
# <repo_root>/config/llm.yaml regardless of the process working directory.
_REPO_CONFIG = Path(__file__).parent.parent.parent / "config" / "llm.yaml"
_USER_CONFIG = Path.home() / ".config" / "circuitforge" / "llm.yaml"


class LLMRouter(_CoreLLMRouter):
    """Snipe-specific LLMRouter with tri-level config resolution.

    Explicit ``config_path`` bypasses the lookup (useful in tests).
    """

    def __init__(self, config_path: Path | None = None) -> None:
        # An explicit path always wins — pass it straight through, even if it
        # does not exist, so circuitforge-core surfaces the error rather than
        # this shim silently falling back to another config.
        if config_path is not None:
            super().__init__(config_path)
            return

        # First existing config wins: repo-local override, then user-level.
        for candidate in (_REPO_CONFIG, _USER_CONFIG):
            if candidate.exists():
                super().__init__(candidate)
                return

        # No yaml anywhere — let circuitforge-core env-var auto-config handle it.
        super().__init__()
Personal fine-tunes and local-only backends +# (claude_code, copilot) are intentionally excluded here. +# +# CF Orchestrator routes both ollama and vllm allocations for VRAM-aware +# scheduling. CF_ORCH_URL must be set in .env for allocations to resolve; +# if cf-orch is unreachable the backend falls back to its static base_url. +# +# Model choice for query builder: llama3.1:8b +# - Reliable instruction following and JSON output +# - No creative fine-tuning drift (unlike writer models in the pool) +# - Fits comfortably in 8 GB VRAM alongside other services + +backends: + ollama: + type: openai_compat + base_url: http://host.docker.internal:11434/v1 + api_key: ollama + model: llama3.1:8b + enabled: true + supports_images: false + cf_orch: + service: ollama + ttl_s: 300 + + anthropic: + type: anthropic + api_key_env: ANTHROPIC_API_KEY + model: claude-haiku-4-5-20251001 + enabled: false + supports_images: false + +fallback_order: + - ollama + - anthropic diff --git a/config/llm.yaml.example b/config/llm.yaml.example new file mode 100644 index 0000000..23a0972 --- /dev/null +++ b/config/llm.yaml.example @@ -0,0 +1,45 @@ +# config/llm.yaml.example +# Snipe — LLM backend configuration +# +# Copy to config/llm.yaml and edit for your setup. +# The query builder ("Search with AI") uses the text fallback_order. +# +# Backends are tried in fallback_order until one succeeds. +# Set enabled: false to skip a backend without removing it. +# +# CF Orchestrator (cf-orch): when CF_ORCH_URL is set in the environment and a +# backend has a cf_orch block, allocations are routed through cf-orch for +# VRAM-aware scheduling. Omit cf_orch to hit the backend directly. 
+ +backends: + anthropic: + type: anthropic + api_key_env: ANTHROPIC_API_KEY + model: claude-haiku-4-5-20251001 + enabled: false + supports_images: false + + openai: + type: openai_compat + base_url: https://api.openai.com/v1 + api_key_env: OPENAI_API_KEY + model: gpt-4o-mini + enabled: false + supports_images: false + + ollama: + type: openai_compat + base_url: http://localhost:11434/v1 + api_key: ollama + model: llama3.1:8b + enabled: true + supports_images: false + # Uncomment to route through cf-orch for VRAM-aware scheduling: + # cf_orch: + # service: ollama + # ttl_s: 300 + +fallback_order: + - anthropic + - openai + - ollama