feat: wire cf_text as openai_compat backend in llm.yaml

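Adds the cf-text inference service (circuitforge-core) to the LLM fallback chain as the first option for cover letter generation and as a later fallback for research. cf-text now exposes /v1/chat/completions (added in cf-core 69a338b), making it a drop-in openai_compat backend on port 8006. Adds CF_TEXT_MODEL, CF_TEXT_PORT, and the cf-orch agent settings (CF_ORCH_COORDINATOR_URL, CF_ORCH_NODE_ID, CF_ORCH_AGENT_PORT) to .env.example, and changes the vision_service base_url from host.docker.internal:8002 to vision:8002. Closes #75.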
2026-04-12 17:10:41 -07:00 · 2026-04-12 17:10:41 -07:00 · 7467fb5416
commit 7467fb5416
parent 278413b073
2 changed files with 25 additions and 1 deletion
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,7 @@
 STREAMLIT_PORT=8502
 OLLAMA_PORT=11434
 VLLM_PORT=8000
 CF_TEXT_PORT=8006
 SEARXNG_PORT=8888
 VISION_PORT=8002
 VISION_MODEL=vikhyatk/moondream2
@@ -15,6 +16,7 @@ OLLAMA_MODELS_DIR=~/models/ollama
 VLLM_MODELS_DIR=~/models/vllm        # override with full path to your model dir
 VLLM_MODEL=Ouro-1.4B                # cover letters — fast 1.4B model
 VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking  # research — reasoning 2.6B model; restart vllm to switch
 CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf  # cf-text GGUF model; set to "mock" to disable
 VLLM_MAX_MODEL_LEN=4096             # increase to 8192 for Thinking models with long CoT
 VLLM_GPU_MEM_UTIL=0.75              # lower to 0.6 if sharing GPU with other services
 OLLAMA_DEFAULT_MODEL=llama3.2:3b
@@ -45,6 +47,19 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
 CF_LICENSE_KEY=
 CF_ORCH_URL=https://orch.circuitforge.tech
 # cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
 # The agent registers this node with the cf-orch coordinator and reports VRAM stats.
 # CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
 # CF_ORCH_NODE_ID:         name shown on the dashboard (default: peregrine)
 # CF_ORCH_AGENT_PORT:      host port for the agent HTTP server (default: 7701)
 # CF_ORCH_ADVERTISE_HOST:  IP the coordinator uses to reach back to this agent.
 #                          Defaults to 127.0.0.1 (same-host coordinator).
 #                          Set to your host LAN IP for a remote coordinator.
 CF_ORCH_COORDINATOR_URL=http://localhost:7700
 CF_ORCH_NODE_ID=peregrine
 CF_ORCH_AGENT_PORT=7701
 #CF_ORCH_ADVERTISE_HOST=10.1.10.71
 # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
 CLOUD_MODE=false
 CLOUD_DATA_ROOT=/devl/menagerie-data
--- a/config/llm.yaml
+++ b/config/llm.yaml
@@ -1,4 +1,11 @@
 backends:
  cf_text:
    api_key: any
    base_url: http://host.docker.internal:8006/v1
    enabled: true
    model: cf-text
    supports_images: false
    type: openai_compat
  anthropic:
    api_key_env: ANTHROPIC_API_KEY
    enabled: false
@@ -34,7 +41,7 @@ backends:
    supports_images: false
    type: openai_compat
  vision_service:
-    base_url: http://host.docker.internal:8002
+    base_url: http://vision:8002
    enabled: true
    supports_images: true
    type: vision_service
@@ -58,6 +65,7 @@ backends:
    supports_images: false
    type: openai_compat
 fallback_order:
 - cf_text
 - ollama
 - claude_code
 - vllm
@@ -67,6 +75,7 @@ research_fallback_order:
 - claude_code
 - vllm_research
 - ollama_research
 - cf_text
 - github_copilot
 - anthropic
 vision_fallback_order: