From 7467fb541650540a2691aa5b93ccf136e71e7a24 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Sun, 12 Apr 2026 17:10:41 -0700
Subject: [PATCH] feat: wire cf_text as openai_compat backend in llm.yaml

Adds the cf-text inference service (circuitforge-core) to the LLM
fallback chain as the first option for cover letter generation. cf-text
now exposes /v1/chat/completions (added in cf-core 69a338b), making it a
drop-in openai_compat backend at port 8006. CF_TEXT_MODEL and
CF_TEXT_PORT added to .env.example.

Also in this change: the cf-orch agent settings
(CF_ORCH_COORDINATOR_URL, CF_ORCH_NODE_ID, CF_ORCH_AGENT_PORT,
CF_ORCH_ADVERTISE_HOST) are added and documented in .env.example, and
vision_service now resolves the vision compose service by name instead
of host.docker.internal.

Closes #75.
---
 .env.example    | 15 +++++++++++++++
 config/llm.yaml | 11 ++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index b8a4dce..bf458e0 100644
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,7 @@
 STREAMLIT_PORT=8502
 OLLAMA_PORT=11434
 VLLM_PORT=8000
+CF_TEXT_PORT=8006
 SEARXNG_PORT=8888
 VISION_PORT=8002
 VISION_MODEL=vikhyatk/moondream2
@@ -15,6 +16,7 @@ OLLAMA_MODELS_DIR=~/models/ollama
 VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
 VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
 VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
+CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
 VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
 VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
 OLLAMA_DEFAULT_MODEL=llama3.2:3b
@@ -45,6 +47,19 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
 CF_LICENSE_KEY=
 CF_ORCH_URL=https://orch.circuitforge.tech
 
+# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
+# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
+# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with +# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine) +# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701) +# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent. +# Defaults to 127.0.0.1 (same-host coordinator). +# Set to your host LAN IP for a remote coordinator. +CF_ORCH_COORDINATOR_URL=http://localhost:7700 +CF_ORCH_NODE_ID=peregrine +CF_ORCH_AGENT_PORT=7701 +#CF_ORCH_ADVERTISE_HOST=10.1.10.71 + # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs) CLOUD_MODE=false CLOUD_DATA_ROOT=/devl/menagerie-data diff --git a/config/llm.yaml b/config/llm.yaml index 485b6a2..515bf24 100644 --- a/config/llm.yaml +++ b/config/llm.yaml @@ -1,4 +1,11 @@ backends: + cf_text: + api_key: any + base_url: http://host.docker.internal:8006/v1 + enabled: true + model: cf-text + supports_images: false + type: openai_compat anthropic: api_key_env: ANTHROPIC_API_KEY enabled: false @@ -34,7 +41,7 @@ backends: supports_images: false type: openai_compat vision_service: - base_url: http://host.docker.internal:8002 + base_url: http://vision:8002 enabled: true supports_images: true type: vision_service @@ -58,6 +65,7 @@ backends: supports_images: false type: openai_compat fallback_order: +- cf_text - ollama - claude_code - vllm @@ -67,6 +75,7 @@ research_fallback_order: - claude_code - vllm_research - ollama_research +- cf_text - github_copilot - anthropic vision_fallback_order: