From 7467fb541650540a2691aa5b93ccf136e71e7a24 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Sun, 12 Apr 2026 17:10:41 -0700
Subject: [PATCH] feat: wire cf_text as openai_compat backend in llm.yaml

Adds the cf-text inference service (circuitforge-core) to the LLM
fallback chain as the first option for cover letter generation. cf-text
now exposes /v1/chat/completions (added in cf-core 69a338b), making it a
drop-in openai_compat backend at port 8006. CF_TEXT_MODEL and
CF_TEXT_PORT added to .env.example.

Also in this change: the cf-orch agent settings
(CF_ORCH_COORDINATOR_URL, CF_ORCH_NODE_ID, CF_ORCH_AGENT_PORT,
CF_ORCH_ADVERTISE_HOST) are added and documented in .env.example, and
vision_service now resolves the vision compose service by name instead
of host.docker.internal.

Closes #75.
---
 .env.example    | 15 +++++++++++++++
 config/llm.yaml | 11 ++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index b8a4dce..bf458e0 100644
--- a/.env.example
+++ b/.env.example
@@ -5,6 +5,7 @@
 STREAMLIT_PORT=8502
 OLLAMA_PORT=11434
 VLLM_PORT=8000
+CF_TEXT_PORT=8006
 SEARXNG_PORT=8888
 VISION_PORT=8002
 VISION_MODEL=vikhyatk/moondream2
@@ -15,6 +16,7 @@ OLLAMA_MODELS_DIR=~/models/ollama
 VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
 VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
 VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
+CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
 VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
 VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
 OLLAMA_DEFAULT_MODEL=llama3.2:3b
@@ -45,6 +47,19 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
 CF_LICENSE_KEY=
 CF_ORCH_URL=https://orch.circuitforge.tech
 
+# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
+# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
+# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with +# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine) +# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701) +# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent. +# Defaults to 127.0.0.1 (same-host coordinator). +# Set to your host LAN IP for a remote coordinator. +CF_ORCH_COORDINATOR_URL=http://localhost:7700 +CF_ORCH_NODE_ID=peregrine +CF_ORCH_AGENT_PORT=7701 +#CF_ORCH_ADVERTISE_HOST=10.1.10.71 + # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs) CLOUD_MODE=false CLOUD_DATA_ROOT=/devl/menagerie-data diff --git a/config/llm.yaml b/config/llm.yaml index 485b6a2..515bf24 100644 --- a/config/llm.yaml +++ b/config/llm.yaml @@ -1,4 +1,11 @@ backends: + cf_text: + api_key: any + base_url: http://host.docker.internal:8006/v1 + enabled: true + model: cf-text + supports_images: false + type: openai_compat anthropic: api_key_env: ANTHROPIC_API_KEY enabled: false @@ -34,7 +41,7 @@ backends: supports_images: false type: openai_compat vision_service: - base_url: http://host.docker.internal:8002 + base_url: http://vision:8002 enabled: true supports_images: true type: vision_service @@ -58,6 +65,7 @@ backends: supports_images: false type: openai_compat fallback_order: +- cf_text - ollama - claude_code - vllm @@ -67,6 +75,7 @@ research_fallback_order: - claude_code - vllm_research - ollama_research +- cf_text - github_copilot - anthropic vision_fallback_order: