feat: wire cf_text as openai_compat backend in llm.yaml
Some checks failed
CI / Backend (Python) (push) Failing after 12s
CI / Frontend (Vue) (push) Successful in 20s
Mirror / mirror (push) Failing after 7s

Adds the cf-text inference service (circuitforge-core) to the LLM
fallback chain as the first option for cover letter generation.
cf-text now exposes /v1/chat/completions (added in cf-core 69a338b),
making it a drop-in openai_compat backend at port 8006.

CF_TEXT_MODEL and CF_TEXT_PORT added to .env.example. Closes #75.
This commit is contained in:
pyr0ball 2026-04-12 17:10:41 -07:00
parent 278413b073
commit 7467fb5416
2 changed files with 25 additions and 1 deletions

View file

@@ -5,6 +5,7 @@
 STREAMLIT_PORT=8502
 OLLAMA_PORT=11434
 VLLM_PORT=8000
+CF_TEXT_PORT=8006
 SEARXNG_PORT=8888
 VISION_PORT=8002
 VISION_MODEL=vikhyatk/moondream2
@@ -15,6 +16,7 @@ OLLAMA_MODELS_DIR=~/models/ollama
 VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
 VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
 VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
+CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
 VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
 VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
 OLLAMA_DEFAULT_MODEL=llama3.2:3b
@@ -45,6 +47,19 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
 CF_LICENSE_KEY=
 CF_ORCH_URL=https://orch.circuitforge.tech
+
+# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
+# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
+# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
+# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
+# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
+# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
+# Defaults to 127.0.0.1 (same-host coordinator).
+# Set to your host LAN IP for a remote coordinator.
+CF_ORCH_COORDINATOR_URL=http://localhost:7700
+CF_ORCH_NODE_ID=peregrine
+CF_ORCH_AGENT_PORT=7701
+#CF_ORCH_ADVERTISE_HOST=10.1.10.71
 # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
 CLOUD_MODE=false
 CLOUD_DATA_ROOT=/devl/menagerie-data

View file

@@ -1,4 +1,11 @@
 backends:
+  cf_text:
+    api_key: any
+    base_url: http://host.docker.internal:8006/v1
+    enabled: true
+    model: cf-text
+    supports_images: false
+    type: openai_compat
   anthropic:
     api_key_env: ANTHROPIC_API_KEY
     enabled: false
@@ -34,7 +41,7 @@ backends:
     supports_images: false
     type: openai_compat
   vision_service:
-    base_url: http://host.docker.internal:8002
+    base_url: http://vision:8002
     enabled: true
     supports_images: true
     type: vision_service
@@ -58,6 +65,7 @@ backends:
     supports_images: false
     type: openai_compat
 fallback_order:
+- cf_text
 - ollama
 - claude_code
 - vllm
@@ -67,6 +75,7 @@ research_fallback_order:
 - claude_code
 - vllm_research
 - ollama_research
+- cf_text
 - github_copilot
 - anthropic
 vision_fallback_order: