feat: wire cf_text as openai_compat backend in llm.yaml
Adds the cf-text inference service (circuitforge-core) to the LLM fallback chain as the first option for cover letter generation. cf-text now exposes /v1/chat/completions (added in cf-core 69a338b), making it a drop-in openai_compat backend at port 8006. CF_TEXT_MODEL and CF_TEXT_PORT added to .env.example. Closes #75.
This commit is contained in:
parent
278413b073
commit
7467fb5416
2 changed files with 25 additions and 1 deletion
15
.env.example
15
.env.example
|
|
@ -5,6 +5,7 @@
|
|||
STREAMLIT_PORT=8502
|
||||
OLLAMA_PORT=11434
|
||||
VLLM_PORT=8000
|
||||
CF_TEXT_PORT=8006
|
||||
SEARXNG_PORT=8888
|
||||
VISION_PORT=8002
|
||||
VISION_MODEL=vikhyatk/moondream2
|
||||
|
|
@ -15,6 +16,7 @@ OLLAMA_MODELS_DIR=~/models/ollama
|
|||
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
|
||||
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
|
||||
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
|
||||
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
|
||||
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
|
||||
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
||||
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
||||
|
|
@ -45,6 +47,19 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
|||
CF_LICENSE_KEY=
|
||||
CF_ORCH_URL=https://orch.circuitforge.tech
|
||||
|
||||
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
|
||||
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
|
||||
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
|
||||
# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
|
||||
# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
|
||||
# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
|
||||
# Defaults to 127.0.0.1 (same-host coordinator).
|
||||
# Set to your host LAN IP for a remote coordinator.
|
||||
CF_ORCH_COORDINATOR_URL=http://localhost:7700
|
||||
CF_ORCH_NODE_ID=peregrine
|
||||
CF_ORCH_AGENT_PORT=7701
|
||||
#CF_ORCH_ADVERTISE_HOST=10.1.10.71
|
||||
|
||||
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
||||
CLOUD_MODE=false
|
||||
CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||
|
|
|
|||
|
|
@ -1,4 +1,11 @@
|
|||
backends:
|
||||
cf_text:
|
||||
api_key: any
|
||||
base_url: http://host.docker.internal:8006/v1
|
||||
enabled: true
|
||||
model: cf-text
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
anthropic:
|
||||
api_key_env: ANTHROPIC_API_KEY
|
||||
enabled: false
|
||||
|
|
@ -34,7 +41,7 @@ backends:
|
|||
supports_images: false
|
||||
type: openai_compat
|
||||
vision_service:
|
||||
base_url: http://host.docker.internal:8002
|
||||
base_url: http://vision:8002
|
||||
enabled: true
|
||||
supports_images: true
|
||||
type: vision_service
|
||||
|
|
@ -58,6 +65,7 @@ backends:
|
|||
supports_images: false
|
||||
type: openai_compat
|
||||
fallback_order:
|
||||
- cf_text
|
||||
- ollama
|
||||
- claude_code
|
||||
- vllm
|
||||
|
|
@ -67,6 +75,7 @@ research_fallback_order:
|
|||
- claude_code
|
||||
- vllm_research
|
||||
- ollama_research
|
||||
- cf_text
|
||||
- github_copilot
|
||||
- anthropic
|
||||
vision_fallback_order:
|
||||
|
|
|
|||
Loading…
Reference in a new issue