feat: wire cf_text as openai_compat backend in llm.yaml
Adds the cf-text inference service (circuitforge-core) to the LLM fallback chain as the first option for cover letter generation. cf-text now exposes /v1/chat/completions (added in cf-core 69a338b), making it a drop-in openai_compat backend on port 8006. Also adds cf_text to research_fallback_order, and adds CF_TEXT_MODEL and CF_TEXT_PORT to .env.example. Closes #75.
This commit is contained in:
parent
278413b073
commit
7467fb5416
2 changed files with 25 additions and 1 deletion
15
.env.example
15
.env.example
|
|
@ -5,6 +5,7 @@
|
||||||
STREAMLIT_PORT=8502
|
STREAMLIT_PORT=8502
|
||||||
OLLAMA_PORT=11434
|
OLLAMA_PORT=11434
|
||||||
VLLM_PORT=8000
|
VLLM_PORT=8000
|
||||||
|
CF_TEXT_PORT=8006
|
||||||
SEARXNG_PORT=8888
|
SEARXNG_PORT=8888
|
||||||
VISION_PORT=8002
|
VISION_PORT=8002
|
||||||
VISION_MODEL=vikhyatk/moondream2
|
VISION_MODEL=vikhyatk/moondream2
|
||||||
|
|
@ -15,6 +16,7 @@ OLLAMA_MODELS_DIR=~/models/ollama
|
||||||
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
|
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
|
||||||
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
|
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
|
||||||
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
|
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
|
||||||
|
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
|
||||||
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
|
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
|
||||||
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
||||||
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
||||||
|
|
@ -45,6 +47,19 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
||||||
CF_LICENSE_KEY=
|
CF_LICENSE_KEY=
|
||||||
CF_ORCH_URL=https://orch.circuitforge.tech
|
CF_ORCH_URL=https://orch.circuitforge.tech
|
||||||
|
|
||||||
|
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
|
||||||
|
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
|
||||||
|
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
|
||||||
|
# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
|
||||||
|
# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
|
||||||
|
# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
|
||||||
|
# Defaults to 127.0.0.1 (same-host coordinator).
|
||||||
|
# Set to your host LAN IP for a remote coordinator.
|
||||||
|
CF_ORCH_COORDINATOR_URL=http://localhost:7700
|
||||||
|
CF_ORCH_NODE_ID=peregrine
|
||||||
|
CF_ORCH_AGENT_PORT=7701
|
||||||
|
#CF_ORCH_ADVERTISE_HOST=10.1.10.71
|
||||||
|
|
||||||
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
||||||
CLOUD_MODE=false
|
CLOUD_MODE=false
|
||||||
CLOUD_DATA_ROOT=/devl/menagerie-data
|
CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,11 @@
|
||||||
backends:
|
backends:
|
||||||
|
cf_text:
|
||||||
|
api_key: any
|
||||||
|
base_url: http://host.docker.internal:8006/v1
|
||||||
|
enabled: true
|
||||||
|
model: cf-text
|
||||||
|
supports_images: false
|
||||||
|
type: openai_compat
|
||||||
anthropic:
|
anthropic:
|
||||||
api_key_env: ANTHROPIC_API_KEY
|
api_key_env: ANTHROPIC_API_KEY
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
@ -34,7 +41,7 @@ backends:
|
||||||
supports_images: false
|
supports_images: false
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
vision_service:
|
vision_service:
|
||||||
base_url: http://host.docker.internal:8002
|
base_url: http://vision:8002
|
||||||
enabled: true
|
enabled: true
|
||||||
supports_images: true
|
supports_images: true
|
||||||
type: vision_service
|
type: vision_service
|
||||||
|
|
@ -58,6 +65,7 @@ backends:
|
||||||
supports_images: false
|
supports_images: false
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
fallback_order:
|
fallback_order:
|
||||||
|
- cf_text
|
||||||
- ollama
|
- ollama
|
||||||
- claude_code
|
- claude_code
|
||||||
- vllm
|
- vllm
|
||||||
|
|
@ -67,6 +75,7 @@ research_fallback_order:
|
||||||
- claude_code
|
- claude_code
|
||||||
- vllm_research
|
- vllm_research
|
||||||
- ollama_research
|
- ollama_research
|
||||||
|
- cf_text
|
||||||
- github_copilot
|
- github_copilot
|
||||||
- anthropic
|
- anthropic
|
||||||
vision_fallback_order:
|
vision_fallback_order:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue