feat: cf-core env-var LLM config + coordinator auth (closes #67)
Some checks failed
CI / test (push) Failing after 38s

- LLMRouter shim: tri-level config priority (local yaml > user yaml > env-var)
- .env.example: document OLLAMA_HOST, OLLAMA_MODEL, OPENAI_MODEL, ANTHROPIC_MODEL,
  CF_LICENSE_KEY, CF_ORCH_URL
- Wizard Step 5: env-var setup hint + optional Ollama fields for remote profile
- Preflight: write OLLAMA_HOST to .env when Ollama is adopted from host process
pyr0ball 2026-04-04 19:27:24 -07:00
commit 1ab1dffc47
6 changed files with 286 additions and 4 deletions
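The intended end state is an install with no config/llm.yaml at all: a couple of variables in .env and the router configures itself. A rough usage sketch of that flow (hypothetical, assumes a local Ollama is running and that circuitforge-core's env-var auto-config behaves as the shim below describes):

# Hypothetical sketch only; OLLAMA_HOST/OLLAMA_MODEL would normally come from .env.
import os

os.environ.setdefault("OLLAMA_HOST", "http://localhost:11434")
os.environ.setdefault("OLLAMA_MODEL", "llama3.2:3b")

from scripts.llm_router import LLMRouter   # the shim changed in this commit

router = LLMRouter()             # no yaml found, so env-var auto-config kicks in
print(router.complete("ping"))   # request is served by the Ollama backend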

View file

@@ -19,6 +19,14 @@ VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
OLLAMA_DEFAULT_MODEL=llama3.2:3b
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
# Set any of these to configure LLM backends without needing a config/llm.yaml.
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
# API keys (required for remote profile)
ANTHROPIC_API_KEY=
OPENAI_COMPAT_URL=
@@ -31,6 +39,12 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
# GITHUB_TOKEN= # future — enable when public mirror is active
# GITHUB_REPO= # future
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
CF_LICENSE_KEY=
CF_ORCH_URL=https://orch.circuitforge.tech
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
CLOUD_MODE=false
CLOUD_DATA_ROOT=/devl/menagerie-data
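To make the priority comment above concrete, the selection rule these variables drive looks roughly like this (a sketch of the documented behavior, not circuitforge-core's actual code):

# Illustrative sketch of the documented priority: Anthropic > OpenAI-compat > Ollama.
import os

def pick_backend() -> str:
    if os.environ.get("ANTHROPIC_API_KEY"):
        return "anthropic"       # honors ANTHROPIC_MODEL when set
    if os.environ.get("OPENAI_COMPAT_URL"):
        return "openai-compat"   # honors OPENAI_MODEL when set
    return "ollama"              # always tried as the local fallback via OLLAMA_HOST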

View file

@@ -457,6 +457,11 @@ elif step == 5:
    from app.wizard.step_inference import validate
    st.subheader("Step 5 \u2014 Inference & API Keys")
    st.info(
        "**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
        "Peregrine auto-detects it, no config file needed. "
        "Or use the fields below to configure API keys and endpoints."
    )
    profile = saved_yaml.get("inference_profile", "remote")
    if profile == "remote":
@@ -466,8 +471,18 @@ elif step == 5:
                                   placeholder="https://api.together.xyz/v1")
        openai_key = st.text_input("Endpoint API Key (optional)", type="password",
                                   key="oai_key") if openai_url else ""
        ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
                                    placeholder="http://localhost:11434",
                                    key="ollama_host_input")
        ollama_model = st.text_input("Ollama model (optional)",
                                     value="llama3.2:3b",
                                     key="ollama_model_input")
    else:
        st.info(f"Local mode ({profile}): Ollama provides inference.")
        import os
        _ollama_host_env = os.environ.get("OLLAMA_HOST", "")
        if _ollama_host_env:
            st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
        anthropic_key = openai_url = openai_key = ""
    with st.expander("Advanced \u2014 Service Ports & Hosts"):
@@ -546,6 +561,14 @@ elif step == 5:
        if anthropic_key or openai_url:
            env_path.write_text("\n".join(env_lines) + "\n")
        if profile == "remote":
            if ollama_host:
                env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
            if ollama_model:
                env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
            if ollama_host or ollama_model:
                env_path.write_text("\n".join(env_lines) + "\n")
        _save_yaml({"services": svc, "wizard_step": 5})
        st.session_state.wizard_step = 6
        st.rerun()
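The _set_env helper used above is not part of this hunk. A plausible implementation (hypothetical, shown only so the write-back logic above is easy to follow) updates an existing KEY= line in place or appends a new one:

# Hypothetical sketch of the _set_env helper referenced above.
def _set_env(env_lines: list[str], key: str, value: str) -> list[str]:
    """Replace an existing KEY=... line, or append one if the key is absent."""
    prefix = f"{key}="
    for i, line in enumerate(env_lines):
        if line.startswith(prefix):
            env_lines[i] = f"{key}={value}"
            break
    else:
        env_lines.append(f"{key}={value}")
    return env_lines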

View file

@@ -1,19 +1,46 @@
"""
LLM abstraction layer with priority fallback chain.
Reads config/llm.yaml. Tries backends in order; falls back on any error.
Config lookup order:
1. <repo>/config/llm.yaml              per-install local config
2. ~/.config/circuitforge/llm.yaml     user-level config (circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
"""
from pathlib import Path

from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

# Kept for backwards-compatibility — external callers that import CONFIG_PATH
# from this module continue to work.
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
class LLMRouter(_CoreLLMRouter):
    """Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
    def __init__(self, config_path: Path = CONFIG_PATH):
        super().__init__(config_path)

    """Peregrine-specific LLMRouter — tri-level config path priority.

    When ``config_path`` is supplied (e.g. in tests) it is passed straight
    through to the core. When omitted, the lookup order is:

      1. <repo>/config/llm.yaml            (per-install local config)
      2. ~/.config/circuitforge/llm.yaml   (user-level, circuitforge-core default)
      3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
    """

    def __init__(self, config_path: Path | None = None) -> None:
        if config_path is not None:
            # Explicit path supplied — use it directly (e.g. tests, CLI override).
            super().__init__(config_path)
            return
        local = Path(__file__).parent.parent / "config" / "llm.yaml"
        user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
        if local.exists():
            super().__init__(local)
        elif user_level.exists():
            super().__init__(user_level)
        else:
            # No yaml found — let circuitforge-core's env-var auto-config run.
            # The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
            # won't exist either, so _auto_config_from_env() will be triggered.
            super().__init__()

# Module-level singleton for convenience
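The module-level singleton behind that comment falls outside this hunk. Judging from how the new tests exercise it (_router, complete(prompt, system=None)), it presumably looks something like this sketch:

# Sketch of the module-level singleton implied by the tests below; actual lines not shown in this diff.
_router: LLMRouter | None = None

def complete(prompt: str, system: str | None = None) -> str:
    """Lazily create one shared LLMRouter and reuse it for every call."""
    global _router
    if _router is None:
        _router = LLMRouter()
    return _router.complete(prompt, system=system)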

View file

@@ -492,6 +492,12 @@ def main() -> None:
    # binds a harmless free port instead of conflicting with the external service.
    env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
    env_updates["RECOMMENDED_PROFILE"] = profile
    # When Ollama is adopted from the host process, write OLLAMA_HOST so
    # LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
    ollama_info = ports.get("ollama")
    if ollama_info and ollama_info.get("external"):
        env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
    if offload_gb > 0:
        env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
    # GPU info for the app container (which lacks nvidia-smi access)
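For a host-adopted Ollama this block produces exactly the mapping the new preflight tests assert. A quick self-contained illustration of the same logic on sample data:

# Self-contained illustration of the adoption mapping above (same logic, sample data).
ports = {"ollama": {"env_var": "OLLAMA_PORT", "stub_port": 54321,
                    "resolved": 11434, "external": True}}
env_updates = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
ollama_info = ports.get("ollama")
if ollama_info and ollama_info.get("external"):
    env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
assert env_updates == {"OLLAMA_PORT": "54321",
                       "OLLAMA_HOST": "http://host.docker.internal:11434"}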

View file

@@ -0,0 +1,132 @@
"""Tests for Peregrine's LLMRouter shim — priority fallback logic."""
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock, call

sys.path.insert(0, str(Path(__file__).parent.parent))


def _import_fresh():
    """Import scripts.llm_router fresh (bypass module cache)."""
    import importlib
    import scripts.llm_router as mod
    importlib.reload(mod)
    return mod


# ---------------------------------------------------------------------------
# Test 1: local config/llm.yaml takes priority when it exists
# ---------------------------------------------------------------------------
def test_uses_local_yaml_when_present():
    """When config/llm.yaml exists locally, super().__init__ is called with that path."""
    import scripts.llm_router as shim_mod
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

    local_path = Path(shim_mod.__file__).parent.parent / "config" / "llm.yaml"
    user_path = Path.home() / ".config" / "circuitforge" / "llm.yaml"

    def fake_exists(self):
        return self == local_path  # only the local path "exists"

    captured = {}

    def fake_core_init(self, config_path=None):
        captured["config_path"] = config_path
        self.config = {}

    with patch.object(Path, "exists", fake_exists), \
         patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        import importlib
        import scripts.llm_router as mod
        importlib.reload(mod)
        mod.LLMRouter()

    assert captured.get("config_path") == local_path, (
        f"Expected super().__init__ to be called with local path {local_path}, "
        f"got {captured.get('config_path')}"
    )
# ---------------------------------------------------------------------------
# Test 2: falls through to env-var auto-config when neither yaml exists
# ---------------------------------------------------------------------------
def test_falls_through_to_env_when_no_yamls():
    """When no yaml files exist, super().__init__ is called with no args (env-var path)."""
    import scripts.llm_router as shim_mod
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

    captured = {}

    def fake_exists(self):
        return False  # no yaml files exist anywhere

    def fake_core_init(self, config_path=None):
        # Record whether a path was passed
        captured["config_path"] = config_path
        captured["called"] = True
        self.config = {}

    with patch.object(Path, "exists", fake_exists), \
         patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        import importlib
        import scripts.llm_router as mod
        importlib.reload(mod)
        mod.LLMRouter()

    assert captured.get("called"), "super().__init__ was never called"
    # When called with no args, config_path defaults to None in our mock,
    # meaning the shim correctly fell through to env-var auto-config
    assert captured.get("config_path") is None, (
        f"Expected super().__init__ to be called with no explicit path (None), "
        f"got {captured.get('config_path')}"
    )
# ---------------------------------------------------------------------------
# Test 3: module-level complete() singleton is only instantiated once
# ---------------------------------------------------------------------------
def test_complete_singleton_is_reused():
    """complete() reuses the same LLMRouter instance across multiple calls."""
    import importlib
    import scripts.llm_router as mod
    importlib.reload(mod)

    # Reset singleton
    mod._router = None
    instantiation_count = [0]
    original_init = mod.LLMRouter.__init__
    mock_router = MagicMock()
    mock_router.complete.return_value = "OK"
    original_class = mod.LLMRouter

    class CountingRouter(original_class):
        def __init__(self):
            instantiation_count[0] += 1
            # Bypass real __init__ to avoid needing config files
            self.config = {}

        def complete(self, prompt, system=None):
            return "OK"

    # Patch the class in the module
    mod.LLMRouter = CountingRouter
    mod._router = None

    result1 = mod.complete("first call")
    result2 = mod.complete("second call")

    assert result1 == "OK"
    assert result2 == "OK"
    assert instantiation_count[0] == 1, (
        f"Expected LLMRouter to be instantiated exactly once, "
        f"got {instantiation_count[0]} instantiation(s)"
    )

    # Restore
    mod.LLMRouter = original_class

View file

@@ -0,0 +1,80 @@
"""Tests: preflight writes OLLAMA_HOST to .env when Ollama is adopted from host."""
import sys
from pathlib import Path
from unittest.mock import patch, call

sys.path.insert(0, str(Path(__file__).parent.parent))

import scripts.preflight as pf


def _make_ports(ollama_external: bool = True, ollama_port: int = 11434) -> dict:
    """Build a minimal ports dict as returned by preflight's port-scanning logic."""
    return {
        "ollama": {
            "resolved": ollama_port,
            "external": ollama_external,
            "stub_port": 54321,
            "env_var": "OLLAMA_PORT",
            "adoptable": True,
        },
        "streamlit": {
            "resolved": 8502,
            "external": False,
            "stub_port": 8502,
            "env_var": "STREAMLIT_PORT",
            "adoptable": False,
        },
    }


def _capture_env_updates(ports: dict) -> dict:
    """Run the env_updates construction block from preflight.main() and return the result.

    We extract this logic from main() so tests can call it directly without
    needing to simulate the full CLI argument parsing and system probe flow.
    The block under test is the `if not args.check_only:` section.
    """
    captured = {}

    def fake_write_env(updates: dict) -> None:
        captured.update(updates)

    with patch.object(pf, "write_env", side_effect=fake_write_env), \
         patch.object(pf, "update_llm_yaml"), \
         patch.object(pf, "write_compose_override"):
        # Replicate the env_updates block from preflight.main() as faithfully as possible
        env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
        env_updates["RECOMMENDED_PROFILE"] = "single-gpu"

        # ---- Code under test: the OLLAMA_HOST adoption block ----
        ollama_info = ports.get("ollama")
        if ollama_info and ollama_info.get("external"):
            env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
        # ---------------------------------------------------------

        pf.write_env(env_updates)

    return captured
def test_ollama_host_written_when_adopted():
    """OLLAMA_HOST is added when Ollama is adopted from the host (external=True)."""
    ports = _make_ports(ollama_external=True, ollama_port=11434)
    result = _capture_env_updates(ports)
    assert "OLLAMA_HOST" in result
    assert result["OLLAMA_HOST"] == "http://host.docker.internal:11434"


def test_ollama_host_not_written_when_docker_managed():
    """OLLAMA_HOST is NOT added when Ollama runs in Docker (external=False)."""
    ports = _make_ports(ollama_external=False)
    result = _capture_env_updates(ports)
    assert "OLLAMA_HOST" not in result


def test_ollama_host_reflects_adopted_port():
    """OLLAMA_HOST uses the actual adopted port, not the default."""
    ports = _make_ports(ollama_external=True, ollama_port=11500)
    result = _capture_env_updates(ports)
    assert result["OLLAMA_HOST"] == "http://host.docker.internal:11500"