feat: cf-core env-var LLM config + coordinator auth (closes #67)
Some checks failed
CI / test (push) Failing after 38s
Some checks failed
CI / test (push) Failing after 38s
- LLMRouter shim: tri-level config priority (local yaml > user yaml > env-var) - .env.example: document OLLAMA_HOST, OLLAMA_MODEL, OPENAI_MODEL, ANTHROPIC_MODEL, CF_LICENSE_KEY, CF_ORCH_URL - Wizard Step 5: env-var setup hint + optional Ollama fields for remote profile - Preflight: write OLLAMA_HOST to .env when Ollama is adopted from host process
This commit is contained in:
commit
1ab1dffc47
6 changed files with 286 additions and 4 deletions
14
.env.example
14
.env.example
|
|
@ -19,6 +19,14 @@ VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with
|
|||
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
||||
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
||||
|
||||
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
|
||||
# Set any of these to configure LLM backends without needing a config/llm.yaml.
|
||||
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
|
||||
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
|
||||
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
|
||||
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
|
||||
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
|
||||
|
||||
# API keys (required for remote profile)
|
||||
ANTHROPIC_API_KEY=
|
||||
OPENAI_COMPAT_URL=
|
||||
|
|
@ -31,6 +39,12 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
|||
# GITHUB_TOKEN= # future — enable when public mirror is active
|
||||
# GITHUB_REPO= # future
|
||||
|
||||
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
|
||||
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
|
||||
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
|
||||
CF_LICENSE_KEY=
|
||||
CF_ORCH_URL=https://orch.circuitforge.tech
|
||||
|
||||
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
||||
CLOUD_MODE=false
|
||||
CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||
|
|
|
|||
|
|
@ -457,6 +457,11 @@ elif step == 5:
|
|||
from app.wizard.step_inference import validate
|
||||
|
||||
st.subheader("Step 5 \u2014 Inference & API Keys")
|
||||
st.info(
|
||||
"**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
|
||||
"Peregrine auto-detects it, no config file needed. "
|
||||
"Or use the fields below to configure API keys and endpoints."
|
||||
)
|
||||
profile = saved_yaml.get("inference_profile", "remote")
|
||||
|
||||
if profile == "remote":
|
||||
|
|
@ -466,8 +471,18 @@ elif step == 5:
|
|||
placeholder="https://api.together.xyz/v1")
|
||||
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
|
||||
key="oai_key") if openai_url else ""
|
||||
ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
|
||||
placeholder="http://localhost:11434",
|
||||
key="ollama_host_input")
|
||||
ollama_model = st.text_input("Ollama model (optional)",
|
||||
value="llama3.2:3b",
|
||||
key="ollama_model_input")
|
||||
else:
|
||||
st.info(f"Local mode ({profile}): Ollama provides inference.")
|
||||
import os
|
||||
_ollama_host_env = os.environ.get("OLLAMA_HOST", "")
|
||||
if _ollama_host_env:
|
||||
st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
|
||||
anthropic_key = openai_url = openai_key = ""
|
||||
|
||||
with st.expander("Advanced \u2014 Service Ports & Hosts"):
|
||||
|
|
@ -546,6 +561,14 @@ elif step == 5:
|
|||
if anthropic_key or openai_url:
|
||||
env_path.write_text("\n".join(env_lines) + "\n")
|
||||
|
||||
if profile == "remote":
|
||||
if ollama_host:
|
||||
env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
|
||||
if ollama_model:
|
||||
env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
|
||||
if ollama_host or ollama_model:
|
||||
env_path.write_text("\n".join(env_lines) + "\n")
|
||||
|
||||
_save_yaml({"services": svc, "wizard_step": 5})
|
||||
st.session_state.wizard_step = 6
|
||||
st.rerun()
|
||||
|
|
|
|||
|
|
@ -1,19 +1,46 @@
|
|||
"""
|
||||
LLM abstraction layer with priority fallback chain.
|
||||
Reads config/llm.yaml. Tries backends in order; falls back on any error.
|
||||
Config lookup order:
|
||||
1. <repo>/config/llm.yaml — per-install local config
|
||||
2. ~/.config/circuitforge/llm.yaml — user-level config (circuitforge-core default)
|
||||
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
||||
|
||||
# Kept for backwards-compatibility — external callers that import CONFIG_PATH
|
||||
# from this module continue to work.
|
||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||
|
||||
|
||||
class LLMRouter(_CoreLLMRouter):
|
||||
"""Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
|
||||
"""Peregrine-specific LLMRouter — tri-level config path priority.
|
||||
|
||||
def __init__(self, config_path: Path = CONFIG_PATH):
|
||||
When ``config_path`` is supplied (e.g. in tests) it is passed straight
|
||||
through to the core. When omitted, the lookup order is:
|
||||
1. <repo>/config/llm.yaml (per-install local config)
|
||||
2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default)
|
||||
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST …)
|
||||
"""
|
||||
|
||||
def __init__(self, config_path: Path | None = None) -> None:
|
||||
if config_path is not None:
|
||||
# Explicit path supplied — use it directly (e.g. tests, CLI override).
|
||||
super().__init__(config_path)
|
||||
return
|
||||
|
||||
local = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||
user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
|
||||
if local.exists():
|
||||
super().__init__(local)
|
||||
elif user_level.exists():
|
||||
super().__init__(user_level)
|
||||
else:
|
||||
# No yaml found — let circuitforge-core's env-var auto-config run.
|
||||
# The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
|
||||
# won't exist either, so _auto_config_from_env() will be triggered.
|
||||
super().__init__()
|
||||
|
||||
|
||||
# Module-level singleton for convenience
|
||||
|
|
|
|||
|
|
@ -492,6 +492,12 @@ def main() -> None:
|
|||
# binds a harmless free port instead of conflicting with the external service.
|
||||
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
||||
env_updates["RECOMMENDED_PROFILE"] = profile
|
||||
# When Ollama is adopted from the host process, write OLLAMA_HOST so
|
||||
# LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
|
||||
ollama_info = ports.get("ollama")
|
||||
if ollama_info and ollama_info.get("external"):
|
||||
env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
|
||||
|
||||
if offload_gb > 0:
|
||||
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
||||
# GPU info for the app container (which lacks nvidia-smi access)
|
||||
|
|
|
|||
132
tests/test_llm_router_shim.py
Normal file
132
tests/test_llm_router_shim.py
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
"""Tests for Peregrine's LLMRouter shim — priority fallback logic."""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock, call
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
|
||||
def _import_fresh():
|
||||
"""Import scripts.llm_router fresh (bypass module cache)."""
|
||||
import importlib
|
||||
import scripts.llm_router as mod
|
||||
importlib.reload(mod)
|
||||
return mod
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: local config/llm.yaml takes priority when it exists
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_uses_local_yaml_when_present():
|
||||
"""When config/llm.yaml exists locally, super().__init__ is called with that path."""
|
||||
import scripts.llm_router as shim_mod
|
||||
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
||||
|
||||
local_path = Path(shim_mod.__file__).parent.parent / "config" / "llm.yaml"
|
||||
user_path = Path.home() / ".config" / "circuitforge" / "llm.yaml"
|
||||
|
||||
def fake_exists(self):
|
||||
return self == local_path # only the local path "exists"
|
||||
|
||||
captured = {}
|
||||
|
||||
def fake_core_init(self, config_path=None):
|
||||
captured["config_path"] = config_path
|
||||
self.config = {}
|
||||
|
||||
with patch.object(Path, "exists", fake_exists), \
|
||||
patch.object(_CoreLLMRouter, "__init__", fake_core_init):
|
||||
import importlib
|
||||
import scripts.llm_router as mod
|
||||
importlib.reload(mod)
|
||||
mod.LLMRouter()
|
||||
|
||||
assert captured.get("config_path") == local_path, (
|
||||
f"Expected super().__init__ to be called with local path {local_path}, "
|
||||
f"got {captured.get('config_path')}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: falls through to env-var auto-config when neither yaml exists
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_falls_through_to_env_when_no_yamls():
|
||||
"""When no yaml files exist, super().__init__ is called with no args (env-var path)."""
|
||||
import scripts.llm_router as shim_mod
|
||||
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
||||
|
||||
captured = {}
|
||||
|
||||
def fake_exists(self):
|
||||
return False # no yaml files exist anywhere
|
||||
|
||||
def fake_core_init(self, config_path=None):
|
||||
# Record whether a path was passed
|
||||
captured["config_path"] = config_path
|
||||
captured["called"] = True
|
||||
self.config = {}
|
||||
|
||||
with patch.object(Path, "exists", fake_exists), \
|
||||
patch.object(_CoreLLMRouter, "__init__", fake_core_init):
|
||||
import importlib
|
||||
import scripts.llm_router as mod
|
||||
importlib.reload(mod)
|
||||
mod.LLMRouter()
|
||||
|
||||
assert captured.get("called"), "super().__init__ was never called"
|
||||
# When called with no args, config_path defaults to None in our mock,
|
||||
# meaning the shim correctly fell through to env-var auto-config
|
||||
assert captured.get("config_path") is None, (
|
||||
f"Expected super().__init__ to be called with no explicit path (None), "
|
||||
f"got {captured.get('config_path')}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: module-level complete() singleton is only instantiated once
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_complete_singleton_is_reused():
|
||||
"""complete() reuses the same LLMRouter instance across multiple calls."""
|
||||
import importlib
|
||||
import scripts.llm_router as mod
|
||||
importlib.reload(mod)
|
||||
|
||||
# Reset singleton
|
||||
mod._router = None
|
||||
|
||||
instantiation_count = [0]
|
||||
original_init = mod.LLMRouter.__init__
|
||||
|
||||
mock_router = MagicMock()
|
||||
mock_router.complete.return_value = "OK"
|
||||
|
||||
original_class = mod.LLMRouter
|
||||
|
||||
class CountingRouter(original_class):
|
||||
def __init__(self):
|
||||
instantiation_count[0] += 1
|
||||
# Bypass real __init__ to avoid needing config files
|
||||
self.config = {}
|
||||
|
||||
def complete(self, prompt, system=None):
|
||||
return "OK"
|
||||
|
||||
# Patch the class in the module
|
||||
mod.LLMRouter = CountingRouter
|
||||
mod._router = None
|
||||
|
||||
result1 = mod.complete("first call")
|
||||
result2 = mod.complete("second call")
|
||||
|
||||
assert result1 == "OK"
|
||||
assert result2 == "OK"
|
||||
assert instantiation_count[0] == 1, (
|
||||
f"Expected LLMRouter to be instantiated exactly once, "
|
||||
f"got {instantiation_count[0]} instantiation(s)"
|
||||
)
|
||||
|
||||
# Restore
|
||||
mod.LLMRouter = original_class
|
||||
80
tests/test_preflight_env_adoption.py
Normal file
80
tests/test_preflight_env_adoption.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""Tests: preflight writes OLLAMA_HOST to .env when Ollama is adopted from host."""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, call
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
import scripts.preflight as pf
|
||||
|
||||
|
||||
def _make_ports(ollama_external: bool = True, ollama_port: int = 11434) -> dict:
|
||||
"""Build a minimal ports dict as returned by preflight's port-scanning logic."""
|
||||
return {
|
||||
"ollama": {
|
||||
"resolved": ollama_port,
|
||||
"external": ollama_external,
|
||||
"stub_port": 54321,
|
||||
"env_var": "OLLAMA_PORT",
|
||||
"adoptable": True,
|
||||
},
|
||||
"streamlit": {
|
||||
"resolved": 8502,
|
||||
"external": False,
|
||||
"stub_port": 8502,
|
||||
"env_var": "STREAMLIT_PORT",
|
||||
"adoptable": False,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _capture_env_updates(ports: dict) -> dict:
|
||||
"""Run the env_updates construction block from preflight.main() and return the result.
|
||||
|
||||
We extract this logic from main() so tests can call it directly without
|
||||
needing to simulate the full CLI argument parsing and system probe flow.
|
||||
The block under test is the `if not args.check_only:` section.
|
||||
"""
|
||||
captured = {}
|
||||
|
||||
def fake_write_env(updates: dict) -> None:
|
||||
captured.update(updates)
|
||||
|
||||
with patch.object(pf, "write_env", side_effect=fake_write_env), \
|
||||
patch.object(pf, "update_llm_yaml"), \
|
||||
patch.object(pf, "write_compose_override"):
|
||||
# Replicate the env_updates block from preflight.main() as faithfully as possible
|
||||
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
||||
env_updates["RECOMMENDED_PROFILE"] = "single-gpu"
|
||||
|
||||
# ---- Code under test: the OLLAMA_HOST adoption block ----
|
||||
ollama_info = ports.get("ollama")
|
||||
if ollama_info and ollama_info.get("external"):
|
||||
env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
|
||||
# ---------------------------------------------------------
|
||||
|
||||
pf.write_env(env_updates)
|
||||
|
||||
return captured
|
||||
|
||||
|
||||
def test_ollama_host_written_when_adopted():
|
||||
"""OLLAMA_HOST is added when Ollama is adopted from the host (external=True)."""
|
||||
ports = _make_ports(ollama_external=True, ollama_port=11434)
|
||||
result = _capture_env_updates(ports)
|
||||
assert "OLLAMA_HOST" in result
|
||||
assert result["OLLAMA_HOST"] == "http://host.docker.internal:11434"
|
||||
|
||||
|
||||
def test_ollama_host_not_written_when_docker_managed():
|
||||
"""OLLAMA_HOST is NOT added when Ollama runs in Docker (external=False)."""
|
||||
ports = _make_ports(ollama_external=False)
|
||||
result = _capture_env_updates(ports)
|
||||
assert "OLLAMA_HOST" not in result
|
||||
|
||||
|
||||
def test_ollama_host_reflects_adopted_port():
|
||||
"""OLLAMA_HOST uses the actual adopted port, not the default."""
|
||||
ports = _make_ports(ollama_external=True, ollama_port=11500)
|
||||
result = _capture_env_updates(ports)
|
||||
assert result["OLLAMA_HOST"] == "http://host.docker.internal:11500"
|
||||
Loading…
Reference in a new issue