feat: cf-core env-var LLM config + coordinator auth (closes #67)
Some checks failed
CI / test (push) Failing after 38s
Some checks failed
CI / test (push) Failing after 38s
- LLMRouter shim: tri-level config priority (local yaml > user yaml > env-var)
- .env.example: document OLLAMA_HOST, OLLAMA_MODEL, OPENAI_MODEL, ANTHROPIC_MODEL, CF_LICENSE_KEY, CF_ORCH_URL
- Wizard Step 5: env-var setup hint + optional Ollama fields for remote profile
- Preflight: write OLLAMA_HOST to .env when Ollama is adopted from host process
This commit is contained in:
commit
1ab1dffc47
6 changed files with 286 additions and 4 deletions
14
.env.example
14
.env.example
|
|
@ -19,6 +19,14 @@ VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with
|
||||||
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
||||||
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
||||||
|
|
||||||
|
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
|
||||||
|
# Set any of these to configure LLM backends without needing a config/llm.yaml.
|
||||||
|
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
|
||||||
|
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
|
||||||
|
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
|
||||||
|
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
|
||||||
|
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
|
||||||
|
|
||||||
# API keys (required for remote profile)
|
# API keys (required for remote profile)
|
||||||
ANTHROPIC_API_KEY=
|
ANTHROPIC_API_KEY=
|
||||||
OPENAI_COMPAT_URL=
|
OPENAI_COMPAT_URL=
|
||||||
|
|
@ -31,6 +39,12 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
||||||
# GITHUB_TOKEN= # future — enable when public mirror is active
|
# GITHUB_TOKEN= # future — enable when public mirror is active
|
||||||
# GITHUB_REPO= # future
|
# GITHUB_REPO= # future
|
||||||
|
|
||||||
|
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
|
||||||
|
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
|
||||||
|
# Leave both blank (i.e. also clear the CF_ORCH_URL value shown below) for local self-hosted cf-orch or bare-metal inference.
|
||||||
|
CF_LICENSE_KEY=
|
||||||
|
CF_ORCH_URL=https://orch.circuitforge.tech
|
||||||
|
|
||||||
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
||||||
CLOUD_MODE=false
|
CLOUD_MODE=false
|
||||||
CLOUD_DATA_ROOT=/devl/menagerie-data
|
CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||||
|
|
|
||||||
|
|
@ -457,6 +457,11 @@ elif step == 5:
|
||||||
from app.wizard.step_inference import validate
|
from app.wizard.step_inference import validate
|
||||||
|
|
||||||
st.subheader("Step 5 \u2014 Inference & API Keys")
|
st.subheader("Step 5 \u2014 Inference & API Keys")
|
||||||
|
st.info(
|
||||||
|
"**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
|
||||||
|
"Peregrine auto-detects it, no config file needed. "
|
||||||
|
"Or use the fields below to configure API keys and endpoints."
|
||||||
|
)
|
||||||
profile = saved_yaml.get("inference_profile", "remote")
|
profile = saved_yaml.get("inference_profile", "remote")
|
||||||
|
|
||||||
if profile == "remote":
|
if profile == "remote":
|
||||||
|
|
@ -466,8 +471,18 @@ elif step == 5:
|
||||||
placeholder="https://api.together.xyz/v1")
|
placeholder="https://api.together.xyz/v1")
|
||||||
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
|
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
|
||||||
key="oai_key") if openai_url else ""
|
key="oai_key") if openai_url else ""
|
||||||
|
ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
|
||||||
|
placeholder="http://localhost:11434",
|
||||||
|
key="ollama_host_input")
|
||||||
|
ollama_model = st.text_input("Ollama model (optional)",
|
||||||
|
value="llama3.2:3b",
|
||||||
|
key="ollama_model_input")
|
||||||
else:
|
else:
|
||||||
st.info(f"Local mode ({profile}): Ollama provides inference.")
|
st.info(f"Local mode ({profile}): Ollama provides inference.")
|
||||||
|
import os
|
||||||
|
_ollama_host_env = os.environ.get("OLLAMA_HOST", "")
|
||||||
|
if _ollama_host_env:
|
||||||
|
st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
|
||||||
anthropic_key = openai_url = openai_key = ""
|
anthropic_key = openai_url = openai_key = ""
|
||||||
|
|
||||||
with st.expander("Advanced \u2014 Service Ports & Hosts"):
|
with st.expander("Advanced \u2014 Service Ports & Hosts"):
|
||||||
|
|
@ -546,6 +561,14 @@ elif step == 5:
|
||||||
if anthropic_key or openai_url:
|
if anthropic_key or openai_url:
|
||||||
env_path.write_text("\n".join(env_lines) + "\n")
|
env_path.write_text("\n".join(env_lines) + "\n")
|
||||||
|
|
||||||
|
if profile == "remote":
|
||||||
|
if ollama_host:
|
||||||
|
env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
|
||||||
|
if ollama_model:
|
||||||
|
env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
|
||||||
|
if ollama_host or ollama_model:
|
||||||
|
env_path.write_text("\n".join(env_lines) + "\n")
|
||||||
|
|
||||||
_save_yaml({"services": svc, "wizard_step": 5})
|
_save_yaml({"services": svc, "wizard_step": 5})
|
||||||
st.session_state.wizard_step = 6
|
st.session_state.wizard_step = 6
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,46 @@
|
||||||
"""
|
"""
|
||||||
LLM abstraction layer with priority fallback chain.
|
LLM abstraction layer with priority fallback chain.
|
||||||
Reads config/llm.yaml. Tries backends in order; falls back on any error.
|
Config lookup order:
|
||||||
|
1. <repo>/config/llm.yaml — per-install local config
|
||||||
|
2. ~/.config/circuitforge/llm.yaml — user-level config (circuitforge-core default)
|
||||||
|
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
|
||||||
"""
|
"""
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
||||||
|
|
||||||
|
# Kept for backwards-compatibility — external callers that import CONFIG_PATH
|
||||||
|
# from this module continue to work.
|
||||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||||
|
|
||||||
|
|
||||||
class LLMRouter(_CoreLLMRouter):
|
class LLMRouter(_CoreLLMRouter):
|
||||||
"""Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
|
"""Peregrine-specific LLMRouter — tri-level config path priority.
|
||||||
|
|
||||||
def __init__(self, config_path: Path = CONFIG_PATH):
|
When ``config_path`` is supplied (e.g. in tests) it is passed straight
|
||||||
super().__init__(config_path)
|
through to the core. When omitted, the lookup order is:
|
||||||
|
1. <repo>/config/llm.yaml (per-install local config)
|
||||||
|
2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default)
|
||||||
|
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST …)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config_path: Path | None = None) -> None:
|
||||||
|
if config_path is not None:
|
||||||
|
# Explicit path supplied — use it directly (e.g. tests, CLI override).
|
||||||
|
super().__init__(config_path)
|
||||||
|
return
|
||||||
|
|
||||||
|
local = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||||
|
user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
|
||||||
|
if local.exists():
|
||||||
|
super().__init__(local)
|
||||||
|
elif user_level.exists():
|
||||||
|
super().__init__(user_level)
|
||||||
|
else:
|
||||||
|
# No yaml found — let circuitforge-core's env-var auto-config run.
|
||||||
|
# The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
|
||||||
|
# won't exist either, so _auto_config_from_env() will be triggered.
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
|
||||||
# Module-level singleton for convenience
|
# Module-level singleton for convenience
|
||||||
|
|
|
||||||
|
|
@ -492,6 +492,12 @@ def main() -> None:
|
||||||
# binds a harmless free port instead of conflicting with the external service.
|
# binds a harmless free port instead of conflicting with the external service.
|
||||||
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
||||||
env_updates["RECOMMENDED_PROFILE"] = profile
|
env_updates["RECOMMENDED_PROFILE"] = profile
|
||||||
|
# When Ollama is adopted from the host process, write OLLAMA_HOST so
|
||||||
|
# LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
|
||||||
|
ollama_info = ports.get("ollama")
|
||||||
|
if ollama_info and ollama_info.get("external"):
|
||||||
|
env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
|
||||||
|
|
||||||
if offload_gb > 0:
|
if offload_gb > 0:
|
||||||
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
||||||
# GPU info for the app container (which lacks nvidia-smi access)
|
# GPU info for the app container (which lacks nvidia-smi access)
|
||||||
|
|
|
||||||
132
tests/test_llm_router_shim.py
Normal file
132
tests/test_llm_router_shim.py
Normal file
|
|
@ -0,0 +1,132 @@
|
||||||
|
"""Tests for Peregrine's LLMRouter shim — priority fallback logic."""
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch, MagicMock, call
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
|
||||||
|
def _import_fresh():
    """Return ``scripts.llm_router`` after re-executing its top-level code."""
    from importlib import reload

    import scripts.llm_router as shim

    # reload() re-runs the module body in place and returns the module object.
    return reload(shim)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 1: local config/llm.yaml takes priority when it exists
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_uses_local_yaml_when_present():
    """When config/llm.yaml exists locally, super().__init__ is called with that path.

    ``Path.exists`` is stubbed so that only the per-install yaml appears to
    exist, and the core router's ``__init__`` is replaced by a recorder, so no
    real config file is read.
    """
    import scripts.llm_router as shim_mod
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

    local_path = Path(shim_mod.__file__).parent.parent / "config" / "llm.yaml"

    def fake_exists(self):
        return self == local_path  # only the local path "exists"

    captured = {}

    def fake_core_init(self, config_path=None):
        # Record what the shim forwarded to the core router.
        captured["config_path"] = config_path
        self.config = {}

    with patch.object(Path, "exists", fake_exists), \
            patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        # Reload while the stubs are active, through the shared helper instead
        # of repeating the import/reload boilerplate inline.
        mod = _import_fresh()
        mod.LLMRouter()

    assert captured.get("config_path") == local_path, (
        f"Expected super().__init__ to be called with local path {local_path}, "
        f"got {captured.get('config_path')}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 2: falls through to env-var auto-config when neither yaml exists
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_falls_through_to_env_when_no_yamls():
    """When no yaml files exist, super().__init__ is called with no args (env-var path)."""
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

    # Sentinel default for the recorder below. With a plain ``None`` default
    # the test could not tell ``super().__init__()`` apart from
    # ``super().__init__(None)``, yet only the former is the "no args" call
    # this test is meant to verify.
    not_passed = object()
    captured = {}

    def fake_exists(self):
        return False  # no yaml files exist anywhere

    def fake_core_init(self, config_path=not_passed):
        # Record whether a path was passed
        captured["config_path"] = config_path
        captured["called"] = True
        self.config = {}

    with patch.object(Path, "exists", fake_exists), \
            patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        # Reload while the stubs are active, through the shared helper.
        mod = _import_fresh()
        mod.LLMRouter()

    assert captured.get("called"), "super().__init__ was never called"
    # The sentinel survives only if the shim passed no argument at all,
    # i.e. it deferred entirely to the core router's own default handling.
    assert captured.get("config_path") is not_passed, (
        "Expected super().__init__ to be called with no explicit path, "
        f"got {captured.get('config_path')!r}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 3: module-level complete() singleton is only instantiated once
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_complete_singleton_is_reused():
    """complete() reuses the same LLMRouter instance across multiple calls."""
    mod = _import_fresh()

    instantiation_count = [0]
    original_class = mod.LLMRouter

    class CountingRouter(original_class):
        def __init__(self):
            instantiation_count[0] += 1
            # Bypass real __init__ to avoid needing config files
            self.config = {}

        def complete(self, prompt, system=None):
            return "OK"

    # Swap the class in the module and clear the cached instance so the first
    # complete() call has to construct a router.
    mod.LLMRouter = CountingRouter
    mod._router = None
    try:
        result1 = mod.complete("first call")
        result2 = mod.complete("second call")
    finally:
        # Always undo the patch, even if complete() raises, and drop the fake
        # instance so it cannot leak into other tests through the module.
        mod.LLMRouter = original_class
        mod._router = None

    assert result1 == "OK"
    assert result2 == "OK"
    assert instantiation_count[0] == 1, (
        f"Expected LLMRouter to be instantiated exactly once, "
        f"got {instantiation_count[0]} instantiation(s)"
    )
|
||||||
80
tests/test_preflight_env_adoption.py
Normal file
80
tests/test_preflight_env_adoption.py
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
"""Tests: preflight writes OLLAMA_HOST to .env when Ollama is adopted from host."""
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch, call
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
import scripts.preflight as pf
|
||||||
|
|
||||||
|
|
||||||
|
def _make_ports(ollama_external: bool = True, ollama_port: int = 11434) -> dict:
|
||||||
|
"""Build a minimal ports dict as returned by preflight's port-scanning logic."""
|
||||||
|
return {
|
||||||
|
"ollama": {
|
||||||
|
"resolved": ollama_port,
|
||||||
|
"external": ollama_external,
|
||||||
|
"stub_port": 54321,
|
||||||
|
"env_var": "OLLAMA_PORT",
|
||||||
|
"adoptable": True,
|
||||||
|
},
|
||||||
|
"streamlit": {
|
||||||
|
"resolved": 8502,
|
||||||
|
"external": False,
|
||||||
|
"stub_port": 8502,
|
||||||
|
"env_var": "STREAMLIT_PORT",
|
||||||
|
"adoptable": False,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _capture_env_updates(ports: dict) -> dict:
    """Build env updates from *ports* and return what is handed to ``pf.write_env``.

    NOTE(review): this helper mirrors (copies) the env_updates construction
    from ``preflight.main()`` rather than executing it, so these tests will
    not notice if ``main()`` drifts. Prefer extracting that block into a
    function in ``scripts.preflight`` and calling it from here.

    Args:
        ports: Mapping of service name to an info dict with at least
            ``env_var`` and ``stub_port``; the ``"ollama"`` entry is also read
            for ``external`` and ``resolved``.

    Returns:
        The key/value pairs passed to the stubbed ``pf.write_env``.
    """
    captured: dict = {}

    def fake_write_env(updates: dict) -> None:
        captured.update(updates)

    # Only write_env is reached below, so it is the only preflight function
    # that needs stubbing.
    with patch.object(pf, "write_env", side_effect=fake_write_env):
        env_updates: dict[str, str] = {
            info["env_var"]: str(info["stub_port"]) for info in ports.values()
        }
        env_updates["RECOMMENDED_PROFILE"] = "single-gpu"

        # ---- Mirrored logic: the OLLAMA_HOST adoption block ----
        ollama_info = ports.get("ollama")
        if ollama_info and ollama_info.get("external"):
            env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
        # --------------------------------------------------------

        pf.write_env(env_updates)

    return captured
|
||||||
|
|
||||||
|
|
||||||
|
def test_ollama_host_written_when_adopted():
    """An Ollama entry flagged external=True yields an OLLAMA_HOST update."""
    adopted_ports = _make_ports(ollama_external=True, ollama_port=11434)
    env = _capture_env_updates(adopted_ports)

    assert "OLLAMA_HOST" in env
    assert env["OLLAMA_HOST"] == "http://host.docker.internal:11434"
|
||||||
|
|
||||||
|
|
||||||
|
def test_ollama_host_not_written_when_docker_managed():
    """An Ollama entry flagged external=False produces no OLLAMA_HOST update."""
    managed_ports = _make_ports(ollama_external=False)
    env = _capture_env_updates(managed_ports)

    assert "OLLAMA_HOST" not in env
|
||||||
|
|
||||||
|
|
||||||
|
def test_ollama_host_reflects_adopted_port():
    """OLLAMA_HOST carries the entry's resolved port rather than the default."""
    custom_ports = _make_ports(ollama_external=True, ollama_port=11500)
    env = _capture_env_updates(custom_ports)

    assert env["OLLAMA_HOST"] == "http://host.docker.internal:11500"
|
||||||
Loading…
Reference in a new issue