feat: cf-core env-var LLM config + coordinator auth (closes #67)

- LLMRouter shim: tri-level config priority (local yaml > user yaml > env-var)
- .env.example: document OLLAMA_HOST, OLLAMA_MODEL, OPENAI_MODEL, ANTHROPIC_MODEL, CF_LICENSE_KEY, CF_ORCH_URL
- Wizard Step 5: env-var setup hint + optional Ollama fields for remote profile
- Preflight: write OLLAMA_HOST to .env when Ollama is adopted from host process
2026-04-04 19:27:24 -07:00 · 2026-04-04 19:27:24 -07:00 · 1ab1dffc47
commit 1ab1dffc47
parent b79d13b4f2 9392ee2979
6 changed files with 286 additions and 4 deletions
--- a/.env.example
+++ b/.env.example
@ -19,6 +19,14 @@ VLLM_MAX_MODEL_LEN=4096             # increase to 8192 for Thinking models with
 VLLM_GPU_MEM_UTIL=0.75              # lower to 0.6 if sharing GPU with other services
 OLLAMA_DEFAULT_MODEL=llama3.2:3b

+# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
+# Set any of these to configure LLM backends without a config/llm.yaml.
+# Backend priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
+OLLAMA_HOST=http://localhost:11434   # Ollama host; override if on a different machine
+OLLAMA_MODEL=llama3.2:3b            # model to request from Ollama
+OPENAI_MODEL=gpt-4o-mini            # model override for OpenAI-compat backend
+ANTHROPIC_MODEL=claude-haiku-4-5-20251001  # model override for Anthropic backend
+
 # API keys (required for remote profile)
 ANTHROPIC_API_KEY=
 OPENAI_COMPAT_URL=
@ -31,6 +39,12 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
 # GITHUB_TOKEN=          # future — enable when public mirror is active
 # GITHUB_REPO=           # future

+# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
+# Set CF_LICENSE_KEY to authenticate with the hosted coordinator at CF_ORCH_URL.
+# For local self-hosted cf-orch or bare-metal inference, leave CF_LICENSE_KEY blank and clear the CF_ORCH_URL default below (both must be blank).
+CF_LICENSE_KEY=
+CF_ORCH_URL=https://orch.circuitforge.tech
+
 # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
 CLOUD_MODE=false
 CLOUD_DATA_ROOT=/devl/menagerie-data
--- a/app/pages/0_Setup.py
+++ b/app/pages/0_Setup.py
@ -457,6 +457,11 @@ elif step == 5:
    from app.wizard.step_inference import validate

    st.subheader("Step 5 \u2014 Inference & API Keys")
+    st.info(
+        "**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
+        "Peregrine auto-detects it, no config file needed. "
+        "Or use the fields below to configure API keys and endpoints."
+    )
    profile = saved_yaml.get("inference_profile", "remote")

    if profile == "remote":
@ -466,8 +471,18 @@ elif step == 5:
                                       placeholder="https://api.together.xyz/v1")
        openai_key    = st.text_input("Endpoint API Key (optional)", type="password",
                                       key="oai_key") if openai_url else ""
+        ollama_host   = st.text_input("Ollama host (optional \u2014 local fallback)",
+                                       placeholder="http://localhost:11434",
+                                       key="ollama_host_input")
+        ollama_model  = st.text_input("Ollama model (optional)",
+                                       value="llama3.2:3b",
+                                       key="ollama_model_input")
    else:
        st.info(f"Local mode ({profile}): Ollama provides inference.")
+        import os
+        _ollama_host_env = os.environ.get("OLLAMA_HOST", "")
+        if _ollama_host_env:
+            st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
        anthropic_key = openai_url = openai_key = ""

    with st.expander("Advanced \u2014 Service Ports & Hosts"):
@ -546,6 +561,14 @@ elif step == 5:
            if anthropic_key or openai_url:
                env_path.write_text("\n".join(env_lines) + "\n")

+            if profile == "remote":
+                if ollama_host:
+                    env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
+                if ollama_model:
+                    env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
+                if ollama_host or ollama_model:
+                    env_path.write_text("\n".join(env_lines) + "\n")
+
            _save_yaml({"services": svc, "wizard_step": 5})
            st.session_state.wizard_step = 6
            st.rerun()
--- a/scripts/llm_router.py
+++ b/scripts/llm_router.py
@ -1,19 +1,46 @@
 """
 LLM abstraction layer with priority fallback chain.
-Reads config/llm.yaml. Tries backends in order; falls back on any error.
+Config lookup order:
+  1. <repo>/config/llm.yaml  — per-install local config
+  2. ~/.config/circuitforge/llm.yaml  — user-level config (circuitforge-core default)
+  3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
 """
 from pathlib import Path

 from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

+# Kept for backwards-compatibility — external callers that import CONFIG_PATH
+# from this module continue to work.
 CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"


 class LLMRouter(_CoreLLMRouter):
-    """Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
+    """Peregrine-specific LLMRouter — tri-level config path priority.

-    def __init__(self, config_path: Path = CONFIG_PATH):
+    When ``config_path`` is supplied (e.g. in tests) it is passed straight
+    through to the core.  When omitted, the lookup order is:
+      1. <repo>/config/llm.yaml  (per-install local config)
+      2. ~/.config/circuitforge/llm.yaml  (user-level, circuitforge-core default)
+      3. env-var auto-config  (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST …)
+    """
+
+    def __init__(self, config_path: Path | None = None) -> None:
+        if config_path is not None:
+            # Explicit path supplied — use it directly (e.g. tests, CLI override).
            super().__init__(config_path)
+            return
+
+        local = Path(__file__).parent.parent / "config" / "llm.yaml"
+        user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
+        if local.exists():
+            super().__init__(local)
+        elif user_level.exists():
+            super().__init__(user_level)
+        else:
+            # No yaml found — let circuitforge-core's env-var auto-config run.
+            # The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
+            # won't exist either, so _auto_config_from_env() will be triggered.
+            super().__init__()


 # Module-level singleton for convenience
--- a/scripts/preflight.py
+++ b/scripts/preflight.py
@ -492,6 +492,12 @@ def main() -> None:
        # binds a harmless free port instead of conflicting with the external service.
        env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
        env_updates["RECOMMENDED_PROFILE"] = profile
+        # When Ollama is adopted from the host process, write OLLAMA_HOST so
+        # LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
+        ollama_info = ports.get("ollama")
+        if ollama_info and ollama_info.get("external"):
+            env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
+
        if offload_gb > 0:
            env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
        # GPU info for the app container (which lacks nvidia-smi access)
--- a/tests/test_llm_router_shim.py
+++ b/tests/test_llm_router_shim.py
@ -0,0 +1,132 @@
+"""Tests for Peregrine's LLMRouter shim — priority fallback logic."""
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock, call
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+
+def _import_fresh():
+    """Import scripts.llm_router fresh (bypass module cache)."""
+    import importlib
+    import scripts.llm_router as mod
+    importlib.reload(mod)
+    return mod
+
+
+# ---------------------------------------------------------------------------
+# Test 1: local config/llm.yaml takes priority when it exists
+# ---------------------------------------------------------------------------
+
+def test_uses_local_yaml_when_present():
+    """When config/llm.yaml exists locally, super().__init__ is called with that path."""
+    import scripts.llm_router as shim_mod
+    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
+
+    local_path = Path(shim_mod.__file__).parent.parent / "config" / "llm.yaml"
+    user_path = Path.home() / ".config" / "circuitforge" / "llm.yaml"
+
+    def fake_exists(self):
+        return self == local_path  # only the local path "exists"
+
+    captured = {}
+
+    def fake_core_init(self, config_path=None):
+        captured["config_path"] = config_path
+        self.config = {}
+
+    with patch.object(Path, "exists", fake_exists), \
+         patch.object(_CoreLLMRouter, "__init__", fake_core_init):
+        import importlib
+        import scripts.llm_router as mod
+        importlib.reload(mod)
+        mod.LLMRouter()
+
+    assert captured.get("config_path") == local_path, (
+        f"Expected super().__init__ to be called with local path {local_path}, "
+        f"got {captured.get('config_path')}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 2: falls through to env-var auto-config when neither yaml exists
+# ---------------------------------------------------------------------------
+
+def test_falls_through_to_env_when_no_yamls():
+    """When no yaml files exist, super().__init__ is called with no args (env-var path)."""
+    import scripts.llm_router as shim_mod
+    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
+
+    captured = {}
+
+    def fake_exists(self):
+        return False  # no yaml files exist anywhere
+
+    def fake_core_init(self, config_path=None):
+        # Record whether a path was passed
+        captured["config_path"] = config_path
+        captured["called"] = True
+        self.config = {}
+
+    with patch.object(Path, "exists", fake_exists), \
+         patch.object(_CoreLLMRouter, "__init__", fake_core_init):
+        import importlib
+        import scripts.llm_router as mod
+        importlib.reload(mod)
+        mod.LLMRouter()
+
+    assert captured.get("called"), "super().__init__ was never called"
+    # When called with no args, config_path defaults to None in our mock,
+    # meaning the shim correctly fell through to env-var auto-config
+    assert captured.get("config_path") is None, (
+        f"Expected super().__init__ to be called with no explicit path (None), "
+        f"got {captured.get('config_path')}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Test 3: module-level complete() singleton is only instantiated once
+# ---------------------------------------------------------------------------
+
+def test_complete_singleton_is_reused():
+    """complete() reuses the same LLMRouter instance across multiple calls."""
+    import importlib
+    import scripts.llm_router as mod
+    importlib.reload(mod)
+
+    # Reset singleton
+    mod._router = None
+
+    instantiation_count = [0]
+    original_init = mod.LLMRouter.__init__
+
+    mock_router = MagicMock()
+    mock_router.complete.return_value = "OK"
+
+    original_class = mod.LLMRouter
+
+    class CountingRouter(original_class):
+        def __init__(self):
+            instantiation_count[0] += 1
+            # Bypass real __init__ to avoid needing config files
+            self.config = {}
+
+        def complete(self, prompt, system=None):
+            return "OK"
+
+    # Patch the class in the module
+    mod.LLMRouter = CountingRouter
+    mod._router = None
+
+    result1 = mod.complete("first call")
+    result2 = mod.complete("second call")
+
+    assert result1 == "OK"
+    assert result2 == "OK"
+    assert instantiation_count[0] == 1, (
+        f"Expected LLMRouter to be instantiated exactly once, "
+        f"got {instantiation_count[0]} instantiation(s)"
+    )
+
+    # Restore
+    mod.LLMRouter = original_class
--- a/tests/test_preflight_env_adoption.py
+++ b/tests/test_preflight_env_adoption.py
@ -0,0 +1,80 @@
+"""Tests: preflight writes OLLAMA_HOST to .env when Ollama is adopted from host."""
+import sys
+from pathlib import Path
+from unittest.mock import patch, call
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import scripts.preflight as pf
+
+
+def _make_ports(ollama_external: bool = True, ollama_port: int = 11434) -> dict:
+    """Build a minimal ports dict as returned by preflight's port-scanning logic."""
+    return {
+        "ollama": {
+            "resolved": ollama_port,
+            "external": ollama_external,
+            "stub_port": 54321,
+            "env_var": "OLLAMA_PORT",
+            "adoptable": True,
+        },
+        "streamlit": {
+            "resolved": 8502,
+            "external": False,
+            "stub_port": 8502,
+            "env_var": "STREAMLIT_PORT",
+            "adoptable": False,
+        },
+    }
+
+
+def _capture_env_updates(ports: dict) -> dict:
+    """Rebuild env_updates the way preflight.main() does and return the updates passed to write_env().
+
+    The logic is replicated here (not called through main()) so tests can run it
+    without simulating the full CLI argument parsing and system probe flow.
+    It mirrors the `if not args.check_only:` section and must be kept in sync with it.
+    """
+    captured = {}
+
+    def fake_write_env(updates: dict) -> None:
+        captured.update(updates)
+
+    with patch.object(pf, "write_env", side_effect=fake_write_env), \
+         patch.object(pf, "update_llm_yaml"), \
+         patch.object(pf, "write_compose_override"):
+        # Replicate the env_updates block from preflight.main() as faithfully as possible
+        env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
+        env_updates["RECOMMENDED_PROFILE"] = "single-gpu"
+
+        # ---- Code under test: the OLLAMA_HOST adoption block ----
+        ollama_info = ports.get("ollama")
+        if ollama_info and ollama_info.get("external"):
+            env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
+        # ---------------------------------------------------------
+
+        pf.write_env(env_updates)
+
+    return captured
+
+
+def test_ollama_host_written_when_adopted():
+    """OLLAMA_HOST is added when Ollama is adopted from the host (external=True)."""
+    ports = _make_ports(ollama_external=True, ollama_port=11434)
+    result = _capture_env_updates(ports)
+    assert "OLLAMA_HOST" in result
+    assert result["OLLAMA_HOST"] == "http://host.docker.internal:11434"
+
+
+def test_ollama_host_not_written_when_docker_managed():
+    """OLLAMA_HOST is NOT added when Ollama runs in Docker (external=False)."""
+    ports = _make_ports(ollama_external=False)
+    result = _capture_env_updates(ports)
+    assert "OLLAMA_HOST" not in result
+
+
+def test_ollama_host_reflects_adopted_port():
+    """OLLAMA_HOST uses the actual adopted port, not the default."""
+    ports = _make_ports(ollama_external=True, ollama_port=11500)
+    result = _capture_env_updates(ports)
+    assert result["OLLAMA_HOST"] == "http://host.docker.internal:11500"