feat: cf-core env-var LLM config + coordinator auth (closes #67)

- LLMRouter shim: tri-level config priority (local yaml > user yaml > env-var)
- .env.example: document OLLAMA_HOST, OLLAMA_MODEL, OPENAI_MODEL, ANTHROPIC_MODEL, CF_LICENSE_KEY, CF_ORCH_URL
- Wizard Step 5: env-var setup hint + optional Ollama fields for remote profile
- Preflight: write OLLAMA_HOST to .env when Ollama is adopted from host process
2026-04-04 19:27:24 -07:00 · 2026-04-04 19:27:24 -07:00 · 1ab1dffc47
commit 1ab1dffc47
parent b79d13b4f2 9392ee2979
6 changed files with 286 additions and 4 deletions
--- a/.env.example
+++ b/.env.example
@ -19,6 +19,14 @@ VLLM_MAX_MODEL_LEN=4096             # increase to 8192 for Thinking models with
 VLLM_GPU_MEM_UTIL=0.75              # lower to 0.6 if sharing GPU with other services
 OLLAMA_DEFAULT_MODEL=llama3.2:3b
 # ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
 # Set any of these to configure LLM backends without needing a config/llm.yaml.
 # Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
 OLLAMA_HOST=http://localhost:11434   # Ollama host; override if on a different machine
 OLLAMA_MODEL=llama3.2:3b            # model to request from Ollama
 OPENAI_MODEL=gpt-4o-mini            # model override for OpenAI-compat backend
 ANTHROPIC_MODEL=claude-haiku-4-5-20251001  # model override for Anthropic backend
 # API keys (required for remote profile)
 ANTHROPIC_API_KEY=
 OPENAI_COMPAT_URL=
@ -31,6 +39,12 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
 # GITHUB_TOKEN=          # future — enable when public mirror is active
 # GITHUB_REPO=           # future
 # ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
 # Set CF_LICENSE_KEY to authenticate with the hosted coordinator at CF_ORCH_URL.
 # For local self-hosted cf-orch or bare-metal inference, leave CF_LICENSE_KEY
 # blank and clear the CF_ORCH_URL value below so that both are empty.
 CF_LICENSE_KEY=
 CF_ORCH_URL=https://orch.circuitforge.tech
 # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
 CLOUD_MODE=false
 CLOUD_DATA_ROOT=/devl/menagerie-data
--- a/app/pages/0_Setup.py
+++ b/app/pages/0_Setup.py
@ -457,6 +457,11 @@ elif step == 5:
    from app.wizard.step_inference import validate
    st.subheader("Step 5 \u2014 Inference & API Keys")
    st.info(
        "**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
        "Peregrine auto-detects it, no config file needed. "
        "Or use the fields below to configure API keys and endpoints."
    )
    profile = saved_yaml.get("inference_profile", "remote")
    if profile == "remote":
@ -466,8 +471,18 @@ elif step == 5:
                                       placeholder="https://api.together.xyz/v1")
        openai_key    = st.text_input("Endpoint API Key (optional)", type="password",
                                       key="oai_key") if openai_url else ""
        ollama_host   = st.text_input("Ollama host (optional \u2014 local fallback)",
                                       placeholder="http://localhost:11434",
                                       key="ollama_host_input")
        ollama_model  = st.text_input("Ollama model (optional)",
                                       value="llama3.2:3b",
                                       key="ollama_model_input")
    else:
        st.info(f"Local mode ({profile}): Ollama provides inference.")
        import os
        _ollama_host_env = os.environ.get("OLLAMA_HOST", "")
        if _ollama_host_env:
            st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
        anthropic_key = openai_url = openai_key = ""
    with st.expander("Advanced \u2014 Service Ports & Hosts"):
@ -546,6 +561,14 @@ elif step == 5:
            if anthropic_key or openai_url:
                env_path.write_text("\n".join(env_lines) + "\n")
            if profile == "remote":
                if ollama_host:
                    env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
                if ollama_model:
                    env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
                if ollama_host or ollama_model:
                    env_path.write_text("\n".join(env_lines) + "\n")
            _save_yaml({"services": svc, "wizard_step": 5})
            st.session_state.wizard_step = 6
            st.rerun()
--- a/scripts/llm_router.py
+++ b/scripts/llm_router.py
@ -1,19 +1,46 @@
 """
 LLM abstraction layer with priority fallback chain.
-Reads config/llm.yaml. Tries backends in order; falls back on any error.
+Config lookup order:
  1. <repo>/config/llm.yaml  — per-install local config
  2. ~/.config/circuitforge/llm.yaml  — user-level config (circuitforge-core default)
  3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
 """
 from pathlib import Path
 from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
 # Kept for backwards-compatibility — external callers that import CONFIG_PATH
 # from this module continue to work.
 CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
 class LLMRouter(_CoreLLMRouter):
-    """Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
+    """Peregrine-specific LLMRouter — tri-level config path priority.
-    def __init__(self, config_path: Path = CONFIG_PATH):
+    When ``config_path`` is supplied (e.g. in tests) it is passed straight
-        super().__init__(config_path)
+    through to the core.  When omitted, the lookup order is:
      1. <repo>/config/llm.yaml  (per-install local config)
      2. ~/.config/circuitforge/llm.yaml  (user-level, circuitforge-core default)
      3. env-var auto-config  (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST …)
    """
    def __init__(self, config_path: Path | None = None) -> None:
        """Initialise the router from the first config source that applies.

        An explicit ``config_path`` is handed to the core router unchanged.
        Otherwise ``<repo>/config/llm.yaml`` is tried first, then
        ``~/.config/circuitforge/llm.yaml``; when neither file exists the
        core router is constructed without a path.
        """
        if config_path is not None:
            # Caller chose the file (tests, CLI override) — no lookup needed.
            super().__init__(config_path)
            return
        repo_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
        home_yaml = Path.home() / ".config" / "circuitforge" / "llm.yaml"
        # Highest priority first; the first file found on disk wins.
        for candidate in (repo_yaml, home_yaml):
            if candidate.exists():
                super().__init__(candidate)
                return
        # No yaml anywhere: construct the core with no path at all.
        # NOTE(review): presumably circuitforge-core then falls back to its
        # env-var auto-config — that behaviour lives in the core; confirm there.
        super().__init__()
 # Module-level singleton for convenience
--- a/scripts/preflight.py
+++ b/scripts/preflight.py
@ -492,6 +492,12 @@ def main() -> None:
        # binds a harmless free port instead of conflicting with the external service.
        env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
        env_updates["RECOMMENDED_PROFILE"] = profile
        # When Ollama is adopted from the host process, write OLLAMA_HOST so
        # LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
        ollama_info = ports.get("ollama")
        if ollama_info and ollama_info.get("external"):
            env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
        if offload_gb > 0:
            env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
        # GPU info for the app container (which lacks nvidia-smi access)
--- a/tests/test_llm_router_shim.py
+++ b/tests/test_llm_router_shim.py
@ -0,0 +1,132 @@
 """Tests for Peregrine's LLMRouter shim — priority fallback logic."""
 import sys
 from pathlib import Path
 from unittest.mock import patch, MagicMock, call
 sys.path.insert(0, str(Path(__file__).parent.parent))
 def _import_fresh():
    """Re-execute ``scripts.llm_router`` via ``importlib.reload`` and return the module."""
    from importlib import reload
    import scripts.llm_router as router_module
    # Reload in place so module-level state is rebuilt, then hand back the
    # same module reference that was imported above.
    reload(router_module)
    return router_module
 # ---------------------------------------------------------------------------
 # Test 1: local config/llm.yaml takes priority when it exists
 # ---------------------------------------------------------------------------
 def test_uses_local_yaml_when_present():
    """When config/llm.yaml exists locally, super().__init__ is called with that path.

    ``Path.exists`` is patched so that only the repo-local yaml appears to be
    present, and the core ``__init__`` is replaced with a recorder, so no
    config file is actually read.
    """
    import importlib
    import scripts.llm_router as shim_mod
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
    # Work out the expected path before Path.exists is patched.
    local_path = Path(shim_mod.__file__).parent.parent / "config" / "llm.yaml"
    captured = {}
    def fake_exists(self):
        return self == local_path  # only the local path "exists"
    def fake_core_init(self, config_path=None):
        # Record what the shim forwarded instead of loading any config.
        captured["config_path"] = config_path
        self.config = {}
    with patch.object(Path, "exists", fake_exists), \
         patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        # Reload inside the patch context so module-level code runs with the
        # fakes active, then build a router with no explicit path.
        importlib.reload(shim_mod)
        shim_mod.LLMRouter()
    assert captured.get("config_path") == local_path, (
        f"Expected super().__init__ to be called with local path {local_path}, "
        f"got {captured.get('config_path')}"
    )
 # ---------------------------------------------------------------------------
 # Test 2: falls through to env-var auto-config when neither yaml exists
 # ---------------------------------------------------------------------------
 def test_falls_through_to_env_when_no_yamls():
    """When no yaml files exist, super().__init__ is called with no args (env-var path).

    The fake core ``__init__`` uses a private sentinel as its default, so the
    test can tell "no argument passed" apart from an explicit ``None``.
    """
    import importlib
    import scripts.llm_router as shim_mod
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
    not_passed = object()  # sentinel default: proves the argument was omitted
    captured = {}
    def fake_exists(self):
        return False  # no yaml files exist anywhere
    def fake_core_init(self, config_path=not_passed):
        # Record whether a path was passed
        captured["config_path"] = config_path
        self.config = {}
    with patch.object(Path, "exists", fake_exists), \
         patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        # Reload inside the patch context so module-level code runs with the
        # fakes active, then build a router with no explicit path.
        importlib.reload(shim_mod)
        shim_mod.LLMRouter()
    assert "config_path" in captured, "super().__init__ was never called"
    # The sentinel only survives when the shim omitted the argument entirely,
    # i.e. it fell through to the core's own default handling.
    assert captured["config_path"] is not_passed, (
        f"Expected super().__init__ to be called with no explicit path, "
        f"got {captured['config_path']!r}"
    )
 # ---------------------------------------------------------------------------
 # Test 3: module-level complete() singleton is only instantiated once
 # ---------------------------------------------------------------------------
 def test_complete_singleton_is_reused():
    """complete() reuses the same LLMRouter instance across multiple calls.

    The module's ``LLMRouter`` is temporarily replaced by a counting subclass.
    The swap and the cached ``_router`` are undone in a ``finally`` block so a
    failure here cannot leak the fake router into other tests.
    """
    import importlib
    import scripts.llm_router as mod
    importlib.reload(mod)
    instantiation_count = [0]
    original_class = mod.LLMRouter
    class CountingRouter(original_class):
        def __init__(self):
            instantiation_count[0] += 1
            # Bypass real __init__ to avoid needing config files
            self.config = {}
        def complete(self, prompt, system=None):
            return "OK"
    # Patch the class in the module and clear any cached singleton so the
    # first complete() call has to instantiate CountingRouter.
    mod.LLMRouter = CountingRouter
    mod._router = None
    try:
        result1 = mod.complete("first call")
        result2 = mod.complete("second call")
    finally:
        # Always restore the real class and drop the fake singleton.
        mod.LLMRouter = original_class
        mod._router = None
    assert result1 == "OK"
    assert result2 == "OK"
    assert instantiation_count[0] == 1, (
        f"Expected LLMRouter to be instantiated exactly once, "
        f"got {instantiation_count[0]} instantiation(s)"
    )
--- a/tests/test_preflight_env_adoption.py
+++ b/tests/test_preflight_env_adoption.py
@ -0,0 +1,80 @@
 """Tests: preflight writes OLLAMA_HOST to .env when Ollama is adopted from host."""
 import sys
 from pathlib import Path
 from unittest.mock import patch, call
 sys.path.insert(0, str(Path(__file__).parent.parent))
 import scripts.preflight as pf
 def _make_ports(ollama_external: bool = True, ollama_port: int = 11434) -> dict:
    """Return a two-service ports mapping ("ollama" and "streamlit") for the tests.

    Each entry carries ``resolved``, ``external``, ``stub_port``, ``env_var``
    and ``adoptable``.  Only the Ollama entry is parameterised:
    ``ollama_port`` becomes its ``resolved`` port and ``ollama_external`` its
    ``external`` flag; the Streamlit entry is fixed.
    """
    ollama_entry = dict(
        resolved=ollama_port,
        external=ollama_external,
        stub_port=54321,
        env_var="OLLAMA_PORT",
        adoptable=True,
    )
    streamlit_entry = dict(
        resolved=8502,
        external=False,
        stub_port=8502,
        env_var="STREAMLIT_PORT",
        adoptable=False,
    )
    # Insertion order (ollama, then streamlit) is kept for anything that
    # iterates over the mapping.
    return {"ollama": ollama_entry, "streamlit": streamlit_entry}
 def _capture_env_updates(ports: dict) -> dict:
    """Build the env_updates mapping for ``ports`` and return what reaches ``pf.write_env``.

    NOTE(review): the construction below is a hand-written replica of the
    env_updates block in preflight.main(), not a call into it, so that the
    tests avoid CLI parsing and system probing.  Keep the two in sync.
    ``pf.write_env``, ``pf.update_llm_yaml`` and ``pf.write_compose_override``
    are mocked out for the duration of the call.
    """
    recorded = {}
    with patch.object(pf, "write_env", side_effect=recorded.update), \
         patch.object(pf, "update_llm_yaml"), \
         patch.object(pf, "write_compose_override"):
        # One entry per service: its env var name mapped to the stub port.
        env_updates: dict[str, str] = {}
        for info in ports.values():
            env_updates[info["env_var"]] = str(info["stub_port"])
        env_updates["RECOMMENDED_PROFILE"] = "single-gpu"
        # ---- Code under test: the OLLAMA_HOST adoption block ----
        ollama_info = ports.get("ollama")
        if ollama_info and ollama_info.get("external"):
            env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
        # ---------------------------------------------------------
        # The mocked write_env copies the mapping into ``recorded``.
        pf.write_env(env_updates)
    return recorded
 def test_ollama_host_written_when_adopted():
    """With Ollama marked external on port 11434, OLLAMA_HOST points at that host port."""
    env = _capture_env_updates(_make_ports(ollama_external=True, ollama_port=11434))
    assert "OLLAMA_HOST" in env
    assert env["OLLAMA_HOST"] == "http://host.docker.internal:11434"
 def test_ollama_host_not_written_when_docker_managed():
    """With Ollama not marked external (external=False), no OLLAMA_HOST key is produced."""
    env = _capture_env_updates(_make_ports(ollama_external=False))
    assert "OLLAMA_HOST" not in env
 def test_ollama_host_reflects_adopted_port():
    """OLLAMA_HOST carries the resolved port (11500 here) rather than the 11434 default."""
    env = _capture_env_updates(_make_ports(ollama_external=True, ollama_port=11500))
    assert env["OLLAMA_HOST"] == "http://host.docker.internal:11500"