diff --git a/.env.example b/.env.example index 9763220..61d12b2 100644 --- a/.env.example +++ b/.env.example @@ -19,6 +19,14 @@ VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services OLLAMA_DEFAULT_MODEL=llama3.2:3b +# ── LLM env-var auto-config (alternative to config/llm.yaml) ───────────────── +# Set any of these to configure LLM backends without needing a config/llm.yaml. +# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback). +OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine +OLLAMA_MODEL=llama3.2:3b # model to request from Ollama +OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend +ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend + # API keys (required for remote profile) ANTHROPIC_API_KEY= OPENAI_COMPAT_URL= @@ -31,6 +39,12 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1 # GITHUB_TOKEN= # future — enable when public mirror is active # GITHUB_REPO= # future +# ── CF-hosted coordinator (Paid+ tier) ─────────────────────────────────────── +# Set CF_LICENSE_KEY to authenticate with the hosted coordinator. +# For local self-hosted cf-orch or bare-metal inference, leave CF_LICENSE_KEY blank and clear CF_ORCH_URL. +CF_LICENSE_KEY= +CF_ORCH_URL=https://orch.circuitforge.tech + # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs) CLOUD_MODE=false CLOUD_DATA_ROOT=/devl/menagerie-data diff --git a/app/pages/0_Setup.py b/app/pages/0_Setup.py index fa28123..23d6967 100644 --- a/app/pages/0_Setup.py +++ b/app/pages/0_Setup.py @@ -457,6 +457,11 @@ elif step == 5: from app.wizard.step_inference import validate st.subheader("Step 5 \u2014 Inference & API Keys") + st.info( + "**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — " + "Peregrine auto-detects it, no config file needed. " + "Or use the fields below to configure API keys and endpoints." 
+ ) profile = saved_yaml.get("inference_profile", "remote") if profile == "remote": @@ -466,8 +471,18 @@ elif step == 5: placeholder="https://api.together.xyz/v1") openai_key = st.text_input("Endpoint API Key (optional)", type="password", key="oai_key") if openai_url else "" + ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)", + placeholder="http://localhost:11434", + key="ollama_host_input") + ollama_model = st.text_input("Ollama model (optional)", + value="llama3.2:3b", + key="ollama_model_input") else: st.info(f"Local mode ({profile}): Ollama provides inference.") + import os + _ollama_host_env = os.environ.get("OLLAMA_HOST", "") + if _ollama_host_env: + st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`") anthropic_key = openai_url = openai_key = "" with st.expander("Advanced \u2014 Service Ports & Hosts"): @@ -546,6 +561,14 @@ elif step == 5: if anthropic_key or openai_url: env_path.write_text("\n".join(env_lines) + "\n") + if profile == "remote": + if ollama_host: + env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host) + if ollama_model: + env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model) + if ollama_host or ollama_model: + env_path.write_text("\n".join(env_lines) + "\n") + _save_yaml({"services": svc, "wizard_step": 5}) st.session_state.wizard_step = 6 st.rerun() diff --git a/scripts/llm_router.py b/scripts/llm_router.py index 45f9fc1..b88bed5 100644 --- a/scripts/llm_router.py +++ b/scripts/llm_router.py @@ -1,19 +1,46 @@ """ LLM abstraction layer with priority fallback chain. -Reads config/llm.yaml. Tries backends in order; falls back on any error. +Config lookup order: + 1. config/llm.yaml (relative to the repo root) — per-install local config + 2. ~/.config/circuitforge/llm.yaml — user-level config (circuitforge-core default) + 3. 
env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …) """ from pathlib import Path from circuitforge_core.llm import LLMRouter as _CoreLLMRouter +# Kept for backwards-compatibility — external callers that import CONFIG_PATH +# from this module continue to work. CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml" class LLMRouter(_CoreLLMRouter): - """Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml.""" + """Peregrine-specific LLMRouter — tri-level config path priority. - def __init__(self, config_path: Path = CONFIG_PATH): - super().__init__(config_path) + When ``config_path`` is supplied (e.g. in tests) it is passed straight + through to the core. When omitted, the lookup order is: + 1. config/llm.yaml, relative to the repo root (per-install local config) + 2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default) + 3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST …) + """ + + def __init__(self, config_path: Path | None = None) -> None: + if config_path is not None: + # Explicit path supplied — use it directly (e.g. tests, CLI override). + super().__init__(config_path) + return + + local = Path(__file__).parent.parent / "config" / "llm.yaml" + user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml" + if local.exists(): + super().__init__(local) + elif user_level.exists(): + super().__init__(user_level) + else: + # No yaml found — let circuitforge-core's env-var auto-config run. + # The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml) + # won't exist either, so _auto_config_from_env() will be triggered. + super().__init__() # Module-level singleton for convenience diff --git a/scripts/preflight.py b/scripts/preflight.py index 6e7866b..34d7907 100644 --- a/scripts/preflight.py +++ b/scripts/preflight.py @@ -492,6 +492,12 @@ def main() -> None: # binds a harmless free port instead of conflicting with the external service. 
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()} env_updates["RECOMMENDED_PROFILE"] = profile + # When Ollama is adopted from the host process, write OLLAMA_HOST so + # LLMRouter's env-var auto-config finds it without needing config/llm.yaml. + ollama_info = ports.get("ollama") + if ollama_info and ollama_info.get("external"): + env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}" + if offload_gb > 0: env_updates["CPU_OFFLOAD_GB"] = str(offload_gb) # GPU info for the app container (which lacks nvidia-smi access) diff --git a/tests/test_llm_router_shim.py b/tests/test_llm_router_shim.py new file mode 100644 index 0000000..23866a0 --- /dev/null +++ b/tests/test_llm_router_shim.py @@ -0,0 +1,132 @@ +"""Tests for Peregrine's LLMRouter shim — priority fallback logic.""" +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock, call + +sys.path.insert(0, str(Path(__file__).parent.parent)) + + +def _import_fresh(): + """Import scripts.llm_router fresh (bypass module cache).""" + import importlib + import scripts.llm_router as mod + importlib.reload(mod) + return mod + + +# --------------------------------------------------------------------------- +# Test 1: local config/llm.yaml takes priority when it exists +# --------------------------------------------------------------------------- + +def test_uses_local_yaml_when_present(): + """When config/llm.yaml exists locally, super().__init__ is called with that path.""" + import scripts.llm_router as shim_mod + from circuitforge_core.llm import LLMRouter as _CoreLLMRouter + + local_path = Path(shim_mod.__file__).parent.parent / "config" / "llm.yaml" + user_path = Path.home() / ".config" / "circuitforge" / "llm.yaml" + + def fake_exists(self): + return self == local_path # only the local path "exists" + + captured = {} + + def fake_core_init(self, config_path=None): + captured["config_path"] = config_path + self.config = {} + + 
with patch.object(Path, "exists", fake_exists), \ + patch.object(_CoreLLMRouter, "__init__", fake_core_init): + import importlib + import scripts.llm_router as mod + importlib.reload(mod) + mod.LLMRouter() + + assert captured.get("config_path") == local_path, ( + f"Expected super().__init__ to be called with local path {local_path}, " + f"got {captured.get('config_path')}" + ) + + +# --------------------------------------------------------------------------- +# Test 2: falls through to env-var auto-config when neither yaml exists +# --------------------------------------------------------------------------- + +def test_falls_through_to_env_when_no_yamls(): + """When no yaml files exist, super().__init__ is called with no args (env-var path).""" + import scripts.llm_router as shim_mod + from circuitforge_core.llm import LLMRouter as _CoreLLMRouter + + captured = {} + + def fake_exists(self): + return False # no yaml files exist anywhere + + def fake_core_init(self, config_path=None): + # Record whether a path was passed + captured["config_path"] = config_path + captured["called"] = True + self.config = {} + + with patch.object(Path, "exists", fake_exists), \ + patch.object(_CoreLLMRouter, "__init__", fake_core_init): + import importlib + import scripts.llm_router as mod + importlib.reload(mod) + mod.LLMRouter() + + assert captured.get("called"), "super().__init__ was never called" + # When called with no args, config_path defaults to None in our mock, + # meaning the shim correctly fell through to env-var auto-config + assert captured.get("config_path") is None, ( + f"Expected super().__init__ to be called with no explicit path (None), " + f"got {captured.get('config_path')}" + ) + + +# --------------------------------------------------------------------------- +# Test 3: module-level complete() singleton is only instantiated once +# --------------------------------------------------------------------------- + +def test_complete_singleton_is_reused(): + 
"""complete() reuses the same LLMRouter instance across multiple calls.""" + import importlib + import scripts.llm_router as mod + importlib.reload(mod) + + # Reset singleton + mod._router = None + + instantiation_count = [0] + original_init = mod.LLMRouter.__init__ + + mock_router = MagicMock() + mock_router.complete.return_value = "OK" + + original_class = mod.LLMRouter + + class CountingRouter(original_class): + def __init__(self): + instantiation_count[0] += 1 + # Bypass real __init__ to avoid needing config files + self.config = {} + + def complete(self, prompt, system=None): + return "OK" + + # Patch the class in the module + mod.LLMRouter = CountingRouter + mod._router = None + + result1 = mod.complete("first call") + result2 = mod.complete("second call") + + assert result1 == "OK" + assert result2 == "OK" + assert instantiation_count[0] == 1, ( + f"Expected LLMRouter to be instantiated exactly once, " + f"got {instantiation_count[0]} instantiation(s)" + ) + + # Restore + mod.LLMRouter = original_class diff --git a/tests/test_preflight_env_adoption.py b/tests/test_preflight_env_adoption.py new file mode 100644 index 0000000..21c4cf9 --- /dev/null +++ b/tests/test_preflight_env_adoption.py @@ -0,0 +1,80 @@ +"""Tests: preflight writes OLLAMA_HOST to .env when Ollama is adopted from host.""" +import sys +from pathlib import Path +from unittest.mock import patch, call + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import scripts.preflight as pf + + +def _make_ports(ollama_external: bool = True, ollama_port: int = 11434) -> dict: + """Build a minimal ports dict as returned by preflight's port-scanning logic.""" + return { + "ollama": { + "resolved": ollama_port, + "external": ollama_external, + "stub_port": 54321, + "env_var": "OLLAMA_PORT", + "adoptable": True, + }, + "streamlit": { + "resolved": 8502, + "external": False, + "stub_port": 8502, + "env_var": "STREAMLIT_PORT", + "adoptable": False, + }, + } + + +def _capture_env_updates(ports: dict) -> 
dict: + """Replicate the env_updates construction block from preflight.main() and return the result. + + The logic is copied here rather than called, so these tests run without + needing to simulate the full CLI argument parsing and system probe flow; + keep it in sync with the `if not args.check_only:` section of main(). + """ + captured = {} + + def fake_write_env(updates: dict) -> None: + captured.update(updates) + + with patch.object(pf, "write_env", side_effect=fake_write_env), \ + patch.object(pf, "update_llm_yaml"), \ + patch.object(pf, "write_compose_override"): + # Replicate the env_updates block from preflight.main() as faithfully as possible + env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()} + env_updates["RECOMMENDED_PROFILE"] = "single-gpu" + + # ---- Copy of the code under test: the OLLAMA_HOST adoption block ---- + ollama_info = ports.get("ollama") + if ollama_info and ollama_info.get("external"): + env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}" + # --------------------------------------------------------- + + pf.write_env(env_updates) + + return captured + + +def test_ollama_host_written_when_adopted(): + """OLLAMA_HOST is added when Ollama is adopted from the host (external=True).""" + ports = _make_ports(ollama_external=True, ollama_port=11434) + result = _capture_env_updates(ports) + assert "OLLAMA_HOST" in result + assert result["OLLAMA_HOST"] == "http://host.docker.internal:11434" + + +def test_ollama_host_not_written_when_docker_managed(): + """OLLAMA_HOST is NOT added when Ollama runs in Docker (external=False).""" + ports = _make_ports(ollama_external=False) + result = _capture_env_updates(ports) + assert "OLLAMA_HOST" not in result + + +def test_ollama_host_reflects_adopted_port(): + """OLLAMA_HOST uses the actual adopted port, not the default.""" + ports = _make_ports(ollama_external=True, ollama_port=11500) + result = _capture_env_updates(ports) + assert result["OLLAMA_HOST"] == 
"http://host.docker.internal:11500"