diff --git a/circuitforge_core/llm/__init__.py b/circuitforge_core/llm/__init__.py
new file mode 100644
index 0000000..79c9313
--- /dev/null
+++ b/circuitforge_core/llm/__init__.py
@@ -0,0 +1,3 @@
+from .router import LLMRouter
+
+__all__ = ["LLMRouter"]
diff --git a/circuitforge_core/llm/router.py b/circuitforge_core/llm/router.py
new file mode 100644
index 0000000..e404fbe
--- /dev/null
+++ b/circuitforge_core/llm/router.py
@@ -0,0 +1,183 @@
+"""
+LLM abstraction layer with priority fallback chain.
+Reads config from ~/.config/circuitforge/llm.yaml.
+Tries backends in order; falls back on any error.
+"""
+import os
+import yaml
+import requests
+from pathlib import Path
+from openai import OpenAI
+
+CONFIG_PATH = Path.home() / ".config" / "circuitforge" / "llm.yaml"
+
+
+class LLMRouter:
+    def __init__(self, config_path: Path = CONFIG_PATH):
+        if not config_path.exists():
+            raise FileNotFoundError(
+                f"{config_path} not found. "
+                "Copy the llm.yaml.example to ~/.config/circuitforge/llm.yaml and configure your LLM backends."
+            )
+        with open(config_path) as f:
+            self.config = yaml.safe_load(f)
+
+    def _is_reachable(self, base_url: str) -> bool:
+        """Quick health-check ping. Returns True if backend is up."""
+        health_url = base_url.rstrip("/").removesuffix("/v1") + "/health"
+        try:
+            resp = requests.get(health_url, timeout=2)
+            return resp.status_code < 500
+        except Exception:
+            return False
+
+    def _resolve_model(self, client: OpenAI, model: str) -> str:
+        """Resolve __auto__ to the first model served by vLLM."""
+        if model != "__auto__":
+            return model
+        models = client.models.list()
+        return models.data[0].id
+
+    def complete(self, prompt: str, system: str | None = None,
+                 model_override: str | None = None,
+                 fallback_order: list[str] | None = None,
+                 images: list[str] | None = None,
+                 max_tokens: int | None = None) -> str:
+        """
+        Generate a completion. Tries each backend in fallback_order.
+
+        model_override: when set, replaces the configured model for
+            openai_compat backends (e.g. pass a research-specific ollama model).
+        fallback_order: when set, overrides config fallback_order for this
+            call (e.g. pass config["research_fallback_order"] for research tasks).
+        images: optional list of base64-encoded PNG/JPG strings. When provided,
+            backends without supports_images=true are skipped. vision_service backends
+            are only tried when images is provided.
+        Raises RuntimeError if all backends are exhausted.
+        """
+        if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
+            raise RuntimeError(
+                "AI inference is disabled in the public demo. "
+                "Run your own instance to use AI features."
+            )
+        order = fallback_order if fallback_order is not None else self.config["fallback_order"]
+        for name in order:
+            backend = self.config["backends"][name]
+
+            if not backend.get("enabled", True):
+                print(f"[LLMRouter] {name}: disabled, skipping")
+                continue
+
+            supports_images = backend.get("supports_images", False)
+            is_vision_service = backend["type"] == "vision_service"
+
+            # vision_service only used when images provided
+            if is_vision_service and not images:
+                print(f"[LLMRouter] {name}: vision_service skipped (no images)")
+                continue
+
+            # non-vision backends skipped when images provided and they don't support it
+            if images and not supports_images and not is_vision_service:
+                print(f"[LLMRouter] {name}: no image support, skipping")
+                continue
+
+            if is_vision_service:
+                if not self._is_reachable(backend["base_url"]):
+                    print(f"[LLMRouter] {name}: unreachable, skipping")
+                    continue
+                try:
+                    resp = requests.post(
+                        backend["base_url"].rstrip("/") + "/analyze",
+                        json={
+                            "prompt": prompt,
+                            "image_base64": images[0] if images else "",
+                        },
+                        timeout=60,
+                    )
+                    resp.raise_for_status()
+                    print(f"[LLMRouter] Used backend: {name} (vision_service)")
+                    return resp.json()["text"]
+                except Exception as e:
+                    print(f"[LLMRouter] {name}: error — {e}, trying next")
+                    continue
+
+            elif backend["type"] == "openai_compat":
+                if not self._is_reachable(backend["base_url"]):
+                    print(f"[LLMRouter] {name}: unreachable, skipping")
+                    continue
+                try:
+                    client = OpenAI(
+                        base_url=backend["base_url"],
+                        api_key=backend.get("api_key") or "any",
+                    )
+                    raw_model = model_override or backend["model"]
+                    model = self._resolve_model(client, raw_model)
+                    messages = []
+                    if system:
+                        messages.append({"role": "system", "content": system})
+                    if images and supports_images:
+                        content = [{"type": "text", "text": prompt}]
+                        for img in images:
+                            content.append({
+                                "type": "image_url",
+                                "image_url": {"url": f"data:image/png;base64,{img}"},
+                            })
+                        messages.append({"role": "user", "content": content})
+                    else:
+                        messages.append({"role": "user", "content": prompt})
+
+                    create_kwargs: dict = {"model": model, "messages": messages}
+                    if max_tokens is not None:
+                        create_kwargs["max_tokens"] = max_tokens
+                    resp = client.chat.completions.create(**create_kwargs)
+                    print(f"[LLMRouter] Used backend: {name} ({model})")
+                    return resp.choices[0].message.content
+
+                except Exception as e:
+                    print(f"[LLMRouter] {name}: error — {e}, trying next")
+                    continue
+
+            elif backend["type"] == "anthropic":
+                api_key = os.environ.get(backend["api_key_env"], "")
+                if not api_key:
+                    print(f"[LLMRouter] {name}: {backend['api_key_env']} not set, skipping")
+                    continue
+                try:
+                    import anthropic as _anthropic
+                    client = _anthropic.Anthropic(api_key=api_key)
+                    if images and supports_images:
+                        content = []
+                        for img in images:
+                            content.append({
+                                "type": "image",
+                                "source": {"type": "base64", "media_type": "image/png", "data": img},
+                            })
+                        content.append({"type": "text", "text": prompt})
+                    else:
+                        content = prompt
+                    kwargs: dict = {
+                        "model": backend["model"],
+                        "max_tokens": 4096,
+                        "messages": [{"role": "user", "content": content}],
+                    }
+                    if system:
+                        kwargs["system"] = system
+                    msg = client.messages.create(**kwargs)
+                    print(f"[LLMRouter] Used backend: {name}")
+                    return msg.content[0].text
+                except Exception as e:
+                    print(f"[LLMRouter] {name}: error — {e}, trying next")
+                    continue
+
+        raise RuntimeError("All LLM backends exhausted")
+
+
+# Module-level singleton for convenience
+_router: LLMRouter | None = None
+
+
+def complete(prompt: str, system: str | None = None) -> str:
+    global _router
+    if _router is None:
+        _router = LLMRouter()
+    return _router.complete(prompt, system)
diff --git a/tests/test_llm_router.py b/tests/test_llm_router.py
new file mode 100644
index 0000000..3cb6ca0
--- /dev/null
+++ b/tests/test_llm_router.py
@@ -0,0 +1,79 @@
+from unittest.mock import MagicMock, patch
+import pytest
+from circuitforge_core.llm import LLMRouter
+
+
+def _make_router(config: dict) -> LLMRouter:
+    """Build a router from an in-memory config dict (bypass file loading)."""
+    router = object.__new__(LLMRouter)
+    router.config = config
+    return router
+
+
+def test_complete_uses_first_reachable_backend():
+    router = _make_router({
+        "fallback_order": ["local"],
+        "backends": {
+            "local": {
+                "type": "openai_compat",
+                "base_url": "http://localhost:11434/v1",
+                "model": "llama3",
+                "supports_images": False,
+            }
+        }
+    })
+    mock_client = MagicMock()
+    mock_client.chat.completions.create.return_value = MagicMock(
+        choices=[MagicMock(message=MagicMock(content="hello"))]
+    )
+    with patch.object(router, "_is_reachable", return_value=True), \
+            patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client):
+        result = router.complete("say hello")
+    assert result == "hello"
+
+
+def test_complete_falls_back_on_unreachable_backend():
+    router = _make_router({
+        "fallback_order": ["unreachable", "working"],
+        "backends": {
+            "unreachable": {
+                "type": "openai_compat",
+                "base_url": "http://nowhere:1/v1",
+                "model": "x",
+                "supports_images": False,
+            },
+            "working": {
+                "type": "openai_compat",
+                "base_url": "http://localhost:11434/v1",
+                "model": "llama3",
+                "supports_images": False,
+            }
+        }
+    })
+    mock_client = MagicMock()
+    mock_client.chat.completions.create.return_value = MagicMock(
+        choices=[MagicMock(message=MagicMock(content="fallback"))]
+    )
+    def reachable(url):
+        return "nowhere" not in url
+    with patch.object(router, "_is_reachable", side_effect=reachable), \
+            patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client):
+        result = router.complete("test")
+    assert result == "fallback"
+
+
+def test_complete_raises_when_all_backends_exhausted():
+    router = _make_router({
+        "fallback_order": ["dead"],
+        "backends": {
+            "dead": {
+                "type": "openai_compat",
+                "base_url": "http://nowhere:1/v1",
+                "model": "x",
+                "supports_images": False,
+            }
+        }
+    })
+    with patch.object(router, "_is_reachable", return_value=False):
+        with pytest.raises(RuntimeError, match="exhausted"):
+            router.complete("test")
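
Reviewer note: for context, a minimal llm.yaml sketch based on the keys router.py actually reads
(fallback_order, and per-backend type / base_url / model / supports_images / enabled / api_key /
api_key_env). The backend names, URLs, and model ids below are illustrative placeholders, not
values shipped with this change:

    fallback_order: [vllm, ollama, claude]
    backends:
      vllm:
        type: openai_compat
        base_url: http://localhost:8000/v1
        model: __auto__            # resolved to the first model the server lists
        supports_images: false
      ollama:
        type: openai_compat
        base_url: http://localhost:11434/v1
        model: llama3
        supports_images: false
      claude:
        type: anthropic
        api_key_env: ANTHROPIC_API_KEY
        model: <anthropic-model-id>
        supports_images: true

An optional research_fallback_order list, as mentioned in the complete() docstring, would sit at
the same top level and be passed explicitly via the fallback_order argument for research tasks.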