diff --git a/CHANGELOG.md b/CHANGELOG.md index fa5b92f..8dd2d39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ Versions follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html). --- +## [0.20.0] — 2026-05-05 + +### Fixed / Enhanced + +**`circuitforge_core.llm.LLMRouter`** — Pagepiper-driven improvements (closes #59, #60) + +- **#59 — dict init** (`LLMRouter(config_path: Path | dict)`): `__init__` now accepts an inline config dict in addition to a `Path`. Ingest scripts that construct Ollama URLs from product-specific env vars (e.g. `PAGEPIPER_OLLAMA_URL`) can pass the dict directly without writing a temp file. Passing a dict previously raised `AttributeError: 'dict' object has no attribute 'exists'`. Tests: `test_init_accepts_inline_dict`, `test_init_dict_is_used_directly`. + +- **#60 — Ollama preflight** (`_check_ollama_model_pulled()`): Before the first `embed()` call on an Ollama backend, `GET /api/tags` is queried to verify the configured embedding model is pulled. If it is not, a `RuntimeError` with an actionable `ollama pull {model}` hint is raised immediately — replacing the opaque `All LLM backends exhausted for embed()` error. Successful `/api/tags` results are cached per base URL for the router's lifetime (one HTTP call, not one per `embed()` invocation). Non-Ollama backends (vLLM, etc.) don't expose `/api/tags` — a non-200 response or a failed request causes the check to be silently skipped (and is not cached). Tests: `test_embed_raises_actionable_error_when_model_not_pulled`, `test_embed_proceeds_when_model_is_pulled`, `test_embed_skips_preflight_when_tags_endpoint_unavailable`, `test_ollama_tags_cache_is_hit_only_once`. 
+ +--- + ## [0.17.0] — 2026-04-27 ### Added diff --git a/circuitforge_core/llm/router.py b/circuitforge_core/llm/router.py index 593a4c0..e719bc1 100644 --- a/circuitforge_core/llm/router.py +++ b/circuitforge_core/llm/router.py @@ -57,8 +57,11 @@ CONFIG_PATH = Path.home() / ".config" / "circuitforge" / "llm.yaml" class LLMRouter: - def __init__(self, config_path: Path = CONFIG_PATH): - if config_path.exists(): + def __init__(self, config_path: Path | dict = CONFIG_PATH): + self._ollama_tags_cache: dict[str, set[str]] = {} + if isinstance(config_path, dict): + self.config = config_path + elif config_path.exists(): with open(config_path) as f: self.config = yaml.safe_load(f) else: @@ -145,6 +148,37 @@ class LLMRouter: except Exception: return False + def _check_ollama_model_pulled(self, base_url: str, model: str) -> None: + """Raise RuntimeError with actionable message if model is not pulled in Ollama. + + Silently skips the check if the /api/tags endpoint is unavailable (e.g. vLLM). + Results are cached per base_url for the lifetime of this router instance. 
+ """ + tags_url = base_url.rstrip("/").removesuffix("/v1") + "/api/tags" + if not hasattr(self, "_ollama_tags_cache"): + self._ollama_tags_cache = {} + if base_url not in self._ollama_tags_cache: + try: + resp = requests.get(tags_url, timeout=3) + if resp.status_code != 200: + return + pulled = { + m["name"].split(":")[0] + for m in resp.json().get("models", []) + } + self._ollama_tags_cache[base_url] = pulled + except Exception: + return # can't verify — let the actual embed call fail naturally + pulled_models = self._ollama_tags_cache.get(base_url) + if pulled_models is None: + return + model_base = model.split(":")[0] + if model_base not in pulled_models: + raise RuntimeError( + f'Ollama embedding model "{model}" is not pulled.\n' + f"Fix: ollama pull {model}" + ) + def _resolve_model(self, client: OpenAI, model: str) -> str: """Resolve __auto__ to the first model served by vLLM.""" if model != "__auto__": @@ -176,13 +210,14 @@ class LLMRouter: ttl_s = float(orch_cfg.get("ttl_s", 3600.0)) # CF_APP_NAME identifies the calling product (kiwi, peregrine, etc.) # in coordinator analytics — set in each product's .env. 
- pipeline = os.environ.get("CF_APP_NAME") or None + cf_app = os.environ.get("CF_APP_NAME") or None + caller = f"{cf_app}.llm-router" if cf_app else "llm-router" ctx = client.allocate( service, model_candidates=candidates, ttl_s=ttl_s, - caller="llm-router", - pipeline=pipeline, + caller=caller, + pipeline=cf_app, ) alloc = ctx.__enter__() return (ctx, alloc) @@ -424,14 +459,17 @@ class LLMRouter: print(f"[LLMRouter] {name}: unreachable, skipping") continue + embed_model = model_override or backend.get( + "embedding_model", backend["model"] + ) + self._check_ollama_model_pulled(backend["base_url"], embed_model) + try: client = OpenAI( base_url=backend["base_url"], api_key=backend.get("api_key") or "any", ) - model = model_override or backend.get( - "embedding_model", backend["model"] - ) + model = embed_model resp = client.embeddings.create(model=model, input=texts) print(f"[LLMRouter] embed: used backend {name} ({model})") return [item.embedding for item in resp.data] diff --git a/pyproject.toml b/pyproject.toml index 58522a3..26c32a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "circuitforge-core" -version = "0.19.0" +version = "0.20.0" description = "Shared scaffold for CircuitForge products (MIT)" requires-python = ">=3.11" dependencies = [ diff --git a/tests/test_llm_router.py b/tests/test_llm_router.py index 0ca8a81..6b28031 100644 --- a/tests/test_llm_router.py +++ b/tests/test_llm_router.py @@ -125,6 +125,7 @@ def test_embed_returns_vectors_from_openai_compat_backend(): ) with ( patch.object(router, "_is_reachable", return_value=True), + patch("circuitforge_core.llm.router.requests.get", return_value=MagicMock(status_code=404)), patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client), ): result = router.embed(["hello world", "fireball rules"]) @@ -156,6 +157,7 @@ def test_embed_uses_chat_model_when_no_embedding_model_configured(): ) with ( patch.object(router, 
"_is_reachable", return_value=True), + patch("circuitforge_core.llm.router.requests.get", return_value=MagicMock(status_code=404)), patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client), ): router.embed(["test"]) @@ -192,6 +194,7 @@ def test_embed_skips_non_openai_compat_backends(): mock_openai = MagicMock(return_value=mock_client) with ( patch.object(router, "_is_reachable", return_value=True), + patch("circuitforge_core.llm.router.requests.get", return_value=MagicMock(status_code=404)), patch("circuitforge_core.llm.router.OpenAI", mock_openai), ): result = router.embed(["hello"]) @@ -218,3 +221,116 @@ def test_embed_raises_when_all_backends_exhausted(): with patch.object(router, "_is_reachable", return_value=False): with pytest.raises(RuntimeError, match="exhausted"): router.embed(["test"]) + + +# ── #59: LLMRouter dict init ────────────────────────────────────────────────── + + +def test_init_accepts_inline_dict(): + config = { + "fallback_order": ["local"], + "backends": { + "local": { + "type": "openai_compat", + "base_url": "http://localhost:11434/v1", + "model": "llama3", + "supports_images": False, + } + }, + } + router = LLMRouter(config) + assert router.config["fallback_order"] == ["local"] + assert "local" in router.config["backends"] + + +def test_init_dict_is_used_directly(): + config = {"fallback_order": [], "backends": {}} + router = LLMRouter(config) + assert router.config is config + + +# ── #60: Ollama embedding model preflight ───────────────────────────────────── + + +def _ollama_backend(model: str = "nomic-embed-text") -> dict: + return { + "fallback_order": ["ollama"], + "backends": { + "ollama": { + "type": "openai_compat", + "base_url": "http://localhost:11434/v1", + "embedding_model": model, + "model": "mistral:7b", + "supports_images": False, + } + }, + } + + +def test_embed_raises_actionable_error_when_model_not_pulled(): + router = _make_router(_ollama_backend("nomic-embed-text")) + tags_resp = MagicMock(status_code=200) 
+ tags_resp.json.return_value = {"models": [{"name": "mistral:latest"}]} + with ( + patch.object(router, "_is_reachable", return_value=True), + patch("circuitforge_core.llm.router.requests.get", return_value=tags_resp), + ): + with pytest.raises(RuntimeError, match='ollama pull nomic-embed-text'): + router.embed(["hello"]) + + +def test_embed_proceeds_when_model_is_pulled(): + router = _make_router(_ollama_backend("nomic-embed-text")) + tags_resp = MagicMock(status_code=200) + tags_resp.json.return_value = { + "models": [{"name": "nomic-embed-text:latest"}, {"name": "mistral:latest"}] + } + mock_client = MagicMock() + mock_client.embeddings.create.return_value = MagicMock( + data=[MagicMock(embedding=[0.1, 0.2])] + ) + with ( + patch.object(router, "_is_reachable", return_value=True), + patch("circuitforge_core.llm.router.requests.get", return_value=tags_resp), + patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client), + ): + result = router.embed(["hello"]) + assert result == [[0.1, 0.2]] + + +def test_embed_skips_preflight_when_tags_endpoint_unavailable(): + """Non-Ollama backends (vLLM, etc.) 
don't expose /api/tags — check must be silent.""" + router = _make_router(_ollama_backend("custom-embed")) + tags_resp = MagicMock(status_code=404) + mock_client = MagicMock() + mock_client.embeddings.create.return_value = MagicMock( + data=[MagicMock(embedding=[0.5])] + ) + with ( + patch.object(router, "_is_reachable", return_value=True), + patch("circuitforge_core.llm.router.requests.get", return_value=tags_resp), + patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client), + ): + result = router.embed(["hello"]) + assert result == [[0.5]] + + +def test_ollama_tags_cache_is_hit_only_once(): + router = _make_router(_ollama_backend("nomic-embed-text")) + tags_resp = MagicMock(status_code=200) + tags_resp.json.return_value = {"models": [{"name": "nomic-embed-text:latest"}]} + mock_client = MagicMock() + mock_client.embeddings.create.return_value = MagicMock( + data=[MagicMock(embedding=[0.1])] + ) + with ( + patch.object(router, "_is_reachable", return_value=True), + patch("circuitforge_core.llm.router.requests.get", return_value=tags_resp) as mock_get, + patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client), + ): + router.embed(["first"]) + router.embed(["second"]) + + # /api/tags is called once (cache hit on second embed) + tags_calls = [c for c in mock_get.call_args_list if "api/tags" in str(c)] + assert len(tags_calls) == 1