- LLMRouter.__init__ now accepts a Path | dict; pagepiper ingest scripts
pass a runtime-constructed config dict instead of a temp file
- _check_ollama_model_pulled() preflight on embed(): checks /api/tags once
per backend URL and raises RuntimeError("...Fix: ollama pull <model>")
when the configured embedding model is not pulled; silently skips for
non-Ollama backends (vLLM, etc.) that don't expose /api/tags
- 6 new tests: dict init paths (x2) + preflight scenarios (x4)
- Existing embed tests updated to mock requests.get to avoid live Ollama calls
This commit is contained in:
parent
ccc6a15d94
commit
fb3a4c697d
4 changed files with 175 additions and 9 deletions
12
CHANGELOG.md
12
CHANGELOG.md
|
|
@ -6,6 +6,18 @@ Versions follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|||
|
||||
---
|
||||
|
||||
## [0.20.0] — 2026-05-05
|
||||
|
||||
### Fixed / Enhanced
|
||||
|
||||
**`circuitforge_core.llm.LLMRouter`** — Pagepiper-driven improvements (closes #59, #60)
|
||||
|
||||
- **#59 — dict init** (`LLMRouter(config_path: Path | dict)`): `__init__` now accepts an inline config dict in addition to a `Path`. Ingest scripts that construct Ollama URLs from product-specific env vars (e.g. `PAGEPIPER_OLLAMA_URL`) can pass the dict directly without writing a temp file. Passing a dict previously raised `AttributeError: 'dict' object has no attribute 'exists'`. Tests: `test_init_accepts_inline_dict`, `test_init_dict_is_used_directly`.
|
||||
|
||||
- **#60 — Ollama preflight** (`_check_ollama_model_pulled()`): Before an `embed()` call is sent to an Ollama backend, `GET /api/tags` is checked to verify the configured embedding model is pulled (models are matched by name; the `:tag` suffix is ignored). If it is not, a `RuntimeError` with an actionable `ollama pull <model>` hint is raised immediately — replacing the opaque `All LLM backends exhausted for embed()` error. Successful `/api/tags` results are cached per base URL for the router's lifetime (one HTTP call, not one per `embed()` invocation). Non-Ollama backends (vLLM, etc.) don't expose `/api/tags` — a non-200 response or a request error causes the check to be silently skipped. Tests: `test_embed_raises_actionable_error_when_model_not_pulled`, `test_embed_proceeds_when_model_is_pulled`, `test_embed_skips_preflight_when_tags_endpoint_unavailable`, `test_ollama_tags_cache_is_hit_only_once`.
|
||||
|
||||
---
|
||||
|
||||
## [0.17.0] — 2026-04-27
|
||||
|
||||
### Added
|
||||
|
|
|
|||
|
|
@ -57,8 +57,11 @@ CONFIG_PATH = Path.home() / ".config" / "circuitforge" / "llm.yaml"
|
|||
|
||||
|
||||
class LLMRouter:
|
||||
def __init__(self, config_path: Path = CONFIG_PATH):
|
||||
if config_path.exists():
|
||||
def __init__(self, config_path: Path | dict = CONFIG_PATH):
|
||||
self._ollama_tags_cache: dict[str, set[str]] = {}
|
||||
if isinstance(config_path, dict):
|
||||
self.config = config_path
|
||||
elif config_path.exists():
|
||||
with open(config_path) as f:
|
||||
self.config = yaml.safe_load(f)
|
||||
else:
|
||||
|
|
@ -145,6 +148,37 @@ class LLMRouter:
|
|||
except Exception:
|
||||
return False
|
||||
|
||||
def _check_ollama_model_pulled(self, base_url: str, model: str) -> None:
|
||||
"""Raise RuntimeError with actionable message if model is not pulled in Ollama.
|
||||
|
||||
Silently skips the check if the /api/tags endpoint is unavailable (e.g. vLLM).
|
||||
Results are cached per base_url for the lifetime of this router instance.
|
||||
"""
|
||||
tags_url = base_url.rstrip("/").removesuffix("/v1") + "/api/tags"
|
||||
if not hasattr(self, "_ollama_tags_cache"):
|
||||
self._ollama_tags_cache = {}
|
||||
if base_url not in self._ollama_tags_cache:
|
||||
try:
|
||||
resp = requests.get(tags_url, timeout=3)
|
||||
if resp.status_code != 200:
|
||||
return
|
||||
pulled = {
|
||||
m["name"].split(":")[0]
|
||||
for m in resp.json().get("models", [])
|
||||
}
|
||||
self._ollama_tags_cache[base_url] = pulled
|
||||
except Exception:
|
||||
return # can't verify — let the actual embed call fail naturally
|
||||
pulled_models = self._ollama_tags_cache.get(base_url)
|
||||
if pulled_models is None:
|
||||
return
|
||||
model_base = model.split(":")[0]
|
||||
if model_base not in pulled_models:
|
||||
raise RuntimeError(
|
||||
f'Ollama embedding model "{model}" is not pulled.\n'
|
||||
f"Fix: ollama pull {model}"
|
||||
)
|
||||
|
||||
def _resolve_model(self, client: OpenAI, model: str) -> str:
|
||||
"""Resolve __auto__ to the first model served by vLLM."""
|
||||
if model != "__auto__":
|
||||
|
|
@ -176,13 +210,14 @@ class LLMRouter:
|
|||
ttl_s = float(orch_cfg.get("ttl_s", 3600.0))
|
||||
# CF_APP_NAME identifies the calling product (kiwi, peregrine, etc.)
|
||||
# in coordinator analytics — set in each product's .env.
|
||||
pipeline = os.environ.get("CF_APP_NAME") or None
|
||||
cf_app = os.environ.get("CF_APP_NAME") or None
|
||||
caller = f"{cf_app}.llm-router" if cf_app else "llm-router"
|
||||
ctx = client.allocate(
|
||||
service,
|
||||
model_candidates=candidates,
|
||||
ttl_s=ttl_s,
|
||||
caller="llm-router",
|
||||
pipeline=pipeline,
|
||||
caller=caller,
|
||||
pipeline=cf_app,
|
||||
)
|
||||
alloc = ctx.__enter__()
|
||||
return (ctx, alloc)
|
||||
|
|
@ -424,14 +459,17 @@ class LLMRouter:
|
|||
print(f"[LLMRouter] {name}: unreachable, skipping")
|
||||
continue
|
||||
|
||||
embed_model = model_override or backend.get(
|
||||
"embedding_model", backend["model"]
|
||||
)
|
||||
self._check_ollama_model_pulled(backend["base_url"], embed_model)
|
||||
|
||||
try:
|
||||
client = OpenAI(
|
||||
base_url=backend["base_url"],
|
||||
api_key=backend.get("api_key") or "any",
|
||||
)
|
||||
model = model_override or backend.get(
|
||||
"embedding_model", backend["model"]
|
||||
)
|
||||
model = embed_model
|
||||
resp = client.embeddings.create(model=model, input=texts)
|
||||
print(f"[LLMRouter] embed: used backend {name} ({model})")
|
||||
return [item.embedding for item in resp.data]
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||
|
||||
[project]
|
||||
name = "circuitforge-core"
|
||||
version = "0.19.0"
|
||||
version = "0.20.0"
|
||||
description = "Shared scaffold for CircuitForge products (MIT)"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
|
|
|
|||
|
|
@ -125,6 +125,7 @@ def test_embed_returns_vectors_from_openai_compat_backend():
|
|||
)
|
||||
with (
|
||||
patch.object(router, "_is_reachable", return_value=True),
|
||||
patch("circuitforge_core.llm.router.requests.get", return_value=MagicMock(status_code=404)),
|
||||
patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client),
|
||||
):
|
||||
result = router.embed(["hello world", "fireball rules"])
|
||||
|
|
@ -156,6 +157,7 @@ def test_embed_uses_chat_model_when_no_embedding_model_configured():
|
|||
)
|
||||
with (
|
||||
patch.object(router, "_is_reachable", return_value=True),
|
||||
patch("circuitforge_core.llm.router.requests.get", return_value=MagicMock(status_code=404)),
|
||||
patch("circuitforge_core.llm.router.OpenAI", return_value=mock_client),
|
||||
):
|
||||
router.embed(["test"])
|
||||
|
|
@ -192,6 +194,7 @@ def test_embed_skips_non_openai_compat_backends():
|
|||
mock_openai = MagicMock(return_value=mock_client)
|
||||
with (
|
||||
patch.object(router, "_is_reachable", return_value=True),
|
||||
patch("circuitforge_core.llm.router.requests.get", return_value=MagicMock(status_code=404)),
|
||||
patch("circuitforge_core.llm.router.OpenAI", mock_openai),
|
||||
):
|
||||
result = router.embed(["hello"])
|
||||
|
|
@ -218,3 +221,116 @@ def test_embed_raises_when_all_backends_exhausted():
|
|||
with patch.object(router, "_is_reachable", return_value=False):
|
||||
with pytest.raises(RuntimeError, match="exhausted"):
|
||||
router.embed(["test"])
|
||||
|
||||
|
||||
# ── #59: LLMRouter dict init ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_init_accepts_inline_dict():
    """LLMRouter can be built from an in-memory config dict (#59)."""
    local_backend = {
        "type": "openai_compat",
        "base_url": "http://localhost:11434/v1",
        "model": "llama3",
        "supports_images": False,
    }
    inline_config = {
        "fallback_order": ["local"],
        "backends": {"local": local_backend},
    }

    router = LLMRouter(inline_config)

    assert router.config["fallback_order"] == ["local"]
    assert "local" in router.config["backends"]
|
||||
|
||||
|
||||
def test_init_dict_is_used_directly():
    """The router stores the very dict object it was handed, not a copy."""
    supplied = {"fallback_order": [], "backends": {}}
    assert LLMRouter(supplied).config is supplied
|
||||
|
||||
|
||||
# ── #60: Ollama embedding model preflight ─────────────────────────────────────
|
||||
|
||||
|
||||
def _ollama_backend(model: str = "nomic-embed-text") -> dict:
|
||||
return {
|
||||
"fallback_order": ["ollama"],
|
||||
"backends": {
|
||||
"ollama": {
|
||||
"type": "openai_compat",
|
||||
"base_url": "http://localhost:11434/v1",
|
||||
"embedding_model": model,
|
||||
"model": "mistral:7b",
|
||||
"supports_images": False,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_embed_raises_actionable_error_when_model_not_pulled():
    """embed() reports an `ollama pull` hint when /api/tags lacks the model."""
    router = _make_router(_ollama_backend("nomic-embed-text"))

    # /api/tags answers 200 but lists only an unrelated model.
    listing = MagicMock(status_code=200)
    listing.json.return_value = {"models": [{"name": "mistral:latest"}]}

    reachable = patch.object(router, "_is_reachable", return_value=True)
    tags_get = patch(
        "circuitforge_core.llm.router.requests.get", return_value=listing
    )
    with reachable, tags_get:
        with pytest.raises(RuntimeError, match='ollama pull nomic-embed-text'):
            router.embed(["hello"])
|
||||
|
||||
|
||||
def test_embed_proceeds_when_model_is_pulled():
    """embed() returns vectors when /api/tags lists the embedding model."""
    router = _make_router(_ollama_backend("nomic-embed-text"))

    listing = MagicMock(status_code=200)
    listing.json.return_value = {
        "models": [{"name": "nomic-embed-text:latest"}, {"name": "mistral:latest"}]
    }

    # Stand-in OpenAI client whose embeddings call yields a single vector.
    fake_client = MagicMock()
    fake_client.embeddings.create.return_value = MagicMock(
        data=[MagicMock(embedding=[0.1, 0.2])]
    )

    reachable = patch.object(router, "_is_reachable", return_value=True)
    tags_get = patch(
        "circuitforge_core.llm.router.requests.get", return_value=listing
    )
    openai_cls = patch(
        "circuitforge_core.llm.router.OpenAI", return_value=fake_client
    )
    with reachable, tags_get, openai_cls:
        vectors = router.embed(["hello"])

    assert vectors == [[0.1, 0.2]]
|
||||
|
||||
|
||||
def test_embed_skips_preflight_when_tags_endpoint_unavailable():
    """A 404 from /api/tags (as on vLLM and similar) must not block embed()."""
    router = _make_router(_ollama_backend("custom-embed"))

    not_found = MagicMock(status_code=404)

    fake_client = MagicMock()
    fake_client.embeddings.create.return_value = MagicMock(
        data=[MagicMock(embedding=[0.5])]
    )

    reachable = patch.object(router, "_is_reachable", return_value=True)
    tags_get = patch(
        "circuitforge_core.llm.router.requests.get", return_value=not_found
    )
    openai_cls = patch(
        "circuitforge_core.llm.router.OpenAI", return_value=fake_client
    )
    with reachable, tags_get, openai_cls:
        vectors = router.embed(["hello"])

    assert vectors == [[0.5]]
|
||||
|
||||
|
||||
def test_ollama_tags_cache_is_hit_only_once():
    """Two embed() calls on one router trigger a single /api/tags request."""
    router = _make_router(_ollama_backend("nomic-embed-text"))

    listing = MagicMock(status_code=200)
    listing.json.return_value = {"models": [{"name": "nomic-embed-text:latest"}]}

    fake_client = MagicMock()
    fake_client.embeddings.create.return_value = MagicMock(
        data=[MagicMock(embedding=[0.1])]
    )

    reachable = patch.object(router, "_is_reachable", return_value=True)
    tags_get = patch(
        "circuitforge_core.llm.router.requests.get", return_value=listing
    )
    openai_cls = patch(
        "circuitforge_core.llm.router.OpenAI", return_value=fake_client
    )
    with reachable, tags_get as mock_get, openai_cls:
        for text in ("first", "second"):
            router.embed([text])

    # /api/tags is called once (cache hit on second embed)
    tags_call_count = sum(
        1 for call in mock_get.call_args_list if "api/tags" in str(call)
    )
    assert tags_call_count == 1
|
||||
|
|
|
|||
Loading…
Reference in a new issue