feat(orch): migrate meal plan LLM routing to task-based allocation with direct-allocate fallback

Replaces single-path cf-orch allocation with a three-tier strategy:
tier 1 task_allocate() (coordinator-driven), tier 2 direct CFOrchClient.allocate()
(TaskNotRegistered fallback), tier 3 local LLMRouter. Module-level imports for
CFOrchClient and LLMRouter make all three paths patchable in tests without
import caching issues.
This commit is contained in:
pyr0ball 2026-05-13 10:32:58 -07:00
parent 61c428baf0
commit 02abc8e734
2 changed files with 161 additions and 34 deletions

View file

@ -2,17 +2,20 @@
# BSL 1.1 — LLM feature # BSL 1.1 — LLM feature
"""Provide a router-compatible LLM client for meal plan generation tasks. """Provide a router-compatible LLM client for meal plan generation tasks.
Cloud (CF_ORCH_URL set): Cloud (CF_ORCH_URL set), tier 1 task-based routing (preferred):
Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM). Calls /api/inference/task with product=kiwi, task=meal_plan.
Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint The coordinator resolves the model from assignments.yaml.
with a .complete(system, user, **kwargs) interface.
Cloud (CF_ORCH_URL set), tier 2 direct allocation (fallback):
Allocates cf-text directly via client.allocate(). Used when the task
is not yet registered in the coordinator (cf-orch#61 not deployed).
Local / self-hosted (no CF_ORCH_URL): Local / self-hosted (no CF_ORCH_URL):
Returns an LLMRouter instance which tries ollama, vllm, or any Returns an LLMRouter instance which tries ollama, vllm, or any
backend configured in ~/.config/circuitforge/llm.yaml. backend configured in ~/.config/circuitforge/llm.yaml.
Both paths expose the same interface so llm_timing.py and llm_planner.py All paths expose the same (router, ctx) interface so llm_planner.py
need no knowledge of the backend. needs no knowledge of the backend.
""" """
from __future__ import annotations from __future__ import annotations
@ -22,8 +25,7 @@ from contextlib import nullcontext
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# cf-orch service name and VRAM budget for meal plan LLM tasks. # cf-orch service name and TTL for direct-allocate fallback path.
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
_SERVICE_TYPE = "cf-text" _SERVICE_TYPE = "cf-text"
_TTL_S = 120.0 _TTL_S = 120.0
_CALLER = "kiwi-meal-plan" _CALLER = "kiwi-meal-plan"
@ -62,16 +64,58 @@ class _OrchTextRouter:
return resp.choices[0].message.content or "" return resp.choices[0].message.content or ""
# Imported at module level so tests can patch the names in this module's namespace.
# app.services.task_inference.task_allocate — patch target for task routing tests.
try:
from app.services.task_inference import TaskNotRegistered, task_allocate
_HAS_TASK_INFERENCE = True
except ImportError:
_HAS_TASK_INFERENCE = False
# circuitforge_orch.client.CFOrchClient — patch target for direct-allocate fallback tests.
try:
from circuitforge_orch.client import CFOrchClient
except ImportError:
CFOrchClient = None # type: ignore[assignment,misc]
# circuitforge_core.llm.router.LLMRouter — patch target for local-inference tests.
try:
from circuitforge_core.llm.router import LLMRouter
except (ImportError, FileNotFoundError):
LLMRouter = None # type: ignore[assignment,misc]
def get_meal_plan_router(): def get_meal_plan_router():
"""Return an LLM client for meal plan tasks. """Return an LLM client for meal plan tasks.
Tries cf-orch cf-text allocation first (cloud); falls back to LLMRouter Returns (router, ctx) where ctx is a context manager the caller holds
(local ollama/vllm). Returns None if no backend is available. open for the duration of the LLM call. Returns (None, nullcontext(None))
if no backend is available.
""" """
cf_orch_url = os.environ.get("CF_ORCH_URL") cf_orch_url = os.environ.get("CF_ORCH_URL")
if cf_orch_url: if cf_orch_url:
# Tier 1: task-based routing — coordinator owns model selection.
if _HAS_TASK_INFERENCE:
try:
ctx = task_allocate(
"kiwi", "meal_plan",
service_hint=_SERVICE_TYPE,
ttl_s=_TTL_S,
)
alloc = ctx.__enter__()
return _OrchTextRouter(alloc.url), ctx
except TaskNotRegistered:
logger.debug(
"kiwi.meal_plan not in coordinator assignments — "
"falling back to direct cf-text allocation"
)
except Exception as exc:
logger.debug("task allocation failed, trying direct allocate: %s", exc)
# Tier 2: direct allocation — hardcoded service type.
if CFOrchClient is not None:
try: try:
from circuitforge_orch.client import CFOrchClient
client = CFOrchClient(cf_orch_url) client = CFOrchClient(cf_orch_url)
ctx = client.allocate( ctx = client.allocate(
service=_SERVICE_TYPE, service=_SERVICE_TYPE,
@ -84,9 +128,9 @@ def get_meal_plan_router():
except Exception as exc: except Exception as exc:
logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc) logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
# Local fallback: LLMRouter (ollama / vllm / openai-compat) # Tier 3: local inference — ollama / vllm / openai-compat.
if LLMRouter is not None:
try: try:
from circuitforge_core.llm.router import LLMRouter
return LLMRouter(), nullcontext(None) return LLMRouter(), nullcontext(None)
except FileNotFoundError: except FileNotFoundError:
logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled") logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
@ -94,3 +138,4 @@ def get_meal_plan_router():
except Exception as exc: except Exception as exc:
logger.debug("LLMRouter init failed: %s", exc) logger.debug("LLMRouter init failed: %s", exc)
return None, nullcontext(None) return None, nullcontext(None)
return None, nullcontext(None)

View file

@ -0,0 +1,82 @@
"""Tests for task-based routing added to get_meal_plan_router()."""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
def _make_task_ctx(url: str = "http://node:8080") -> MagicMock:
    """Build a stand-in for the context manager that task_allocate() returns.

    Entering the returned mock yields an allocation mock whose ``url`` is the
    given *url*, with a fixed ``allocation_id`` and ``service``. Exiting it
    returns False.
    """
    # Constructor kwargs set plain attributes on the allocation mock.
    allocation = MagicMock(
        url=url,
        allocation_id="alloc-task-1",
        service="cf-text",
    )

    # MagicMock pre-creates __enter__/__exit__; only their results need setting.
    manager = MagicMock()
    manager.__enter__.return_value = allocation
    manager.__exit__.return_value = False
    return manager
def _make_task_ctx_not_registered() -> MagicMock:
    """Build a mock context manager whose ``__enter__`` raises TaskNotRegistered.

    Exiting the mock returns False.
    """
    # Imported here, not at module level, exactly as the original helper does.
    from app.services.task_inference import TaskNotRegistered

    manager = MagicMock()
    # An exception instance as side_effect is raised when the mock is called.
    manager.__enter__.side_effect = TaskNotRegistered("not registered")
    manager.__exit__.return_value = False
    return manager
def _make_direct_alloc_ctx(url: str = "http://node:8080") -> MagicMock:
    """Build a stand-in for the context manager that CFOrchClient.allocate() returns.

    Entering the returned mock yields an allocation mock whose ``url`` is the
    given *url*. Exiting it returns False.
    """
    allocation = MagicMock(url=url)

    # MagicMock pre-creates __enter__/__exit__; only their results need setting.
    manager = MagicMock()
    manager.__enter__.return_value = allocation
    manager.__exit__.return_value = False
    return manager
def test_task_path_returns_orch_router_on_success(monkeypatch):
    """get_meal_plan_router() returns _OrchTextRouter when task allocation succeeds.

    Beyond the router type and URL, this also pins the second half of the
    (router, ctx) contract: the context manager produced by task_allocate()
    is entered once and returned to the caller as-is, and the allocation is
    requested for the kiwi / meal_plan task.
    """
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    task_ctx = _make_task_ctx(url="http://node:9001")

    import unittest.mock as um
    # Patch the name as it exists in llm_router's own namespace (module-level import).
    with um.patch(
        "app.services.meal_plan.llm_router.task_allocate",
        return_value=task_ctx,
    ) as mock_task_allocate:
        from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
        router, ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9001"

    # The task tier must be asked exactly once, for this product/task pair.
    mock_task_allocate.assert_called_once()
    assert mock_task_allocate.call_args[0] == ("kiwi", "meal_plan")

    # The allocation is entered by the router and handed back unchanged, so
    # the caller is the one holding it open for the LLM call.
    task_ctx.__enter__.assert_called_once()
    assert ctx is task_ctx
def test_task_not_registered_falls_back_to_direct_allocate(monkeypatch):
    """get_meal_plan_router() falls back to direct cf-text allocation on TaskNotRegistered.

    Verifies the ordering of the tiers as well as the result: the task path
    must be attempted first, and only then is CFOrchClient built against the
    coordinator URL from CF_ORCH_URL and asked for an allocation.
    """
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    direct_ctx = _make_direct_alloc_ctx(url="http://node:9002")

    import unittest.mock as um
    # Patch task_allocate in llm_router's namespace so TaskNotRegistered is raised.
    with um.patch(
        "app.services.meal_plan.llm_router.task_allocate",
        return_value=_make_task_ctx_not_registered(),
    ) as mock_task_allocate, \
            um.patch("app.services.meal_plan.llm_router.CFOrchClient") as MockClient:
        MockClient.return_value.allocate.return_value = direct_ctx
        from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
        router, ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9002"

    # Without this, the test would still pass if the task tier were skipped.
    mock_task_allocate.assert_called_once()

    # The fallback client must target the coordinator named in CF_ORCH_URL.
    MockClient.assert_called_once_with("http://coord:7700")
    MockClient.return_value.allocate.assert_called_once()
def test_no_cf_orch_url_returns_llm_router(monkeypatch):
    """get_meal_plan_router() returns LLMRouter when CF_ORCH_URL is not set."""
    import unittest.mock as um

    # Make sure the cloud tiers are skipped regardless of the host environment.
    monkeypatch.delenv("CF_ORCH_URL", raising=False)

    local_router = MagicMock()
    # Replace the class in llm_router's namespace; calling it yields local_router.
    llm_router_patch = um.patch(
        "app.services.meal_plan.llm_router.LLMRouter",
        return_value=local_router,
    )
    with llm_router_patch:
        from app.services.meal_plan.llm_router import get_meal_plan_router
        router, _ctx = get_meal_plan_router()

    assert router is local_router