feat(orch): migrate meal plan LLM routing to task-based allocation with direct-allocate fallback
Replaces single-path cf-orch allocation with a three-tier strategy: tier 1 task_allocate() (coordinator-driven), tier 2 direct CFOrchClient.allocate() (fallback when the task is not registered or task allocation fails), tier 3 local LLMRouter. Module-level imports for task_allocate, CFOrchClient, and LLMRouter make all three paths patchable in tests without import caching issues.
This commit is contained in:
parent
61c428baf0
commit
02abc8e734
2 changed files with 161 additions and 34 deletions
|
|
@ -2,17 +2,20 @@
|
|||
# BSL 1.1 — LLM feature
|
||||
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
||||
|
||||
Cloud (CF_ORCH_URL set):
|
||||
Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
|
||||
Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
|
||||
with a .complete(system, user, **kwargs) interface.
|
||||
Cloud (CF_ORCH_URL set), tier 1 — task-based routing (preferred):
|
||||
Calls /api/inference/task with product=kiwi, task=meal_plan.
|
||||
The coordinator resolves the model from assignments.yaml.
|
||||
|
||||
Cloud (CF_ORCH_URL set), tier 2 — direct allocation (fallback):
|
||||
Allocates cf-text directly via client.allocate(). Used when the task
|
||||
is not yet registered in the coordinator (cf-orch#61 not deployed), or when task allocation fails for any other reason.
|
||||
|
||||
Local / self-hosted (no CF_ORCH_URL, or both cloud tiers unavailable), tier 3:
|
||||
Returns an LLMRouter instance which tries ollama, vllm, or any
|
||||
backend configured in ~/.config/circuitforge/llm.yaml.
|
||||
|
||||
Both paths expose the same interface so llm_timing.py and llm_planner.py
|
||||
need no knowledge of the backend.
|
||||
All paths expose the same (router, ctx) interface so llm_planner.py
|
||||
needs no knowledge of the backend.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -22,8 +25,7 @@ from contextlib import nullcontext
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# cf-orch service name and VRAM budget for meal plan LLM tasks.
|
||||
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
|
||||
# cf-orch service name and TTL (shared by task-based and direct allocation) and caller tag (direct allocation only).
|
||||
_SERVICE_TYPE = "cf-text"
|
||||
_TTL_S = 120.0
|
||||
_CALLER = "kiwi-meal-plan"
|
||||
|
|
@ -62,35 +64,78 @@ class _OrchTextRouter:
|
|||
return resp.choices[0].message.content or ""
|
||||
|
||||
|
||||
# Optional backends are imported at module level so tests can patch the names
# in this module's namespace (e.g. "app.services.meal_plan.llm_router.task_allocate").
# Every name below is always bound, to None when its import fails, so looking
# the attribute up on this module never raises AttributeError.

# Tier 1: task-based routing helpers. _HAS_TASK_INFERENCE gates their use;
# callers must check it before touching task_allocate or TaskNotRegistered.
try:
    from app.services.task_inference import TaskNotRegistered, task_allocate
    _HAS_TASK_INFERENCE = True
except ImportError:
    TaskNotRegistered = None  # type: ignore[assignment,misc]
    task_allocate = None  # type: ignore[assignment]
    _HAS_TASK_INFERENCE = False

# Tier 2: direct-allocate client. None means the cf-orch client package is
# not importable and the direct allocation path is skipped.
try:
    from circuitforge_orch.client import CFOrchClient
except ImportError:
    CFOrchClient = None  # type: ignore[assignment,misc]

# Tier 3: local inference router. A FileNotFoundError raised while importing
# is treated the same as the package being unavailable.
try:
    from circuitforge_core.llm.router import LLMRouter
except (ImportError, FileNotFoundError):
    LLMRouter = None  # type: ignore[assignment,misc]
|
||||
|
||||
|
||||
def _enter_allocation(ctx):
    """Enter an allocation context and wrap its endpoint in an _OrchTextRouter.

    Returns ``(router, ctx)`` with ``ctx`` still open, or ``None`` when the
    context yields no allocation. The context is exited before returning
    ``None`` or re-raising, so an abandoned attempt does not leave an entered
    context behind. Exceptions raised by ``ctx.__enter__()`` itself propagate
    unchanged (nothing was entered, so there is nothing to exit).
    """
    alloc = ctx.__enter__()
    try:
        router = None if alloc is None else _OrchTextRouter(alloc.url)
    except Exception as exc:
        # Router construction failed after the context was entered: release it.
        ctx.__exit__(type(exc), exc, exc.__traceback__)
        raise
    if router is None:
        ctx.__exit__(None, None, None)
        return None
    return router, ctx


def get_meal_plan_router():
    """Return an LLM client for meal plan tasks.

    Backends are tried in order:

    1. Task-based routing via ``task_allocate("kiwi", "meal_plan", ...)``
       (only when CF_ORCH_URL is set and task inference is importable).
    2. Direct cf-text allocation via ``CFOrchClient.allocate()``
       (only when CF_ORCH_URL is set and the client is importable).
    3. A local ``LLMRouter()``.

    Any failure in a tier is logged at debug level and the next tier is tried.

    Returns (router, ctx) where ctx is a context manager the caller holds
    open for the duration of the LLM call. Returns (None, nullcontext(None))
    if no backend is available.
    """
    cf_orch_url = os.environ.get("CF_ORCH_URL")

    if cf_orch_url:
        # Tier 1: task-based routing — coordinator owns model selection.
        if _HAS_TASK_INFERENCE:
            try:
                entered = _enter_allocation(
                    task_allocate(
                        "kiwi", "meal_plan",
                        service_hint=_SERVICE_TYPE,
                        ttl_s=_TTL_S,
                    )
                )
                if entered is not None:
                    return entered
                logger.debug("task allocation returned no allocation, trying direct allocate")
            except TaskNotRegistered:
                logger.debug(
                    "kiwi.meal_plan not in coordinator assignments — "
                    "falling back to direct cf-text allocation"
                )
            except Exception as exc:
                logger.debug("task allocation failed, trying direct allocate: %s", exc)

        # Tier 2: direct allocation — hardcoded service type.
        if CFOrchClient is not None:
            try:
                client = CFOrchClient(cf_orch_url)
                entered = _enter_allocation(
                    client.allocate(
                        service=_SERVICE_TYPE,
                        ttl_s=_TTL_S,
                        caller=_CALLER,
                    )
                )
                if entered is not None:
                    return entered
            except Exception as exc:
                logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)

    # Tier 3: local inference — ollama / vllm / openai-compat.
    if LLMRouter is not None:
        try:
            return LLMRouter(), nullcontext(None)
        except FileNotFoundError:
            logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
        except Exception as exc:
            logger.debug("LLMRouter init failed: %s", exc)

    return None, nullcontext(None)
|
||||
|
|
|
|||
82
tests/services/test_llm_router_task.py
Normal file
82
tests/services/test_llm_router_task.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
"""Tests for task-based routing added to get_meal_plan_router()."""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_task_ctx(url: str = "http://node:8080") -> MagicMock:
    """Build a stand-in for the context manager returned by task_allocate().

    Entering it yields a fake allocation whose ``url`` is *url*; exiting
    returns False so exceptions are never suppressed.
    """
    allocation = MagicMock(
        url=url,
        allocation_id="alloc-task-1",
        service="cf-text",
    )
    manager = MagicMock()
    manager.__enter__.return_value = allocation
    manager.__exit__.return_value = False
    return manager
|
||||
|
||||
|
||||
def _make_task_ctx_not_registered() -> MagicMock:
    """Build a fake context manager whose ``__enter__`` raises TaskNotRegistered."""
    from app.services.task_inference import TaskNotRegistered

    manager = MagicMock()
    # The failure surfaces on enter, not when the manager is created.
    manager.__enter__.side_effect = TaskNotRegistered("not registered")
    manager.__exit__.return_value = False
    return manager
|
||||
|
||||
|
||||
def _make_direct_alloc_ctx(url: str = "http://node:8080") -> MagicMock:
    """Build a stand-in for the context manager returned by CFOrchClient.allocate().

    Entering it yields a fake allocation exposing only ``url``.
    """
    allocation = MagicMock(url=url)
    manager = MagicMock()
    manager.__enter__.return_value = allocation
    manager.__exit__.return_value = False
    return manager
|
||||
|
||||
|
||||
def test_task_path_returns_orch_router_on_success(monkeypatch):
    """Tier 1: a successful task allocation yields an _OrchTextRouter plus its open context."""
    from unittest.mock import patch

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    task_ctx = _make_task_ctx(url="http://node:9001")

    # Patch the name as it exists in llm_router's own namespace (module-level import).
    with patch(
        "app.services.meal_plan.llm_router.task_allocate",
        return_value=task_ctx,
    ) as mock_task_allocate:
        from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
        router, ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9001"

    # The caller must get back the very context that was entered, still open,
    # so it can hold the allocation for the duration of the LLM call.
    assert ctx is task_ctx
    task_ctx.__enter__.assert_called_once()
    task_ctx.__exit__.assert_not_called()

    # The allocation must be requested for this product/task pair.
    mock_task_allocate.assert_called_once()
    assert mock_task_allocate.call_args[0][:2] == ("kiwi", "meal_plan")
|
||||
|
||||
|
||||
def test_task_not_registered_falls_back_to_direct_allocate(monkeypatch):
    """Tier 2: TaskNotRegistered from the task path triggers direct cf-text allocation."""
    from unittest.mock import patch

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    direct_ctx = _make_direct_alloc_ctx(url="http://node:9002")

    # Patch task_allocate in llm_router's namespace so TaskNotRegistered is raised
    # when the returned context is entered, and replace the direct-allocate client.
    with patch(
        "app.services.meal_plan.llm_router.task_allocate",
        return_value=_make_task_ctx_not_registered(),
    ), patch("app.services.meal_plan.llm_router.CFOrchClient") as MockClient:
        MockClient.return_value.allocate.return_value = direct_ctx
        from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
        router, ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9002"

    # The fallback must talk to the configured coordinator and request cf-text.
    MockClient.assert_called_once_with("http://coord:7700")
    MockClient.return_value.allocate.assert_called_once_with(
        service="cf-text",
        ttl_s=120.0,
        caller="kiwi-meal-plan",
    )

    # The direct allocation's context is handed to the caller still open.
    assert ctx is direct_ctx
    direct_ctx.__exit__.assert_not_called()
|
||||
|
||||
|
||||
def test_no_cf_orch_url_returns_llm_router(monkeypatch):
    """Tier 3: without CF_ORCH_URL the local LLMRouter is returned with a no-op context."""
    from contextlib import nullcontext
    from unittest.mock import patch

    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    local_router = MagicMock()

    with patch(
        "app.services.meal_plan.llm_router.LLMRouter",
        return_value=local_router,
    ) as mock_router_cls:
        from app.services.meal_plan.llm_router import get_meal_plan_router
        router, ctx = get_meal_plan_router()

    assert router is local_router
    mock_router_cls.assert_called_once_with()

    # Local inference holds no allocation: the context is a nullcontext yielding None.
    assert isinstance(ctx, nullcontext)
    with ctx as held:
        assert held is None
|
||||
Loading…
Reference in a new issue