feat(orch): migrate meal plan LLM routing to task-based allocation with direct-allocate fallback
Replaces single-path cf-orch allocation with a three-tier strategy: tier 1 task_allocate() (coordinator-driven), tier 2 direct CFOrchClient.allocate() (fallback when the task is not registered or task allocation otherwise fails), tier 3 local LLMRouter. Module-level imports for task_allocate, CFOrchClient and LLMRouter make all three paths patchable in tests without import caching issues.
This commit is contained in:
parent
61c428baf0
commit
02abc8e734
2 changed files with 161 additions and 34 deletions
|
|
@ -2,17 +2,20 @@
|
||||||
# BSL 1.1 — LLM feature
|
# BSL 1.1 — LLM feature
|
||||||
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
||||||
|
|
||||||
Cloud (CF_ORCH_URL set):
|
Cloud (CF_ORCH_URL set), tier 1 — task-based routing (preferred):
|
||||||
Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
|
Calls /api/inference/task with product=kiwi, task=meal_plan.
|
||||||
Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
|
The coordinator resolves the model from assignments.yaml.
|
||||||
with a .complete(system, user, **kwargs) interface.
|
|
||||||
|
Cloud (CF_ORCH_URL set), tier 2 — direct allocation (fallback):
|
||||||
|
Allocates cf-text directly via client.allocate(). Used when the task
|
||||||
|
is not yet registered in the coordinator (cf-orch#61 not deployed).
|
||||||
|
|
||||||
Local / self-hosted (no CF_ORCH_URL):
|
Local / self-hosted (no CF_ORCH_URL):
|
||||||
Returns an LLMRouter instance which tries ollama, vllm, or any
|
Returns an LLMRouter instance which tries ollama, vllm, or any
|
||||||
backend configured in ~/.config/circuitforge/llm.yaml.
|
backend configured in ~/.config/circuitforge/llm.yaml.
|
||||||
|
|
||||||
Both paths expose the same interface so llm_timing.py and llm_planner.py
|
All paths expose the same (router, ctx) interface so llm_planner.py
|
||||||
need no knowledge of the backend.
|
needs no knowledge of the backend.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
@ -22,8 +25,7 @@ from contextlib import nullcontext
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# cf-orch service name and VRAM budget for meal plan LLM tasks.
|
# cf-orch service name and TTL for direct-allocate fallback path.
|
||||||
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
|
|
||||||
_SERVICE_TYPE = "cf-text"
|
_SERVICE_TYPE = "cf-text"
|
||||||
_TTL_S = 120.0
|
_TTL_S = 120.0
|
||||||
_CALLER = "kiwi-meal-plan"
|
_CALLER = "kiwi-meal-plan"
|
||||||
|
|
@ -62,16 +64,58 @@ class _OrchTextRouter:
|
||||||
return resp.choices[0].message.content or ""
|
return resp.choices[0].message.content or ""
|
||||||
|
|
||||||
|
|
||||||
|
# Imported at module level so tests can patch the names in this module's namespace.
|
||||||
|
# app.services.task_inference.task_allocate — patch target for task routing tests.
|
||||||
|
try:
|
||||||
|
from app.services.task_inference import TaskNotRegistered, task_allocate
|
||||||
|
_HAS_TASK_INFERENCE = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_TASK_INFERENCE = False
|
||||||
|
|
||||||
|
# circuitforge_orch.client.CFOrchClient — patch target for direct-allocate fallback tests.
|
||||||
|
try:
|
||||||
|
from circuitforge_orch.client import CFOrchClient
|
||||||
|
except ImportError:
|
||||||
|
CFOrchClient = None # type: ignore[assignment,misc]
|
||||||
|
|
||||||
|
# circuitforge_core.llm.router.LLMRouter — patch target for local-inference tests.
|
||||||
|
try:
|
||||||
|
from circuitforge_core.llm.router import LLMRouter
|
||||||
|
except (ImportError, FileNotFoundError):
|
||||||
|
LLMRouter = None # type: ignore[assignment,misc]
|
||||||
|
|
||||||
|
|
||||||
def get_meal_plan_router():
|
def get_meal_plan_router():
|
||||||
"""Return an LLM client for meal plan tasks.
|
"""Return an LLM client for meal plan tasks.
|
||||||
|
|
||||||
Tries cf-orch cf-text allocation first (cloud); falls back to LLMRouter
|
Returns (router, ctx) where ctx is a context manager the caller holds
|
||||||
(local ollama/vllm). Returns None if no backend is available.
|
open for the duration of the LLM call. Returns (None, nullcontext(None))
|
||||||
|
if no backend is available.
|
||||||
"""
|
"""
|
||||||
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
||||||
|
|
||||||
if cf_orch_url:
|
if cf_orch_url:
|
||||||
|
# Tier 1: task-based routing — coordinator owns model selection.
|
||||||
|
if _HAS_TASK_INFERENCE:
|
||||||
|
try:
|
||||||
|
ctx = task_allocate(
|
||||||
|
"kiwi", "meal_plan",
|
||||||
|
service_hint=_SERVICE_TYPE,
|
||||||
|
ttl_s=_TTL_S,
|
||||||
|
)
|
||||||
|
alloc = ctx.__enter__()
|
||||||
|
return _OrchTextRouter(alloc.url), ctx
|
||||||
|
except TaskNotRegistered:
|
||||||
|
logger.debug(
|
||||||
|
"kiwi.meal_plan not in coordinator assignments — "
|
||||||
|
"falling back to direct cf-text allocation"
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("task allocation failed, trying direct allocate: %s", exc)
|
||||||
|
|
||||||
|
# Tier 2: direct allocation — hardcoded service type.
|
||||||
|
if CFOrchClient is not None:
|
||||||
try:
|
try:
|
||||||
from circuitforge_orch.client import CFOrchClient
|
|
||||||
client = CFOrchClient(cf_orch_url)
|
client = CFOrchClient(cf_orch_url)
|
||||||
ctx = client.allocate(
|
ctx = client.allocate(
|
||||||
service=_SERVICE_TYPE,
|
service=_SERVICE_TYPE,
|
||||||
|
|
@ -84,9 +128,9 @@ def get_meal_plan_router():
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
|
logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
|
||||||
|
|
||||||
# Local fallback: LLMRouter (ollama / vllm / openai-compat)
|
# Tier 3: local inference — ollama / vllm / openai-compat.
|
||||||
|
if LLMRouter is not None:
|
||||||
try:
|
try:
|
||||||
from circuitforge_core.llm.router import LLMRouter
|
|
||||||
return LLMRouter(), nullcontext(None)
|
return LLMRouter(), nullcontext(None)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
|
logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
|
||||||
|
|
@ -94,3 +138,4 @@ def get_meal_plan_router():
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("LLMRouter init failed: %s", exc)
|
logger.debug("LLMRouter init failed: %s", exc)
|
||||||
return None, nullcontext(None)
|
return None, nullcontext(None)
|
||||||
|
return None, nullcontext(None)
|
||||||
|
|
|
||||||
82
tests/services/test_llm_router_task.py
Normal file
82
tests/services/test_llm_router_task.py
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
"""Tests for task-based routing added to get_meal_plan_router()."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx(url: str = "http://node:8080") -> MagicMock:
|
||||||
|
"""Mock context manager returned by task_allocate()."""
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
alloc.allocation_id = "alloc-task-1"
|
||||||
|
alloc.service = "cf-text"
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx_not_registered() -> MagicMock:
    """Build a mock context manager whose ``__enter__`` raises TaskNotRegistered.

    Exiting returns ``False``, so the exception raised on enter is never
    suppressed by the mock itself.
    """
    # Imported lazily so merely collecting this module does not require
    # app.services.task_inference to be importable.
    from app.services.task_inference import TaskNotRegistered

    failing_ctx = MagicMock()
    failing_ctx.__enter__.side_effect = TaskNotRegistered("not registered")
    failing_ctx.__exit__.return_value = False
    return failing_ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_direct_alloc_ctx(url: str = "http://node:8080") -> MagicMock:
|
||||||
|
"""Mock context manager returned by CFOrchClient.allocate()."""
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_path_returns_orch_router_on_success(monkeypatch):
    """Tier 1: a successful task allocation yields an _OrchTextRouter and its ctx.

    Verifies the router points at the allocated node URL, that the context
    manager handed back to the caller is the one produced by task_allocate()
    (already entered), and that task_allocate() was asked for the
    kiwi/meal_plan task with the module's service hint and TTL.
    """
    from unittest.mock import patch

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    task_ctx = _make_task_ctx(url="http://node:9001")

    # Patch the name as it exists in llm_router's own namespace (module-level import).
    with patch(
        "app.services.meal_plan.llm_router.task_allocate",
        return_value=task_ctx,
    ) as mock_task_allocate:
        from app.services.meal_plan.llm_router import (
            _SERVICE_TYPE,
            _TTL_S,
            _OrchTextRouter,
            get_meal_plan_router,
        )

        router, ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9001"

    # The caller must receive the very context it is expected to hold open.
    assert ctx is task_ctx
    task_ctx.__enter__.assert_called_once()

    # The coordinator is addressed by product/task, not by a hardcoded model.
    mock_task_allocate.assert_called_once_with(
        "kiwi",
        "meal_plan",
        service_hint=_SERVICE_TYPE,
        ttl_s=_TTL_S,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_not_registered_falls_back_to_direct_allocate(monkeypatch):
    """Tier 2: TaskNotRegistered from tier 1 falls back to direct cf-text allocation.

    Besides checking the resulting router URL, this confirms that tier 1 was
    actually attempted (its context was entered and raised) and that the
    direct client was built from CF_ORCH_URL and asked for the module's
    service type.
    """
    from unittest.mock import patch

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    unregistered_ctx = _make_task_ctx_not_registered()
    direct_ctx = _make_direct_alloc_ctx(url="http://node:9002")

    # Patch task_allocate in llm_router's namespace so TaskNotRegistered is raised.
    with patch(
        "app.services.meal_plan.llm_router.task_allocate",
        return_value=unregistered_ctx,
    ), patch("app.services.meal_plan.llm_router.CFOrchClient") as MockClient:
        MockClient.return_value.allocate.return_value = direct_ctx
        from app.services.meal_plan.llm_router import (
            _SERVICE_TYPE,
            _OrchTextRouter,
            get_meal_plan_router,
        )

        router, ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9002"

    # Tier 1 must have been tried before the fallback kicked in.
    unregistered_ctx.__enter__.assert_called_once()

    # Tier 2 talks to the coordinator named by CF_ORCH_URL and requests cf-text.
    MockClient.assert_called_once_with("http://coord:7700")
    MockClient.return_value.allocate.assert_called_once()
    assert MockClient.return_value.allocate.call_args.kwargs["service"] == _SERVICE_TYPE
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_cf_orch_url_returns_llm_router(monkeypatch):
    """Tier 3: without CF_ORCH_URL the local LLMRouter is returned.

    Also checks the second element of the returned pair: for local inference
    there is no allocation to hold, so entering the context yields ``None``.
    """
    from unittest.mock import patch

    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    mock_lr = MagicMock()

    with patch(
        "app.services.meal_plan.llm_router.LLMRouter",
        return_value=mock_lr,
    ) as MockLLMRouter:
        from app.services.meal_plan.llm_router import get_meal_plan_router

        router, ctx = get_meal_plan_router()

    assert router is mock_lr
    # LLMRouter is constructed with no arguments on the local path.
    MockLLMRouter.assert_called_once_with()

    # Callers use `with ctx:` uniformly; locally it must be a no-op context.
    with ctx as alloc:
        assert alloc is None
|
||||||
Loading…
Reference in a new issue