From 02abc8e73418e5b5e037196da0b0ace461d73a14 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Wed, 13 May 2026 10:32:58 -0700
Subject: [PATCH] feat(orch): migrate meal plan LLM routing to task-based
 allocation with direct-allocate fallback

Replaces single-path cf-orch allocation with a three-tier strategy:
tier 1 task_allocate() (coordinator-driven), tier 2 direct
CFOrchClient.allocate() (TaskNotRegistered fallback), tier 3 local
LLMRouter. Module-level imports for CFOrchClient and LLMRouter make all
three paths patchable in tests without import caching issues.
---
Review notes (below the three-dash line, so not part of the commit message):

- Reconstructed from a copy whose line breaks had been collapsed. Python
  indentation was re-derived from syntax; blank context lines and the
  docstring indentation are inferred (hunk line counts were used as a
  cross-check) -- confirm against the source commit before applying.
- The closing triple quote of the get_meal_plan_router() docstring is kept
  as a context line: counting it as an addition gives 162 inserted lines,
  not the 161 stated in the diffstat below.
- Tier 1 reads alloc.url without the "alloc is not None" guard that tier 2
  has. If the context returned by task_allocate() can yield None (not
  visible in this patch -- TODO confirm), the AttributeError is swallowed
  by "except Exception" and the already-entered context is never exited.
- Tier 2 falls through to tier 3 when __enter__() returns None without
  calling ctx.__exit__().
- tests/services/test_llm_router_task.py imports pytest but never
  references it.

 app/services/meal_plan/llm_router.py   | 113 +++++++++++++++++--------
 tests/services/test_llm_router_task.py |  82 ++++++++++++++++++
 2 files changed, 161 insertions(+), 34 deletions(-)
 create mode 100644 tests/services/test_llm_router_task.py

diff --git a/app/services/meal_plan/llm_router.py b/app/services/meal_plan/llm_router.py
index 4475b52..07dde41 100644
--- a/app/services/meal_plan/llm_router.py
+++ b/app/services/meal_plan/llm_router.py
@@ -2,17 +2,20 @@
 # BSL 1.1 — LLM feature
 """Provide a router-compatible LLM client for meal plan generation tasks.
 
-Cloud (CF_ORCH_URL set):
-    Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
-    Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
-    with a .complete(system, user, **kwargs) interface.
+Cloud (CF_ORCH_URL set), tier 1 — task-based routing (preferred):
+    Calls /api/inference/task with product=kiwi, task=meal_plan.
+    The coordinator resolves the model from assignments.yaml.
+
+Cloud (CF_ORCH_URL set), tier 2 — direct allocation (fallback):
+    Allocates cf-text directly via client.allocate(). Used when the task
+    is not yet registered in the coordinator (cf-orch#61 not deployed).
 
 Local / self-hosted (no CF_ORCH_URL):
     Returns an LLMRouter instance which tries ollama, vllm, or any
     backend configured in ~/.config/circuitforge/llm.yaml.
 
-Both paths expose the same interface so llm_timing.py and llm_planner.py
-need no knowledge of the backend.
+All paths expose the same (router, ctx) interface so llm_planner.py
+needs no knowledge of the backend.
 """
 from __future__ import annotations
 
@@ -22,8 +25,7 @@ from contextlib import nullcontext
 
 logger = logging.getLogger(__name__)
 
-# cf-orch service name and VRAM budget for meal plan LLM tasks.
-# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
+# cf-orch service name and TTL for direct-allocate fallback path.
 _SERVICE_TYPE = "cf-text"
 _TTL_S = 120.0
 _CALLER = "kiwi-meal-plan"
@@ -62,35 +64,78 @@ class _OrchTextRouter:
         return resp.choices[0].message.content or ""
 
 
+# Imported at module level so tests can patch the names in this module's namespace.
+# app.services.task_inference.task_allocate — patch target for task routing tests.
+try:
+    from app.services.task_inference import TaskNotRegistered, task_allocate
+    _HAS_TASK_INFERENCE = True
+except ImportError:
+    _HAS_TASK_INFERENCE = False
+
+# circuitforge_orch.client.CFOrchClient — patch target for direct-allocate fallback tests.
+try:
+    from circuitforge_orch.client import CFOrchClient
+except ImportError:
+    CFOrchClient = None  # type: ignore[assignment,misc]
+
+# circuitforge_core.llm.router.LLMRouter — patch target for local-inference tests.
+try:
+    from circuitforge_core.llm.router import LLMRouter
+except (ImportError, FileNotFoundError):
+    LLMRouter = None  # type: ignore[assignment,misc]
+
+
 def get_meal_plan_router():
     """Return an LLM client for meal plan tasks.
 
-    Tries cf-orch cf-text allocation first (cloud); falls back to LLMRouter
-    (local ollama/vllm). Returns None if no backend is available.
+    Returns (router, ctx) where ctx is a context manager the caller holds
+    open for the duration of the LLM call. Returns (None, nullcontext(None))
+    if no backend is available.
     """
     cf_orch_url = os.environ.get("CF_ORCH_URL")
 
-    if cf_orch_url:
-        try:
-            from circuitforge_orch.client import CFOrchClient
-            client = CFOrchClient(cf_orch_url)
-            ctx = client.allocate(
-                service=_SERVICE_TYPE,
-                ttl_s=_TTL_S,
-                caller=_CALLER,
-            )
-            alloc = ctx.__enter__()
-            if alloc is not None:
-                return _OrchTextRouter(alloc.url), ctx
-        except Exception as exc:
-            logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
-    # Local fallback: LLMRouter (ollama / vllm / openai-compat)
-    try:
-        from circuitforge_core.llm.router import LLMRouter
-        return LLMRouter(), nullcontext(None)
-    except FileNotFoundError:
-        logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
-        return None, nullcontext(None)
-    except Exception as exc:
-        logger.debug("LLMRouter init failed: %s", exc)
-        return None, nullcontext(None)
+    if cf_orch_url:
+        # Tier 1: task-based routing — coordinator owns model selection.
+        if _HAS_TASK_INFERENCE:
+            try:
+                ctx = task_allocate(
+                    "kiwi", "meal_plan",
+                    service_hint=_SERVICE_TYPE,
+                    ttl_s=_TTL_S,
+                )
+                alloc = ctx.__enter__()
+                return _OrchTextRouter(alloc.url), ctx
+            except TaskNotRegistered:
+                logger.debug(
+                    "kiwi.meal_plan not in coordinator assignments — "
+                    "falling back to direct cf-text allocation"
+                )
+            except Exception as exc:
+                logger.debug("task allocation failed, trying direct allocate: %s", exc)
+
+        # Tier 2: direct allocation — hardcoded service type.
+        if CFOrchClient is not None:
+            try:
+                client = CFOrchClient(cf_orch_url)
+                ctx = client.allocate(
+                    service=_SERVICE_TYPE,
+                    ttl_s=_TTL_S,
+                    caller=_CALLER,
+                )
+                alloc = ctx.__enter__()
+                if alloc is not None:
+                    return _OrchTextRouter(alloc.url), ctx
+            except Exception as exc:
+                logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
+
+    # Tier 3: local inference — ollama / vllm / openai-compat.
+    if LLMRouter is not None:
+        try:
+            return LLMRouter(), nullcontext(None)
+        except FileNotFoundError:
+            logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
+            return None, nullcontext(None)
+        except Exception as exc:
+            logger.debug("LLMRouter init failed: %s", exc)
+            return None, nullcontext(None)
+    return None, nullcontext(None)
diff --git a/tests/services/test_llm_router_task.py b/tests/services/test_llm_router_task.py
new file mode 100644
index 0000000..a261343
--- /dev/null
+++ b/tests/services/test_llm_router_task.py
@@ -0,0 +1,82 @@
+"""Tests for task-based routing added to get_meal_plan_router()."""
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+
+
+def _make_task_ctx(url: str = "http://node:8080") -> MagicMock:
+    """Mock context manager returned by task_allocate()."""
+    alloc = MagicMock()
+    alloc.url = url
+    alloc.allocation_id = "alloc-task-1"
+    alloc.service = "cf-text"
+    ctx = MagicMock()
+    ctx.__enter__ = MagicMock(return_value=alloc)
+    ctx.__exit__ = MagicMock(return_value=False)
+    return ctx
+
+
+def _make_task_ctx_not_registered() -> MagicMock:
+    """Mock context manager that raises TaskNotRegistered on enter."""
+    from app.services.task_inference import TaskNotRegistered
+    ctx = MagicMock()
+    ctx.__enter__ = MagicMock(side_effect=TaskNotRegistered("not registered"))
+    ctx.__exit__ = MagicMock(return_value=False)
+    return ctx
+
+
+def _make_direct_alloc_ctx(url: str = "http://node:8080") -> MagicMock:
+    """Mock context manager returned by CFOrchClient.allocate()."""
+    alloc = MagicMock()
+    alloc.url = url
+    ctx = MagicMock()
+    ctx.__enter__ = MagicMock(return_value=alloc)
+    ctx.__exit__ = MagicMock(return_value=False)
+    return ctx
+
+
+def test_task_path_returns_orch_router_on_success(monkeypatch):
+    """get_meal_plan_router() returns _OrchTextRouter when task allocation succeeds."""
+    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
+    import unittest.mock as um
+    # Patch the name as it exists in llm_router's own namespace (module-level import).
+    with um.patch("app.services.meal_plan.llm_router.task_allocate",
+                  return_value=_make_task_ctx(url="http://node:9001")):
+        from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
+        router, ctx = get_meal_plan_router()
+
+    assert isinstance(router, _OrchTextRouter)
+    assert router._base_url == "http://node:9001"
+
+
+def test_task_not_registered_falls_back_to_direct_allocate(monkeypatch):
+    """get_meal_plan_router() falls back to direct cf-text allocation on TaskNotRegistered."""
+    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
+    direct_ctx = _make_direct_alloc_ctx(url="http://node:9002")
+
+    import unittest.mock as um
+    # Patch task_allocate in llm_router's namespace so TaskNotRegistered is raised.
+    with um.patch("app.services.meal_plan.llm_router.task_allocate",
+                  return_value=_make_task_ctx_not_registered()), \
+         um.patch("app.services.meal_plan.llm_router.CFOrchClient") as MockClient:
+        MockClient.return_value.allocate.return_value = direct_ctx
+        from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
+        router, ctx = get_meal_plan_router()
+
+    assert isinstance(router, _OrchTextRouter)
+    assert router._base_url == "http://node:9002"
+
+
+def test_no_cf_orch_url_returns_llm_router(monkeypatch):
+    """get_meal_plan_router() returns LLMRouter when CF_ORCH_URL is not set."""
+    monkeypatch.delenv("CF_ORCH_URL", raising=False)
+
+    import unittest.mock as um
+    mock_lr = MagicMock()
+    with um.patch("app.services.meal_plan.llm_router.LLMRouter", return_value=mock_lr):
+        from app.services.meal_plan.llm_router import get_meal_plan_router
+        router, ctx = get_meal_plan_router()
+
+    assert router is mock_lr