feat(services/bsl): add llm_router — cf-text via cf-orch on cloud, LLMRouter (ollama/vllm) local fallback

refs kiwi#68
This commit is contained in:
pyr0ball 2026-04-12 14:07:13 -07:00
parent f54127a8cc
commit 4281b0ce19

View file

@@ -0,0 +1,96 @@
# app/services/meal_plan/llm_router.py
# BSL 1.1 — LLM feature
"""Provide a router-compatible LLM client for meal plan generation tasks.

Cloud (CF_ORCH_URL set):
    Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
    Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
    with a .complete(system, user, **kwargs) interface.

Local / self-hosted (no CF_ORCH_URL):
    Returns an LLMRouter instance which tries ollama, vllm, or any
    backend configured in ~/.config/circuitforge/llm.yaml.

Both paths expose the same interface so llm_timing.py and llm_planner.py
need no knowledge of the backend.
"""
from __future__ import annotations
import logging
import os
from contextlib import nullcontext
logger = logging.getLogger(__name__)
# cf-orch service name and VRAM budget for meal plan LLM tasks.
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
_SERVICE_TYPE = "cf-text"   # cf-orch service type requested for allocation
_TTL_S = 120.0              # allocation time-to-live passed to cf-orch, in seconds
_CALLER = "kiwi-meal-plan"  # caller identity reported to cf-orch for accounting
class _OrchTextRouter:
"""Thin adapter that makes a cf-text HTTP endpoint look like LLMRouter."""
def __init__(self, base_url: str) -> None:
self._base_url = base_url.rstrip("/")
def complete(
self,
system: str = "",
user: str = "",
max_tokens: int = 512,
temperature: float = 0.7,
**_kwargs,
) -> str:
from openai import OpenAI
client = OpenAI(base_url=self._base_url + "/v1", api_key="any")
messages = []
if system:
messages.append({"role": "system", "content": system})
messages.append({"role": "user", "content": user})
try:
model = client.models.list().data[0].id
except Exception:
model = "local"
resp = client.chat.completions.create(
model=model,
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
)
return resp.choices[0].message.content or ""
def get_meal_plan_router():
    """Return an LLM client and its owning context for meal plan tasks.

    Tries a cf-orch cf-text allocation first (cloud); falls back to
    LLMRouter (local ollama/vllm/openai-compat).

    Returns:
        tuple: ``(router, ctx)``. ``router`` exposes
        ``.complete(system, user, **kwargs)`` and is ``None`` when no
        backend is available. ``ctx`` is the context manager that owns the
        backend's lifetime — the caller must exit it when finished so a
        cloud allocation is released; on the local/unavailable paths it is
        a no-op ``nullcontext``.
    """
    # Same logger object as the module-level handle; fetched locally so the
    # function is self-contained.
    log = logging.getLogger(__name__)

    cf_orch_url = os.environ.get("CF_ORCH_URL")
    if cf_orch_url:
        try:
            from circuitforge_orch.client import CFOrchClient

            client = CFOrchClient(cf_orch_url)
            ctx = client.allocate(
                service=_SERVICE_TYPE,
                ttl_s=_TTL_S,
                caller=_CALLER,
            )
            alloc = ctx.__enter__()
            if alloc is not None:
                return _OrchTextRouter(alloc.url), ctx
            # Fix: the original abandoned the entered context when cf-orch
            # returned no allocation, leaking it until TTL expiry. Release
            # it explicitly before falling back to the local path.
            ctx.__exit__(None, None, None)
        except Exception as exc:
            log.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)

    # Local fallback: LLMRouter (ollama / vllm / openai-compat)
    try:
        from circuitforge_core.llm.router import LLMRouter
        return LLMRouter(), nullcontext(None)
    except FileNotFoundError:
        # Raised when neither llm.yaml nor LLM env vars configure a backend.
        log.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
        return None, nullcontext(None)
    except Exception as exc:
        log.debug("LLMRouter init failed: %s", exc)
        return None, nullcontext(None)