feat(services/bsl): add llm_router — cf-text via cf-orch on cloud, LLMRouter (ollama/vllm) local fallback
refs kiwi#68
This commit is contained in:
parent
f54127a8cc
commit
4281b0ce19
1 changed files with 96 additions and 0 deletions
96
app/services/meal_plan/llm_router.py
Normal file
96
app/services/meal_plan/llm_router.py
Normal file
|
|
@ -0,0 +1,96 @@
|
||||||
|
# app/services/meal_plan/llm_router.py
|
||||||
|
# BSL 1.1 — LLM feature
|
||||||
|
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
||||||
|
|
||||||
|
Cloud (CF_ORCH_URL set):
|
||||||
|
Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
|
||||||
|
Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
|
||||||
|
with a .complete(system, user, **kwargs) interface.
|
||||||
|
|
||||||
|
Local / self-hosted (no CF_ORCH_URL):
|
||||||
|
Returns an LLMRouter instance which tries ollama, vllm, or any
|
||||||
|
backend configured in ~/.config/circuitforge/llm.yaml.
|
||||||
|
|
||||||
|
Both paths expose the same interface so llm_timing.py and llm_planner.py
|
||||||
|
need no knowledge of the backend.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from contextlib import nullcontext
|
||||||
|
|
||||||
|
# Module-level logger for this file.
logger = logging.getLogger(__name__)

# Arguments handed to CFOrchClient.allocate() for meal plan LLM tasks:
# the service name, the allocation TTL, and a caller tag.
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
_SERVICE_TYPE = "cf-text"  # passed as ``service=``
_TTL_S = 120.0  # passed as ``ttl_s=``; presumably seconds — confirm against cf-orch
_CALLER = "kiwi-meal-plan"  # passed as ``caller=``
|
||||||
|
|
||||||
|
|
||||||
|
class _OrchTextRouter:
    """Adapter exposing a cf-text HTTP endpoint through an LLMRouter-style API.

    Only ``complete()`` is provided; it talks to the endpoint through the
    OpenAI-compatible client rooted at ``<base_url>/v1``.
    """

    def __init__(self, base_url: str) -> None:
        """Remember *base_url* with any trailing slashes removed."""
        # Stripping here lets complete() append "/v1" without doubling slashes.
        self._base_url = base_url.rstrip("/")

    def complete(
        self,
        system: str = "",
        user: str = "",
        max_tokens: int = 512,
        temperature: float = 0.7,
        **_kwargs,
    ) -> str:
        """Run one chat completion and return the reply text.

        Args:
            system: Optional system prompt; no system message is sent when empty.
            user: User prompt; always sent, even when empty.
            max_tokens: Forwarded to the completion request.
            temperature: Forwarded to the completion request.
            **_kwargs: Accepted and ignored.

        Returns:
            The content of the first choice, or ``""`` when that content is
            empty or ``None``.
        """
        # Imported at call time rather than at module import.
        from openai import OpenAI

        # NOTE(review): the key is a placeholder; presumably cf-text does not
        # validate it — confirm.
        api = OpenAI(base_url=f"{self._base_url}/v1", api_key="any")

        user_turn = {"role": "user", "content": user}
        if system:
            chat = [{"role": "system", "content": system}, user_turn]
        else:
            chat = [user_turn]

        # Use the first model the endpoint advertises; any failure while
        # listing (including an empty list) falls back to the name "local".
        try:
            model_id = api.models.list().data[0].id
        except Exception:
            model_id = "local"

        response = api.chat.completions.create(
            model=model_id,
            messages=chat,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        reply = response.choices[0].message.content
        return reply if reply else ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_meal_plan_router():
    """Return an LLM client for meal plan tasks, plus its allocation context.

    When the ``CF_ORCH_URL`` environment variable is set, a cf-text service is
    requested from cf-orch and wrapped in an ``_OrchTextRouter``. If that is
    not possible (variable unset, allocation raises, or allocation yields
    ``None``), an ``LLMRouter`` is tried instead.

    Returns:
        Always a ``(router, ctx)`` tuple, never a bare ``None``:

        * cf-orch path: ``(_OrchTextRouter, ctx)`` where ``ctx`` is the
          allocation context manager. It has ALREADY been entered here.
          NOTE(review): callers presumably release it with
          ``ctx.__exit__(None, None, None)`` when done — confirm call sites.
        * local path: ``(LLMRouter(), nullcontext(None))``.
        * no backend available: ``(None, nullcontext(None))``.

    Backend failures are not raised; they are logged at DEBUG level.
    """
    cf_orch_url = os.environ.get("CF_ORCH_URL")
    if cf_orch_url:
        ctx = None
        entered = False
        try:
            from circuitforge_orch.client import CFOrchClient

            client = CFOrchClient(cf_orch_url)
            ctx = client.allocate(
                service=_SERVICE_TYPE,
                ttl_s=_TTL_S,
                caller=_CALLER,
            )
            alloc = ctx.__enter__()
            entered = True
            if alloc is not None:
                return _OrchTextRouter(alloc.url), ctx
            logger.debug("cf-orch cf-text allocation returned nothing, falling back to LLMRouter")
        except Exception as exc:
            logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)

        # Reaching here means the cf-orch path was abandoned. If the context
        # was entered, nobody else holds a reference to it, so it must be
        # exited now or the allocation would be left open.
        if entered:
            try:
                ctx.__exit__(None, None, None)
            except Exception as exc:
                logger.debug("cf-orch cf-text allocation release failed: %s", exc)

    # Local fallback: LLMRouter (ollama / vllm / openai-compat)
    try:
        from circuitforge_core.llm.router import LLMRouter

        return LLMRouter(), nullcontext(None)
    except FileNotFoundError:
        logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
        return None, nullcontext(None)
    except Exception as exc:
        logger.debug("LLMRouter init failed: %s", exc)
        return None, nullcontext(None)
|
||||||
Loading…
Reference in a new issue