fix: wire recipe engine to cf-text service instead of vllm

Aligns llm_recipe.py with the pattern already used by the meal plan
service. cf-text routes through a lighter GGUF/llama.cpp path and
shares a VRAM budget with other products via cf-orch, rather than
requiring a dedicated vLLM process. Also drops model_candidates,
which does not apply to cf-text allocations.

Closes #70
This commit is contained in:
pyr0ball 2026-04-16 06:25:46 -07:00
parent 64a0abebe3
commit 443e68ba3f

View file

@ -143,12 +143,14 @@ class LLMRecipeGenerator:
return "\n".join(lines) return "\n".join(lines)
_MODEL_CANDIDATES: list[str] = ["Ouro-2.6B-Thinking", "Ouro-1.4B"] _SERVICE_TYPE = "cf-text"
_TTL_S = 300.0
_CALLER = "kiwi-recipe"
def _get_llm_context(self): def _get_llm_context(self):
"""Return a sync context manager that yields an Allocation or None. """Return a sync context manager that yields an Allocation or None.
When CF_ORCH_URL is set, uses CFOrchClient to acquire a vLLM allocation When CF_ORCH_URL is set, uses CFOrchClient to acquire a cf-text allocation
(which handles service lifecycle and VRAM). Falls back to nullcontext(None) (which handles service lifecycle and VRAM). Falls back to nullcontext(None)
when the env var is absent or CFOrchClient raises on construction. when the env var is absent or CFOrchClient raises on construction.
""" """
@ -158,10 +160,9 @@ class LLMRecipeGenerator:
from circuitforge_orch.client import CFOrchClient from circuitforge_orch.client import CFOrchClient
client = CFOrchClient(cf_orch_url) client = CFOrchClient(cf_orch_url)
return client.allocate( return client.allocate(
service="vllm", service=self._SERVICE_TYPE,
model_candidates=self._MODEL_CANDIDATES, ttl_s=self._TTL_S,
ttl_s=300.0, caller=self._CALLER,
caller="kiwi-recipe",
) )
except Exception as exc: except Exception as exc:
logger.debug("CFOrchClient init failed, falling back to direct URL: %s", exc) logger.debug("CFOrchClient init failed, falling back to direct URL: %s", exc)