fix: wire recipe engine to cf-text service instead of vllm
Aligns llm_recipe.py with the pattern already used by the meal plan service. cf-text routes through a lighter GGUF/llama.cpp path and shares VRAM budget with other products via cf-orch, rather than requiring a dedicated vLLM process. Also drops model_candidates (not applicable to cf-text allocation). Closes #70
This commit is contained in:
parent
64a0abebe3
commit
443e68ba3f
1 changed file with 7 additions and 6 deletions
|
|
@ -143,12 +143,14 @@ class LLMRecipeGenerator:
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
_MODEL_CANDIDATES: list[str] = ["Ouro-2.6B-Thinking", "Ouro-1.4B"]
|
_SERVICE_TYPE = "cf-text"
|
||||||
|
_TTL_S = 300.0
|
||||||
|
_CALLER = "kiwi-recipe"
|
||||||
|
|
||||||
def _get_llm_context(self):
|
def _get_llm_context(self):
|
||||||
"""Return a sync context manager that yields an Allocation or None.
|
"""Return a sync context manager that yields an Allocation or None.
|
||||||
|
|
||||||
When CF_ORCH_URL is set, uses CFOrchClient to acquire a vLLM allocation
|
When CF_ORCH_URL is set, uses CFOrchClient to acquire a cf-text allocation
|
||||||
(which handles service lifecycle and VRAM). Falls back to nullcontext(None)
|
(which handles service lifecycle and VRAM). Falls back to nullcontext(None)
|
||||||
when the env var is absent or CFOrchClient raises on construction.
|
when the env var is absent or CFOrchClient raises on construction.
|
||||||
"""
|
"""
|
||||||
|
|
@ -158,10 +160,9 @@ class LLMRecipeGenerator:
|
||||||
from circuitforge_orch.client import CFOrchClient
|
from circuitforge_orch.client import CFOrchClient
|
||||||
client = CFOrchClient(cf_orch_url)
|
client = CFOrchClient(cf_orch_url)
|
||||||
return client.allocate(
|
return client.allocate(
|
||||||
service="vllm",
|
service=self._SERVICE_TYPE,
|
||||||
model_candidates=self._MODEL_CANDIDATES,
|
ttl_s=self._TTL_S,
|
||||||
ttl_s=300.0,
|
caller=self._CALLER,
|
||||||
caller="kiwi-recipe",
|
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("CFOrchClient init failed, falling back to direct URL: %s", exc)
|
logger.debug("CFOrchClient init failed, falling back to direct URL: %s", exc)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue