fix: wire recipe engine to cf-text service instead of vllm
Aligns llm_recipe.py with the pattern already used by the meal plan service. cf-text routes through a lighter GGUF/llama.cpp path and shares VRAM budget with other products via cf-orch, rather than requiring a dedicated vLLM process. Also drops model_candidates (not applicable to cf-text allocation). Closes #70
This commit is contained in:
parent
64a0abebe3
commit
443e68ba3f
1 changed file with 7 additions and 6 deletions
|
|
@ -143,12 +143,14 @@ class LLMRecipeGenerator:
|
|||
|
||||
return "\n".join(lines)
|
||||
|
||||
_MODEL_CANDIDATES: list[str] = ["Ouro-2.6B-Thinking", "Ouro-1.4B"]
|
||||
_SERVICE_TYPE = "cf-text"
|
||||
_TTL_S = 300.0
|
||||
_CALLER = "kiwi-recipe"
|
||||
|
||||
def _get_llm_context(self):
|
||||
"""Return a sync context manager that yields an Allocation or None.
|
||||
|
||||
When CF_ORCH_URL is set, uses CFOrchClient to acquire a vLLM allocation
|
||||
When CF_ORCH_URL is set, uses CFOrchClient to acquire a cf-text allocation
|
||||
(which handles service lifecycle and VRAM). Falls back to nullcontext(None)
|
||||
when the env var is absent or CFOrchClient raises on construction.
|
||||
"""
|
||||
|
|
@ -158,10 +160,9 @@ class LLMRecipeGenerator:
|
|||
from circuitforge_orch.client import CFOrchClient
|
||||
client = CFOrchClient(cf_orch_url)
|
||||
return client.allocate(
|
||||
service="vllm",
|
||||
model_candidates=self._MODEL_CANDIDATES,
|
||||
ttl_s=300.0,
|
||||
caller="kiwi-recipe",
|
||||
service=self._SERVICE_TYPE,
|
||||
ttl_s=self._TTL_S,
|
||||
caller=self._CALLER,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("CFOrchClient init failed, falling back to direct URL: %s", exc)
|
||||
|
|
|
|||
Loading…
Reference in a new issue