From 7a7eae466645ef2f8632c05b042d1d695f017567 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 21 Apr 2026 15:05:38 -0700 Subject: [PATCH] chore(cf-orch): route recipe LLM calls through vllm with model candidates + CF_APP_NAME Switches recipe generation service type from 'cf-text' to 'vllm' so the coordinator can route to quantized small models (Qwen2.5-3B, Phi-4-mini) rather than the full text backend. Passes CF_APP_NAME for per-product VRAM/request analytics in the coordinator dashboard. - llm_recipe.py: _SERVICE_TYPE = 'vllm'; _MODEL_CANDIDATES list; passes model_candidates and pipeline= to CFOrchClient.allocate() - compose.cloud.yml: CF_APP_NAME=kiwi env var for coordinator attribution --- app/services/recipe/llm_recipe.py | 5 ++++- compose.cloud.yml | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/app/services/recipe/llm_recipe.py b/app/services/recipe/llm_recipe.py index 18ee718..76c59e1 100644 --- a/app/services/recipe/llm_recipe.py +++ b/app/services/recipe/llm_recipe.py @@ -149,7 +149,8 @@ class LLMRecipeGenerator: return "\n".join(lines) - _SERVICE_TYPE = "cf-text" + _SERVICE_TYPE = "vllm" + _MODEL_CANDIDATES = ["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"] _TTL_S = 300.0 _CALLER = "kiwi-recipe" @@ -167,8 +168,10 @@ class LLMRecipeGenerator: client = CFOrchClient(cf_orch_url) return client.allocate( service=self._SERVICE_TYPE, + model_candidates=self._MODEL_CANDIDATES, ttl_s=self._TTL_S, caller=self._CALLER, + pipeline=os.environ.get("CF_APP_NAME") or None, ) except Exception as exc: logger.debug("CFOrchClient init failed, falling back to direct URL: %s", exc) diff --git a/compose.cloud.yml b/compose.cloud.yml index 757578a..74a2b6f 100644 --- a/compose.cloud.yml +++ b/compose.cloud.yml @@ -21,6 +21,8 @@ services: CLOUD_AUTH_BYPASS_IPS: ${CLOUD_AUTH_BYPASS_IPS:-} # cf-orch: route LLM calls through the coordinator for managed GPU inference CF_ORCH_URL: http://host.docker.internal:7700 + # Product identifier for coordinator analytics — per-product VRAM/request breakdown + CF_APP_NAME: kiwi # Community PostgreSQL — shared across CF products; unset = community features unavailable (fail soft) COMMUNITY_DB_URL: ${COMMUNITY_DB_URL:-} COMMUNITY_PSEUDONYM_SALT: ${COMMUNITY_PSEUDONYM_SALT:-}