diff --git a/app/services/recipe/llm_recipe.py b/app/services/recipe/llm_recipe.py
index 18ee718..76c59e1 100644
--- a/app/services/recipe/llm_recipe.py
+++ b/app/services/recipe/llm_recipe.py
@@ -149,7 +149,8 @@ class LLMRecipeGenerator:
 
         return "\n".join(lines)
 
-    _SERVICE_TYPE = "cf-text"
+    _SERVICE_TYPE = "vllm"
+    _MODEL_CANDIDATES = ["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"]
     _TTL_S = 300.0
     _CALLER = "kiwi-recipe"
 
@@ -167,8 +168,10 @@ class LLMRecipeGenerator:
             client = CFOrchClient(cf_orch_url)
             return client.allocate(
                 service=self._SERVICE_TYPE,
+                model_candidates=self._MODEL_CANDIDATES,
                 ttl_s=self._TTL_S,
                 caller=self._CALLER,
+                pipeline=os.environ.get("CF_APP_NAME") or None,
             )
         except Exception as exc:
             logger.debug("CFOrchClient init failed, falling back to direct URL: %s", exc)
diff --git a/compose.cloud.yml b/compose.cloud.yml
index 757578a..74a2b6f 100644
--- a/compose.cloud.yml
+++ b/compose.cloud.yml
@@ -21,6 +21,8 @@ services:
       CLOUD_AUTH_BYPASS_IPS: ${CLOUD_AUTH_BYPASS_IPS:-}
       # cf-orch: route LLM calls through the coordinator for managed GPU inference
       CF_ORCH_URL: http://host.docker.internal:7700
+      # Product identifier for coordinator analytics — per-product VRAM/request breakdown
+      CF_APP_NAME: kiwi
       # Community PostgreSQL — shared across CF products; unset = community features unavailable (fail soft)
       COMMUNITY_DB_URL: ${COMMUNITY_DB_URL:-}
       COMMUNITY_PSEUDONYM_SALT: ${COMMUNITY_PSEUDONYM_SALT:-}