From eba536070cdbecc2b5cd0f016ba179392420056a Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sun, 19 Apr 2026 20:24:21 -0700 Subject: [PATCH] fix(recipe): fail fast on cf-orch 429 instead of slow LLMRouter fallback When the coordinator returns 429 (all nodes at max_concurrent limit), the previous code fell back to LLMRouter which is also overloaded at high concurrency. This caused the request to hang for ~60s before nginx returned a 504. Now: detect 429/max_concurrent in the RuntimeError message and return "" immediately so the caller gets an empty RecipeResult (graceful degradation) rather than a timeout. --- app/services/recipe/llm_recipe.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/app/services/recipe/llm_recipe.py b/app/services/recipe/llm_recipe.py index 51cbc24..00afb0c 100644 --- a/app/services/recipe/llm_recipe.py +++ b/app/services/recipe/llm_recipe.py @@ -181,6 +181,19 @@ class LLMRecipeGenerator: try: alloc = ctx.__enter__() except Exception as exc: + msg = str(exc) + # 429 = coordinator at capacity (all nodes at max_concurrent limit). + # Don't fall back to LLMRouter — it's also overloaded and the slow + # fallback causes nginx 504s. Return "" fast so the caller degrades + # gracefully (empty recipe result) rather than timing out. + if "429" in msg or "max_concurrent" in msg.lower(): + logger.info("cf-orch at capacity — returning empty result (graceful degradation)") + if ctx is not None: + try: + ctx.__exit__(None, None, None) + except Exception: + pass + return "" logger.debug("cf-orch allocation failed, falling back to LLMRouter: %s", exc) ctx = None # __enter__ raised — do not call __exit__