From eba536070cdbecc2b5cd0f016ba179392420056a Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Sun, 19 Apr 2026 20:24:21 -0700
Subject: [PATCH] fix(recipe): fail fast on cf-orch 429 instead of slow
 LLMRouter fallback

When the coordinator returns 429 (all nodes at their max_concurrent limit), the
previous code fell back to LLMRouter, which is also overloaded at high
concurrency. This caused the request to hang for ~60s before nginx returned a 504.

Now: detect "429" or "max_concurrent" in the allocation exception's message and
return "" immediately, so the caller gets an empty RecipeResult (graceful
degradation) rather than a timeout.
---
 app/services/recipe/llm_recipe.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/app/services/recipe/llm_recipe.py b/app/services/recipe/llm_recipe.py
index 51cbc24..00afb0c 100644
--- a/app/services/recipe/llm_recipe.py
+++ b/app/services/recipe/llm_recipe.py
@@ -181,6 +181,19 @@ class LLMRecipeGenerator:
         try:
             alloc = ctx.__enter__()
         except Exception as exc:
+            msg = str(exc)
+            # 429 = coordinator at capacity (all nodes at max_concurrent limit).
+            # Don't fall back to LLMRouter — it's also overloaded and the slow
+            # fallback causes nginx 504s. Return "" fast so the caller degrades
+            # gracefully (empty recipe result) rather than timing out.
+            if "429" in msg or "max_concurrent" in msg.lower():
+                logger.info("cf-orch at capacity — returning empty result (graceful degradation)")
+                if ctx is not None:
+                    try:
+                        ctx.__exit__(None, None, None)
+                    except Exception:
+                        pass
+                return ""
             logger.debug("cf-orch allocation failed, falling back to LLMRouter: %s", exc)
             ctx = None  # __enter__ raised — do not call __exit__