feat(llm): add cf_orch allocation support to LLMRouter backends
This commit is contained in:
parent
f741e6a80b
commit
17a24173f7
1 changed file with 39 additions and 0 deletions
|
|
@ -38,6 +38,33 @@ class LLMRouter:
|
|||
models = client.models.list()
|
||||
return models.data[0].id
|
||||
|
||||
def _try_cf_orch_alloc(self, backend: dict) -> "tuple | None":
|
||||
"""
|
||||
If backend config has a cf_orch block and CF_ORCH_URL is set (env takes
|
||||
precedence over yaml url), allocate via cf-orch and return (ctx, alloc).
|
||||
Returns None if not configured or allocation fails.
|
||||
Caller MUST call ctx.__exit__(None, None, None) in a finally block.
|
||||
"""
|
||||
import os
|
||||
orch_cfg = backend.get("cf_orch")
|
||||
if not orch_cfg:
|
||||
return None
|
||||
orch_url = os.environ.get("CF_ORCH_URL", orch_cfg.get("url", ""))
|
||||
if not orch_url:
|
||||
return None
|
||||
try:
|
||||
from circuitforge_core.resources.client import CFOrchClient
|
||||
client = CFOrchClient(orch_url)
|
||||
service = orch_cfg.get("service", "vllm")
|
||||
candidates = orch_cfg.get("model_candidates", [])
|
||||
ttl_s = float(orch_cfg.get("ttl_s", 3600.0))
|
||||
ctx = client.allocate(service, model_candidates=candidates, ttl_s=ttl_s, caller="llm-router")
|
||||
alloc = ctx.__enter__()
|
||||
return (ctx, alloc)
|
||||
except Exception as exc:
|
||||
print(f"[LLMRouter] cf_orch allocation failed, using base_url directly: {exc}")
|
||||
return None
|
||||
|
||||
def complete(self, prompt: str, system: str | None = None,
|
||||
model_override: str | None = None,
|
||||
fallback_order: list[str] | None = None,
|
||||
|
|
@ -105,6 +132,12 @@ class LLMRouter:
|
|||
if not self._is_reachable(backend["base_url"]):
|
||||
print(f"[LLMRouter] {name}: unreachable, skipping")
|
||||
continue
|
||||
# --- cf_orch: optionally override base_url with coordinator-allocated URL ---
|
||||
orch_ctx = orch_alloc = None
|
||||
orch_result = self._try_cf_orch_alloc(backend)
|
||||
if orch_result is not None:
|
||||
orch_ctx, orch_alloc = orch_result
|
||||
backend = {**backend, "base_url": orch_alloc.url + "/v1"}
|
||||
try:
|
||||
client = OpenAI(
|
||||
base_url=backend["base_url"],
|
||||
|
|
@ -136,6 +169,12 @@ class LLMRouter:
|
|||
except Exception as e:
|
||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
||||
continue
|
||||
finally:
|
||||
if orch_ctx is not None:
|
||||
try:
|
||||
orch_ctx.__exit__(None, None, None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
elif backend["type"] == "anthropic":
|
||||
api_key = os.environ.get(backend["api_key_env"], "")
|
||||
|
|
|
|||
Loading…
Reference in a new issue