feat(llm): add cf_orch allocation support to LLMRouter backends
This commit is contained in:
parent
f741e6a80b
commit
17a24173f7
1 changed files with 39 additions and 0 deletions
|
|
@ -38,6 +38,33 @@ class LLMRouter:
|
||||||
models = client.models.list()
|
models = client.models.list()
|
||||||
return models.data[0].id
|
return models.data[0].id
|
||||||
|
|
||||||
|
def _try_cf_orch_alloc(self, backend: dict) -> "tuple | None":
    """
    Try to allocate a service instance through cf-orch for this backend.

    Allocation is attempted only when the backend config carries a
    ``cf_orch`` mapping and a coordinator URL is available. The
    ``CF_ORCH_URL`` environment variable takes precedence over the yaml
    ``url`` key; a variable that is set but empty still wins and therefore
    disables allocation.

    Args:
        backend: Backend config dict. Its optional ``cf_orch`` block may
            provide ``url``, ``service`` (default ``"vllm"``),
            ``model_candidates`` (default empty list) and ``ttl_s``
            (default 3600.0, converted with ``float``).

    Returns:
        ``(ctx, alloc)`` where ``ctx`` is the already-entered object
        returned by ``CFOrchClient.allocate`` and ``alloc`` is whatever
        its ``__enter__`` returned. ``None`` if cf-orch is not configured,
        the ``cf_orch`` block is not a mapping, or allocation fails.

    Caller MUST call ctx.__exit__(None, None, None) in a finally block.
    """
    import os
    from collections.abc import Mapping

    orch_cfg = backend.get("cf_orch")
    if not orch_cfg:
        return None

    # A truthy non-mapping value (e.g. `cf_orch: true` in yaml) used to raise
    # AttributeError here, outside the try block, and abort the caller's
    # whole fallback loop. Treat it as "not configured" instead.
    if not isinstance(orch_cfg, Mapping):
        print(
            "[LLMRouter] cf_orch config must be a mapping, got "
            f"{type(orch_cfg).__name__}; using base_url directly"
        )
        return None

    orch_url = os.environ.get("CF_ORCH_URL", orch_cfg.get("url", ""))
    if not orch_url:
        return None

    try:
        # Imported lazily so the router still works when the optional
        # circuitforge_core package is unavailable (ImportError is caught).
        from circuitforge_core.resources.client import CFOrchClient

        client = CFOrchClient(orch_url)
        service = orch_cfg.get("service", "vllm")
        # `model_candidates: null` in yaml yields None; normalise to a list.
        candidates = orch_cfg.get("model_candidates") or []
        ttl_s = float(orch_cfg.get("ttl_s", 3600.0))
        ctx = client.allocate(
            service,
            model_candidates=candidates,
            ttl_s=ttl_s,
            caller="llm-router",
        )
        # Entered manually because the allocation must outlive this call;
        # the caller is responsible for the matching __exit__.
        alloc = ctx.__enter__()
        return (ctx, alloc)
    except Exception as exc:
        # Best-effort: any failure falls back to the backend's own base_url.
        print(f"[LLMRouter] cf_orch allocation failed, using base_url directly: {exc}")
        return None
|
||||||
|
|
||||||
def complete(self, prompt: str, system: str | None = None,
|
def complete(self, prompt: str, system: str | None = None,
|
||||||
model_override: str | None = None,
|
model_override: str | None = None,
|
||||||
fallback_order: list[str] | None = None,
|
fallback_order: list[str] | None = None,
|
||||||
|
|
@ -105,6 +132,12 @@ class LLMRouter:
|
||||||
if not self._is_reachable(backend["base_url"]):
|
if not self._is_reachable(backend["base_url"]):
|
||||||
print(f"[LLMRouter] {name}: unreachable, skipping")
|
print(f"[LLMRouter] {name}: unreachable, skipping")
|
||||||
continue
|
continue
|
||||||
|
# --- cf_orch: optionally override base_url with coordinator-allocated URL ---
|
||||||
|
orch_ctx = orch_alloc = None
|
||||||
|
orch_result = self._try_cf_orch_alloc(backend)
|
||||||
|
if orch_result is not None:
|
||||||
|
orch_ctx, orch_alloc = orch_result
|
||||||
|
backend = {**backend, "base_url": orch_alloc.url + "/v1"}
|
||||||
try:
|
try:
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
base_url=backend["base_url"],
|
base_url=backend["base_url"],
|
||||||
|
|
@ -136,6 +169,12 @@ class LLMRouter:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
||||||
continue
|
continue
|
||||||
|
finally:
|
||||||
|
if orch_ctx is not None:
|
||||||
|
try:
|
||||||
|
orch_ctx.__exit__(None, None, None)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
elif backend["type"] == "anthropic":
|
elif backend["type"] == "anthropic":
|
||||||
api_key = os.environ.get(backend["api_key_env"], "")
|
api_key = os.environ.get(backend["api_key_env"], "")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue