feat(llm): task-based cf-orch allocation in LLMRouter (v0.21.0)
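_try_cf_orch_alloc now checks the cf_orch block for a `task` key and a `product` key (`product` falls back to the CF_APP_NAME environment variable when the key is absent). When both resolve to a value, it calls client.task_allocate(product, task, ttl_s=...) instead of the service-based allocate(). Supports peregrine#115 task-model routing. Existing service-based configs (no `task` key) are unaffected.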
This commit is contained in:
parent
af66877b51
commit
93d36346c1
2 changed files with 21 additions and 7 deletions
|
|
@ -190,6 +190,14 @@ class LLMRouter:
|
|||
"""
|
||||
If backend config has a cf_orch block and CF_ORCH_URL is set (env takes
|
||||
precedence over yaml url), allocate via cf-orch and return (ctx, alloc).
|
||||
|
||||
Two allocation modes:
|
||||
- task-based (preferred): cf_orch block has a `task` key and a `product` key (`product` falls back to the CF_APP_NAME env var).
|
||||
Calls POST /api/inference/task; coordinator resolves model/node from
|
||||
assignments.yaml. No hardcoded model IDs in product config.
|
||||
- service-based (legacy): cf_orch block has `service` key.
|
||||
Calls allocate(service=...) directly.
|
||||
|
||||
Returns None if not configured or allocation fails.
|
||||
Caller MUST call ctx.__exit__(None, None, None) in a finally block.
|
||||
"""
|
||||
|
|
@ -205,16 +213,22 @@ class LLMRouter:
|
|||
from circuitforge_orch.client import CFOrchClient
|
||||
|
||||
client = CFOrchClient(orch_url)
|
||||
service = orch_cfg.get("service", "vllm")
|
||||
candidates = orch_cfg.get("model_candidates", [])
|
||||
ttl_s = float(orch_cfg.get("ttl_s", 3600.0))
|
||||
# CF_APP_NAME identifies the calling product (kiwi, peregrine, etc.)
|
||||
# in coordinator analytics — set in each product's .env.
|
||||
|
||||
# Task-based allocation: product+task → coordinator resolves model/node.
|
||||
task = orch_cfg.get("task")
|
||||
product = orch_cfg.get("product") or os.environ.get("CF_APP_NAME") or None
|
||||
if task and product:
|
||||
ctx = client.task_allocate(product, task, ttl_s=ttl_s)
|
||||
alloc = ctx.__enter__()
|
||||
return (ctx, alloc)
|
||||
|
||||
# Service-based allocation (legacy path).
|
||||
cf_app = os.environ.get("CF_APP_NAME") or None
|
||||
caller = f"{cf_app}.llm-router" if cf_app else "llm-router"
|
||||
ctx = client.allocate(
|
||||
service,
|
||||
model_candidates=candidates,
|
||||
orch_cfg.get("service", "vllm"),
|
||||
model_candidates=orch_cfg.get("model_candidates", []),
|
||||
ttl_s=ttl_s,
|
||||
caller=caller,
|
||||
pipeline=cf_app,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||
|
||||
[project]
|
||||
name = "circuitforge-core"
|
||||
version = "0.20.0"
|
||||
version = "0.21.0"
|
||||
description = "Shared scaffold for CircuitForge products (MIT)"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
|
|
|
|||
Loading…
Reference in a new issue