feat(llm): task-based cf-orch allocation in LLMRouter (v0.21.0)

_try_cf_orch_alloc now checks the cf_orch block for a `task` key and a
product (cf_orch.product, falling back to the CF_APP_NAME env var).
When both are set, it uses client.task_allocate(product, task, ttl_s=...)
instead of the service-based allocate(). Supports peregrine#115 task-model routing.
Existing service-based configs (no `task` key) are unaffected.
This commit is contained in:
pyr0ball 2026-05-17 19:59:48 -07:00
parent af66877b51
commit 93d36346c1
2 changed files with 21 additions and 7 deletions

View file

@ -190,6 +190,14 @@ class LLMRouter:
""" """
If backend config has a cf_orch block and CF_ORCH_URL is set (env takes If backend config has a cf_orch block and CF_ORCH_URL is set (env takes
precedence over yaml url), allocate via cf-orch and return (ctx, alloc). precedence over yaml url), allocate via cf-orch and return (ctx, alloc).
Two allocation modes:
- task-based (preferred): cf_orch block has `product` + `task` keys.
Calls POST /api/inference/task; coordinator resolves model/node from
assignments.yaml. No hardcoded model IDs in product config.
- service-based (legacy): cf_orch block has `service` key.
Calls allocate(service=...) directly.
Returns None if not configured or allocation fails. Returns None if not configured or allocation fails.
Caller MUST call ctx.__exit__(None, None, None) in a finally block. Caller MUST call ctx.__exit__(None, None, None) in a finally block.
""" """
@ -205,16 +213,22 @@ class LLMRouter:
from circuitforge_orch.client import CFOrchClient from circuitforge_orch.client import CFOrchClient
client = CFOrchClient(orch_url) client = CFOrchClient(orch_url)
service = orch_cfg.get("service", "vllm")
candidates = orch_cfg.get("model_candidates", [])
ttl_s = float(orch_cfg.get("ttl_s", 3600.0)) ttl_s = float(orch_cfg.get("ttl_s", 3600.0))
# CF_APP_NAME identifies the calling product (kiwi, peregrine, etc.)
# in coordinator analytics — set in each product's .env. # Task-based allocation: product+task → coordinator resolves model/node.
task = orch_cfg.get("task")
product = orch_cfg.get("product") or os.environ.get("CF_APP_NAME") or None
if task and product:
ctx = client.task_allocate(product, task, ttl_s=ttl_s)
alloc = ctx.__enter__()
return (ctx, alloc)
# Service-based allocation (legacy path).
cf_app = os.environ.get("CF_APP_NAME") or None cf_app = os.environ.get("CF_APP_NAME") or None
caller = f"{cf_app}.llm-router" if cf_app else "llm-router" caller = f"{cf_app}.llm-router" if cf_app else "llm-router"
ctx = client.allocate( ctx = client.allocate(
service, orch_cfg.get("service", "vllm"),
model_candidates=candidates, model_candidates=orch_cfg.get("model_candidates", []),
ttl_s=ttl_s, ttl_s=ttl_s,
caller=caller, caller=caller,
pipeline=cf_app, pipeline=cf_app,

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "circuitforge-core" name = "circuitforge-core"
version = "0.20.0" version = "0.21.0"
description = "Shared scaffold for CircuitForge products (MIT)" description = "Shared scaffold for CircuitForge products (MIT)"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [