feat(orch): add ServiceRegistry — allocation tracking + idle state machine

2026-04-02 12:22:46 -07:00 · 2026-04-02 12:22:46 -07:00 · 9754f522d9
commit 9754f522d9
parent 17a24173f7
2 changed files with 203 additions and 0 deletions
--- a/circuitforge_core/resources/coordinator/service_registry.py
+++ b/circuitforge_core/resources/coordinator/service_registry.py
@ -0,0 +1,140 @@
 from __future__ import annotations
 import dataclasses
 import time
 import uuid
 from dataclasses import dataclass
 from typing import Literal
@dataclass
 class ServiceAllocation:
    allocation_id: str
    service: str
    node_id: str
    gpu_id: int
    model: str | None
    caller: str
    url: str
    created_at: float
    expires_at: float  # 0 = no expiry
@dataclass
 class ServiceInstance:
    service: str
    node_id: str
    gpu_id: int
    state: Literal["starting", "running", "idle", "stopped"]
    model: str | None
    url: str | None
    idle_since: float | None = None
 class ServiceRegistry:
    """
    In-memory registry of service allocations and instance state.
    Allocations: per-caller request — many per service instance.
    Instances: per (service, node_id, gpu_id) — one per running container.
    """
    def __init__(self) -> None:
        self._allocations: dict[str, ServiceAllocation] = {}
        self._instances: dict[str, ServiceInstance] = {}  # key: "service:node_id:gpu_id"
    # ── allocation API ────────────────────────────────────────────────
    def allocate(
        self,
        service: str,
        node_id: str,
        gpu_id: int,
        model: str | None,
        url: str,
        caller: str,
        ttl_s: float,
    ) -> ServiceAllocation:
        alloc = ServiceAllocation(
            allocation_id=str(uuid.uuid4()),
            service=service,
            node_id=node_id,
            gpu_id=gpu_id,
            model=model,
            caller=caller,
            url=url,
            created_at=time.time(),
            expires_at=time.time() + ttl_s if ttl_s > 0 else 0.0,
        )
        self._allocations[alloc.allocation_id] = alloc
        # If an instance exists in idle/stopped state, mark it running again
        key = f"{service}:{node_id}:{gpu_id}"
        if key in self._instances:
            inst = self._instances[key]
            if inst.state in ("idle", "stopped"):
                self._instances[key] = dataclasses.replace(
                    inst, state="running", idle_since=None
                )
        return alloc
    def release(self, allocation_id: str) -> bool:
        alloc = self._allocations.pop(allocation_id, None)
        if alloc is None:
            return False
        # If no active allocations remain for this instance, mark it idle
        key = f"{alloc.service}:{alloc.node_id}:{alloc.gpu_id}"
        if self.active_allocations(alloc.service, alloc.node_id) == 0:
            if key in self._instances:
                self._instances[key] = dataclasses.replace(
                    self._instances[key], state="idle", idle_since=time.time()
                )
        return True
    def active_allocations(self, service: str, node_id: str) -> int:
        return sum(
            1 for a in self._allocations.values()
            if a.service == service and a.node_id == node_id
        )
    # ── instance API ─────────────────────────────────────────────────
    def upsert_instance(
        self,
        service: str,
        node_id: str,
        gpu_id: int,
        state: Literal["starting", "running", "idle", "stopped"],
        model: str | None,
        url: str | None,
    ) -> ServiceInstance:
        key = f"{service}:{node_id}:{gpu_id}"
        existing = self._instances.get(key)
        idle_since: float | None = None
        if state == "idle":
            # Preserve idle_since if already idle; set now if transitioning into idle
            idle_since = existing.idle_since if (existing and existing.state == "idle") else time.time()
        inst = ServiceInstance(
            service=service, node_id=node_id, gpu_id=gpu_id,
            state=state, model=model, url=url, idle_since=idle_since,
        )
        self._instances[key] = inst
        return inst
    def all_instances(self) -> list[ServiceInstance]:
        return list(self._instances.values())
    def idle_past_timeout(self, idle_stop_config: dict[str, int]) -> list[ServiceInstance]:
        """
        Return instances in 'idle' state whose idle time exceeds their configured timeout.
        idle_stop_config: {service_name: seconds} — 0 means never stop automatically.
        """
        now = time.time()
        result = []
        for inst in self._instances.values():
            if inst.state != "idle" or inst.idle_since is None:
                continue
            timeout = idle_stop_config.get(inst.service, 0)
            if timeout > 0 and (now - inst.idle_since) >= timeout:
                result.append(inst)
        return result
--- a/tests/test_resources/test_service_registry.py
+++ b/tests/test_resources/test_service_registry.py
@ -0,0 +1,63 @@
 import time
 import dataclasses
 import pytest
 from circuitforge_core.resources.coordinator.service_registry import (
    ServiceRegistry, ServiceAllocation, ServiceInstance,
 )
@pytest.fixture
 def registry():
    return ServiceRegistry()
 def test_allocate_creates_allocation(registry):
    alloc = registry.allocate(
        service="vllm", node_id="heimdall", gpu_id=0,
        model="Ouro-1.4B", url="http://heimdall:8000",
        caller="test", ttl_s=300.0,
    )
    assert alloc.service == "vllm"
    assert alloc.node_id == "heimdall"
    assert alloc.allocation_id  # non-empty UUID string
 def test_active_allocations_count(registry):
    registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "a", 300.0)
    registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "b", 300.0)
    assert registry.active_allocations("vllm", "heimdall") == 2
 def test_release_decrements_count(registry):
    alloc = registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "a", 300.0)
    registry.release(alloc.allocation_id)
    assert registry.active_allocations("vllm", "heimdall") == 0
 def test_release_nonexistent_returns_false(registry):
    assert registry.release("nonexistent-id") is False
 def test_upsert_instance_sets_running_state(registry):
    registry.upsert_instance("vllm", "heimdall", 0, state="running",
                              model="Ouro-1.4B", url="http://heimdall:8000")
    instances = registry.all_instances()
    assert len(instances) == 1
    assert instances[0].state == "running"
 def test_release_last_alloc_marks_instance_idle(registry):
    registry.upsert_instance("vllm", "heimdall", 0, state="running",
                              model="Ouro-1.4B", url="http://heimdall:8000")
    alloc = registry.allocate("vllm", "heimdall", 0, "Ouro-1.4B", "http://heimdall:8000", "a", 300.0)
    registry.release(alloc.allocation_id)
    instance = registry.all_instances()[0]
    assert instance.state == "idle"
    assert instance.idle_since is not None
 def test_new_alloc_on_idle_instance_marks_it_running(registry):
    registry.upsert_instance("vllm", "heimdall", 0, state="idle",
                              model="M", url="http://h:8000")
    registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "x", 300.0)
    assert registry.all_instances()[0].state == "running"