feat(core): add CFOrchClient sync+async context manager

Implements CFOrchClient with allocate() (sync contextmanager) and allocate_async() (async contextmanager) for cf-orch GPU resource allocation. Releases allocation on exit; ignores 404 on release; raises RuntimeError on non-2xx allocation response. Exports CFOrchClient and Allocation from circuitforge_core.resources. Note: async test uses unittest.mock rather than httpretty — httpretty only patches stdlib sockets and does not intercept httpx async (anyio) transport.
2026-04-02 11:44:35 -07:00 · 2026-04-02 11:44:35 -07:00 · defaf39883
commit defaf39883
parent 8201f6b3e9
3 changed files with 221 additions and 0 deletions
--- a/circuitforge_core/resources/__init__.py
+++ b/circuitforge_core/resources/__init__.py
@@ -0,0 +1 @@
+from circuitforge_core.resources.client import CFOrchClient, Allocation  # noqa: F401
--- a/circuitforge_core/resources/client.py
+++ b/circuitforge_core/resources/client.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager, asynccontextmanager
+from dataclasses import dataclass
+
+import httpx
+
+# Module-level logger; CFOrchClient reports failed release calls through it.
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class Allocation:
+    """Details of one cf-orch allocation as handed to the caller.
+
+    CFOrchClient builds instances from the coordinator's JSON response to an
+    allocate request.
+    """
+
+    # Identifier placed in the release (DELETE) URL when the context exits.
+    allocation_id: str
+    # Service name the caller requested; copied from the call, not the response.
+    service: str
+    node_id: str
+    gpu_id: int
+    # None when the coordinator's response has no "model" key.
+    model: str | None
+    # Inference endpoint of the allocated service.
+    url: str
+    # Both flags fall back to False when absent from the response.
+    started: bool
+    warm: bool
+
+
+class CFOrchClient:
+    """
+    Client for cf-orch coordinator allocation.
+
+    Sync usage (in LLMRouter or other sync code):
+        client = CFOrchClient(os.environ["CF_ORCH_URL"])
+        with client.allocate("vllm", model_candidates=["Ouro-1.4B"]) as alloc:
+            # alloc.url is the inference endpoint
+
+    Async usage (in FastAPI apps):
+        async with client.allocate_async("vllm", model_candidates=["Ouro-1.4B"]) as alloc:
+            ...
+
+    A non-2xx allocation response raises RuntimeError. Release on exit is
+    best-effort and never raises: a 404 is ignored silently, any other
+    non-2xx status is logged as a warning, and transport errors are logged
+    at debug level.
+
+    Raises ValueError immediately if coordinator_url is empty.
+    """
+
+    # Request timeouts in seconds for the allocate and release calls.
+    _ALLOCATE_TIMEOUT_S = 120.0
+    _RELEASE_TIMEOUT_S = 10.0
+
+    def __init__(self, coordinator_url: str) -> None:
+        """Store the coordinator base URL with any trailing slash removed.
+
+        Raises:
+            ValueError: if coordinator_url is empty.
+        """
+        if not coordinator_url:
+            raise ValueError("coordinator_url is empty — cf-orch not configured")
+        self._url = coordinator_url.rstrip("/")
+
+    def _allocate_url(self, service: str) -> str:
+        """Return the coordinator endpoint that allocates *service*."""
+        return f"{self._url}/api/services/{service}/allocate"
+
+    def _release_url(self, service: str, allocation_id: str) -> str:
+        """Return the coordinator endpoint that releases one allocation."""
+        return f"{self._url}/api/services/{service}/allocations/{allocation_id}"
+
+    def _build_body(self, model_candidates: list[str] | None, ttl_s: float, caller: str) -> dict:
+        """Build the JSON payload for an allocate request.
+
+        A missing or empty model_candidates is sent as an empty list.
+        """
+        return {
+            "model_candidates": model_candidates or [],
+            "ttl_s": ttl_s,
+            "caller": caller,
+        }
+
+    def _parse_allocation(self, data: dict, service: str) -> Allocation:
+        """Convert the coordinator's allocate response into an Allocation.
+
+        Raises:
+            KeyError: if allocation_id, node_id, gpu_id or url is missing.
+        """
+        return Allocation(
+            allocation_id=data["allocation_id"],
+            service=service,
+            node_id=data["node_id"],
+            gpu_id=data["gpu_id"],
+            model=data.get("model"),
+            url=data["url"],
+            started=data.get("started", False),
+            warm=data.get("warm", False),
+        )
+
+    @staticmethod
+    def _raise_if_rejected(resp: httpx.Response, service: str) -> None:
+        """Raise RuntimeError when the allocate response is not 2xx."""
+        if not resp.is_success:
+            raise RuntimeError(
+                f"cf-orch allocation failed for {service!r}: "
+                f"HTTP {resp.status_code} — {resp.text[:200]}"
+            )
+
+    @staticmethod
+    def _log_release_status(resp: httpx.Response, allocation_id: str) -> None:
+        """Log a release response that is neither 2xx nor 404.
+
+        A 404 is treated as success and stays silent. Anything else is
+        surfaced as a warning so a failed release is visible in the logs.
+        """
+        if resp.is_success or resp.status_code == 404:
+            return
+        logger.warning(
+            "cf-orch release of allocation %s returned HTTP %s (non-fatal)",
+            allocation_id,
+            resp.status_code,
+        )
+
+    @contextmanager
+    def allocate(
+        self,
+        service: str,
+        *,
+        model_candidates: list[str] | None = None,
+        ttl_s: float = 3600.0,
+        caller: str = "",
+    ):
+        """Sync context manager. Allocates on enter, releases on exit.
+
+        Yields:
+            Allocation parsed from the coordinator's response.
+
+        Raises:
+            RuntimeError: if the coordinator answers the allocate request
+                with a non-2xx status.
+        """
+        resp = httpx.post(
+            self._allocate_url(service),
+            json=self._build_body(model_candidates, ttl_s, caller),
+            timeout=self._ALLOCATE_TIMEOUT_S,
+        )
+        self._raise_if_rejected(resp, service)
+        alloc = self._parse_allocation(resp.json(), service)
+        # Fix the release target now so that mutating the yielded object
+        # cannot redirect the release to a different allocation.
+        allocation_id = alloc.allocation_id
+        release_url = self._release_url(service, allocation_id)
+        try:
+            yield alloc
+        finally:
+            # Best-effort: nothing raised here may mask the caller's own error.
+            try:
+                released = httpx.delete(release_url, timeout=self._RELEASE_TIMEOUT_S)
+                self._log_release_status(released, allocation_id)
+            except Exception as exc:
+                logger.debug("cf-orch release failed (non-fatal): %s", exc)
+
+    @asynccontextmanager
+    async def allocate_async(
+        self,
+        service: str,
+        *,
+        model_candidates: list[str] | None = None,
+        ttl_s: float = 3600.0,
+        caller: str = "",
+    ):
+        """Async context manager. Allocates on enter, releases on exit.
+
+        Yields:
+            Allocation parsed from the coordinator's response.
+
+        Raises:
+            RuntimeError: if the coordinator answers the allocate request
+                with a non-2xx status.
+        """
+        async with httpx.AsyncClient(timeout=self._ALLOCATE_TIMEOUT_S) as client:
+            resp = await client.post(
+                self._allocate_url(service),
+                json=self._build_body(model_candidates, ttl_s, caller),
+            )
+            self._raise_if_rejected(resp, service)
+            alloc = self._parse_allocation(resp.json(), service)
+            # Fix the release target now so that mutating the yielded object
+            # cannot redirect the release to a different allocation.
+            allocation_id = alloc.allocation_id
+            release_url = self._release_url(service, allocation_id)
+            try:
+                yield alloc
+            finally:
+                # Best-effort: nothing raised here may mask the caller's own error.
+                try:
+                    released = await client.delete(
+                        release_url,
+                        timeout=self._RELEASE_TIMEOUT_S,
+                    )
+                    self._log_release_status(released, allocation_id)
+                except Exception as exc:
+                    logger.debug("cf-orch async release failed (non-fatal): %s", exc)
--- a/tests/test_resources/test_client.py
+++ b/tests/test_resources/test_client.py
@@ -0,0 +1,94 @@
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+import httpretty
+from circuitforge_core.resources.client import CFOrchClient, Allocation
+
+# Canned JSON body served by the mocked allocate endpoint in
+# test_sync_allocate_returns_allocation.
+_ALLOC_BODY = (
+    '{"allocation_id":"abc123","service":"vllm","node_id":"heimdall",'
+    '"gpu_id":0,"model":"Ouro-1.4B","url":"http://heimdall:8000","started":false,"warm":true}'
+)
+
+
+@httpretty.activate
+def test_sync_allocate_returns_allocation():
+    """Happy path: allocate yields the parsed Allocation, then sends a DELETE."""
+    # Register the release endpoint first; the two registrations are independent.
+    httpretty.register_uri(
+        httpretty.DELETE,
+        "http://orch:7700/api/services/vllm/allocations/abc123",
+        body='{"released":true}',
+        content_type="application/json",
+    )
+    httpretty.register_uri(
+        httpretty.POST,
+        "http://orch:7700/api/services/vllm/allocate",
+        body=_ALLOC_BODY,
+        content_type="application/json",
+    )
+    orch = CFOrchClient("http://orch:7700")
+    with orch.allocate("vllm", model_candidates=["Ouro-1.4B"], caller="test") as lease:
+        assert isinstance(lease, Allocation)
+        observed = (lease.url, lease.model, lease.allocation_id)
+        assert observed == ("http://heimdall:8000", "Ouro-1.4B", "abc123")
+    # The final request on the wire must be the release call.
+    final_request = httpretty.last_request()
+    assert final_request.method == "DELETE"
+
+
+@httpretty.activate
+def test_sync_allocate_ignores_404_on_release():
+    """A 404 from the release endpoint must not surface as an exception."""
+    httpretty.register_uri(
+        httpretty.POST, "http://orch:7700/api/services/vllm/allocate",
+        body='{"allocation_id":"xyz","service":"vllm","node_id":"a","gpu_id":0,'
+             '"model":"m","url":"http://a:8000","started":false,"warm":false}',
+        content_type="application/json",
+    )
+    httpretty.register_uri(
+        httpretty.DELETE, "http://orch:7700/api/services/vllm/allocations/xyz",
+        status=404, body='{"detail":"not found"}', content_type="application/json",
+    )
+    client = CFOrchClient("http://orch:7700")
+    with client.allocate("vllm", model_candidates=["m"]) as alloc:
+        assert alloc.url == "http://a:8000"
+    # No exception raised — 404 on release is silently ignored.
+    # Also confirm the release was attempted, so this test cannot pass merely
+    # because the client never sent the DELETE.
+    assert httpretty.last_request().method == "DELETE"
+
+
+@httpretty.activate
+def test_sync_allocate_raises_on_503():
+    """A 503 from the allocate endpoint surfaces as RuntimeError on entry."""
+    httpretty.register_uri(
+        httpretty.POST,
+        "http://orch:7700/api/services/vllm/allocate",
+        status=503,
+        body='{"detail":"no capacity"}',
+        content_type="application/json",
+    )
+    orch = CFOrchClient("http://orch:7700")
+    # Building the context manager sends nothing; the POST happens on entry.
+    pending = orch.allocate("vllm", model_candidates=["m"])
+    with pytest.raises(RuntimeError, match="cf-orch allocation failed"):
+        with pending:
+            pass
+
+
+async def test_async_allocate_works():
+    """allocate_async yields the parsed Allocation and releases it on exit."""
+    # NOTE(review): there is no explicit asyncio marker on this coroutine test;
+    # presumably the project runs pytest-asyncio in auto mode or an equivalent
+    # plugin — confirm, otherwise pytest will not await it.
+    #
+    # httpretty only patches stdlib sockets; httpx async uses anyio sockets so
+    # we mock httpx.AsyncClient directly instead.
+    alloc_data = {
+        "allocation_id": "a1", "service": "vllm", "node_id": "n",
+        "gpu_id": 0, "model": "m", "url": "http://n:8000",
+        "started": False, "warm": False,
+    }
+    release_data = {"released": True}
+
+    def _make_response(data, status_code=200):
+        """Build a stand-in response exposing only what the client reads."""
+        resp = MagicMock()
+        resp.is_success = status_code < 400
+        resp.status_code = status_code
+        resp.json.return_value = data
+        return resp
+
+    mock_post = AsyncMock(return_value=_make_response(alloc_data))
+    mock_delete = AsyncMock(return_value=_make_response(release_data))
+
+    # The mock doubles as its own async context manager result.
+    mock_async_client = MagicMock()
+    mock_async_client.post = mock_post
+    mock_async_client.delete = mock_delete
+    mock_async_client.__aenter__ = AsyncMock(return_value=mock_async_client)
+    mock_async_client.__aexit__ = AsyncMock(return_value=False)
+
+    with patch("httpx.AsyncClient", return_value=mock_async_client):
+        client = CFOrchClient("http://orch:7700")
+        async with client.allocate_async("vllm", model_candidates=["m"]) as alloc:
+            assert alloc.url == "http://n:8000"
+            assert alloc.allocation_id == "a1"
+
+    # Check where the requests went and what was sent, not just that they happened.
+    mock_post.assert_called_once()
+    assert mock_post.call_args.args[0] == "http://orch:7700/api/services/vllm/allocate"
+    assert mock_post.call_args.kwargs["json"] == {
+        "model_candidates": ["m"],
+        "ttl_s": 3600.0,
+        "caller": "",
+    }
+    mock_delete.assert_called_once()
+    assert (
+        mock_delete.call_args.args[0]
+        == "http://orch:7700/api/services/vllm/allocations/a1"
+    )
				`@ -0,0 +1 @@`
				`from circuitforge_core.resources.client import CFOrchClient, Allocation # noqa: F401`