From 77189116529ced1a223fd6ac48999a12a2ee600d Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Mon, 30 Mar 2026 20:51:08 -0700
Subject: [PATCH] feat(resources): add cforch-agent FastAPI app with /health /gpu-info /evict

---
Review note: the file contents below were revised after the original commit
(request validation for /evict, explicit None checks, docstrings, extra
tests). The commit id above and the blob ids on the "index" lines still refer
to the original revision; regenerate the patch from the updated tree before
relying on them.

 circuitforge_core/resources/agent/app.py | 86 +++++++++++++++++++++++
 tests/test_resources/test_agent_app.py   | 87 ++++++++++++++++++++++++
 2 files changed, 173 insertions(+)
 create mode 100644 circuitforge_core/resources/agent/app.py
 create mode 100644 tests/test_resources/test_agent_app.py

diff --git a/circuitforge_core/resources/agent/app.py b/circuitforge_core/resources/agent/app.py
new file mode 100644
index 0000000..628a470
--- /dev/null
+++ b/circuitforge_core/resources/agent/app.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from fastapi import FastAPI
+from pydantic import BaseModel, Field
+
+from circuitforge_core.resources.agent.eviction_executor import EvictionExecutor
+from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor
+
+logger = logging.getLogger(__name__)
+
+
+class EvictRequest(BaseModel):
+    """Request body for ``POST /evict``."""
+
+    # A PID of 0 or below never names a single process (POSIX kill() treats it
+    # as a process group or "every process"), so only positive PIDs are valid.
+    pid: int = Field(gt=0)
+    # A negative grace period has no meaning; reject it at the API boundary.
+    grace_period_s: float = Field(default=5.0, ge=0)
+
+
+def create_agent_app(
+    node_id: str,
+    monitor: GpuMonitor | None = None,
+    executor: EvictionExecutor | None = None,
+) -> FastAPI:
+    """Build the cforch-agent FastAPI application for one node.
+
+    Args:
+        node_id: Identifier echoed back in the ``/health`` and ``/gpu-info``
+            responses and shown in the app title.
+        monitor: GPU monitor to poll; a default ``GpuMonitor`` is created
+            when omitted.
+        executor: Eviction executor to delegate to; a default
+            ``EvictionExecutor`` is created when omitted.
+
+    Returns:
+        A FastAPI app exposing ``/health``, ``/gpu-info`` and ``/evict``.
+    """
+    # Compare against None rather than relying on truthiness so that an
+    # injected collaborator is never swapped out just because it is falsy.
+    _monitor = GpuMonitor() if monitor is None else monitor
+    _executor = EvictionExecutor() if executor is None else executor
+
+    app = FastAPI(title=f"cforch-agent [{node_id}]")
+
+    @app.get("/health")
+    def health() -> dict[str, Any]:
+        """Report that the agent is up, along with its node id."""
+        return {"status": "ok", "node_id": node_id}
+
+    @app.get("/gpu-info")
+    def gpu_info() -> dict[str, Any]:
+        """Poll the monitor and return per-GPU VRAM figures."""
+        gpus = _monitor.poll()
+        return {
+            "node_id": node_id,
+            "gpus": [
+                {
+                    "gpu_id": g.gpu_id,
+                    "name": g.name,
+                    "vram_total_mb": g.vram_total_mb,
+                    "vram_used_mb": g.vram_used_mb,
+                    "vram_free_mb": g.vram_free_mb,
+                }
+                for g in gpus
+            ],
+        }
+
+    @app.post("/evict")
+    def evict(req: EvictRequest) -> dict[str, Any]:
+        """Ask the executor to evict ``req.pid`` and relay its result."""
+        logger.info(
+            "evict requested: pid=%s grace_period_s=%s", req.pid, req.grace_period_s
+        )
+        result = _executor.evict_pid(pid=req.pid, grace_period_s=req.grace_period_s)
+        return {
+            "success": result.success,
+            "method": result.method,
+            "message": result.message,
+        }
+
+    return app
diff --git a/tests/test_resources/test_agent_app.py b/tests/test_resources/test_agent_app.py
new file mode 100644
index 0000000..b24c1aa
--- /dev/null
+++ b/tests/test_resources/test_agent_app.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+from fastapi.testclient import TestClient
+
+from circuitforge_core.resources.agent.app import create_agent_app
+from circuitforge_core.resources.agent.eviction_executor import EvictionResult
+from circuitforge_core.resources.models import GpuInfo
+
+# Single fake GPU returned by the mocked monitor in every test.
+MOCK_GPUS = [
+    GpuInfo(
+        gpu_id=0,
+        name="RTX 4000",
+        vram_total_mb=8192,
+        vram_used_mb=1024,
+        vram_free_mb=7168,
+    ),
+]
+
+
+@pytest.fixture
+def agent_client():
+    """Return ``(TestClient, mock_monitor, mock_executor)`` for a test app."""
+    mock_monitor = MagicMock()
+    mock_monitor.poll.return_value = MOCK_GPUS
+    mock_executor = MagicMock()
+    app = create_agent_app(
+        node_id="heimdall",
+        monitor=mock_monitor,
+        executor=mock_executor,
+    )
+    return TestClient(app), mock_monitor, mock_executor
+
+
+def test_health_returns_ok(agent_client):
+    client, _, _ = agent_client
+    resp = client.get("/health")
+    assert resp.status_code == 200
+    assert resp.json()["status"] == "ok"
+    assert resp.json()["node_id"] == "heimdall"
+
+
+def test_gpu_info_returns_gpu_list(agent_client):
+    client, _, _ = agent_client
+    resp = client.get("/gpu-info")
+    assert resp.status_code == 200
+    data = resp.json()
+    assert len(data["gpus"]) == 1
+    assert data["gpus"][0]["gpu_id"] == 0
+    assert data["gpus"][0]["name"] == "RTX 4000"
+    assert data["gpus"][0]["vram_free_mb"] == 7168
+
+
+def test_evict_calls_executor(agent_client):
+    client, _, mock_executor = agent_client
+    mock_executor.evict_pid.return_value = EvictionResult(
+        success=True, method="sigterm", message="done"
+    )
+    resp = client.post("/evict", json={"pid": 1234, "grace_period_s": 5.0})
+    assert resp.status_code == 200
+    assert resp.json()["success"] is True
+    mock_executor.evict_pid.assert_called_once_with(pid=1234, grace_period_s=5.0)
+
+
+def test_evict_requires_pid(agent_client):
+    client, _, _ = agent_client
+    resp = client.post("/evict", json={"grace_period_s": 5.0})
+    assert resp.status_code == 422
+
+
+@pytest.mark.parametrize("pid", [0, -1])
+def test_evict_rejects_non_positive_pid(agent_client, pid):
+    # PIDs <= 0 must be stopped at validation and never reach the executor.
+    client, _, mock_executor = agent_client
+    resp = client.post("/evict", json={"pid": pid})
+    assert resp.status_code == 422
+    mock_executor.evict_pid.assert_not_called()
+
+
+def test_evict_rejects_negative_grace_period(agent_client):
+    client, _, mock_executor = agent_client
+    resp = client.post("/evict", json={"pid": 1234, "grace_period_s": -1.0})
+    assert resp.status_code == 422
+    mock_executor.evict_pid.assert_not_called()