fix(resources): address findings from final code review

- eviction_engine: replace deprecated asyncio.get_event_loop() with
  asyncio.get_running_loop() (Python 3.12 compatibility)
- eviction_engine: remove unused httpx import
- coordinator app: return 422 for unknown node_id instead of silently
  falling back to hardcoded localhost URL
- eviction_executor: guard against pid <= 0 to prevent accidental
  SIGTERM to process group
- pyproject.toml: move pytest-asyncio to [dev] extras, not [orch]
- profile_registry: document CPU profile exclusion from list_public()
This commit is contained in:
pyr0ball 2026-03-30 22:46:07 -07:00
parent d755e9ea2c
commit db4e3047fd
5 changed files with 23 additions and 5 deletions

View file

@ -31,6 +31,12 @@ class EvictionExecutor:
) -> EvictionResult:
grace = grace_period_s if grace_period_s is not None else self._default_grace
if pid <= 0:
return EvictionResult(
success=False, method="error",
message=f"Refusing to signal invalid PID {pid}"
)
if not psutil.pid_exists(pid):
return EvictionResult(
success=False, method="not_found",

View file

@ -86,7 +86,12 @@ def create_coordinator_app(
@app.post("/api/leases")
async def request_lease(req: LeaseRequest) -> dict[str, Any]:
node_info = agent_supervisor.get_node_info(req.node_id)
agent_url = node_info.agent_url if node_info else "http://localhost:7701"
if node_info is None:
raise HTTPException(
status_code=422,
detail=f"Unknown node_id {req.node_id!r} — node not registered",
)
agent_url = node_info.agent_url
lease = await eviction_engine.request_lease(
node_id=req.node_id,

View file

@ -3,8 +3,6 @@ from __future__ import annotations
import asyncio
import logging
import httpx
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
from circuitforge_core.resources.models import VRAMLease
@ -61,8 +59,9 @@ class EvictionEngine:
await self._evict_lease(candidate, agent_url)
# Wait for evictions to free up VRAM (poll with timeout)
deadline = asyncio.get_event_loop().time() + self._timeout
while asyncio.get_event_loop().time() < deadline:
loop = asyncio.get_running_loop()
deadline = loop.time() + self._timeout
while loop.time() < deadline:
lease = await self.lease_manager.try_grant(
node_id, gpu_id, mb, service, priority, ttl_s
)

View file

@ -44,6 +44,9 @@ class ProfileRegistry:
return profile
def list_public(self) -> list[GpuProfile]:
# CPU profiles (cpu-*) are intentionally excluded — this endpoint
# is used to match GPU hardware. CPU inference nodes self-select
# their profile via the CLI and are not listed for lease matching.
return [
p for p in self._profiles.values()
if p.name.startswith("single-gpu-")

View file

@ -21,7 +21,12 @@ orch = [
"pydantic>=2.0",
"typer[all]>=0.12",
"psutil>=5.9",
]
dev = [
"circuitforge-core[orch]",
"pytest>=8.0",
"pytest-asyncio>=0.23",
"httpx>=0.27",
]
[project.scripts]