test(resources): add integration tests for full lease/eviction cycle
This commit is contained in:
parent
1f296c0cdb
commit
d755e9ea2c
1 changed files with 219 additions and 0 deletions
219
tests/test_resources/test_integration.py
Normal file
219
tests/test_resources/test_integration.py
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
"""Integration test: full lease → eviction → re-grant cycle.
|
||||
|
||||
Runs coordinator in-process (no subprocesses, no real nvidia-smi).
|
||||
Uses TestClient for HTTP, mocks AgentSupervisor to return fixed node state.
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
||||
from circuitforge_core.resources.coordinator.app import create_coordinator_app
|
||||
from circuitforge_core.resources.models import GpuInfo, NodeInfo
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def system():
|
||||
"""Create an in-process coordinator system with 8GB GPU and mock supervisor."""
|
||||
lease_manager = LeaseManager()
|
||||
lease_manager.register_gpu("local", 0, 8192)
|
||||
|
||||
mock_supervisor = MagicMock(spec=AgentSupervisor)
|
||||
mock_supervisor.all_nodes.return_value = [
|
||||
NodeInfo(
|
||||
node_id="local",
|
||||
agent_url="http://localhost:7701",
|
||||
gpus=[GpuInfo(
|
||||
gpu_id=0,
|
||||
name="RTX 4000",
|
||||
vram_total_mb=8192,
|
||||
vram_used_mb=0,
|
||||
vram_free_mb=8192,
|
||||
)],
|
||||
last_heartbeat=0.0,
|
||||
)
|
||||
]
|
||||
mock_supervisor.get_node_info.return_value = NodeInfo(
|
||||
node_id="local",
|
||||
agent_url="http://localhost:7701",
|
||||
gpus=[],
|
||||
last_heartbeat=0.0,
|
||||
)
|
||||
|
||||
profile_registry = ProfileRegistry()
|
||||
app = create_coordinator_app(
|
||||
lease_manager=lease_manager,
|
||||
profile_registry=profile_registry,
|
||||
agent_supervisor=mock_supervisor,
|
||||
)
|
||||
client = TestClient(app)
|
||||
return client, lease_manager
|
||||
|
||||
|
||||
def test_full_lease_cycle(system):
|
||||
"""Test: grant, verify, release, verify gone."""
|
||||
client, _ = system
|
||||
|
||||
# Grant a lease
|
||||
resp = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 4096,
|
||||
"service": "peregrine",
|
||||
"priority": 1,
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
lease_data = resp.json()["lease"]
|
||||
lease_id = lease_data["lease_id"]
|
||||
assert lease_data["mb_granted"] == 4096
|
||||
assert lease_data["holder_service"] == "peregrine"
|
||||
|
||||
# Verify it appears in active leases
|
||||
resp = client.get("/api/leases")
|
||||
assert resp.status_code == 200
|
||||
leases = resp.json()["leases"]
|
||||
assert any(l["lease_id"] == lease_id for l in leases)
|
||||
|
||||
# Release it
|
||||
resp = client.delete(f"/api/leases/{lease_id}")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["released"] is True
|
||||
|
||||
# Verify it's gone
|
||||
resp = client.get("/api/leases")
|
||||
assert resp.status_code == 200
|
||||
leases = resp.json()["leases"]
|
||||
assert not any(l["lease_id"] == lease_id for l in leases)
|
||||
|
||||
|
||||
def test_vram_exhaustion_returns_503(system):
|
||||
"""Test: fill GPU, then request with no eviction candidates returns 503."""
|
||||
client, _ = system
|
||||
|
||||
# Fill GPU 0 with high-priority lease
|
||||
resp = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 8000,
|
||||
"service": "vllm",
|
||||
"priority": 1,
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
|
||||
# Try to get more VRAM with same priority (no eviction candidates)
|
||||
resp = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 2000,
|
||||
"service": "kiwi",
|
||||
"priority": 1,
|
||||
})
|
||||
assert resp.status_code == 503
|
||||
assert "Insufficient VRAM" in resp.json()["detail"]
|
||||
|
||||
|
||||
def test_auto_detect_profile_for_8gb():
|
||||
"""Test: ProfileRegistry auto-detects single-gpu-8gb for 8GB GPU."""
|
||||
registry = ProfileRegistry()
|
||||
gpu = GpuInfo(
|
||||
gpu_id=0,
|
||||
name="RTX 4000",
|
||||
vram_total_mb=8192,
|
||||
vram_used_mb=0,
|
||||
vram_free_mb=8192,
|
||||
)
|
||||
profile = registry.auto_detect([gpu])
|
||||
assert profile.name == "single-gpu-8gb"
|
||||
# Verify profile has services configured
|
||||
assert hasattr(profile, "services")
|
||||
|
||||
|
||||
def test_node_endpoint_shows_nodes(system):
|
||||
"""Test: GET /api/nodes returns the mocked node."""
|
||||
client, _ = system
|
||||
resp = client.get("/api/nodes")
|
||||
assert resp.status_code == 200
|
||||
nodes = resp.json()["nodes"]
|
||||
assert len(nodes) == 1
|
||||
assert nodes[0]["node_id"] == "local"
|
||||
assert nodes[0]["agent_url"] == "http://localhost:7701"
|
||||
assert len(nodes[0]["gpus"]) == 1
|
||||
assert nodes[0]["gpus"][0]["name"] == "RTX 4000"
|
||||
|
||||
|
||||
def test_profiles_endpoint_returns_public_profiles(system):
|
||||
"""Test: GET /api/profiles returns standard public profiles."""
|
||||
client, _ = system
|
||||
resp = client.get("/api/profiles")
|
||||
assert resp.status_code == 200
|
||||
profiles = resp.json()["profiles"]
|
||||
names = [p["name"] for p in profiles]
|
||||
# Verify common public profiles are present
|
||||
assert "single-gpu-8gb" in names
|
||||
assert "single-gpu-6gb" in names
|
||||
assert "single-gpu-2gb" in names
|
||||
|
||||
|
||||
def test_multiple_leases_tracked_independently(system):
|
||||
"""Test: multiple active leases are tracked correctly."""
|
||||
client, _ = system
|
||||
|
||||
# Grant lease 1
|
||||
resp1 = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 2048,
|
||||
"service": "peregrine",
|
||||
"priority": 2,
|
||||
})
|
||||
assert resp1.status_code == 200
|
||||
lease1_id = resp1.json()["lease"]["lease_id"]
|
||||
|
||||
# Grant lease 2
|
||||
resp2 = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 2048,
|
||||
"service": "kiwi",
|
||||
"priority": 2,
|
||||
})
|
||||
assert resp2.status_code == 200
|
||||
lease2_id = resp2.json()["lease"]["lease_id"]
|
||||
|
||||
# Both should be in active leases
|
||||
resp = client.get("/api/leases")
|
||||
leases = resp.json()["leases"]
|
||||
lease_ids = [l["lease_id"] for l in leases]
|
||||
assert lease1_id in lease_ids
|
||||
assert lease2_id in lease_ids
|
||||
assert len(leases) == 2
|
||||
|
||||
# Release lease 1
|
||||
resp = client.delete(f"/api/leases/{lease1_id}")
|
||||
assert resp.status_code == 200
|
||||
|
||||
# Only lease 2 should remain
|
||||
resp = client.get("/api/leases")
|
||||
leases = resp.json()["leases"]
|
||||
lease_ids = [l["lease_id"] for l in leases]
|
||||
assert lease1_id not in lease_ids
|
||||
assert lease2_id in lease_ids
|
||||
assert len(leases) == 1
|
||||
|
||||
|
||||
def test_delete_nonexistent_lease_returns_404(system):
|
||||
"""Test: deleting a nonexistent lease returns 404."""
|
||||
client, _ = system
|
||||
resp = client.delete("/api/leases/nonexistent-lease-id")
|
||||
assert resp.status_code == 404
|
||||
assert "not found" in resp.json()["detail"]
|
||||
|
||||
|
||||
def test_health_endpoint_returns_ok(system):
|
||||
"""Test: GET /api/health returns status ok."""
|
||||
client, _ = system
|
||||
resp = client.get("/api/health")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["status"] == "ok"
|
||||
Loading…
Reference in a new issue