Merge pull request 'feat(resources): cf-orch GPU VRAM orchestration — Plan A core' (#1) from feature/cforch-core-orchestration into main
This commit is contained in:
commit
99f4e95018
37 changed files with 2094 additions and 1 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -4,3 +4,7 @@ __pycache__/
|
|||
*.egg-info/
|
||||
dist/
|
||||
.pytest_cache/
|
||||
.superpowers/
|
||||
|
||||
# cf-orch private profiles (commit on personal/heimdall branch only)
|
||||
circuitforge_core/resources/profiles/private/
|
||||
|
|
|
|||
0
circuitforge_core/resources/__init__.py
Normal file
0
circuitforge_core/resources/__init__.py
Normal file
0
circuitforge_core/resources/agent/__init__.py
Normal file
0
circuitforge_core/resources/agent/__init__.py
Normal file
60
circuitforge_core/resources/agent/app.py
Normal file
60
circuitforge_core/resources/agent/app.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from circuitforge_core.resources.agent.eviction_executor import EvictionExecutor
|
||||
from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EvictRequest(BaseModel):
|
||||
pid: int
|
||||
grace_period_s: float = 5.0
|
||||
|
||||
|
||||
def create_agent_app(
|
||||
node_id: str,
|
||||
monitor: GpuMonitor | None = None,
|
||||
executor: EvictionExecutor | None = None,
|
||||
) -> FastAPI:
|
||||
_monitor = monitor or GpuMonitor()
|
||||
_executor = executor or EvictionExecutor()
|
||||
|
||||
app = FastAPI(title=f"cf-orch-agent [{node_id}]")
|
||||
|
||||
@app.get("/health")
|
||||
def health() -> dict[str, Any]:
|
||||
return {"status": "ok", "node_id": node_id}
|
||||
|
||||
@app.get("/gpu-info")
|
||||
def gpu_info() -> dict[str, Any]:
|
||||
gpus = _monitor.poll()
|
||||
return {
|
||||
"node_id": node_id,
|
||||
"gpus": [
|
||||
{
|
||||
"gpu_id": g.gpu_id,
|
||||
"name": g.name,
|
||||
"vram_total_mb": g.vram_total_mb,
|
||||
"vram_used_mb": g.vram_used_mb,
|
||||
"vram_free_mb": g.vram_free_mb,
|
||||
}
|
||||
for g in gpus
|
||||
],
|
||||
}
|
||||
|
||||
@app.post("/evict")
|
||||
def evict(req: EvictRequest) -> dict[str, Any]:
|
||||
result = _executor.evict_pid(pid=req.pid, grace_period_s=req.grace_period_s)
|
||||
return {
|
||||
"success": result.success,
|
||||
"method": result.method,
|
||||
"message": result.message,
|
||||
}
|
||||
|
||||
return app
|
||||
85
circuitforge_core/resources/agent/eviction_executor.py
Normal file
85
circuitforge_core/resources/agent/eviction_executor.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import psutil
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_GRACE_S = 5.0
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EvictionResult:
|
||||
success: bool
|
||||
method: str # "sigterm", "sigkill", "already_gone", "not_found", "error"
|
||||
message: str
|
||||
|
||||
|
||||
class EvictionExecutor:
|
||||
def __init__(self, grace_period_s: float = _DEFAULT_GRACE_S) -> None:
|
||||
self._default_grace = grace_period_s
|
||||
|
||||
def evict_pid(
|
||||
self,
|
||||
pid: int,
|
||||
grace_period_s: float | None = None,
|
||||
) -> EvictionResult:
|
||||
grace = grace_period_s if grace_period_s is not None else self._default_grace
|
||||
|
||||
if pid <= 0:
|
||||
return EvictionResult(
|
||||
success=False, method="error",
|
||||
message=f"Refusing to signal invalid PID {pid}"
|
||||
)
|
||||
|
||||
if not psutil.pid_exists(pid):
|
||||
return EvictionResult(
|
||||
success=False, method="not_found",
|
||||
message=f"PID {pid} not found"
|
||||
)
|
||||
|
||||
try:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
except ProcessLookupError:
|
||||
return EvictionResult(
|
||||
success=True, method="already_gone",
|
||||
message=f"PID {pid} vanished before SIGTERM"
|
||||
)
|
||||
except PermissionError as exc:
|
||||
return EvictionResult(
|
||||
success=False, method="error",
|
||||
message=f"Permission denied terminating PID {pid}: {exc}"
|
||||
)
|
||||
|
||||
# Wait for grace period
|
||||
deadline = time.monotonic() + grace
|
||||
while time.monotonic() < deadline:
|
||||
if not psutil.pid_exists(pid):
|
||||
logger.info("PID %d exited cleanly after SIGTERM", pid)
|
||||
return EvictionResult(
|
||||
success=True, method="sigterm",
|
||||
message=f"PID {pid} exited after SIGTERM"
|
||||
)
|
||||
time.sleep(0.05)
|
||||
|
||||
# Escalate to SIGKILL
|
||||
if psutil.pid_exists(pid):
|
||||
try:
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
logger.warning("PID %d required SIGKILL", pid)
|
||||
return EvictionResult(
|
||||
success=True, method="sigkill",
|
||||
message=f"PID {pid} killed with SIGKILL"
|
||||
)
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
|
||||
return EvictionResult(
|
||||
success=True, method="sigkill",
|
||||
message=f"PID {pid} is gone"
|
||||
)
|
||||
52
circuitforge_core/resources/agent/gpu_monitor.py
Normal file
52
circuitforge_core/resources/agent/gpu_monitor.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
|
||||
from circuitforge_core.resources.models import GpuInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_NVIDIA_SMI_CMD = [
|
||||
"nvidia-smi",
|
||||
"--query-gpu=index,name,memory.total,memory.used,memory.free",
|
||||
"--format=csv,noheader,nounits",
|
||||
]
|
||||
|
||||
|
||||
class GpuMonitor:
|
||||
def poll(self) -> list[GpuInfo]:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
_NVIDIA_SMI_CMD,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
|
||||
logger.warning("nvidia-smi unavailable: %s", exc)
|
||||
return []
|
||||
|
||||
if result.returncode != 0:
|
||||
logger.warning("nvidia-smi exited %d", result.returncode)
|
||||
return []
|
||||
|
||||
return self._parse(result.stdout)
|
||||
|
||||
def _parse(self, output: str) -> list[GpuInfo]:
|
||||
gpus: list[GpuInfo] = []
|
||||
for line in output.strip().splitlines():
|
||||
parts = [p.strip() for p in line.split(",")]
|
||||
if len(parts) != 5:
|
||||
continue
|
||||
try:
|
||||
gpus.append(GpuInfo(
|
||||
gpu_id=int(parts[0]),
|
||||
name=parts[1],
|
||||
vram_total_mb=int(parts[2]),
|
||||
vram_used_mb=int(parts[3]),
|
||||
vram_free_mb=int(parts[4]),
|
||||
))
|
||||
except ValueError:
|
||||
logger.debug("Skipping malformed nvidia-smi line: %r", line)
|
||||
return gpus
|
||||
143
circuitforge_core/resources/cli.py
Normal file
143
circuitforge_core/resources/cli.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Optional
|
||||
|
||||
import typer
|
||||
import uvicorn
|
||||
|
||||
app = typer.Typer(name="cf-orch", help="CircuitForge GPU resource orchestrator")
|
||||
|
||||
_SYSTEMD_UNIT_PATH = Path("/etc/systemd/system/cf-orch.service")
|
||||
|
||||
_SYSTEMD_UNIT_TEMPLATE = """\
|
||||
[Unit]
|
||||
Description=CircuitForge GPU Resource Orchestrator
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart={python} -m circuitforge_core.resources.cli start
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
"""
|
||||
|
||||
|
||||
@app.command()
|
||||
def start(
|
||||
profile: Annotated[Optional[Path], typer.Option(help="Profile YAML path")] = None,
|
||||
host: str = "0.0.0.0",
|
||||
port: int = 7700,
|
||||
agent_port: int = 7701,
|
||||
) -> None:
|
||||
"""Start the cf-orch coordinator (auto-detects GPU profile if not specified)."""
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
||||
from circuitforge_core.resources.coordinator.app import create_coordinator_app
|
||||
from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor
|
||||
|
||||
lease_manager = LeaseManager()
|
||||
profile_registry = ProfileRegistry()
|
||||
supervisor = AgentSupervisor(lease_manager=lease_manager)
|
||||
|
||||
monitor = GpuMonitor()
|
||||
gpus = monitor.poll()
|
||||
if not gpus:
|
||||
typer.echo(
|
||||
"Warning: no GPUs detected via nvidia-smi — coordinator running with 0 VRAM"
|
||||
)
|
||||
else:
|
||||
for gpu in gpus:
|
||||
lease_manager.register_gpu("local", gpu.gpu_id, gpu.vram_total_mb)
|
||||
typer.echo(f"Detected {len(gpus)} GPU(s)")
|
||||
|
||||
if profile:
|
||||
active_profile = profile_registry.load(profile)
|
||||
typer.echo(f"Using profile: {active_profile.name} (from {profile})")
|
||||
else:
|
||||
active_profile = (
|
||||
profile_registry.auto_detect(gpus)
|
||||
if gpus
|
||||
else profile_registry.list_public()[-1]
|
||||
)
|
||||
typer.echo(f"Auto-selected profile: {active_profile.name}")
|
||||
|
||||
coordinator_app = create_coordinator_app(
|
||||
lease_manager=lease_manager,
|
||||
profile_registry=profile_registry,
|
||||
agent_supervisor=supervisor,
|
||||
)
|
||||
|
||||
typer.echo(f"Starting cf-orch coordinator on {host}:{port}")
|
||||
uvicorn.run(coordinator_app, host=host, port=port)
|
||||
|
||||
|
||||
@app.command()
|
||||
def agent(
|
||||
coordinator: str = "http://localhost:7700",
|
||||
node_id: str = "local",
|
||||
host: str = "0.0.0.0",
|
||||
port: int = 7701,
|
||||
) -> None:
|
||||
"""Start a cf-orch node agent (for remote nodes like Navi, Huginn)."""
|
||||
from circuitforge_core.resources.agent.app import create_agent_app
|
||||
|
||||
agent_app = create_agent_app(node_id=node_id)
|
||||
typer.echo(f"Starting cf-orch agent [{node_id}] on {host}:{port}")
|
||||
uvicorn.run(agent_app, host=host, port=port)
|
||||
|
||||
|
||||
@app.command()
|
||||
def status(coordinator: str = "http://localhost:7700") -> None:
|
||||
"""Show GPU and lease status from the coordinator."""
|
||||
import httpx
|
||||
|
||||
try:
|
||||
resp = httpx.get(f"{coordinator}/api/nodes", timeout=5.0)
|
||||
resp.raise_for_status()
|
||||
nodes = resp.json().get("nodes", [])
|
||||
for node in nodes:
|
||||
typer.echo(f"\nNode: {node['node_id']}")
|
||||
for gpu in node.get("gpus", []):
|
||||
typer.echo(
|
||||
f" GPU {gpu['gpu_id']}: {gpu['name']} — "
|
||||
f"{gpu['vram_used_mb']}/{gpu['vram_total_mb']} MB used"
|
||||
)
|
||||
except Exception as exc:
|
||||
typer.echo(f"Coordinator unreachable at {coordinator}: {exc}", err=True)
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
@app.command("install-service")
|
||||
def install_service(
|
||||
dry_run: bool = typer.Option(
|
||||
False, "--dry-run", help="Print unit file without writing"
|
||||
),
|
||||
) -> None:
|
||||
"""Write a systemd unit file for cf-orch (requires root)."""
|
||||
python = sys.executable
|
||||
unit_content = _SYSTEMD_UNIT_TEMPLATE.format(python=python)
|
||||
if dry_run:
|
||||
typer.echo(f"Would write to {_SYSTEMD_UNIT_PATH}:\n")
|
||||
typer.echo(unit_content)
|
||||
return
|
||||
try:
|
||||
_SYSTEMD_UNIT_PATH.write_text(unit_content)
|
||||
typer.echo(f"Written: {_SYSTEMD_UNIT_PATH}")
|
||||
typer.echo(
|
||||
"Run: sudo systemctl daemon-reload && sudo systemctl enable --now cf-orch"
|
||||
)
|
||||
except PermissionError:
|
||||
typer.echo(
|
||||
f"Permission denied writing to {_SYSTEMD_UNIT_PATH}. Run as root.", err=True
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
44
circuitforge_core/resources/compose.yml
Normal file
44
circuitforge_core/resources/compose.yml
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
# circuitforge_core/resources/compose.yml
|
||||
# One-command cf-orch deployment for Docker self-hosters:
|
||||
# docker compose -f path/to/compose.yml up cf-orch-coordinator
|
||||
|
||||
services:
|
||||
cf-orch-coordinator:
|
||||
image: python:3.12-slim
|
||||
command: >
|
||||
sh -c "pip install 'circuitforge-core[orch]' &&
|
||||
cf-orch start --host 0.0.0.0 --port 7700"
|
||||
ports:
|
||||
- "7700:7700"
|
||||
volumes:
|
||||
- /run/docker.sock:/var/run/docker.sock:ro
|
||||
- cf-orch-data:/data
|
||||
environment:
|
||||
- CFORCH_PROFILE=${CFORCH_PROFILE:-}
|
||||
restart: unless-stopped
|
||||
devices:
|
||||
- /dev/nvidia0:/dev/nvidia0
|
||||
- /dev/nvidiactl:/dev/nvidiactl
|
||||
runtime: nvidia
|
||||
|
||||
cf-orch-agent:
|
||||
image: python:3.12-slim
|
||||
command: >
|
||||
sh -c "pip install 'circuitforge-core[orch]' &&
|
||||
cf-orch agent --coordinator http://cf-orch-coordinator:7700
|
||||
--node-id ${CFORCH_NODE_ID:-local}
|
||||
--host 0.0.0.0 --port 7701"
|
||||
ports:
|
||||
- "7701:7701"
|
||||
depends_on:
|
||||
- cf-orch-coordinator
|
||||
environment:
|
||||
- CFORCH_NODE_ID=${CFORCH_NODE_ID:-local}
|
||||
restart: unless-stopped
|
||||
devices:
|
||||
- /dev/nvidia0:/dev/nvidia0
|
||||
- /dev/nvidiactl:/dev/nvidiactl
|
||||
runtime: nvidia
|
||||
|
||||
volumes:
|
||||
cf-orch-data:
|
||||
0
circuitforge_core/resources/coordinator/__init__.py
Normal file
0
circuitforge_core/resources/coordinator/__init__.py
Normal file
101
circuitforge_core/resources/coordinator/agent_supervisor.py
Normal file
101
circuitforge_core/resources/coordinator/agent_supervisor.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import httpx
|
||||
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
from circuitforge_core.resources.models import GpuInfo, NodeInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_HEARTBEAT_INTERVAL_S = 10.0
|
||||
_AGENT_TIMEOUT_S = 5.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentRecord:
|
||||
node_id: str
|
||||
agent_url: str
|
||||
last_seen: float = field(default_factory=time.time)
|
||||
gpus: list[GpuInfo] = field(default_factory=list)
|
||||
online: bool = False
|
||||
|
||||
|
||||
class AgentSupervisor:
|
||||
def __init__(self, lease_manager: LeaseManager) -> None:
|
||||
self._agents: dict[str, AgentRecord] = {}
|
||||
self._lease_manager = lease_manager
|
||||
self._running = False
|
||||
|
||||
def register(self, node_id: str, agent_url: str) -> None:
|
||||
if node_id not in self._agents:
|
||||
self._agents[node_id] = AgentRecord(node_id=node_id, agent_url=agent_url)
|
||||
logger.info("Registered agent node: %s @ %s", node_id, agent_url)
|
||||
|
||||
def get_node_info(self, node_id: str) -> NodeInfo | None:
|
||||
record = self._agents.get(node_id)
|
||||
if record is None:
|
||||
return None
|
||||
return NodeInfo(
|
||||
node_id=record.node_id,
|
||||
agent_url=record.agent_url,
|
||||
gpus=record.gpus,
|
||||
last_heartbeat=record.last_seen,
|
||||
)
|
||||
|
||||
def all_nodes(self) -> list[NodeInfo]:
|
||||
return [
|
||||
NodeInfo(
|
||||
node_id=r.node_id,
|
||||
agent_url=r.agent_url,
|
||||
gpus=r.gpus,
|
||||
last_heartbeat=r.last_seen,
|
||||
)
|
||||
for r in self._agents.values()
|
||||
]
|
||||
|
||||
async def poll_agent(self, node_id: str) -> bool:
|
||||
record = self._agents.get(node_id)
|
||||
if record is None:
|
||||
return False
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=_AGENT_TIMEOUT_S) as client:
|
||||
resp = await client.get(f"{record.agent_url}/gpu-info")
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
gpus = [
|
||||
GpuInfo(
|
||||
gpu_id=g["gpu_id"],
|
||||
name=g["name"],
|
||||
vram_total_mb=g["vram_total_mb"],
|
||||
vram_used_mb=g["vram_used_mb"],
|
||||
vram_free_mb=g["vram_free_mb"],
|
||||
)
|
||||
for g in data.get("gpus", [])
|
||||
]
|
||||
record.gpus = gpus
|
||||
record.last_seen = time.time()
|
||||
record.online = True
|
||||
for gpu in gpus:
|
||||
self._lease_manager.register_gpu(node_id, gpu.gpu_id, gpu.vram_total_mb)
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.warning("Agent %s unreachable: %s", node_id, exc)
|
||||
record.online = False
|
||||
return False
|
||||
|
||||
async def poll_all(self) -> None:
|
||||
await asyncio.gather(*[self.poll_agent(nid) for nid in self._agents])
|
||||
|
||||
async def run_heartbeat_loop(self) -> None:
|
||||
self._running = True
|
||||
while self._running:
|
||||
await self.poll_all()
|
||||
await asyncio.sleep(_HEARTBEAT_INTERVAL_S)
|
||||
|
||||
def stop(self) -> None:
|
||||
self._running = False
|
||||
129
circuitforge_core/resources/coordinator/app.py
Normal file
129
circuitforge_core/resources/coordinator/app.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
||||
from circuitforge_core.resources.coordinator.eviction_engine import EvictionEngine
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
||||
|
||||
|
||||
class LeaseRequest(BaseModel):
|
||||
node_id: str
|
||||
gpu_id: int
|
||||
mb: int
|
||||
service: str
|
||||
priority: int = 2
|
||||
ttl_s: float = 0.0
|
||||
|
||||
|
||||
def create_coordinator_app(
|
||||
lease_manager: LeaseManager,
|
||||
profile_registry: ProfileRegistry,
|
||||
agent_supervisor: AgentSupervisor,
|
||||
) -> FastAPI:
|
||||
eviction_engine = EvictionEngine(lease_manager=lease_manager)
|
||||
|
||||
app = FastAPI(title="cf-orch-coordinator")
|
||||
|
||||
@app.get("/api/health")
|
||||
def health() -> dict[str, Any]:
|
||||
return {"status": "ok"}
|
||||
|
||||
@app.get("/api/nodes")
|
||||
def get_nodes() -> dict[str, Any]:
|
||||
nodes = agent_supervisor.all_nodes()
|
||||
return {
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": n.node_id,
|
||||
"agent_url": n.agent_url,
|
||||
"last_heartbeat": n.last_heartbeat,
|
||||
"gpus": [
|
||||
{
|
||||
"gpu_id": g.gpu_id,
|
||||
"name": g.name,
|
||||
"vram_total_mb": g.vram_total_mb,
|
||||
"vram_used_mb": g.vram_used_mb,
|
||||
"vram_free_mb": g.vram_free_mb,
|
||||
}
|
||||
for g in n.gpus
|
||||
],
|
||||
}
|
||||
for n in nodes
|
||||
]
|
||||
}
|
||||
|
||||
@app.get("/api/profiles")
|
||||
def get_profiles() -> dict[str, Any]:
|
||||
return {
|
||||
"profiles": [
|
||||
{"name": p.name, "vram_total_mb": p.vram_total_mb}
|
||||
for p in profile_registry.list_public()
|
||||
]
|
||||
}
|
||||
|
||||
@app.get("/api/leases")
|
||||
def get_leases() -> dict[str, Any]:
|
||||
return {
|
||||
"leases": [
|
||||
{
|
||||
"lease_id": lease.lease_id,
|
||||
"node_id": lease.node_id,
|
||||
"gpu_id": lease.gpu_id,
|
||||
"mb_granted": lease.mb_granted,
|
||||
"holder_service": lease.holder_service,
|
||||
"priority": lease.priority,
|
||||
"expires_at": lease.expires_at,
|
||||
}
|
||||
for lease in lease_manager.all_leases()
|
||||
]
|
||||
}
|
||||
|
||||
@app.post("/api/leases")
|
||||
async def request_lease(req: LeaseRequest) -> dict[str, Any]:
|
||||
node_info = agent_supervisor.get_node_info(req.node_id)
|
||||
if node_info is None:
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail=f"Unknown node_id {req.node_id!r} — node not registered",
|
||||
)
|
||||
agent_url = node_info.agent_url
|
||||
|
||||
lease = await eviction_engine.request_lease(
|
||||
node_id=req.node_id,
|
||||
gpu_id=req.gpu_id,
|
||||
mb=req.mb,
|
||||
service=req.service,
|
||||
priority=req.priority,
|
||||
agent_url=agent_url,
|
||||
ttl_s=req.ttl_s,
|
||||
)
|
||||
if lease is None:
|
||||
raise HTTPException(
|
||||
status_code=503,
|
||||
detail="Insufficient VRAM — no eviction candidates available",
|
||||
)
|
||||
return {
|
||||
"lease": {
|
||||
"lease_id": lease.lease_id,
|
||||
"node_id": lease.node_id,
|
||||
"gpu_id": lease.gpu_id,
|
||||
"mb_granted": lease.mb_granted,
|
||||
"holder_service": lease.holder_service,
|
||||
"priority": lease.priority,
|
||||
"expires_at": lease.expires_at,
|
||||
}
|
||||
}
|
||||
|
||||
@app.delete("/api/leases/{lease_id}")
|
||||
async def release_lease(lease_id: str) -> dict[str, Any]:
|
||||
released = await lease_manager.release(lease_id)
|
||||
if not released:
|
||||
raise HTTPException(status_code=404, detail=f"Lease {lease_id!r} not found")
|
||||
return {"released": True, "lease_id": lease_id}
|
||||
|
||||
return app
|
||||
81
circuitforge_core/resources/coordinator/eviction_engine.py
Normal file
81
circuitforge_core/resources/coordinator/eviction_engine.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
from circuitforge_core.resources.models import VRAMLease
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_EVICTION_TIMEOUT_S = 10.0
|
||||
|
||||
|
||||
class EvictionEngine:
|
||||
def __init__(
|
||||
self,
|
||||
lease_manager: LeaseManager,
|
||||
eviction_timeout_s: float = _DEFAULT_EVICTION_TIMEOUT_S,
|
||||
) -> None:
|
||||
self.lease_manager = lease_manager
|
||||
self._timeout = eviction_timeout_s
|
||||
|
||||
async def request_lease(
|
||||
self,
|
||||
node_id: str,
|
||||
gpu_id: int,
|
||||
mb: int,
|
||||
service: str,
|
||||
priority: int,
|
||||
agent_url: str,
|
||||
ttl_s: float = 0.0,
|
||||
) -> VRAMLease | None:
|
||||
# Fast path: enough free VRAM
|
||||
lease = await self.lease_manager.try_grant(
|
||||
node_id, gpu_id, mb, service, priority, ttl_s
|
||||
)
|
||||
if lease is not None:
|
||||
return lease
|
||||
|
||||
# Find eviction candidates
|
||||
candidates = self.lease_manager.get_eviction_candidates(
|
||||
node_id=node_id, gpu_id=gpu_id,
|
||||
needed_mb=mb, requester_priority=priority,
|
||||
)
|
||||
if not candidates:
|
||||
logger.info(
|
||||
"No eviction candidates for %s on %s:GPU%d (%dMB needed)",
|
||||
service, node_id, gpu_id, mb,
|
||||
)
|
||||
return None
|
||||
|
||||
# Evict candidates
|
||||
freed_mb = sum(c.mb_granted for c in candidates)
|
||||
logger.info(
|
||||
"Evicting %d lease(s) to free %dMB for %s",
|
||||
len(candidates), freed_mb, service,
|
||||
)
|
||||
for candidate in candidates:
|
||||
await self._evict_lease(candidate, agent_url)
|
||||
|
||||
# Wait for evictions to free up VRAM (poll with timeout)
|
||||
loop = asyncio.get_running_loop()
|
||||
deadline = loop.time() + self._timeout
|
||||
while loop.time() < deadline:
|
||||
lease = await self.lease_manager.try_grant(
|
||||
node_id, gpu_id, mb, service, priority, ttl_s
|
||||
)
|
||||
if lease is not None:
|
||||
return lease
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
logger.warning("Eviction timed out for %s after %.1fs", service, self._timeout)
|
||||
return None
|
||||
|
||||
async def _evict_lease(self, lease: VRAMLease, agent_url: str) -> None:
|
||||
"""Release lease accounting. Process-level eviction deferred to Plan B."""
|
||||
await self.lease_manager.release(lease.lease_id)
|
||||
|
||||
async def _call_agent_evict(self, agent_url: str, lease: VRAMLease) -> bool:
|
||||
"""POST /evict to the agent. Stub for v1 — real process lookup in Plan B."""
|
||||
return True
|
||||
88
circuitforge_core/resources/coordinator/lease_manager.py
Normal file
88
circuitforge_core/resources/coordinator/lease_manager.py
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections import defaultdict
|
||||
|
||||
from circuitforge_core.resources.models import VRAMLease
|
||||
|
||||
|
||||
class LeaseManager:
|
||||
def __init__(self) -> None:
|
||||
self._leases: dict[str, VRAMLease] = {}
|
||||
self._gpu_total: dict[tuple[str, int], int] = {}
|
||||
self._gpu_used: dict[tuple[str, int], int] = defaultdict(int)
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
def register_gpu(self, node_id: str, gpu_id: int, total_mb: int) -> None:
|
||||
self._gpu_total[(node_id, gpu_id)] = total_mb
|
||||
|
||||
def gpu_total_mb(self, node_id: str, gpu_id: int) -> int:
|
||||
return self._gpu_total.get((node_id, gpu_id), 0)
|
||||
|
||||
def used_mb(self, node_id: str, gpu_id: int) -> int:
|
||||
return self._gpu_used[(node_id, gpu_id)]
|
||||
|
||||
async def try_grant(
|
||||
self,
|
||||
node_id: str,
|
||||
gpu_id: int,
|
||||
mb: int,
|
||||
service: str,
|
||||
priority: int,
|
||||
ttl_s: float = 0.0,
|
||||
) -> VRAMLease | None:
|
||||
async with self._lock:
|
||||
total = self._gpu_total.get((node_id, gpu_id), 0)
|
||||
used = self._gpu_used[(node_id, gpu_id)]
|
||||
if total - used < mb:
|
||||
return None
|
||||
lease = VRAMLease.create(
|
||||
gpu_id=gpu_id, node_id=node_id, mb=mb,
|
||||
service=service, priority=priority, ttl_s=ttl_s,
|
||||
)
|
||||
self._leases[lease.lease_id] = lease
|
||||
self._gpu_used[(node_id, gpu_id)] += mb
|
||||
return lease
|
||||
|
||||
async def release(self, lease_id: str) -> bool:
|
||||
async with self._lock:
|
||||
lease = self._leases.pop(lease_id, None)
|
||||
if lease is None:
|
||||
return False
|
||||
self._gpu_used[(lease.node_id, lease.gpu_id)] -= lease.mb_granted
|
||||
return True
|
||||
|
||||
def get_eviction_candidates(
|
||||
self,
|
||||
node_id: str,
|
||||
gpu_id: int,
|
||||
needed_mb: int,
|
||||
requester_priority: int,
|
||||
) -> list[VRAMLease]:
|
||||
candidates = [
|
||||
lease for lease in self._leases.values()
|
||||
if lease.node_id == node_id
|
||||
and lease.gpu_id == gpu_id
|
||||
and lease.priority > requester_priority
|
||||
]
|
||||
candidates.sort(key=lambda lease: lease.priority, reverse=True)
|
||||
selected: list[VRAMLease] = []
|
||||
freed = 0
|
||||
for candidate in candidates:
|
||||
selected.append(candidate)
|
||||
freed += candidate.mb_granted
|
||||
if freed >= needed_mb:
|
||||
break
|
||||
return selected
|
||||
|
||||
def list_leases(
|
||||
self, node_id: str | None = None, gpu_id: int | None = None
|
||||
) -> list[VRAMLease]:
|
||||
return [
|
||||
lease for lease in self._leases.values()
|
||||
if (node_id is None or lease.node_id == node_id)
|
||||
and (gpu_id is None or lease.gpu_id == gpu_id)
|
||||
]
|
||||
|
||||
def all_leases(self) -> list[VRAMLease]:
|
||||
return list(self._leases.values())
|
||||
65
circuitforge_core/resources/coordinator/profile_registry.py
Normal file
65
circuitforge_core/resources/coordinator/profile_registry.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
# circuitforge_core/resources/coordinator/profile_registry.py
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from circuitforge_core.resources.models import GpuInfo
|
||||
from circuitforge_core.resources.profiles.schema import GpuProfile, load_profile
|
||||
|
||||
_PUBLIC_DIR = Path(__file__).parent.parent / "profiles" / "public"
|
||||
|
||||
# VRAM thresholds for public profile selection (MB)
|
||||
_PROFILE_THRESHOLDS = [
|
||||
(22000, "single-gpu-24gb"),
|
||||
(14000, "single-gpu-16gb"),
|
||||
(8000, "single-gpu-8gb"),
|
||||
(5500, "single-gpu-6gb"),
|
||||
(3500, "single-gpu-4gb"),
|
||||
(0, "single-gpu-2gb"),
|
||||
]
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProfileRegistry:
|
||||
def __init__(self, extra_dirs: list[Path] | None = None) -> None:
|
||||
self._profiles: dict[str, GpuProfile] = {}
|
||||
self._load_dir(_PUBLIC_DIR)
|
||||
for d in (extra_dirs or []):
|
||||
if d.exists():
|
||||
self._load_dir(d)
|
||||
|
||||
def _load_dir(self, directory: Path) -> None:
|
||||
for yaml_file in directory.glob("*.yaml"):
|
||||
try:
|
||||
profile = load_profile(yaml_file)
|
||||
self._profiles[profile.name] = profile
|
||||
except Exception as exc:
|
||||
_log.warning("Skipping %s: %s", yaml_file, exc)
|
||||
|
||||
def load(self, path: Path) -> GpuProfile:
|
||||
profile = load_profile(path)
|
||||
self._profiles[profile.name] = profile
|
||||
return profile
|
||||
|
||||
def list_public(self) -> list[GpuProfile]:
|
||||
# CPU profiles (cpu-*) are intentionally excluded — this endpoint
|
||||
# is used to match GPU hardware. CPU inference nodes self-select
|
||||
# their profile via the CLI and are not listed for lease matching.
|
||||
return [
|
||||
p for p in self._profiles.values()
|
||||
if p.name.startswith("single-gpu-")
|
||||
]
|
||||
|
||||
def get(self, name: str) -> GpuProfile | None:
|
||||
return self._profiles.get(name)
|
||||
|
||||
def auto_detect(self, gpus: list[GpuInfo]) -> GpuProfile:
|
||||
primary_vram = gpus[0].vram_total_mb if gpus else 0
|
||||
for threshold_mb, profile_name in _PROFILE_THRESHOLDS:
|
||||
if primary_vram >= threshold_mb:
|
||||
profile = self._profiles.get(profile_name)
|
||||
if profile:
|
||||
return profile
|
||||
return self._profiles["single-gpu-2gb"]
|
||||
56
circuitforge_core/resources/models.py
Normal file
56
circuitforge_core/resources/models.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class VRAMLease:
|
||||
lease_id: str
|
||||
gpu_id: int
|
||||
node_id: str
|
||||
mb_granted: int
|
||||
holder_service: str
|
||||
priority: int
|
||||
expires_at: float # unix timestamp; 0.0 = no expiry
|
||||
|
||||
@classmethod
|
||||
def create(
|
||||
cls,
|
||||
gpu_id: int,
|
||||
node_id: str,
|
||||
mb: int,
|
||||
service: str,
|
||||
priority: int,
|
||||
ttl_s: float = 0.0,
|
||||
) -> VRAMLease:
|
||||
return cls(
|
||||
lease_id=str(uuid.uuid4()),
|
||||
gpu_id=gpu_id,
|
||||
node_id=node_id,
|
||||
mb_granted=mb,
|
||||
holder_service=service,
|
||||
priority=priority,
|
||||
expires_at=time.time() + ttl_s if ttl_s > 0.0 else 0.0,
|
||||
)
|
||||
|
||||
def is_expired(self) -> bool:
|
||||
return self.expires_at > 0.0 and time.time() > self.expires_at
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GpuInfo:
|
||||
gpu_id: int
|
||||
name: str
|
||||
vram_total_mb: int
|
||||
vram_used_mb: int
|
||||
vram_free_mb: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class NodeInfo:
|
||||
node_id: str
|
||||
agent_url: str
|
||||
gpus: list[GpuInfo]
|
||||
last_heartbeat: float = field(default_factory=time.time)
|
||||
0
circuitforge_core/resources/profiles/__init__.py
Normal file
0
circuitforge_core/resources/profiles/__init__.py
Normal file
33
circuitforge_core/resources/profiles/public/cpu-16gb.yaml
Normal file
33
circuitforge_core/resources/profiles/public/cpu-16gb.yaml
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
schema_version: 1
|
||||
name: cpu-16gb
|
||||
eviction_timeout_s: 30.0
|
||||
services:
|
||||
ollama:
|
||||
max_mb: 0
|
||||
priority: 1
|
||||
cf-stt:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
backend: moonshine
|
||||
cf-tts:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
cf-embed:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
always_on: true
|
||||
cf-classify:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
always_on: true
|
||||
model_size_hints:
|
||||
llm_max_params: 3b-q4
|
||||
image_gen_max: none
|
||||
33
circuitforge_core/resources/profiles/public/cpu-32gb.yaml
Normal file
33
circuitforge_core/resources/profiles/public/cpu-32gb.yaml
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
schema_version: 1
|
||||
name: cpu-32gb
|
||||
eviction_timeout_s: 30.0
|
||||
services:
|
||||
ollama:
|
||||
max_mb: 0
|
||||
priority: 1
|
||||
cf-stt:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
backend: faster-whisper
|
||||
cf-tts:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
cf-embed:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 4
|
||||
always_on: true
|
||||
cf-classify:
|
||||
max_mb: 0
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 4
|
||||
always_on: true
|
||||
model_size_hints:
|
||||
llm_max_params: 7b-q4
|
||||
image_gen_max: none
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-16gb
|
||||
vram_total_mb: 16384
|
||||
eviction_timeout_s: 10.0
|
||||
services:
|
||||
vllm:
|
||||
max_mb: 12288
|
||||
priority: 1
|
||||
ollama:
|
||||
max_mb: 12288
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 3072
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 4
|
||||
cf-stt:
|
||||
max_mb: 1200
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 3
|
||||
backend: parakeet-tdt
|
||||
cf-tts:
|
||||
max_mb: 1024
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 3
|
||||
cf-embed:
|
||||
max_mb: 512
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 6
|
||||
always_on: true
|
||||
cf-classify:
|
||||
max_mb: 512
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 6
|
||||
always_on: true
|
||||
comfyui:
|
||||
max_mb: 14336
|
||||
priority: 4
|
||||
model_size_hints:
|
||||
llm_max_params: 34b
|
||||
image_gen_max: flux-dev-fp8
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-24gb
|
||||
vram_total_mb: 24576
|
||||
eviction_timeout_s: 10.0
|
||||
services:
|
||||
vllm:
|
||||
max_mb: 20480
|
||||
priority: 1
|
||||
ollama:
|
||||
max_mb: 18432
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 4096
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 6
|
||||
cf-stt:
|
||||
max_mb: 1200
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 4
|
||||
backend: parakeet-tdt
|
||||
cf-tts:
|
||||
max_mb: 1024
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 4
|
||||
cf-embed:
|
||||
max_mb: 512
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 8
|
||||
always_on: true
|
||||
cf-classify:
|
||||
max_mb: 512
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 8
|
||||
always_on: true
|
||||
comfyui:
|
||||
max_mb: 20480
|
||||
priority: 4
|
||||
model_size_hints:
|
||||
llm_max_params: 70b
|
||||
image_gen_max: flux-dev-fp16
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-2gb
|
||||
vram_total_mb: 2048
|
||||
eviction_timeout_s: 15.0
|
||||
services:
|
||||
ollama:
|
||||
max_mb: 1536
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 512
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
cf-stt:
|
||||
max_mb: 200
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
backend: moonshine
|
||||
model_size_hints:
|
||||
llm_max_params: 3b
|
||||
image_gen_max: none
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-4gb
|
||||
vram_total_mb: 4096
|
||||
eviction_timeout_s: 15.0
|
||||
services:
|
||||
ollama:
|
||||
max_mb: 3072
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 1024
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
cf-stt:
|
||||
max_mb: 600
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
backend: faster-whisper
|
||||
cf-tts:
|
||||
max_mb: 512
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
comfyui:
|
||||
max_mb: 3584
|
||||
priority: 4
|
||||
model_size_hints:
|
||||
llm_max_params: 3b
|
||||
image_gen_max: sd15-fp8
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-6gb
|
||||
vram_total_mb: 6144
|
||||
eviction_timeout_s: 10.0
|
||||
services:
|
||||
vllm:
|
||||
max_mb: 4096
|
||||
priority: 1
|
||||
ollama:
|
||||
max_mb: 3584
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 1536
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
cf-stt:
|
||||
max_mb: 600
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
backend: faster-whisper
|
||||
cf-tts:
|
||||
max_mb: 768
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
comfyui:
|
||||
max_mb: 5120
|
||||
priority: 4
|
||||
model_size_hints:
|
||||
llm_max_params: 7b
|
||||
image_gen_max: sd15
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-8gb
|
||||
vram_total_mb: 8192
|
||||
eviction_timeout_s: 10.0
|
||||
services:
|
||||
vllm:
|
||||
max_mb: 5120
|
||||
priority: 1
|
||||
ollama:
|
||||
max_mb: 4096
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 2048
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 3
|
||||
cf-stt:
|
||||
max_mb: 1200
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
backend: parakeet-tdt
|
||||
cf-tts:
|
||||
max_mb: 1024
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
comfyui:
|
||||
max_mb: 6144
|
||||
priority: 4
|
||||
model_size_hints:
|
||||
llm_max_params: 8b
|
||||
image_gen_max: sdxl-fp8
|
||||
66
circuitforge_core/resources/profiles/schema.py
Normal file
66
circuitforge_core/resources/profiles/schema.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
# circuitforge_core/resources/profiles/schema.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
SUPPORTED_SCHEMA_VERSION = 1
|
||||
|
||||
|
||||
class ServiceProfile(BaseModel):
    """VRAM budget and scheduling policy for one managed service."""

    max_mb: int  # VRAM ceiling in MiB (0 in CPU-only profiles)
    priority: int  # eviction ordering: lower value wins; higher-numbered leases are evicted first
    shared: bool = False  # presumably one loaded instance serves multiple callers — TODO confirm
    max_concurrent: int = 1  # concurrency cap when shared
    always_on: bool = False  # presumably the service stays resident rather than being unloaded — TODO confirm
    backend: str | None = None  # implementation hint, e.g. STT backend ("moonshine", "faster-whisper")
    consumers: list[str] = Field(default_factory=list)  # NOTE(review): not used by any visible code

    model_config = {"frozen": True}  # instances are immutable after validation
|
||||
|
||||
|
||||
class GpuNodeEntry(BaseModel):
    """One physical GPU within a NodeProfile."""

    id: int  # device index on the node
    vram_mb: int  # total VRAM of this device in MiB
    role: str  # free-form role label; semantics defined by profile authors
    card: str = "unknown"  # card model name, if known
    always_on: bool = False  # presumably marks the GPU as hosting non-evictable services — TODO confirm
    services: list[str] = Field(default_factory=list)  # service names assigned to this GPU

    model_config = {"frozen": True}  # instances are immutable after validation
|
||||
|
||||
|
||||
class NodeProfile(BaseModel):
    """Per-node section of a multi-node profile."""

    gpus: list[GpuNodeEntry]  # GPUs present on this node
    agent_url: str | None = None  # base URL of this node's agent, if configured
    nas_mount: str | None = None  # shared storage mount point, if any

    model_config = {"frozen": True}  # instances are immutable after validation
|
||||
|
||||
|
||||
class GpuProfile(BaseModel):
    """Top-level validated profile document (schema_version 1)."""

    schema_version: int  # must equal SUPPORTED_SCHEMA_VERSION; enforced by load_profile()
    name: str  # profile identifier, e.g. "single-gpu-8gb"
    vram_total_mb: int | None = None  # expected total VRAM; None for CPU-only profiles
    eviction_timeout_s: float = 10.0  # seconds to wait for a service to vacate VRAM
    services: dict[str, ServiceProfile] = Field(default_factory=dict)  # per-service budgets, keyed by service name
    model_size_hints: dict[str, str] = Field(default_factory=dict)  # e.g. {"llm_max_params": "8b"}
    nodes: dict[str, NodeProfile] = Field(default_factory=dict)  # optional multi-node layout, keyed by node id

    model_config = {"frozen": True}  # instances are immutable after validation
|
||||
|
||||
|
||||
def load_profile(path: Path) -> GpuProfile:
    """Load and validate a GPU profile YAML document.

    Args:
        path: Path to the profile YAML file.

    Returns:
        The validated ``GpuProfile``.

    Raises:
        ValueError: If the document is not a YAML mapping or declares a
            schema_version other than ``SUPPORTED_SCHEMA_VERSION``.
    """
    # Read explicitly as UTF-8: Path.read_text() otherwise uses the
    # locale-dependent default encoding, so the same profile file could
    # fail to parse on differently-configured hosts.
    raw: dict[str, Any] = yaml.safe_load(path.read_text(encoding="utf-8"))
    if not isinstance(raw, dict):
        raise ValueError(f"Profile file {path} must be a YAML mapping, got {type(raw).__name__}")
    version = raw.get("schema_version")
    if version != SUPPORTED_SCHEMA_VERSION:
        raise ValueError(
            f"Unsupported schema_version {version!r} in {path}. "
            f"Expected {SUPPORTED_SCHEMA_VERSION}."
        )
    return GpuProfile.model_validate(raw)
|
||||
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||
|
||||
[project]
|
||||
name = "circuitforge-core"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
description = "Shared scaffold for CircuitForge products"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
|
|
@ -13,9 +13,29 @@ dependencies = [
|
|||
"openai>=1.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
orch = [
|
||||
"fastapi>=0.110",
|
||||
"uvicorn[standard]>=0.29",
|
||||
"httpx>=0.27",
|
||||
"pydantic>=2.0",
|
||||
"typer[all]>=0.12",
|
||||
"psutil>=5.9",
|
||||
]
|
||||
dev = [
|
||||
"circuitforge-core[orch]",
|
||||
"pytest>=8.0",
|
||||
"pytest-asyncio>=0.23",
|
||||
"httpx>=0.27",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
cf-orch = "circuitforge_core.resources.cli:app"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["circuitforge_core*"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
asyncio_mode = "auto"
|
||||
|
|
|
|||
0
tests/test_resources/__init__.py
Normal file
0
tests/test_resources/__init__.py
Normal file
68
tests/test_resources/test_agent_app.py
Normal file
68
tests/test_resources/test_agent_app.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
from __future__ import annotations

import pytest
from unittest.mock import MagicMock
from fastapi.testclient import TestClient

from circuitforge_core.resources.agent.app import create_agent_app
from circuitforge_core.resources.models import GpuInfo
from circuitforge_core.resources.agent.eviction_executor import EvictionResult

# Fixed single-GPU snapshot returned by the mocked monitor in every test.
MOCK_GPUS = [
    GpuInfo(
        gpu_id=0,
        name="RTX 4000",
        vram_total_mb=8192,
        vram_used_mb=1024,
        vram_free_mb=7168,
    ),
]


@pytest.fixture
def agent_client():
    """Build an agent app with mocked monitor/executor.

    Returns a (TestClient, mock_monitor, mock_executor) triple so tests can
    both issue HTTP calls and assert on the mocks.
    """
    mock_monitor = MagicMock()
    mock_monitor.poll.return_value = MOCK_GPUS
    mock_executor = MagicMock()
    app = create_agent_app(
        node_id="heimdall",
        monitor=mock_monitor,
        executor=mock_executor,
    )
    return TestClient(app), mock_monitor, mock_executor


def test_health_returns_ok(agent_client):
    # /health reports liveness plus the node identity the app was created with.
    client, _, _ = agent_client
    resp = client.get("/health")
    assert resp.status_code == 200
    assert resp.json()["status"] == "ok"
    assert resp.json()["node_id"] == "heimdall"


def test_gpu_info_returns_gpu_list(agent_client):
    # /gpu-info surfaces the monitor's poll() result.
    client, _, _ = agent_client
    resp = client.get("/gpu-info")
    assert resp.status_code == 200
    data = resp.json()
    assert len(data["gpus"]) == 1
    assert data["gpus"][0]["gpu_id"] == 0
    assert data["gpus"][0]["name"] == "RTX 4000"
    assert data["gpus"][0]["vram_free_mb"] == 7168


def test_evict_calls_executor(agent_client):
    # POST /evict delegates to the executor with the request's pid/grace period.
    client, _, mock_executor = agent_client
    mock_executor.evict_pid.return_value = EvictionResult(
        success=True, method="sigterm", message="done"
    )
    resp = client.post("/evict", json={"pid": 1234, "grace_period_s": 5.0})
    assert resp.status_code == 200
    assert resp.json()["success"] is True
    mock_executor.evict_pid.assert_called_once_with(pid=1234, grace_period_s=5.0)


def test_evict_requires_pid(agent_client):
    # A request missing the required "pid" field fails validation with 422.
    client, _, _ = agent_client
    resp = client.post("/evict", json={"grace_period_s": 5.0})
    assert resp.status_code == 422
|
||||
33
tests/test_resources/test_cli.py
Normal file
33
tests/test_resources/test_cli.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
from __future__ import annotations

from pathlib import Path
from unittest.mock import patch

from typer.testing import CliRunner

from circuitforge_core.resources.cli import app

runner = CliRunner()


def test_cli_help():
    # --help must succeed and print either the tool name or standard usage text.
    result = runner.invoke(app, ["--help"])
    assert result.exit_code == 0
    assert "cf-orch" in result.output.lower() or "Usage" in result.output


def test_status_command_shows_no_coordinator_message():
    # With the coordinator unreachable, `status` should fail or say so clearly.
    with patch("httpx.get", side_effect=ConnectionRefusedError("refused")):
        result = runner.invoke(app, ["status"])
        assert result.exit_code != 0 or "unreachable" in result.output.lower() \
            or "coordinator" in result.output.lower()


def test_install_service_creates_systemd_unit(tmp_path: Path):
    # --dry-run should report the unit it would write without touching the system;
    # the unit path is redirected into tmp_path via the module-level constant.
    unit_path = tmp_path / "cf-orch.service"
    with patch(
        "circuitforge_core.resources.cli._SYSTEMD_UNIT_PATH", unit_path
    ):
        result = runner.invoke(app, ["install-service", "--dry-run"])
        assert result.exit_code == 0
        assert "cf-orch.service" in result.output or "systemd" in result.output.lower()
|
||||
102
tests/test_resources/test_coordinator_app.py
Normal file
102
tests/test_resources/test_coordinator_app.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import pytest
from unittest.mock import MagicMock
from fastapi.testclient import TestClient
from circuitforge_core.resources.coordinator.app import create_coordinator_app
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
from circuitforge_core.resources.models import GpuInfo, NodeInfo


@pytest.fixture
def coordinator_client():
    """In-process coordinator with one registered 8 GiB GPU and a mocked supervisor.

    Returns (TestClient, LeaseManager) so tests can hit the HTTP API and
    inspect lease state directly.
    """
    lease_manager = LeaseManager()
    lease_manager.register_gpu("heimdall", 0, 8192)
    profile_registry = ProfileRegistry()
    supervisor = MagicMock()
    supervisor.all_nodes.return_value = [
        NodeInfo(
            node_id="heimdall",
            agent_url="http://localhost:7701",
            gpus=[GpuInfo(gpu_id=0, name="RTX 4000",
                          vram_total_mb=8192, vram_used_mb=0, vram_free_mb=8192)],
            last_heartbeat=0.0,
        )
    ]
    supervisor.get_node_info.return_value = NodeInfo(
        node_id="heimdall",
        agent_url="http://localhost:7701",
        gpus=[],
        last_heartbeat=0.0,
    )
    app = create_coordinator_app(
        lease_manager=lease_manager,
        profile_registry=profile_registry,
        agent_supervisor=supervisor,
    )
    return TestClient(app), lease_manager


def test_health_returns_ok(coordinator_client):
    client, _ = coordinator_client
    resp = client.get("/api/health")
    assert resp.status_code == 200
    assert resp.json()["status"] == "ok"


def test_get_nodes_returns_list(coordinator_client):
    # /api/nodes reflects the supervisor's all_nodes() view.
    client, _ = coordinator_client
    resp = client.get("/api/nodes")
    assert resp.status_code == 200
    nodes = resp.json()["nodes"]
    assert len(nodes) == 1
    assert nodes[0]["node_id"] == "heimdall"


def test_get_profiles_returns_public_profiles(coordinator_client):
    # The registry ships public YAML profiles; spot-check a well-known one.
    client, _ = coordinator_client
    resp = client.get("/api/profiles")
    assert resp.status_code == 200
    names = [p["name"] for p in resp.json()["profiles"]]
    assert "single-gpu-8gb" in names


def test_post_lease_grants_lease(coordinator_client):
    client, _ = coordinator_client
    resp = client.post("/api/leases", json={
        "node_id": "heimdall", "gpu_id": 0,
        "mb": 2048, "service": "peregrine", "priority": 1,
    })
    assert resp.status_code == 200
    data = resp.json()
    assert data["lease"]["mb_granted"] == 2048
    assert data["lease"]["holder_service"] == "peregrine"
    assert "lease_id" in data["lease"]


def test_delete_lease_releases_it(coordinator_client):
    # Grant, then release by id; the API acknowledges the release.
    client, _ = coordinator_client
    resp = client.post("/api/leases", json={
        "node_id": "heimdall", "gpu_id": 0,
        "mb": 2048, "service": "peregrine", "priority": 1,
    })
    lease_id = resp.json()["lease"]["lease_id"]
    del_resp = client.delete(f"/api/leases/{lease_id}")
    assert del_resp.status_code == 200
    assert del_resp.json()["released"] is True


def test_delete_unknown_lease_returns_404(coordinator_client):
    client, _ = coordinator_client
    resp = client.delete("/api/leases/nonexistent-id")
    assert resp.status_code == 404


def test_get_leases_returns_active_leases(coordinator_client):
    client, _ = coordinator_client
    client.post("/api/leases", json={
        "node_id": "heimdall", "gpu_id": 0,
        "mb": 1024, "service": "kiwi", "priority": 2,
    })
    resp = client.get("/api/leases")
    assert resp.status_code == 200
    assert len(resp.json()["leases"]) == 1
|
||||
67
tests/test_resources/test_eviction_engine.py
Normal file
67
tests/test_resources/test_eviction_engine.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import asyncio
import pytest
from unittest.mock import AsyncMock, patch
from circuitforge_core.resources.coordinator.eviction_engine import EvictionEngine
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager


@pytest.fixture
def lease_manager():
    mgr = LeaseManager()
    mgr.register_gpu("heimdall", 0, 8192)
    return mgr


@pytest.fixture
def engine(lease_manager):
    # Short timeout keeps the eviction-wait path fast in tests.
    return EvictionEngine(lease_manager=lease_manager, eviction_timeout_s=0.1)


@pytest.mark.asyncio
async def test_request_lease_grants_when_vram_available(engine, lease_manager):
    lease = await engine.request_lease(
        node_id="heimdall", gpu_id=0, mb=4096,
        service="peregrine", priority=1,
        agent_url="http://localhost:7701",
    )
    assert lease is not None
    assert lease.mb_granted == 4096


@pytest.mark.asyncio
async def test_request_lease_evicts_and_grants(engine, lease_manager):
    # Pre-fill with a low-priority lease
    big_lease = await lease_manager.try_grant(
        "heimdall", 0, 7000, "comfyui", priority=4
    )
    assert big_lease is not None

    # Mock the agent eviction call
    with patch(
        "circuitforge_core.resources.coordinator.eviction_engine.EvictionEngine._call_agent_evict",
        new_callable=AsyncMock,
    ) as mock_evict:
        mock_evict.return_value = True
        # Simulate the comfyui lease being released (as if the agent evicted it)
        asyncio.get_event_loop().call_later(
            0.05, lambda: asyncio.ensure_future(lease_manager.release(big_lease.lease_id))
        )
        lease = await engine.request_lease(
            node_id="heimdall", gpu_id=0, mb=4096,
            service="peregrine", priority=1,
            agent_url="http://localhost:7701",
        )
        assert lease is not None
        assert lease.holder_service == "peregrine"


@pytest.mark.asyncio
async def test_request_lease_returns_none_when_no_eviction_candidates(engine):
    await engine.lease_manager.try_grant("heimdall", 0, 6000, "vllm", priority=1)
    # Requesting 4GB but no lower-priority leases exist
    lease = await engine.request_lease(
        node_id="heimdall", gpu_id=0, mb=4096,
        service="kiwi", priority=2,
        agent_url="http://localhost:7701",
    )
    assert lease is None
|
||||
43
tests/test_resources/test_eviction_executor.py
Normal file
43
tests/test_resources/test_eviction_executor.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import signal
from unittest.mock import patch, call
import pytest
from circuitforge_core.resources.agent.eviction_executor import EvictionExecutor, EvictionResult


def test_evict_by_pid_sends_sigterm_then_sigkill():
    executor = EvictionExecutor(grace_period_s=0.01)
    # pid_exists always True → grace period expires → SIGKILL fires
    with patch("os.kill") as mock_kill, \
            patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil:
        mock_psutil.pid_exists.return_value = True
        result = executor.evict_pid(pid=1234, grace_period_s=0.01)

    # Both signals must have been delivered, TERM first as a courtesy.
    assert result.success is True
    calls = mock_kill.call_args_list
    assert call(1234, signal.SIGTERM) in calls
    assert call(1234, signal.SIGKILL) in calls


def test_evict_pid_succeeds_on_sigterm_alone():
    executor = EvictionExecutor(grace_period_s=0.1)
    with patch("os.kill"), \
            patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil:
        mock_psutil.pid_exists.side_effect = [True, False]  # gone after SIGTERM
        result = executor.evict_pid(pid=5678, grace_period_s=0.01)
    assert result.success is True
    assert result.method == "sigterm"


def test_evict_pid_not_found_returns_failure():
    # Unknown pid is reported as a failure, not raised as an exception.
    executor = EvictionExecutor()
    with patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil:
        mock_psutil.pid_exists.return_value = False
        result = executor.evict_pid(pid=9999)
    assert result.success is False
    assert "not found" in result.message.lower()


def test_eviction_result_is_immutable():
    # EvictionResult is frozen; assignment must raise.
    result = EvictionResult(success=True, method="sigterm", message="ok")
    with pytest.raises((AttributeError, TypeError)):
        result.success = False  # type: ignore
|
||||
60
tests/test_resources/test_gpu_monitor.py
Normal file
60
tests/test_resources/test_gpu_monitor.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from unittest.mock import patch
from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor


# Two-GPU sample in the CSV field order the monitor parses:
# index, name, total MiB, used MiB, free MiB (presumably nvidia-smi
# --query-gpu CSV output — confirm against GpuMonitor's command line).
SAMPLE_NVIDIA_SMI_OUTPUT = (
    "0, Quadro RTX 4000, 8192, 6843, 1349\n"
    "1, Quadro RTX 4000, 8192, 721, 7471\n"
)


def test_parse_returns_list_of_gpu_info():
    monitor = GpuMonitor()
    with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run:
        mock_run.return_value.returncode = 0
        mock_run.return_value.stdout = SAMPLE_NVIDIA_SMI_OUTPUT
        gpus = monitor.poll()
    assert len(gpus) == 2
    assert gpus[0].gpu_id == 0
    assert gpus[0].name == "Quadro RTX 4000"
    assert gpus[0].vram_total_mb == 8192
    assert gpus[0].vram_used_mb == 6843
    assert gpus[0].vram_free_mb == 1349


def test_parse_second_gpu():
    monitor = GpuMonitor()
    with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run:
        mock_run.return_value.returncode = 0
        mock_run.return_value.stdout = SAMPLE_NVIDIA_SMI_OUTPUT
        gpus = monitor.poll()
    assert gpus[1].gpu_id == 1
    assert gpus[1].vram_used_mb == 721
    assert gpus[1].vram_free_mb == 7471


def test_poll_returns_empty_list_when_nvidia_smi_unavailable():
    # No nvidia-smi binary (FileNotFoundError) degrades to an empty GPU list.
    monitor = GpuMonitor()
    with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run", side_effect=FileNotFoundError):
        gpus = monitor.poll()
    assert gpus == []


def test_poll_returns_empty_list_on_nonzero_exit():
    monitor = GpuMonitor()
    with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run:
        mock_run.return_value.returncode = 1
        mock_run.return_value.stdout = ""
        gpus = monitor.poll()
    assert gpus == []


def test_poll_skips_malformed_lines():
    # A non-numeric field drops that line only; valid lines still parse.
    monitor = GpuMonitor()
    malformed = "0, RTX 4000, 8192, not_a_number, 1024\n1, RTX 4000, 8192, 512, 7680\n"
    with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run:
        mock_run.return_value.returncode = 0
        mock_run.return_value.stdout = malformed
        gpus = monitor.poll()
    assert len(gpus) == 1
    assert gpus[0].gpu_id == 1
|
||||
219
tests/test_resources/test_integration.py
Normal file
219
tests/test_resources/test_integration.py
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
"""Integration test: full lease → eviction → re-grant cycle.
|
||||
|
||||
Runs coordinator in-process (no subprocesses, no real nvidia-smi).
|
||||
Uses TestClient for HTTP, mocks AgentSupervisor to return fixed node state.
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
||||
from circuitforge_core.resources.coordinator.app import create_coordinator_app
|
||||
from circuitforge_core.resources.models import GpuInfo, NodeInfo
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def system():
|
||||
"""Create an in-process coordinator system with 8GB GPU and mock supervisor."""
|
||||
lease_manager = LeaseManager()
|
||||
lease_manager.register_gpu("local", 0, 8192)
|
||||
|
||||
mock_supervisor = MagicMock(spec=AgentSupervisor)
|
||||
mock_supervisor.all_nodes.return_value = [
|
||||
NodeInfo(
|
||||
node_id="local",
|
||||
agent_url="http://localhost:7701",
|
||||
gpus=[GpuInfo(
|
||||
gpu_id=0,
|
||||
name="RTX 4000",
|
||||
vram_total_mb=8192,
|
||||
vram_used_mb=0,
|
||||
vram_free_mb=8192,
|
||||
)],
|
||||
last_heartbeat=0.0,
|
||||
)
|
||||
]
|
||||
mock_supervisor.get_node_info.return_value = NodeInfo(
|
||||
node_id="local",
|
||||
agent_url="http://localhost:7701",
|
||||
gpus=[],
|
||||
last_heartbeat=0.0,
|
||||
)
|
||||
|
||||
profile_registry = ProfileRegistry()
|
||||
app = create_coordinator_app(
|
||||
lease_manager=lease_manager,
|
||||
profile_registry=profile_registry,
|
||||
agent_supervisor=mock_supervisor,
|
||||
)
|
||||
client = TestClient(app)
|
||||
return client, lease_manager
|
||||
|
||||
|
||||
def test_full_lease_cycle(system):
|
||||
"""Test: grant, verify, release, verify gone."""
|
||||
client, _ = system
|
||||
|
||||
# Grant a lease
|
||||
resp = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 4096,
|
||||
"service": "peregrine",
|
||||
"priority": 1,
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
lease_data = resp.json()["lease"]
|
||||
lease_id = lease_data["lease_id"]
|
||||
assert lease_data["mb_granted"] == 4096
|
||||
assert lease_data["holder_service"] == "peregrine"
|
||||
|
||||
# Verify it appears in active leases
|
||||
resp = client.get("/api/leases")
|
||||
assert resp.status_code == 200
|
||||
leases = resp.json()["leases"]
|
||||
assert any(l["lease_id"] == lease_id for l in leases)
|
||||
|
||||
# Release it
|
||||
resp = client.delete(f"/api/leases/{lease_id}")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["released"] is True
|
||||
|
||||
# Verify it's gone
|
||||
resp = client.get("/api/leases")
|
||||
assert resp.status_code == 200
|
||||
leases = resp.json()["leases"]
|
||||
assert not any(l["lease_id"] == lease_id for l in leases)
|
||||
|
||||
|
||||
def test_vram_exhaustion_returns_503(system):
|
||||
"""Test: fill GPU, then request with no eviction candidates returns 503."""
|
||||
client, _ = system
|
||||
|
||||
# Fill GPU 0 with high-priority lease
|
||||
resp = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 8000,
|
||||
"service": "vllm",
|
||||
"priority": 1,
|
||||
})
|
||||
assert resp.status_code == 200
|
||||
|
||||
# Try to get more VRAM with same priority (no eviction candidates)
|
||||
resp = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 2000,
|
||||
"service": "kiwi",
|
||||
"priority": 1,
|
||||
})
|
||||
assert resp.status_code == 503
|
||||
assert "Insufficient VRAM" in resp.json()["detail"]
|
||||
|
||||
|
||||
def test_auto_detect_profile_for_8gb():
|
||||
"""Test: ProfileRegistry auto-detects single-gpu-8gb for 8GB GPU."""
|
||||
registry = ProfileRegistry()
|
||||
gpu = GpuInfo(
|
||||
gpu_id=0,
|
||||
name="RTX 4000",
|
||||
vram_total_mb=8192,
|
||||
vram_used_mb=0,
|
||||
vram_free_mb=8192,
|
||||
)
|
||||
profile = registry.auto_detect([gpu])
|
||||
assert profile.name == "single-gpu-8gb"
|
||||
# Verify profile has services configured
|
||||
assert hasattr(profile, "services")
|
||||
|
||||
|
||||
def test_node_endpoint_shows_nodes(system):
|
||||
"""Test: GET /api/nodes returns the mocked node."""
|
||||
client, _ = system
|
||||
resp = client.get("/api/nodes")
|
||||
assert resp.status_code == 200
|
||||
nodes = resp.json()["nodes"]
|
||||
assert len(nodes) == 1
|
||||
assert nodes[0]["node_id"] == "local"
|
||||
assert nodes[0]["agent_url"] == "http://localhost:7701"
|
||||
assert len(nodes[0]["gpus"]) == 1
|
||||
assert nodes[0]["gpus"][0]["name"] == "RTX 4000"
|
||||
|
||||
|
||||
def test_profiles_endpoint_returns_public_profiles(system):
|
||||
"""Test: GET /api/profiles returns standard public profiles."""
|
||||
client, _ = system
|
||||
resp = client.get("/api/profiles")
|
||||
assert resp.status_code == 200
|
||||
profiles = resp.json()["profiles"]
|
||||
names = [p["name"] for p in profiles]
|
||||
# Verify common public profiles are present
|
||||
assert "single-gpu-8gb" in names
|
||||
assert "single-gpu-6gb" in names
|
||||
assert "single-gpu-2gb" in names
|
||||
|
||||
|
||||
def test_multiple_leases_tracked_independently(system):
|
||||
"""Test: multiple active leases are tracked correctly."""
|
||||
client, _ = system
|
||||
|
||||
# Grant lease 1
|
||||
resp1 = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 2048,
|
||||
"service": "peregrine",
|
||||
"priority": 2,
|
||||
})
|
||||
assert resp1.status_code == 200
|
||||
lease1_id = resp1.json()["lease"]["lease_id"]
|
||||
|
||||
# Grant lease 2
|
||||
resp2 = client.post("/api/leases", json={
|
||||
"node_id": "local",
|
||||
"gpu_id": 0,
|
||||
"mb": 2048,
|
||||
"service": "kiwi",
|
||||
"priority": 2,
|
||||
})
|
||||
assert resp2.status_code == 200
|
||||
lease2_id = resp2.json()["lease"]["lease_id"]
|
||||
|
||||
# Both should be in active leases
|
||||
resp = client.get("/api/leases")
|
||||
leases = resp.json()["leases"]
|
||||
lease_ids = [l["lease_id"] for l in leases]
|
||||
assert lease1_id in lease_ids
|
||||
assert lease2_id in lease_ids
|
||||
assert len(leases) == 2
|
||||
|
||||
# Release lease 1
|
||||
resp = client.delete(f"/api/leases/{lease1_id}")
|
||||
assert resp.status_code == 200
|
||||
|
||||
# Only lease 2 should remain
|
||||
resp = client.get("/api/leases")
|
||||
leases = resp.json()["leases"]
|
||||
lease_ids = [l["lease_id"] for l in leases]
|
||||
assert lease1_id not in lease_ids
|
||||
assert lease2_id in lease_ids
|
||||
assert len(leases) == 1
|
||||
|
||||
|
||||
def test_delete_nonexistent_lease_returns_404(system):
|
||||
"""Test: deleting a nonexistent lease returns 404."""
|
||||
client, _ = system
|
||||
resp = client.delete("/api/leases/nonexistent-lease-id")
|
||||
assert resp.status_code == 404
|
||||
assert "not found" in resp.json()["detail"]
|
||||
|
||||
|
||||
def test_health_endpoint_returns_ok(system):
|
||||
"""Test: GET /api/health returns status ok."""
|
||||
client, _ = system
|
||||
resp = client.get("/api/health")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["status"] == "ok"
|
||||
85
tests/test_resources/test_lease_manager.py
Normal file
85
tests/test_resources/test_lease_manager.py
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import pytest
|
||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mgr():
|
||||
m = LeaseManager()
|
||||
m.register_gpu(node_id="heimdall", gpu_id=0, total_mb=8192)
|
||||
return m
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_grant_succeeds_when_vram_available(mgr):
|
||||
lease = await mgr.try_grant(
|
||||
node_id="heimdall", gpu_id=0, mb=4096,
|
||||
service="peregrine", priority=1
|
||||
)
|
||||
assert lease is not None
|
||||
assert lease.mb_granted == 4096
|
||||
assert lease.node_id == "heimdall"
|
||||
assert lease.gpu_id == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_grant_fails_when_vram_insufficient(mgr):
    """A second grant is refused once free VRAM drops below the request."""
    # Occupy 7000 MB of the 8192 MB GPU, leaving under 2000 MB free.
    await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=7000, service="vllm", priority=1
    )
    denied = await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=2000, service="kiwi", priority=2
    )
    assert denied is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_release_frees_vram(mgr):
    """Releasing a lease returns its VRAM to the pool for new grants."""
    first = await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=7000, service="vllm", priority=1
    )
    assert first is not None
    assert await mgr.release(first.lease_id) is True
    # The freed 7000 MB must be grantable again on the same GPU.
    second = await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=7000, service="comfyui", priority=4
    )
    assert second is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_release_unknown_lease_returns_false(mgr):
    """Releasing an ID that was never granted reports False, not an error."""
    assert await mgr.release("nonexistent-id") is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_eviction_candidates_returns_lower_priority_leases(mgr):
    """Only holders with lower priority than the requester are evictable.

    Priority numbering here: smaller number = higher priority (ollama at 1
    must survive a priority-2 requester; comfyui at 4 may be evicted).
    """
    await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=3000, service="comfyui", priority=4
    )
    await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=2000, service="ollama", priority=1
    )
    candidates = mgr.get_eviction_candidates(
        node_id="heimdall", gpu_id=0, needed_mb=3000, requester_priority=2
    )
    assert [c.holder_service for c in candidates] == ["comfyui"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_leases_for_gpu(mgr):
    """list_leases reports every active lease held on the given GPU."""
    for service, mb, prio in (("peregrine", 1024, 1), ("kiwi", 512, 2)):
        await mgr.try_grant(
            node_id="heimdall", gpu_id=0, mb=mb, service=service, priority=prio
        )
    active = mgr.list_leases(node_id="heimdall", gpu_id=0)
    assert len(active) == 2
|
||||
|
||||
|
||||
def test_register_gpu_sets_total(mgr):
    """register_gpu records the GPU's total VRAM capacity (see fixture)."""
    total = mgr.gpu_total_mb("heimdall", 0)
    assert total == 8192
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_used_mb_tracks_grants():
    """used_mb reports the sum of VRAM held by all active grants on a GPU.

    Builds its own manager (no fixture) so the accounting starts from zero.
    Keyword arguments are used throughout for consistency with the other
    tests in this module, which all call try_grant/register_gpu by keyword.
    """
    mgr = LeaseManager()
    mgr.register_gpu(node_id="heimdall", gpu_id=0, total_mb=8192)
    await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=3000,
                        service="a", priority=1)
    await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=2000,
                        service="b", priority=2)
    # 3000 + 2000 granted -> 5000 MB in use.
    assert mgr.used_mb("heimdall", 0) == 5000
|
||||
tests/test_resources/test_models.py — new file, 47 lines
@@ -0,0 +1,47 @@
|
|||
import time
|
||||
import pytest
|
||||
from circuitforge_core.resources.models import VRAMLease, GpuInfo, NodeInfo
|
||||
|
||||
|
||||
def test_vram_lease_create_assigns_unique_ids():
    """Two leases created from identical arguments get distinct lease IDs."""
    common = dict(gpu_id=0, node_id="heimdall", mb=4096,
                  service="peregrine", priority=1)
    first = VRAMLease.create(**common)
    second = VRAMLease.create(**common)
    assert first.lease_id != second.lease_id
|
||||
|
||||
|
||||
def test_vram_lease_create_with_ttl_sets_expiry():
    """Passing ttl_s sets expires_at to creation time plus the TTL."""
    before = time.time()
    lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=2048,
                             service="kiwi", priority=2, ttl_s=60.0)
    after = time.time()
    # Creation happened somewhere in [before, after], so the expiry must
    # land in that same window shifted forward by the 60 s TTL.
    assert before + 60.0 <= lease.expires_at <= after + 60.0
|
||||
|
||||
|
||||
def test_vram_lease_create_no_ttl_has_zero_expiry():
    """Omitting ttl_s leaves expires_at at the 0.0 sentinel (never expires)."""
    lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=1024,
                             service="snipe", priority=2)
    assert lease.expires_at == 0.0
|
||||
|
||||
|
||||
def test_vram_lease_is_immutable():
    """Assigning to a lease field must raise rather than mutate it."""
    lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=1024,
                             service="snipe", priority=2)
    # Frozen dataclasses raise FrozenInstanceError (an AttributeError
    # subclass); other immutability schemes may raise TypeError — accept both.
    with pytest.raises((AttributeError, TypeError)):
        lease.mb_granted = 999  # type: ignore
|
||||
|
||||
|
||||
def test_gpu_info_fields():
    """GpuInfo preserves the VRAM figures it was constructed with."""
    info = GpuInfo(
        gpu_id=0,
        name="RTX 4000",
        vram_total_mb=8192,
        vram_used_mb=2048,
        vram_free_mb=6144,
    )
    assert info.vram_free_mb == 6144
|
||||
|
||||
|
||||
def test_node_info_fields():
    """NodeInfo carries its node ID and the list of attached GPUs."""
    single_gpu = GpuInfo(
        gpu_id=0,
        name="RTX 4000",
        vram_total_mb=8192,
        vram_used_mb=0,
        vram_free_mb=8192,
    )
    node = NodeInfo(
        node_id="heimdall",
        agent_url="http://localhost:7701",
        gpus=[single_gpu],
        last_heartbeat=time.time(),
    )
    assert node.node_id == "heimdall"
    assert len(node.gpus) == 1
|
||||
tests/test_resources/test_profile_registry.py — new file, 101 lines
@@ -0,0 +1,101 @@
|
|||
# tests/test_resources/test_profile_registry.py
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from circuitforge_core.resources.profiles.schema import (
|
||||
GpuProfile, ServiceProfile, load_profile
|
||||
)
|
||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
||||
|
||||
|
||||
def test_load_8gb_profile(tmp_path):
    """load_profile parses a full 8 GB profile including per-service options."""
    yaml_content = """
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 5120
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
"""
    profile_file = tmp_path / "test.yaml"
    profile_file.write_text(yaml_content)
    profile = load_profile(profile_file)

    # Top-level profile fields.
    assert profile.name == "single-gpu-8gb"
    assert profile.schema_version == 1
    assert profile.vram_total_mb == 8192
    assert profile.eviction_timeout_s == 10.0
    # Per-service entries.
    assert "vllm" in profile.services
    vllm = profile.services["vllm"]
    assert vllm.max_mb == 5120
    assert vllm.priority == 1
    vision = profile.services["cf-vision"]
    assert vision.shared is True
    assert vision.max_concurrent == 3
|
||||
|
||||
|
||||
def test_load_profile_rejects_wrong_schema_version(tmp_path):
    """An unsupported schema_version is rejected with a ValueError naming it."""
    profile_file = tmp_path / "future.yaml"
    profile_file.write_text("schema_version: 99\nname: future\n")
    with pytest.raises(ValueError, match="schema_version"):
        load_profile(profile_file)
|
||||
|
||||
|
||||
def test_service_profile_defaults():
    """Optional ServiceProfile fields fall back to their defaults."""
    profile = ServiceProfile(max_mb=1024, priority=2)
    # Flags default off, concurrency to a single slot.
    assert profile.shared is False
    assert profile.always_on is False
    assert profile.max_concurrent == 1
    # No backend or downstream consumers unless configured.
    assert profile.backend is None
    assert profile.consumers == []
|
||||
|
||||
|
||||
def test_profile_registry_loads_public_profiles():
    """The registry ships with the three public single-GPU profiles."""
    names = {p.name for p in ProfileRegistry().list_public()}
    assert {"single-gpu-8gb", "single-gpu-6gb", "single-gpu-2gb"} <= names
|
||||
|
||||
|
||||
def test_profile_registry_auto_detect_selects_8gb():
    """An 8192 MB GPU maps to the single-gpu-8gb profile."""
    fake_gpu = MagicMock(vram_total_mb=8192)
    selected = ProfileRegistry().auto_detect([fake_gpu])
    assert selected.name == "single-gpu-8gb"
|
||||
|
||||
|
||||
def test_profile_registry_auto_detect_selects_6gb():
    """A 6144 MB GPU maps to the single-gpu-6gb profile."""
    fake_gpu = MagicMock(vram_total_mb=6144)
    selected = ProfileRegistry().auto_detect([fake_gpu])
    assert selected.name == "single-gpu-6gb"
|
||||
|
||||
|
||||
def test_profile_registry_auto_detect_selects_2gb():
    """A 2048 MB GPU maps to the single-gpu-2gb profile."""
    fake_gpu = MagicMock(vram_total_mb=2048)
    selected = ProfileRegistry().auto_detect([fake_gpu])
    assert selected.name == "single-gpu-2gb"
|
||||
|
||||
|
||||
def test_profile_registry_load_from_path(tmp_path):
    """Registry.load reads an arbitrary profile file from an explicit path."""
    profile_path = tmp_path / "custom.yaml"
    profile_path.write_text(
        "schema_version: 1\nname: custom\n"
        "vram_total_mb: 12288\neviction_timeout_s: 5.0\n"
    )
    loaded = ProfileRegistry().load(profile_path)
    assert loaded.name == "custom"
    assert loaded.vram_total_mb == 12288
|
||||
Loading…
Reference in a new issue