feat!: strip resources/ from MIT core — moves to circuitforge-orch (v0.8.0)
BREAKING CHANGE: circuitforge_core.resources is no longer available. Import CFOrchClient from circuitforge_orch.client instead. cf-orch CLI entry point is now in the circuitforge-orch package.
This commit is contained in:
parent
2259382d0b
commit
c244260d1c
63 changed files with 34 additions and 6571 deletions
22
README.md
22
README.md
|
|
@ -2,15 +2,29 @@
|
||||||
|
|
||||||
Shared scaffold for CircuitForge products.
|
Shared scaffold for CircuitForge products.
|
||||||
|
|
||||||
|
**Current version: 0.8.0**
|
||||||
|
|
||||||
## Modules
|
## Modules
|
||||||
|
|
||||||
|
### Implemented
|
||||||
|
|
||||||
- `circuitforge_core.db` — SQLite connection factory and migration runner
|
- `circuitforge_core.db` — SQLite connection factory and migration runner
|
||||||
- `circuitforge_core.llm` — LLM router with fallback chain
|
- `circuitforge_core.llm` — LLM router with fallback chain (Ollama, vLLM, Anthropic, OpenAI-compatible)
|
||||||
- `circuitforge_core.tiers` — Tier system with BYOK and local vision unlocks
|
- `circuitforge_core.tiers` — Tier system with BYOK and local vision unlocks
|
||||||
- `circuitforge_core.config` — Env validation and .env loader
|
- `circuitforge_core.config` — Env validation and .env loader
|
||||||
- `circuitforge_core.vision` — Vision router stub (v0.2+)
|
- `circuitforge_core.hardware` — Hardware detection and LLM backend profile generation (VRAM tiers, GPU/CPU auto-select)
|
||||||
- `circuitforge_core.wizard` — First-run wizard base class stub
|
- `circuitforge_core.documents` — Document ingestion pipeline: PDF, DOCX, and image OCR → `StructuredDocument`
|
||||||
- `circuitforge_core.pipeline` — Staging queue stub (v0.2+)
|
- `circuitforge_core.affiliates` — Affiliate URL wrapping with opt-out, BYOK user IDs, and CF env-var fallback (`wrap_url`)
|
||||||
|
- `circuitforge_core.preferences` — User preference store (local YAML file, pluggable backend); dot-path get/set API
|
||||||
|
- `circuitforge_core.tasks` — VRAM-aware LLM task scheduler; shared slot manager across services (`TaskScheduler`)
|
||||||
|
- `circuitforge_core.manage` — Cross-platform product process manager (Docker and native modes)
|
||||||
|
- `circuitforge_core.resources` — **moved to the `circuitforge-orch` package in v0.8.0**; import `CFOrchClient` from `circuitforge_orch.client` instead
|
||||||
|
|
||||||
|
### Stubs (in-tree, not yet implemented)
|
||||||
|
|
||||||
|
- `circuitforge_core.vision` — Vision router base class (planned: moondream2 / Claude vision dispatch)
|
||||||
|
- `circuitforge_core.wizard` — First-run wizard base class (products subclass `BaseWizard`)
|
||||||
|
- `circuitforge_core.pipeline` — Staging queue base (`StagingDB`; products provide concrete schema)
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
__version__ = "0.7.0"
|
__version__ = "0.8.0"
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,12 @@ def _build_ebay_url(url: str, affiliate_id: str) -> str:
|
||||||
return f"{url}{sep}{params}"
|
return f"{url}{sep}{params}"
|
||||||
|
|
||||||
|
|
||||||
|
def _build_instacart_url(url: str, affiliate_id: str) -> str:
|
||||||
|
"""Append Instacart affiliate parameter to a search URL."""
|
||||||
|
sep = "&" if "?" in url else "?"
|
||||||
|
return f"{url}{sep}aff={affiliate_id}"
|
||||||
|
|
||||||
|
|
||||||
def _build_amazon_url(url: str, affiliate_id: str) -> str:
|
def _build_amazon_url(url: str, affiliate_id: str) -> str:
|
||||||
"""Merge an Amazon Associates tag into a product URL's query string."""
|
"""Merge an Amazon Associates tag into a product URL's query string."""
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
|
|
@ -101,3 +107,10 @@ register_program(AffiliateProgram(
|
||||||
env_var="AMAZON_ASSOCIATES_TAG",
|
env_var="AMAZON_ASSOCIATES_TAG",
|
||||||
build_url=_build_amazon_url,
|
build_url=_build_amazon_url,
|
||||||
))
|
))
|
||||||
|
|
||||||
|
# Register the Instacart affiliate program. Lookups key on `retailer_key`;
# INSTACART_AFFILIATE_ID is the env-var fallback affiliate ID (presumably
# used when no BYOK user ID is supplied — verify against wrap_url).
register_program(AffiliateProgram(
    name="Instacart",
    retailer_key="instacart",
    env_var="INSTACART_AFFILIATE_ID",
    build_url=_build_instacart_url,
))
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
from circuitforge_core.resources.client import CFOrchClient, Allocation # noqa: F401
|
|
||||||
|
|
@ -1,105 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from circuitforge_core.resources.agent.eviction_executor import EvictionExecutor
|
|
||||||
from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor
|
|
||||||
from circuitforge_core.resources.agent.service_manager import ServiceManager
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class EvictRequest(BaseModel):
    """Body for POST /evict: which process to terminate and how patiently."""
    # PID of the process to evict from the GPU.
    pid: int
    # Seconds to wait for a clean exit after SIGTERM (forwarded to
    # EvictionExecutor.evict_pid, which escalates afterwards).
    grace_period_s: float = 5.0
|
|
||||||
|
|
||||||
|
|
||||||
class ServiceStartRequest(BaseModel):
    """Body for POST /services/{service}/start."""
    # GPU index the service should be bound to.
    gpu_id: int = 0
    # Placeholder values forwarded to the service's command/args template.
    # (Mutable default is safe here: pydantic copies field defaults per instance.)
    params: dict[str, str] = {}
|
|
||||||
|
|
||||||
|
|
||||||
def create_agent_app(
    node_id: str,
    monitor: GpuMonitor | None = None,
    executor: EvictionExecutor | None = None,
    service_manager: ServiceManager | None = None,
) -> FastAPI:
    """Build the FastAPI app served by a cf-orch node agent.

    Always registers /health, /gpu-info, /evict and /resident-info.
    Service-management routes (/services…) are registered only when a
    ServiceManager is supplied.

    Args:
        node_id: Identifier echoed in /health and /gpu-info payloads.
        monitor: GPU poller; defaults to a fresh GpuMonitor.
        executor: Eviction backend; defaults to a fresh EvictionExecutor.
        service_manager: Optional manager enabling the /services routes.

    Returns:
        A configured FastAPI application.
    """
    _monitor = monitor or GpuMonitor()
    _executor = executor or EvictionExecutor()

    app = FastAPI(title=f"cf-orch-agent [{node_id}]")

    @app.get("/health")
    def health() -> dict[str, Any]:
        return {"status": "ok", "node_id": node_id}

    @app.get("/gpu-info")
    def gpu_info() -> dict[str, Any]:
        # Poll live on every request — no caching, numbers are current.
        gpus = _monitor.poll()
        return {
            "node_id": node_id,
            "gpus": [
                {
                    "gpu_id": g.gpu_id,
                    "name": g.name,
                    "vram_total_mb": g.vram_total_mb,
                    "vram_used_mb": g.vram_used_mb,
                    "vram_free_mb": g.vram_free_mb,
                }
                for g in gpus
            ],
        }

    @app.post("/evict")
    def evict(req: EvictRequest) -> dict[str, Any]:
        result = _executor.evict_pid(pid=req.pid, grace_period_s=req.grace_period_s)
        return {
            "success": result.success,
            "method": result.method,
            "message": result.message,
        }

    @app.get("/resident-info")
    def resident_info() -> dict[str, Any]:
        """Return which models are currently loaded in each running managed service."""
        if service_manager is None:
            return {"residents": []}
        # Imported lazily: service_probe is only needed when a manager exists.
        from circuitforge_core.resources.agent.service_probe import probe_all
        return {"residents": probe_all(service_manager)}

    if service_manager is not None:
        @app.get("/services")
        def list_services() -> dict:
            return {"running": service_manager.list_running()}

        @app.get("/services/{service}")
        def service_status(service: str) -> dict:
            running = service_manager.is_running(service)
            url = service_manager.get_url(service) if running else None
            return {"service": service, "running": running, "url": url}

        @app.post("/services/{service}/start")
        def start_service(service: str, req: ServiceStartRequest) -> dict:
            try:
                already_running = service_manager.is_running(service)
                url = service_manager.start(service, req.gpu_id, req.params)
                # adopted=True signals the coordinator to treat this instance as
                # immediately running rather than waiting for the probe loop.
                adopted = already_running and service_manager.is_running(service)
                return {"service": service, "url": url, "running": True, "adopted": adopted}
            except (ValueError, NotImplementedError) as exc:
                # Chain the cause (PEP 3134) so server logs retain the original
                # traceback instead of a bare HTTPException.
                raise HTTPException(status_code=422, detail=str(exc)) from exc
            except Exception as exc:
                raise HTTPException(status_code=500, detail=f"Failed to start {service}: {exc}") from exc

        @app.post("/services/{service}/stop")
        def stop_service(service: str) -> dict:
            stopped = service_manager.stop(service)
            return {"service": service, "stopped": stopped}

    return app
|
|
||||||
|
|
@ -1,85 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import signal
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_DEFAULT_GRACE_S = 5.0
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class EvictionResult:
    """Outcome of a single eviction attempt (immutable value object)."""
    # True when the target process is gone (including "was already gone").
    success: bool
    method: str  # "sigterm", "sigkill", "already_gone", "not_found", "error"
    # Human-readable detail, suitable for logs and API responses.
    message: str
|
|
||||||
|
|
||||||
|
|
||||||
class EvictionExecutor:
    """Terminate a process to free GPU memory: SIGTERM first, then SIGKILL.

    The sequence (existence check → SIGTERM → grace-period poll → SIGKILL)
    is deliberate; reordering would change race behavior around process exit.
    """

    def __init__(self, grace_period_s: float = _DEFAULT_GRACE_S) -> None:
        # Grace window used when evict_pid() is called without an override.
        self._default_grace = grace_period_s

    def evict_pid(
        self,
        pid: int,
        grace_period_s: float | None = None,
    ) -> EvictionResult:
        """Ask *pid* to exit, escalating to SIGKILL after the grace period.

        Never raises for signalling failures; every outcome is encoded in the
        returned EvictionResult (`method` ∈ sigterm/sigkill/already_gone/
        not_found/error).
        """
        grace = grace_period_s if grace_period_s is not None else self._default_grace

        # pid <= 0 targets a process group (or, for -1, every process the
        # caller may signal) — refuse outright.
        if pid <= 0:
            return EvictionResult(
                success=False, method="error",
                message=f"Refusing to signal invalid PID {pid}"
            )

        if not psutil.pid_exists(pid):
            return EvictionResult(
                success=False, method="not_found",
                message=f"PID {pid} not found"
            )

        try:
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            # Lost the race: process exited between pid_exists() and kill().
            return EvictionResult(
                success=True, method="already_gone",
                message=f"PID {pid} vanished before SIGTERM"
            )
        except PermissionError as exc:
            return EvictionResult(
                success=False, method="error",
                message=f"Permission denied terminating PID {pid}: {exc}"
            )

        # Wait for grace period
        deadline = time.monotonic() + grace
        while time.monotonic() < deadline:
            if not psutil.pid_exists(pid):
                logger.info("PID %d exited cleanly after SIGTERM", pid)
                return EvictionResult(
                    success=True, method="sigterm",
                    message=f"PID {pid} exited after SIGTERM"
                )
            time.sleep(0.05)

        # Escalate to SIGKILL
        if psutil.pid_exists(pid):
            try:
                os.kill(pid, signal.SIGKILL)
                logger.warning("PID %d required SIGKILL", pid)
                return EvictionResult(
                    success=True, method="sigkill",
                    message=f"PID {pid} killed with SIGKILL"
                )
            except ProcessLookupError:
                # Exited on its own right at the deadline — treat as gone.
                pass

        return EvictionResult(
            success=True, method="sigkill",
            message=f"PID {pid} is gone"
        )
|
|
||||||
|
|
@ -1,52 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from circuitforge_core.resources.models import GpuInfo
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_NVIDIA_SMI_CMD = [
|
|
||||||
"nvidia-smi",
|
|
||||||
"--query-gpu=index,name,memory.total,memory.used,memory.free",
|
|
||||||
"--format=csv,noheader,nounits",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class GpuMonitor:
    """Poll NVIDIA GPUs via nvidia-smi and parse its CSV output into GpuInfo."""

    def poll(self) -> list[GpuInfo]:
        """Run nvidia-smi once; return [] when the tool is absent, hangs, or fails."""
        try:
            proc = subprocess.run(
                _NVIDIA_SMI_CMD,
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
            logger.warning("nvidia-smi unavailable: %s", exc)
            return []

        if proc.returncode != 0:
            logger.warning("nvidia-smi exited %d", proc.returncode)
            return []
        return self._parse(proc.stdout)

    def _parse(self, output: str) -> list[GpuInfo]:
        """Parse `index,name,total,used,free` rows; malformed rows are skipped."""
        parsed: list[GpuInfo] = []
        for raw in output.strip().splitlines():
            fields = [field.strip() for field in raw.split(",")]
            if len(fields) != 5:
                continue
            idx, gpu_name, total, used, free = fields
            try:
                parsed.append(GpuInfo(
                    gpu_id=int(idx),
                    name=gpu_name,
                    vram_total_mb=int(total),
                    vram_used_mb=int(used),
                    vram_free_mb=int(free),
                ))
            except ValueError:
                logger.debug("Skipping malformed nvidia-smi line: %r", raw)
        return parsed
|
|
||||||
|
|
@ -1,186 +0,0 @@
|
||||||
"""
|
|
||||||
ServiceManager — start/stop Docker containers and processes for cf-orch managed services.
|
|
||||||
|
|
||||||
Container naming convention: cf-orch-{service}-{node_id}
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
from collections import defaultdict
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from circuitforge_core.resources.profiles.schema import DockerSpec, GpuProfile, ProcessSpec
|
|
||||||
|
|
||||||
|
|
||||||
def _expand_volume(v: str) -> str:
|
|
||||||
"""Expand bash-style volume strings including ${VAR:-default} and $VAR."""
|
|
||||||
def _sub(m: re.Match) -> str: # type: ignore[type-arg]
|
|
||||||
var, default = m.group(1), m.group(2) or ""
|
|
||||||
return os.environ.get(var) or default
|
|
||||||
v = re.sub(r"\$\{(\w+)(?::-(.*?))?\}", _sub, v)
|
|
||||||
v = re.sub(r"\$(\w+)", lambda m: os.environ.get(m.group(1), m.group(0)), v)
|
|
||||||
return v
|
|
||||||
|
|
||||||
|
|
||||||
class ServiceManager:
    """Start/stop cf-orch managed services as Docker containers or processes.

    Containers follow the naming convention `cf-orch-{service}-{node_id}`.
    Service URLs are always reported against `advertise_host`.
    """

    def __init__(
        self,
        node_id: str,
        profile: GpuProfile,
        advertise_host: str = "127.0.0.1",
    ) -> None:
        self.node_id = node_id
        self.profile = profile
        self.advertise_host = advertise_host
        # Popen handles for processes *we* spawned, keyed by service name.
        self._procs: dict[str, Any] = {}

    def container_name(self, service: str) -> str:
        """Docker container name for *service* on this node."""
        return f"cf-orch-{service}-{self.node_id}"

    def _get_spec(self, service: str) -> DockerSpec | ProcessSpec | None:
        """Managed-launch spec for *service*, or None if unknown/unmanaged."""
        svc = self.profile.services.get(service)
        if svc is None:
            return None
        return svc.managed

    def is_running(self, service: str) -> bool:
        """True if *service* is currently up (container running / port open / healthy)."""
        spec = self._get_spec(service)
        if spec is None:
            return False
        if isinstance(spec, DockerSpec):
            try:
                result = subprocess.run(
                    [
                        "docker",
                        "inspect",
                        "--format",
                        "{{.State.Running}}",
                        self.container_name(service),
                    ],
                    capture_output=True,
                    text=True,
                    check=True,
                )
                return result.stdout.strip() == "true"
            except subprocess.CalledProcessError:
                # docker inspect fails when the container does not exist.
                return False
        if isinstance(spec, ProcessSpec):
            # For adopt=True services, check the health endpoint regardless of whether
            # we spawned the process (it may be a system daemon we didn't start).
            if spec.adopt:
                return self._probe_health(spec.host_port, spec.health_path)
            proc = self._procs.get(service)
            if proc is None or proc.poll() is not None:
                return False
            # Process is alive — confirm it is actually listening.
            import socket
            try:
                with socket.create_connection(("127.0.0.1", spec.host_port), timeout=1):
                    return True
            except OSError:
                return False
        return False

    def _probe_health(self, port: int, health_path: str = "/health") -> bool:
        """Return True if the service at localhost:port responds 200 on health_path."""
        import urllib.request
        try:
            url = f"http://127.0.0.1:{port}{health_path}"
            with urllib.request.urlopen(url, timeout=2.0) as resp:
                return resp.status == 200
        except Exception:
            return False

    def start(self, service: str, gpu_id: int, params: dict[str, str]) -> str:
        """Start *service* on *gpu_id* and return its base URL (idempotent).

        Raises:
            ValueError: service unknown or has no managed spec.
            NotImplementedError: unrecognized spec type.
        """
        spec = self._get_spec(service)
        if spec is None:
            raise ValueError(f"Service {service!r} not in profile or has no managed spec")

        if self.is_running(service):
            return f"http://{self.advertise_host}:{spec.host_port}"

        if isinstance(spec, DockerSpec):
            expanded_volumes = [_expand_volume(v) for v in spec.volumes]

            # defaultdict(str): unknown {placeholders} expand to "" instead of
            # raising KeyError from format_map.
            filler: dict[str, str] = defaultdict(str, params)
            expanded_command = spec.command_template.format_map(filler).split()

            cmd = [
                "docker", "run", "-d", "--rm",
                "--name", self.container_name(service),
                "--runtime", spec.runtime,
                "--gpus", f"device={gpu_id}",
                "--ipc", spec.ipc,
                "-p", f"{spec.host_port}:{spec.port}",
            ]
            for vol in expanded_volumes:
                cmd += ["-v", vol]
            for key, val in spec.env.items():
                cmd += ["-e", f"{key}={val}"]
            cmd.append(spec.image)
            cmd.extend(expanded_command)

            subprocess.run(cmd, check=True, capture_output=True, text=True)
            return f"http://{self.advertise_host}:{spec.host_port}"

        if isinstance(spec, ProcessSpec):
            # adopt=True: if the service is already healthy, claim it without spawning.
            if spec.adopt and self._probe_health(spec.host_port, spec.health_path):
                return f"http://{self.advertise_host}:{spec.host_port}"

            filler = defaultdict(str, params)
            filler.setdefault("port", str(spec.port))
            filler.setdefault("gpu_id", str(gpu_id))
            args_expanded = spec.args_template.format_map(filler).split()

            cmd = [spec.exec_path] + args_expanded
            # Inherit the agent's environment. (Was `{**__import__("os").environ}`
            # via a shadow `import subprocess as _sp`; os and subprocess are
            # already imported at module scope.)
            env = os.environ.copy()
            proc = subprocess.Popen(
                cmd,
                cwd=spec.cwd or None,
                env=env,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            self._procs[service] = proc
            return f"http://{self.advertise_host}:{spec.host_port}"

        raise NotImplementedError(f"Unknown spec type: {type(spec)}")

    def stop(self, service: str) -> bool:
        """Stop *service*; True only if something was actually stopped."""
        spec = self._get_spec(service)
        if spec is None:
            return False
        if isinstance(spec, DockerSpec):
            try:
                subprocess.run(
                    ["docker", "stop", self.container_name(service)],
                    check=True,
                    capture_output=True,
                    text=True,
                )
                return True
            except subprocess.CalledProcessError:
                return False
        if isinstance(spec, ProcessSpec):
            proc = self._procs.pop(service, None)
            if proc is not None:
                proc.terminate()
                try:
                    proc.wait(timeout=10)
                except Exception:
                    # Did not exit within 10 s — force-kill.
                    proc.kill()
                return True
        return False

    def list_running(self) -> list[str]:
        """Names of profile services currently running on this node."""
        return [svc for svc in self.profile.services if self.is_running(svc)]

    def get_url(self, service: str) -> str | None:
        """Base URL for *service* if it is running, else None."""
        spec = self._get_spec(service)
        if spec is None or not self.is_running(service):
            return None
        return f"http://{self.advertise_host}:{spec.host_port}"
|
|
||||||
|
|
@ -1,123 +0,0 @@
|
||||||
"""
|
|
||||||
Probe running services to detect which models are currently loaded in VRAM.
|
|
||||||
|
|
||||||
Two probe strategies run together:
|
|
||||||
|
|
||||||
1. Well-known ports — always checked, regardless of who started the service.
|
|
||||||
Catches ollama, vLLM, etc. running outside cf-orch management.
|
|
||||||
|
|
||||||
2. Managed services — services cf-orch started via ServiceManager.
|
|
||||||
Checked on their configured host_port, deduplicates with well-known results.
|
|
||||||
|
|
||||||
Each service exposes a different introspection API:
|
|
||||||
- vllm: GET /v1/models → {"data": [{"id": "<model-name>"}]}
|
|
||||||
- ollama: GET /api/ps → {"models": [{"name": "<model>", "size_vram": <bytes>}]}
|
|
||||||
|
|
||||||
ollama can have multiple models loaded simultaneously; each is reported as a
|
|
||||||
separate entry so the dashboard shows per-model residency.
|
|
||||||
|
|
||||||
The probe is best-effort: a timeout or connection refusal means model_name=None
|
|
||||||
but the service is still reported as resident.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import urllib.request
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from circuitforge_core.resources.profiles.schema import DockerSpec
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_PROBE_TIMEOUT_S = 2.0
|
|
||||||
|
|
||||||
# Well-known service ports probed on every heartbeat.
|
|
||||||
# key → (service_name, prober_key)
|
|
||||||
_WELL_KNOWN_PORTS: dict[int, str] = {
|
|
||||||
11434: "ollama",
|
|
||||||
8000: "vllm",
|
|
||||||
8080: "vllm", # common alt vLLM port
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_json(url: str) -> dict[str, Any] | None:
    """GET a URL and parse JSON; returns None on any error."""
    try:
        with urllib.request.urlopen(url, timeout=_PROBE_TIMEOUT_S) as resp:
            payload = resp.read()
        return json.loads(payload)
    except Exception as exc:
        # Best-effort probe: any failure (network, HTTP, bad JSON) → None.
        logger.debug("Probe %s: %s", url, exc)
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def _probe_vllm(port: int) -> list[str]:
    """IDs of models served by a vLLM instance on *port* ([] if unreachable)."""
    payload = _fetch_json(f"http://127.0.0.1:{port}/v1/models")
    if not payload or not payload.get("data"):
        return []
    return [entry["id"] for entry in payload["data"] if entry.get("id")]
|
|
||||||
|
|
||||||
|
|
||||||
def _probe_ollama(port: int) -> list[str]:
    """Names of models an ollama instance currently has loaded ([] if unreachable)."""
    # /api/ps lists models currently *loaded in memory*, not just downloaded.
    payload = _fetch_json(f"http://127.0.0.1:{port}/api/ps")
    if not payload or not payload.get("models"):
        return []
    return [entry["name"] for entry in payload["models"] if entry.get("name")]
|
|
||||||
|
|
||||||
|
|
||||||
_PROBERS: dict[str, Any] = {
|
|
||||||
"vllm": _probe_vllm,
|
|
||||||
"ollama": _probe_ollama,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def probe_all(service_manager: Any) -> list[dict[str, Any]]:
    """
    Probe all services — both well-known ports and cf-orch managed services.

    Returns a list of dicts: [{"service": str, "model_name": str | None}].
    Multiple loaded models in one service (e.g. two ollama models) each get
    their own entry, disambiguated as "ollama/0", "ollama/1", etc.
    """
    results: list[dict[str, Any]] = []
    # Ports already reported — prevents double-counting a managed service
    # that also listens on a well-known port.
    seen_ports: set[int] = set()

    # ── 1. Well-known ports ──────────────────────────────────────────
    for port, service in _WELL_KNOWN_PORTS.items():
        prober = _PROBERS.get(service)
        if prober is None:
            continue
        models = prober(port)
        if not models:
            continue  # nothing on this port right now
        seen_ports.add(port)
        if len(models) == 1:
            results.append({"service": service, "model_name": models[0]})
        else:
            # Several loaded models → one entry each, suffixed /0, /1, ...
            for i, model in enumerate(models):
                results.append({"service": f"{service}/{i}", "model_name": model})

    # ── 2. Managed services (cf-orch started) ───────────────────────
    if service_manager is not None:
        for service in service_manager.list_running():
            # NOTE(review): reaches into ServiceManager._get_spec (private) —
            # consider exposing a public accessor.
            spec = service_manager._get_spec(service)
            if not isinstance(spec, DockerSpec):
                continue
            if spec.host_port in seen_ports:
                continue  # already captured by well-known probe
            prober = _PROBERS.get(service)
            if prober is None:
                # Running, but no introspection API known — resident, model unknown.
                results.append({"service": service, "model_name": None})
                continue
            models = prober(spec.host_port)
            seen_ports.add(spec.host_port)
            if not models:
                results.append({"service": service, "model_name": None})
            elif len(models) == 1:
                results.append({"service": service, "model_name": models[0]})
            else:
                for i, model in enumerate(models):
                    results.append({"service": f"{service}/{i}", "model_name": model})

    return results
|
|
||||||
|
|
@ -1,234 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Annotated, Optional
|
|
||||||
|
|
||||||
import typer
|
|
||||||
import uvicorn
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
app = typer.Typer(name="cf-orch", help="CircuitForge GPU resource orchestrator")
|
|
||||||
|
|
||||||
_SYSTEMD_UNIT_PATH = Path("/etc/systemd/system/cf-orch.service")
|
|
||||||
|
|
||||||
_SYSTEMD_UNIT_TEMPLATE = """\
|
|
||||||
[Unit]
|
|
||||||
Description=CircuitForge GPU Resource Orchestrator
|
|
||||||
After=network.target
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Type=simple
|
|
||||||
ExecStart={python} -m circuitforge_core.resources.cli start
|
|
||||||
Restart=on-failure
|
|
||||||
RestartSec=5
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def start(
    profile: Annotated[Optional[Path], typer.Option(help="Profile YAML path")] = None,
    host: str = "0.0.0.0",
    port: int = 7700,
    node_id: str = "local",
    agent_port: int = 7701,
) -> None:
    """Start the cf-orch coordinator (auto-detects GPU profile if not specified).

    Automatically pre-registers the local agent so its GPUs appear on the
    dashboard immediately. Remote nodes self-register via POST /api/nodes.
    """
    # Deferred imports keep `cf-orch --help` fast and defer heavy deps until
    # the coordinator actually starts.
    from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
    from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
    from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
    from circuitforge_core.resources.coordinator.app import create_coordinator_app
    from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry
    from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor

    from circuitforge_core.resources.coordinator.node_store import NodeStore

    lease_manager = LeaseManager()
    profile_registry = ProfileRegistry()
    service_registry = ServiceRegistry()
    node_store = NodeStore()
    supervisor = AgentSupervisor(
        lease_manager=lease_manager,
        service_registry=service_registry,
        profile_registry=profile_registry,
        node_store=node_store,
    )
    # Re-attach nodes known from a previous run so the dashboard is not empty
    # after a coordinator restart.
    restored = supervisor.restore_from_store()
    if restored:
        typer.echo(f"Restored {restored} known node(s) from previous session")

    monitor = GpuMonitor()
    gpus = monitor.poll()
    if not gpus:
        typer.echo(
            "Warning: no GPUs detected via nvidia-smi — coordinator running with 0 VRAM"
        )
    else:
        typer.echo(f"Detected {len(gpus)} GPU(s)")

    if profile:
        active_profile = profile_registry.load(profile)
        typer.echo(f"Using profile: {active_profile.name} (from {profile})")
    else:
        # No GPUs → fall back to the last public profile (presumably the
        # smallest/CPU profile — TODO confirm list_public() ordering).
        active_profile = (
            profile_registry.auto_detect(gpus)
            if gpus
            else profile_registry.list_public()[-1]
        )
        typer.echo(f"Auto-selected profile: {active_profile.name}")

    # Pre-register the local agent — the heartbeat loop will poll it for live GPU data.
    local_agent_url = f"http://127.0.0.1:{agent_port}"
    supervisor.register(node_id, local_agent_url)
    typer.echo(f"Registered local node '{node_id}' → {local_agent_url}")

    coordinator_app = create_coordinator_app(
        lease_manager=lease_manager,
        profile_registry=profile_registry,
        agent_supervisor=supervisor,
        service_registry=service_registry,
    )

    typer.echo(f"Starting cf-orch coordinator on {host}:{port}")
    uvicorn.run(coordinator_app, host=host, port=port)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def agent(
    coordinator: str = "http://localhost:7700",
    node_id: str = "local",
    host: str = "0.0.0.0",
    port: int = 7701,
    advertise_host: Optional[str] = None,
    profile: Annotated[Optional[Path], typer.Option(help="Profile YAML path")] = None,
) -> None:
    """Start a cf-orch node agent and self-register with the coordinator.

    The agent starts its HTTP server, then POSTs its URL to the coordinator
    so it appears on the dashboard without manual configuration.

    Use --advertise-host to override the IP the coordinator should use to
    reach this agent (e.g. on a multi-homed or NATted host).
    """
    import threading
    import httpx
    from circuitforge_core.resources.agent.app import create_agent_app
    from circuitforge_core.resources.agent.service_manager import ServiceManager
    from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry

    # The URL the coordinator should use to reach this agent.
    # When bound to a wildcard address, advertise loopback unless overridden.
    reach_host = advertise_host or ("127.0.0.1" if host in ("0.0.0.0", "::") else host)
    agent_url = f"http://{reach_host}:{port}"

    _RECONNECT_INTERVAL_S = 30.0

    def _reconnect_loop() -> None:
        """
        Persistently re-register this agent with the coordinator.

        Runs as a daemon thread for the lifetime of the agent process:
        - Waits 2 s on first run (uvicorn needs time to bind)
        - Re-registers every 30 s thereafter
        - If the coordinator is down, silently retries — no crashing
        - When the coordinator restarts, the agent re-appears within one cycle

        This means coordinator restarts require no manual intervention on agent hosts.
        """
        import time
        first = True
        while True:
            time.sleep(2.0 if first else _RECONNECT_INTERVAL_S)
            first = False
            try:
                resp = httpx.post(
                    f"{coordinator}/api/nodes",
                    json={"node_id": node_id, "agent_url": agent_url},
                    timeout=5.0,
                )
                if resp.is_success:
                    logger.debug("Registered with coordinator at %s as '%s'", coordinator, node_id)
                else:
                    logger.warning(
                        "Coordinator registration returned %s", resp.status_code
                    )
            except Exception as exc:
                logger.debug("Coordinator at %s unreachable, will retry: %s", coordinator, exc)

    # Fire reconnect loop in a daemon thread so uvicorn.run() can start blocking immediately.
    threading.Thread(target=_reconnect_loop, daemon=True, name="cf-orch-reconnect").start()
    typer.echo(f"Reconnect loop started — will register with {coordinator} every {int(_RECONNECT_INTERVAL_S)}s")

    # Best-effort: if profile loading or GPU detection fails, the agent still
    # serves /gpu-info and /evict — only service management is disabled.
    service_manager = None
    try:
        from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor
        pr = ProfileRegistry()
        gpus = GpuMonitor().poll()
        p = pr.load(Path(profile)) if profile else pr.auto_detect(gpus)
        service_manager = ServiceManager(node_id=node_id, profile=p, advertise_host=reach_host)
        typer.echo(f"ServiceManager ready with profile: {p.name}")
    except Exception as exc:
        typer.echo(f"Warning: ServiceManager unavailable ({exc})", err=True)

    agent_app = create_agent_app(node_id=node_id, service_manager=service_manager)
    typer.echo(f"Starting cf-orch agent [{node_id}] on {host}:{port}")
    uvicorn.run(agent_app, host=host, port=port)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def status(coordinator: str = "http://localhost:7700") -> None:
    """Show GPU and lease status from the coordinator."""
    import httpx

    try:
        response = httpx.get(f"{coordinator}/api/nodes", timeout=5.0)
        response.raise_for_status()
        for node in response.json().get("nodes", []):
            typer.echo(f"\nNode: {node['node_id']}")
            for gpu in node.get("gpus", []):
                typer.echo(
                    f" GPU {gpu['gpu_id']}: {gpu['name']} — "
                    f"{gpu['vram_used_mb']}/{gpu['vram_total_mb']} MB used"
                )
    except Exception as exc:
        # Any failure (network, HTTP status, bad JSON) is reported the same way.
        typer.echo(f"Coordinator unreachable at {coordinator}: {exc}", err=True)
        raise typer.Exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command("install-service")
|
|
||||||
def install_service(
|
|
||||||
dry_run: bool = typer.Option(
|
|
||||||
False, "--dry-run", help="Print unit file without writing"
|
|
||||||
),
|
|
||||||
) -> None:
|
|
||||||
"""Write a systemd unit file for cf-orch (requires root)."""
|
|
||||||
python = sys.executable
|
|
||||||
unit_content = _SYSTEMD_UNIT_TEMPLATE.format(python=python)
|
|
||||||
if dry_run:
|
|
||||||
typer.echo(f"Would write to {_SYSTEMD_UNIT_PATH}:\n")
|
|
||||||
typer.echo(unit_content)
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
_SYSTEMD_UNIT_PATH.write_text(unit_content)
|
|
||||||
typer.echo(f"Written: {_SYSTEMD_UNIT_PATH}")
|
|
||||||
typer.echo(
|
|
||||||
"Run: sudo systemctl daemon-reload && sudo systemctl enable --now cf-orch"
|
|
||||||
)
|
|
||||||
except PermissionError:
|
|
||||||
typer.echo(
|
|
||||||
f"Permission denied writing to {_SYSTEMD_UNIT_PATH}. Run as root.", err=True
|
|
||||||
)
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
# Allow running this module directly (python path/to/module.py) in addition
# to the installed console-script entry point.
if __name__ == "__main__":
    app()
|
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
from contextlib import contextmanager, asynccontextmanager
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Allocation:
    """One granted service allocation returned by the cf-orch coordinator."""

    # Coordinator-issued id; used in the DELETE path to release the allocation.
    allocation_id: str
    # Service name that was requested (e.g. "vllm").
    service: str
    # Node the coordinator placed the service on.
    node_id: str
    # GPU index on that node.
    gpu_id: int
    # Model actually selected from the candidate list, if the coordinator reports one.
    model: str | None
    # Inference endpoint URL for the running service.
    url: str
    # Reported by the coordinator; defaults to False when omitted (see
    # _parse_allocation). Presumably True when a cold start occurred — confirm.
    started: bool
    # Reported by the coordinator; presumably True when the service was already
    # resident (no cold start) — confirm against coordinator docs.
    warm: bool
|
|
||||||
|
|
||||||
|
|
||||||
class CFOrchClient:
    """
    Client for cf-orch coordinator allocation.

    Sync usage (in LLMRouter or other sync code):
        client = CFOrchClient(os.environ["CF_ORCH_URL"])
        with client.allocate("vllm", model_candidates=["Ouro-1.4B"]) as alloc:
            # alloc.url is the inference endpoint

    Async usage (in FastAPI apps):
        async with client.allocate_async("vllm", model_candidates=["Ouro-1.4B"]) as alloc:
            ...

    Authentication:
        Pass api_key explicitly, or set CF_LICENSE_KEY env var. When set, every
        request carries Authorization: Bearer <key>. Required for the hosted
        CircuitForge coordinator (orch.circuitforge.tech); optional for local
        self-hosted coordinators.

    Raises ValueError immediately if coordinator_url is empty.
    """

    def __init__(self, coordinator_url: str, api_key: str | None = None) -> None:
        # Fail fast on misconfiguration rather than at the first allocation attempt.
        if not coordinator_url:
            raise ValueError("coordinator_url is empty — cf-orch not configured")
        # Normalize so path joins below never produce a double slash.
        self._url = coordinator_url.rstrip("/")
        # Explicit key wins; otherwise fall back to the CF_LICENSE_KEY env var.
        self._api_key = api_key or os.environ.get("CF_LICENSE_KEY", "")

    def _headers(self) -> dict[str, str]:
        # Bearer auth header when a key is configured; empty dict otherwise.
        if self._api_key:
            return {"Authorization": f"Bearer {self._api_key}"}
        return {}

    def _build_body(self, model_candidates: list[str] | None, ttl_s: float, caller: str) -> dict:
        # JSON payload shared by the sync and async allocate paths.
        return {
            "model_candidates": model_candidates or [],
            "ttl_s": ttl_s,
            "caller": caller,
        }

    def _parse_allocation(self, data: dict, service: str) -> Allocation:
        # Convert the coordinator's JSON response into a typed Allocation.
        # "started"/"warm" default to False for coordinators that omit them.
        return Allocation(
            allocation_id=data["allocation_id"],
            service=service,
            node_id=data["node_id"],
            gpu_id=data["gpu_id"],
            model=data.get("model"),
            url=data["url"],
            started=data.get("started", False),
            warm=data.get("warm", False),
        )

    @contextmanager
    def allocate(
        self,
        service: str,
        *,
        model_candidates: list[str] | None = None,
        ttl_s: float = 3600.0,
        caller: str = "",
    ):
        """Sync context manager. Allocates on enter, releases on exit.

        Raises RuntimeError when the coordinator rejects the allocation.
        Release failures on exit are logged at debug level and swallowed so
        they never mask an exception from the caller's body.
        """
        # Generous timeout: the coordinator may need to cold-start the service.
        resp = httpx.post(
            f"{self._url}/api/services/{service}/allocate",
            json=self._build_body(model_candidates, ttl_s, caller),
            headers=self._headers(),
            timeout=120.0,
        )
        if not resp.is_success:
            raise RuntimeError(
                f"cf-orch allocation failed for {service!r}: "
                f"HTTP {resp.status_code} — {resp.text[:200]}"
            )
        alloc = self._parse_allocation(resp.json(), service)
        try:
            yield alloc
        finally:
            # Best-effort release; the coordinator's TTL sweep reclaims the
            # allocation eventually even if this DELETE fails.
            try:
                httpx.delete(
                    f"{self._url}/api/services/{service}/allocations/{alloc.allocation_id}",
                    headers=self._headers(),
                    timeout=10.0,
                )
            except Exception as exc:
                logger.debug("cf-orch release failed (non-fatal): %s", exc)

    @asynccontextmanager
    async def allocate_async(
        self,
        service: str,
        *,
        model_candidates: list[str] | None = None,
        ttl_s: float = 3600.0,
        caller: str = "",
    ):
        """Async context manager. Allocates on enter, releases on exit.

        Mirrors allocate(): RuntimeError on allocation failure, best-effort
        (logged, swallowed) release on exit.
        """
        # The AsyncClient stays open for the allocation's lifetime so the
        # release DELETE in the finally block can reuse it.
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(
                f"{self._url}/api/services/{service}/allocate",
                json=self._build_body(model_candidates, ttl_s, caller),
                headers=self._headers(),
            )
            if not resp.is_success:
                raise RuntimeError(
                    f"cf-orch allocation failed for {service!r}: "
                    f"HTTP {resp.status_code} — {resp.text[:200]}"
                )
            alloc = self._parse_allocation(resp.json(), service)
            try:
                yield alloc
            finally:
                try:
                    # Per-request timeout overrides the client-level 120 s here.
                    await client.delete(
                        f"{self._url}/api/services/{service}/allocations/{alloc.allocation_id}",
                        headers=self._headers(),
                        timeout=10.0,
                    )
                except Exception as exc:
                    logger.debug("cf-orch async release failed (non-fatal): %s", exc)
|
|
||||||
|
|
@ -1,44 +0,0 @@
|
||||||
# circuitforge_core/resources/compose.yml
# One-command cf-orch deployment for Docker self-hosters:
#   docker compose -f path/to/compose.yml up cf-orch-coordinator

services:
  cf-orch-coordinator:
    image: python:3.12-slim
    # Installs the orch extra at container start, then runs the coordinator API.
    command: >
      sh -c "pip install 'circuitforge-core[orch]' &&
      cf-orch start --host 0.0.0.0 --port 7700"
    ports:
      - "7700:7700"
    volumes:
      # NOTE(review): host path is /run/docker.sock and the mount is read-only —
      # confirm the coordinator only inspects Docker and never needs write access.
      - /run/docker.sock:/var/run/docker.sock:ro
      # Persistent coordinator state (named volume declared below).
      - cf-orch-data:/data
    environment:
      # Optional profile override; empty string means auto-detect.
      - CFORCH_PROFILE=${CFORCH_PROFILE:-}
    restart: unless-stopped
    # GPU passthrough for the first NVIDIA device via the legacy nvidia runtime.
    devices:
      - /dev/nvidia0:/dev/nvidia0
      - /dev/nvidiactl:/dev/nvidiactl
    runtime: nvidia

  cf-orch-agent:
    image: python:3.12-slim
    # Registers against the coordinator over the compose network by service name.
    command: >
      sh -c "pip install 'circuitforge-core[orch]' &&
      cf-orch agent --coordinator http://cf-orch-coordinator:7700
      --node-id ${CFORCH_NODE_ID:-local}
      --host 0.0.0.0 --port 7701"
    ports:
      - "7701:7701"
    depends_on:
      - cf-orch-coordinator
    environment:
      - CFORCH_NODE_ID=${CFORCH_NODE_ID:-local}
    restart: unless-stopped
    devices:
      - /dev/nvidia0:/dev/nvidia0
      - /dev/nvidiactl:/dev/nvidiactl
    runtime: nvidia

volumes:
  cf-orch-data:
|
|
||||||
|
|
@ -1,209 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.coordinator.node_store import NodeStore
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry
|
|
||||||
from circuitforge_core.resources.models import GpuInfo, NodeInfo, ResidentAllocation
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_HEARTBEAT_INTERVAL_S = 10.0
|
|
||||||
_AGENT_TIMEOUT_S = 5.0
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class AgentRecord:
    """In-memory bookkeeping for one registered agent node."""

    node_id: str
    agent_url: str
    # Epoch seconds of the last successful poll (initialized to creation time).
    last_seen: float = field(default_factory=time.time)
    # Latest GPU snapshot reported by the agent's /gpu-info endpoint.
    gpus: list[GpuInfo] = field(default_factory=list)
    # False until the first successful poll; cleared again whenever a poll fails.
    online: bool = False
|
|
||||||
|
|
||||||
|
|
||||||
class AgentSupervisor:
    """
    Registry and health-poller for agent nodes.

    Responsibilities:
    - Track every agent that registers (node_id → AgentRecord).
    - Periodically poll each agent's /gpu-info (and, best-effort, /resident-info)
      and mirror the results into the LeaseManager.
    - Every third heartbeat tick, run the TTL/idle sweep that expires stale
      allocations and stops services idle past their profile timeout.
    """

    def __init__(
        self,
        lease_manager: LeaseManager,
        service_registry: ServiceRegistry | None = None,
        profile_registry: ProfileRegistry | None = None,
        node_store: NodeStore | None = None,
    ) -> None:
        # node_id → record for every node known this session (online or not).
        self._agents: dict[str, AgentRecord] = {}
        self._lease_manager = lease_manager
        self._running = False
        # Optional collaborators — the idle sweep is a no-op when these are None.
        self._service_registry = service_registry
        self._profile_registry = profile_registry
        # Optional persistence; lets known nodes survive coordinator restarts.
        self._node_store = node_store
        # Heartbeat iteration counter; the idle sweep runs on every 3rd tick.
        self._heartbeat_tick = 0

    def restore_from_store(self) -> int:
        """
        Load previously-known nodes from NodeStore into the in-memory registry.

        All restored nodes start with online=False. The heartbeat loop will poll
        them on its first tick and promote any that respond to online=True.

        Returns the number of nodes restored.
        """
        if self._node_store is None:
            return 0
        restored = 0
        for node_id, agent_url in self._node_store.all():
            # Live registrations win over persisted entries.
            if node_id not in self._agents:
                self._agents[node_id] = AgentRecord(
                    node_id=node_id, agent_url=agent_url, online=False
                )
                restored += 1
        if restored:
            logger.info("NodeStore: restored %d known node(s) from previous session", restored)
        return restored

    def register(self, node_id: str, agent_url: str) -> None:
        """Add a new agent, or update an existing one's URL; persist either way."""
        if node_id not in self._agents:
            self._agents[node_id] = AgentRecord(node_id=node_id, agent_url=agent_url)
            logger.info("Registered agent node: %s @ %s", node_id, agent_url)
        else:
            # Re-registration with a changed URL (e.g. new host/port) is allowed.
            if self._agents[node_id].agent_url != agent_url:
                self._agents[node_id].agent_url = agent_url
                logger.info("Updated agent URL for %s → %s", node_id, agent_url)
        if self._node_store is not None:
            self._node_store.upsert(node_id, agent_url)

    def get_node_info(self, node_id: str) -> NodeInfo | None:
        """Return a NodeInfo snapshot for one node, or None if unknown."""
        record = self._agents.get(node_id)
        if record is None:
            return None
        return NodeInfo(
            node_id=record.node_id,
            agent_url=record.agent_url,
            gpus=record.gpus,
            last_heartbeat=record.last_seen,
        )

    def all_nodes(self) -> list[NodeInfo]:
        """Return NodeInfo snapshots for every known node, online or not."""
        return [
            NodeInfo(
                node_id=r.node_id,
                agent_url=r.agent_url,
                gpus=r.gpus,
                last_heartbeat=r.last_seen,
            )
            for r in self._agents.values()
        ]

    def online_agents(self) -> "dict[str, AgentRecord]":
        """Return only currently-online agents, keyed by node_id."""
        return {nid: rec for nid, rec in self._agents.items() if rec.online}

    async def poll_agent(self, node_id: str) -> bool:
        """
        Poll one agent for GPU and resident-service state.

        On success: refresh the record's GPU list / last_seen / online flag and
        mirror GPU capacity and residents into the LeaseManager; return True.
        On any failure: mark the node offline and return False.
        """
        record = self._agents.get(node_id)
        if record is None:
            return False
        try:
            async with httpx.AsyncClient(timeout=_AGENT_TIMEOUT_S) as client:
                gpu_resp = await client.get(f"{record.agent_url}/gpu-info")
                gpu_resp.raise_for_status()

                # Resident-info is best-effort — older agents may not have the endpoint.
                try:
                    res_resp = await client.get(f"{record.agent_url}/resident-info")
                    resident_data = res_resp.json() if res_resp.is_success else {}
                except Exception:
                    resident_data = {}

                data = gpu_resp.json()
                gpus = [
                    GpuInfo(
                        gpu_id=g["gpu_id"],
                        name=g["name"],
                        vram_total_mb=g["vram_total_mb"],
                        vram_used_mb=g["vram_used_mb"],
                        vram_free_mb=g["vram_free_mb"],
                    )
                    for g in data.get("gpus", [])
                ]
                record.gpus = gpus
                record.last_seen = time.time()
                record.online = True
                # Keep the lease manager's view of per-GPU capacity current.
                for gpu in gpus:
                    self._lease_manager.register_gpu(node_id, gpu.gpu_id, gpu.vram_total_mb)

                residents = [
                    (r["service"], r.get("model_name"))
                    for r in resident_data.get("residents", [])
                ]
                self._lease_manager.set_residents_for_node(node_id, residents)

                return True
        except Exception as exc:
            logger.warning("Agent %s unreachable: %s", node_id, exc)
            record.online = False
            return False

    async def poll_all(self) -> None:
        """Poll every known agent concurrently."""
        await asyncio.gather(*[self.poll_agent(nid) for nid in self._agents])

    def _build_idle_stop_config(self) -> dict[str, int]:
        """
        Collect per-service idle-stop timeouts from all public profiles.

        When several profiles declare a positive timeout for the same service,
        the smallest value wins. Services without a positive timeout are
        omitted (never idle-stopped).
        """
        if self._profile_registry is None:
            return {}
        config: dict[str, int] = {}
        for profile in self._profile_registry.list_public():
            for svc_name, svc in profile.services.items():
                if svc.idle_stop_after_s > 0:
                    existing = config.get(svc_name, 0)
                    config[svc_name] = min(existing, svc.idle_stop_after_s) if existing > 0 else svc.idle_stop_after_s
        return config

    async def _http_post(self, url: str) -> bool:
        """POST with no body; True on 2xx, False (and a warning log) otherwise."""
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.post(url)
                return resp.is_success
        except Exception as exc:
            logger.warning("HTTP POST %s failed: %s", url, exc)
            return False

    async def _run_idle_sweep(self) -> None:
        """
        Expire TTL'd allocations, then stop service instances that idled past
        their profile-configured timeout. No-op without a ServiceRegistry.
        """
        if self._service_registry is None:
            return
        expired = self._service_registry.sweep_expired_allocations()
        if expired:
            logger.info("TTL sweep: expired %d allocation(s): %s", len(expired), expired)
        idle_stop_config = self._build_idle_stop_config()
        if not idle_stop_config:
            return
        timed_out = self._service_registry.idle_past_timeout(idle_stop_config)
        for instance in timed_out:
            node_info = self.get_node_info(instance.node_id)
            if node_info is None:
                # Node no longer known — nothing we can stop.
                continue
            stop_url = f"{node_info.agent_url}/services/{instance.service}/stop"
            logger.info(
                "Idle sweep: stopping %s on %s gpu%s (idle timeout)",
                instance.service, instance.node_id, instance.gpu_id,
            )
            success = await self._http_post(stop_url)
            if success:
                # Only record the stop once the agent has acknowledged it.
                self._service_registry.mark_stopped(
                    instance.service, instance.node_id, instance.gpu_id
                )

    async def run_heartbeat_loop(self) -> None:
        """Poll all agents every _HEARTBEAT_INTERVAL_S; idle-sweep every 3rd tick."""
        self._running = True
        while self._running:
            await self.poll_all()
            self._heartbeat_tick += 1
            if self._heartbeat_tick % 3 == 0:
                await self._run_idle_sweep()
            await asyncio.sleep(_HEARTBEAT_INTERVAL_S)

    def stop(self) -> None:
        """Signal run_heartbeat_loop to exit after its current iteration."""
        self._running = False
|
|
||||||
|
|
@ -1,509 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
import urllib.request
|
|
||||||
from contextlib import asynccontextmanager
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
|
||||||
from fastapi.responses import HTMLResponse
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
|
||||||
from circuitforge_core.resources.coordinator.eviction_engine import EvictionEngine
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.coordinator.node_selector import select_node
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry
|
|
||||||
from circuitforge_core.resources.profiles.schema import ProcessSpec
|
|
||||||
|
|
||||||
_DASHBOARD_HTML = (Path(__file__).parent / "dashboard.html").read_text()
|
|
||||||
|
|
||||||
|
|
||||||
def _get_health_path(profile_registry: ProfileRegistry, service: str) -> str:
    """Return the health_path for a service from the first matching profile spec."""
    specs = (
        profile.services.get(service)
        for profile in profile_registry.list_public()
    )
    for spec in specs:
        if spec and isinstance(spec.managed, ProcessSpec):
            return spec.managed.health_path
    # No profile declares a managed process for this service — use the default.
    return "/health"
|
|
||||||
|
|
||||||
_PROBE_INTERVAL_S = 5.0 # how often to poll starting instances
|
|
||||||
_PROBE_TIMEOUT_S = 300.0 # give up and mark stopped after this many seconds
|
|
||||||
|
|
||||||
|
|
||||||
async def _run_instance_probe_loop(service_registry: ServiceRegistry) -> None:
    """
    Background loop: transition 'starting' instances to 'running' once their
    /health endpoint responds, or to 'stopped' after PROBE_TIMEOUT_S.
    """
    import asyncio

    # instance key ("service:node:gpu") → time first seen in 'starting' state
    start_times: dict[str, float] = {}

    while True:
        await asyncio.sleep(_PROBE_INTERVAL_S)
        now = time.time()
        for inst in service_registry.all_instances():
            if inst.state != "starting":
                # Instance left 'starting' via some other path — drop its timer.
                start_times.pop(f"{inst.service}:{inst.node_id}:{inst.gpu_id}", None)
                continue
            key = f"{inst.service}:{inst.node_id}:{inst.gpu_id}"
            # Record when we first observed this instance in the starting state.
            start_times.setdefault(key, now)

            healthy = False
            if inst.url:
                try:
                    # NOTE(review): urllib is blocking and briefly stalls the
                    # event loop; probes are tiny (2 s cap) — confirm intended.
                    with urllib.request.urlopen(
                        inst.url.rstrip("/") + inst.health_path, timeout=2.0
                    ) as resp:
                        healthy = resp.status == 200
                except Exception:
                    # Not healthy yet (refused, timeout, DNS) — keep waiting.
                    pass

            if healthy:
                service_registry.upsert_instance(
                    service=inst.service, node_id=inst.node_id, gpu_id=inst.gpu_id,
                    state="running", model=inst.model, url=inst.url,
                )
                start_times.pop(key, None)
                logger.info("Instance %s/%s gpu=%s transitioned to running", inst.service, inst.node_id, inst.gpu_id)
            elif now - start_times[key] > _PROBE_TIMEOUT_S:
                # Never became healthy within the grace period — give up.
                service_registry.upsert_instance(
                    service=inst.service, node_id=inst.node_id, gpu_id=inst.gpu_id,
                    state="stopped", model=inst.model, url=inst.url,
                )
                start_times.pop(key, None)
                logger.warning("Instance %s/%s gpu=%s timed out in starting state — marked stopped", inst.service, inst.node_id, inst.gpu_id)
|
|
||||||
|
|
||||||
|
|
||||||
class LeaseRequest(BaseModel):
    """Request body for POST /api/leases — a direct VRAM lease on a specific GPU."""

    node_id: str
    gpu_id: int
    # VRAM requested, in megabytes.
    mb: int
    # Name of the service requesting the lease.
    service: str
    # Eviction priority — ordering semantics defined by EvictionEngine; confirm there.
    priority: int = 2
    # Lease lifetime in seconds; 0 presumably means no expiry — confirm in LeaseManager.
    ttl_s: float = 0.0
|
|
||||||
|
|
||||||
|
|
||||||
class NodeRegisterRequest(BaseModel):
    """Request body for POST /api/nodes — agent self-registration."""

    node_id: str
    agent_url: str  # e.g. "http://10.1.10.71:7701"
|
|
||||||
|
|
||||||
|
|
||||||
class ServiceEnsureRequest(BaseModel):
    """Request body for POST /api/services/{service}/ensure — explicit node placement."""

    node_id: str
    gpu_id: int = 0
    # Extra launch parameters forwarded to the agent's start endpoint.
    params: dict[str, str] = {}
    ttl_s: float = 3600.0
    # Ordered list of model names to try; falls back down the list if VRAM is tight.
    # The "model" key in params is used if this list is empty.
    model_candidates: list[str] = []
|
|
||||||
|
|
||||||
|
|
||||||
class ServiceAllocateRequest(BaseModel):
    """Request body for POST /api/services/{service}/allocate — coordinator picks the node."""

    # Ordered model preferences; must be non-empty (the handler rejects [] with 422).
    model_candidates: list[str] = []
    # Pin to a specific GPU id; None lets the coordinator choose a placement.
    gpu_id: int | None = None
    # Extra launch parameters forwarded to the agent's start endpoint.
    params: dict[str, str] = {}
    # Allocation lifetime in seconds before the TTL sweep reclaims it.
    ttl_s: float = 3600.0
    # Free-form identifier of the requesting client, for bookkeeping.
    caller: str = ""
|
|
||||||
|
|
||||||
|
|
||||||
def create_coordinator_app(
|
|
||||||
lease_manager: LeaseManager,
|
|
||||||
profile_registry: ProfileRegistry,
|
|
||||||
agent_supervisor: AgentSupervisor,
|
|
||||||
service_registry: ServiceRegistry,
|
|
||||||
) -> FastAPI:
|
|
||||||
eviction_engine = EvictionEngine(lease_manager=lease_manager)
|
|
||||||
|
|
||||||
@asynccontextmanager
|
|
||||||
async def _lifespan(app: FastAPI): # type: ignore[type-arg]
|
|
||||||
import asyncio
|
|
||||||
heartbeat_task = asyncio.create_task(agent_supervisor.run_heartbeat_loop())
|
|
||||||
probe_task = asyncio.create_task(_run_instance_probe_loop(service_registry))
|
|
||||||
yield
|
|
||||||
agent_supervisor.stop()
|
|
||||||
heartbeat_task.cancel()
|
|
||||||
probe_task.cancel()
|
|
||||||
|
|
||||||
app = FastAPI(title="cf-orch-coordinator", lifespan=_lifespan)
|
|
||||||
|
|
||||||
# Optional Heimdall auth — enabled when HEIMDALL_URL env var is set.
|
|
||||||
# Self-hosted coordinators skip this entirely; the CF-hosted public endpoint
|
|
||||||
# (orch.circuitforge.tech) sets HEIMDALL_URL to gate paid+ access.
|
|
||||||
from circuitforge_core.resources.coordinator.auth import HeimdallAuthMiddleware
|
|
||||||
_auth = HeimdallAuthMiddleware.from_env()
|
|
||||||
if _auth is not None:
|
|
||||||
app.middleware("http")(_auth)
|
|
||||||
|
|
||||||
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
|
|
||||||
def dashboard() -> HTMLResponse:
|
|
||||||
return HTMLResponse(content=_DASHBOARD_HTML)
|
|
||||||
|
|
||||||
@app.get("/api/health")
|
|
||||||
def health() -> dict[str, Any]:
|
|
||||||
return {"status": "ok"}
|
|
||||||
|
|
||||||
@app.get("/api/nodes")
|
|
||||||
def get_nodes() -> dict[str, Any]:
|
|
||||||
nodes = agent_supervisor.all_nodes()
|
|
||||||
return {
|
|
||||||
"nodes": [
|
|
||||||
{
|
|
||||||
"node_id": n.node_id,
|
|
||||||
"agent_url": n.agent_url,
|
|
||||||
"last_heartbeat": n.last_heartbeat,
|
|
||||||
"gpus": [
|
|
||||||
{
|
|
||||||
"gpu_id": g.gpu_id,
|
|
||||||
"name": g.name,
|
|
||||||
"vram_total_mb": g.vram_total_mb,
|
|
||||||
"vram_used_mb": g.vram_used_mb,
|
|
||||||
"vram_free_mb": g.vram_free_mb,
|
|
||||||
}
|
|
||||||
for g in n.gpus
|
|
||||||
],
|
|
||||||
}
|
|
||||||
for n in nodes
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.post("/api/nodes")
|
|
||||||
async def register_node(req: NodeRegisterRequest) -> dict[str, Any]:
|
|
||||||
"""Agents call this to self-register. Coordinator immediately polls for GPU info."""
|
|
||||||
agent_supervisor.register(req.node_id, req.agent_url)
|
|
||||||
await agent_supervisor.poll_agent(req.node_id)
|
|
||||||
return {"registered": True, "node_id": req.node_id}
|
|
||||||
|
|
||||||
@app.get("/api/profiles")
|
|
||||||
def get_profiles() -> dict[str, Any]:
|
|
||||||
return {
|
|
||||||
"profiles": [
|
|
||||||
{"name": p.name, "vram_total_mb": p.vram_total_mb}
|
|
||||||
for p in profile_registry.list_public()
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.get("/api/resident")
|
|
||||||
def get_residents() -> dict[str, Any]:
|
|
||||||
return {
|
|
||||||
"residents": [
|
|
||||||
{
|
|
||||||
"service": r.service,
|
|
||||||
"node_id": r.node_id,
|
|
||||||
"model_name": r.model_name,
|
|
||||||
"first_seen": r.first_seen,
|
|
||||||
}
|
|
||||||
for r in lease_manager.all_residents()
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.get("/api/leases")
|
|
||||||
def get_leases() -> dict[str, Any]:
|
|
||||||
return {
|
|
||||||
"leases": [
|
|
||||||
{
|
|
||||||
"lease_id": lease.lease_id,
|
|
||||||
"node_id": lease.node_id,
|
|
||||||
"gpu_id": lease.gpu_id,
|
|
||||||
"mb_granted": lease.mb_granted,
|
|
||||||
"holder_service": lease.holder_service,
|
|
||||||
"priority": lease.priority,
|
|
||||||
"expires_at": lease.expires_at,
|
|
||||||
}
|
|
||||||
for lease in lease_manager.all_leases()
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.post("/api/leases")
|
|
||||||
async def request_lease(req: LeaseRequest) -> dict[str, Any]:
|
|
||||||
node_info = agent_supervisor.get_node_info(req.node_id)
|
|
||||||
if node_info is None:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=422,
|
|
||||||
detail=f"Unknown node_id {req.node_id!r} — node not registered",
|
|
||||||
)
|
|
||||||
agent_url = node_info.agent_url
|
|
||||||
|
|
||||||
lease = await eviction_engine.request_lease(
|
|
||||||
node_id=req.node_id,
|
|
||||||
gpu_id=req.gpu_id,
|
|
||||||
mb=req.mb,
|
|
||||||
service=req.service,
|
|
||||||
priority=req.priority,
|
|
||||||
agent_url=agent_url,
|
|
||||||
ttl_s=req.ttl_s,
|
|
||||||
)
|
|
||||||
if lease is None:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=503,
|
|
||||||
detail="Insufficient VRAM — no eviction candidates available",
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
"lease": {
|
|
||||||
"lease_id": lease.lease_id,
|
|
||||||
"node_id": lease.node_id,
|
|
||||||
"gpu_id": lease.gpu_id,
|
|
||||||
"mb_granted": lease.mb_granted,
|
|
||||||
"holder_service": lease.holder_service,
|
|
||||||
"priority": lease.priority,
|
|
||||||
"expires_at": lease.expires_at,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.delete("/api/leases/{lease_id}")
|
|
||||||
async def release_lease(lease_id: str) -> dict[str, Any]:
|
|
||||||
released = await lease_manager.release(lease_id)
|
|
||||||
if not released:
|
|
||||||
raise HTTPException(status_code=404, detail=f"Lease {lease_id!r} not found")
|
|
||||||
return {"released": True, "lease_id": lease_id}
|
|
||||||
|
|
||||||
@app.post("/api/services/{service}/ensure")
|
|
||||||
async def ensure_service(service: str, req: ServiceEnsureRequest) -> dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Ensure a managed service is running on the given node.
|
|
||||||
|
|
||||||
If model_candidates is provided, tries each model in order, skipping any
|
|
||||||
that exceed the live free VRAM on the target GPU. Falls back down the list
|
|
||||||
until one succeeds. The selected model is returned in the response.
|
|
||||||
"""
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
node_info = agent_supervisor.get_node_info(req.node_id)
|
|
||||||
if node_info is None:
|
|
||||||
raise HTTPException(422, detail=f"Unknown node_id {req.node_id!r}")
|
|
||||||
|
|
||||||
# Resolve candidate list — fall back to params["model"] if not specified.
|
|
||||||
candidates: list[str] = req.model_candidates or (
|
|
||||||
[req.params["model"]] if "model" in req.params else []
|
|
||||||
)
|
|
||||||
if not candidates:
|
|
||||||
raise HTTPException(422, detail="No model specified: set params.model or model_candidates")
|
|
||||||
|
|
||||||
# Live free VRAM on the target GPU (used for pre-flight filtering).
|
|
||||||
gpu = next((g for g in node_info.gpus if g.gpu_id == req.gpu_id), None)
|
|
||||||
free_mb = gpu.vram_free_mb if gpu else 0
|
|
||||||
|
|
||||||
# Profile max_mb for the service gives us the VRAM ceiling for this slot.
|
|
||||||
# Models larger than free_mb are skipped before we even try to start them.
|
|
||||||
# We use model file size as a rough proxy — skip if free_mb < half of max_mb,
|
|
||||||
# since a fully-loaded model typically needs ~50-80% of its param size in VRAM.
|
|
||||||
service_max_mb = 0
|
|
||||||
for p in profile_registry.list_public():
|
|
||||||
svc = p.services.get(service)
|
|
||||||
if svc:
|
|
||||||
service_max_mb = svc.max_mb
|
|
||||||
break
|
|
||||||
|
|
||||||
# Filter candidates by VRAM headroom — require free VRAM >= service ceiling
|
|
||||||
# so the model can actually load without competing for VRAM with other processes.
|
|
||||||
if service_max_mb > 0 and free_mb < service_max_mb:
|
|
||||||
raise HTTPException(
|
|
||||||
503,
|
|
||||||
detail=f"Insufficient VRAM on gpu {req.gpu_id}: {free_mb}MB free, need {service_max_mb}MB",
|
|
||||||
)
|
|
||||||
|
|
||||||
last_error: str = ""
|
|
||||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
|
||||||
for model in candidates:
|
|
||||||
params_with_model = {**req.params, "model": model}
|
|
||||||
try:
|
|
||||||
start_resp = await client.post(
|
|
||||||
f"{node_info.agent_url}/services/{service}/start",
|
|
||||||
json={"gpu_id": req.gpu_id, "params": params_with_model},
|
|
||||||
)
|
|
||||||
if start_resp.is_success:
|
|
||||||
data = start_resp.json()
|
|
||||||
return {
|
|
||||||
"service": service,
|
|
||||||
"node_id": req.node_id,
|
|
||||||
"gpu_id": req.gpu_id,
|
|
||||||
"model": model,
|
|
||||||
"url": data.get("url"),
|
|
||||||
"running": data.get("running", False),
|
|
||||||
}
|
|
||||||
last_error = start_resp.text
|
|
||||||
except httpx.HTTPError as exc:
|
|
||||||
raise HTTPException(502, detail=f"Agent unreachable: {exc}")
|
|
||||||
|
|
||||||
raise HTTPException(
|
|
||||||
503,
|
|
||||||
detail=f"All model candidates exhausted for {service!r}. Last error: {last_error}",
|
|
||||||
)
|
|
||||||
|
|
||||||
@app.post("/api/services/{service}/allocate")
|
|
||||||
async def allocate_service(service: str, req: ServiceAllocateRequest) -> dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Allocate a managed service — coordinator picks the best node automatically.
|
|
||||||
Returns a URL + allocation_id. (Allocation not tracked server-side until Phase 2.)
|
|
||||||
"""
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
if not req.model_candidates:
|
|
||||||
raise HTTPException(422, detail="model_candidates must be non-empty")
|
|
||||||
|
|
||||||
# Validate service is known in at least one profile, regardless of gpu_id
|
|
||||||
if not any(service in p.services for p in profile_registry.list_public()):
|
|
||||||
raise HTTPException(422, detail=f"Unknown service {service!r} — not in any profile")
|
|
||||||
|
|
||||||
residents = lease_manager.resident_keys()
|
|
||||||
|
|
||||||
if req.gpu_id is None:
|
|
||||||
online = agent_supervisor.online_agents()
|
|
||||||
placement = select_node(online, service, profile_registry, residents)
|
|
||||||
if placement is None:
|
|
||||||
raise HTTPException(
|
|
||||||
503,
|
|
||||||
detail=f"No online node has capacity for service {service!r}",
|
|
||||||
)
|
|
||||||
node_id, gpu_id = placement
|
|
||||||
else:
|
|
||||||
online = agent_supervisor.online_agents()
|
|
||||||
node_id = next(
|
|
||||||
(nid for nid, rec in online.items()
|
|
||||||
if any(g.gpu_id == req.gpu_id for g in rec.gpus)),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
if node_id is None:
|
|
||||||
raise HTTPException(422, detail=f"No online node has gpu_id={req.gpu_id}")
|
|
||||||
gpu_id = req.gpu_id
|
|
||||||
|
|
||||||
node_info = agent_supervisor.get_node_info(node_id)
|
|
||||||
if node_info is None:
|
|
||||||
raise HTTPException(422, detail=f"Node {node_id!r} not found")
|
|
||||||
|
|
||||||
warm = f"{node_id}:{service}" in residents
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
|
||||||
last_error = ""
|
|
||||||
for model in req.model_candidates:
|
|
||||||
try:
|
|
||||||
resp = await client.post(
|
|
||||||
f"{node_info.agent_url}/services/{service}/start",
|
|
||||||
json={"gpu_id": gpu_id, "params": {**req.params, "model": model}},
|
|
||||||
)
|
|
||||||
if resp.is_success:
|
|
||||||
data = resp.json()
|
|
||||||
svc_url = data.get("url", "")
|
|
||||||
alloc = service_registry.allocate(
|
|
||||||
service=service,
|
|
||||||
node_id=node_id,
|
|
||||||
gpu_id=gpu_id,
|
|
||||||
model=model,
|
|
||||||
caller=req.caller,
|
|
||||||
url=svc_url,
|
|
||||||
ttl_s=req.ttl_s,
|
|
||||||
)
|
|
||||||
# Seed the instance state for first-time starts.
|
|
||||||
# adopted=True means the agent found it already running.
|
|
||||||
adopted = data.get("adopted", False)
|
|
||||||
instance_state = "running" if (warm or adopted) else "starting"
|
|
||||||
health_path = _get_health_path(profile_registry, service)
|
|
||||||
service_registry.upsert_instance(
|
|
||||||
service=service,
|
|
||||||
node_id=node_id,
|
|
||||||
gpu_id=gpu_id,
|
|
||||||
state=instance_state,
|
|
||||||
model=model,
|
|
||||||
url=svc_url,
|
|
||||||
health_path=health_path,
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
"allocation_id": alloc.allocation_id,
|
|
||||||
"service": service,
|
|
||||||
"node_id": node_id,
|
|
||||||
"gpu_id": gpu_id,
|
|
||||||
"model": model,
|
|
||||||
"url": data.get("url"),
|
|
||||||
"started": not warm,
|
|
||||||
"warm": warm,
|
|
||||||
}
|
|
||||||
last_error = resp.text
|
|
||||||
except httpx.HTTPError as exc:
|
|
||||||
raise HTTPException(502, detail=f"Agent unreachable: {exc}")
|
|
||||||
|
|
||||||
raise HTTPException(
|
|
||||||
503,
|
|
||||||
detail=f"All model candidates exhausted for {service!r}. Last error: {last_error}",
|
|
||||||
)
|
|
||||||
|
|
||||||
@app.delete("/api/services/{service}/allocations/{allocation_id}")
|
|
||||||
async def release_allocation(service: str, allocation_id: str) -> dict[str, Any]:
|
|
||||||
existing = service_registry.get_allocation(allocation_id)
|
|
||||||
if existing is None or existing.service != service:
|
|
||||||
raise HTTPException(404, detail=f"Allocation {allocation_id!r} not found for service {service!r}")
|
|
||||||
released = service_registry.release(allocation_id)
|
|
||||||
if not released:
|
|
||||||
raise HTTPException(404, detail=f"Allocation {allocation_id!r} not found")
|
|
||||||
return {"released": True, "allocation_id": allocation_id}
|
|
||||||
|
|
||||||
@app.get("/api/services/{service}/status")
|
|
||||||
def get_service_status(service: str) -> dict[str, Any]:
|
|
||||||
instances = [i for i in service_registry.all_instances() if i.service == service]
|
|
||||||
allocations = [a for a in service_registry.all_allocations() if a.service == service]
|
|
||||||
return {
|
|
||||||
"service": service,
|
|
||||||
"instances": [
|
|
||||||
{
|
|
||||||
"node_id": i.node_id,
|
|
||||||
"gpu_id": i.gpu_id,
|
|
||||||
"state": i.state,
|
|
||||||
"model": i.model,
|
|
||||||
"url": i.url,
|
|
||||||
"idle_since": i.idle_since,
|
|
||||||
}
|
|
||||||
for i in instances
|
|
||||||
],
|
|
||||||
"allocations": [
|
|
||||||
{
|
|
||||||
"allocation_id": a.allocation_id,
|
|
||||||
"node_id": a.node_id,
|
|
||||||
"gpu_id": a.gpu_id,
|
|
||||||
"model": a.model,
|
|
||||||
"caller": a.caller,
|
|
||||||
"url": a.url,
|
|
||||||
"expires_at": a.expires_at,
|
|
||||||
}
|
|
||||||
for a in allocations
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.get("/api/services")
|
|
||||||
def list_services() -> dict[str, Any]:
|
|
||||||
instances = service_registry.all_instances()
|
|
||||||
return {
|
|
||||||
"services": [
|
|
||||||
{
|
|
||||||
"service": i.service,
|
|
||||||
"node_id": i.node_id,
|
|
||||||
"gpu_id": i.gpu_id,
|
|
||||||
"state": i.state,
|
|
||||||
"model": i.model,
|
|
||||||
"url": i.url,
|
|
||||||
}
|
|
||||||
for i in instances
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
@app.delete("/api/services/{service}")
|
|
||||||
async def stop_service(service: str, node_id: str) -> dict[str, Any]:
|
|
||||||
"""Stop a managed service on the given node."""
|
|
||||||
node_info = agent_supervisor.get_node_info(node_id)
|
|
||||||
if node_info is None:
|
|
||||||
raise HTTPException(422, detail=f"Unknown node_id {node_id!r}")
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
||||||
try:
|
|
||||||
resp = await client.post(f"{node_info.agent_url}/services/{service}/stop")
|
|
||||||
resp.raise_for_status()
|
|
||||||
return {"service": service, "node_id": node_id, "stopped": resp.json().get("stopped", False)}
|
|
||||||
except httpx.HTTPError as exc:
|
|
||||||
raise HTTPException(502, detail=f"Agent unreachable: {exc}")
|
|
||||||
|
|
||||||
return app
|
|
||||||
|
|
@ -1,197 +0,0 @@
|
||||||
"""
|
|
||||||
cf-orch coordinator auth middleware.
|
|
||||||
|
|
||||||
When HEIMDALL_URL is set, all /api/* requests (except /api/health) must carry:
|
|
||||||
Authorization: Bearer <CF license key>
|
|
||||||
|
|
||||||
The key is validated against Heimdall and the result cached for
|
|
||||||
CACHE_TTL_S seconds (default 300 / 5 min). This keeps Heimdall out of the
|
|
||||||
per-allocation hot path while keeping revocation latency bounded.
|
|
||||||
|
|
||||||
When HEIMDALL_URL is not set, auth is disabled — self-hosted deployments work
|
|
||||||
with no configuration change.
|
|
||||||
|
|
||||||
Environment variables
|
|
||||||
---------------------
|
|
||||||
HEIMDALL_URL Heimdall base URL, e.g. https://license.circuitforge.tech
|
|
||||||
When absent, auth is skipped entirely.
|
|
||||||
HEIMDALL_MIN_TIER Minimum tier required (default: "paid").
|
|
||||||
Accepted values: free, paid, premium, ultra.
|
|
||||||
CF_ORCH_AUTH_SECRET Shared secret sent to Heimdall so it can distinguish
|
|
||||||
coordinator service calls from end-user requests.
|
|
||||||
Must match the COORDINATOR_SECRET env var on Heimdall.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations

import asyncio
import logging
import os
import time
from dataclasses import dataclass, field
from threading import Lock

import httpx
from fastapi import Request
from fastapi.responses import JSONResponse
|
|
||||||
|
|
||||||
# Module-level logger for the auth middleware.
logger = logging.getLogger(__name__)

# Unauthenticated paths — health check must always be accessible for monitoring.
_EXEMPT_PATHS: frozenset[str] = frozenset({"/api/health", "/", "/openapi.json", "/docs", "/redoc"})

# Tier ranking for "minimum tier" comparisons; a higher rank is more privileged.
_TIER_ORDER: dict[str, int] = {"free": 0, "paid": 1, "premium": 2, "ultra": 3}

# Default lifetime of a cached Heimdall validation result.
CACHE_TTL_S: float = 300.0  # 5 minutes — matches Kiwi cloud session TTL
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class _CacheEntry:
    """Cached outcome of a single Heimdall license-key validation."""

    # True when Heimdall reported the key as valid.
    valid: bool
    # Tier name returned by Heimdall ("" when invalid or unknown).
    tier: str
    # User id returned by Heimdall ("" when invalid or unknown).
    user_id: str
    # time.monotonic() deadline after which this entry is stale.
    expires_at: float
|
|
||||||
|
|
||||||
|
|
||||||
class _ValidationCache:
    """Thread-safe TTL cache for Heimdall validation results.

    Entries are keyed by license key. Expired entries are dropped lazily on
    read (and in bulk via ``prune()``), so probing many distinct keys once
    cannot grow the store without bound.
    """

    def __init__(self, ttl_s: float = CACHE_TTL_S) -> None:
        self._ttl = ttl_s
        self._store: dict[str, _CacheEntry] = {}
        self._lock = Lock()

    def get(self, key: str) -> _CacheEntry | None:
        """Return the live entry for *key*, or None if absent or expired.

        Fix: an expired entry is now evicted immediately. Previously it
        lingered until overwritten or until an explicit ``prune()`` — which
        nothing on the request path calls — so distinct (attacker-suppliable)
        keys accumulated indefinitely.
        """
        with self._lock:
            entry = self._store.get(key)
            if entry is None:
                return None
            if time.monotonic() > entry.expires_at:
                # Lazy eviction: drop the stale entry so the store stays bounded.
                del self._store[key]
                return None
            return entry

    def set(self, key: str, valid: bool, tier: str, user_id: str) -> None:
        """Store a validation result for *key*, valid for the cache TTL."""
        with self._lock:
            self._store[key] = _CacheEntry(
                valid=valid,
                tier=tier,
                user_id=user_id,
                expires_at=time.monotonic() + self._ttl,
            )

    def evict(self, key: str) -> None:
        """Remove *key* from the cache if present (no-op otherwise)."""
        with self._lock:
            self._store.pop(key, None)

    def prune(self) -> int:
        """Remove expired entries. Returns count removed."""
        now = time.monotonic()
        with self._lock:
            expired = [k for k, e in self._store.items() if now > e.expires_at]
            for k in expired:
                del self._store[k]
            return len(expired)
|
|
||||||
|
|
||||||
|
|
||||||
class HeimdallAuthMiddleware:
    """
    ASGI middleware that validates CF license keys against Heimdall.

    Attach to a FastAPI app via app.middleware("http"):

        middleware = HeimdallAuthMiddleware.from_env()
        if middleware:
            app.middleware("http")(middleware)

    Fix: ``_validate_against_heimdall`` performs a blocking ``httpx.post``
    (5 s timeout). It was previously invoked directly from the async
    ``__call__``, stalling the event loop — and therefore every in-flight
    request — on each cache miss. The check now runs in a worker thread via
    ``asyncio.to_thread``.
    """

    def __init__(
        self,
        heimdall_url: str,
        min_tier: str = "paid",
        auth_secret: str = "",
        cache_ttl_s: float = CACHE_TTL_S,
    ) -> None:
        """Configure the middleware; see module docstring for the env contract."""
        self._heimdall = heimdall_url.rstrip("/")
        # Unknown tier names fall back to rank 1 ("paid").
        self._min_tier_rank = _TIER_ORDER.get(min_tier, 1)
        self._min_tier = min_tier
        self._auth_secret = auth_secret
        self._cache = _ValidationCache(ttl_s=cache_ttl_s)
        logger.info(
            "[cf-orch auth] Heimdall auth enabled — url=%s min_tier=%s ttl=%ss",
            self._heimdall, min_tier, cache_ttl_s,
        )

    @classmethod
    def from_env(cls) -> "HeimdallAuthMiddleware | None":
        """Return a configured middleware instance, or None if HEIMDALL_URL is not set."""
        url = os.environ.get("HEIMDALL_URL", "")
        if not url:
            logger.info("[cf-orch auth] HEIMDALL_URL not set — auth disabled (self-hosted mode)")
            return None
        return cls(
            heimdall_url=url,
            min_tier=os.environ.get("HEIMDALL_MIN_TIER", "paid"),
            auth_secret=os.environ.get("CF_ORCH_AUTH_SECRET", ""),
        )

    def _validate_against_heimdall(self, license_key: str) -> tuple[bool, str, str]:
        """
        Call Heimdall's /licenses/verify endpoint. Blocking — call off-loop.

        Returns (valid, tier, user_id).
        On any network or parse error, returns (False, "", "") — fail closed.
        """
        try:
            headers: dict[str, str] = {"Content-Type": "application/json"}
            if self._auth_secret:
                # Lets Heimdall distinguish coordinator calls from end-user traffic.
                headers["X-Coordinator-Secret"] = self._auth_secret
            resp = httpx.post(
                f"{self._heimdall}/licenses/verify",
                json={"key": license_key, "min_tier": self._min_tier},
                headers=headers,
                timeout=5.0,
            )
            if resp.status_code == 200:
                data = resp.json()
                return data.get("valid", False), data.get("tier", ""), data.get("user_id", "")
            # 401/403 from Heimdall = key invalid/insufficient tier
            logger.debug("[cf-orch auth] Heimdall returned %s for key ...%s", resp.status_code, license_key[-6:])
            return False, "", ""
        except Exception as exc:
            logger.warning("[cf-orch auth] Heimdall unreachable — failing closed: %s", exc)
            return False, "", ""

    def _deny_reason(self, valid: bool, tier: str) -> str:
        """Return the denial reason for a validation result, or "" when authorized."""
        if not valid:
            return "license key invalid or expired"
        if _TIER_ORDER.get(tier, -1) < self._min_tier_rank:
            return f"feature requires {self._min_tier} tier (have: {tier})"
        return ""

    def _check_key(self, license_key: str) -> tuple[bool, str]:
        """
        Validate key (cache-first). Returns (authorized, reason_if_denied).

        Blocking on cache miss (network round-trip) — call via asyncio.to_thread.
        """
        cached = self._cache.get(license_key)
        if cached is not None:
            reason = self._deny_reason(cached.valid, cached.tier)
            return not reason, reason

        valid, tier, user_id = self._validate_against_heimdall(license_key)
        # Negative results are cached too, bounding Heimdall load under key-guessing.
        self._cache.set(license_key, valid=valid, tier=tier, user_id=user_id)
        reason = self._deny_reason(valid, tier)
        return not reason, reason

    async def __call__(self, request: Request, call_next):  # type: ignore[no-untyped-def]
        # Health/docs endpoints stay reachable without credentials.
        if request.url.path in _EXEMPT_PATHS:
            return await call_next(request)

        auth_header = request.headers.get("Authorization", "")
        if not auth_header.startswith("Bearer "):
            return JSONResponse(
                status_code=401,
                content={"detail": "Authorization: Bearer <license_key> required"},
            )

        license_key = auth_header.removeprefix("Bearer ").strip()
        # Off-load the (potentially blocking) validation so the event loop is
        # never stalled for the Heimdall timeout.
        authorized, reason = await asyncio.to_thread(self._check_key, license_key)
        if not authorized:
            return JSONResponse(status_code=403, content={"detail": reason})

        return await call_next(request)
|
|
||||||
|
|
@ -1,473 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
<title>cf-orch · dashboard</title>
|
|
||||||
<style>
|
|
||||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
|
||||||
|
|
||||||
:root {
|
|
||||||
--bg: #0d1117;
|
|
||||||
--bg2: #161b22;
|
|
||||||
--bg3: #1c2129;
|
|
||||||
--border: #30363d;
|
|
||||||
--border-dim: #21262d;
|
|
||||||
--text: #e6edf3;
|
|
||||||
--muted: #8b949e;
|
|
||||||
--dim: #4d5763;
|
|
||||||
--indigo: #818cf8;
|
|
||||||
--cyan: #22d3ee;
|
|
||||||
--green: #4ade80;
|
|
||||||
--amber: #fbbf24;
|
|
||||||
--red: #f85149;
|
|
||||||
--orange: #fb923c;
|
|
||||||
--radius: 6px;
|
|
||||||
--radius-sm: 3px;
|
|
||||||
--font: 'JetBrains Mono', 'Fira Code', ui-monospace, monospace;
|
|
||||||
}
|
|
||||||
|
|
||||||
body { background: var(--bg); color: var(--text); font-family: var(--font); font-size: 13px; line-height: 1.5; padding: 1rem; }
|
|
||||||
|
|
||||||
/* header */
|
|
||||||
header { display: flex; align-items: center; gap: 1rem; margin-bottom: 1rem; padding-bottom: 0.75rem; border-bottom: 1px solid var(--border); }
|
|
||||||
.logo { color: var(--indigo); font-size: 1.1em; font-weight: 700; }
|
|
||||||
#refresh-badge { margin-left: auto; font-size: 0.75em; color: var(--dim); }
|
|
||||||
#refresh-badge span { color: var(--green); }
|
|
||||||
|
|
||||||
/* section labels */
|
|
||||||
.section-label { font-size: 0.72em; font-weight: 600; text-transform: uppercase; letter-spacing: 0.07em; color: var(--dim); margin-bottom: 0.5rem; }
|
|
||||||
|
|
||||||
/* health strip */
|
|
||||||
#health-strip { display: flex; flex-wrap: wrap; gap: 0.4rem; margin-bottom: 1rem; padding: 0.6rem 0.75rem; background: var(--bg2); border: 1px solid var(--border); border-radius: var(--radius); min-height: 36px; }
|
|
||||||
.pill { display: inline-flex; align-items: center; gap: 0.3rem; padding: 2px 10px; border-radius: 99px; font-size: 0.8em; font-weight: 600; }
|
|
||||||
.pill.ok { background: rgba(74,222,128,.12); color: var(--green); }
|
|
||||||
.pill.err { background: rgba(248,81,73,.12); color: var(--red); }
|
|
||||||
.pill.off { background: rgba(139,148,158,.1); color: var(--dim); }
|
|
||||||
|
|
||||||
/* GPU grid */
|
|
||||||
#gpu-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(180px, 1fr)); gap: 0.6rem; margin-bottom: 1rem; }
|
|
||||||
.gpu-card { background: var(--bg3); border: 1px solid var(--border); border-radius: var(--radius); padding: 0.7rem 0.8rem; }
|
|
||||||
.gpu-card.offline { border-color: #7c2d12; opacity: 0.7; }
|
|
||||||
.gpu-node { font-size: 0.75em; font-weight: 700; color: var(--indigo); margin-bottom: 1px; }
|
|
||||||
.gpu-offline .gpu-node { color: var(--orange); }
|
|
||||||
.gpu-name { font-size: 0.78em; color: var(--text); margin-bottom: 0.4rem; }
|
|
||||||
.vram-track { position: relative; background: var(--bg); border-radius: var(--radius-sm); height: 6px; margin-bottom: 0.3rem; overflow: hidden; }
|
|
||||||
.vram-leased { position: absolute; left: 0; top: 0; height: 100%; background: var(--cyan); transition: width 0.4s; }
|
|
||||||
.vram-resident { position: absolute; top: 0; height: 100%; background: var(--amber); transition: left 0.4s, width 0.4s; }
|
|
||||||
.vram-label { font-size: 0.72em; color: var(--muted); margin-bottom: 0.25rem; }
|
|
||||||
.gpu-status { font-size: 0.72em; }
|
|
||||||
.gpu-status.idle { color: var(--green); }
|
|
||||||
.gpu-status.busy { color: var(--amber); }
|
|
||||||
.gpu-status.full { color: var(--red); }
|
|
||||||
.gpu-status.offline { color: var(--orange); }
|
|
||||||
.spark-track { height: 24px; background: var(--bg); border-radius: var(--radius-sm); margin-top: 0.4rem; overflow: hidden; }
|
|
||||||
|
|
||||||
/* shared table base */
|
|
||||||
.cf-table { width: 100%; border-collapse: collapse; background: var(--bg2); border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden; margin-bottom: 1rem; }
|
|
||||||
.cf-table th { background: var(--bg3); color: var(--dim); font-size: 0.72em; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; padding: 0.4rem 0.6rem; text-align: left; border-bottom: 1px solid var(--border); }
|
|
||||||
.cf-table td { padding: 0.35rem 0.6rem; border-bottom: 1px solid var(--border-dim); font-size: 0.8em; vertical-align: middle; }
|
|
||||||
.cf-table tr:last-child td { border-bottom: none; }
|
|
||||||
.td-service { color: var(--indigo); font-weight: 600; }
|
|
||||||
.td-node { color: var(--muted); }
|
|
||||||
.td-mb { color: var(--text); }
|
|
||||||
.td-priority { color: var(--amber); }
|
|
||||||
.td-model { color: var(--cyan); font-size: 0.75em; }
|
|
||||||
.td-warm { color: var(--amber); }
|
|
||||||
.td-none { color: var(--dim); font-style: italic; }
|
|
||||||
.ttl-wrap { display: flex; align-items: center; gap: 0.5rem; }
|
|
||||||
.ttl-label { color: var(--cyan); font-variant-numeric: tabular-nums; white-space: nowrap; }
|
|
||||||
.ttl-track { flex: 1; background: var(--bg); border-radius: var(--radius-sm); height: 4px; }
|
|
||||||
.ttl-fill { height: 100%; border-radius: var(--radius-sm); background: var(--cyan); transition: width 0.4s; }
|
|
||||||
|
|
||||||
/* service state classes */
|
|
||||||
.state-running { color: #2ecc40; }
|
|
||||||
.state-idle { color: #ff851b; }
|
|
||||||
.state-stopped { color: #aaa; }
|
|
||||||
.state-starting { color: #0074d9; }
|
|
||||||
.state-unknown { color: #ff4136; }
|
|
||||||
|
|
||||||
/* error */
|
|
||||||
#error-banner { display: none; background: rgba(248,81,73,.1); border: 1px solid var(--red); border-radius: var(--radius); color: var(--red); padding: 0.5rem 0.75rem; font-size: 0.82em; margin-bottom: 1rem; }
|
|
||||||
|
|
||||||
/* footer */
|
|
||||||
footer { border-top: 1px solid var(--border); padding-top: 0.5rem; color: var(--dim); font-size: 0.72em; display: flex; gap: 1.5rem; }
|
|
||||||
footer a { color: var(--indigo); text-decoration: none; }
|
|
||||||
footer a:hover { text-decoration: underline; }
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<header>
|
|
||||||
<span class="logo">cf-orch</span>
|
|
||||||
<span id="cluster-label" style="color:var(--muted)">coordinator</span>
|
|
||||||
<div id="refresh-badge">auto-refresh <span id="countdown">5</span>s</div>
|
|
||||||
</header>
|
|
||||||
|
|
||||||
<div id="error-banner"></div>
|
|
||||||
|
|
||||||
<div class="section-label">Services</div>
|
|
||||||
<div id="health-strip"></div>
|
|
||||||
|
|
||||||
<div class="section-label">GPU Nodes</div>
|
|
||||||
<div id="gpu-grid"></div>
|
|
||||||
|
|
||||||
<div id="services-section">
|
|
||||||
<div class="section-label">Service Instances</div>
|
|
||||||
<table class="cf-table" id="services-table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Service</th><th>Node</th><th>GPU</th><th>State</th><th>Model</th><th>URL</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody id="services-body"></tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="section-label">Active Leases</div>
|
|
||||||
<table class="cf-table" id="leases-table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Service</th><th>Node / GPU</th><th>VRAM</th><th>Priority</th><th>TTL / Expires</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody id="leases-body"></tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<div class="section-label">Warm Models</div>
|
|
||||||
<table class="cf-table" id="resident-table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Service</th><th>Node</th><th>Model</th><th>Warm Since</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody id="resident-body"></tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<footer>
|
|
||||||
<span>cf-orch · circuitforge-core</span>
|
|
||||||
<a href="/api/nodes" target="_blank">/api/nodes</a>
|
|
||||||
<a href="/api/leases" target="_blank">/api/leases</a>
|
|
||||||
<a href="/api/resident" target="_blank">/api/resident</a>
|
|
||||||
<a href="/api/services" target="_blank">/api/services</a>
|
|
||||||
<a href="/api/health" target="_blank">/api/health</a>
|
|
||||||
</footer>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
"use strict";
|
|
||||||
|
|
||||||
// ── helpers ──────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/**
 * Create an element. `opts` may carry:
 *   cls   — space-separated class names (blank tokens ignored)
 *   text  — textContent (applied unless null/undefined)
 *   style — object merged into element.style
 *   attr  — map of attributes to set
 */
function el(tag, opts) {
  const node = document.createElement(tag);
  const o = opts || {};
  if (o.cls) {
    for (const token of o.cls.split(' ')) {
      if (token) node.classList.add(token);
    }
  }
  if (o.text != null) node.textContent = o.text;
  if (o.style) Object.assign(node.style, o.style);
  if (o.attr) {
    for (const [name, value] of Object.entries(o.attr)) {
      node.setAttribute(name, value);
    }
  }
  return node;
}
|
|
||||||
|
|
||||||
/** Append each truthy child to `parent`; falsy entries are skipped. Returns parent. */
function append(parent, ...children) {
  for (const child of children) {
    if (child) parent.appendChild(child);
  }
  return parent;
}
|
|
||||||
|
|
||||||
/** Remove every existing child of `parent`, then append the given children. */
function setChildren(parent, ...children) {
  let leftover;
  while ((leftover = parent.firstChild)) {
    parent.removeChild(leftover);
  }
  append(parent, ...children);
}
|
|
||||||
|
|
||||||
/**
 * Build a VRAM-usage sparkline as an inline SVG element (no innerHTML).
 * `history` is a list of vram_used_mb samples; `totalMb` scales the y-axis.
 * With fewer than two samples a flat placeholder baseline is drawn.
 */
function buildSparkline(history, totalMb) {
  const ns = 'http://www.w3.org/2000/svg';
  const setAttrs = (node, attrs) => {
    for (const [name, value] of Object.entries(attrs)) node.setAttribute(name, value);
  };

  const svg = document.createElementNS(ns, 'svg');
  setAttrs(svg, { width: '100%', height: '16', viewBox: '0 0 100 16' });

  if (!history || history.length < 2) {
    // Nothing to plot yet — draw a dim flat baseline.
    const baseline = document.createElementNS(ns, 'line');
    setAttrs(baseline, {
      x1: '0', y1: '14', x2: '100', y2: '14',
      stroke: '#30363d', 'stroke-width': '1',
    });
    svg.appendChild(baseline);
    return svg;
  }

  // Scale x across the full 100-unit width, y into the 2..14 band.
  const ceiling = Math.max(totalMb, 1);
  const lastIdx = history.length - 1;
  const coords = [];
  for (let i = 0; i <= lastIdx; i++) {
    const x = (i / lastIdx) * 100;
    const y = 14 - ((history[i] / ceiling) * 12);
    coords.push(x.toFixed(1) + ',' + y.toFixed(1));
  }

  const poly = document.createElementNS(ns, 'polyline');
  setAttrs(poly, {
    points: coords.join(' '),
    fill: 'none',
    stroke: '#818cf8',
    'stroke-width': '1.5',
    'stroke-linejoin': 'round',
  });
  svg.appendChild(poly);
  return svg;
}
|
|
||||||
|
|
||||||
/** VRAM fill colour for a utilisation fraction: red ≥ 0.9, amber ≥ 0.7, cyan below. */
function vramColor(pct) {
  return pct >= 0.9 ? '#f85149'
       : pct >= 0.7 ? '#fbbf24'
       : '#22d3ee';
}
|
|
||||||
|
|
||||||
// ── sparkline history ────────────────────────────────────────────
// keyed "nodeId:gpuId" → array of vram_used_mb, max 20 samples
// (samples are pushed and trimmed in renderNodes on every refresh)
const sparkHistory = {};

// ── countdown ────────────────────────────────────────────────────
// Visual-only ticker for the "auto-refresh Ns" header badge; counts
// 5 → 1 and wraps. NOTE(review): it runs independently of the actual
// fetch cycle — presumably the poll interval is also 5 s; confirm
// against the refresh loop.
let countdown = 5;
setInterval(() => {
countdown = countdown <= 1 ? 5 : countdown - 1;
document.getElementById('countdown').textContent = countdown;
}, 1000);
|
|
||||||
|
|
||||||
// ── state class helper ───────────────────────────────────────────
/**
 * Map a service-instance state to its CSS class.
 * Fix: the old `{...}[state]` lookup walked the prototype chain, so states
 * like "constructor" or "toString" returned inherited object members instead
 * of 'state-unknown'. An explicit switch only matches the intended keys.
 */
function stateClass(state) {
  switch (state) {
    case 'running': return 'state-running';
    case 'idle': return 'state-idle';
    case 'stopped': return 'state-stopped';
    case 'starting': return 'state-starting';
    default: return 'state-unknown';
  }
}
|
|
||||||
|
|
||||||
// ── render: services table ───────────────────────────────────────
// Rebuild the "Service Instances" table body from /api/services rows.
// Column order must match the static <thead>: Service, Node, GPU, State,
// Model, URL (6 columns).
function renderServices(services) {
const tbody = document.getElementById('services-body');
if (!services || services.length === 0) {
// Empty state: a single dim row spanning every column.
const tr = document.createElement('tr');
const td = el('td', { cls: 'td-none', text: 'No service instances registered.' });
td.setAttribute('colspan', '6');
tr.appendChild(td);
setChildren(tbody, tr);
return;
}

const rows = services.map(svc => {
const tr = document.createElement('tr');
// One cell descriptor per column; missing model/url render as an em-dash.
const fields = [
{ text: svc.service, cls: 'td-service' },
{ text: svc.node_id, cls: 'td-node' },
{ text: String(svc.gpu_id), cls: 'td-mb' },
{ text: svc.state, cls: stateClass(svc.state) },
{ text: svc.model || '\u2014', cls: 'td-model' },
{ text: svc.url || '\u2014', cls: 'td-node' },
];
fields.forEach(f => tr.appendChild(el('td', { cls: f.cls, text: f.text })));
return tr;
});

setChildren(tbody, ...rows);
}
|
|
||||||
|
|
||||||
// ── render: health strip ─────────────────────────────────────────
/** Show a single coordinator pill: green "●" when healthy, red "✕" otherwise. */
function renderHealth(ok) {
  const strip = document.getElementById('health-strip');
  const pillCls = 'pill ' + (ok ? 'ok' : 'err');
  const pillText = (ok ? '● ' : '✕ ') + 'coordinator';
  setChildren(strip, el('span', { cls: pillCls, text: pillText }));
}
|
|
||||||
|
|
||||||
// ── render: GPU grid ─────────────────────────────────────────────
// leasedByGpu: "nodeId:gpuId" → total MB currently leased (from active leases)
// Rebuilds one card per (node, gpu) pair: header, stacked VRAM bar,
// usage label, status line, and a 20-sample usage sparkline. Also feeds
// the global sparkHistory buffer as a side effect.
function renderNodes(nodes, leasedByGpu) {
const grid = document.getElementById('gpu-grid');
if (!nodes || nodes.length === 0) {
setChildren(grid, el('div', { text: 'No nodes registered.', style: { color: 'var(--dim)', fontSize: '0.8em', padding: '0.5rem' } }));
return;
}

const cards = [];
for (const node of nodes) {
for (const gpu of node.gpus) {
const key = node.node_id + ':' + gpu.gpu_id;
// Guard against a zero/missing total so the divisions below are safe.
const total = gpu.vram_total_mb || 1;
const used = gpu.vram_used_mb;
const leased = leasedByGpu[key] || 0;
// Resident = nvidia-smi used minus actively leased; clamped to [0, used].
const resident = Math.max(0, Math.min(used - leased, used));
const pct = used / total;

// Record this sample for the sparkline (rolling window of 20).
if (!sparkHistory[key]) sparkHistory[key] = [];
sparkHistory[key].push(used);
if (sparkHistory[key].length > 20) sparkHistory[key].shift();

// Status buckets: ≥90% "saturated", ≥10% "N% used", else "idle".
const statusCls = pct >= 0.9 ? 'full' : pct >= 0.1 ? 'busy' : 'idle';
const statusText = pct >= 0.9 ? 'saturated' : pct >= 0.1 ? Math.round(pct * 100) + '% used' : 'idle';

const card = el('div', { cls: 'gpu-card' });
const nodeLabel = el('div', { cls: 'gpu-node', text: node.node_id.toUpperCase() + ' · GPU ' + gpu.gpu_id });
const nameLine = el('div', { cls: 'gpu-name', text: gpu.name || 'Unknown GPU' });

// Stacked bar: cyan (leased) → amber (resident) → dark bg (free).
const leasedPct = (leased / total * 100).toFixed(1);
const residentPct = (resident / total * 100).toFixed(1);
const track = el('div', { cls: 'vram-track' });
const fillLeased = el('div', { cls: 'vram-leased', style: { width: leasedPct + '%' } });
// The resident segment starts where the leased segment ends.
const fillResident = el('div', { cls: 'vram-resident', style: { left: leasedPct + '%', width: residentPct + '%' } });
append(track, fillLeased, fillResident);

// Breakdown label when something is allocated.
let labelText = (used / 1024).toFixed(1) + ' / ' + (total / 1024).toFixed(1) + ' GB';
if (leased > 0 || resident > 0) {
const parts = [];
if (leased > 0) parts.push((leased / 1024).toFixed(1) + 'G leased');
if (resident > 0) parts.push((resident / 1024).toFixed(1) + 'G resident');
labelText += ' (' + parts.join(' · ') + ')';
}

const vramLbl = el('div', { cls: 'vram-label', text: labelText });
const statusEl = el('div', { cls: 'gpu-status ' + statusCls, text: statusText });
const sparkTrack = el('div', { cls: 'spark-track' });
sparkTrack.appendChild(buildSparkline(sparkHistory[key], total));

append(card, nodeLabel, nameLine, track, vramLbl, statusEl, sparkTrack);
cards.push(card);
}
}

setChildren(grid, ...cards);
}
|
|
||||||
|
|
||||||
// ── render: warm models table ────────────────────────────────────
// Rebuild the "Warm Models" table body. Each row shows how long the model
// has been warm, formatted as "Ns", "Nm SSs", or "Nh MMm" depending on age.
// Rows with no first_seen timestamp fall back to "now" (age 0).
function renderResidents(residents) {
const tbody = document.getElementById('resident-body');
if (!residents || residents.length === 0) {
// Empty state: one dim row spanning all 4 columns.
const tr = document.createElement('tr');
const td = el('td', { cls: 'td-none', text: 'No warm models detected.' });
td.setAttribute('colspan', '4');
tr.appendChild(td);
setChildren(tbody, tr);
return;
}

// Server timestamps are epoch seconds; Date.now() is milliseconds.
const now = Date.now() / 1000;
const rows = residents.map(r => {
const warmSecs = now - (r.first_seen || now);
const warmText = warmSecs < 60
? Math.floor(warmSecs) + 's'
: warmSecs < 3600
? Math.floor(warmSecs / 60) + 'm ' + String(Math.floor(warmSecs % 60)).padStart(2, '0') + 's'
: Math.floor(warmSecs / 3600) + 'h ' + String(Math.floor((warmSecs % 3600) / 60)).padStart(2, '0') + 'm';

const tr = document.createElement('tr');
append(tr,
el('td', { cls: 'td-service', text: r.service }),
el('td', { cls: 'td-node', text: r.node_id }),
el('td', { cls: 'td-model', text: r.model_name || '—' }),
el('td', { cls: 'td-warm', text: warmText }),
);
return tr;
});

setChildren(tbody, ...rows);
}
|
|
||||||
|
|
||||||
// ── render: leases table ─────────────────────────────────────────
|
|
||||||
// Render the active-leases table. Each row shows holder, location, size,
// priority, and a TTL countdown bar (∞ for leases without an expiry).
function renderLeases(leases) {
  const tbody = document.getElementById('leases-body');

  if (!leases || leases.length === 0) {
    const emptyRow = document.createElement('tr');
    const emptyCell = el('td', { cls: 'td-none', text: 'No active leases.' });
    emptyCell.setAttribute('colspan', '5');
    emptyRow.appendChild(emptyCell);
    setChildren(tbody, emptyRow);
    return;
  }

  const nowSecs = Date.now() / 1000;

  const rows = leases.map(function (lease) {
    // Switch to GB display once the grant crosses 1 GiB.
    const sizeText = lease.mb_granted >= 1024
      ? (lease.mb_granted / 1024).toFixed(1) + ' GB'
      : lease.mb_granted + ' MB';

    const row = document.createElement('tr');

    const serviceCell = el('td', { cls: 'td-service', text: lease.holder_service });
    const nodeCell = el('td', { cls: 'td-node', text: lease.node_id + ' / GPU ' + lease.gpu_id });
    const sizeCell = el('td', { cls: 'td-mb', text: sizeText });
    const priorityCell = el('td', { cls: 'td-priority', text: 'p' + lease.priority });

    const ttlCell = document.createElement('td');
    if (!lease.expires_at) {
      // No expiry — show an infinity marker instead of a countdown.
      ttlCell.appendChild(el('span', { cls: 'ttl-label', text: '∞' }));
    } else {
      const remaining = Math.max(0, lease.expires_at - nowSecs);
      // Bar is full at 5 minutes (300 s) remaining and shrinks from there.
      const fillPct = Math.min(100, (remaining / 300) * 100);
      const label = remaining > 60
        ? Math.floor(remaining / 60) + 'm ' + String(Math.floor(remaining % 60)).padStart(2, '0') + 's'
        : Math.floor(remaining) + 's';

      const wrap = el('div', { cls: 'ttl-wrap' });
      const labelSpan = el('span', { cls: 'ttl-label', text: label });
      const barTrack = el('div', { cls: 'ttl-track' });
      barTrack.appendChild(el('div', { cls: 'ttl-fill', style: { width: fillPct.toFixed(1) + '%' } }));
      append(wrap, labelSpan, barTrack);
      ttlCell.appendChild(wrap);
    }

    append(row, serviceCell, nodeCell, sizeCell, priorityCell, ttlCell);
    return row;
  });

  setChildren(tbody, ...rows);
}
|
|
||||||
|
|
||||||
// ── error banner ─────────────────────────────────────────────────
|
|
||||||
// Show the error banner with the given message. Uses textContent so any
// server-provided text stays inert (no HTML injection). The local is named
// `banner` to avoid shadowing the global `el()` element helper.
function showError(msg) {
  const banner = document.getElementById('error-banner');
  banner.textContent = msg;
  banner.style.display = 'block';
}
|
|
||||||
// Hide the error banner.
function clearError() {
  document.getElementById('error-banner').style.display = 'none';
}
|
|
||||||
|
|
||||||
// ── poll ─────────────────────────────────────────────────────────
|
|
||||||
// Fetch all dashboard endpoints in parallel and re-render every panel.
// /api/nodes and /api/leases are required; /api/resident, /api/health and
// /api/services are best-effort and degrade to empty data when unavailable.
async function poll() {
  try {
    const [nodesRes, leasesRes, residentRes, healthRes, servicesRes] = await Promise.all([
      fetch('/api/nodes'),
      fetch('/api/leases'),
      fetch('/api/resident'),
      fetch('/api/health'),
      fetch('/api/services'),
    ]);
    // Bug fix: report the status of whichever required endpoint actually
    // failed — previously a failing /api/leases still showed nodesRes.status.
    if (!nodesRes.ok) throw new Error('API error: ' + nodesRes.status);
    if (!leasesRes.ok) throw new Error('API error: ' + leasesRes.status);
    const [nodesData, leasesData, residentData, servicesData] = await Promise.all([
      nodesRes.json(), leasesRes.json(),
      residentRes.ok ? residentRes.json() : Promise.resolve({ residents: [] }),
      servicesRes.ok ? servicesRes.json() : Promise.resolve({ services: [] }),
    ]);

    // Build per-GPU leased-MB index for the stacked bar.
    const leasedByGpu = {};
    for (const lease of (leasesData.leases || [])) {
      const key = lease.node_id + ':' + lease.gpu_id;
      leasedByGpu[key] = (leasedByGpu[key] || 0) + lease.mb_granted;
    }

    clearError();
    renderHealth(healthRes.ok);
    renderNodes(nodesData.nodes || [], leasedByGpu);
    renderServices(servicesData.services || []);
    renderLeases(leasesData.leases || []);
    renderResidents(residentData.residents || []);
  } catch (err) {
    // Any fetch rejection or non-OK required endpoint lands here.
    showError('Failed to reach coordinator: ' + err.message);
    renderHealth(false);
  }
}
|
|
||||||
|
|
||||||
// Kick off the first refresh immediately, then poll every 5 seconds.
poll();
setInterval(poll, 5000);
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,81 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.models import VRAMLease
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)

# How long request_lease() keeps polling for freed VRAM after evictions.
_DEFAULT_EVICTION_TIMEOUT_S = 10.0


class EvictionEngine:
    """Grants VRAM leases, evicting other leases when the fast path fails.

    Wraps a LeaseManager: a request first tries the fast path (enough free
    VRAM), then falls back to evicting candidate leases and polling until
    the freed capacity becomes grantable or the timeout elapses.
    """

    def __init__(
        self,
        lease_manager: LeaseManager,
        eviction_timeout_s: float = _DEFAULT_EVICTION_TIMEOUT_S,
    ) -> None:
        # lease_manager owns all accounting; this engine only orchestrates
        # the grant → evict → retry sequence around it.
        self.lease_manager = lease_manager
        self._timeout = eviction_timeout_s

    async def request_lease(
        self,
        node_id: str,
        gpu_id: int,
        mb: int,
        service: str,
        priority: int,
        agent_url: str,
        ttl_s: float = 0.0,
    ) -> VRAMLease | None:
        """Grant `mb` MB on (node_id, gpu_id), evicting other leases if needed.

        Returns the granted lease, or None when no eviction candidates exist
        or the freed VRAM did not become grantable within the timeout.
        Candidate selection is delegated to
        LeaseManager.get_eviction_candidates (leases with a strictly greater
        priority number than the requester's).
        """
        # Fast path: enough free VRAM
        lease = await self.lease_manager.try_grant(
            node_id, gpu_id, mb, service, priority, ttl_s
        )
        if lease is not None:
            return lease

        # Find eviction candidates
        candidates = self.lease_manager.get_eviction_candidates(
            node_id=node_id, gpu_id=gpu_id,
            needed_mb=mb, requester_priority=priority,
        )
        if not candidates:
            logger.info(
                "No eviction candidates for %s on %s:GPU%d (%dMB needed)",
                service, node_id, gpu_id, mb,
            )
            return None

        # Evict candidates
        freed_mb = sum(c.mb_granted for c in candidates)
        logger.info(
            "Evicting %d lease(s) to free %dMB for %s",
            len(candidates), freed_mb, service,
        )
        for candidate in candidates:
            await self._evict_lease(candidate, agent_url)

        # Wait for evictions to free up VRAM (poll with timeout).
        # Today _evict_lease releases accounting synchronously, so the first
        # retry usually succeeds; the loop exists for when eviction becomes
        # a real asynchronous agent-side operation.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + self._timeout
        while loop.time() < deadline:
            lease = await self.lease_manager.try_grant(
                node_id, gpu_id, mb, service, priority, ttl_s
            )
            if lease is not None:
                return lease
            await asyncio.sleep(0.1)

        logger.warning("Eviction timed out for %s after %.1fs", service, self._timeout)
        return None

    async def _evict_lease(self, lease: VRAMLease, agent_url: str) -> None:
        """Release lease accounting. Process-level eviction deferred to Plan B."""
        # agent_url is accepted but unused for now — kept for Plan B when the
        # agent process itself must be told to free the VRAM.
        await self.lease_manager.release(lease.lease_id)

    async def _call_agent_evict(self, agent_url: str, lease: VRAMLease) -> bool:
        """POST /evict to the agent. Stub for v1 — real process lookup in Plan B."""
        # Currently never called; always reports success.
        return True
|
|
||||||
|
|
@ -1,130 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
from circuitforge_core.resources.models import ResidentAllocation, VRAMLease
|
|
||||||
|
|
||||||
|
|
||||||
class LeaseManager:
    """In-memory accounting of VRAM leases across all registered GPUs.

    try_grant/release are serialized by an asyncio.Lock. The synchronous
    read helpers (get_eviction_candidates, list_leases, ...) run without it;
    this is safe in the coordinator's single-event-loop model because they
    never await mid-iteration.
    """

    def __init__(self) -> None:
        # lease_id -> lease for every currently-granted lease.
        self._leases: dict[str, VRAMLease] = {}
        # (node_id, gpu_id) -> total VRAM MB, filled by register_gpu().
        self._gpu_total: dict[tuple[str, int], int] = {}
        # (node_id, gpu_id) -> MB currently out on lease.
        self._gpu_used: dict[tuple[str, int], int] = defaultdict(int)
        self._lock = asyncio.Lock()
        # Resident allocations — keyed "node_id:service", updated by heartbeat.
        # No lock needed: only the single heartbeat task writes this dict.
        self._residents: dict[str, ResidentAllocation] = {}

    def register_gpu(self, node_id: str, gpu_id: int, total_mb: int) -> None:
        """Declare (or update) a GPU's total VRAM capacity."""
        self._gpu_total[(node_id, gpu_id)] = total_mb

    def gpu_total_mb(self, node_id: str, gpu_id: int) -> int:
        """Total VRAM MB for the GPU, or 0 if it was never registered."""
        return self._gpu_total.get((node_id, gpu_id), 0)

    def used_mb(self, node_id: str, gpu_id: int) -> int:
        """MB currently leased on the GPU (0 if unknown).

        Fix: uses .get() rather than indexing so a read never inserts a
        spurious zero entry into the defaultdict for an unknown GPU.
        """
        return self._gpu_used.get((node_id, gpu_id), 0)

    async def try_grant(
        self,
        node_id: str,
        gpu_id: int,
        mb: int,
        service: str,
        priority: int,
        ttl_s: float = 0.0,
    ) -> VRAMLease | None:
        """Grant `mb` MB if free capacity allows; return the lease or None."""
        async with self._lock:
            total = self._gpu_total.get((node_id, gpu_id), 0)
            used = self._gpu_used[(node_id, gpu_id)]
            if total - used < mb:
                return None
            lease = VRAMLease.create(
                gpu_id=gpu_id, node_id=node_id, mb=mb,
                service=service, priority=priority, ttl_s=ttl_s,
            )
            self._leases[lease.lease_id] = lease
            self._gpu_used[(node_id, gpu_id)] += mb
            return lease

    async def release(self, lease_id: str) -> bool:
        """Release a lease and return True; False when the id is unknown."""
        async with self._lock:
            lease = self._leases.pop(lease_id, None)
            if lease is None:
                return False
            self._gpu_used[(lease.node_id, lease.gpu_id)] -= lease.mb_granted
            return True

    def get_eviction_candidates(
        self,
        node_id: str,
        gpu_id: int,
        needed_mb: int,
        requester_priority: int,
    ) -> list[VRAMLease]:
        """Select leases on the GPU to evict to free at least needed_mb.

        Only leases with a strictly greater priority number than the
        requester's are candidates; they are taken greedily, largest
        priority number first, until the freed total covers needed_mb
        (the result may still fall short if candidates run out).
        """
        candidates = [
            lease for lease in self._leases.values()
            if lease.node_id == node_id
            and lease.gpu_id == gpu_id
            and lease.priority > requester_priority
        ]
        candidates.sort(key=lambda lease: lease.priority, reverse=True)
        selected: list[VRAMLease] = []
        freed = 0
        for candidate in candidates:
            selected.append(candidate)
            freed += candidate.mb_granted
            if freed >= needed_mb:
                break
        return selected

    def list_leases(
        self, node_id: str | None = None, gpu_id: int | None = None
    ) -> list[VRAMLease]:
        """Return leases, optionally filtered by node and/or GPU."""
        return [
            lease for lease in self._leases.values()
            if (node_id is None or lease.node_id == node_id)
            and (gpu_id is None or lease.gpu_id == gpu_id)
        ]

    def all_leases(self) -> list[VRAMLease]:
        """Return every currently-granted lease."""
        return list(self._leases.values())

    # ── resident tracking ────────────────────────────────────────────

    def set_residents_for_node(
        self,
        node_id: str,
        residents: list[tuple[str, str | None]],  # (service, model_name)
    ) -> None:
        """
        Replace the resident snapshot for a node.

        Preserves first_seen for entries whose service+model_name are unchanged,
        so the dashboard can show how long a model has been warm.
        """
        new_keys = {f"{node_id}:{service}" for service, _ in residents}

        # Remove stale entries (service no longer running on this node).
        for key in list(self._residents):
            if key.startswith(f"{node_id}:") and key not in new_keys:
                del self._residents[key]

        # Upsert: preserve first_seen when model is unchanged, reset otherwise.
        for service, model_name in residents:
            key = f"{node_id}:{service}"
            existing = self._residents.get(key)
            if existing is not None and existing.model_name == model_name:
                continue  # same model still loaded — keep original first_seen
            self._residents[key] = ResidentAllocation(
                service=service,
                node_id=node_id,
                model_name=model_name,
            )

    def all_residents(self) -> list[ResidentAllocation]:
        """Return the current resident snapshot across all nodes."""
        return list(self._residents.values())

    def resident_keys(self) -> set[str]:
        """Return set of 'node_id:service' strings for currently-warm services."""
        return set(self._residents.keys())
|
|
||||||
|
|
@ -1,74 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentRecord
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
|
|
||||||
_WARM_BONUS_MB = 1000
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class _Scored:
|
|
||||||
node_id: str
|
|
||||||
gpu_id: int
|
|
||||||
vram_free_mb: int
|
|
||||||
effective_free_mb: int
|
|
||||||
can_fit: bool
|
|
||||||
warm: bool
|
|
||||||
|
|
||||||
|
|
||||||
def select_node(
|
|
||||||
agents: "dict[str, AgentRecord]",
|
|
||||||
service: str,
|
|
||||||
profile_registry: "ProfileRegistry",
|
|
||||||
resident_keys: set[str],
|
|
||||||
) -> tuple[str, int] | None:
|
|
||||||
"""
|
|
||||||
Pick the best (node_id, gpu_id) for the requested service.
|
|
||||||
Warm nodes (service already running) get priority, then sorted by free VRAM.
|
|
||||||
Returns None if no suitable node exists.
|
|
||||||
"""
|
|
||||||
service_max_mb = _find_service_max_mb(service, profile_registry)
|
|
||||||
if service_max_mb is None:
|
|
||||||
return None # service not in any profile
|
|
||||||
|
|
||||||
candidates: list[_Scored] = []
|
|
||||||
for node_id, record in agents.items():
|
|
||||||
if not record.online:
|
|
||||||
continue
|
|
||||||
for gpu in record.gpus:
|
|
||||||
warm = f"{node_id}:{service}" in resident_keys
|
|
||||||
effective = gpu.vram_free_mb + (_WARM_BONUS_MB if warm else 0)
|
|
||||||
can_fit = gpu.vram_free_mb >= service_max_mb
|
|
||||||
candidates.append(_Scored(
|
|
||||||
node_id=node_id,
|
|
||||||
gpu_id=gpu.gpu_id,
|
|
||||||
vram_free_mb=gpu.vram_free_mb,
|
|
||||||
effective_free_mb=effective,
|
|
||||||
can_fit=can_fit,
|
|
||||||
warm=warm,
|
|
||||||
))
|
|
||||||
if not candidates:
|
|
||||||
return None
|
|
||||||
# Prefer: (1) warm nodes (model already resident — no cold start)
|
|
||||||
# (2) cold nodes that can fit the service (free >= half of max_mb)
|
|
||||||
# Fallback: best-effort node when nothing fits and nothing is warm
|
|
||||||
# (coordinator will attempt to start the service anyway; it may evict or fail)
|
|
||||||
# Note: resident_keys are per-node, not per-GPU. On multi-GPU nodes, the warm
|
|
||||||
# bonus applies to all GPUs on the node. This is a known coarseness —
|
|
||||||
# per-GPU resident tracking requires a resident_key format change.
|
|
||||||
preferred = [c for c in candidates if c.warm or c.can_fit]
|
|
||||||
pool = preferred if preferred else candidates
|
|
||||||
best = max(pool, key=lambda c: (c.warm, c.effective_free_mb))
|
|
||||||
return best.node_id, best.gpu_id
|
|
||||||
|
|
||||||
|
|
||||||
def _find_service_max_mb(service: str, profile_registry: "ProfileRegistry") -> int | None:
|
|
||||||
for profile in profile_registry.list_public():
|
|
||||||
svc = profile.services.get(service)
|
|
||||||
if svc is not None:
|
|
||||||
return svc.max_mb
|
|
||||||
return None
|
|
||||||
|
|
@ -1,85 +0,0 @@
|
||||||
"""
|
|
||||||
circuitforge_core.resources.coordinator.node_store — SQLite persistence for known agent nodes.
|
|
||||||
|
|
||||||
Gives the coordinator restart-safe memory of which nodes have ever registered.
|
|
||||||
On startup the coordinator reloads all known nodes and immediately probes them;
|
|
||||||
nodes that respond come back online within one heartbeat cycle (~10 s) without
|
|
||||||
any manual intervention on the agent hosts.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import sqlite3
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)

# Default on-disk location for the known-nodes database.
_DEFAULT_DB_PATH = Path.home() / ".local" / "share" / "circuitforge" / "cf-orch-nodes.db"
_STALE_AGE_DAYS = 30  # nodes unseen for this long are pruned automatically


class NodeStore:
    """
    SQLite-backed registry of agent nodes that survives coordinator restarts.

    Thread-safe for single-writer use (coordinator runs in one asyncio thread).
    """

    def __init__(self, db_path: Path = _DEFAULT_DB_PATH) -> None:
        self.db_path = db_path
        # Ensure the parent directory exists before connecting.
        db_path.parent.mkdir(parents=True, exist_ok=True)
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)
        self._conn.row_factory = sqlite3.Row
        self._migrate()
        logger.debug("NodeStore initialised at %s", db_path)

    def _migrate(self) -> None:
        """Create the known_nodes table when it does not exist yet."""
        self._conn.executescript("""
            CREATE TABLE IF NOT EXISTS known_nodes (
                node_id TEXT PRIMARY KEY,
                agent_url TEXT NOT NULL,
                last_seen REAL NOT NULL
            );
        """)
        self._conn.commit()

    def upsert(self, node_id: str, agent_url: str) -> None:
        """Record or update a node. Called on every successful registration."""
        row = (node_id, agent_url, time.time())
        self._conn.execute(
            """
            INSERT INTO known_nodes (node_id, agent_url, last_seen)
            VALUES (?, ?, ?)
            ON CONFLICT(node_id) DO UPDATE SET
                agent_url = excluded.agent_url,
                last_seen = excluded.last_seen
            """,
            row,
        )
        self._conn.commit()

    def all(self) -> list[tuple[str, str]]:
        """Return all known (node_id, agent_url) pairs, most recent first."""
        query = "SELECT node_id, agent_url FROM known_nodes ORDER BY last_seen DESC"
        return [(row["node_id"], row["agent_url"]) for row in self._conn.execute(query)]

    def remove(self, node_id: str) -> None:
        """Forget a node entirely."""
        self._conn.execute("DELETE FROM known_nodes WHERE node_id = ?", (node_id,))
        self._conn.commit()

    def prune_stale(self, max_age_days: int = _STALE_AGE_DAYS) -> int:
        """Delete nodes not seen within max_age_days. Returns count removed."""
        cutoff = time.time() - max_age_days * 86400
        cursor = self._conn.execute(
            "DELETE FROM known_nodes WHERE last_seen < ?", (cutoff,)
        )
        self._conn.commit()
        removed = cursor.rowcount
        if removed:
            logger.info("NodeStore: pruned %d stale node(s) (>%d days old)", removed, max_age_days)
        return removed

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
||||||
# circuitforge_core/resources/coordinator/profile_registry.py
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from circuitforge_core.resources.models import GpuInfo
|
|
||||||
from circuitforge_core.resources.profiles.schema import GpuProfile, load_profile
|
|
||||||
|
|
||||||
_PUBLIC_DIR = Path(__file__).parent.parent / "profiles" / "public"

# (minimum primary-GPU VRAM in MB, profile name) — checked top-down, so the
# first threshold the hardware clears wins.
_PROFILE_THRESHOLDS = [
    (22000, "single-gpu-24gb"),
    (14000, "single-gpu-16gb"),
    (8000, "single-gpu-8gb"),
    (5500, "single-gpu-6gb"),
    (3500, "single-gpu-4gb"),
    (0, "single-gpu-2gb"),
]

_log = logging.getLogger(__name__)


class ProfileRegistry:
    """Loads GPU profiles from YAML and matches them to detected hardware."""

    def __init__(self, extra_dirs: list[Path] | None = None) -> None:
        self._profiles: dict[str, GpuProfile] = {}
        self._load_dir(_PUBLIC_DIR)
        for extra in extra_dirs or []:
            if extra.exists():
                self._load_dir(extra)

    def _load_dir(self, directory: Path) -> None:
        """Load every *.yaml profile in the directory, skipping invalid files."""
        for candidate in directory.glob("*.yaml"):
            try:
                loaded = load_profile(candidate)
                self._profiles[loaded.name] = loaded
            except Exception as exc:
                _log.warning("Skipping %s: %s", candidate, exc)

    def load(self, path: Path) -> GpuProfile:
        """Load a single profile file, register it, and return it."""
        loaded = load_profile(path)
        self._profiles[loaded.name] = loaded
        return loaded

    def list_public(self) -> list[GpuProfile]:
        """Return the GPU-matchable profiles only.

        CPU profiles (cpu-*) are intentionally excluded — this listing is
        used to match GPU hardware. CPU inference nodes self-select their
        profile via the CLI and are not listed for lease matching.
        """
        return [
            profile for profile in self._profiles.values()
            if profile.name.startswith("single-gpu-")
        ]

    def get(self, name: str) -> GpuProfile | None:
        """Return the named profile, or None when unknown."""
        return self._profiles.get(name)

    def auto_detect(self, gpus: list[GpuInfo]) -> GpuProfile:
        """Pick the profile matching the primary GPU's VRAM (largest tier first)."""
        primary_mb = gpus[0].vram_total_mb if gpus else 0
        for minimum_mb, name in _PROFILE_THRESHOLDS:
            if primary_mb >= minimum_mb:
                match = self._profiles.get(name)
                if match:
                    return match
        # Last resort: the smallest public profile is expected to exist.
        return self._profiles["single-gpu-2gb"]
|
|
||||||
|
|
@ -1,173 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import dataclasses
|
|
||||||
import time
|
|
||||||
import uuid
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Literal
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ServiceAllocation:
    """One caller's claim on a running service instance."""

    allocation_id: str  # UUID4 string, unique per allocation
    service: str
    node_id: str
    gpu_id: int
    model: str | None
    caller: str
    url: str
    created_at: float   # epoch seconds at allocation time
    expires_at: float   # epoch seconds; 0 = no expiry


@dataclass
class ServiceInstance:
    """State of one running service container on a specific (node, GPU)."""

    service: str
    node_id: str
    gpu_id: int
    state: Literal["starting", "running", "idle", "stopped"]
    model: str | None
    url: str | None
    idle_since: float | None = None  # epoch seconds when it last went idle
    health_path: str = "/health"


class ServiceRegistry:
    """
    In-memory registry of service allocations and instance state.

    Allocations: per-caller request — many per service instance.
    Instances: per (service, node_id, gpu_id) — one per running container.
    """

    def __init__(self) -> None:
        self._allocations: dict[str, ServiceAllocation] = {}
        self._instances: dict[str, ServiceInstance] = {}  # key: "service:node_id:gpu_id"

    # ── allocation API ────────────────────────────────────────────────

    def allocate(
        self,
        service: str,
        node_id: str,
        gpu_id: int,
        model: str | None,
        url: str,
        caller: str,
        ttl_s: float,
    ) -> ServiceAllocation:
        """Record a new allocation; revives an idle/stopped instance to 'running'.

        ttl_s <= 0 means the allocation never expires (expires_at == 0.0).
        """
        # Fix: take ONE timestamp so expires_at is exactly created_at + ttl_s
        # (previously two time.time() calls let the two fields drift apart).
        now = time.time()
        alloc = ServiceAllocation(
            allocation_id=str(uuid.uuid4()),
            service=service,
            node_id=node_id,
            gpu_id=gpu_id,
            model=model,
            caller=caller,
            url=url,
            created_at=now,
            expires_at=now + ttl_s if ttl_s > 0 else 0.0,
        )
        self._allocations[alloc.allocation_id] = alloc

        # If an instance exists in idle/stopped state, mark it running again.
        key = f"{service}:{node_id}:{gpu_id}"
        if key in self._instances:
            inst = self._instances[key]
            if inst.state in ("idle", "stopped"):
                self._instances[key] = dataclasses.replace(
                    inst, state="running", idle_since=None
                )
        return alloc

    def release(self, allocation_id: str) -> bool:
        """Drop an allocation; returns False when the id is unknown.

        When the released allocation was the last one on its instance, the
        instance transitions to 'idle' with idle_since set to now.
        """
        alloc = self._allocations.pop(allocation_id, None)
        if alloc is None:
            return False
        key = f"{alloc.service}:{alloc.node_id}:{alloc.gpu_id}"
        if self.active_allocations(alloc.service, alloc.node_id, alloc.gpu_id) == 0:
            if key in self._instances:
                self._instances[key] = dataclasses.replace(
                    self._instances[key], state="idle", idle_since=time.time()
                )
        return True

    def active_allocations(self, service: str, node_id: str, gpu_id: int) -> int:
        """Count allocations currently held against one instance."""
        return sum(
            1 for a in self._allocations.values()
            if a.service == service and a.node_id == node_id and a.gpu_id == gpu_id
        )

    # ── instance API ─────────────────────────────────────────────────

    def upsert_instance(
        self,
        service: str,
        node_id: str,
        gpu_id: int,
        state: Literal["starting", "running", "idle", "stopped"],
        model: str | None,
        url: str | None,
        health_path: str = "/health",
    ) -> ServiceInstance:
        """Create or replace the instance record for (service, node, GPU)."""
        key = f"{service}:{node_id}:{gpu_id}"
        existing = self._instances.get(key)
        idle_since: float | None = None
        if state == "idle":
            # Preserve idle_since if already idle; set now if transitioning into idle.
            idle_since = existing.idle_since if (existing and existing.state == "idle") else time.time()
        inst = ServiceInstance(
            service=service, node_id=node_id, gpu_id=gpu_id,
            state=state, model=model, url=url, idle_since=idle_since,
            health_path=health_path,
        )
        self._instances[key] = inst
        return inst

    def get_allocation(self, allocation_id: str) -> ServiceAllocation | None:
        """Return the allocation, or None when unknown."""
        return self._allocations.get(allocation_id)

    def sweep_expired_allocations(self) -> list[str]:
        """
        Remove all allocations whose TTL has elapsed and transition the
        corresponding instance to 'idle' if no active allocations remain.
        Returns the list of expired allocation_ids.
        """
        now = time.time()
        expired = [
            alloc_id
            for alloc_id, alloc in self._allocations.items()
            if alloc.expires_at > 0 and now > alloc.expires_at
        ]
        for alloc_id in expired:
            self.release(alloc_id)
        return expired

    def all_allocations(self) -> list[ServiceAllocation]:
        """Return every live allocation."""
        return list(self._allocations.values())

    def all_instances(self) -> list[ServiceInstance]:
        """Return every known instance record."""
        return list(self._instances.values())

    def mark_stopped(self, service: str, node_id: str, gpu_id: int) -> None:
        """Transition an instance to 'stopped' state and clear idle_since."""
        key = f"{service}:{node_id}:{gpu_id}"
        if key in self._instances:
            self._instances[key] = dataclasses.replace(
                self._instances[key], state="stopped", idle_since=None
            )

    def idle_past_timeout(self, idle_stop_config: dict[str, int]) -> list[ServiceInstance]:
        """
        Return instances in 'idle' state whose idle time exceeds their configured timeout.
        idle_stop_config: {service_name: seconds} — 0 means never stop automatically.
        """
        now = time.time()
        result = []
        for inst in self._instances.values():
            if inst.state != "idle" or inst.idle_since is None:
                continue
            timeout = idle_stop_config.get(inst.service, 0)
            if timeout > 0 and (now - inst.idle_since) >= timeout:
                result.append(inst)
        return result
|
|
||||||
|
|
@ -1,250 +0,0 @@
|
||||||
"""
|
|
||||||
cf-docuvision — managed document understanding service.
|
|
||||||
|
|
||||||
Wraps ByteDance/Dolphin-v2 (Qwen2.5-VL backbone) behind a simple HTTP API.
|
|
||||||
Managed by cf-orch; started/stopped as a ProcessSpec service.
|
|
||||||
|
|
||||||
API
|
|
||||||
---
|
|
||||||
GET /health → {"status": "ok", "model": "<path>"}
|
|
||||||
POST /extract → ExtractResponse
|
|
||||||
|
|
||||||
Usage (standalone)::
|
|
||||||
|
|
||||||
python -m circuitforge_core.resources.docuvision.app \\
|
|
||||||
--model /Library/Assets/LLM/docuvision/models/dolphin-v2 \\
|
|
||||||
--port 8003 --gpu-id 0
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import base64
|
|
||||||
import io
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from contextlib import asynccontextmanager
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import uvicorn
|
|
||||||
from fastapi import FastAPI, HTTPException
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Module-level state — populated by _load_model() on first /extract call
_model: Any = None        # the loaded causal-LM model object (None until first use)
_processor: Any = None    # the matching processor, loaded alongside _model
_model_path: str = ""     # model directory; presumably set from the --model CLI arg — parser not visible here, verify
_device: str = "cpu"      # "cuda" when torch detects a GPU at load time, else "cpu"
|
|
||||||
|
|
||||||
|
|
||||||
# ── lazy loader ───────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _load_model() -> None:
    """Lazy-load Dolphin-v2. Called once on first /extract request.

    Populates the module-level _model/_processor/_device globals; subsequent
    calls return immediately once _model is set.
    """
    global _model, _processor, _device

    if _model is not None:
        return

    # Imported lazily so the service can start (and answer /health) without
    # pulling torch/transformers into memory.
    import torch
    from transformers import AutoProcessor, AutoModelForCausalLM

    logger.info("Loading Dolphin-v2 from %s ...", _model_path)
    _device = "cuda" if torch.cuda.is_available() else "cpu"

    # NOTE(review): trust_remote_code=True executes Python shipped with the
    # model directory — acceptable only while _model_path points at a locally
    # vetted checkout; never point it at an untrusted download.
    _processor = AutoProcessor.from_pretrained(
        _model_path,
        trust_remote_code=True,
    )
    _model = AutoModelForCausalLM.from_pretrained(
        _model_path,
        trust_remote_code=True,
        # float16 on GPU halves VRAM; CPU stays at float32.
        torch_dtype=torch.float16 if _device == "cuda" else torch.float32,
        device_map=_device,
    )
    _model.eval()  # inference mode (e.g. disables dropout)
    logger.info("Dolphin-v2 loaded on %s", _device)
|
|
||||||
|
|
||||||
|
|
||||||
# ── FastAPI app ───────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@asynccontextmanager
async def _lifespan(app: FastAPI):
    # Intentionally empty: the model is lazy-loaded on the first /extract
    # request (see _load_model), so startup/shutdown need no work.
    yield


app = FastAPI(title="cf-docuvision", lifespan=_lifespan)
|
|
||||||
|
|
||||||
|
|
||||||
# ── request / response models ─────────────────────────────────────────────────
|
|
||||||
|
|
||||||
class ExtractRequest(BaseModel):
    """
    Request body for POST /extract.

    Either image_b64 (base64-encoded bytes) or image_path (absolute path) must
    be provided; image_b64 wins when both are set. hint guides the extraction
    mode:
    - "auto" - Dolphin-v2 detects layout and element types automatically
    - "table" - optimise for tabular data (receipts, invoices, forms)
    - "text" - optimise for dense prose (contracts, letters)
    - "form" - optimise for form field extraction
    """
    image_b64: str | None = None   # base64-encoded image bytes
    image_path: str | None = None  # absolute path readable by this process
    hint: str = "auto"             # unknown values fall back to "auto"
|
|
||||||
|
|
||||||
|
|
||||||
class ElementOut(BaseModel):
    """One layout element extracted from the page."""
    type: str  # heading | paragraph | list | table | figure | formula | code
    text: str
    bbox: list[float] | None = None  # [x0, y0, x1, y1] normalised 0-1 if available
|
|
||||||
|
|
||||||
|
|
||||||
class TableOut(BaseModel):
    """A table rendered as HTML, with optional normalised bounding box."""
    html: str
    bbox: list[float] | None = None  # same [x0, y0, x1, y1] convention as ElementOut
|
|
||||||
|
|
||||||
|
|
||||||
class ExtractResponse(BaseModel):
    """Full extraction result for one image (see the /extract route)."""
    elements: list[ElementOut]  # all elements, tables included
    raw_text: str               # newline-joined text of all elements
    tables: list[TableOut]      # table elements only, as HTML
    metadata: dict[str, Any]    # hint, image width/height, model path, device
|
|
||||||
|
|
||||||
|
|
||||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
# Prompt sent to Dolphin-v2 for each ExtractRequest.hint value. Unknown
# hints fall back to the "auto" entry (see the /extract route).
_HINT_PROMPTS: dict[str, str] = {
    "auto": "Parse this document. Extract all elements with their types and text content.",
    "table": "Extract all tables from this document as structured HTML. Also extract any line-item text.",
    "text": "Extract all text from this document preserving paragraph and heading structure.",
    "form": "Extract all form fields from this document. Return field labels and their values.",
}
|
|
||||||
|
|
||||||
|
|
||||||
def _image_from_request(req: ExtractRequest):
    """Decode the request payload into an RGB PIL Image.

    image_b64 takes precedence over image_path. Raises HTTPException 404
    for a missing path and 422 when neither field is supplied.
    """
    from PIL import Image

    if req.image_b64:
        decoded = base64.b64decode(req.image_b64)
        buffer = io.BytesIO(decoded)
        return Image.open(buffer).convert("RGB")

    if req.image_path:
        from pathlib import Path

        candidate = Path(req.image_path)
        if not candidate.exists():
            raise HTTPException(status_code=404, detail=f"image_path not found: {req.image_path}")
        return Image.open(candidate).convert("RGB")

    raise HTTPException(status_code=422, detail="Either image_b64 or image_path must be provided")
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_dolphin_output(raw: str) -> tuple[list[ElementOut], list[TableOut], str]:
    """
    Parse Dolphin-v2's structured output into elements and tables.

    Dolphin-v2 returns a JSON array of element dicts with keys:
        type, text, [html], [bbox]

    Falls back gracefully if the model returns plain text, and tolerates
    malformed entries inside the JSON array instead of failing the request.
    """
    elements: list[ElementOut] = []
    tables: list[TableOut] = []

    # Try JSON parse first
    try:
        parsed = json.loads(raw)
        if isinstance(parsed, list):
            for item in parsed:
                # Bug fix: a non-dict entry (e.g. a bare string in the array)
                # previously raised AttributeError on .get(), which escaped
                # the except clause below and surfaced as a 500 to callers.
                if not isinstance(item, dict):
                    elements.append(ElementOut(type="paragraph", text=str(item)))
                    continue
                etype = item.get("type", "paragraph")
                text = item.get("text", "")
                bbox = item.get("bbox")
                if etype == "table":
                    # Tables are reported twice: once in `tables` as HTML and
                    # once in the unified `elements` stream.
                    tables.append(TableOut(html=item.get("html", text), bbox=bbox))
                elements.append(ElementOut(type=etype, text=text, bbox=bbox))
            raw_text = "\n".join(e.text for e in elements)
            return elements, tables, raw_text
    except (json.JSONDecodeError, TypeError):
        pass

    # Plain-text fallback: treat entire output as a single paragraph
    elements = [ElementOut(type="paragraph", text=raw.strip())]
    return elements, tables, raw.strip()
|
|
||||||
|
|
||||||
|
|
||||||
# ── routes ────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@app.get("/health")
async def health() -> dict[str, str]:
    # Liveness probe. Reports the configured model path (set in main());
    # it does NOT indicate whether the model is actually loaded yet.
    return {"status": "ok", "model": _model_path}
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/extract", response_model=ExtractResponse)
async def extract(req: ExtractRequest) -> ExtractResponse:
    """Run Dolphin-v2 on one image and return structured elements.

    Lazy-loads the model on first call. Raises HTTPException 404/422 for
    bad image inputs (see _image_from_request).
    """
    _load_model()

    image = _image_from_request(req)
    prompt = _HINT_PROMPTS.get(req.hint, _HINT_PROMPTS["auto"])

    # Local import: torch is only guaranteed importable here because
    # _load_model above already imported it successfully.
    import torch

    inputs = _processor(
        text=prompt,
        images=image,
        return_tensors="pt",
    ).to(_device)

    with torch.no_grad():
        output_ids = _model.generate(
            **inputs,
            max_new_tokens=2048,
            do_sample=False,  # greedy decoding: deterministic output
        )

    # Decode only the newly generated tokens
    input_len = inputs["input_ids"].shape[1]
    raw_output = _processor.decode(
        output_ids[0][input_len:],
        skip_special_tokens=True,
    )

    elements, tables, raw_text = _parse_dolphin_output(raw_output)

    w, h = image.size

    return ExtractResponse(
        elements=elements,
        raw_text=raw_text,
        tables=tables,
        metadata={
            "hint": req.hint,
            "width": w,
            "height": h,
            "model": _model_path,
            "device": _device,
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
# ── CLI entry point ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def main() -> None:
    """CLI entry point: parse args, record the model path, start uvicorn."""
    parser = argparse.ArgumentParser(description="cf-docuvision service")
    parser.add_argument("--model", required=True, help="Path to Dolphin-v2 model directory")
    parser.add_argument("--port", type=int, default=8003)
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--gpu-id", type=int, default=0)
    args = parser.parse_args()

    # Stash the model path for the lazy loader (_load_model).
    global _model_path
    _model_path = args.model

    import os
    # Safe to set here: torch import is deferred to _load_model, so CUDA
    # device visibility is decided before torch initialises. setdefault
    # lets an explicitly exported CUDA_VISIBLE_DEVICES win over --gpu-id.
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(args.gpu_id))

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
    uvicorn.run(app, host=args.host, port=args.port)


if __name__ == "__main__":
    main()
|
|
||||||
|
|
@ -1,137 +0,0 @@
|
||||||
"""Generic OpenAI-compatible inference server for HuggingFace causal LMs."""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import time
|
|
||||||
import uuid
|
|
||||||
from contextlib import asynccontextmanager
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import uvicorn
|
|
||||||
from fastapi import FastAPI, HTTPException
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
||||||
|
|
||||||
# Module-level state — populated by _load_model() in main() before serving.
_model: Any = None       # HF causal LM
_tokenizer: Any = None   # matching tokenizer
_model_id: str = ""      # model path; reported by /health and /v1/models
_device: str = "cpu"     # "cuda:<gpu_id>" when CUDA is available
|
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    # No startup/shutdown work: the model is loaded synchronously in
    # main() before uvicorn starts serving.
    yield


app = FastAPI(lifespan=lifespan)
|
|
||||||
|
|
||||||
|
|
||||||
class Message(BaseModel):
    """One chat turn in the OpenAI chat-completions format."""
    role: str     # forwarded verbatim to the tokenizer's chat template
    content: str
|
|
||||||
|
|
||||||
|
|
||||||
class ChatRequest(BaseModel):
    """Subset of the OpenAI /v1/chat/completions request body."""
    model: str | None = None       # accepted but ignored; this server hosts one model
    messages: list[Message]
    max_tokens: int | None = 512
    temperature: float | None = 0.7  # 0 selects greedy decoding
    stream: bool | None = False      # True is rejected with HTTP 501
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
def health() -> dict[str, str]:
    # Liveness probe; _model_id is "" until _load_model() has run in main().
    return {"status": "ok", "model": _model_id}
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/v1/models")
def list_models() -> dict[str, Any]:
    """OpenAI-compatible model listing; always exactly one entry."""
    entry = {"id": _model_id, "object": "model", "owned_by": "cf-orch"}
    return {"object": "list", "data": [entry]}
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat/completions")
def chat_completions(req: ChatRequest) -> dict[str, Any]:
    """OpenAI-compatible, non-streaming chat completion on the loaded model.

    Raises HTTPException 503 before the model is loaded, 501 for
    stream=True, and 500 when the chat template rejects the conversation.
    """
    if _model is None:
        raise HTTPException(503, detail="Model not loaded")
    if req.stream:
        raise HTTPException(501, detail="Streaming not supported")

    conversation = [{"role": m.role, "content": m.content} for m in req.messages]
    try:
        encoded = _tokenizer.apply_chat_template(
            conversation,
            return_tensors="pt",
            add_generation_prompt=True,
        )
        # transformers 5.x returns BatchEncoding; 4.x returned a bare tensor
        input_ids = (encoded.input_ids if hasattr(encoded, "input_ids") else encoded).to(_device)
    except Exception as exc:
        raise HTTPException(500, detail=f"Tokenisation failed: {exc}")

    # None means "use default"; 0 is a valid temperature (greedy).
    max_new = req.max_tokens or 512
    temp = req.temperature if req.temperature is not None else 0.7
    gen_kwargs: dict[str, Any] = {
        "max_new_tokens": max_new,
        "do_sample": temp > 0,
        "pad_token_id": _tokenizer.eos_token_id,
    }
    if temp > 0:
        # temperature is only meaningful when sampling is enabled
        gen_kwargs["temperature"] = temp

    with torch.inference_mode():
        output_ids = _model.generate(input_ids, **gen_kwargs)

    # Strip the prompt: keep only the newly generated tokens.
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    reply = _tokenizer.decode(new_tokens, skip_special_tokens=True)

    return {
        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": _model_id,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": reply},
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": input_ids.shape[-1],
            "completion_tokens": len(new_tokens),
            "total_tokens": input_ids.shape[-1] + len(new_tokens),
        },
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _load_model(model_path: str, gpu_id: int) -> None:
    """Load tokenizer + model onto the chosen GPU (or CPU) into module state."""
    global _model, _tokenizer, _model_id, _device
    _device = f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu"
    _model_id = model_path  # surfaced via /health and /v1/models
    _tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    _model = AutoModelForCausalLM.from_pretrained(
        model_path,
        dtype=torch.float16 if "cuda" in _device else torch.float32,  # fp16 only on GPU
        device_map={"": _device},  # pin the entire model to a single device
        trust_remote_code=True,
    )
    _model.eval()
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """CLI entry point: load the model synchronously, then serve forever."""
    parser = argparse.ArgumentParser(description="cf-orch generic LLM inference server")
    parser.add_argument("--model", required=True)
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--gpu-id", type=int, default=0)
    args = parser.parse_args()
    # Blocking load before serving: the OpenAI endpoints are usable as
    # soon as uvicorn accepts connections.
    _load_model(args.model, args.gpu_id)
    uvicorn.run(app, host=args.host, port=args.port, log_level="info")


if __name__ == "__main__":
    main()
|
|
||||||
|
|
@ -1,66 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import time
|
|
||||||
import uuid
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class VRAMLease:
    """An immutable grant of VRAM on one GPU to one service."""

    lease_id: str        # random UUID string, unique per grant
    gpu_id: int
    node_id: str
    mb_granted: int
    holder_service: str
    priority: int
    expires_at: float  # unix timestamp; 0.0 = no expiry

    @classmethod
    def create(
        cls,
        gpu_id: int,
        node_id: str,
        mb: int,
        service: str,
        priority: int,
        ttl_s: float = 0.0,
    ) -> VRAMLease:
        """Build a lease with a fresh id; ttl_s <= 0 means it never expires."""
        if ttl_s > 0.0:
            deadline = time.time() + ttl_s
        else:
            deadline = 0.0
        return cls(
            lease_id=str(uuid.uuid4()),
            gpu_id=gpu_id,
            node_id=node_id,
            mb_granted=mb,
            holder_service=service,
            priority=priority,
            expires_at=deadline,
        )

    def is_expired(self) -> bool:
        """True once a finite deadline has passed; leases with no expiry never expire."""
        if self.expires_at <= 0.0:
            return False
        return time.time() > self.expires_at
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class GpuInfo:
    """Point-in-time VRAM snapshot for a single GPU."""
    gpu_id: int
    name: str            # card name as reported by the probe
    vram_total_mb: int
    vram_used_mb: int
    vram_free_mb: int    # carried explicitly rather than derived
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class ResidentAllocation:
    """A model that is loaded and warm in VRAM but not actively serving a request."""
    service: str
    node_id: str
    model_name: Optional[str]  # None if service is running but model probe failed
    # Timestamp of when this allocation was first observed.
    first_seen: float = field(default_factory=time.time)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class NodeInfo:
    """Mutable record of one agent node and its GPUs (mutable: heartbeat updates)."""
    node_id: str
    agent_url: str  # base URL of the node's resource agent
    gpus: list[GpuInfo]
    last_heartbeat: float = field(default_factory=time.time)
|
|
||||||
|
|
@ -1,41 +0,0 @@
|
||||||
# CPU-only profile for ~16 GB RAM hosts: every service has max_mb: 0
# (no VRAM budget) and concurrency is kept minimal.
schema_version: 1
name: cpu-16gb
eviction_timeout_s: 30.0
services:
  ollama:
    max_mb: 0
    priority: 1
    managed:
      type: process
      adopt: true  # claim an already-running daemon instead of spawning one
      exec_path: "/usr/local/bin/ollama"
      args_template: "serve"
      port: 11434
      host_port: 11434
      health_path: /api/tags
  cf-stt:
    max_mb: 0
    priority: 2
    shared: true
    max_concurrent: 1
    backend: moonshine  # STT backend selected for this tier
  cf-tts:
    max_mb: 0
    priority: 2
    shared: true
    max_concurrent: 1
  cf-embed:
    max_mb: 0
    priority: 2
    shared: true
    max_concurrent: 2
    always_on: true
  cf-classify:
    max_mb: 0
    priority: 2
    shared: true
    max_concurrent: 2
    always_on: true
model_size_hints:
  llm_max_params: 3b-q4
  image_gen_max: none
|
|
||||||
|
|
@ -1,41 +0,0 @@
|
||||||
schema_version: 1
|
|
||||||
name: cpu-32gb
|
|
||||||
eviction_timeout_s: 30.0
|
|
||||||
services:
|
|
||||||
ollama:
|
|
||||||
max_mb: 0
|
|
||||||
priority: 1
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
adopt: true
|
|
||||||
exec_path: "/usr/local/bin/ollama"
|
|
||||||
args_template: "serve"
|
|
||||||
port: 11434
|
|
||||||
host_port: 11434
|
|
||||||
health_path: /api/tags
|
|
||||||
cf-stt:
|
|
||||||
max_mb: 0
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 2
|
|
||||||
backend: faster-whisper
|
|
||||||
cf-tts:
|
|
||||||
max_mb: 0
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 2
|
|
||||||
cf-embed:
|
|
||||||
max_mb: 0
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 4
|
|
||||||
always_on: true
|
|
||||||
cf-classify:
|
|
||||||
max_mb: 0
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 4
|
|
||||||
always_on: true
|
|
||||||
model_size_hints:
|
|
||||||
llm_max_params: 7b-q4
|
|
||||||
image_gen_max: none
|
|
||||||
|
|
@ -1,73 +0,0 @@
|
||||||
schema_version: 1
|
|
||||||
name: single-gpu-16gb
|
|
||||||
vram_total_mb: 16384
|
|
||||||
eviction_timeout_s: 10.0
|
|
||||||
services:
|
|
||||||
vllm:
|
|
||||||
max_mb: 9000
|
|
||||||
priority: 1
|
|
||||||
idle_stop_after_s: 600
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.inference.llm_server --model /Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8000
|
|
||||||
host_port: 8000
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
ollama:
|
|
||||||
max_mb: 12288
|
|
||||||
priority: 1
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
adopt: true
|
|
||||||
exec_path: "/usr/local/bin/ollama"
|
|
||||||
args_template: "serve"
|
|
||||||
port: 11434
|
|
||||||
host_port: 11434
|
|
||||||
health_path: /api/tags
|
|
||||||
cf-vision:
|
|
||||||
max_mb: 3072
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 4
|
|
||||||
cf-docuvision:
|
|
||||||
max_mb: 6144
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 3
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8003
|
|
||||||
host_port: 8003
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
cf-stt:
|
|
||||||
max_mb: 1200
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 3
|
|
||||||
backend: parakeet-tdt
|
|
||||||
cf-tts:
|
|
||||||
max_mb: 1024
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 3
|
|
||||||
cf-embed:
|
|
||||||
max_mb: 512
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 6
|
|
||||||
always_on: true
|
|
||||||
cf-classify:
|
|
||||||
max_mb: 512
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 6
|
|
||||||
always_on: true
|
|
||||||
comfyui:
|
|
||||||
max_mb: 14336
|
|
||||||
priority: 4
|
|
||||||
model_size_hints:
|
|
||||||
llm_max_params: 34b
|
|
||||||
image_gen_max: flux-dev-fp8
|
|
||||||
|
|
@ -1,73 +0,0 @@
|
||||||
schema_version: 1
|
|
||||||
name: single-gpu-24gb
|
|
||||||
vram_total_mb: 24576
|
|
||||||
eviction_timeout_s: 10.0
|
|
||||||
services:
|
|
||||||
vllm:
|
|
||||||
max_mb: 9000
|
|
||||||
priority: 1
|
|
||||||
idle_stop_after_s: 600
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.inference.llm_server --model /Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8000
|
|
||||||
host_port: 8000
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
ollama:
|
|
||||||
max_mb: 18432
|
|
||||||
priority: 1
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
adopt: true
|
|
||||||
exec_path: "/usr/local/bin/ollama"
|
|
||||||
args_template: "serve"
|
|
||||||
port: 11434
|
|
||||||
host_port: 11434
|
|
||||||
health_path: /api/tags
|
|
||||||
cf-vision:
|
|
||||||
max_mb: 4096
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 6
|
|
||||||
cf-docuvision:
|
|
||||||
max_mb: 8192
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 4
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8003
|
|
||||||
host_port: 8003
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
cf-stt:
|
|
||||||
max_mb: 1200
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 4
|
|
||||||
backend: parakeet-tdt
|
|
||||||
cf-tts:
|
|
||||||
max_mb: 1024
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 4
|
|
||||||
cf-embed:
|
|
||||||
max_mb: 512
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 8
|
|
||||||
always_on: true
|
|
||||||
cf-classify:
|
|
||||||
max_mb: 512
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 8
|
|
||||||
always_on: true
|
|
||||||
comfyui:
|
|
||||||
max_mb: 20480
|
|
||||||
priority: 4
|
|
||||||
model_size_hints:
|
|
||||||
llm_max_params: 70b
|
|
||||||
image_gen_max: flux-dev-fp16
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
# Minimal GPU profile (2 GB VRAM): Ollama plus vision and STT only;
# no TTS, docuvision, or image generation at this tier.
schema_version: 1
name: single-gpu-2gb
vram_total_mb: 2048
eviction_timeout_s: 15.0
services:
  ollama:
    max_mb: 1536
    priority: 1
    managed:
      type: process
      adopt: true  # claim an already-running daemon instead of spawning one
      exec_path: "/usr/local/bin/ollama"
      args_template: "serve"
      port: 11434
      host_port: 11434
      health_path: /api/tags
  cf-vision:
    max_mb: 512
    priority: 2
    shared: true
    max_concurrent: 1
  cf-stt:
    max_mb: 200
    priority: 2
    shared: true
    max_concurrent: 1
    backend: moonshine  # STT backend selected for this tier
model_size_hints:
  llm_max_params: 3b
  image_gen_max: none
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
schema_version: 1
|
|
||||||
name: single-gpu-4gb
|
|
||||||
vram_total_mb: 4096
|
|
||||||
eviction_timeout_s: 15.0
|
|
||||||
services:
|
|
||||||
ollama:
|
|
||||||
max_mb: 3072
|
|
||||||
priority: 1
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
adopt: true
|
|
||||||
exec_path: "/usr/local/bin/ollama"
|
|
||||||
args_template: "serve"
|
|
||||||
port: 11434
|
|
||||||
host_port: 11434
|
|
||||||
health_path: /api/tags
|
|
||||||
cf-vision:
|
|
||||||
max_mb: 1024
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 1
|
|
||||||
cf-stt:
|
|
||||||
max_mb: 600
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 1
|
|
||||||
backend: faster-whisper
|
|
||||||
cf-tts:
|
|
||||||
max_mb: 512
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 1
|
|
||||||
comfyui:
|
|
||||||
max_mb: 3584
|
|
||||||
priority: 4
|
|
||||||
model_size_hints:
|
|
||||||
llm_max_params: 3b
|
|
||||||
image_gen_max: sd15-fp8
|
|
||||||
|
|
@ -1,61 +0,0 @@
|
||||||
schema_version: 1
|
|
||||||
name: single-gpu-6gb
|
|
||||||
vram_total_mb: 6144
|
|
||||||
eviction_timeout_s: 10.0
|
|
||||||
services:
|
|
||||||
vllm:
|
|
||||||
max_mb: 5500
|
|
||||||
priority: 1
|
|
||||||
idle_stop_after_s: 600
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.inference.llm_server --model /Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8000
|
|
||||||
host_port: 8000
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
ollama:
|
|
||||||
max_mb: 3584
|
|
||||||
priority: 1
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
adopt: true
|
|
||||||
exec_path: "/usr/local/bin/ollama"
|
|
||||||
args_template: "serve"
|
|
||||||
port: 11434
|
|
||||||
host_port: 11434
|
|
||||||
health_path: /api/tags
|
|
||||||
cf-vision:
|
|
||||||
max_mb: 1536
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 2
|
|
||||||
cf-docuvision:
|
|
||||||
max_mb: 3072
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 1
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8003
|
|
||||||
host_port: 8003
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
cf-stt:
|
|
||||||
max_mb: 600
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 2
|
|
||||||
backend: faster-whisper
|
|
||||||
cf-tts:
|
|
||||||
max_mb: 768
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 1
|
|
||||||
comfyui:
|
|
||||||
max_mb: 5120
|
|
||||||
priority: 4
|
|
||||||
model_size_hints:
|
|
||||||
llm_max_params: 7b
|
|
||||||
image_gen_max: sd15
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
schema_version: 1
|
|
||||||
name: single-gpu-8gb
|
|
||||||
vram_total_mb: 8192
|
|
||||||
eviction_timeout_s: 10.0
|
|
||||||
services:
|
|
||||||
vllm:
|
|
||||||
max_mb: 6500
|
|
||||||
priority: 1
|
|
||||||
idle_stop_after_s: 600
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.inference.llm_server --model /Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8000
|
|
||||||
host_port: 8000
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
ollama:
|
|
||||||
max_mb: 4096
|
|
||||||
priority: 1
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
adopt: true
|
|
||||||
exec_path: "/usr/local/bin/ollama"
|
|
||||||
args_template: "serve"
|
|
||||||
port: 11434
|
|
||||||
host_port: 11434
|
|
||||||
health_path: /api/tags
|
|
||||||
cf-vision:
|
|
||||||
max_mb: 2048
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 3
|
|
||||||
cf-docuvision:
|
|
||||||
max_mb: 4096
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 2
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/devl/miniconda3/envs/cf/bin/python"
|
|
||||||
args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}"
|
|
||||||
port: 8003
|
|
||||||
host_port: 8003
|
|
||||||
cwd: "/Library/Development/CircuitForge/circuitforge-core"
|
|
||||||
cf-stt:
|
|
||||||
max_mb: 1200
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 2
|
|
||||||
backend: parakeet-tdt
|
|
||||||
cf-tts:
|
|
||||||
max_mb: 1024
|
|
||||||
priority: 2
|
|
||||||
shared: true
|
|
||||||
max_concurrent: 2
|
|
||||||
comfyui:
|
|
||||||
max_mb: 6144
|
|
||||||
priority: 4
|
|
||||||
managed:
|
|
||||||
type: process
|
|
||||||
exec_path: "/opt/miniconda3/envs/comfyui/bin/python"
|
|
||||||
args_template: "/opt/ComfyUI/main.py --listen 0.0.0.0 --port {port} --cuda-device {gpu_id}"
|
|
||||||
cwd: "/opt/ComfyUI"
|
|
||||||
port: 8188
|
|
||||||
host_port: 8188
|
|
||||||
model_size_hints:
|
|
||||||
llm_max_params: 8b
|
|
||||||
image_gen_max: sdxl-fp8
|
|
||||||
|
|
@ -1,121 +0,0 @@
|
||||||
# circuitforge_core/resources/profiles/schema.py
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from pydantic import BaseModel, Field, model_validator
|
|
||||||
|
|
||||||
SUPPORTED_SCHEMA_VERSION = 1
|
|
||||||
|
|
||||||
|
|
||||||
class DockerSpec(BaseModel):
    """Spec for a Docker-managed service."""

    image: str
    port: int                  # port inside the container
    host_port: int             # port published on the host
    command_template: str = ""
    volumes: list[str] = Field(default_factory=list)
    env: dict[str, str] = Field(default_factory=dict)
    runtime: str = "nvidia"    # container runtime; GPU-enabled by default
    ipc: str = "host"

    model_config = {"frozen": True}
|
|
||||||
|
|
||||||
|
|
||||||
class ProcessSpec(BaseModel):
    """Spec for a process-managed service (non-Docker, e.g. conda env)."""

    exec_path: str            # absolute path to the executable
    # May contain {model}/{port}/{gpu_id} placeholders (see profile YAMLs).
    args_template: str = ""
    cwd: str = ""
    env: dict[str, str] = Field(default_factory=dict)
    port: int = 0
    host_port: int = 0
    # adopt=True: if the service is already listening on host_port, claim it rather
    # than spawning a new process (useful for system daemons like Ollama).
    adopt: bool = False
    # Override the health probe path; defaults to /health (Ollama uses /api/tags).
    health_path: str = "/health"

    model_config = {"frozen": True}
|
|
||||||
|
|
||||||
|
|
||||||
class ServiceProfile(BaseModel):
    """Per-service resource policy: VRAM budget, priority, concurrency, and
    an optional managed-launch spec (Docker or plain process)."""

    max_mb: int                  # VRAM budget in MB (0 on CPU-only profiles)
    priority: int
    shared: bool = False
    max_concurrent: int = 1
    always_on: bool = False
    idle_stop_after_s: int = 0   # 0 = never auto-stop when idle
    backend: str | None = None
    consumers: list[str] = Field(default_factory=list)
    managed: DockerSpec | ProcessSpec | None = None

    model_config = {"frozen": True}

    @model_validator(mode="before")
    @classmethod
    def _parse_managed(cls, values: Any) -> Any:
        """Resolve a raw `managed` mapping into DockerSpec/ProcessSpec by its
        `type` tag.

        Raises ValueError for an unknown type tag. Returns a shallow copy of
        the input mapping: the previous version wrote the constructed spec
        back into the caller-owned dict (typically a sub-tree of a parsed
        YAML document), mutating it as a side effect.
        """
        if not isinstance(values, dict):
            return values
        raw = values.get("managed")
        if raw is None:
            return values
        if not isinstance(raw, dict):
            # Already a spec instance (or something pydantic will reject).
            return values
        spec_type = raw.get("type")
        managed_fields = {k: v for k, v in raw.items() if k != "type"}
        if spec_type == "docker":
            spec: DockerSpec | ProcessSpec = DockerSpec(**managed_fields)
        elif spec_type == "process":
            spec = ProcessSpec(**managed_fields)
        else:
            raise ValueError(f"Unknown managed service type: {spec_type!r}")
        # Shallow copy instead of in-place assignment — leaves `values` intact.
        return {**values, "managed": spec}
|
|
||||||
|
|
||||||
|
|
||||||
class GpuNodeEntry(BaseModel):
    """One GPU within a node's profile entry."""

    id: int
    vram_mb: int
    role: str
    card: str = "unknown"      # card model name, if known
    always_on: bool = False
    services: list[str] = Field(default_factory=list)  # service names associated with this GPU

    model_config = {"frozen": True}
|
|
||||||
|
|
||||||
|
|
||||||
class NodeProfile(BaseModel):
    """A physical node: its GPUs plus optional agent URL and NAS mount."""

    gpus: list[GpuNodeEntry]
    agent_url: str | None = None
    nas_mount: str | None = None

    model_config = {"frozen": True}
|
|
||||||
|
|
||||||
|
|
||||||
class GpuProfile(BaseModel):
    """Top-level hardware profile loaded from a YAML file (see load_profile)."""

    schema_version: int                # must equal SUPPORTED_SCHEMA_VERSION
    name: str
    vram_total_mb: int | None = None   # None for CPU-only profiles
    eviction_timeout_s: float = 10.0
    services: dict[str, ServiceProfile] = Field(default_factory=dict)
    model_size_hints: dict[str, str] = Field(default_factory=dict)
    nodes: dict[str, NodeProfile] = Field(default_factory=dict)

    model_config = {"frozen": True}
|
|
||||||
|
|
||||||
|
|
||||||
def load_profile(path: Path) -> GpuProfile:
    """Read, version-check, and validate one profile YAML file.

    Raises ValueError when the file is not a mapping or its schema_version
    is unsupported; pydantic validation errors propagate unchanged.
    """
    document: dict[str, Any] = yaml.safe_load(path.read_text())

    if not isinstance(document, dict):
        raise ValueError(f"Profile file {path} must be a YAML mapping, got {type(document).__name__}")

    version = document.get("schema_version")
    if version != SUPPORTED_SCHEMA_VERSION:
        message = (
            f"Unsupported schema_version {version!r} in {path}. "
            f"Expected {SUPPORTED_SCHEMA_VERSION}."
        )
        raise ValueError(message)

    return GpuProfile.model_validate(document)
|
|
||||||
|
|
@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "circuitforge-core"
|
name = "circuitforge-core"
|
||||||
version = "0.7.0"
|
version = "0.8.0"
|
||||||
description = "Shared scaffold for CircuitForge products"
|
description = "Shared scaffold for CircuitForge products (MIT)"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"pyyaml>=6.0",
|
"pyyaml>=6.0",
|
||||||
|
|
@ -14,32 +14,17 @@ dependencies = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
orch = [
|
|
||||||
"fastapi>=0.110",
|
|
||||||
"uvicorn[standard]>=0.29",
|
|
||||||
"httpx>=0.27",
|
|
||||||
"pydantic>=2.0",
|
|
||||||
"typer[all]>=0.12",
|
|
||||||
"psutil>=5.9",
|
|
||||||
]
|
|
||||||
tasks = [
|
|
||||||
"httpx>=0.27",
|
|
||||||
]
|
|
||||||
manage = [
|
manage = [
|
||||||
"platformdirs>=4.0",
|
"platformdirs>=4.0",
|
||||||
"typer[all]>=0.12",
|
"typer[all]>=0.12",
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"circuitforge-core[orch]",
|
|
||||||
"circuitforge-core[tasks]",
|
|
||||||
"circuitforge-core[manage]",
|
"circuitforge-core[manage]",
|
||||||
"pytest>=8.0",
|
"pytest>=8.0",
|
||||||
"pytest-asyncio>=0.23",
|
"pytest-asyncio>=0.23",
|
||||||
"httpx>=0.27",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
cf-orch = "circuitforge_core.resources.cli:app"
|
|
||||||
cf-manage = "circuitforge_core.manage.cli:app"
|
cf-manage = "circuitforge_core.manage.cli:app"
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
|
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
from fastapi.testclient import TestClient
|
|
||||||
|
|
||||||
from circuitforge_core.resources.agent.app import create_agent_app
|
|
||||||
from circuitforge_core.resources.models import GpuInfo
|
|
||||||
from circuitforge_core.resources.agent.eviction_executor import EvictionResult
|
|
||||||
|
|
||||||
MOCK_GPUS = [
|
|
||||||
GpuInfo(
|
|
||||||
gpu_id=0,
|
|
||||||
name="RTX 4000",
|
|
||||||
vram_total_mb=8192,
|
|
||||||
vram_used_mb=1024,
|
|
||||||
vram_free_mb=7168,
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def agent_client():
|
|
||||||
mock_monitor = MagicMock()
|
|
||||||
mock_monitor.poll.return_value = MOCK_GPUS
|
|
||||||
mock_executor = MagicMock()
|
|
||||||
app = create_agent_app(
|
|
||||||
node_id="heimdall",
|
|
||||||
monitor=mock_monitor,
|
|
||||||
executor=mock_executor,
|
|
||||||
)
|
|
||||||
return TestClient(app), mock_monitor, mock_executor
|
|
||||||
|
|
||||||
|
|
||||||
def test_health_returns_ok(agent_client):
|
|
||||||
client, _, _ = agent_client
|
|
||||||
resp = client.get("/health")
|
|
||||||
assert resp.status_code == 200
|
|
||||||
assert resp.json()["status"] == "ok"
|
|
||||||
assert resp.json()["node_id"] == "heimdall"
|
|
||||||
|
|
||||||
|
|
||||||
def test_gpu_info_returns_gpu_list(agent_client):
|
|
||||||
client, _, _ = agent_client
|
|
||||||
resp = client.get("/gpu-info")
|
|
||||||
assert resp.status_code == 200
|
|
||||||
data = resp.json()
|
|
||||||
assert len(data["gpus"]) == 1
|
|
||||||
assert data["gpus"][0]["gpu_id"] == 0
|
|
||||||
assert data["gpus"][0]["name"] == "RTX 4000"
|
|
||||||
assert data["gpus"][0]["vram_free_mb"] == 7168
|
|
||||||
|
|
||||||
|
|
||||||
def test_evict_calls_executor(agent_client):
|
|
||||||
client, _, mock_executor = agent_client
|
|
||||||
mock_executor.evict_pid.return_value = EvictionResult(
|
|
||||||
success=True, method="sigterm", message="done"
|
|
||||||
)
|
|
||||||
resp = client.post("/evict", json={"pid": 1234, "grace_period_s": 5.0})
|
|
||||||
assert resp.status_code == 200
|
|
||||||
assert resp.json()["success"] is True
|
|
||||||
mock_executor.evict_pid.assert_called_once_with(pid=1234, grace_period_s=5.0)
|
|
||||||
|
|
||||||
|
|
||||||
def test_evict_requires_pid(agent_client):
|
|
||||||
client, _, _ = agent_client
|
|
||||||
resp = client.post("/evict", json={"grace_period_s": 5.0})
|
|
||||||
assert resp.status_code == 422
|
|
||||||
|
|
@ -1,93 +0,0 @@
|
||||||
import asyncio
|
|
||||||
import time
|
|
||||||
import pytest
|
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry, ServiceInstance
|
|
||||||
|
|
||||||
|
|
||||||
def test_build_idle_stop_config_empty_without_registry():
|
|
||||||
lm = LeaseManager()
|
|
||||||
supervisor = AgentSupervisor(lease_manager=lm)
|
|
||||||
assert supervisor._build_idle_stop_config() == {}
|
|
||||||
|
|
||||||
|
|
||||||
def test_build_idle_stop_config_from_profiles():
|
|
||||||
lm = LeaseManager()
|
|
||||||
mock_svc = MagicMock()
|
|
||||||
mock_svc.idle_stop_after_s = 600
|
|
||||||
mock_profile = MagicMock()
|
|
||||||
mock_profile.services = {"vllm": mock_svc}
|
|
||||||
mock_profile_registry = MagicMock()
|
|
||||||
mock_profile_registry.list_public.return_value = [mock_profile]
|
|
||||||
|
|
||||||
supervisor = AgentSupervisor(lease_manager=lm, profile_registry=mock_profile_registry)
|
|
||||||
config = supervisor._build_idle_stop_config()
|
|
||||||
assert config == {"vllm": 600}
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_run_idle_sweep_posts_stop():
|
|
||||||
lm = LeaseManager()
|
|
||||||
service_registry = ServiceRegistry()
|
|
||||||
|
|
||||||
# Upsert instance as running, then allocate + release to transition it to idle
|
|
||||||
service_registry.upsert_instance(
|
|
||||||
service="vllm",
|
|
||||||
node_id="heimdall",
|
|
||||||
gpu_id=0,
|
|
||||||
state="running",
|
|
||||||
model="test-model",
|
|
||||||
url="http://heimdall:8000",
|
|
||||||
)
|
|
||||||
alloc = service_registry.allocate(
|
|
||||||
service="vllm",
|
|
||||||
node_id="heimdall",
|
|
||||||
gpu_id=0,
|
|
||||||
model="test-model",
|
|
||||||
url="http://heimdall:8000",
|
|
||||||
caller="test",
|
|
||||||
ttl_s=300.0,
|
|
||||||
)
|
|
||||||
service_registry.release(alloc.allocation_id)
|
|
||||||
|
|
||||||
# Backdate idle_since so it exceeds the timeout
|
|
||||||
import dataclasses
|
|
||||||
key = "vllm:heimdall:0"
|
|
||||||
inst = service_registry._instances[key]
|
|
||||||
service_registry._instances[key] = dataclasses.replace(inst, idle_since=time.time() - 700)
|
|
||||||
|
|
||||||
mock_profile_registry = MagicMock()
|
|
||||||
mock_svc = MagicMock()
|
|
||||||
mock_svc.idle_stop_after_s = 600
|
|
||||||
mock_profile = MagicMock()
|
|
||||||
mock_profile.services = {"vllm": mock_svc}
|
|
||||||
mock_profile_registry.list_public.return_value = [mock_profile]
|
|
||||||
|
|
||||||
supervisor = AgentSupervisor(
|
|
||||||
lease_manager=lm,
|
|
||||||
service_registry=service_registry,
|
|
||||||
profile_registry=mock_profile_registry,
|
|
||||||
)
|
|
||||||
supervisor.register("heimdall", "http://heimdall:7701")
|
|
||||||
|
|
||||||
posted_urls = []
|
|
||||||
|
|
||||||
async def fake_http_post(url: str) -> bool:
|
|
||||||
posted_urls.append(url)
|
|
||||||
return True
|
|
||||||
|
|
||||||
supervisor._http_post = fake_http_post
|
|
||||||
await supervisor._run_idle_sweep()
|
|
||||||
|
|
||||||
assert len(posted_urls) == 1
|
|
||||||
assert posted_urls[0] == "http://heimdall:7701/services/vllm/stop"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_run_idle_sweep_skips_without_registry():
|
|
||||||
lm = LeaseManager()
|
|
||||||
supervisor = AgentSupervisor(lease_manager=lm)
|
|
||||||
# Should return immediately without error
|
|
||||||
await supervisor._run_idle_sweep()
|
|
||||||
|
|
@ -1,151 +0,0 @@
|
||||||
# tests/test_resources/test_agent_watchdog.py
|
|
||||||
"""
|
|
||||||
Tests for AgentSupervisor watchdog behaviour:
|
|
||||||
- restore_from_store() reloads known nodes from NodeStore on startup
|
|
||||||
- register() persists to NodeStore
|
|
||||||
- restored nodes start offline and come online after a successful poll
|
|
||||||
- NodeStore=None path is a no-op (backwards compatibility)
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.coordinator.node_store import NodeStore
|
|
||||||
|
|
||||||
|
|
||||||
# ── fixtures ──────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def store(tmp_path: Path) -> NodeStore:
|
|
||||||
return NodeStore(db_path=tmp_path / "nodes.db")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def supervisor(store: NodeStore) -> AgentSupervisor:
|
|
||||||
return AgentSupervisor(lease_manager=LeaseManager(), node_store=store)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def supervisor_no_store() -> AgentSupervisor:
|
|
||||||
return AgentSupervisor(lease_manager=LeaseManager(), node_store=None)
|
|
||||||
|
|
||||||
|
|
||||||
# ── register() persists ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_register_persists_to_store(supervisor: AgentSupervisor, store: NodeStore) -> None:
|
|
||||||
supervisor.register("heimdall", "http://127.0.0.1:7701")
|
|
||||||
rows = store.all()
|
|
||||||
assert len(rows) == 1
|
|
||||||
assert rows[0] == ("heimdall", "http://127.0.0.1:7701")
|
|
||||||
|
|
||||||
|
|
||||||
def test_register_updates_url_in_store(supervisor: AgentSupervisor, store: NodeStore) -> None:
|
|
||||||
supervisor.register("navi", "http://10.1.10.10:7701")
|
|
||||||
supervisor.register("navi", "http://10.1.10.10:9999")
|
|
||||||
rows = store.all()
|
|
||||||
assert len(rows) == 1
|
|
||||||
assert rows[0][1] == "http://10.1.10.10:9999"
|
|
||||||
|
|
||||||
|
|
||||||
def test_register_without_store_does_not_crash(supervisor_no_store: AgentSupervisor) -> None:
|
|
||||||
supervisor_no_store.register("heimdall", "http://127.0.0.1:7701")
|
|
||||||
assert supervisor_no_store.get_node_info("heimdall") is not None
|
|
||||||
|
|
||||||
|
|
||||||
# ── restore_from_store() ──────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_restore_loads_known_nodes(tmp_path: Path) -> None:
|
|
||||||
"""Nodes written by a previous supervisor session are restored into a fresh one."""
|
|
||||||
db = tmp_path / "nodes.db"
|
|
||||||
|
|
||||||
# Session 1: register two nodes
|
|
||||||
s1 = NodeStore(db_path=db)
|
|
||||||
sup1 = AgentSupervisor(lease_manager=LeaseManager(), node_store=s1)
|
|
||||||
sup1.register("navi", "http://10.1.10.10:7701")
|
|
||||||
sup1.register("strahl", "http://10.1.10.20:7701")
|
|
||||||
|
|
||||||
# Session 2: fresh supervisor, same DB
|
|
||||||
s2 = NodeStore(db_path=db)
|
|
||||||
sup2 = AgentSupervisor(lease_manager=LeaseManager(), node_store=s2)
|
|
||||||
restored = sup2.restore_from_store()
|
|
||||||
|
|
||||||
assert restored == 2
|
|
||||||
assert sup2.get_node_info("navi") is not None
|
|
||||||
assert sup2.get_node_info("strahl") is not None
|
|
||||||
|
|
||||||
|
|
||||||
def test_restore_marks_nodes_offline(tmp_path: Path) -> None:
|
|
||||||
"""Restored nodes start offline — they haven't been polled yet."""
|
|
||||||
db = tmp_path / "nodes.db"
|
|
||||||
|
|
||||||
s1 = NodeStore(db_path=db)
|
|
||||||
AgentSupervisor(lease_manager=LeaseManager(), node_store=s1).register(
|
|
||||||
"navi", "http://10.1.10.10:7701"
|
|
||||||
)
|
|
||||||
|
|
||||||
s2 = NodeStore(db_path=db)
|
|
||||||
sup2 = AgentSupervisor(lease_manager=LeaseManager(), node_store=s2)
|
|
||||||
sup2.restore_from_store()
|
|
||||||
|
|
||||||
assert sup2.online_agents() == {}
|
|
||||||
|
|
||||||
|
|
||||||
def test_restore_returns_zero_without_store() -> None:
|
|
||||||
sup = AgentSupervisor(lease_manager=LeaseManager(), node_store=None)
|
|
||||||
assert sup.restore_from_store() == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_restore_skips_already_registered(tmp_path: Path) -> None:
|
|
||||||
"""Nodes manually registered before restore_from_store() are not duplicated."""
|
|
||||||
db = tmp_path / "nodes.db"
|
|
||||||
store = NodeStore(db_path=db)
|
|
||||||
store.upsert("heimdall", "http://127.0.0.1:7701")
|
|
||||||
|
|
||||||
sup = AgentSupervisor(lease_manager=LeaseManager(), node_store=store)
|
|
||||||
sup.register("heimdall", "http://127.0.0.1:7701") # already in memory
|
|
||||||
restored = sup.restore_from_store()
|
|
||||||
|
|
||||||
assert restored == 0 # already present, not double-counted
|
|
||||||
|
|
||||||
|
|
||||||
# ── restored node comes online after poll ─────────────────────────────────────
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
|
||||||
async def test_restored_node_comes_online_after_poll(tmp_path: Path) -> None:
|
|
||||||
"""After restore, a successful poll_agent() brings the node online."""
|
|
||||||
db = tmp_path / "nodes.db"
|
|
||||||
store = NodeStore(db_path=db)
|
|
||||||
store.upsert("navi", "http://10.1.10.10:7701")
|
|
||||||
|
|
||||||
sup = AgentSupervisor(lease_manager=LeaseManager(), node_store=store)
|
|
||||||
sup.restore_from_store()
|
|
||||||
|
|
||||||
# Stub poll_agent to succeed
|
|
||||||
gpu_payload = {"gpus": [{"gpu_id": 0, "name": "RTX 4000",
|
|
||||||
"vram_total_mb": 8192, "vram_used_mb": 512, "vram_free_mb": 7680}]}
|
|
||||||
resident_payload = {"residents": []}
|
|
||||||
|
|
||||||
mock_resp_gpu = MagicMock()
|
|
||||||
mock_resp_gpu.raise_for_status = MagicMock()
|
|
||||||
mock_resp_gpu.json.return_value = gpu_payload
|
|
||||||
|
|
||||||
mock_resp_res = MagicMock()
|
|
||||||
mock_resp_res.is_success = True
|
|
||||||
mock_resp_res.json.return_value = resident_payload
|
|
||||||
|
|
||||||
mock_client = AsyncMock()
|
|
||||||
mock_client.get = AsyncMock(side_effect=[mock_resp_gpu, mock_resp_res])
|
|
||||||
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
||||||
mock_client.__aexit__ = AsyncMock(return_value=False)
|
|
||||||
|
|
||||||
with patch("circuitforge_core.resources.coordinator.agent_supervisor.httpx.AsyncClient",
|
|
||||||
return_value=mock_client):
|
|
||||||
result = await sup.poll_agent("navi")
|
|
||||||
|
|
||||||
assert result is True
|
|
||||||
assert "navi" in sup.online_agents()
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
from typer.testing import CliRunner
|
|
||||||
|
|
||||||
from circuitforge_core.resources.cli import app
|
|
||||||
|
|
||||||
runner = CliRunner()
|
|
||||||
|
|
||||||
|
|
||||||
def test_cli_help():
|
|
||||||
result = runner.invoke(app, ["--help"])
|
|
||||||
assert result.exit_code == 0
|
|
||||||
assert "cf-orch" in result.output.lower() or "Usage" in result.output
|
|
||||||
|
|
||||||
|
|
||||||
def test_status_command_shows_no_coordinator_message():
|
|
||||||
with patch("httpx.get", side_effect=ConnectionRefusedError("refused")):
|
|
||||||
result = runner.invoke(app, ["status"])
|
|
||||||
assert result.exit_code != 0 or "unreachable" in result.output.lower() \
|
|
||||||
or "coordinator" in result.output.lower()
|
|
||||||
|
|
||||||
|
|
||||||
def test_install_service_creates_systemd_unit(tmp_path: Path):
|
|
||||||
unit_path = tmp_path / "cf-orch.service"
|
|
||||||
with patch(
|
|
||||||
"circuitforge_core.resources.cli._SYSTEMD_UNIT_PATH", unit_path
|
|
||||||
):
|
|
||||||
result = runner.invoke(app, ["install-service", "--dry-run"])
|
|
||||||
assert result.exit_code == 0
|
|
||||||
assert "cf-orch.service" in result.output or "systemd" in result.output.lower()
|
|
||||||
|
|
@ -1,94 +0,0 @@
|
||||||
import json
|
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
import httpretty
|
|
||||||
from circuitforge_core.resources.client import CFOrchClient, Allocation
|
|
||||||
|
|
||||||
_ALLOC_BODY = (
|
|
||||||
'{"allocation_id":"abc123","service":"vllm","node_id":"heimdall",'
|
|
||||||
'"gpu_id":0,"model":"Ouro-1.4B","url":"http://heimdall:8000","started":false,"warm":true}'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@httpretty.activate
|
|
||||||
def test_sync_allocate_returns_allocation():
|
|
||||||
httpretty.register_uri(
|
|
||||||
httpretty.POST, "http://orch:7700/api/services/vllm/allocate",
|
|
||||||
body=_ALLOC_BODY, content_type="application/json",
|
|
||||||
)
|
|
||||||
httpretty.register_uri(
|
|
||||||
httpretty.DELETE, "http://orch:7700/api/services/vllm/allocations/abc123",
|
|
||||||
body='{"released":true}', content_type="application/json",
|
|
||||||
)
|
|
||||||
client = CFOrchClient("http://orch:7700")
|
|
||||||
with client.allocate("vllm", model_candidates=["Ouro-1.4B"], caller="test") as alloc:
|
|
||||||
assert isinstance(alloc, Allocation)
|
|
||||||
assert alloc.url == "http://heimdall:8000"
|
|
||||||
assert alloc.model == "Ouro-1.4B"
|
|
||||||
assert alloc.allocation_id == "abc123"
|
|
||||||
assert httpretty.last_request().method == "DELETE"
|
|
||||||
|
|
||||||
|
|
||||||
@httpretty.activate
|
|
||||||
def test_sync_allocate_ignores_404_on_release():
|
|
||||||
httpretty.register_uri(
|
|
||||||
httpretty.POST, "http://orch:7700/api/services/vllm/allocate",
|
|
||||||
body='{"allocation_id":"xyz","service":"vllm","node_id":"a","gpu_id":0,'
|
|
||||||
'"model":"m","url":"http://a:8000","started":false,"warm":false}',
|
|
||||||
content_type="application/json",
|
|
||||||
)
|
|
||||||
httpretty.register_uri(
|
|
||||||
httpretty.DELETE, "http://orch:7700/api/services/vllm/allocations/xyz",
|
|
||||||
status=404, body='{"detail":"not found"}', content_type="application/json",
|
|
||||||
)
|
|
||||||
client = CFOrchClient("http://orch:7700")
|
|
||||||
with client.allocate("vllm", model_candidates=["m"]) as alloc:
|
|
||||||
assert alloc.url == "http://a:8000"
|
|
||||||
# No exception raised — 404 on release is silently ignored
|
|
||||||
|
|
||||||
|
|
||||||
@httpretty.activate
|
|
||||||
def test_sync_allocate_raises_on_503():
|
|
||||||
httpretty.register_uri(
|
|
||||||
httpretty.POST, "http://orch:7700/api/services/vllm/allocate",
|
|
||||||
status=503, body='{"detail":"no capacity"}', content_type="application/json",
|
|
||||||
)
|
|
||||||
client = CFOrchClient("http://orch:7700")
|
|
||||||
with pytest.raises(RuntimeError, match="cf-orch allocation failed"):
|
|
||||||
with client.allocate("vllm", model_candidates=["m"]):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
async def test_async_allocate_works():
|
|
||||||
# httpretty only patches stdlib sockets; httpx async uses anyio sockets so
|
|
||||||
# we mock httpx.AsyncClient directly instead.
|
|
||||||
alloc_data = {
|
|
||||||
"allocation_id": "a1", "service": "vllm", "node_id": "n",
|
|
||||||
"gpu_id": 0, "model": "m", "url": "http://n:8000",
|
|
||||||
"started": False, "warm": False,
|
|
||||||
}
|
|
||||||
release_data = {"released": True}
|
|
||||||
|
|
||||||
def _make_response(data, status_code=200):
|
|
||||||
resp = MagicMock()
|
|
||||||
resp.is_success = status_code < 400
|
|
||||||
resp.status_code = status_code
|
|
||||||
resp.json.return_value = data
|
|
||||||
return resp
|
|
||||||
|
|
||||||
mock_post = AsyncMock(return_value=_make_response(alloc_data))
|
|
||||||
mock_delete = AsyncMock(return_value=_make_response(release_data))
|
|
||||||
|
|
||||||
mock_async_client = MagicMock()
|
|
||||||
mock_async_client.post = mock_post
|
|
||||||
mock_async_client.delete = mock_delete
|
|
||||||
mock_async_client.__aenter__ = AsyncMock(return_value=mock_async_client)
|
|
||||||
mock_async_client.__aexit__ = AsyncMock(return_value=False)
|
|
||||||
|
|
||||||
with patch("httpx.AsyncClient", return_value=mock_async_client):
|
|
||||||
client = CFOrchClient("http://orch:7700")
|
|
||||||
async with client.allocate_async("vllm", model_candidates=["m"]) as alloc:
|
|
||||||
assert alloc.url == "http://n:8000"
|
|
||||||
assert alloc.allocation_id == "a1"
|
|
||||||
mock_delete.assert_called_once()
|
|
||||||
|
|
@ -1,132 +0,0 @@
|
||||||
import pytest
|
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
|
||||||
from fastapi.testclient import TestClient
|
|
||||||
from circuitforge_core.resources.coordinator.app import create_coordinator_app
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentRecord
|
|
||||||
from circuitforge_core.resources.models import GpuInfo, NodeInfo
|
|
||||||
|
|
||||||
|
|
||||||
def _make_supervisor_mock(online: bool = True):
|
|
||||||
sup = MagicMock()
|
|
||||||
record = AgentRecord(node_id="heimdall", agent_url="http://heimdall:7701")
|
|
||||||
record.gpus = [GpuInfo(0, "RTX 4000", 8192, 0, 8192)]
|
|
||||||
record.online = online
|
|
||||||
sup.online_agents.return_value = {"heimdall": record} if online else {}
|
|
||||||
sup.get_node_info.return_value = NodeInfo(
|
|
||||||
node_id="heimdall",
|
|
||||||
agent_url="http://heimdall:7701",
|
|
||||||
gpus=record.gpus,
|
|
||||||
last_heartbeat=0.0,
|
|
||||||
)
|
|
||||||
return sup
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def alloc_client():
|
|
||||||
lm = LeaseManager()
|
|
||||||
pr = ProfileRegistry()
|
|
||||||
sup = _make_supervisor_mock()
|
|
||||||
sr = ServiceRegistry()
|
|
||||||
app = create_coordinator_app(lease_manager=lm, profile_registry=pr, agent_supervisor=sup, service_registry=sr)
|
|
||||||
return TestClient(app), sup, sr
|
|
||||||
|
|
||||||
|
|
||||||
def test_allocate_returns_allocation_id_and_url(alloc_client):
|
|
||||||
client, sup, sr = alloc_client
|
|
||||||
with patch("httpx.AsyncClient") as mock_http:
|
|
||||||
mock_resp = MagicMock()
|
|
||||||
mock_resp.is_success = True
|
|
||||||
mock_resp.json.return_value = {"running": True, "url": "http://heimdall:8000"}
|
|
||||||
mock_http.return_value.__aenter__.return_value.post = AsyncMock(return_value=mock_resp)
|
|
||||||
|
|
||||||
resp = client.post("/api/services/vllm/allocate", json={
|
|
||||||
"model_candidates": ["Ouro-1.4B"],
|
|
||||||
"ttl_s": 300.0,
|
|
||||||
"caller": "test",
|
|
||||||
})
|
|
||||||
|
|
||||||
assert resp.status_code == 200
|
|
||||||
data = resp.json()
|
|
||||||
assert "allocation_id" in data
|
|
||||||
assert data["service"] == "vllm"
|
|
||||||
assert data["node_id"] == "heimdall"
|
|
||||||
assert data["url"] == "http://heimdall:8000"
|
|
||||||
|
|
||||||
|
|
||||||
def test_allocate_returns_503_when_no_online_nodes(alloc_client):
|
|
||||||
client, sup, sr = alloc_client
|
|
||||||
sup.online_agents.return_value = {}
|
|
||||||
resp = client.post("/api/services/vllm/allocate", json={"model_candidates": ["Ouro-1.4B"]})
|
|
||||||
assert resp.status_code == 503
|
|
||||||
|
|
||||||
|
|
||||||
def test_allocate_returns_422_for_empty_candidates(alloc_client):
|
|
||||||
client, _, sr = alloc_client
|
|
||||||
resp = client.post("/api/services/vllm/allocate", json={"model_candidates": []})
|
|
||||||
assert resp.status_code == 422
|
|
||||||
|
|
||||||
|
|
||||||
def test_allocate_returns_422_for_unknown_service(alloc_client):
|
|
||||||
client, _, sr = alloc_client
|
|
||||||
resp = client.post("/api/services/cf-made-up/allocate", json={"model_candidates": ["x"]})
|
|
||||||
assert resp.status_code == 422
|
|
||||||
|
|
||||||
|
|
||||||
def test_allocate_records_in_registry(alloc_client):
|
|
||||||
client, sup, sr = alloc_client
|
|
||||||
with patch("httpx.AsyncClient") as mock_http:
|
|
||||||
mock_resp = MagicMock()
|
|
||||||
mock_resp.is_success = True
|
|
||||||
mock_resp.json.return_value = {"running": True, "url": "http://heimdall:8000"}
|
|
||||||
mock_http.return_value.__aenter__.return_value.post = AsyncMock(return_value=mock_resp)
|
|
||||||
|
|
||||||
resp = client.post("/api/services/vllm/allocate", json={
|
|
||||||
"model_candidates": ["Ouro-1.4B"],
|
|
||||||
"ttl_s": 300.0,
|
|
||||||
"caller": "test",
|
|
||||||
})
|
|
||||||
|
|
||||||
assert resp.status_code == 200
|
|
||||||
allocation_id = resp.json()["allocation_id"]
|
|
||||||
|
|
||||||
status_resp = client.get("/api/services/vllm/status")
|
|
||||||
assert status_resp.status_code == 200
|
|
||||||
status_data = status_resp.json()
|
|
||||||
assert status_data["service"] == "vllm"
|
|
||||||
alloc_ids = [a["allocation_id"] for a in status_data["allocations"]]
|
|
||||||
assert allocation_id in alloc_ids
|
|
||||||
|
|
||||||
|
|
||||||
def test_release_allocation(alloc_client):
|
|
||||||
client, sup, sr = alloc_client
|
|
||||||
with patch("httpx.AsyncClient") as mock_http:
|
|
||||||
mock_resp = MagicMock()
|
|
||||||
mock_resp.is_success = True
|
|
||||||
mock_resp.json.return_value = {"running": True, "url": "http://heimdall:8000"}
|
|
||||||
mock_http.return_value.__aenter__.return_value.post = AsyncMock(return_value=mock_resp)
|
|
||||||
|
|
||||||
resp = client.post("/api/services/vllm/allocate", json={
|
|
||||||
"model_candidates": ["Ouro-1.4B"],
|
|
||||||
"ttl_s": 300.0,
|
|
||||||
"caller": "test",
|
|
||||||
})
|
|
||||||
|
|
||||||
assert resp.status_code == 200
|
|
||||||
allocation_id = resp.json()["allocation_id"]
|
|
||||||
|
|
||||||
del_resp = client.delete(f"/api/services/vllm/allocations/{allocation_id}")
|
|
||||||
assert del_resp.status_code == 200
|
|
||||||
assert del_resp.json() == {"released": True, "allocation_id": allocation_id}
|
|
||||||
|
|
||||||
status_resp = client.get("/api/services/vllm/status")
|
|
||||||
alloc_ids = [a["allocation_id"] for a in status_resp.json()["allocations"]]
|
|
||||||
assert allocation_id not in alloc_ids
|
|
||||||
|
|
||||||
|
|
||||||
def test_release_allocation_not_found(alloc_client):
|
|
||||||
client, _, sr = alloc_client
|
|
||||||
resp = client.delete("/api/services/vllm/allocations/bad-id")
|
|
||||||
assert resp.status_code == 404
|
|
||||||
|
|
@ -1,183 +0,0 @@
|
||||||
import pytest
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
from pathlib import Path
|
|
||||||
from fastapi.testclient import TestClient
|
|
||||||
from circuitforge_core.resources.coordinator.app import create_coordinator_app
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry
|
|
||||||
from circuitforge_core.resources.models import GpuInfo, NodeInfo
|
|
||||||
from circuitforge_core.resources.profiles.schema import load_profile
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def coordinator_client():
|
|
||||||
lease_manager = LeaseManager()
|
|
||||||
lease_manager.register_gpu("heimdall", 0, 8192)
|
|
||||||
profile_registry = ProfileRegistry()
|
|
||||||
supervisor = MagicMock()
|
|
||||||
supervisor.all_nodes.return_value = [
|
|
||||||
NodeInfo(
|
|
||||||
node_id="heimdall",
|
|
||||||
agent_url="http://localhost:7701",
|
|
||||||
gpus=[GpuInfo(gpu_id=0, name="RTX 4000",
|
|
||||||
vram_total_mb=8192, vram_used_mb=0, vram_free_mb=8192)],
|
|
||||||
last_heartbeat=0.0,
|
|
||||||
)
|
|
||||||
]
|
|
||||||
supervisor.get_node_info.return_value = NodeInfo(
|
|
||||||
node_id="heimdall",
|
|
||||||
agent_url="http://localhost:7701",
|
|
||||||
gpus=[],
|
|
||||||
last_heartbeat=0.0,
|
|
||||||
)
|
|
||||||
app = create_coordinator_app(
|
|
||||||
lease_manager=lease_manager,
|
|
||||||
profile_registry=profile_registry,
|
|
||||||
agent_supervisor=supervisor,
|
|
||||||
service_registry=ServiceRegistry(),
|
|
||||||
)
|
|
||||||
return TestClient(app), lease_manager
|
|
||||||
|
|
||||||
|
|
||||||
def test_health_returns_ok(coordinator_client):
|
|
||||||
client, _ = coordinator_client
|
|
||||||
resp = client.get("/api/health")
|
|
||||||
assert resp.status_code == 200
|
|
||||||
assert resp.json()["status"] == "ok"
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_nodes_returns_list(coordinator_client):
|
|
||||||
client, _ = coordinator_client
|
|
||||||
resp = client.get("/api/nodes")
|
|
||||||
assert resp.status_code == 200
|
|
||||||
nodes = resp.json()["nodes"]
|
|
||||||
assert len(nodes) == 1
|
|
||||||
assert nodes[0]["node_id"] == "heimdall"
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_profiles_returns_public_profiles(coordinator_client):
|
|
||||||
client, _ = coordinator_client
|
|
||||||
resp = client.get("/api/profiles")
|
|
||||||
assert resp.status_code == 200
|
|
||||||
names = [p["name"] for p in resp.json()["profiles"]]
|
|
||||||
assert "single-gpu-8gb" in names
|
|
||||||
|
|
||||||
|
|
||||||
def test_post_lease_grants_lease(coordinator_client):
|
|
||||||
client, _ = coordinator_client
|
|
||||||
resp = client.post("/api/leases", json={
|
|
||||||
"node_id": "heimdall", "gpu_id": 0,
|
|
||||||
"mb": 2048, "service": "peregrine", "priority": 1,
|
|
||||||
})
|
|
||||||
assert resp.status_code == 200
|
|
||||||
data = resp.json()
|
|
||||||
assert data["lease"]["mb_granted"] == 2048
|
|
||||||
assert data["lease"]["holder_service"] == "peregrine"
|
|
||||||
assert "lease_id" in data["lease"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_delete_lease_releases_it(coordinator_client):
|
|
||||||
client, _ = coordinator_client
|
|
||||||
resp = client.post("/api/leases", json={
|
|
||||||
"node_id": "heimdall", "gpu_id": 0,
|
|
||||||
"mb": 2048, "service": "peregrine", "priority": 1,
|
|
||||||
})
|
|
||||||
lease_id = resp.json()["lease"]["lease_id"]
|
|
||||||
del_resp = client.delete(f"/api/leases/{lease_id}")
|
|
||||||
assert del_resp.status_code == 200
|
|
||||||
assert del_resp.json()["released"] is True
|
|
||||||
|
|
||||||
|
|
||||||
def test_delete_unknown_lease_returns_404(coordinator_client):
    """DELETE on a lease id that was never granted yields 404."""
    api, _ = coordinator_client
    response = api.delete("/api/leases/nonexistent-id")
    assert response.status_code == 404


def test_get_leases_returns_active_leases(coordinator_client):
    """A granted lease shows up in the GET /api/leases listing."""
    api, _ = coordinator_client
    payload = {
        "node_id": "heimdall",
        "gpu_id": 0,
        "mb": 1024,
        "service": "kiwi",
        "priority": 2,
    }
    api.post("/api/leases", json=payload)
    response = api.get("/api/leases")
    assert response.status_code == 200
    assert len(response.json()["leases"]) == 1


def test_dashboard_serves_html(coordinator_client):
    """GET / returns the dashboard HTML page."""
    api, _ = coordinator_client
    response = api.get("/")
    assert response.status_code == 200
    assert "text/html" in response.headers["content-type"]
    # Check key structural markers only — exact markup is not part of the contract.
    for marker in ("cf-orch", "/api/nodes", "/api/leases"):
        assert marker in response.text


def test_online_agents_excludes_offline():
    """online_agents() filters out agents whose online flag is False."""
    manager = LeaseManager()
    supervisor = AgentSupervisor(manager)
    supervisor.register("online_node", "http://a:7701")
    supervisor.register("offline_node", "http://b:7701")
    # Flip the internal flags directly rather than simulating heartbeats.
    supervisor._agents["online_node"].online = True
    supervisor._agents["offline_node"].online = False
    online = supervisor.online_agents()
    assert "online_node" in online
    assert "offline_node" not in online


def test_resident_keys_returns_set_of_node_service():
    """resident_keys() builds 'node:service' strings from registered residents."""
    manager = LeaseManager()
    residents = [("vllm", "Ouro-1.4B"), ("ollama", None)]
    manager.set_residents_for_node("heimdall", residents)
    assert manager.resident_keys() == {"heimdall:vllm", "heimdall:ollama"}


def test_single_gpu_8gb_profile_has_idle_stop_after_s():
    """The shipped single-gpu-8gb profile gives vllm an idle-stop timeout."""
    profile_path = Path(
        "circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml"
    )
    profile = load_profile(profile_path)
    vllm_service = profile.services.get("vllm")
    assert vllm_service is not None
    assert hasattr(vllm_service, "idle_stop_after_s")
    assert vllm_service.idle_stop_after_s == 600
|
|
||||||
|
|
||||||
|
|
||||||
def test_ensure_service_returns_503_when_vram_too_low():
    """VRAM pre-flight guard fires before any HTTP request when free VRAM < service max_mb."""
    # Threshold = full max_mb (not half); 100 MB free on any profile triggers 503.
    lease_manager = LeaseManager()
    lease_manager.register_gpu("low-vram-node", 0, 512)
    profile_registry = ProfileRegistry()
    # Supervisor is fully mocked: the node reports only 100 MB free VRAM.
    supervisor = MagicMock()
    supervisor.get_node_info.return_value = NodeInfo(
        node_id="low-vram-node",
        agent_url="http://localhost:7701",
        gpus=[GpuInfo(gpu_id=0, name="GTX 1050",
                      vram_total_mb=512, vram_used_mb=412, vram_free_mb=100)],
        last_heartbeat=0.0,
    )
    supervisor.all_nodes.return_value = []
    # Assemble a real coordinator app around the mocked supervisor.
    app = create_coordinator_app(
        lease_manager=lease_manager,
        profile_registry=profile_registry,
        agent_supervisor=supervisor,
        service_registry=ServiceRegistry(),
    )
    client = TestClient(app)

    resp = client.post("/api/services/vllm/ensure", json={
        "node_id": "low-vram-node",
        "gpu_id": 0,
        "params": {"model": "some-model"},
    })

    assert resp.status_code == 503
    assert "Insufficient VRAM" in resp.json()["detail"]
    # Guard must fire before any agent HTTP call is attempted.
    supervisor.get_node_info.assert_called_once_with("low-vram-node")
|
|
||||||
|
|
@ -1,148 +0,0 @@
|
||||||
"""Tests for HeimdallAuthMiddleware — TTL cache and request gating."""
|
|
||||||
import time
|
|
||||||
import pytest
|
|
||||||
from unittest.mock import patch, MagicMock
|
|
||||||
from fastapi import FastAPI
|
|
||||||
from fastapi.testclient import TestClient
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.auth import (
|
|
||||||
HeimdallAuthMiddleware,
|
|
||||||
_ValidationCache,
|
|
||||||
CACHE_TTL_S,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ── Cache unit tests ──────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_cache_miss_returns_none():
    """An unknown key is a miss."""
    cache = _ValidationCache()
    assert cache.get("nonexistent") is None


def test_cache_stores_and_retrieves():
    """A freshly stored entry comes back with its fields intact."""
    cache = _ValidationCache()
    cache.set("key1", valid=True, tier="paid", user_id="u1")
    hit = cache.get("key1")
    assert hit is not None
    assert hit.valid is True
    assert hit.tier == "paid"


def test_cache_entry_expires():
    """Entries older than the TTL read back as misses."""
    cache = _ValidationCache(ttl_s=0.05)
    cache.set("key1", valid=True, tier="paid", user_id="u1")
    time.sleep(0.1)  # age the entry past the 50 ms TTL
    assert cache.get("key1") is None


def test_cache_evict_removes_key():
    """Explicit eviction removes a live entry."""
    cache = _ValidationCache()
    cache.set("key1", valid=True, tier="paid", user_id="u1")
    cache.evict("key1")
    assert cache.get("key1") is None


def test_cache_prune_removes_expired():
    """prune() drops every expired entry and reports how many it removed."""
    cache = _ValidationCache(ttl_s=0.05)
    for key in ("k1", "k2"):
        cache.set(key, valid=True, tier="paid", user_id="")
    time.sleep(0.1)
    assert cache.prune() == 2
|
|
||||||
|
|
||||||
|
|
||||||
# ── Middleware integration tests ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _make_app_with_auth(middleware: HeimdallAuthMiddleware) -> TestClient:
    """Wrap *middleware* around a two-route FastAPI app and return a test client."""
    application = FastAPI()
    application.middleware("http")(middleware)

    @application.get("/api/health")
    def _health_endpoint():
        return {"status": "ok"}

    @application.post("/api/services/vllm/allocate")
    def _allocate_endpoint():
        return {"allocation_id": "abc", "url": "http://gpu:8000"}

    # raise_server_exceptions=False so middleware error responses come back
    # as HTTP responses instead of bubbling up as exceptions.
    return TestClient(application, raise_server_exceptions=False)


def _patched_middleware(valid: bool, tier: str = "paid") -> HeimdallAuthMiddleware:
    """Return a middleware whose Heimdall validation call is pre-mocked."""
    middleware = HeimdallAuthMiddleware(
        heimdall_url="http://heimdall.test",
        min_tier="paid",
    )
    user_id = "user-1" if valid else ""
    middleware._validate_against_heimdall = MagicMock(  # type: ignore[method-assign]
        return_value=(valid, tier, user_id)
    )
    return middleware
|
|
||||||
|
|
||||||
|
|
||||||
def test_health_exempt_no_auth_required():
    """/api/health is reachable without any Authorization header."""
    tc = _make_app_with_auth(_patched_middleware(valid=True))
    assert tc.get("/api/health").status_code == 200


def test_missing_auth_header_returns_401():
    """Protected routes reject requests that carry no credentials."""
    tc = _make_app_with_auth(_patched_middleware(valid=True))
    assert tc.post("/api/services/vllm/allocate").status_code == 401


def test_invalid_key_returns_403():
    """A key Heimdall rejects yields 403, not 401."""
    tc = _make_app_with_auth(_patched_middleware(valid=False))
    response = tc.post(
        "/api/services/vllm/allocate",
        headers={"Authorization": "Bearer BAD-KEY"},
    )
    assert response.status_code == 403


def test_valid_paid_key_passes():
    """A valid key at the required tier reaches the route handler."""
    tc = _make_app_with_auth(_patched_middleware(valid=True, tier="paid"))
    response = tc.post(
        "/api/services/vllm/allocate",
        headers={"Authorization": "Bearer CFG-KIWI-GOOD-GOOD-GOOD"},
    )
    assert response.status_code == 200


def test_free_tier_key_rejected_when_min_is_paid():
    """A valid but under-tier key is refused with the required tier in the detail."""
    tc = _make_app_with_auth(_patched_middleware(valid=True, tier="free"))
    response = tc.post(
        "/api/services/vllm/allocate",
        headers={"Authorization": "Bearer CFG-KIWI-FREE-FREE-FREE"},
    )
    assert response.status_code == 403
    assert "paid" in response.json()["detail"]


def test_cache_prevents_second_heimdall_call():
    """The second request with the same key is served from the TTL cache."""
    middleware = _patched_middleware(valid=True, tier="paid")
    tc = _make_app_with_auth(middleware)
    headers = {"Authorization": "Bearer CFG-KIWI-CACHED-KEY-1"}
    for _ in range(2):
        tc.post("/api/services/vllm/allocate", headers=headers)
    # Heimdall should only have been consulted once — second hit is cached.
    assert middleware._validate_against_heimdall.call_count == 1  # type: ignore[attr-defined]


def test_from_env_returns_none_without_heimdall_url(monkeypatch):
    """from_env() is a no-op when HEIMDALL_URL is not configured."""
    monkeypatch.delenv("HEIMDALL_URL", raising=False)
    assert HeimdallAuthMiddleware.from_env() is None


def test_from_env_returns_middleware_when_set(monkeypatch):
    """from_env() builds a middleware pointed at the configured URL."""
    monkeypatch.setenv("HEIMDALL_URL", "http://heimdall.test")
    middleware = HeimdallAuthMiddleware.from_env()
    assert middleware is not None
    assert middleware._heimdall == "http://heimdall.test"
|
|
||||||
|
|
@ -1,215 +0,0 @@
|
||||||
# tests/test_resources/test_coordinator_probe.py
|
|
||||||
"""
|
|
||||||
Unit tests for _run_instance_probe_loop in coordinator/app.py.
|
|
||||||
|
|
||||||
Covers:
|
|
||||||
- healthy path: /health → 200 → state transitions starting → running
|
|
||||||
- timeout path: no healthy response within _PROBE_TIMEOUT_S → starting → stopped
|
|
||||||
- cleanup path: non-starting instance cleans up its start_times entry
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.app import (
|
|
||||||
_PROBE_TIMEOUT_S,
|
|
||||||
_run_instance_probe_loop,
|
|
||||||
)
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceInstance, ServiceRegistry
|
|
||||||
|
|
||||||
|
|
||||||
# ── helpers ──────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _inst(**overrides) -> ServiceInstance:
    """Build a ServiceInstance with sensible defaults, overridable per test."""
    fields = dict(
        service="vllm", node_id="node1", gpu_id=0,
        state="starting", model="qwen", url="http://localhost:8000",
    )
    fields.update(overrides)
    return ServiceInstance(**fields)


def _registry(*instances: ServiceInstance) -> MagicMock:
    """Mock ServiceRegistry whose all_instances() returns the given instances."""
    registry = MagicMock(spec=ServiceRegistry)
    registry.all_instances.return_value = list(instances)
    return registry


def _health_resp(status: int = 200) -> MagicMock:
    """Context-manager mock that simulates an HTTP response with *status*."""
    response = MagicMock()
    response.status = status
    response.__enter__ = lambda self: response
    response.__exit__ = MagicMock(return_value=False)
    return response
|
|
||||||
|
|
||||||
|
|
||||||
async def _one_tick(coro_fn, registry, *, time_val: float = 1000.0, **url_patch):
    """
    Run the probe loop for exactly one iteration then cancel it.

    asyncio.sleep is patched to return immediately on the first call
    and raise CancelledError on the second (ending the loop cleanly).

    Extra keyword arguments are forwarded to the urllib.request.urlopen
    patch (e.g. return_value=..., side_effect=...); with none given,
    urlopen is left unpatched.
    """
    calls = 0

    async def _fake_sleep(_delay):
        nonlocal calls
        calls += 1
        if calls > 1:
            raise asyncio.CancelledError()

    patches = [
        patch("asyncio.sleep", new=_fake_sleep),
        patch("time.time", return_value=time_val),
    ]
    if url_patch:
        patches.append(patch("urllib.request.urlopen", **url_patch))

    # Use patcher.start()/stop() rather than driving __enter__/__exit__ by
    # hand: the original kept an unused list of __enter__ results and always
    # passed (None, None, None) to __exit__, even with an exception in flight.
    for p in patches:
        p.start()
    try:
        await coro_fn(registry)
    except asyncio.CancelledError:
        pass
    finally:
        for p in reversed(patches):
            p.stop()
|
|
||||||
|
|
||||||
|
|
||||||
# ── tests ────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_probe_transitions_starting_to_running():
    """GET /health → 200 while in starting state → upsert_instance(state='running')."""
    reg = _registry(_inst(state="starting", url="http://localhost:8000"))

    calls = 0

    # First sleep returns immediately (one probe tick runs); second cancels the loop.
    async def fake_sleep(_delay):
        nonlocal calls
        calls += 1
        if calls > 1:
            raise asyncio.CancelledError()

    with patch("asyncio.sleep", new=fake_sleep), \
            patch("time.time", return_value=1000.0), \
            patch("urllib.request.urlopen", return_value=_health_resp(200)):
        try:
            await _run_instance_probe_loop(reg)
        except asyncio.CancelledError:
            pass

    # The healthy probe must rewrite the instance record as 'running'.
    reg.upsert_instance.assert_called_once_with(
        service="vllm", node_id="node1", gpu_id=0,
        state="running", model="qwen", url="http://localhost:8000",
    )


@pytest.mark.asyncio
async def test_probe_transitions_starting_to_stopped_on_timeout():
    """No healthy response + time past _PROBE_TIMEOUT_S → upsert_instance(state='stopped').

    Tick 1: seeds start_times[key] = 1000.0
    Tick 2: time has advanced past _PROBE_TIMEOUT_S → timeout fires → stopped
    Tick 3: CancelledError exits the loop
    """
    reg = _registry(_inst(state="starting", url="http://localhost:8000"))

    tick = 0
    # Tick 1: t=1000 (seed); Tick 2: t=far_future (timeout fires)
    times = [1000.0, 1000.0 + _PROBE_TIMEOUT_S + 1.0]

    async def fake_sleep(_delay):
        nonlocal tick
        tick += 1
        if tick > 2:
            raise asyncio.CancelledError()

    # urlopen always fails, so the instance never looks healthy; the times
    # sequence is repeated because time.time may be read more than once per tick.
    with patch("asyncio.sleep", new=fake_sleep), \
            patch("time.time", side_effect=times * 10), \
            patch("urllib.request.urlopen", side_effect=OSError("connection refused")):
        try:
            await _run_instance_probe_loop(reg)
        except asyncio.CancelledError:
            pass

    reg.upsert_instance.assert_called_once_with(
        service="vllm", node_id="node1", gpu_id=0,
        state="stopped", model="qwen", url="http://localhost:8000",
    )


@pytest.mark.asyncio
async def test_probe_cleans_up_start_times_for_non_starting():
    """
    An instance that is no longer in 'starting' state should not cause
    upsert_instance to be called, and its key should be removed from start_times.

    We verify this indirectly: run two ticks — first with state='starting' (seeds
    the key and transitions to running), second with the updated registry returning
    state='running' (should not call upsert again).
    """
    starting_inst = _inst(state="starting", url="http://localhost:8000")
    running_inst = _inst(state="running", url="http://localhost:8000")

    tick = 0

    # First tick: instance is starting → transitions to running
    # Second tick: registry now returns running → no upsert
    # Third tick: cancel
    def instances_side_effect():
        if tick <= 1:
            return [starting_inst]
        return [running_inst]

    reg = MagicMock(spec=ServiceRegistry)
    reg.all_instances.side_effect = instances_side_effect

    async def fake_sleep(_delay):
        nonlocal tick
        tick += 1
        if tick > 2:
            raise asyncio.CancelledError()

    with patch("asyncio.sleep", new=fake_sleep), \
            patch("time.time", return_value=1000.0), \
            patch("urllib.request.urlopen", return_value=_health_resp(200)):
        try:
            await _run_instance_probe_loop(reg)
        except asyncio.CancelledError:
            pass

    # upsert should have been called exactly once (the starting→running transition)
    assert reg.upsert_instance.call_count == 1
    reg.upsert_instance.assert_called_once_with(
        service="vllm", node_id="node1", gpu_id=0,
        state="running", model="qwen", url="http://localhost:8000",
    )


@pytest.mark.asyncio
async def test_probe_no_url_does_not_attempt_health_check():
    """Instance with no URL stays in starting state (no health check, no timeout yet)."""
    reg = _registry(_inst(state="starting", url=None))

    tick = 0

    async def fake_sleep(_delay):
        nonlocal tick
        tick += 1
        if tick > 1:
            raise asyncio.CancelledError()

    with patch("asyncio.sleep", new=fake_sleep), \
            patch("time.time", return_value=1000.0), \
            patch("urllib.request.urlopen") as mock_urlopen:
        try:
            await _run_instance_probe_loop(reg)
        except asyncio.CancelledError:
            pass

    # No URL → nothing to probe and no state transition on the first tick.
    mock_urlopen.assert_not_called()
    reg.upsert_instance.assert_not_called()
|
|
||||||
|
|
@ -1,215 +0,0 @@
|
||||||
# tests/test_resources/test_docuvision.py
|
|
||||||
"""
|
|
||||||
Unit tests for cf-docuvision FastAPI service (circuitforge_core/resources/docuvision/app.py).
|
|
||||||
|
|
||||||
Covers:
|
|
||||||
- GET /health → status + model path
|
|
||||||
- POST /extract → image_b64, image_path, hint routing, metadata fields
|
|
||||||
- _parse_dolphin_output → JSON list path, table detection, plain-text fallback
|
|
||||||
- _image_from_request → missing both fields → 422; bad image_path → 404
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import io
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from fastapi.testclient import TestClient
|
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
import circuitforge_core.resources.docuvision.app as docuvision_module
|
|
||||||
from circuitforge_core.resources.docuvision.app import (
|
|
||||||
_parse_dolphin_output,
|
|
||||||
app,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ── fixtures ──────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _make_jpeg_b64(width: int = 10, height: int = 10) -> str:
    """Return a base64-encoded white JPEG of the given dimensions."""
    canvas = Image.new("RGB", (width, height), color=(255, 255, 255))
    out = io.BytesIO()
    canvas.save(out, format="JPEG")
    return base64.b64encode(out.getvalue()).decode()


@pytest.fixture(autouse=True)
def _reset_module_state():
    """Reset module-level model state between tests."""
    docuvision_module._model = None
    docuvision_module._processor = None
    docuvision_module._model_path = "/fake/model"
    docuvision_module._device = "cpu"
    yield
    # Drop any fakes a test injected so the next test starts clean.
    docuvision_module._model = None
    docuvision_module._processor = None
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def mock_model():
    """
    Inject fake model + processor into the module so _load_model() is skipped.

    The processor returns a dict-like with 'input_ids'; the model generate()
    returns a tensor-like whose decode produces a JSON string.
    """
    # Fake token-id tensor: only its shape is read (input_len = 5).
    fake_ids = MagicMock()
    fake_ids.shape = [1, 5]  # input_len = 5

    # Processor output: supports ['input_ids'] indexing and .to(device).
    fake_inputs = {"input_ids": fake_ids}
    fake_inputs_obj = MagicMock()
    fake_inputs_obj.__getitem__ = lambda self, k: fake_inputs[k]
    fake_inputs_obj.to = lambda device: fake_inputs_obj

    fake_output = MagicMock()
    fake_output.__getitem__ = lambda self, idx: MagicMock()  # output_ids[0]

    fake_model = MagicMock()
    fake_model.generate.return_value = fake_output

    # decode() yields a canned Dolphin-style payload: one heading + one table,
    # which the /extract tests assert on.
    fake_processor = MagicMock()
    fake_processor.return_value = fake_inputs_obj
    fake_processor.decode.return_value = json.dumps([
        {"type": "heading", "text": "Invoice", "bbox": [0.0, 0.0, 1.0, 0.1]},
        {"type": "table", "text": "row1", "html": "<table><tr><td>row1</td></tr></table>",
         "bbox": [0.0, 0.1, 1.0, 0.5]},
    ])

    # Install the fakes at module level; the autouse fixture clears them after.
    docuvision_module._model = fake_model
    docuvision_module._processor = fake_processor
    return fake_model, fake_processor
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def client():
    """HTTP test client bound to the docuvision FastAPI app."""
    return TestClient(app)


# ── health ────────────────────────────────────────────────────────────────────

def test_health_returns_ok(client):
    """/health reports service status plus the configured model path."""
    response = client.get("/health")
    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "ok"
    assert body["model"] == "/fake/model"
|
|
||||||
|
|
||||||
|
|
||||||
# ── _parse_dolphin_output ────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_parse_json_list_elements():
    """A JSON list of typed elements becomes element objects in order."""
    payload = json.dumps([
        {"type": "heading", "text": "Title"},
        {"type": "paragraph", "text": "Body text"},
    ])
    elements, tables, raw_text = _parse_dolphin_output(payload)
    assert len(elements) == 2
    assert (elements[0].type, elements[0].text) == ("heading", "Title")
    assert elements[1].type == "paragraph"
    assert raw_text == "Title\nBody text"
    assert tables == []


def test_parse_json_table_extracted():
    """Table elements surface both in elements and in the tables list."""
    table_html = "<table><tr><td>A</td></tr></table>"
    payload = json.dumps([
        {"type": "table", "text": "row", "html": table_html,
         "bbox": [0.0, 0.0, 1.0, 0.5]},
    ])
    elements, tables, _ = _parse_dolphin_output(payload)
    assert len(tables) == 1
    assert tables[0].html == table_html
    assert tables[0].bbox == [0.0, 0.0, 1.0, 0.5]
    assert len(elements) == 1
    assert elements[0].type == "table"


def test_parse_plain_text_fallback():
    """Non-JSON output degrades to a single paragraph element."""
    blob = "This is not JSON at all."
    elements, tables, raw_text = _parse_dolphin_output(blob)
    assert len(elements) == 1
    assert elements[0].type == "paragraph"
    assert elements[0].text == blob
    assert tables == []
    assert raw_text == blob


def test_parse_empty_string_fallback():
    """Empty output still yields one (empty) paragraph element."""
    elements, _, _ = _parse_dolphin_output("")
    assert len(elements) == 1
    assert elements[0].type == "paragraph"
    assert elements[0].text == ""


def test_parse_json_missing_type_defaults_to_paragraph():
    """Elements without a 'type' field are treated as paragraphs."""
    elements, _, _ = _parse_dolphin_output(json.dumps([{"text": "no type field"}]))
    assert elements[0].type == "paragraph"
|
|
||||||
|
|
||||||
|
|
||||||
# ── POST /extract ─────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_extract_image_b64(client, mock_model):
    """Happy path: inline base64 image produces the full response shape."""
    response = client.post("/extract", json={"image_b64": _make_jpeg_b64(), "hint": "auto"})
    assert response.status_code == 200
    body = response.json()
    for field in ("elements", "raw_text", "tables"):
        assert field in body
    meta = body["metadata"]
    assert meta["hint"] == "auto"
    assert meta["model"] == "/fake/model"
    assert meta["width"] == 10
    assert meta["height"] == 10


def test_extract_hint_table_routes_correct_prompt(client, mock_model):
    """hint='table' routes to the table-specific prompt."""
    _, fake_processor = mock_model
    response = client.post("/extract", json={"image_b64": _make_jpeg_b64(), "hint": "table"})
    assert response.status_code == 200
    # Verify the processor was invoked with the table-specific prompt.
    processor_call = fake_processor.call_args
    assert "table" in processor_call.kwargs.get("text", "") or \
        "table" in str(processor_call)


def test_extract_hint_unknown_falls_back_to_auto(client, mock_model):
    """An unrecognised hint silently falls back to the 'auto' prompt."""
    response = client.post("/extract", json={"image_b64": _make_jpeg_b64(), "hint": "nonsense"})
    assert response.status_code == 200


def test_extract_image_path(tmp_path, client, mock_model):
    """An on-disk image referenced by path is loaded and measured."""
    image_file = tmp_path / "doc.png"
    Image.new("RGB", (8, 8), color=(0, 0, 0)).save(image_file)
    response = client.post("/extract", json={"image_path": str(image_file)})
    assert response.status_code == 200
    assert response.json()["metadata"]["width"] == 8


def test_extract_image_path_not_found(client, mock_model):
    """A dangling image_path is reported as 404."""
    response = client.post("/extract", json={"image_path": "/nonexistent/path/img.png"})
    assert response.status_code == 404


def test_extract_no_image_raises_422(client, mock_model):
    """Omitting both image_b64 and image_path is a validation error."""
    assert client.post("/extract", json={"hint": "auto"}).status_code == 422


def test_extract_response_includes_tables(client, mock_model):
    """Verify table objects surface in response when model returns table elements."""
    response = client.post("/extract", json={"image_b64": _make_jpeg_b64()})
    assert response.status_code == 200
    body = response.json()
    assert len(body["tables"]) == 1
    assert "<table>" in body["tables"][0]["html"]


def test_extract_device_in_metadata(client, mock_model):
    """The inference device is always reported in metadata."""
    response = client.post("/extract", json={"image_b64": _make_jpeg_b64()})
    assert response.status_code == 200
    assert "device" in response.json()["metadata"]
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
import asyncio
|
|
||||||
import pytest
|
|
||||||
from unittest.mock import AsyncMock, patch
|
|
||||||
from circuitforge_core.resources.coordinator.eviction_engine import EvictionEngine
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def lease_manager():
    """LeaseManager with one 8 GB GPU registered on node 'heimdall'."""
    manager = LeaseManager()
    manager.register_gpu("heimdall", 0, 8192)
    return manager


@pytest.fixture
def engine(lease_manager):
    """EvictionEngine over the fixture manager with a short eviction timeout."""
    return EvictionEngine(lease_manager=lease_manager, eviction_timeout_s=0.1)


@pytest.mark.asyncio
async def test_request_lease_grants_when_vram_available(engine, lease_manager):
    """A request that fits in free VRAM is granted immediately."""
    granted = await engine.request_lease(
        node_id="heimdall", gpu_id=0, mb=4096,
        service="peregrine", priority=1,
        agent_url="http://localhost:7701",
    )
    assert granted is not None
    assert granted.mb_granted == 4096
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_request_lease_evicts_and_grants(engine, lease_manager):
    """A higher-priority request evicts a lower-priority lease and is granted.

    The agent-side eviction is mocked; the actual lease release is simulated
    by scheduling it on the event loop shortly after the request starts waiting.
    """
    # Pre-fill with a low-priority lease
    big_lease = await lease_manager.try_grant(
        "heimdall", 0, 7000, "comfyui", priority=4
    )
    assert big_lease is not None

    # Mock the agent eviction call
    with patch(
        "circuitforge_core.resources.coordinator.eviction_engine.EvictionEngine._call_agent_evict",
        new_callable=AsyncMock,
    ) as mock_evict:
        mock_evict.return_value = True
        # Simulate the comfyui lease being released (as if the agent evicted it).
        # get_running_loop() is the correct call inside a coroutine —
        # get_event_loop() here has been deprecated since Python 3.10.
        asyncio.get_running_loop().call_later(
            0.05, lambda: asyncio.ensure_future(lease_manager.release(big_lease.lease_id))
        )
        lease = await engine.request_lease(
            node_id="heimdall", gpu_id=0, mb=4096,
            service="peregrine", priority=1,
            agent_url="http://localhost:7701",
        )
    assert lease is not None
    assert lease.holder_service == "peregrine"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_request_lease_returns_none_when_no_eviction_candidates(engine):
    """With nothing lower-priority to evict, the request is refused (None)."""
    # Occupy most of the GPU with a holder at higher priority than the requester.
    await engine.lease_manager.try_grant("heimdall", 0, 6000, "vllm", priority=1)
    # Requesting 4GB but no lower-priority leases exist
    refused = await engine.request_lease(
        node_id="heimdall", gpu_id=0, mb=4096,
        service="kiwi", priority=2,
        agent_url="http://localhost:7701",
    )
    assert refused is None
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
import signal
|
|
||||||
from unittest.mock import patch, call
|
|
||||||
import pytest
|
|
||||||
from circuitforge_core.resources.agent.eviction_executor import EvictionExecutor, EvictionResult
|
|
||||||
|
|
||||||
|
|
||||||
def test_evict_by_pid_sends_sigterm_then_sigkill():
    """If the process survives the grace period, SIGKILL follows SIGTERM."""
    executor = EvictionExecutor(grace_period_s=0.01)
    # pid_exists always True → grace period expires → SIGKILL fires
    with patch("os.kill") as mock_kill, \
            patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil:
        mock_psutil.pid_exists.return_value = True
        outcome = executor.evict_pid(pid=1234, grace_period_s=0.01)

    assert outcome.success is True
    sent = mock_kill.call_args_list
    assert call(1234, signal.SIGTERM) in sent
    assert call(1234, signal.SIGKILL) in sent


def test_evict_pid_succeeds_on_sigterm_alone():
    """A process that exits within the grace period needs no SIGKILL."""
    executor = EvictionExecutor(grace_period_s=0.1)
    with patch("os.kill"), \
            patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil:
        # Gone on the second existence check, i.e. right after SIGTERM.
        mock_psutil.pid_exists.side_effect = [True, False]
        outcome = executor.evict_pid(pid=5678, grace_period_s=0.01)
    assert outcome.success is True
    assert outcome.method == "sigterm"


def test_evict_pid_not_found_returns_failure():
    """Evicting a nonexistent pid fails with an explanatory message."""
    executor = EvictionExecutor()
    with patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil:
        mock_psutil.pid_exists.return_value = False
        outcome = executor.evict_pid(pid=9999)
    assert outcome.success is False
    assert "not found" in outcome.message.lower()


def test_eviction_result_is_immutable():
    """EvictionResult rejects attribute assignment after construction."""
    outcome = EvictionResult(success=True, method="sigterm", message="ok")
    with pytest.raises((AttributeError, TypeError)):
        outcome.success = False  # type: ignore
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
||||||
from unittest.mock import patch
|
|
||||||
from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor
|
|
||||||
|
|
||||||
|
|
||||||
# Two-GPU sample in nvidia-smi CSV form. Columns (inferred from the parse
# assertions below — confirm against GpuMonitor's query string):
# index, name, memory total MB, memory used MB, memory free MB
SAMPLE_NVIDIA_SMI_OUTPUT = (
    "0, Quadro RTX 4000, 8192, 6843, 1349\n"
    "1, Quadro RTX 4000, 8192, 721, 7471\n"
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_parse_returns_list_of_gpu_info():
|
|
||||||
monitor = GpuMonitor()
|
|
||||||
with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run:
|
|
||||||
mock_run.return_value.returncode = 0
|
|
||||||
mock_run.return_value.stdout = SAMPLE_NVIDIA_SMI_OUTPUT
|
|
||||||
gpus = monitor.poll()
|
|
||||||
assert len(gpus) == 2
|
|
||||||
assert gpus[0].gpu_id == 0
|
|
||||||
assert gpus[0].name == "Quadro RTX 4000"
|
|
||||||
assert gpus[0].vram_total_mb == 8192
|
|
||||||
assert gpus[0].vram_used_mb == 6843
|
|
||||||
assert gpus[0].vram_free_mb == 1349
|
|
||||||
|
|
||||||
|
|
||||||
def test_parse_second_gpu():
|
|
||||||
monitor = GpuMonitor()
|
|
||||||
with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run:
|
|
||||||
mock_run.return_value.returncode = 0
|
|
||||||
mock_run.return_value.stdout = SAMPLE_NVIDIA_SMI_OUTPUT
|
|
||||||
gpus = monitor.poll()
|
|
||||||
assert gpus[1].gpu_id == 1
|
|
||||||
assert gpus[1].vram_used_mb == 721
|
|
||||||
assert gpus[1].vram_free_mb == 7471
|
|
||||||
|
|
||||||
|
|
||||||
def test_poll_returns_empty_list_when_nvidia_smi_unavailable():
    """A missing nvidia-smi binary (FileNotFoundError) degrades to an empty list."""
    monitor = GpuMonitor()
    target = "circuitforge_core.resources.agent.gpu_monitor.subprocess.run"
    with patch(target, side_effect=FileNotFoundError):
        assert monitor.poll() == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_poll_returns_empty_list_on_nonzero_exit():
    """A failing nvidia-smi invocation (exit code != 0) yields no GPUs."""
    monitor = GpuMonitor()
    with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as run_mock:
        run_mock.return_value.returncode = 1
        run_mock.return_value.stdout = ""
        assert monitor.poll() == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_poll_skips_malformed_lines():
    """Rows with unparseable numeric fields are dropped; valid rows survive."""
    monitor = GpuMonitor()
    malformed = "0, RTX 4000, 8192, not_a_number, 1024\n1, RTX 4000, 8192, 512, 7680\n"
    with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as run_mock:
        run_mock.return_value.returncode = 0
        run_mock.return_value.stdout = malformed
        gpus = monitor.poll()

    assert len(gpus) == 1
    assert gpus[0].gpu_id == 1
|
|
||||||
|
|
@ -1,221 +0,0 @@
|
||||||
"""Integration test: full lease → eviction → re-grant cycle.
|
|
||||||
|
|
||||||
Runs coordinator in-process (no subprocesses, no real nvidia-smi).
|
|
||||||
Uses TestClient for HTTP, mocks AgentSupervisor to return fixed node state.
|
|
||||||
"""
|
|
||||||
import pytest
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
from fastapi.testclient import TestClient
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
|
|
||||||
from circuitforge_core.resources.coordinator.app import create_coordinator_app
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry
|
|
||||||
from circuitforge_core.resources.models import GpuInfo, NodeInfo
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def system():
    """Create an in-process coordinator system with 8GB GPU and mock supervisor."""
    # Real lease bookkeeping, seeded with a single local GPU of 8192 MB.
    lease_manager = LeaseManager()
    lease_manager.register_gpu("local", 0, 8192)

    # The supervisor is fully mocked: one node with one idle 8 GB GPU.
    mock_supervisor = MagicMock(spec=AgentSupervisor)
    mock_supervisor.all_nodes.return_value = [
        NodeInfo(
            node_id="local",
            agent_url="http://localhost:7701",
            gpus=[GpuInfo(
                gpu_id=0,
                name="RTX 4000",
                vram_total_mb=8192,
                vram_used_mb=0,
                vram_free_mb=8192,
            )],
            last_heartbeat=0.0,
        )
    ]
    # get_node_info deliberately returns a node with no GPUs; the lease tests
    # below rely on all_nodes(), not per-node detail.
    mock_supervisor.get_node_info.return_value = NodeInfo(
        node_id="local",
        agent_url="http://localhost:7701",
        gpus=[],
        last_heartbeat=0.0,
    )

    profile_registry = ProfileRegistry()
    app = create_coordinator_app(
        lease_manager=lease_manager,
        profile_registry=profile_registry,
        agent_supervisor=mock_supervisor,
        service_registry=ServiceRegistry(),
    )
    client = TestClient(app)
    # Tests receive both the HTTP client and the lease manager for direct inspection.
    return client, lease_manager
|
|
||||||
|
|
||||||
|
|
||||||
def test_full_lease_cycle(system):
    """Grant a lease, see it listed, release it, see it disappear."""
    client, _ = system

    def active_ids():
        listing = client.get("/api/leases")
        assert listing.status_code == 200
        return [entry["lease_id"] for entry in listing.json()["leases"]]

    # Grant a lease.
    grant = client.post("/api/leases", json={
        "node_id": "local",
        "gpu_id": 0,
        "mb": 4096,
        "service": "peregrine",
        "priority": 1,
    })
    assert grant.status_code == 200
    granted = grant.json()["lease"]
    lease_id = granted["lease_id"]
    assert granted["mb_granted"] == 4096
    assert granted["holder_service"] == "peregrine"

    # It appears in the active set.
    assert lease_id in active_ids()

    # Release it.
    release = client.delete(f"/api/leases/{lease_id}")
    assert release.status_code == 200
    assert release.json()["released"] is True

    # And it is gone.
    assert lease_id not in active_ids()
|
|
||||||
|
|
||||||
|
|
||||||
def test_vram_exhaustion_returns_503(system):
    """With the GPU saturated by an equal-priority lease, a new request gets 503."""
    client, _ = system

    def request_lease(service, mb):
        return client.post("/api/leases", json={
            "node_id": "local",
            "gpu_id": 0,
            "mb": mb,
            "service": service,
            "priority": 1,
        })

    # Saturate GPU 0 with a high-priority lease.
    assert request_lease("vllm", 8000).status_code == 200

    # Same priority means no eviction candidates, so this must be refused.
    denied = request_lease("kiwi", 2000)
    assert denied.status_code == 503
    assert "Insufficient VRAM" in denied.json()["detail"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_auto_detect_profile_for_8gb():
    """An 8192 MB GPU auto-detects to the single-gpu-8gb profile."""
    registry = ProfileRegistry()
    eight_gb = GpuInfo(
        gpu_id=0,
        name="RTX 4000",
        vram_total_mb=8192,
        vram_used_mb=0,
        vram_free_mb=8192,
    )
    detected = registry.auto_detect([eight_gb])
    assert detected.name == "single-gpu-8gb"
    # The detected profile carries service configuration.
    assert hasattr(detected, "services")
|
|
||||||
|
|
||||||
|
|
||||||
def test_node_endpoint_shows_nodes(system):
    """GET /api/nodes reflects the single mocked node and its GPU."""
    client, _ = system
    response = client.get("/api/nodes")
    assert response.status_code == 200

    nodes = response.json()["nodes"]
    assert len(nodes) == 1
    node = nodes[0]
    assert node["node_id"] == "local"
    assert node["agent_url"] == "http://localhost:7701"
    assert len(node["gpus"]) == 1
    assert node["gpus"][0]["name"] == "RTX 4000"
|
|
||||||
|
|
||||||
|
|
||||||
def test_profiles_endpoint_returns_public_profiles(system):
    """GET /api/profiles lists the standard public profiles."""
    client, _ = system
    response = client.get("/api/profiles")
    assert response.status_code == 200
    listed = {p["name"] for p in response.json()["profiles"]}
    # The common public single-GPU profiles must all be present.
    assert {"single-gpu-8gb", "single-gpu-6gb", "single-gpu-2gb"} <= listed
|
|
||||||
|
|
||||||
|
|
||||||
def test_multiple_leases_tracked_independently(system):
    """Two concurrent leases are tracked and released independently."""
    client, _ = system

    def grant(service):
        response = client.post("/api/leases", json={
            "node_id": "local",
            "gpu_id": 0,
            "mb": 2048,
            "service": service,
            "priority": 2,
        })
        assert response.status_code == 200
        return response.json()["lease"]["lease_id"]

    def active_ids():
        return [entry["lease_id"] for entry in client.get("/api/leases").json()["leases"]]

    first = grant("peregrine")
    second = grant("kiwi")

    # Both leases are live.
    ids = active_ids()
    assert first in ids
    assert second in ids
    assert len(ids) == 2

    # Releasing the first leaves only the second.
    assert client.delete(f"/api/leases/{first}").status_code == 200
    ids = active_ids()
    assert first not in ids
    assert second in ids
    assert len(ids) == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_delete_nonexistent_lease_returns_404(system):
    """Deleting an unknown lease id yields 404 with a 'not found' detail."""
    client, _ = system
    response = client.delete("/api/leases/nonexistent-lease-id")
    assert response.status_code == 404
    assert "not found" in response.json()["detail"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_health_endpoint_returns_ok(system):
    """GET /api/health reports status ok."""
    client, _ = system
    response = client.get("/api/health")
    assert response.status_code == 200
    assert response.json()["status"] == "ok"
|
|
||||||
|
|
@ -1,85 +0,0 @@
|
||||||
import pytest
|
|
||||||
from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def mgr():
    """LeaseManager with one registered 8192 MB GPU on node 'heimdall'."""
    manager = LeaseManager()
    manager.register_gpu(node_id="heimdall", gpu_id=0, total_mb=8192)
    return manager
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_grant_succeeds_when_vram_available(mgr):
    """A request that fits in free VRAM is granted with matching metadata."""
    granted = await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=4096,
        service="peregrine", priority=1,
    )
    assert granted is not None
    assert granted.mb_granted == 4096
    assert granted.node_id == "heimdall"
    assert granted.gpu_id == 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_grant_fails_when_vram_insufficient(mgr):
    """A second request that would overflow the GPU is denied (None)."""
    await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=7000, service="vllm", priority=1
    )
    denied = await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=2000, service="kiwi", priority=2
    )
    assert denied is None
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_release_frees_vram(mgr):
    """Releasing a lease returns its VRAM to the pool for later grants."""
    first = await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=7000, service="vllm", priority=1
    )
    assert first is not None
    assert await mgr.release(first.lease_id) is True
    # The freed 7000 MB can immediately be re-granted.
    second = await mgr.try_grant(
        node_id="heimdall", gpu_id=0, mb=7000, service="comfyui", priority=4
    )
    assert second is not None
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_release_unknown_lease_returns_false(mgr):
    """Releasing an id that was never granted is a no-op returning False."""
    assert await mgr.release("nonexistent-id") is False
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_get_eviction_candidates_returns_lower_priority_leases(mgr):
    """Only leases with lower priority than the requester are evictable."""
    # priority 4 (lower) is evictable by a priority-2 requester; priority 1 is not.
    await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=3000,
                        service="comfyui", priority=4)
    await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=2000,
                        service="ollama", priority=1)
    candidates = mgr.get_eviction_candidates(
        node_id="heimdall", gpu_id=0,
        needed_mb=3000, requester_priority=2,
    )
    assert [c.holder_service for c in candidates] == ["comfyui"]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_list_leases_for_gpu(mgr):
    """list_leases returns every active lease on the given GPU."""
    for service, mb, priority in (("peregrine", 1024, 1), ("kiwi", 512, 2)):
        await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=mb,
                            service=service, priority=priority)
    assert len(mgr.list_leases(node_id="heimdall", gpu_id=0)) == 2
|
|
||||||
|
|
||||||
|
|
||||||
def test_register_gpu_sets_total(mgr):
    """register_gpu records the GPU's total VRAM capacity."""
    assert mgr.gpu_total_mb("heimdall", 0) == 8192
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
async def test_used_mb_tracks_grants():
    """used_mb is the running sum of all granted allocations."""
    manager = LeaseManager()
    manager.register_gpu("heimdall", 0, 8192)
    await manager.try_grant("heimdall", 0, 3000, "a", 1)
    await manager.try_grant("heimdall", 0, 2000, "b", 2)
    assert manager.used_mb("heimdall", 0) == 3000 + 2000
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
||||||
import time
|
|
||||||
import pytest
|
|
||||||
from circuitforge_core.resources.models import VRAMLease, GpuInfo, NodeInfo
|
|
||||||
|
|
||||||
|
|
||||||
def test_vram_lease_create_assigns_unique_ids():
    """Two otherwise-identical leases still receive distinct lease_ids."""
    kwargs = dict(gpu_id=0, node_id="heimdall", mb=4096,
                  service="peregrine", priority=1)
    lease_a = VRAMLease.create(**kwargs)
    lease_b = VRAMLease.create(**kwargs)
    assert lease_a.lease_id != lease_b.lease_id
|
|
||||||
|
|
||||||
|
|
||||||
def test_vram_lease_create_with_ttl_sets_expiry():
    """A ttl_s of 60 puts expires_at 60 seconds after creation time."""
    ttl = 60.0
    before = time.time()
    lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=2048,
                             service="kiwi", priority=2, ttl_s=ttl)
    after = time.time()
    assert before + ttl <= lease.expires_at
    assert lease.expires_at <= after + ttl
|
|
||||||
|
|
||||||
|
|
||||||
def test_vram_lease_create_no_ttl_has_zero_expiry():
    """Without a ttl_s, expires_at stays at the 0.0 sentinel (no expiry)."""
    lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=1024,
                             service="snipe", priority=2)
    assert lease.expires_at == 0.0
|
|
||||||
|
|
||||||
|
|
||||||
def test_vram_lease_is_immutable():
    """VRAMLease fields cannot be reassigned after creation."""
    frozen = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=1024,
                              service="snipe", priority=2)
    with pytest.raises((AttributeError, TypeError)):
        frozen.mb_granted = 999  # type: ignore
|
|
||||||
|
|
||||||
|
|
||||||
def test_gpu_info_fields():
    """GpuInfo stores the free-VRAM figure as given."""
    info = GpuInfo(gpu_id=0, name="RTX 4000", vram_total_mb=8192,
                   vram_used_mb=2048, vram_free_mb=6144)
    assert info.vram_free_mb == 8192 - 2048
|
|
||||||
|
|
||||||
|
|
||||||
def test_node_info_fields():
    """NodeInfo carries its id and list of attached GPUs."""
    only_gpu = GpuInfo(gpu_id=0, name="RTX 4000", vram_total_mb=8192,
                       vram_used_mb=0, vram_free_mb=8192)
    node = NodeInfo(node_id="heimdall", agent_url="http://localhost:7701",
                    gpus=[only_gpu], last_heartbeat=time.time())
    assert node.node_id == "heimdall"
    assert len(node.gpus) == 1
|
|
||||||
|
|
@ -1,82 +0,0 @@
|
||||||
import pytest
|
|
||||||
from circuitforge_core.resources.coordinator.node_selector import select_node
|
|
||||||
from circuitforge_core.resources.coordinator.agent_supervisor import AgentRecord
|
|
||||||
from circuitforge_core.resources.models import GpuInfo
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
|
|
||||||
|
|
||||||
def _make_agent(node_id: str, free_mb: int, online: bool = True) -> AgentRecord:
    """Build an AgentRecord with one 8192 MB GPU exposing `free_mb` of free VRAM."""
    record = AgentRecord(node_id=node_id, agent_url=f"http://{node_id}:7701")
    total = 8192
    record.gpus = [GpuInfo(gpu_id=0, name="RTX", vram_total_mb=total,
                           vram_used_mb=total - free_mb, vram_free_mb=free_mb)]
    record.online = online
    return record
|
|
||||||
|
|
||||||
|
|
||||||
def test_selects_node_with_most_free_vram():
    """With no warm (resident) nodes, the node with more free VRAM wins."""
    agents = {name: _make_agent(name, free_mb=mb)
              for name, mb in (("a", 2000), ("b", 6000))}
    chosen = select_node(agents, "vllm", ProfileRegistry(), resident_keys=set())
    assert chosen == ("b", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def test_prefers_warm_node_even_with_less_free_vram():
    """A node already hosting the service (resident key) beats a colder, roomier one."""
    agents = {
        "a": _make_agent("a", free_mb=2000),
        "b": _make_agent("b", free_mb=6000),
    }
    chosen = select_node(agents, "vllm", ProfileRegistry(), resident_keys={"a:vllm"})
    assert chosen == ("a", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def test_excludes_offline_nodes():
    """Offline nodes are never selected, even with more free VRAM."""
    agents = {
        "a": _make_agent("a", free_mb=8000, online=False),
        "b": _make_agent("b", free_mb=2000, online=True),
    }
    chosen = select_node(agents, "vllm", ProfileRegistry(), resident_keys=set())
    assert chosen == ("b", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def test_returns_none_when_no_node_has_profile_for_service():
    """An unknown service name has no eligible node; selection returns None."""
    agents = {"a": _make_agent("a", free_mb=8000)}
    chosen = select_node(agents, "cf-nonexistent-service", ProfileRegistry(),
                         resident_keys=set())
    assert chosen is None
|
|
||||||
|
|
||||||
|
|
||||||
def test_returns_none_when_no_agents():
    """An empty agent map yields no selection."""
    assert select_node({}, "vllm", ProfileRegistry(), resident_keys=set()) is None
|
|
||||||
|
|
||||||
|
|
||||||
def test_prefers_node_that_fully_fits_service_over_one_that_does_not():
    """can_fit requires free_mb >= service max_mb (full ceiling, not half).

    9500 MB guarantees above all profile ceilings (max is 9000); 1000 MB is below all.
    """
    agents = {
        "a": _make_agent("a", free_mb=1000),
        "b": _make_agent("b", free_mb=9500),
    }
    chosen = select_node(agents, "vllm", ProfileRegistry(), resident_keys=set())
    # "b" is the only node in the preferred (can_fit) pool.
    assert chosen == ("b", 0)
|
|
||||||
|
|
||||||
|
|
||||||
def test_falls_back_to_best_effort_when_no_node_fully_fits():
    """When nothing can_fit, select_node returns the best-VRAM node as fallback."""
    agents = {
        "a": _make_agent("a", free_mb=1000),
        "b": _make_agent("b", free_mb=2000),
    }
    # Neither node fully fits the service; fallback picks highest effective_free_mb.
    chosen = select_node(agents, "vllm", ProfileRegistry(), resident_keys=set())
    assert chosen == ("b", 0)
|
|
||||||
|
|
@ -1,87 +0,0 @@
|
||||||
# tests/test_resources/test_node_store.py
|
|
||||||
"""Unit tests for NodeStore — SQLite persistence layer for known agent nodes."""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from circuitforge_core.resources.coordinator.node_store import NodeStore
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def store(tmp_path: Path) -> NodeStore:
    """Fresh NodeStore backed by a throwaway SQLite file."""
    return NodeStore(db_path=tmp_path / "test-nodes.db")
|
|
||||||
|
|
||||||
|
|
||||||
def test_upsert_and_all(store: NodeStore) -> None:
    """A single upsert shows up verbatim in all()."""
    store.upsert("heimdall", "http://127.0.0.1:7701")
    assert store.all() == [("heimdall", "http://127.0.0.1:7701")]
|
|
||||||
|
|
||||||
|
|
||||||
def test_upsert_updates_url(store: NodeStore) -> None:
    """Upserting the same node_id twice keeps one row with the newest URL."""
    store.upsert("navi", "http://10.1.10.10:7701")
    store.upsert("navi", "http://10.1.10.10:7702")
    rows = store.all()
    assert len(rows) == 1
    assert rows[0][1] == "http://10.1.10.10:7702"
|
|
||||||
|
|
||||||
|
|
||||||
def test_multiple_nodes(store: NodeStore) -> None:
    """Distinct node_ids each get their own row."""
    entries = (
        ("heimdall", "http://127.0.0.1:7701"),
        ("navi", "http://10.1.10.10:7701"),
        ("strahl", "http://10.1.10.20:7701"),
    )
    for node_id, url in entries:
        store.upsert(node_id, url)
    assert len(store.all()) == 3
|
|
||||||
|
|
||||||
|
|
||||||
def test_remove(store: NodeStore) -> None:
    """remove() deletes exactly the named node."""
    store.upsert("heimdall", "http://127.0.0.1:7701")
    store.upsert("navi", "http://10.1.10.10:7701")
    store.remove("navi")
    remaining = [row[0] for row in store.all()]
    assert "navi" not in remaining
    assert "heimdall" in remaining
|
|
||||||
|
|
||||||
|
|
||||||
def test_prune_stale_removes_old_entries(store: NodeStore) -> None:
    """Nodes last seen beyond max_age_days are pruned; fresh ones survive."""
    # Insert a node whose last_seen is ~40 days ago, bypassing upsert()
    # (which would stamp the current time).
    forty_days_ago = time.time() - 40 * 86400
    store._conn.execute(
        "INSERT INTO known_nodes (node_id, agent_url, last_seen) VALUES (?, ?, ?)",
        ("ghost", "http://dead:7701", forty_days_ago),
    )
    store._conn.commit()
    store.upsert("live", "http://live:7701")

    assert store.prune_stale(max_age_days=30) == 1
    survivors = [row[0] for row in store.all()]
    assert "ghost" not in survivors
    assert "live" in survivors
|
|
||||||
|
|
||||||
|
|
||||||
def test_prune_stale_keeps_recent(store: NodeStore) -> None:
    """A just-upserted node is never pruned."""
    store.upsert("recent", "http://recent:7701")
    assert store.prune_stale(max_age_days=30) == 0
    assert len(store.all()) == 1
|
|
||||||
|
|
||||||
|
|
||||||
def test_all_empty(store: NodeStore) -> None:
    """A brand-new store reports no known nodes."""
    assert store.all() == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_db_persists_across_instances(tmp_path: Path) -> None:
    """Data written by one NodeStore instance is visible to a new one on the same file."""
    db = tmp_path / "shared.db"

    writer = NodeStore(db_path=db)
    writer.upsert("navi", "http://10.1.10.10:7701")
    writer.close()

    reader = NodeStore(db_path=db)
    rows = reader.all()
    assert len(rows) == 1
    assert rows[0][0] == "navi"
    reader.close()
|
|
||||||
|
|
@ -1,176 +0,0 @@
|
||||||
# tests/test_resources/test_ollama_adopt.py
|
|
||||||
"""
|
|
||||||
Tests for the Ollama adopt-if-running path:
|
|
||||||
- ProcessSpec: adopt and health_path fields parsed from YAML
|
|
||||||
- ServiceManager.start(): adopt path claims running service; falls through if not running
|
|
||||||
- ServiceManager.is_running(): adopt path uses health probe, not proc table
|
|
||||||
- ServiceInstance.health_path persists through upsert_instance
|
|
||||||
- Probe loop uses inst.health_path instead of hardcoded /health
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from circuitforge_core.resources.agent.service_manager import ServiceManager
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry
|
|
||||||
from circuitforge_core.resources.profiles.schema import GpuProfile, ProcessSpec, ServiceProfile, load_profile
|
|
||||||
|
|
||||||
|
|
||||||
# ── ProcessSpec schema ────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_process_spec_defaults():
    """adopt defaults to False and health_path to '/health'."""
    default_spec = ProcessSpec(exec_path="/usr/local/bin/ollama")
    assert default_spec.adopt is False
    assert default_spec.health_path == "/health"
|
|
||||||
|
|
||||||
|
|
||||||
def test_process_spec_adopt_fields():
    """Explicit adopt/health_path values are preserved on the spec."""
    adopted = ProcessSpec(
        exec_path="/usr/local/bin/ollama",
        adopt=True,
        health_path="/api/tags",
        port=11434,
        host_port=11434,
    )
    assert adopted.adopt is True
    assert adopted.health_path == "/api/tags"
|
|
||||||
|
|
||||||
|
|
||||||
def test_profile_yaml_parses_adopt(tmp_path: Path):
    # Round-trip check: adopt, health_path and host_port survive YAML parsing.
    # NOTE(review): the YAML literal's indentation was lost in extraction and
    # is reconstructed here with conventional 2-space nesting — confirm.
    yaml_text = """\
schema_version: 1
name: test
services:
  ollama:
    max_mb: 4096
    priority: 1
    managed:
      type: process
      adopt: true
      exec_path: /usr/local/bin/ollama
      args_template: serve
      port: 11434
      host_port: 11434
      health_path: /api/tags
"""
    p = tmp_path / "profile.yaml"
    p.write_text(yaml_text)
    profile = load_profile(p)
    spec = profile.services["ollama"].managed
    assert isinstance(spec, ProcessSpec)
    assert spec.adopt is True
    assert spec.health_path == "/api/tags"
    assert spec.host_port == 11434
|
|
||||||
|
|
||||||
|
|
||||||
# ── ServiceManager adopt path ─────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _make_manager_with_ollama(advertise_host: str = "127.0.0.1") -> ServiceManager:
    """ServiceManager for node 'heimdall' whose profile defines an adoptable ollama."""
    ollama_spec = ProcessSpec(
        exec_path="/usr/local/bin/ollama",
        args_template="serve",
        port=11434,
        host_port=11434,
        adopt=True,
        health_path="/api/tags",
    )
    profile = GpuProfile(
        schema_version=1,
        name="test",
        services={
            "ollama": ServiceProfile(max_mb=4096, priority=1, managed=ollama_spec),
        },
    )
    return ServiceManager(node_id="heimdall", profile=profile,
                          advertise_host=advertise_host)
|
|
||||||
|
|
||||||
|
|
||||||
def test_start_adopt_claims_running_service():
    """When Ollama is already healthy, start() returns its URL without spawning a process."""
    mgr = _make_manager_with_ollama()
    with patch.object(mgr, "_probe_health", return_value=True) as probe:
        assert mgr.start("ollama", gpu_id=0, params={}) == "http://127.0.0.1:11434"
    probe.assert_called_once_with(11434, "/api/tags")
    assert "ollama" not in mgr._procs  # adopted, not spawned
|
|
||||||
|
|
||||||
|
|
||||||
def test_start_adopt_spawns_when_not_running():
    """When Ollama is not yet running, start() spawns it normally."""
    mgr = _make_manager_with_ollama()
    fake_proc = MagicMock()
    fake_proc.poll.return_value = None  # still alive

    with patch.object(mgr, "_probe_health", return_value=False), \
         patch("subprocess.Popen", return_value=fake_proc) as popen:
        started_url = mgr.start("ollama", gpu_id=0, params={})

    assert started_url == "http://127.0.0.1:11434"
    popen.assert_called_once()
    assert "ollama" in mgr._procs
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_running_adopt_uses_health_probe():
    """is_running() for adopt=True services checks the health endpoint, not the proc table."""
    mgr = _make_manager_with_ollama()
    for healthy in (True, False):
        with patch.object(mgr, "_probe_health", return_value=healthy):
            assert mgr.is_running("ollama") is healthy
|
|
||||||
|
|
||||||
|
|
||||||
def test_probe_health_returns_true_on_200():
    """An HTTP 200 from the health endpoint counts as healthy."""
    mgr = _make_manager_with_ollama()
    response = MagicMock()
    response.status = 200
    # urlopen is used as a context manager; make the mock usable in `with`.
    response.__enter__ = lambda s: response
    response.__exit__ = MagicMock(return_value=False)

    with patch("urllib.request.urlopen", return_value=response):
        assert mgr._probe_health(11434, "/api/tags") is True
|
|
||||||
|
|
||||||
|
|
||||||
def test_probe_health_returns_false_on_connection_error():
    """A connection failure maps to 'not healthy' instead of raising."""
    mgr = _make_manager_with_ollama()
    with patch("urllib.request.urlopen", side_effect=OSError("refused")):
        assert mgr._probe_health(11434, "/api/tags") is False
|
|
||||||
|
|
||||||
|
|
||||||
# ── ServiceRegistry health_path ───────────────────────────────────────────────
|
|
||||||
|
|
||||||
def test_upsert_instance_stores_health_path():
    """An explicit health_path is persisted on the ServiceInstance."""
    registry = ServiceRegistry()
    instance = registry.upsert_instance(
        service="ollama", node_id="heimdall", gpu_id=0,
        state="running", model=None, url="http://127.0.0.1:11434",
        health_path="/api/tags",
    )
    assert instance.health_path == "/api/tags"
|
|
||||||
|
|
||||||
|
|
||||||
def test_upsert_instance_default_health_path():
    """Omitting health_path falls back to the '/health' default."""
    registry = ServiceRegistry()
    instance = registry.upsert_instance(
        service="vllm", node_id="heimdall", gpu_id=0,
        state="starting", model="qwen", url="http://127.0.0.1:8000",
    )
    assert instance.health_path == "/health"
|
|
||||||
|
|
||||||
|
|
||||||
def test_all_gpu_profiles_have_ollama_managed_block():
    """Sanity check: all public GPU profiles now have a managed block for ollama."""
    from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry

    registry = ProfileRegistry()
    for profile in registry.list_public():
        svc = profile.services.get("ollama")
        if svc is None:
            continue  # profile may not define ollama
        name = profile.name
        assert svc.managed is not None, f"{name}: ollama missing managed block"
        assert isinstance(svc.managed, ProcessSpec)
        assert svc.managed.adopt is True, f"{name}: ollama adopt should be True"
        assert svc.managed.health_path == "/api/tags", f"{name}: wrong health_path"
|
|
||||||
|
|
@ -1,101 +0,0 @@
|
||||||
# tests/test_resources/test_profile_registry.py
|
|
||||||
import pytest
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
from circuitforge_core.resources.profiles.schema import (
|
|
||||||
GpuProfile, ServiceProfile, load_profile
|
|
||||||
)
|
|
||||||
from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
|
|
||||||
|
|
||||||
|
|
||||||
def test_load_8gb_profile(tmp_path):
    # Full happy-path parse of a profile with two services, one of them shared.
    # NOTE(review): the YAML literal's indentation was lost in extraction and
    # is reconstructed here with conventional 2-space nesting — confirm.
    yaml_content = """
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 5120
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
"""
    profile_file = tmp_path / "test.yaml"
    profile_file.write_text(yaml_content)
    profile = load_profile(profile_file)

    assert profile.name == "single-gpu-8gb"
    assert profile.schema_version == 1
    assert profile.vram_total_mb == 8192
    assert profile.eviction_timeout_s == 10.0
    assert "vllm" in profile.services
    assert profile.services["vllm"].max_mb == 5120
    assert profile.services["vllm"].priority == 1
    assert profile.services["cf-vision"].shared is True
    assert profile.services["cf-vision"].max_concurrent == 3
|
|
||||||
|
|
||||||
|
|
||||||
def test_load_profile_rejects_wrong_schema_version(tmp_path):
    """Unknown schema versions are rejected with a ValueError naming the field."""
    profile_file = tmp_path / "future.yaml"
    profile_file.write_text("schema_version: 99\nname: future\n")
    with pytest.raises(ValueError, match="schema_version"):
        load_profile(profile_file)
|
|
||||||
|
|
||||||
|
|
||||||
def test_service_profile_defaults():
    """ServiceProfile optional fields have sane defaults."""
    service = ServiceProfile(max_mb=1024, priority=2)
    assert service.shared is False
    assert service.max_concurrent == 1
    assert service.always_on is False
    assert service.backend is None
    assert service.consumers == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_profile_registry_loads_public_profiles():
    """The registry ships with the standard public single-GPU profiles."""
    names = {p.name for p in ProfileRegistry().list_public()}
    assert {"single-gpu-8gb", "single-gpu-6gb", "single-gpu-2gb"} <= names
|
|
||||||
|
|
||||||
|
|
||||||
def test_profile_registry_auto_detect_selects_8gb():
    """A single GPU with 8 GiB of VRAM maps to the single-gpu-8gb profile."""
    gpus = [MagicMock(vram_total_mb=8192)]
    selected = ProfileRegistry().auto_detect(gpus)
    assert selected.name == "single-gpu-8gb"
|
|
||||||
|
|
||||||
|
|
||||||
def test_profile_registry_auto_detect_selects_6gb():
    """A single GPU with 6 GiB of VRAM maps to the single-gpu-6gb profile."""
    gpus = [MagicMock(vram_total_mb=6144)]
    selected = ProfileRegistry().auto_detect(gpus)
    assert selected.name == "single-gpu-6gb"
|
|
||||||
|
|
||||||
|
|
||||||
def test_profile_registry_auto_detect_selects_2gb():
    """A single GPU with 2 GiB of VRAM maps to the single-gpu-2gb profile."""
    gpus = [MagicMock(vram_total_mb=2048)]
    selected = ProfileRegistry().auto_detect(gpus)
    assert selected.name == "single-gpu-2gb"
|
|
||||||
|
|
||||||
|
|
||||||
def test_profile_registry_load_from_path(tmp_path):
    """Loading an arbitrary YAML path returns the profile it describes."""
    source = tmp_path / "custom.yaml"
    source.write_text(
        "schema_version: 1\nname: custom\n"
        "vram_total_mb: 12288\neviction_timeout_s: 5.0\n"
    )
    loaded = ProfileRegistry().load(source)
    assert loaded.name == "custom"
    assert loaded.vram_total_mb == 12288
|
|
||||||
|
|
@ -1,194 +0,0 @@
|
||||||
"""Tests for ServiceManager ProcessSpec support."""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from circuitforge_core.resources.agent.service_manager import ServiceManager
|
|
||||||
from circuitforge_core.resources.profiles.schema import (
|
|
||||||
GpuProfile,
|
|
||||||
ProcessSpec,
|
|
||||||
ServiceProfile,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_profile(args_template: str = "--port {port} --gpu-id {gpu_id}") -> GpuProfile:
    """Build a two-service test profile: a managed vllm plus an unmanaged stub."""
    managed_spec = ProcessSpec(
        exec_path="/usr/bin/python",
        args_template=args_template,
        port=8000,
        host_port=8000,
        cwd="/tmp",
    )
    return GpuProfile(
        schema_version=1,
        name="test",
        vram_total_mb=8192,
        services={
            "vllm": ServiceProfile(max_mb=5120, priority=1, managed=managed_spec),
            "no_managed": ServiceProfile(max_mb=1024, priority=2),
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def manager():
    """A ServiceManager wired to the synthetic test profile on localhost."""
    return ServiceManager(
        node_id="test-node",
        profile=_make_profile(),
        advertise_host="127.0.0.1",
    )
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# is_running
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_running_returns_false_when_no_proc(manager):
    """With no tracked child process, a managed service reports not running."""
    assert manager.is_running("vllm") is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_running_returns_false_when_proc_exited(manager):
    """A process whose poll() yields an exit code counts as stopped."""
    dead = MagicMock()
    dead.poll.return_value = 1  # non-None poll() => child has exited
    manager._procs["vllm"] = dead
    assert manager.is_running("vllm") is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_running_returns_false_when_port_not_listening(manager):
    """A live child whose port refuses connections is not considered running."""
    alive = MagicMock()
    alive.poll.return_value = None  # poll() None => child still alive
    manager._procs["vllm"] = alive
    with patch("socket.create_connection", side_effect=OSError("refused")):
        assert manager.is_running("vllm") is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_running_returns_true_when_proc_alive_and_port_open(manager):
    """Running means both: child alive AND its port accepts a TCP connection."""
    alive = MagicMock()
    alive.poll.return_value = None  # child still alive
    manager._procs["vllm"] = alive

    # Fake socket usable as a context manager, as create_connection returns one.
    fake_conn = MagicMock()
    fake_conn.__enter__ = MagicMock(return_value=fake_conn)
    fake_conn.__exit__ = MagicMock(return_value=False)
    with patch("socket.create_connection", return_value=fake_conn):
        assert manager.is_running("vllm") is True
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_running_unknown_service_returns_false(manager):
    """Service names absent from the profile are simply not running."""
    assert manager.is_running("nonexistent") is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_is_running_no_managed_spec_returns_false(manager):
    """A profile entry without a ProcessSpec can never report as running."""
    assert manager.is_running("no_managed") is False
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# start
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def test_start_launches_process_and_returns_url(manager):
    """start() spawns the templated command and reports the advertise URL."""
    with patch("subprocess.Popen") as popen, \
            patch.object(manager, "is_running", return_value=False):
        popen.return_value = MagicMock()
        url = manager.start("vllm", gpu_id=0, params={"model": "mymodel"})

    assert url == "http://127.0.0.1:8000"
    popen.assert_called_once()
    # The argv comes from the profile's args_template with values substituted.
    argv = popen.call_args[0][0]
    assert argv[0] == "/usr/bin/python"
    for token in ("--port", "8000", "--gpu-id", "0"):
        assert token in argv
|
|
||||||
|
|
||||||
|
|
||||||
def test_start_returns_url_immediately_when_already_running(manager):
    """An already-running service is not respawned; its URL is returned as-is."""
    with patch.object(manager, "is_running", return_value=True), \
            patch("subprocess.Popen") as popen:
        url = manager.start("vllm", gpu_id=0, params={})

    assert url == "http://127.0.0.1:8000"
    popen.assert_not_called()
|
|
||||||
|
|
||||||
|
|
||||||
def test_start_raises_for_unknown_service(manager):
    """Starting a service missing from the profile is a hard error."""
    with pytest.raises(ValueError, match="not in profile"):
        manager.start("nonexistent", gpu_id=0, params={})
|
|
||||||
|
|
||||||
|
|
||||||
def test_start_stores_proc_in_procs(manager):
    """The spawned Popen handle is tracked under the service name."""
    handle = MagicMock()
    with patch("subprocess.Popen", return_value=handle), \
            patch.object(manager, "is_running", return_value=False):
        manager.start("vllm", gpu_id=0, params={})

    assert manager._procs["vllm"] is handle
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# stop
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def test_stop_terminates_running_process(manager):
    """stop() terminates, waits on, and forgets a tracked process."""
    child = MagicMock()
    manager._procs["vllm"] = child

    assert manager.stop("vllm") is True
    child.terminate.assert_called_once()
    child.wait.assert_called_once()
    assert "vllm" not in manager._procs
|
|
||||||
|
|
||||||
|
|
||||||
def test_stop_kills_process_that_wont_terminate(manager):
    """If wait() fails after terminate(), stop() escalates to kill()."""
    stubborn = MagicMock()
    stubborn.wait.side_effect = Exception("timeout")
    manager._procs["vllm"] = stubborn

    assert manager.stop("vllm") is True
    stubborn.kill.assert_called_once()
|
|
||||||
|
|
||||||
|
|
||||||
def test_stop_returns_true_when_no_proc_tracked(manager):
    """Stopping a known service with nothing tracked is an idempotent no-op."""
    assert manager.stop("vllm") is True
|
|
||||||
|
|
||||||
|
|
||||||
def test_stop_returns_false_for_unknown_service(manager):
    """Service names absent from the profile cannot be stopped."""
    assert manager.stop("nonexistent") is False
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# list_running / get_url
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def test_list_running_returns_running_services(manager):
    """Only services whose is_running() check passes are listed."""
    with patch.object(manager, "is_running", side_effect=lambda svc: svc == "vllm"):
        assert manager.list_running() == ["vllm"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_url_returns_none_when_not_running(manager):
    """get_url() yields None while the service is down."""
    with patch.object(manager, "is_running", return_value=False):
        assert manager.get_url("vllm") is None
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_url_returns_url_when_running(manager):
    """get_url() yields the advertise URL once the service is up."""
    with patch.object(manager, "is_running", return_value=True):
        assert manager.get_url("vllm") == "http://127.0.0.1:8000"
|
|
||||||
|
|
@ -1,86 +0,0 @@
|
||||||
import time
|
|
||||||
import dataclasses
|
|
||||||
import pytest
|
|
||||||
from circuitforge_core.resources.coordinator.service_registry import (
|
|
||||||
ServiceRegistry, ServiceAllocation, ServiceInstance,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def registry():
    """A fresh, empty ServiceRegistry for each test."""
    return ServiceRegistry()
|
|
||||||
|
|
||||||
|
|
||||||
def test_allocate_creates_allocation(registry):
    """allocate() returns a populated allocation carrying a generated id."""
    lease = registry.allocate(
        service="vllm",
        node_id="heimdall",
        gpu_id=0,
        model="Ouro-1.4B",
        url="http://heimdall:8000",
        caller="test",
        ttl_s=300.0,
    )
    assert lease.service == "vllm"
    assert lease.node_id == "heimdall"
    assert lease.allocation_id  # non-empty UUID string
|
|
||||||
|
|
||||||
|
|
||||||
def test_active_allocations_count(registry):
    """Two live leases on the same instance are both counted."""
    for caller in ("a", "b"):
        registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", caller, 300.0)
    assert registry.active_allocations("vllm", "heimdall", 0) == 2
|
|
||||||
|
|
||||||
|
|
||||||
def test_release_decrements_count(registry):
    """Releasing the only lease drops the active count to zero."""
    lease = registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "a", 300.0)
    registry.release(lease.allocation_id)
    assert registry.active_allocations("vllm", "heimdall", 0) == 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_release_nonexistent_returns_false(registry):
    """Releasing an unknown allocation id reports failure rather than raising."""
    assert registry.release("nonexistent-id") is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_upsert_instance_sets_running_state(registry):
    """upsert_instance() registers exactly one instance in the given state."""
    registry.upsert_instance(
        "vllm", "heimdall", 0,
        state="running", model="Ouro-1.4B", url="http://heimdall:8000",
    )
    (instance,) = registry.all_instances()  # also asserts exactly one instance
    assert instance.state == "running"
|
|
||||||
|
|
||||||
|
|
||||||
def test_release_last_alloc_marks_instance_idle(registry):
    """Dropping the final lease transitions the instance to idle."""
    registry.upsert_instance(
        "vllm", "heimdall", 0,
        state="running", model="Ouro-1.4B", url="http://heimdall:8000",
    )
    lease = registry.allocate(
        "vllm", "heimdall", 0, "Ouro-1.4B", "http://heimdall:8000", "a", 300.0
    )
    registry.release(lease.allocation_id)

    instance = registry.all_instances()[0]
    assert instance.state == "idle"
    assert instance.idle_since is not None
|
|
||||||
|
|
||||||
|
|
||||||
def test_new_alloc_on_idle_instance_marks_it_running(registry):
    """Allocating against an idle instance flips it back to running."""
    registry.upsert_instance(
        "vllm", "heimdall", 0,
        state="idle", model="M", url="http://h:8000",
    )
    registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "x", 300.0)
    assert registry.all_instances()[0].state == "running"
|
|
||||||
|
|
||||||
|
|
||||||
def test_sweep_expired_allocations(registry):
    """An allocation past its TTL is swept and its instance transitions to idle.

    Uses a sub-second TTL so the test does not block for over a second;
    the previous version (ttl_s=1 + sleep(1.1)) dominated suite runtime.
    ttl_s accepts floats — see the 300.0 values used elsewhere in this file.
    """
    # Register a running instance so the idle-transition logic has something
    # to act on when the last allocation disappears.
    registry.upsert_instance("vllm", "heimdall", 0, state="running",
                             model="M", url="http://h:8000")
    # Create an allocation with a very short TTL.
    alloc = registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000",
                              "caller", ttl_s=0.05)
    assert registry.active_allocations("vllm", "heimdall", 0) == 1

    # Wait (briefly) for the TTL to elapse.
    time.sleep(0.1)

    expired = registry.sweep_expired_allocations()

    # The allocation should have been swept.
    assert alloc.allocation_id in expired
    assert registry.active_allocations("vllm", "heimdall", 0) == 0

    # The instance should have transitioned to idle since no allocations remain.
    instance = registry.all_instances()[0]
    assert instance.state == "idle"
    assert instance.idle_since is not None
|
|
||||||
Loading…
Reference in a new issue