From c244260d1ce897a06cc037b6c02caa0cc24446ab Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sat, 4 Apr 2026 22:34:27 -0700 Subject: [PATCH] =?UTF-8?q?feat!:=20strip=20resources/=20from=20MIT=20core?= =?UTF-8?q?=20=E2=80=94=20moves=20to=20circuitforge-orch=20(v0.8.0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BREAKING CHANGE: circuitforge_core.resources is no longer available. Import CFOrchClient from circuitforge_orch.client instead. cf-orch CLI entry point is now in the circuitforge-orch package. --- README.md | 22 +- circuitforge_core/__init__.py | 2 +- circuitforge_core/affiliates/programs.py | 13 + circuitforge_core/resources/__init__.py | 1 - circuitforge_core/resources/agent/__init__.py | 0 circuitforge_core/resources/agent/app.py | 105 ---- .../resources/agent/eviction_executor.py | 85 --- .../resources/agent/gpu_monitor.py | 52 -- .../resources/agent/service_manager.py | 186 ------- .../resources/agent/service_probe.py | 123 ----- circuitforge_core/resources/cli.py | 234 -------- circuitforge_core/resources/client.py | 143 ----- circuitforge_core/resources/compose.yml | 44 -- .../resources/coordinator/__init__.py | 0 .../resources/coordinator/agent_supervisor.py | 209 ------- .../resources/coordinator/app.py | 509 ------------------ .../resources/coordinator/auth.py | 197 ------- .../resources/coordinator/dashboard.html | 473 ---------------- .../resources/coordinator/eviction_engine.py | 81 --- .../resources/coordinator/lease_manager.py | 130 ----- .../resources/coordinator/node_selector.py | 74 --- .../resources/coordinator/node_store.py | 85 --- .../resources/coordinator/profile_registry.py | 65 --- .../resources/coordinator/service_registry.py | 173 ------ .../resources/docuvision/__init__.py | 0 circuitforge_core/resources/docuvision/app.py | 250 --------- .../resources/inference/__init__.py | 0 .../resources/inference/llm_server.py | 137 ----- circuitforge_core/resources/models.py | 66 --- 
.../resources/profiles/__init__.py | 0 .../resources/profiles/public/cpu-16gb.yaml | 41 -- .../resources/profiles/public/cpu-32gb.yaml | 41 -- .../profiles/public/single-gpu-16gb.yaml | 73 --- .../profiles/public/single-gpu-24gb.yaml | 73 --- .../profiles/public/single-gpu-2gb.yaml | 30 -- .../profiles/public/single-gpu-4gb.yaml | 38 -- .../profiles/public/single-gpu-6gb.yaml | 61 --- .../profiles/public/single-gpu-8gb.yaml | 68 --- .../resources/profiles/schema.py | 121 ----- pyproject.toml | 19 +- tests/test_resources/__init__.py | 0 tests/test_resources/test_agent_app.py | 68 --- tests/test_resources/test_agent_supervisor.py | 93 ---- tests/test_resources/test_agent_watchdog.py | 151 ------ tests/test_resources/test_cli.py | 33 -- tests/test_resources/test_client.py | 94 ---- .../test_coordinator_allocate.py | 132 ----- tests/test_resources/test_coordinator_app.py | 183 ------- tests/test_resources/test_coordinator_auth.py | 148 ----- .../test_resources/test_coordinator_probe.py | 215 -------- tests/test_resources/test_docuvision.py | 215 -------- tests/test_resources/test_eviction_engine.py | 67 --- .../test_resources/test_eviction_executor.py | 43 -- tests/test_resources/test_gpu_monitor.py | 60 --- tests/test_resources/test_integration.py | 221 -------- tests/test_resources/test_lease_manager.py | 85 --- tests/test_resources/test_models.py | 47 -- tests/test_resources/test_node_selector.py | 82 --- tests/test_resources/test_node_store.py | 87 --- tests/test_resources/test_ollama_adopt.py | 176 ------ tests/test_resources/test_profile_registry.py | 101 ---- tests/test_resources/test_service_manager.py | 194 ------- tests/test_resources/test_service_registry.py | 86 --- 63 files changed, 34 insertions(+), 6571 deletions(-) delete mode 100644 circuitforge_core/resources/__init__.py delete mode 100644 circuitforge_core/resources/agent/__init__.py delete mode 100644 circuitforge_core/resources/agent/app.py delete mode 100644 
circuitforge_core/resources/agent/eviction_executor.py delete mode 100644 circuitforge_core/resources/agent/gpu_monitor.py delete mode 100644 circuitforge_core/resources/agent/service_manager.py delete mode 100644 circuitforge_core/resources/agent/service_probe.py delete mode 100644 circuitforge_core/resources/cli.py delete mode 100644 circuitforge_core/resources/client.py delete mode 100644 circuitforge_core/resources/compose.yml delete mode 100644 circuitforge_core/resources/coordinator/__init__.py delete mode 100644 circuitforge_core/resources/coordinator/agent_supervisor.py delete mode 100644 circuitforge_core/resources/coordinator/app.py delete mode 100644 circuitforge_core/resources/coordinator/auth.py delete mode 100644 circuitforge_core/resources/coordinator/dashboard.html delete mode 100644 circuitforge_core/resources/coordinator/eviction_engine.py delete mode 100644 circuitforge_core/resources/coordinator/lease_manager.py delete mode 100644 circuitforge_core/resources/coordinator/node_selector.py delete mode 100644 circuitforge_core/resources/coordinator/node_store.py delete mode 100644 circuitforge_core/resources/coordinator/profile_registry.py delete mode 100644 circuitforge_core/resources/coordinator/service_registry.py delete mode 100644 circuitforge_core/resources/docuvision/__init__.py delete mode 100644 circuitforge_core/resources/docuvision/app.py delete mode 100644 circuitforge_core/resources/inference/__init__.py delete mode 100644 circuitforge_core/resources/inference/llm_server.py delete mode 100644 circuitforge_core/resources/models.py delete mode 100644 circuitforge_core/resources/profiles/__init__.py delete mode 100644 circuitforge_core/resources/profiles/public/cpu-16gb.yaml delete mode 100644 circuitforge_core/resources/profiles/public/cpu-32gb.yaml delete mode 100644 circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml delete mode 100644 circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml delete mode 100644 
circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml delete mode 100644 circuitforge_core/resources/profiles/public/single-gpu-4gb.yaml delete mode 100644 circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml delete mode 100644 circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml delete mode 100644 circuitforge_core/resources/profiles/schema.py delete mode 100644 tests/test_resources/__init__.py delete mode 100644 tests/test_resources/test_agent_app.py delete mode 100644 tests/test_resources/test_agent_supervisor.py delete mode 100644 tests/test_resources/test_agent_watchdog.py delete mode 100644 tests/test_resources/test_cli.py delete mode 100644 tests/test_resources/test_client.py delete mode 100644 tests/test_resources/test_coordinator_allocate.py delete mode 100644 tests/test_resources/test_coordinator_app.py delete mode 100644 tests/test_resources/test_coordinator_auth.py delete mode 100644 tests/test_resources/test_coordinator_probe.py delete mode 100644 tests/test_resources/test_docuvision.py delete mode 100644 tests/test_resources/test_eviction_engine.py delete mode 100644 tests/test_resources/test_eviction_executor.py delete mode 100644 tests/test_resources/test_gpu_monitor.py delete mode 100644 tests/test_resources/test_integration.py delete mode 100644 tests/test_resources/test_lease_manager.py delete mode 100644 tests/test_resources/test_models.py delete mode 100644 tests/test_resources/test_node_selector.py delete mode 100644 tests/test_resources/test_node_store.py delete mode 100644 tests/test_resources/test_ollama_adopt.py delete mode 100644 tests/test_resources/test_profile_registry.py delete mode 100644 tests/test_resources/test_service_manager.py delete mode 100644 tests/test_resources/test_service_registry.py diff --git a/README.md b/README.md index 90ab093..c87359f 100644 --- a/README.md +++ b/README.md @@ -2,15 +2,29 @@ Shared scaffold for CircuitForge products. 
+**Current version: 0.8.0** + ## Modules +### Implemented + - `circuitforge_core.db` — SQLite connection factory and migration runner -- `circuitforge_core.llm` — LLM router with fallback chain +- `circuitforge_core.llm` — LLM router with fallback chain (Ollama, vLLM, Anthropic, OpenAI-compatible) - `circuitforge_core.tiers` — Tier system with BYOK and local vision unlocks - `circuitforge_core.config` — Env validation and .env loader -- `circuitforge_core.vision` — Vision router stub (v0.2+) -- `circuitforge_core.wizard` — First-run wizard base class stub -- `circuitforge_core.pipeline` — Staging queue stub (v0.2+) +- `circuitforge_core.hardware` — Hardware detection and LLM backend profile generation (VRAM tiers, GPU/CPU auto-select) +- `circuitforge_core.documents` — Document ingestion pipeline: PDF, DOCX, and image OCR → `StructuredDocument` +- `circuitforge_core.affiliates` — Affiliate URL wrapping with opt-out, BYOK user IDs, and CF env-var fallback (`wrap_url`) +- `circuitforge_core.preferences` — User preference store (local YAML file, pluggable backend); dot-path get/set API +- `circuitforge_core.tasks` — VRAM-aware LLM task scheduler; shared slot manager across services (`TaskScheduler`) +- `circuitforge_core.manage` — Cross-platform product process manager (Docker and native modes) +- `circuitforge_core.resources` — Removed in 0.8.0: the resource coordinator and agent (VRAM allocation, eviction engine, GPU profile registry) now live in the `circuitforge-orch` package; import `CFOrchClient` from `circuitforge_orch.client` + +### Stubs (in-tree, not yet implemented) + +- `circuitforge_core.vision` — Vision router base class (planned: moondream2 / Claude vision dispatch) +- `circuitforge_core.wizard` — First-run wizard base class (products subclass `BaseWizard`) +- `circuitforge_core.pipeline` — Staging queue base (`StagingDB`; products provide concrete schema) ## Install diff --git a/circuitforge_core/__init__.py b/circuitforge_core/__init__.py index 49e0fc1..777f190 100644 --- a/circuitforge_core/__init__.py +++ b/circuitforge_core/__init__.py @@ -1 +1 @@ -__version__ = 
"0.7.0" +__version__ = "0.8.0" diff --git a/circuitforge_core/affiliates/programs.py b/circuitforge_core/affiliates/programs.py index 661bb72..2502369 100644 --- a/circuitforge_core/affiliates/programs.py +++ b/circuitforge_core/affiliates/programs.py @@ -56,6 +56,12 @@ def _build_ebay_url(url: str, affiliate_id: str) -> str: return f"{url}{sep}{params}" +def _build_instacart_url(url: str, affiliate_id: str) -> str: + """Append Instacart affiliate parameter to a search URL.""" + sep = "&" if "?" in url else "?" + return f"{url}{sep}aff={affiliate_id}" + + def _build_amazon_url(url: str, affiliate_id: str) -> str: """Merge an Amazon Associates tag into a product URL's query string.""" parsed = urlparse(url) @@ -101,3 +107,10 @@ register_program(AffiliateProgram( env_var="AMAZON_ASSOCIATES_TAG", build_url=_build_amazon_url, )) + +register_program(AffiliateProgram( + name="Instacart", + retailer_key="instacart", + env_var="INSTACART_AFFILIATE_ID", + build_url=_build_instacart_url, +)) diff --git a/circuitforge_core/resources/__init__.py b/circuitforge_core/resources/__init__.py deleted file mode 100644 index 8bf5235..0000000 --- a/circuitforge_core/resources/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from circuitforge_core.resources.client import CFOrchClient, Allocation # noqa: F401 diff --git a/circuitforge_core/resources/agent/__init__.py b/circuitforge_core/resources/agent/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/circuitforge_core/resources/agent/app.py b/circuitforge_core/resources/agent/app.py deleted file mode 100644 index 162e7c5..0000000 --- a/circuitforge_core/resources/agent/app.py +++ /dev/null @@ -1,105 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any - -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel - -from circuitforge_core.resources.agent.eviction_executor import EvictionExecutor -from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor -from 
circuitforge_core.resources.agent.service_manager import ServiceManager - -logger = logging.getLogger(__name__) - - -class EvictRequest(BaseModel): - pid: int - grace_period_s: float = 5.0 - - -class ServiceStartRequest(BaseModel): - gpu_id: int = 0 - params: dict[str, str] = {} - - -def create_agent_app( - node_id: str, - monitor: GpuMonitor | None = None, - executor: EvictionExecutor | None = None, - service_manager: ServiceManager | None = None, -) -> FastAPI: - _monitor = monitor or GpuMonitor() - _executor = executor or EvictionExecutor() - - app = FastAPI(title=f"cf-orch-agent [{node_id}]") - - @app.get("/health") - def health() -> dict[str, Any]: - return {"status": "ok", "node_id": node_id} - - @app.get("/gpu-info") - def gpu_info() -> dict[str, Any]: - gpus = _monitor.poll() - return { - "node_id": node_id, - "gpus": [ - { - "gpu_id": g.gpu_id, - "name": g.name, - "vram_total_mb": g.vram_total_mb, - "vram_used_mb": g.vram_used_mb, - "vram_free_mb": g.vram_free_mb, - } - for g in gpus - ], - } - - @app.post("/evict") - def evict(req: EvictRequest) -> dict[str, Any]: - result = _executor.evict_pid(pid=req.pid, grace_period_s=req.grace_period_s) - return { - "success": result.success, - "method": result.method, - "message": result.message, - } - - @app.get("/resident-info") - def resident_info() -> dict[str, Any]: - """Return which models are currently loaded in each running managed service.""" - if service_manager is None: - return {"residents": []} - from circuitforge_core.resources.agent.service_probe import probe_all - return {"residents": probe_all(service_manager)} - - if service_manager is not None: - @app.get("/services") - def list_services() -> dict: - return {"running": service_manager.list_running()} - - @app.get("/services/{service}") - def service_status(service: str) -> dict: - running = service_manager.is_running(service) - url = service_manager.get_url(service) if running else None - return {"service": service, "running": running, "url": url} 
- - @app.post("/services/{service}/start") - def start_service(service: str, req: ServiceStartRequest) -> dict: - try: - already_running = service_manager.is_running(service) - url = service_manager.start(service, req.gpu_id, req.params) - # adopted=True signals the coordinator to treat this instance as - # immediately running rather than waiting for the probe loop. - adopted = already_running and service_manager.is_running(service) - return {"service": service, "url": url, "running": True, "adopted": adopted} - except (ValueError, NotImplementedError) as exc: - raise HTTPException(status_code=422, detail=str(exc)) - except Exception as exc: - raise HTTPException(status_code=500, detail=f"Failed to start {service}: {exc}") - - @app.post("/services/{service}/stop") - def stop_service(service: str) -> dict: - stopped = service_manager.stop(service) - return {"service": service, "stopped": stopped} - - return app diff --git a/circuitforge_core/resources/agent/eviction_executor.py b/circuitforge_core/resources/agent/eviction_executor.py deleted file mode 100644 index d6a7c8a..0000000 --- a/circuitforge_core/resources/agent/eviction_executor.py +++ /dev/null @@ -1,85 +0,0 @@ -from __future__ import annotations - -import logging -import os -import signal -import time -from dataclasses import dataclass - -import psutil - -logger = logging.getLogger(__name__) - -_DEFAULT_GRACE_S = 5.0 - - -@dataclass(frozen=True) -class EvictionResult: - success: bool - method: str # "sigterm", "sigkill", "already_gone", "not_found", "error" - message: str - - -class EvictionExecutor: - def __init__(self, grace_period_s: float = _DEFAULT_GRACE_S) -> None: - self._default_grace = grace_period_s - - def evict_pid( - self, - pid: int, - grace_period_s: float | None = None, - ) -> EvictionResult: - grace = grace_period_s if grace_period_s is not None else self._default_grace - - if pid <= 0: - return EvictionResult( - success=False, method="error", - message=f"Refusing to signal invalid PID 
{pid}" - ) - - if not psutil.pid_exists(pid): - return EvictionResult( - success=False, method="not_found", - message=f"PID {pid} not found" - ) - - try: - os.kill(pid, signal.SIGTERM) - except ProcessLookupError: - return EvictionResult( - success=True, method="already_gone", - message=f"PID {pid} vanished before SIGTERM" - ) - except PermissionError as exc: - return EvictionResult( - success=False, method="error", - message=f"Permission denied terminating PID {pid}: {exc}" - ) - - # Wait for grace period - deadline = time.monotonic() + grace - while time.monotonic() < deadline: - if not psutil.pid_exists(pid): - logger.info("PID %d exited cleanly after SIGTERM", pid) - return EvictionResult( - success=True, method="sigterm", - message=f"PID {pid} exited after SIGTERM" - ) - time.sleep(0.05) - - # Escalate to SIGKILL - if psutil.pid_exists(pid): - try: - os.kill(pid, signal.SIGKILL) - logger.warning("PID %d required SIGKILL", pid) - return EvictionResult( - success=True, method="sigkill", - message=f"PID {pid} killed with SIGKILL" - ) - except ProcessLookupError: - pass - - return EvictionResult( - success=True, method="sigkill", - message=f"PID {pid} is gone" - ) diff --git a/circuitforge_core/resources/agent/gpu_monitor.py b/circuitforge_core/resources/agent/gpu_monitor.py deleted file mode 100644 index 4d058d6..0000000 --- a/circuitforge_core/resources/agent/gpu_monitor.py +++ /dev/null @@ -1,52 +0,0 @@ -from __future__ import annotations - -import logging -import subprocess - -from circuitforge_core.resources.models import GpuInfo - -logger = logging.getLogger(__name__) - -_NVIDIA_SMI_CMD = [ - "nvidia-smi", - "--query-gpu=index,name,memory.total,memory.used,memory.free", - "--format=csv,noheader,nounits", -] - - -class GpuMonitor: - def poll(self) -> list[GpuInfo]: - try: - result = subprocess.run( - _NVIDIA_SMI_CMD, - capture_output=True, - text=True, - timeout=5, - ) - except (FileNotFoundError, subprocess.TimeoutExpired) as exc: - 
logger.warning("nvidia-smi unavailable: %s", exc) - return [] - - if result.returncode != 0: - logger.warning("nvidia-smi exited %d", result.returncode) - return [] - - return self._parse(result.stdout) - - def _parse(self, output: str) -> list[GpuInfo]: - gpus: list[GpuInfo] = [] - for line in output.strip().splitlines(): - parts = [p.strip() for p in line.split(",")] - if len(parts) != 5: - continue - try: - gpus.append(GpuInfo( - gpu_id=int(parts[0]), - name=parts[1], - vram_total_mb=int(parts[2]), - vram_used_mb=int(parts[3]), - vram_free_mb=int(parts[4]), - )) - except ValueError: - logger.debug("Skipping malformed nvidia-smi line: %r", line) - return gpus diff --git a/circuitforge_core/resources/agent/service_manager.py b/circuitforge_core/resources/agent/service_manager.py deleted file mode 100644 index 5578c24..0000000 --- a/circuitforge_core/resources/agent/service_manager.py +++ /dev/null @@ -1,186 +0,0 @@ -""" -ServiceManager — start/stop Docker containers and processes for cf-orch managed services. 
- -Container naming convention: cf-orch-{service}-{node_id} -""" -from __future__ import annotations - -import os -import re -import subprocess -from collections import defaultdict -from typing import Any - -from circuitforge_core.resources.profiles.schema import DockerSpec, GpuProfile, ProcessSpec - - -def _expand_volume(v: str) -> str: - """Expand bash-style volume strings including ${VAR:-default} and $VAR.""" - def _sub(m: re.Match) -> str: # type: ignore[type-arg] - var, default = m.group(1), m.group(2) or "" - return os.environ.get(var) or default - v = re.sub(r"\$\{(\w+)(?::-(.*?))?\}", _sub, v) - v = re.sub(r"\$(\w+)", lambda m: os.environ.get(m.group(1), m.group(0)), v) - return v - - -class ServiceManager: - def __init__( - self, - node_id: str, - profile: GpuProfile, - advertise_host: str = "127.0.0.1", - ) -> None: - self.node_id = node_id - self.profile = profile - self.advertise_host = advertise_host - self._procs: dict[str, Any] = {} - - def container_name(self, service: str) -> str: - return f"cf-orch-{service}-{self.node_id}" - - def _get_spec(self, service: str) -> DockerSpec | ProcessSpec | None: - svc = self.profile.services.get(service) - if svc is None: - return None - return svc.managed - - def is_running(self, service: str) -> bool: - spec = self._get_spec(service) - if spec is None: - return False - if isinstance(spec, DockerSpec): - try: - result = subprocess.run( - [ - "docker", - "inspect", - "--format", - "{{.State.Running}}", - self.container_name(service), - ], - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() == "true" - except subprocess.CalledProcessError: - return False - if isinstance(spec, ProcessSpec): - # For adopt=True services, check the health endpoint regardless of whether - # we spawned the process (it may be a system daemon we didn't start). 
- if spec.adopt: - return self._probe_health(spec.host_port, spec.health_path) - proc = self._procs.get(service) - if proc is None or proc.poll() is not None: - return False - import socket - try: - with socket.create_connection(("127.0.0.1", spec.host_port), timeout=1): - return True - except OSError: - return False - return False - - def _probe_health(self, port: int, health_path: str = "/health") -> bool: - """Return True if the service at localhost:port responds 200 on health_path.""" - import urllib.request - try: - url = f"http://127.0.0.1:{port}{health_path}" - with urllib.request.urlopen(url, timeout=2.0) as resp: - return resp.status == 200 - except Exception: - return False - - def start(self, service: str, gpu_id: int, params: dict[str, str]) -> str: - spec = self._get_spec(service) - if spec is None: - raise ValueError(f"Service {service!r} not in profile or has no managed spec") - - if self.is_running(service): - return f"http://{self.advertise_host}:{spec.host_port}" - - if isinstance(spec, DockerSpec): - expanded_volumes = [_expand_volume(v) for v in spec.volumes] - - filler: dict[str, str] = defaultdict(str, params) - expanded_command = spec.command_template.format_map(filler).split() - - cmd = [ - "docker", "run", "-d", "--rm", - "--name", self.container_name(service), - "--runtime", spec.runtime, - "--gpus", f"device={gpu_id}", - "--ipc", spec.ipc, - "-p", f"{spec.host_port}:{spec.port}", - ] - for vol in expanded_volumes: - cmd += ["-v", vol] - for key, val in spec.env.items(): - cmd += ["-e", f"{key}={val}"] - cmd.append(spec.image) - cmd.extend(expanded_command) - - subprocess.run(cmd, check=True, capture_output=True, text=True) - return f"http://{self.advertise_host}:{spec.host_port}" - - if isinstance(spec, ProcessSpec): - # adopt=True: if the service is already healthy, claim it without spawning. 
- if spec.adopt and self._probe_health(spec.host_port, spec.health_path): - return f"http://{self.advertise_host}:{spec.host_port}" - - import subprocess as _sp - - filler = defaultdict(str, params) - filler.setdefault("port", str(spec.port)) - filler.setdefault("gpu_id", str(gpu_id)) - args_expanded = spec.args_template.format_map(filler).split() - - cmd = [spec.exec_path] + args_expanded - env = {**__import__("os").environ} - proc = _sp.Popen( - cmd, - cwd=spec.cwd or None, - env=env, - stdout=_sp.DEVNULL, - stderr=_sp.DEVNULL, - ) - self._procs[service] = proc - return f"http://{self.advertise_host}:{spec.host_port}" - - raise NotImplementedError(f"Unknown spec type: {type(spec)}") - - def stop(self, service: str) -> bool: - spec = self._get_spec(service) - if spec is None: - return False - if isinstance(spec, DockerSpec): - try: - subprocess.run( - ["docker", "stop", self.container_name(service)], - check=True, - capture_output=True, - text=True, - ) - return True - except subprocess.CalledProcessError: - return False - if isinstance(spec, ProcessSpec): - proc = self._procs.pop(service, None) - if proc is not None: - proc.terminate() - try: - proc.wait(timeout=10) - except Exception: - proc.kill() - return True - return False - - def list_running(self) -> list[str]: - return [svc for svc in self.profile.services if self.is_running(svc)] - - def get_url(self, service: str) -> str | None: - spec = self._get_spec(service) - if spec is None or not self.is_running(service): - return None - return f"http://{self.advertise_host}:{spec.host_port}" diff --git a/circuitforge_core/resources/agent/service_probe.py b/circuitforge_core/resources/agent/service_probe.py deleted file mode 100644 index e2b6efa..0000000 --- a/circuitforge_core/resources/agent/service_probe.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Probe running services to detect which models are currently loaded in VRAM. - -Two probe strategies run together: - -1. 
Well-known ports — always checked, regardless of who started the service. - Catches ollama, vLLM, etc. running outside cf-orch management. - -2. Managed services — services cf-orch started via ServiceManager. - Checked on their configured host_port, deduplicates with well-known results. - -Each service exposes a different introspection API: - - vllm: GET /v1/models → {"data": [{"id": ""}]} - - ollama: GET /api/ps → {"models": [{"name": "", "size_vram": }]} - -ollama can have multiple models loaded simultaneously; each is reported as a -separate entry so the dashboard shows per-model residency. - -The probe is best-effort: a timeout or connection refusal means model_name=None -but the service is still reported as resident. -""" -from __future__ import annotations - -import json -import logging -import urllib.request -from typing import Any - -from circuitforge_core.resources.profiles.schema import DockerSpec - -logger = logging.getLogger(__name__) - -_PROBE_TIMEOUT_S = 2.0 - -# Well-known service ports probed on every heartbeat. -# key → (service_name, prober_key) -_WELL_KNOWN_PORTS: dict[int, str] = { - 11434: "ollama", - 8000: "vllm", - 8080: "vllm", # common alt vLLM port -} - - -def _fetch_json(url: str) -> dict[str, Any] | None: - """GET a URL and parse JSON; returns None on any error.""" - try: - with urllib.request.urlopen(url, timeout=_PROBE_TIMEOUT_S) as resp: - return json.loads(resp.read()) - except Exception as exc: - logger.debug("Probe %s: %s", url, exc) - return None - - -def _probe_vllm(port: int) -> list[str]: - data = _fetch_json(f"http://127.0.0.1:{port}/v1/models") - if data and data.get("data"): - return [m["id"] for m in data["data"] if m.get("id")] - return [] - - -def _probe_ollama(port: int) -> list[str]: - # /api/ps lists models currently *loaded in memory*, not just downloaded. 
- data = _fetch_json(f"http://127.0.0.1:{port}/api/ps") - if data and data.get("models"): - return [m["name"] for m in data["models"] if m.get("name")] - return [] - - -_PROBERS: dict[str, Any] = { - "vllm": _probe_vllm, - "ollama": _probe_ollama, -} - - -def probe_all(service_manager: Any) -> list[dict[str, Any]]: - """ - Probe all services — both well-known ports and cf-orch managed services. - - Returns a list of dicts: [{"service": str, "model_name": str | None}]. - Multiple loaded models in one service (e.g. two ollama models) each get - their own entry, disambiguated as "ollama/0", "ollama/1", etc. - """ - results: list[dict[str, Any]] = [] - seen_ports: set[int] = set() - - # ── 1. Well-known ports ────────────────────────────────────────── - for port, service in _WELL_KNOWN_PORTS.items(): - prober = _PROBERS.get(service) - if prober is None: - continue - models = prober(port) - if not models: - continue # nothing on this port right now - seen_ports.add(port) - if len(models) == 1: - results.append({"service": service, "model_name": models[0]}) - else: - for i, model in enumerate(models): - results.append({"service": f"{service}/{i}", "model_name": model}) - - # ── 2. 
Managed services (cf-orch started) ─────────────────────── - if service_manager is not None: - for service in service_manager.list_running(): - spec = service_manager._get_spec(service) - if not isinstance(spec, DockerSpec): - continue - if spec.host_port in seen_ports: - continue # already captured by well-known probe - prober = _PROBERS.get(service) - if prober is None: - results.append({"service": service, "model_name": None}) - continue - models = prober(spec.host_port) - seen_ports.add(spec.host_port) - if not models: - results.append({"service": service, "model_name": None}) - elif len(models) == 1: - results.append({"service": service, "model_name": models[0]}) - else: - for i, model in enumerate(models): - results.append({"service": f"{service}/{i}", "model_name": model}) - - return results diff --git a/circuitforge_core/resources/cli.py b/circuitforge_core/resources/cli.py deleted file mode 100644 index 7238507..0000000 --- a/circuitforge_core/resources/cli.py +++ /dev/null @@ -1,234 +0,0 @@ -from __future__ import annotations - -import logging -import sys -from pathlib import Path -from typing import Annotated, Optional - -import typer -import uvicorn - -logger = logging.getLogger(__name__) - -app = typer.Typer(name="cf-orch", help="CircuitForge GPU resource orchestrator") - -_SYSTEMD_UNIT_PATH = Path("/etc/systemd/system/cf-orch.service") - -_SYSTEMD_UNIT_TEMPLATE = """\ -[Unit] -Description=CircuitForge GPU Resource Orchestrator -After=network.target - -[Service] -Type=simple -ExecStart={python} -m circuitforge_core.resources.cli start -Restart=on-failure -RestartSec=5 - -[Install] -WantedBy=multi-user.target -""" - - -@app.command() -def start( - profile: Annotated[Optional[Path], typer.Option(help="Profile YAML path")] = None, - host: str = "0.0.0.0", - port: int = 7700, - node_id: str = "local", - agent_port: int = 7701, -) -> None: - """Start the cf-orch coordinator (auto-detects GPU profile if not specified). 
- - Automatically pre-registers the local agent so its GPUs appear on the - dashboard immediately. Remote nodes self-register via POST /api/nodes. - """ - from circuitforge_core.resources.coordinator.lease_manager import LeaseManager - from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry - from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor - from circuitforge_core.resources.coordinator.app import create_coordinator_app - from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry - from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor - - from circuitforge_core.resources.coordinator.node_store import NodeStore - - lease_manager = LeaseManager() - profile_registry = ProfileRegistry() - service_registry = ServiceRegistry() - node_store = NodeStore() - supervisor = AgentSupervisor( - lease_manager=lease_manager, - service_registry=service_registry, - profile_registry=profile_registry, - node_store=node_store, - ) - restored = supervisor.restore_from_store() - if restored: - typer.echo(f"Restored {restored} known node(s) from previous session") - - monitor = GpuMonitor() - gpus = monitor.poll() - if not gpus: - typer.echo( - "Warning: no GPUs detected via nvidia-smi — coordinator running with 0 VRAM" - ) - else: - typer.echo(f"Detected {len(gpus)} GPU(s)") - - if profile: - active_profile = profile_registry.load(profile) - typer.echo(f"Using profile: {active_profile.name} (from {profile})") - else: - active_profile = ( - profile_registry.auto_detect(gpus) - if gpus - else profile_registry.list_public()[-1] - ) - typer.echo(f"Auto-selected profile: {active_profile.name}") - - # Pre-register the local agent — the heartbeat loop will poll it for live GPU data. 
- local_agent_url = f"http://127.0.0.1:{agent_port}" - supervisor.register(node_id, local_agent_url) - typer.echo(f"Registered local node '{node_id}' → {local_agent_url}") - - coordinator_app = create_coordinator_app( - lease_manager=lease_manager, - profile_registry=profile_registry, - agent_supervisor=supervisor, - service_registry=service_registry, - ) - - typer.echo(f"Starting cf-orch coordinator on {host}:{port}") - uvicorn.run(coordinator_app, host=host, port=port) - - -@app.command() -def agent( - coordinator: str = "http://localhost:7700", - node_id: str = "local", - host: str = "0.0.0.0", - port: int = 7701, - advertise_host: Optional[str] = None, - profile: Annotated[Optional[Path], typer.Option(help="Profile YAML path")] = None, -) -> None: - """Start a cf-orch node agent and self-register with the coordinator. - - The agent starts its HTTP server, then POSTs its URL to the coordinator - so it appears on the dashboard without manual configuration. - - Use --advertise-host to override the IP the coordinator should use to - reach this agent (e.g. on a multi-homed or NATted host). - """ - import threading - import httpx - from circuitforge_core.resources.agent.app import create_agent_app - from circuitforge_core.resources.agent.service_manager import ServiceManager - from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry - - # The URL the coordinator should use to reach this agent. - reach_host = advertise_host or ("127.0.0.1" if host in ("0.0.0.0", "::") else host) - agent_url = f"http://{reach_host}:{port}" - - _RECONNECT_INTERVAL_S = 30.0 - - def _reconnect_loop() -> None: - """ - Persistently re-register this agent with the coordinator. 
- - Runs as a daemon thread for the lifetime of the agent process: - - Waits 2 s on first run (uvicorn needs time to bind) - - Re-registers every 30 s thereafter - - If the coordinator is down, silently retries — no crashing - - When the coordinator restarts, the agent re-appears within one cycle - - This means coordinator restarts require no manual intervention on agent hosts. - """ - import time - first = True - while True: - time.sleep(2.0 if first else _RECONNECT_INTERVAL_S) - first = False - try: - resp = httpx.post( - f"{coordinator}/api/nodes", - json={"node_id": node_id, "agent_url": agent_url}, - timeout=5.0, - ) - if resp.is_success: - logger.debug("Registered with coordinator at %s as '%s'", coordinator, node_id) - else: - logger.warning( - "Coordinator registration returned %s", resp.status_code - ) - except Exception as exc: - logger.debug("Coordinator at %s unreachable, will retry: %s", coordinator, exc) - - # Fire reconnect loop in a daemon thread so uvicorn.run() can start blocking immediately. 
- threading.Thread(target=_reconnect_loop, daemon=True, name="cf-orch-reconnect").start() - typer.echo(f"Reconnect loop started — will register with {coordinator} every {int(_RECONNECT_INTERVAL_S)}s") - - service_manager = None - try: - from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor - pr = ProfileRegistry() - gpus = GpuMonitor().poll() - p = pr.load(Path(profile)) if profile else pr.auto_detect(gpus) - service_manager = ServiceManager(node_id=node_id, profile=p, advertise_host=reach_host) - typer.echo(f"ServiceManager ready with profile: {p.name}") - except Exception as exc: - typer.echo(f"Warning: ServiceManager unavailable ({exc})", err=True) - - agent_app = create_agent_app(node_id=node_id, service_manager=service_manager) - typer.echo(f"Starting cf-orch agent [{node_id}] on {host}:{port}") - uvicorn.run(agent_app, host=host, port=port) - - -@app.command() -def status(coordinator: str = "http://localhost:7700") -> None: - """Show GPU and lease status from the coordinator.""" - import httpx - - try: - resp = httpx.get(f"{coordinator}/api/nodes", timeout=5.0) - resp.raise_for_status() - nodes = resp.json().get("nodes", []) - for node in nodes: - typer.echo(f"\nNode: {node['node_id']}") - for gpu in node.get("gpus", []): - typer.echo( - f" GPU {gpu['gpu_id']}: {gpu['name']} — " - f"{gpu['vram_used_mb']}/{gpu['vram_total_mb']} MB used" - ) - except Exception as exc: - typer.echo(f"Coordinator unreachable at {coordinator}: {exc}", err=True) - raise typer.Exit(1) - - -@app.command("install-service") -def install_service( - dry_run: bool = typer.Option( - False, "--dry-run", help="Print unit file without writing" - ), -) -> None: - """Write a systemd unit file for cf-orch (requires root).""" - python = sys.executable - unit_content = _SYSTEMD_UNIT_TEMPLATE.format(python=python) - if dry_run: - typer.echo(f"Would write to {_SYSTEMD_UNIT_PATH}:\n") - typer.echo(unit_content) - return - try: - _SYSTEMD_UNIT_PATH.write_text(unit_content) - 
typer.echo(f"Written: {_SYSTEMD_UNIT_PATH}") - typer.echo( - "Run: sudo systemctl daemon-reload && sudo systemctl enable --now cf-orch" - ) - except PermissionError: - typer.echo( - f"Permission denied writing to {_SYSTEMD_UNIT_PATH}. Run as root.", err=True - ) - raise typer.Exit(1) - - -if __name__ == "__main__": - app() diff --git a/circuitforge_core/resources/client.py b/circuitforge_core/resources/client.py deleted file mode 100644 index 94ff4fc..0000000 --- a/circuitforge_core/resources/client.py +++ /dev/null @@ -1,143 +0,0 @@ -from __future__ import annotations - -import logging -import os -from contextlib import contextmanager, asynccontextmanager -from dataclasses import dataclass - -import httpx - -logger = logging.getLogger(__name__) - - -@dataclass -class Allocation: - allocation_id: str - service: str - node_id: str - gpu_id: int - model: str | None - url: str - started: bool - warm: bool - - -class CFOrchClient: - """ - Client for cf-orch coordinator allocation. - - Sync usage (in LLMRouter or other sync code): - client = CFOrchClient(os.environ["CF_ORCH_URL"]) - with client.allocate("vllm", model_candidates=["Ouro-1.4B"]) as alloc: - # alloc.url is the inference endpoint - - Async usage (in FastAPI apps): - async with client.allocate_async("vllm", model_candidates=["Ouro-1.4B"]) as alloc: - ... - - Authentication: - Pass api_key explicitly, or set CF_LICENSE_KEY env var. When set, every - request carries Authorization: Bearer . Required for the hosted - CircuitForge coordinator (orch.circuitforge.tech); optional for local - self-hosted coordinators. - - Raises ValueError immediately if coordinator_url is empty. 
- """ - - def __init__(self, coordinator_url: str, api_key: str | None = None) -> None: - if not coordinator_url: - raise ValueError("coordinator_url is empty — cf-orch not configured") - self._url = coordinator_url.rstrip("/") - self._api_key = api_key or os.environ.get("CF_LICENSE_KEY", "") - - def _headers(self) -> dict[str, str]: - if self._api_key: - return {"Authorization": f"Bearer {self._api_key}"} - return {} - - def _build_body(self, model_candidates: list[str] | None, ttl_s: float, caller: str) -> dict: - return { - "model_candidates": model_candidates or [], - "ttl_s": ttl_s, - "caller": caller, - } - - def _parse_allocation(self, data: dict, service: str) -> Allocation: - return Allocation( - allocation_id=data["allocation_id"], - service=service, - node_id=data["node_id"], - gpu_id=data["gpu_id"], - model=data.get("model"), - url=data["url"], - started=data.get("started", False), - warm=data.get("warm", False), - ) - - @contextmanager - def allocate( - self, - service: str, - *, - model_candidates: list[str] | None = None, - ttl_s: float = 3600.0, - caller: str = "", - ): - """Sync context manager. 
Allocates on enter, releases on exit.""" - resp = httpx.post( - f"{self._url}/api/services/{service}/allocate", - json=self._build_body(model_candidates, ttl_s, caller), - headers=self._headers(), - timeout=120.0, - ) - if not resp.is_success: - raise RuntimeError( - f"cf-orch allocation failed for {service!r}: " - f"HTTP {resp.status_code} — {resp.text[:200]}" - ) - alloc = self._parse_allocation(resp.json(), service) - try: - yield alloc - finally: - try: - httpx.delete( - f"{self._url}/api/services/{service}/allocations/{alloc.allocation_id}", - headers=self._headers(), - timeout=10.0, - ) - except Exception as exc: - logger.debug("cf-orch release failed (non-fatal): %s", exc) - - @asynccontextmanager - async def allocate_async( - self, - service: str, - *, - model_candidates: list[str] | None = None, - ttl_s: float = 3600.0, - caller: str = "", - ): - """Async context manager. Allocates on enter, releases on exit.""" - async with httpx.AsyncClient(timeout=120.0) as client: - resp = await client.post( - f"{self._url}/api/services/{service}/allocate", - json=self._build_body(model_candidates, ttl_s, caller), - headers=self._headers(), - ) - if not resp.is_success: - raise RuntimeError( - f"cf-orch allocation failed for {service!r}: " - f"HTTP {resp.status_code} — {resp.text[:200]}" - ) - alloc = self._parse_allocation(resp.json(), service) - try: - yield alloc - finally: - try: - await client.delete( - f"{self._url}/api/services/{service}/allocations/{alloc.allocation_id}", - headers=self._headers(), - timeout=10.0, - ) - except Exception as exc: - logger.debug("cf-orch async release failed (non-fatal): %s", exc) diff --git a/circuitforge_core/resources/compose.yml b/circuitforge_core/resources/compose.yml deleted file mode 100644 index 2cb4345..0000000 --- a/circuitforge_core/resources/compose.yml +++ /dev/null @@ -1,44 +0,0 @@ -# circuitforge_core/resources/compose.yml -# One-command cf-orch deployment for Docker self-hosters: -# docker compose -f 
path/to/compose.yml up cf-orch-coordinator - -services: - cf-orch-coordinator: - image: python:3.12-slim - command: > - sh -c "pip install 'circuitforge-core[orch]' && - cf-orch start --host 0.0.0.0 --port 7700" - ports: - - "7700:7700" - volumes: - - /run/docker.sock:/var/run/docker.sock:ro - - cf-orch-data:/data - environment: - - CFORCH_PROFILE=${CFORCH_PROFILE:-} - restart: unless-stopped - devices: - - /dev/nvidia0:/dev/nvidia0 - - /dev/nvidiactl:/dev/nvidiactl - runtime: nvidia - - cf-orch-agent: - image: python:3.12-slim - command: > - sh -c "pip install 'circuitforge-core[orch]' && - cf-orch agent --coordinator http://cf-orch-coordinator:7700 - --node-id ${CFORCH_NODE_ID:-local} - --host 0.0.0.0 --port 7701" - ports: - - "7701:7701" - depends_on: - - cf-orch-coordinator - environment: - - CFORCH_NODE_ID=${CFORCH_NODE_ID:-local} - restart: unless-stopped - devices: - - /dev/nvidia0:/dev/nvidia0 - - /dev/nvidiactl:/dev/nvidiactl - runtime: nvidia - -volumes: - cf-orch-data: diff --git a/circuitforge_core/resources/coordinator/__init__.py b/circuitforge_core/resources/coordinator/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/circuitforge_core/resources/coordinator/agent_supervisor.py b/circuitforge_core/resources/coordinator/agent_supervisor.py deleted file mode 100644 index 503c8c5..0000000 --- a/circuitforge_core/resources/coordinator/agent_supervisor.py +++ /dev/null @@ -1,209 +0,0 @@ -from __future__ import annotations - -import asyncio -import logging -import time -from dataclasses import dataclass, field - -import httpx - -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager -from circuitforge_core.resources.coordinator.node_store import NodeStore -from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry -from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry -from circuitforge_core.resources.models import GpuInfo, NodeInfo, ResidentAllocation - 
-logger = logging.getLogger(__name__) - -_HEARTBEAT_INTERVAL_S = 10.0 -_AGENT_TIMEOUT_S = 5.0 - - -@dataclass -class AgentRecord: - node_id: str - agent_url: str - last_seen: float = field(default_factory=time.time) - gpus: list[GpuInfo] = field(default_factory=list) - online: bool = False - - -class AgentSupervisor: - def __init__( - self, - lease_manager: LeaseManager, - service_registry: ServiceRegistry | None = None, - profile_registry: ProfileRegistry | None = None, - node_store: NodeStore | None = None, - ) -> None: - self._agents: dict[str, AgentRecord] = {} - self._lease_manager = lease_manager - self._running = False - self._service_registry = service_registry - self._profile_registry = profile_registry - self._node_store = node_store - self._heartbeat_tick = 0 - - def restore_from_store(self) -> int: - """ - Load previously-known nodes from NodeStore into the in-memory registry. - - All restored nodes start as offline=False. The heartbeat loop will poll - them on its first tick and promote any that respond to online=True. - - Returns the number of nodes restored. 
- """ - if self._node_store is None: - return 0 - restored = 0 - for node_id, agent_url in self._node_store.all(): - if node_id not in self._agents: - self._agents[node_id] = AgentRecord( - node_id=node_id, agent_url=agent_url, online=False - ) - restored += 1 - if restored: - logger.info("NodeStore: restored %d known node(s) from previous session", restored) - return restored - - def register(self, node_id: str, agent_url: str) -> None: - if node_id not in self._agents: - self._agents[node_id] = AgentRecord(node_id=node_id, agent_url=agent_url) - logger.info("Registered agent node: %s @ %s", node_id, agent_url) - else: - if self._agents[node_id].agent_url != agent_url: - self._agents[node_id].agent_url = agent_url - logger.info("Updated agent URL for %s → %s", node_id, agent_url) - if self._node_store is not None: - self._node_store.upsert(node_id, agent_url) - - def get_node_info(self, node_id: str) -> NodeInfo | None: - record = self._agents.get(node_id) - if record is None: - return None - return NodeInfo( - node_id=record.node_id, - agent_url=record.agent_url, - gpus=record.gpus, - last_heartbeat=record.last_seen, - ) - - def all_nodes(self) -> list[NodeInfo]: - return [ - NodeInfo( - node_id=r.node_id, - agent_url=r.agent_url, - gpus=r.gpus, - last_heartbeat=r.last_seen, - ) - for r in self._agents.values() - ] - - def online_agents(self) -> "dict[str, AgentRecord]": - """Return only currently-online agents, keyed by node_id.""" - return {nid: rec for nid, rec in self._agents.items() if rec.online} - - async def poll_agent(self, node_id: str) -> bool: - record = self._agents.get(node_id) - if record is None: - return False - try: - async with httpx.AsyncClient(timeout=_AGENT_TIMEOUT_S) as client: - gpu_resp = await client.get(f"{record.agent_url}/gpu-info") - gpu_resp.raise_for_status() - - # Resident-info is best-effort — older agents may not have the endpoint. 
- try: - res_resp = await client.get(f"{record.agent_url}/resident-info") - resident_data = res_resp.json() if res_resp.is_success else {} - except Exception: - resident_data = {} - - data = gpu_resp.json() - gpus = [ - GpuInfo( - gpu_id=g["gpu_id"], - name=g["name"], - vram_total_mb=g["vram_total_mb"], - vram_used_mb=g["vram_used_mb"], - vram_free_mb=g["vram_free_mb"], - ) - for g in data.get("gpus", []) - ] - record.gpus = gpus - record.last_seen = time.time() - record.online = True - for gpu in gpus: - self._lease_manager.register_gpu(node_id, gpu.gpu_id, gpu.vram_total_mb) - - residents = [ - (r["service"], r.get("model_name")) - for r in resident_data.get("residents", []) - ] - self._lease_manager.set_residents_for_node(node_id, residents) - - return True - except Exception as exc: - logger.warning("Agent %s unreachable: %s", node_id, exc) - record.online = False - return False - - async def poll_all(self) -> None: - await asyncio.gather(*[self.poll_agent(nid) for nid in self._agents]) - - def _build_idle_stop_config(self) -> dict[str, int]: - if self._profile_registry is None: - return {} - config: dict[str, int] = {} - for profile in self._profile_registry.list_public(): - for svc_name, svc in profile.services.items(): - if svc.idle_stop_after_s > 0: - existing = config.get(svc_name, 0) - config[svc_name] = min(existing, svc.idle_stop_after_s) if existing > 0 else svc.idle_stop_after_s - return config - - async def _http_post(self, url: str) -> bool: - try: - async with httpx.AsyncClient(timeout=10.0) as client: - resp = await client.post(url) - return resp.is_success - except Exception as exc: - logger.warning("HTTP POST %s failed: %s", url, exc) - return False - - async def _run_idle_sweep(self) -> None: - if self._service_registry is None: - return - expired = self._service_registry.sweep_expired_allocations() - if expired: - logger.info("TTL sweep: expired %d allocation(s): %s", len(expired), expired) - idle_stop_config = self._build_idle_stop_config() - 
if not idle_stop_config: - return - timed_out = self._service_registry.idle_past_timeout(idle_stop_config) - for instance in timed_out: - node_info = self.get_node_info(instance.node_id) - if node_info is None: - continue - stop_url = f"{node_info.agent_url}/services/{instance.service}/stop" - logger.info( - "Idle sweep: stopping %s on %s gpu%s (idle timeout)", - instance.service, instance.node_id, instance.gpu_id, - ) - success = await self._http_post(stop_url) - if success: - self._service_registry.mark_stopped( - instance.service, instance.node_id, instance.gpu_id - ) - - async def run_heartbeat_loop(self) -> None: - self._running = True - while self._running: - await self.poll_all() - self._heartbeat_tick += 1 - if self._heartbeat_tick % 3 == 0: - await self._run_idle_sweep() - await asyncio.sleep(_HEARTBEAT_INTERVAL_S) - - def stop(self) -> None: - self._running = False diff --git a/circuitforge_core/resources/coordinator/app.py b/circuitforge_core/resources/coordinator/app.py deleted file mode 100644 index 5d0dac6..0000000 --- a/circuitforge_core/resources/coordinator/app.py +++ /dev/null @@ -1,509 +0,0 @@ -from __future__ import annotations - -import logging -import time -import urllib.request -from contextlib import asynccontextmanager -from pathlib import Path -from typing import Any - -logger = logging.getLogger(__name__) - -from fastapi import FastAPI, HTTPException -from fastapi.responses import HTMLResponse -from pydantic import BaseModel - -from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor -from circuitforge_core.resources.coordinator.eviction_engine import EvictionEngine -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager -from circuitforge_core.resources.coordinator.node_selector import select_node -from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry -from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry -from 
circuitforge_core.resources.profiles.schema import ProcessSpec - -_DASHBOARD_HTML = (Path(__file__).parent / "dashboard.html").read_text() - - -def _get_health_path(profile_registry: ProfileRegistry, service: str) -> str: - """Return the health_path for a service from the first matching profile spec.""" - for profile in profile_registry.list_public(): - svc = profile.services.get(service) - if svc and isinstance(svc.managed, ProcessSpec): - return svc.managed.health_path - return "/health" - -_PROBE_INTERVAL_S = 5.0 # how often to poll starting instances -_PROBE_TIMEOUT_S = 300.0 # give up and mark stopped after this many seconds - - -async def _run_instance_probe_loop(service_registry: ServiceRegistry) -> None: - """ - Background loop: transition 'starting' instances to 'running' once their - /health endpoint responds, or to 'stopped' after PROBE_TIMEOUT_S. - """ - import asyncio - - start_times: dict[str, float] = {} # instance key → time first seen as starting - - while True: - await asyncio.sleep(_PROBE_INTERVAL_S) - now = time.time() - for inst in service_registry.all_instances(): - if inst.state != "starting": - start_times.pop(f"{inst.service}:{inst.node_id}:{inst.gpu_id}", None) - continue - key = f"{inst.service}:{inst.node_id}:{inst.gpu_id}" - start_times.setdefault(key, now) - - healthy = False - if inst.url: - try: - with urllib.request.urlopen( - inst.url.rstrip("/") + inst.health_path, timeout=2.0 - ) as resp: - healthy = resp.status == 200 - except Exception: - pass - - if healthy: - service_registry.upsert_instance( - service=inst.service, node_id=inst.node_id, gpu_id=inst.gpu_id, - state="running", model=inst.model, url=inst.url, - ) - start_times.pop(key, None) - logger.info("Instance %s/%s gpu=%s transitioned to running", inst.service, inst.node_id, inst.gpu_id) - elif now - start_times[key] > _PROBE_TIMEOUT_S: - service_registry.upsert_instance( - service=inst.service, node_id=inst.node_id, gpu_id=inst.gpu_id, - state="stopped", 
model=inst.model, url=inst.url, - ) - start_times.pop(key, None) - logger.warning("Instance %s/%s gpu=%s timed out in starting state — marked stopped", inst.service, inst.node_id, inst.gpu_id) - - -class LeaseRequest(BaseModel): - node_id: str - gpu_id: int - mb: int - service: str - priority: int = 2 - ttl_s: float = 0.0 - - -class NodeRegisterRequest(BaseModel): - node_id: str - agent_url: str # e.g. "http://10.1.10.71:7701" - - -class ServiceEnsureRequest(BaseModel): - node_id: str - gpu_id: int = 0 - params: dict[str, str] = {} - ttl_s: float = 3600.0 - # Ordered list of model names to try; falls back down the list if VRAM is tight. - # The "model" key in params is used if this list is empty. - model_candidates: list[str] = [] - - -class ServiceAllocateRequest(BaseModel): - model_candidates: list[str] = [] - gpu_id: int | None = None - params: dict[str, str] = {} - ttl_s: float = 3600.0 - caller: str = "" - - -def create_coordinator_app( - lease_manager: LeaseManager, - profile_registry: ProfileRegistry, - agent_supervisor: AgentSupervisor, - service_registry: ServiceRegistry, -) -> FastAPI: - eviction_engine = EvictionEngine(lease_manager=lease_manager) - - @asynccontextmanager - async def _lifespan(app: FastAPI): # type: ignore[type-arg] - import asyncio - heartbeat_task = asyncio.create_task(agent_supervisor.run_heartbeat_loop()) - probe_task = asyncio.create_task(_run_instance_probe_loop(service_registry)) - yield - agent_supervisor.stop() - heartbeat_task.cancel() - probe_task.cancel() - - app = FastAPI(title="cf-orch-coordinator", lifespan=_lifespan) - - # Optional Heimdall auth — enabled when HEIMDALL_URL env var is set. - # Self-hosted coordinators skip this entirely; the CF-hosted public endpoint - # (orch.circuitforge.tech) sets HEIMDALL_URL to gate paid+ access. 
- from circuitforge_core.resources.coordinator.auth import HeimdallAuthMiddleware - _auth = HeimdallAuthMiddleware.from_env() - if _auth is not None: - app.middleware("http")(_auth) - - @app.get("/", response_class=HTMLResponse, include_in_schema=False) - def dashboard() -> HTMLResponse: - return HTMLResponse(content=_DASHBOARD_HTML) - - @app.get("/api/health") - def health() -> dict[str, Any]: - return {"status": "ok"} - - @app.get("/api/nodes") - def get_nodes() -> dict[str, Any]: - nodes = agent_supervisor.all_nodes() - return { - "nodes": [ - { - "node_id": n.node_id, - "agent_url": n.agent_url, - "last_heartbeat": n.last_heartbeat, - "gpus": [ - { - "gpu_id": g.gpu_id, - "name": g.name, - "vram_total_mb": g.vram_total_mb, - "vram_used_mb": g.vram_used_mb, - "vram_free_mb": g.vram_free_mb, - } - for g in n.gpus - ], - } - for n in nodes - ] - } - - @app.post("/api/nodes") - async def register_node(req: NodeRegisterRequest) -> dict[str, Any]: - """Agents call this to self-register. Coordinator immediately polls for GPU info.""" - agent_supervisor.register(req.node_id, req.agent_url) - await agent_supervisor.poll_agent(req.node_id) - return {"registered": True, "node_id": req.node_id} - - @app.get("/api/profiles") - def get_profiles() -> dict[str, Any]: - return { - "profiles": [ - {"name": p.name, "vram_total_mb": p.vram_total_mb} - for p in profile_registry.list_public() - ] - } - - @app.get("/api/resident") - def get_residents() -> dict[str, Any]: - return { - "residents": [ - { - "service": r.service, - "node_id": r.node_id, - "model_name": r.model_name, - "first_seen": r.first_seen, - } - for r in lease_manager.all_residents() - ] - } - - @app.get("/api/leases") - def get_leases() -> dict[str, Any]: - return { - "leases": [ - { - "lease_id": lease.lease_id, - "node_id": lease.node_id, - "gpu_id": lease.gpu_id, - "mb_granted": lease.mb_granted, - "holder_service": lease.holder_service, - "priority": lease.priority, - "expires_at": lease.expires_at, - } - for 
lease in lease_manager.all_leases() - ] - } - - @app.post("/api/leases") - async def request_lease(req: LeaseRequest) -> dict[str, Any]: - node_info = agent_supervisor.get_node_info(req.node_id) - if node_info is None: - raise HTTPException( - status_code=422, - detail=f"Unknown node_id {req.node_id!r} — node not registered", - ) - agent_url = node_info.agent_url - - lease = await eviction_engine.request_lease( - node_id=req.node_id, - gpu_id=req.gpu_id, - mb=req.mb, - service=req.service, - priority=req.priority, - agent_url=agent_url, - ttl_s=req.ttl_s, - ) - if lease is None: - raise HTTPException( - status_code=503, - detail="Insufficient VRAM — no eviction candidates available", - ) - return { - "lease": { - "lease_id": lease.lease_id, - "node_id": lease.node_id, - "gpu_id": lease.gpu_id, - "mb_granted": lease.mb_granted, - "holder_service": lease.holder_service, - "priority": lease.priority, - "expires_at": lease.expires_at, - } - } - - @app.delete("/api/leases/{lease_id}") - async def release_lease(lease_id: str) -> dict[str, Any]: - released = await lease_manager.release(lease_id) - if not released: - raise HTTPException(status_code=404, detail=f"Lease {lease_id!r} not found") - return {"released": True, "lease_id": lease_id} - - @app.post("/api/services/{service}/ensure") - async def ensure_service(service: str, req: ServiceEnsureRequest) -> dict[str, Any]: - """ - Ensure a managed service is running on the given node. - - If model_candidates is provided, tries each model in order, skipping any - that exceed the live free VRAM on the target GPU. Falls back down the list - until one succeeds. The selected model is returned in the response. - """ - import httpx - - node_info = agent_supervisor.get_node_info(req.node_id) - if node_info is None: - raise HTTPException(422, detail=f"Unknown node_id {req.node_id!r}") - - # Resolve candidate list — fall back to params["model"] if not specified. 
- candidates: list[str] = req.model_candidates or ( - [req.params["model"]] if "model" in req.params else [] - ) - if not candidates: - raise HTTPException(422, detail="No model specified: set params.model or model_candidates") - - # Live free VRAM on the target GPU (used for pre-flight filtering). - gpu = next((g for g in node_info.gpus if g.gpu_id == req.gpu_id), None) - free_mb = gpu.vram_free_mb if gpu else 0 - - # Profile max_mb for the service gives us the VRAM ceiling for this slot. - # Models larger than free_mb are skipped before we even try to start them. - # We use model file size as a rough proxy — skip if free_mb < half of max_mb, - # since a fully-loaded model typically needs ~50-80% of its param size in VRAM. - service_max_mb = 0 - for p in profile_registry.list_public(): - svc = p.services.get(service) - if svc: - service_max_mb = svc.max_mb - break - - # Filter candidates by VRAM headroom — require free VRAM >= service ceiling - # so the model can actually load without competing for VRAM with other processes. - if service_max_mb > 0 and free_mb < service_max_mb: - raise HTTPException( - 503, - detail=f"Insufficient VRAM on gpu {req.gpu_id}: {free_mb}MB free, need {service_max_mb}MB", - ) - - last_error: str = "" - async with httpx.AsyncClient(timeout=120.0) as client: - for model in candidates: - params_with_model = {**req.params, "model": model} - try: - start_resp = await client.post( - f"{node_info.agent_url}/services/{service}/start", - json={"gpu_id": req.gpu_id, "params": params_with_model}, - ) - if start_resp.is_success: - data = start_resp.json() - return { - "service": service, - "node_id": req.node_id, - "gpu_id": req.gpu_id, - "model": model, - "url": data.get("url"), - "running": data.get("running", False), - } - last_error = start_resp.text - except httpx.HTTPError as exc: - raise HTTPException(502, detail=f"Agent unreachable: {exc}") - - raise HTTPException( - 503, - detail=f"All model candidates exhausted for {service!r}. 
Last error: {last_error}", - ) - - @app.post("/api/services/{service}/allocate") - async def allocate_service(service: str, req: ServiceAllocateRequest) -> dict[str, Any]: - """ - Allocate a managed service — coordinator picks the best node automatically. - Returns a URL + allocation_id. (Allocation not tracked server-side until Phase 2.) - """ - import httpx - - if not req.model_candidates: - raise HTTPException(422, detail="model_candidates must be non-empty") - - # Validate service is known in at least one profile, regardless of gpu_id - if not any(service in p.services for p in profile_registry.list_public()): - raise HTTPException(422, detail=f"Unknown service {service!r} — not in any profile") - - residents = lease_manager.resident_keys() - - if req.gpu_id is None: - online = agent_supervisor.online_agents() - placement = select_node(online, service, profile_registry, residents) - if placement is None: - raise HTTPException( - 503, - detail=f"No online node has capacity for service {service!r}", - ) - node_id, gpu_id = placement - else: - online = agent_supervisor.online_agents() - node_id = next( - (nid for nid, rec in online.items() - if any(g.gpu_id == req.gpu_id for g in rec.gpus)), - None, - ) - if node_id is None: - raise HTTPException(422, detail=f"No online node has gpu_id={req.gpu_id}") - gpu_id = req.gpu_id - - node_info = agent_supervisor.get_node_info(node_id) - if node_info is None: - raise HTTPException(422, detail=f"Node {node_id!r} not found") - - warm = f"{node_id}:{service}" in residents - - async with httpx.AsyncClient(timeout=120.0) as client: - last_error = "" - for model in req.model_candidates: - try: - resp = await client.post( - f"{node_info.agent_url}/services/{service}/start", - json={"gpu_id": gpu_id, "params": {**req.params, "model": model}}, - ) - if resp.is_success: - data = resp.json() - svc_url = data.get("url", "") - alloc = service_registry.allocate( - service=service, - node_id=node_id, - gpu_id=gpu_id, - model=model, - 
caller=req.caller, - url=svc_url, - ttl_s=req.ttl_s, - ) - # Seed the instance state for first-time starts. - # adopted=True means the agent found it already running. - adopted = data.get("adopted", False) - instance_state = "running" if (warm or adopted) else "starting" - health_path = _get_health_path(profile_registry, service) - service_registry.upsert_instance( - service=service, - node_id=node_id, - gpu_id=gpu_id, - state=instance_state, - model=model, - url=svc_url, - health_path=health_path, - ) - return { - "allocation_id": alloc.allocation_id, - "service": service, - "node_id": node_id, - "gpu_id": gpu_id, - "model": model, - "url": data.get("url"), - "started": not warm, - "warm": warm, - } - last_error = resp.text - except httpx.HTTPError as exc: - raise HTTPException(502, detail=f"Agent unreachable: {exc}") - - raise HTTPException( - 503, - detail=f"All model candidates exhausted for {service!r}. Last error: {last_error}", - ) - - @app.delete("/api/services/{service}/allocations/{allocation_id}") - async def release_allocation(service: str, allocation_id: str) -> dict[str, Any]: - existing = service_registry.get_allocation(allocation_id) - if existing is None or existing.service != service: - raise HTTPException(404, detail=f"Allocation {allocation_id!r} not found for service {service!r}") - released = service_registry.release(allocation_id) - if not released: - raise HTTPException(404, detail=f"Allocation {allocation_id!r} not found") - return {"released": True, "allocation_id": allocation_id} - - @app.get("/api/services/{service}/status") - def get_service_status(service: str) -> dict[str, Any]: - instances = [i for i in service_registry.all_instances() if i.service == service] - allocations = [a for a in service_registry.all_allocations() if a.service == service] - return { - "service": service, - "instances": [ - { - "node_id": i.node_id, - "gpu_id": i.gpu_id, - "state": i.state, - "model": i.model, - "url": i.url, - "idle_since": i.idle_since, - } 
- for i in instances - ], - "allocations": [ - { - "allocation_id": a.allocation_id, - "node_id": a.node_id, - "gpu_id": a.gpu_id, - "model": a.model, - "caller": a.caller, - "url": a.url, - "expires_at": a.expires_at, - } - for a in allocations - ], - } - - @app.get("/api/services") - def list_services() -> dict[str, Any]: - instances = service_registry.all_instances() - return { - "services": [ - { - "service": i.service, - "node_id": i.node_id, - "gpu_id": i.gpu_id, - "state": i.state, - "model": i.model, - "url": i.url, - } - for i in instances - ] - } - - @app.delete("/api/services/{service}") - async def stop_service(service: str, node_id: str) -> dict[str, Any]: - """Stop a managed service on the given node.""" - node_info = agent_supervisor.get_node_info(node_id) - if node_info is None: - raise HTTPException(422, detail=f"Unknown node_id {node_id!r}") - - import httpx - async with httpx.AsyncClient(timeout=30.0) as client: - try: - resp = await client.post(f"{node_info.agent_url}/services/{service}/stop") - resp.raise_for_status() - return {"service": service, "node_id": node_id, "stopped": resp.json().get("stopped", False)} - except httpx.HTTPError as exc: - raise HTTPException(502, detail=f"Agent unreachable: {exc}") - - return app diff --git a/circuitforge_core/resources/coordinator/auth.py b/circuitforge_core/resources/coordinator/auth.py deleted file mode 100644 index 51675f6..0000000 --- a/circuitforge_core/resources/coordinator/auth.py +++ /dev/null @@ -1,197 +0,0 @@ -""" -cf-orch coordinator auth middleware. - -When HEIMDALL_URL is set, all /api/* requests (except /api/health) must carry: - Authorization: Bearer - -The key is validated against Heimdall and the result cached for -CACHE_TTL_S seconds (default 300 / 5 min). This keeps Heimdall out of the -per-allocation hot path while keeping revocation latency bounded. - -When HEIMDALL_URL is not set, auth is disabled — self-hosted deployments work -with no configuration change. 
- -Environment variables ---------------------- -HEIMDALL_URL Heimdall base URL, e.g. https://license.circuitforge.tech - When absent, auth is skipped entirely. -HEIMDALL_MIN_TIER Minimum tier required (default: "paid"). - Accepted values: free, paid, premium, ultra. -CF_ORCH_AUTH_SECRET Shared secret sent to Heimdall so it can distinguish - coordinator service calls from end-user requests. - Must match the COORDINATOR_SECRET env var on Heimdall. -""" -from __future__ import annotations - -import logging -import os -import time -from dataclasses import dataclass, field -from threading import Lock - -import httpx -from fastapi import Request -from fastapi.responses import JSONResponse - -logger = logging.getLogger(__name__) - -# Unauthenticated paths — health check must always be accessible for monitoring. -_EXEMPT_PATHS: frozenset[str] = frozenset({"/api/health", "/", "/openapi.json", "/docs", "/redoc"}) - -_TIER_ORDER: dict[str, int] = {"free": 0, "paid": 1, "premium": 2, "ultra": 3} - -CACHE_TTL_S: float = 300.0 # 5 minutes — matches Kiwi cloud session TTL - - -@dataclass -class _CacheEntry: - valid: bool - tier: str - user_id: str - expires_at: float - - -class _ValidationCache: - """Thread-safe TTL cache for Heimdall validation results.""" - - def __init__(self, ttl_s: float = CACHE_TTL_S) -> None: - self._ttl = ttl_s - self._store: dict[str, _CacheEntry] = {} - self._lock = Lock() - - def get(self, key: str) -> _CacheEntry | None: - with self._lock: - entry = self._store.get(key) - if entry is None or time.monotonic() > entry.expires_at: - return None - return entry - - def set(self, key: str, valid: bool, tier: str, user_id: str) -> None: - with self._lock: - self._store[key] = _CacheEntry( - valid=valid, - tier=tier, - user_id=user_id, - expires_at=time.monotonic() + self._ttl, - ) - - def evict(self, key: str) -> None: - with self._lock: - self._store.pop(key, None) - - def prune(self) -> int: - """Remove expired entries. 
Returns count removed.""" - now = time.monotonic() - with self._lock: - expired = [k for k, e in self._store.items() if now > e.expires_at] - for k in expired: - del self._store[k] - return len(expired) - - -class HeimdallAuthMiddleware: - """ - ASGI middleware that validates CF license keys against Heimdall. - - Attach to a FastAPI app via app.middleware("http"): - - middleware = HeimdallAuthMiddleware.from_env() - if middleware: - app.middleware("http")(middleware) - """ - - def __init__( - self, - heimdall_url: str, - min_tier: str = "paid", - auth_secret: str = "", - cache_ttl_s: float = CACHE_TTL_S, - ) -> None: - self._heimdall = heimdall_url.rstrip("/") - self._min_tier_rank = _TIER_ORDER.get(min_tier, 1) - self._min_tier = min_tier - self._auth_secret = auth_secret - self._cache = _ValidationCache(ttl_s=cache_ttl_s) - logger.info( - "[cf-orch auth] Heimdall auth enabled — url=%s min_tier=%s ttl=%ss", - self._heimdall, min_tier, cache_ttl_s, - ) - - @classmethod - def from_env(cls) -> "HeimdallAuthMiddleware | None": - """Return a configured middleware instance, or None if HEIMDALL_URL is not set.""" - url = os.environ.get("HEIMDALL_URL", "") - if not url: - logger.info("[cf-orch auth] HEIMDALL_URL not set — auth disabled (self-hosted mode)") - return None - return cls( - heimdall_url=url, - min_tier=os.environ.get("HEIMDALL_MIN_TIER", "paid"), - auth_secret=os.environ.get("CF_ORCH_AUTH_SECRET", ""), - ) - - def _validate_against_heimdall(self, license_key: str) -> tuple[bool, str, str]: - """ - Call Heimdall's /licenses/verify endpoint. - - Returns (valid, tier, user_id). - On any network or parse error, returns (False, "", "") — fail closed. 
- """ - try: - headers: dict[str, str] = {"Content-Type": "application/json"} - if self._auth_secret: - headers["X-Coordinator-Secret"] = self._auth_secret - resp = httpx.post( - f"{self._heimdall}/licenses/verify", - json={"key": license_key, "min_tier": self._min_tier}, - headers=headers, - timeout=5.0, - ) - if resp.status_code == 200: - data = resp.json() - return data.get("valid", False), data.get("tier", ""), data.get("user_id", "") - # 401/403 from Heimdall = key invalid/insufficient tier - logger.debug("[cf-orch auth] Heimdall returned %s for key ...%s", resp.status_code, license_key[-6:]) - return False, "", "" - except Exception as exc: - logger.warning("[cf-orch auth] Heimdall unreachable — failing closed: %s", exc) - return False, "", "" - - def _check_key(self, license_key: str) -> tuple[bool, str]: - """ - Validate key (cache-first). Returns (authorized, reason_if_denied). - """ - cached = self._cache.get(license_key) - if cached is not None: - if not cached.valid: - return False, "license key invalid or expired" - if _TIER_ORDER.get(cached.tier, -1) < self._min_tier_rank: - return False, f"feature requires {self._min_tier} tier (have: {cached.tier})" - return True, "" - - valid, tier, user_id = self._validate_against_heimdall(license_key) - self._cache.set(license_key, valid=valid, tier=tier, user_id=user_id) - - if not valid: - return False, "license key invalid or expired" - if _TIER_ORDER.get(tier, -1) < self._min_tier_rank: - return False, f"feature requires {self._min_tier} tier (have: {tier})" - return True, "" - - async def __call__(self, request: Request, call_next): # type: ignore[no-untyped-def] - if request.url.path in _EXEMPT_PATHS: - return await call_next(request) - - auth_header = request.headers.get("Authorization", "") - if not auth_header.startswith("Bearer "): - return JSONResponse( - status_code=401, - content={"detail": "Authorization: Bearer required"}, - ) - - license_key = auth_header.removeprefix("Bearer ").strip() - 
authorized, reason = self._check_key(license_key) - if not authorized: - return JSONResponse(status_code=403, content={"detail": reason}) - - return await call_next(request) diff --git a/circuitforge_core/resources/coordinator/dashboard.html b/circuitforge_core/resources/coordinator/dashboard.html deleted file mode 100644 index a657111..0000000 --- a/circuitforge_core/resources/coordinator/dashboard.html +++ /dev/null @@ -1,473 +0,0 @@ - - - - - -cf-orch · dashboard - - - - -
- - coordinator -
auto-refresh 5s
-
- -
- - -
- - -
- -
- - - - - - - - -
ServiceNodeGPUStateModelURL
-
- - - - - - - - - -
ServiceNode / GPUVRAMPriorityTTL / Expires
- - - - - - - - - -
ServiceNodeModelWarm Since
- - - - - - diff --git a/circuitforge_core/resources/coordinator/eviction_engine.py b/circuitforge_core/resources/coordinator/eviction_engine.py deleted file mode 100644 index db85774..0000000 --- a/circuitforge_core/resources/coordinator/eviction_engine.py +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import annotations - -import asyncio -import logging - -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager -from circuitforge_core.resources.models import VRAMLease - -logger = logging.getLogger(__name__) - -_DEFAULT_EVICTION_TIMEOUT_S = 10.0 - - -class EvictionEngine: - def __init__( - self, - lease_manager: LeaseManager, - eviction_timeout_s: float = _DEFAULT_EVICTION_TIMEOUT_S, - ) -> None: - self.lease_manager = lease_manager - self._timeout = eviction_timeout_s - - async def request_lease( - self, - node_id: str, - gpu_id: int, - mb: int, - service: str, - priority: int, - agent_url: str, - ttl_s: float = 0.0, - ) -> VRAMLease | None: - # Fast path: enough free VRAM - lease = await self.lease_manager.try_grant( - node_id, gpu_id, mb, service, priority, ttl_s - ) - if lease is not None: - return lease - - # Find eviction candidates - candidates = self.lease_manager.get_eviction_candidates( - node_id=node_id, gpu_id=gpu_id, - needed_mb=mb, requester_priority=priority, - ) - if not candidates: - logger.info( - "No eviction candidates for %s on %s:GPU%d (%dMB needed)", - service, node_id, gpu_id, mb, - ) - return None - - # Evict candidates - freed_mb = sum(c.mb_granted for c in candidates) - logger.info( - "Evicting %d lease(s) to free %dMB for %s", - len(candidates), freed_mb, service, - ) - for candidate in candidates: - await self._evict_lease(candidate, agent_url) - - # Wait for evictions to free up VRAM (poll with timeout) - loop = asyncio.get_running_loop() - deadline = loop.time() + self._timeout - while loop.time() < deadline: - lease = await self.lease_manager.try_grant( - node_id, gpu_id, mb, service, priority, ttl_s - ) - if 
lease is not None: - return lease - await asyncio.sleep(0.1) - - logger.warning("Eviction timed out for %s after %.1fs", service, self._timeout) - return None - - async def _evict_lease(self, lease: VRAMLease, agent_url: str) -> None: - """Release lease accounting. Process-level eviction deferred to Plan B.""" - await self.lease_manager.release(lease.lease_id) - - async def _call_agent_evict(self, agent_url: str, lease: VRAMLease) -> bool: - """POST /evict to the agent. Stub for v1 — real process lookup in Plan B.""" - return True diff --git a/circuitforge_core/resources/coordinator/lease_manager.py b/circuitforge_core/resources/coordinator/lease_manager.py deleted file mode 100644 index 80c7c65..0000000 --- a/circuitforge_core/resources/coordinator/lease_manager.py +++ /dev/null @@ -1,130 +0,0 @@ -from __future__ import annotations - -import asyncio -from collections import defaultdict - -from circuitforge_core.resources.models import ResidentAllocation, VRAMLease - - -class LeaseManager: - def __init__(self) -> None: - self._leases: dict[str, VRAMLease] = {} - self._gpu_total: dict[tuple[str, int], int] = {} - self._gpu_used: dict[tuple[str, int], int] = defaultdict(int) - self._lock = asyncio.Lock() - # Resident allocations — keyed "node_id:service", updated by heartbeat. - # No lock needed: only the single heartbeat task writes this dict. 
- self._residents: dict[str, ResidentAllocation] = {} - - def register_gpu(self, node_id: str, gpu_id: int, total_mb: int) -> None: - self._gpu_total[(node_id, gpu_id)] = total_mb - - def gpu_total_mb(self, node_id: str, gpu_id: int) -> int: - return self._gpu_total.get((node_id, gpu_id), 0) - - def used_mb(self, node_id: str, gpu_id: int) -> int: - return self._gpu_used[(node_id, gpu_id)] - - async def try_grant( - self, - node_id: str, - gpu_id: int, - mb: int, - service: str, - priority: int, - ttl_s: float = 0.0, - ) -> VRAMLease | None: - async with self._lock: - total = self._gpu_total.get((node_id, gpu_id), 0) - used = self._gpu_used[(node_id, gpu_id)] - if total - used < mb: - return None - lease = VRAMLease.create( - gpu_id=gpu_id, node_id=node_id, mb=mb, - service=service, priority=priority, ttl_s=ttl_s, - ) - self._leases[lease.lease_id] = lease - self._gpu_used[(node_id, gpu_id)] += mb - return lease - - async def release(self, lease_id: str) -> bool: - async with self._lock: - lease = self._leases.pop(lease_id, None) - if lease is None: - return False - self._gpu_used[(lease.node_id, lease.gpu_id)] -= lease.mb_granted - return True - - def get_eviction_candidates( - self, - node_id: str, - gpu_id: int, - needed_mb: int, - requester_priority: int, - ) -> list[VRAMLease]: - candidates = [ - lease for lease in self._leases.values() - if lease.node_id == node_id - and lease.gpu_id == gpu_id - and lease.priority > requester_priority - ] - candidates.sort(key=lambda lease: lease.priority, reverse=True) - selected: list[VRAMLease] = [] - freed = 0 - for candidate in candidates: - selected.append(candidate) - freed += candidate.mb_granted - if freed >= needed_mb: - break - return selected - - def list_leases( - self, node_id: str | None = None, gpu_id: int | None = None - ) -> list[VRAMLease]: - return [ - lease for lease in self._leases.values() - if (node_id is None or lease.node_id == node_id) - and (gpu_id is None or lease.gpu_id == gpu_id) - ] - - def 
all_leases(self) -> list[VRAMLease]: - return list(self._leases.values()) - - # ── resident tracking ──────────────────────────────────────────── - - def set_residents_for_node( - self, - node_id: str, - residents: list[tuple[str, str | None]], # (service, model_name) - ) -> None: - """ - Replace the resident snapshot for a node. - - Preserves first_seen for entries whose service+model_name are unchanged, - so the dashboard can show how long a model has been warm. - """ - new_keys = {f"{node_id}:{service}" for service, _ in residents} - - # Remove stale entries (service no longer running on this node). - for key in list(self._residents): - if key.startswith(f"{node_id}:") and key not in new_keys: - del self._residents[key] - - # Upsert: preserve first_seen when model is unchanged, reset otherwise. - for service, model_name in residents: - key = f"{node_id}:{service}" - existing = self._residents.get(key) - if existing is not None and existing.model_name == model_name: - continue # same model still loaded — keep original first_seen - self._residents[key] = ResidentAllocation( - service=service, - node_id=node_id, - model_name=model_name, - ) - - def all_residents(self) -> list[ResidentAllocation]: - return list(self._residents.values()) - - def resident_keys(self) -> set[str]: - """Return set of 'node_id:service' strings for currently-warm services.""" - return set(self._residents.keys()) diff --git a/circuitforge_core/resources/coordinator/node_selector.py b/circuitforge_core/resources/coordinator/node_selector.py deleted file mode 100644 index 52ab224..0000000 --- a/circuitforge_core/resources/coordinator/node_selector.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from circuitforge_core.resources.coordinator.agent_supervisor import AgentRecord - from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry - -_WARM_BONUS_MB = 
1000 - - -@dataclass(frozen=True) -class _Scored: - node_id: str - gpu_id: int - vram_free_mb: int - effective_free_mb: int - can_fit: bool - warm: bool - - -def select_node( - agents: "dict[str, AgentRecord]", - service: str, - profile_registry: "ProfileRegistry", - resident_keys: set[str], -) -> tuple[str, int] | None: - """ - Pick the best (node_id, gpu_id) for the requested service. - Warm nodes (service already running) get priority, then sorted by free VRAM. - Returns None if no suitable node exists. - """ - service_max_mb = _find_service_max_mb(service, profile_registry) - if service_max_mb is None: - return None # service not in any profile - - candidates: list[_Scored] = [] - for node_id, record in agents.items(): - if not record.online: - continue - for gpu in record.gpus: - warm = f"{node_id}:{service}" in resident_keys - effective = gpu.vram_free_mb + (_WARM_BONUS_MB if warm else 0) - can_fit = gpu.vram_free_mb >= service_max_mb - candidates.append(_Scored( - node_id=node_id, - gpu_id=gpu.gpu_id, - vram_free_mb=gpu.vram_free_mb, - effective_free_mb=effective, - can_fit=can_fit, - warm=warm, - )) - if not candidates: - return None - # Prefer: (1) warm nodes (model already resident — no cold start) - # (2) cold nodes that can fit the service (free >= half of max_mb) - # Fallback: best-effort node when nothing fits and nothing is warm - # (coordinator will attempt to start the service anyway; it may evict or fail) - # Note: resident_keys are per-node, not per-GPU. On multi-GPU nodes, the warm - # bonus applies to all GPUs on the node. This is a known coarseness — - # per-GPU resident tracking requires a resident_key format change. 
- preferred = [c for c in candidates if c.warm or c.can_fit] - pool = preferred if preferred else candidates - best = max(pool, key=lambda c: (c.warm, c.effective_free_mb)) - return best.node_id, best.gpu_id - - -def _find_service_max_mb(service: str, profile_registry: "ProfileRegistry") -> int | None: - for profile in profile_registry.list_public(): - svc = profile.services.get(service) - if svc is not None: - return svc.max_mb - return None diff --git a/circuitforge_core/resources/coordinator/node_store.py b/circuitforge_core/resources/coordinator/node_store.py deleted file mode 100644 index 8dc71f9..0000000 --- a/circuitforge_core/resources/coordinator/node_store.py +++ /dev/null @@ -1,85 +0,0 @@ -""" -circuitforge_core.resources.coordinator.node_store — SQLite persistence for known agent nodes. - -Gives the coordinator restart-safe memory of which nodes have ever registered. -On startup the coordinator reloads all known nodes and immediately probes them; -nodes that respond come back online within one heartbeat cycle (~10 s) without -any manual intervention on the agent hosts. -""" -from __future__ import annotations - -import logging -import sqlite3 -import time -from pathlib import Path - -logger = logging.getLogger(__name__) - -_DEFAULT_DB_PATH = Path.home() / ".local" / "share" / "circuitforge" / "cf-orch-nodes.db" -_STALE_AGE_DAYS = 30 # nodes unseen for this long are pruned automatically - - -class NodeStore: - """ - Thin SQLite wrapper for persisting known agent nodes across coordinator restarts. - - Thread-safe for single-writer use (coordinator runs in one asyncio thread). 
- """ - - def __init__(self, db_path: Path = _DEFAULT_DB_PATH) -> None: - self.db_path = db_path - db_path.parent.mkdir(parents=True, exist_ok=True) - self._conn = sqlite3.connect(str(db_path), check_same_thread=False) - self._conn.row_factory = sqlite3.Row - self._migrate() - logger.debug("NodeStore initialised at %s", db_path) - - def _migrate(self) -> None: - self._conn.executescript(""" - CREATE TABLE IF NOT EXISTS known_nodes ( - node_id TEXT PRIMARY KEY, - agent_url TEXT NOT NULL, - last_seen REAL NOT NULL - ); - """) - self._conn.commit() - - def upsert(self, node_id: str, agent_url: str) -> None: - """Record or update a node. Called on every successful registration.""" - self._conn.execute( - """ - INSERT INTO known_nodes (node_id, agent_url, last_seen) - VALUES (?, ?, ?) - ON CONFLICT(node_id) DO UPDATE SET - agent_url = excluded.agent_url, - last_seen = excluded.last_seen - """, - (node_id, agent_url, time.time()), - ) - self._conn.commit() - - def all(self) -> list[tuple[str, str]]: - """Return all known (node_id, agent_url) pairs.""" - rows = self._conn.execute( - "SELECT node_id, agent_url FROM known_nodes ORDER BY last_seen DESC" - ).fetchall() - return [(r["node_id"], r["agent_url"]) for r in rows] - - def remove(self, node_id: str) -> None: - self._conn.execute("DELETE FROM known_nodes WHERE node_id = ?", (node_id,)) - self._conn.commit() - - def prune_stale(self, max_age_days: int = _STALE_AGE_DAYS) -> int: - """Delete nodes not seen within max_age_days. 
Returns count removed.""" - cutoff = time.time() - max_age_days * 86400 - cur = self._conn.execute( - "DELETE FROM known_nodes WHERE last_seen < ?", (cutoff,) - ) - self._conn.commit() - removed = cur.rowcount - if removed: - logger.info("NodeStore: pruned %d stale node(s) (>%d days old)", removed, max_age_days) - return removed - - def close(self) -> None: - self._conn.close() diff --git a/circuitforge_core/resources/coordinator/profile_registry.py b/circuitforge_core/resources/coordinator/profile_registry.py deleted file mode 100644 index 0310c44..0000000 --- a/circuitforge_core/resources/coordinator/profile_registry.py +++ /dev/null @@ -1,65 +0,0 @@ -# circuitforge_core/resources/coordinator/profile_registry.py -from __future__ import annotations - -import logging -from pathlib import Path - -from circuitforge_core.resources.models import GpuInfo -from circuitforge_core.resources.profiles.schema import GpuProfile, load_profile - -_PUBLIC_DIR = Path(__file__).parent.parent / "profiles" / "public" - -# VRAM thresholds for public profile selection (MB) -_PROFILE_THRESHOLDS = [ - (22000, "single-gpu-24gb"), - (14000, "single-gpu-16gb"), - (8000, "single-gpu-8gb"), - (5500, "single-gpu-6gb"), - (3500, "single-gpu-4gb"), - (0, "single-gpu-2gb"), -] - -_log = logging.getLogger(__name__) - - -class ProfileRegistry: - def __init__(self, extra_dirs: list[Path] | None = None) -> None: - self._profiles: dict[str, GpuProfile] = {} - self._load_dir(_PUBLIC_DIR) - for d in (extra_dirs or []): - if d.exists(): - self._load_dir(d) - - def _load_dir(self, directory: Path) -> None: - for yaml_file in directory.glob("*.yaml"): - try: - profile = load_profile(yaml_file) - self._profiles[profile.name] = profile - except Exception as exc: - _log.warning("Skipping %s: %s", yaml_file, exc) - - def load(self, path: Path) -> GpuProfile: - profile = load_profile(path) - self._profiles[profile.name] = profile - return profile - - def list_public(self) -> list[GpuProfile]: - # CPU profiles 
(cpu-*) are intentionally excluded — this endpoint - # is used to match GPU hardware. CPU inference nodes self-select - # their profile via the CLI and are not listed for lease matching. - return [ - p for p in self._profiles.values() - if p.name.startswith("single-gpu-") - ] - - def get(self, name: str) -> GpuProfile | None: - return self._profiles.get(name) - - def auto_detect(self, gpus: list[GpuInfo]) -> GpuProfile: - primary_vram = gpus[0].vram_total_mb if gpus else 0 - for threshold_mb, profile_name in _PROFILE_THRESHOLDS: - if primary_vram >= threshold_mb: - profile = self._profiles.get(profile_name) - if profile: - return profile - return self._profiles["single-gpu-2gb"] diff --git a/circuitforge_core/resources/coordinator/service_registry.py b/circuitforge_core/resources/coordinator/service_registry.py deleted file mode 100644 index 18c7b20..0000000 --- a/circuitforge_core/resources/coordinator/service_registry.py +++ /dev/null @@ -1,173 +0,0 @@ -from __future__ import annotations - -import dataclasses -import time -import uuid -from dataclasses import dataclass -from typing import Literal - - -@dataclass -class ServiceAllocation: - allocation_id: str - service: str - node_id: str - gpu_id: int - model: str | None - caller: str - url: str - created_at: float - expires_at: float # 0 = no expiry - - -@dataclass -class ServiceInstance: - service: str - node_id: str - gpu_id: int - state: Literal["starting", "running", "idle", "stopped"] - model: str | None - url: str | None - idle_since: float | None = None - health_path: str = "/health" - - -class ServiceRegistry: - """ - In-memory registry of service allocations and instance state. - - Allocations: per-caller request — many per service instance. - Instances: per (service, node_id, gpu_id) — one per running container. 
- """ - - def __init__(self) -> None: - self._allocations: dict[str, ServiceAllocation] = {} - self._instances: dict[str, ServiceInstance] = {} # key: "service:node_id:gpu_id" - - # ── allocation API ──────────────────────────────────────────────── - - def allocate( - self, - service: str, - node_id: str, - gpu_id: int, - model: str | None, - url: str, - caller: str, - ttl_s: float, - ) -> ServiceAllocation: - alloc = ServiceAllocation( - allocation_id=str(uuid.uuid4()), - service=service, - node_id=node_id, - gpu_id=gpu_id, - model=model, - caller=caller, - url=url, - created_at=time.time(), - expires_at=time.time() + ttl_s if ttl_s > 0 else 0.0, - ) - self._allocations[alloc.allocation_id] = alloc - - # If an instance exists in idle/stopped state, mark it running again - key = f"{service}:{node_id}:{gpu_id}" - if key in self._instances: - inst = self._instances[key] - if inst.state in ("idle", "stopped"): - self._instances[key] = dataclasses.replace( - inst, state="running", idle_since=None - ) - return alloc - - def release(self, allocation_id: str) -> bool: - alloc = self._allocations.pop(allocation_id, None) - if alloc is None: - return False - # If no active allocations remain for this instance, mark it idle - key = f"{alloc.service}:{alloc.node_id}:{alloc.gpu_id}" - if self.active_allocations(alloc.service, alloc.node_id, alloc.gpu_id) == 0: - if key in self._instances: - self._instances[key] = dataclasses.replace( - self._instances[key], state="idle", idle_since=time.time() - ) - return True - - def active_allocations(self, service: str, node_id: str, gpu_id: int) -> int: - return sum( - 1 for a in self._allocations.values() - if a.service == service and a.node_id == node_id and a.gpu_id == gpu_id - ) - - # ── instance API ───────────────────────────────────────────────── - - def upsert_instance( - self, - service: str, - node_id: str, - gpu_id: int, - state: Literal["starting", "running", "idle", "stopped"], - model: str | None, - url: str | None, - 
health_path: str = "/health", - ) -> ServiceInstance: - key = f"{service}:{node_id}:{gpu_id}" - existing = self._instances.get(key) - idle_since: float | None = None - if state == "idle": - # Preserve idle_since if already idle; set now if transitioning into idle - idle_since = existing.idle_since if (existing and existing.state == "idle") else time.time() - inst = ServiceInstance( - service=service, node_id=node_id, gpu_id=gpu_id, - state=state, model=model, url=url, idle_since=idle_since, - health_path=health_path, - ) - self._instances[key] = inst - return inst - - def get_allocation(self, allocation_id: str) -> ServiceAllocation | None: - return self._allocations.get(allocation_id) - - def sweep_expired_allocations(self) -> list[str]: - """ - Remove all allocations whose TTL has elapsed and transition the - corresponding instance to 'idle' if no active allocations remain. - Returns the list of expired allocation_ids. - """ - now = time.time() - expired = [ - alloc_id - for alloc_id, alloc in self._allocations.items() - if alloc.expires_at > 0 and now > alloc.expires_at - ] - for alloc_id in expired: - self.release(alloc_id) - return expired - - def all_allocations(self) -> list[ServiceAllocation]: - return list(self._allocations.values()) - - def all_instances(self) -> list[ServiceInstance]: - return list(self._instances.values()) - - def mark_stopped(self, service: str, node_id: str, gpu_id: int) -> None: - """Transition an instance to 'stopped' state and clear idle_since.""" - key = f"{service}:{node_id}:{gpu_id}" - if key in self._instances: - self._instances[key] = dataclasses.replace( - self._instances[key], state="stopped", idle_since=None - ) - - def idle_past_timeout(self, idle_stop_config: dict[str, int]) -> list[ServiceInstance]: - """ - Return instances in 'idle' state whose idle time exceeds their configured timeout. - idle_stop_config: {service_name: seconds} — 0 means never stop automatically. 
- """ - now = time.time() - result = [] - for inst in self._instances.values(): - if inst.state != "idle" or inst.idle_since is None: - continue - timeout = idle_stop_config.get(inst.service, 0) - if timeout > 0 and (now - inst.idle_since) >= timeout: - result.append(inst) - return result diff --git a/circuitforge_core/resources/docuvision/__init__.py b/circuitforge_core/resources/docuvision/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/circuitforge_core/resources/docuvision/app.py b/circuitforge_core/resources/docuvision/app.py deleted file mode 100644 index 3501b45..0000000 --- a/circuitforge_core/resources/docuvision/app.py +++ /dev/null @@ -1,250 +0,0 @@ -""" -cf-docuvision — managed document understanding service. - -Wraps ByteDance/Dolphin-v2 (Qwen2.5-VL backbone) behind a simple HTTP API. -Managed by cf-orch; started/stopped as a ProcessSpec service. - -API ---- -GET /health → {"status": "ok", "model": ""} -POST /extract → ExtractResponse - -Usage (standalone):: - - python -m circuitforge_core.resources.docuvision.app \\ - --model /Library/Assets/LLM/docuvision/models/dolphin-v2 \\ - --port 8003 --gpu-id 0 -""" -from __future__ import annotations - -import argparse -import base64 -import io -import json -import logging -from contextlib import asynccontextmanager -from typing import Any - -import uvicorn -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel - -logger = logging.getLogger(__name__) - -# Module-level state — populated by _load_model() on first /extract call -_model: Any = None -_processor: Any = None -_model_path: str = "" -_device: str = "cpu" - - -# ── lazy loader ─────────────────────────────────────────────────────────────── - -def _load_model() -> None: - """Lazy-load Dolphin-v2. 
Called once on first /extract request.""" - global _model, _processor, _device - - if _model is not None: - return - - import torch - from transformers import AutoProcessor, AutoModelForCausalLM - - logger.info("Loading Dolphin-v2 from %s ...", _model_path) - _device = "cuda" if torch.cuda.is_available() else "cpu" - - _processor = AutoProcessor.from_pretrained( - _model_path, - trust_remote_code=True, - ) - _model = AutoModelForCausalLM.from_pretrained( - _model_path, - trust_remote_code=True, - torch_dtype=torch.float16 if _device == "cuda" else torch.float32, - device_map=_device, - ) - _model.eval() - logger.info("Dolphin-v2 loaded on %s", _device) - - -# ── FastAPI app ─────────────────────────────────────────────────────────────── - -@asynccontextmanager -async def _lifespan(app: FastAPI): - yield - - -app = FastAPI(title="cf-docuvision", lifespan=_lifespan) - - -# ── request / response models ───────────────────────────────────────────────── - -class ExtractRequest(BaseModel): - """ - Either image_b64 (base64-encoded bytes) or image_path (absolute path) must - be provided. 
hint guides the extraction mode: - - "auto" - Dolphin-v2 detects layout and element types automatically - - "table" - optimise for tabular data (receipts, invoices, forms) - - "text" - optimise for dense prose (contracts, letters) - - "form" - optimise for form field extraction - """ - image_b64: str | None = None - image_path: str | None = None - hint: str = "auto" - - -class ElementOut(BaseModel): - type: str # heading | paragraph | list | table | figure | formula | code - text: str - bbox: list[float] | None = None # [x0, y0, x1, y1] normalised 0-1 if available - - -class TableOut(BaseModel): - html: str - bbox: list[float] | None = None - - -class ExtractResponse(BaseModel): - elements: list[ElementOut] - raw_text: str - tables: list[TableOut] - metadata: dict[str, Any] - - -# ── helpers ─────────────────────────────────────────────────────────────────── - -_HINT_PROMPTS: dict[str, str] = { - "auto": "Parse this document. Extract all elements with their types and text content.", - "table": "Extract all tables from this document as structured HTML. Also extract any line-item text.", - "text": "Extract all text from this document preserving paragraph and heading structure.", - "form": "Extract all form fields from this document. 
Return field labels and their values.", -} - - -def _image_from_request(req: ExtractRequest): - """Return a PIL Image from either image_b64 or image_path.""" - from PIL import Image - - if req.image_b64: - img_bytes = base64.b64decode(req.image_b64) - return Image.open(io.BytesIO(img_bytes)).convert("RGB") - - if req.image_path: - from pathlib import Path - p = Path(req.image_path) - if not p.exists(): - raise HTTPException(status_code=404, detail=f"image_path not found: {req.image_path}") - return Image.open(p).convert("RGB") - - raise HTTPException(status_code=422, detail="Either image_b64 or image_path must be provided") - - -def _parse_dolphin_output(raw: str) -> tuple[list[ElementOut], list[TableOut], str]: - """ - Parse Dolphin-v2's structured output into elements and tables. - - Dolphin-v2 returns a JSON array of element dicts with keys: - type, text, [html], [bbox] - - Falls back gracefully if the model returns plain text instead. - """ - elements: list[ElementOut] = [] - tables: list[TableOut] = [] - - # Try JSON parse first - try: - parsed = json.loads(raw) - if isinstance(parsed, list): - for item in parsed: - etype = item.get("type", "paragraph") - text = item.get("text", "") - bbox = item.get("bbox") - if etype == "table": - tables.append(TableOut(html=item.get("html", text), bbox=bbox)) - elements.append(ElementOut(type=etype, text=text, bbox=bbox)) - raw_text = "\n".join(e.text for e in elements) - return elements, tables, raw_text - except (json.JSONDecodeError, TypeError): - pass - - # Plain-text fallback: treat entire output as a single paragraph - elements = [ElementOut(type="paragraph", text=raw.strip())] - return elements, tables, raw.strip() - - -# ── routes ──────────────────────────────────────────────────────────────────── - -@app.get("/health") -async def health() -> dict[str, str]: - return {"status": "ok", "model": _model_path} - - -@app.post("/extract", response_model=ExtractResponse) -async def extract(req: ExtractRequest) -> 
ExtractResponse: - _load_model() - - image = _image_from_request(req) - prompt = _HINT_PROMPTS.get(req.hint, _HINT_PROMPTS["auto"]) - - import torch - - inputs = _processor( - text=prompt, - images=image, - return_tensors="pt", - ).to(_device) - - with torch.no_grad(): - output_ids = _model.generate( - **inputs, - max_new_tokens=2048, - do_sample=False, - ) - - # Decode only the newly generated tokens - input_len = inputs["input_ids"].shape[1] - raw_output = _processor.decode( - output_ids[0][input_len:], - skip_special_tokens=True, - ) - - elements, tables, raw_text = _parse_dolphin_output(raw_output) - - w, h = image.size - - return ExtractResponse( - elements=elements, - raw_text=raw_text, - tables=tables, - metadata={ - "hint": req.hint, - "width": w, - "height": h, - "model": _model_path, - "device": _device, - }, - ) - - -# ── CLI entry point ─────────────────────────────────────────────────────────── - -def main() -> None: - parser = argparse.ArgumentParser(description="cf-docuvision service") - parser.add_argument("--model", required=True, help="Path to Dolphin-v2 model directory") - parser.add_argument("--port", type=int, default=8003) - parser.add_argument("--host", default="0.0.0.0") - parser.add_argument("--gpu-id", type=int, default=0) - args = parser.parse_args() - - global _model_path - _model_path = args.model - - import os - os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(args.gpu_id)) - - logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s") - uvicorn.run(app, host=args.host, port=args.port) - - -if __name__ == "__main__": - main() diff --git a/circuitforge_core/resources/inference/__init__.py b/circuitforge_core/resources/inference/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/circuitforge_core/resources/inference/llm_server.py b/circuitforge_core/resources/inference/llm_server.py deleted file mode 100644 index a049e0f..0000000 --- a/circuitforge_core/resources/inference/llm_server.py 
+++ /dev/null @@ -1,137 +0,0 @@ -"""Generic OpenAI-compatible inference server for HuggingFace causal LMs.""" -from __future__ import annotations - -import argparse -import time -import uuid -from contextlib import asynccontextmanager -from typing import Any - -import torch -import uvicorn -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -from transformers import AutoModelForCausalLM, AutoTokenizer - -_model: Any = None -_tokenizer: Any = None -_model_id: str = "" -_device: str = "cpu" - - -@asynccontextmanager -async def lifespan(app: FastAPI): - yield - - -app = FastAPI(lifespan=lifespan) - - -class Message(BaseModel): - role: str - content: str - - -class ChatRequest(BaseModel): - model: str | None = None - messages: list[Message] - max_tokens: int | None = 512 - temperature: float | None = 0.7 - stream: bool | None = False - - -@app.get("/health") -def health() -> dict[str, str]: - return {"status": "ok", "model": _model_id} - - -@app.get("/v1/models") -def list_models() -> dict[str, Any]: - return { - "object": "list", - "data": [{"id": _model_id, "object": "model", "owned_by": "cf-orch"}], - } - - -@app.post("/v1/chat/completions") -def chat_completions(req: ChatRequest) -> dict[str, Any]: - if _model is None: - raise HTTPException(503, detail="Model not loaded") - if req.stream: - raise HTTPException(501, detail="Streaming not supported") - - conversation = [{"role": m.role, "content": m.content} for m in req.messages] - try: - encoded = _tokenizer.apply_chat_template( - conversation, - return_tensors="pt", - add_generation_prompt=True, - ) - # transformers 5.x returns BatchEncoding; 4.x returned a bare tensor - input_ids = (encoded.input_ids if hasattr(encoded, "input_ids") else encoded).to(_device) - except Exception as exc: - raise HTTPException(500, detail=f"Tokenisation failed: {exc}") - - max_new = req.max_tokens or 512 - temp = req.temperature if req.temperature is not None else 0.7 - gen_kwargs: dict[str, Any] = { - 
"max_new_tokens": max_new, - "do_sample": temp > 0, - "pad_token_id": _tokenizer.eos_token_id, - } - if temp > 0: - gen_kwargs["temperature"] = temp - - with torch.inference_mode(): - output_ids = _model.generate(input_ids, **gen_kwargs) - - new_tokens = output_ids[0][input_ids.shape[-1]:] - reply = _tokenizer.decode(new_tokens, skip_special_tokens=True) - - return { - "id": f"chatcmpl-{uuid.uuid4().hex[:8]}", - "object": "chat.completion", - "created": int(time.time()), - "model": _model_id, - "choices": [ - { - "index": 0, - "message": {"role": "assistant", "content": reply}, - "finish_reason": "stop", - } - ], - "usage": { - "prompt_tokens": input_ids.shape[-1], - "completion_tokens": len(new_tokens), - "total_tokens": input_ids.shape[-1] + len(new_tokens), - }, - } - - -def _load_model(model_path: str, gpu_id: int) -> None: - global _model, _tokenizer, _model_id, _device - _device = f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu" - _model_id = model_path - _tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) - _model = AutoModelForCausalLM.from_pretrained( - model_path, - dtype=torch.float16 if "cuda" in _device else torch.float32, - device_map={"": _device}, - trust_remote_code=True, - ) - _model.eval() - - -def main() -> None: - parser = argparse.ArgumentParser(description="cf-orch generic LLM inference server") - parser.add_argument("--model", required=True) - parser.add_argument("--port", type=int, default=8000) - parser.add_argument("--host", default="0.0.0.0") - parser.add_argument("--gpu-id", type=int, default=0) - args = parser.parse_args() - _load_model(args.model, args.gpu_id) - uvicorn.run(app, host=args.host, port=args.port, log_level="info") - - -if __name__ == "__main__": - main() diff --git a/circuitforge_core/resources/models.py b/circuitforge_core/resources/models.py deleted file mode 100644 index fb6a6ba..0000000 --- a/circuitforge_core/resources/models.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ 
import annotations - -import time -import uuid -from dataclasses import dataclass, field -from typing import Optional - - -@dataclass(frozen=True) -class VRAMLease: - lease_id: str - gpu_id: int - node_id: str - mb_granted: int - holder_service: str - priority: int - expires_at: float # unix timestamp; 0.0 = no expiry - - @classmethod - def create( - cls, - gpu_id: int, - node_id: str, - mb: int, - service: str, - priority: int, - ttl_s: float = 0.0, - ) -> VRAMLease: - return cls( - lease_id=str(uuid.uuid4()), - gpu_id=gpu_id, - node_id=node_id, - mb_granted=mb, - holder_service=service, - priority=priority, - expires_at=time.time() + ttl_s if ttl_s > 0.0 else 0.0, - ) - - def is_expired(self) -> bool: - return self.expires_at > 0.0 and time.time() > self.expires_at - - -@dataclass(frozen=True) -class GpuInfo: - gpu_id: int - name: str - vram_total_mb: int - vram_used_mb: int - vram_free_mb: int - - -@dataclass(frozen=True) -class ResidentAllocation: - """A model that is loaded and warm in VRAM but not actively serving a request.""" - service: str - node_id: str - model_name: Optional[str] # None if service is running but model probe failed - first_seen: float = field(default_factory=time.time) - - -@dataclass -class NodeInfo: - node_id: str - agent_url: str - gpus: list[GpuInfo] - last_heartbeat: float = field(default_factory=time.time) diff --git a/circuitforge_core/resources/profiles/__init__.py b/circuitforge_core/resources/profiles/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/circuitforge_core/resources/profiles/public/cpu-16gb.yaml b/circuitforge_core/resources/profiles/public/cpu-16gb.yaml deleted file mode 100644 index 4ecec14..0000000 --- a/circuitforge_core/resources/profiles/public/cpu-16gb.yaml +++ /dev/null @@ -1,41 +0,0 @@ -schema_version: 1 -name: cpu-16gb -eviction_timeout_s: 30.0 -services: - ollama: - max_mb: 0 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: 
"serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-stt: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 1 - backend: moonshine - cf-tts: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 1 - cf-embed: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 2 - always_on: true - cf-classify: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 2 - always_on: true -model_size_hints: - llm_max_params: 3b-q4 - image_gen_max: none diff --git a/circuitforge_core/resources/profiles/public/cpu-32gb.yaml b/circuitforge_core/resources/profiles/public/cpu-32gb.yaml deleted file mode 100644 index 1ae4299..0000000 --- a/circuitforge_core/resources/profiles/public/cpu-32gb.yaml +++ /dev/null @@ -1,41 +0,0 @@ -schema_version: 1 -name: cpu-32gb -eviction_timeout_s: 30.0 -services: - ollama: - max_mb: 0 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: "serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-stt: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 2 - backend: faster-whisper - cf-tts: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 2 - cf-embed: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 4 - always_on: true - cf-classify: - max_mb: 0 - priority: 2 - shared: true - max_concurrent: 4 - always_on: true -model_size_hints: - llm_max_params: 7b-q4 - image_gen_max: none diff --git a/circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml deleted file mode 100644 index 70d533d..0000000 --- a/circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml +++ /dev/null @@ -1,73 +0,0 @@ -schema_version: 1 -name: single-gpu-16gb -vram_total_mb: 16384 -eviction_timeout_s: 10.0 -services: - vllm: - max_mb: 9000 - priority: 1 - idle_stop_after_s: 600 - managed: - type: process - exec_path: "/devl/miniconda3/envs/cf/bin/python" - args_template: "-m 
circuitforge_core.resources.inference.llm_server --model /Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}" - port: 8000 - host_port: 8000 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - ollama: - max_mb: 12288 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: "serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-vision: - max_mb: 3072 - priority: 2 - shared: true - max_concurrent: 4 - cf-docuvision: - max_mb: 6144 - priority: 2 - shared: true - max_concurrent: 3 - managed: - type: process - exec_path: "/devl/miniconda3/envs/cf/bin/python" - args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}" - port: 8003 - host_port: 8003 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - cf-stt: - max_mb: 1200 - priority: 2 - shared: true - max_concurrent: 3 - backend: parakeet-tdt - cf-tts: - max_mb: 1024 - priority: 2 - shared: true - max_concurrent: 3 - cf-embed: - max_mb: 512 - priority: 2 - shared: true - max_concurrent: 6 - always_on: true - cf-classify: - max_mb: 512 - priority: 2 - shared: true - max_concurrent: 6 - always_on: true - comfyui: - max_mb: 14336 - priority: 4 -model_size_hints: - llm_max_params: 34b - image_gen_max: flux-dev-fp8 diff --git a/circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml deleted file mode 100644 index 073bf0e..0000000 --- a/circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml +++ /dev/null @@ -1,73 +0,0 @@ -schema_version: 1 -name: single-gpu-24gb -vram_total_mb: 24576 -eviction_timeout_s: 10.0 -services: - vllm: - max_mb: 9000 - priority: 1 - idle_stop_after_s: 600 - managed: - type: process - exec_path: "/devl/miniconda3/envs/cf/bin/python" - args_template: "-m circuitforge_core.resources.inference.llm_server --model 
/Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}" - port: 8000 - host_port: 8000 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - ollama: - max_mb: 18432 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: "serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-vision: - max_mb: 4096 - priority: 2 - shared: true - max_concurrent: 6 - cf-docuvision: - max_mb: 8192 - priority: 2 - shared: true - max_concurrent: 4 - managed: - type: process - exec_path: "/devl/miniconda3/envs/cf/bin/python" - args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}" - port: 8003 - host_port: 8003 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - cf-stt: - max_mb: 1200 - priority: 2 - shared: true - max_concurrent: 4 - backend: parakeet-tdt - cf-tts: - max_mb: 1024 - priority: 2 - shared: true - max_concurrent: 4 - cf-embed: - max_mb: 512 - priority: 2 - shared: true - max_concurrent: 8 - always_on: true - cf-classify: - max_mb: 512 - priority: 2 - shared: true - max_concurrent: 8 - always_on: true - comfyui: - max_mb: 20480 - priority: 4 -model_size_hints: - llm_max_params: 70b - image_gen_max: flux-dev-fp16 diff --git a/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml deleted file mode 100644 index b845dbc..0000000 --- a/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml +++ /dev/null @@ -1,30 +0,0 @@ -schema_version: 1 -name: single-gpu-2gb -vram_total_mb: 2048 -eviction_timeout_s: 15.0 -services: - ollama: - max_mb: 1536 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: "serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-vision: - max_mb: 512 - priority: 2 - shared: true - max_concurrent: 1 - cf-stt: 
- max_mb: 200 - priority: 2 - shared: true - max_concurrent: 1 - backend: moonshine -model_size_hints: - llm_max_params: 3b - image_gen_max: none diff --git a/circuitforge_core/resources/profiles/public/single-gpu-4gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-4gb.yaml deleted file mode 100644 index 1bec3e3..0000000 --- a/circuitforge_core/resources/profiles/public/single-gpu-4gb.yaml +++ /dev/null @@ -1,38 +0,0 @@ -schema_version: 1 -name: single-gpu-4gb -vram_total_mb: 4096 -eviction_timeout_s: 15.0 -services: - ollama: - max_mb: 3072 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: "serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-vision: - max_mb: 1024 - priority: 2 - shared: true - max_concurrent: 1 - cf-stt: - max_mb: 600 - priority: 2 - shared: true - max_concurrent: 1 - backend: faster-whisper - cf-tts: - max_mb: 512 - priority: 2 - shared: true - max_concurrent: 1 - comfyui: - max_mb: 3584 - priority: 4 -model_size_hints: - llm_max_params: 3b - image_gen_max: sd15-fp8 diff --git a/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml deleted file mode 100644 index 3446e54..0000000 --- a/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml +++ /dev/null @@ -1,61 +0,0 @@ -schema_version: 1 -name: single-gpu-6gb -vram_total_mb: 6144 -eviction_timeout_s: 10.0 -services: - vllm: - max_mb: 5500 - priority: 1 - idle_stop_after_s: 600 - managed: - type: process - exec_path: "/devl/miniconda3/envs/cf/bin/python" - args_template: "-m circuitforge_core.resources.inference.llm_server --model /Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}" - port: 8000 - host_port: 8000 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - ollama: - max_mb: 3584 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: 
"serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-vision: - max_mb: 1536 - priority: 2 - shared: true - max_concurrent: 2 - cf-docuvision: - max_mb: 3072 - priority: 2 - shared: true - max_concurrent: 1 - managed: - type: process - exec_path: "/devl/miniconda3/envs/cf/bin/python" - args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}" - port: 8003 - host_port: 8003 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - cf-stt: - max_mb: 600 - priority: 2 - shared: true - max_concurrent: 2 - backend: faster-whisper - cf-tts: - max_mb: 768 - priority: 2 - shared: true - max_concurrent: 1 - comfyui: - max_mb: 5120 - priority: 4 -model_size_hints: - llm_max_params: 7b - image_gen_max: sd15 diff --git a/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml deleted file mode 100644 index 23ab8d5..0000000 --- a/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml +++ /dev/null @@ -1,68 +0,0 @@ -schema_version: 1 -name: single-gpu-8gb -vram_total_mb: 8192 -eviction_timeout_s: 10.0 -services: - vllm: - max_mb: 6500 - priority: 1 - idle_stop_after_s: 600 - managed: - type: process - exec_path: "/devl/miniconda3/envs/cf/bin/python" - args_template: "-m circuitforge_core.resources.inference.llm_server --model /Library/Assets/LLM/vllm/models/{model} --port {port} --gpu-id {gpu_id}" - port: 8000 - host_port: 8000 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - ollama: - max_mb: 4096 - priority: 1 - managed: - type: process - adopt: true - exec_path: "/usr/local/bin/ollama" - args_template: "serve" - port: 11434 - host_port: 11434 - health_path: /api/tags - cf-vision: - max_mb: 2048 - priority: 2 - shared: true - max_concurrent: 3 - cf-docuvision: - max_mb: 4096 - priority: 2 - shared: true - max_concurrent: 2 - managed: - type: process - exec_path: 
"/devl/miniconda3/envs/cf/bin/python" - args_template: "-m circuitforge_core.resources.docuvision.app --model /Library/Assets/LLM/docuvision/models/dolphin-v2 --port {port} --gpu-id {gpu_id}" - port: 8003 - host_port: 8003 - cwd: "/Library/Development/CircuitForge/circuitforge-core" - cf-stt: - max_mb: 1200 - priority: 2 - shared: true - max_concurrent: 2 - backend: parakeet-tdt - cf-tts: - max_mb: 1024 - priority: 2 - shared: true - max_concurrent: 2 - comfyui: - max_mb: 6144 - priority: 4 - managed: - type: process - exec_path: "/opt/miniconda3/envs/comfyui/bin/python" - args_template: "/opt/ComfyUI/main.py --listen 0.0.0.0 --port {port} --cuda-device {gpu_id}" - cwd: "/opt/ComfyUI" - port: 8188 - host_port: 8188 -model_size_hints: - llm_max_params: 8b - image_gen_max: sdxl-fp8 diff --git a/circuitforge_core/resources/profiles/schema.py b/circuitforge_core/resources/profiles/schema.py deleted file mode 100644 index 2667916..0000000 --- a/circuitforge_core/resources/profiles/schema.py +++ /dev/null @@ -1,121 +0,0 @@ -# circuitforge_core/resources/profiles/schema.py -from __future__ import annotations - -from pathlib import Path -from typing import Any - -import yaml -from pydantic import BaseModel, Field, model_validator - -SUPPORTED_SCHEMA_VERSION = 1 - - -class DockerSpec(BaseModel): - """Spec for a Docker-managed service.""" - - image: str - port: int - host_port: int - command_template: str = "" - volumes: list[str] = Field(default_factory=list) - env: dict[str, str] = Field(default_factory=dict) - runtime: str = "nvidia" - ipc: str = "host" - - model_config = {"frozen": True} - - -class ProcessSpec(BaseModel): - """Spec for a process-managed service (non-Docker, e.g. 
conda env).""" - - exec_path: str - args_template: str = "" - cwd: str = "" - env: dict[str, str] = Field(default_factory=dict) - port: int = 0 - host_port: int = 0 - # adopt=True: if the service is already listening on host_port, claim it rather - # than spawning a new process (useful for system daemons like Ollama). - adopt: bool = False - # Override the health probe path; defaults to /health (Ollama uses /api/tags). - health_path: str = "/health" - - model_config = {"frozen": True} - - -class ServiceProfile(BaseModel): - max_mb: int - priority: int - shared: bool = False - max_concurrent: int = 1 - always_on: bool = False - idle_stop_after_s: int = 0 - backend: str | None = None - consumers: list[str] = Field(default_factory=list) - managed: DockerSpec | ProcessSpec | None = None - - model_config = {"frozen": True} - - @model_validator(mode="before") - @classmethod - def _parse_managed(cls, values: Any) -> Any: - if not isinstance(values, dict): - return values - raw = values.get("managed") - if raw is None: - return values - if not isinstance(raw, dict): - return values - spec_type = raw.get("type") - managed_fields = {k: v for k, v in raw.items() if k != "type"} - if spec_type == "docker": - values["managed"] = DockerSpec(**managed_fields) - elif spec_type == "process": - values["managed"] = ProcessSpec(**managed_fields) - else: - raise ValueError(f"Unknown managed service type: {spec_type!r}") - return values - - -class GpuNodeEntry(BaseModel): - id: int - vram_mb: int - role: str - card: str = "unknown" - always_on: bool = False - services: list[str] = Field(default_factory=list) - - model_config = {"frozen": True} - - -class NodeProfile(BaseModel): - gpus: list[GpuNodeEntry] - agent_url: str | None = None - nas_mount: str | None = None - - model_config = {"frozen": True} - - -class GpuProfile(BaseModel): - schema_version: int - name: str - vram_total_mb: int | None = None - eviction_timeout_s: float = 10.0 - services: dict[str, ServiceProfile] = 
Field(default_factory=dict) - model_size_hints: dict[str, str] = Field(default_factory=dict) - nodes: dict[str, NodeProfile] = Field(default_factory=dict) - - model_config = {"frozen": True} - - -def load_profile(path: Path) -> GpuProfile: - raw: dict[str, Any] = yaml.safe_load(path.read_text()) - if not isinstance(raw, dict): - raise ValueError(f"Profile file {path} must be a YAML mapping, got {type(raw).__name__}") - version = raw.get("schema_version") - if version != SUPPORTED_SCHEMA_VERSION: - raise ValueError( - f"Unsupported schema_version {version!r} in {path}. " - f"Expected {SUPPORTED_SCHEMA_VERSION}." - ) - return GpuProfile.model_validate(raw) diff --git a/pyproject.toml b/pyproject.toml index 0711f78..eecccd8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "circuitforge-core" -version = "0.7.0" -description = "Shared scaffold for CircuitForge products" +version = "0.8.0" +description = "Shared scaffold for CircuitForge products (MIT)" requires-python = ">=3.11" dependencies = [ "pyyaml>=6.0", @@ -14,32 +14,17 @@ dependencies = [ ] [project.optional-dependencies] -orch = [ - "fastapi>=0.110", - "uvicorn[standard]>=0.29", - "httpx>=0.27", - "pydantic>=2.0", - "typer[all]>=0.12", - "psutil>=5.9", -] -tasks = [ - "httpx>=0.27", -] manage = [ "platformdirs>=4.0", "typer[all]>=0.12", ] dev = [ - "circuitforge-core[orch]", - "circuitforge-core[tasks]", "circuitforge-core[manage]", "pytest>=8.0", "pytest-asyncio>=0.23", - "httpx>=0.27", ] [project.scripts] -cf-orch = "circuitforge_core.resources.cli:app" cf-manage = "circuitforge_core.manage.cli:app" [tool.setuptools.packages.find] diff --git a/tests/test_resources/__init__.py b/tests/test_resources/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_resources/test_agent_app.py b/tests/test_resources/test_agent_app.py deleted file mode 100644 index b24c1aa..0000000 --- 
a/tests/test_resources/test_agent_app.py +++ /dev/null @@ -1,68 +0,0 @@ -from __future__ import annotations - -import pytest -from unittest.mock import MagicMock -from fastapi.testclient import TestClient - -from circuitforge_core.resources.agent.app import create_agent_app -from circuitforge_core.resources.models import GpuInfo -from circuitforge_core.resources.agent.eviction_executor import EvictionResult - -MOCK_GPUS = [ - GpuInfo( - gpu_id=0, - name="RTX 4000", - vram_total_mb=8192, - vram_used_mb=1024, - vram_free_mb=7168, - ), -] - - -@pytest.fixture -def agent_client(): - mock_monitor = MagicMock() - mock_monitor.poll.return_value = MOCK_GPUS - mock_executor = MagicMock() - app = create_agent_app( - node_id="heimdall", - monitor=mock_monitor, - executor=mock_executor, - ) - return TestClient(app), mock_monitor, mock_executor - - -def test_health_returns_ok(agent_client): - client, _, _ = agent_client - resp = client.get("/health") - assert resp.status_code == 200 - assert resp.json()["status"] == "ok" - assert resp.json()["node_id"] == "heimdall" - - -def test_gpu_info_returns_gpu_list(agent_client): - client, _, _ = agent_client - resp = client.get("/gpu-info") - assert resp.status_code == 200 - data = resp.json() - assert len(data["gpus"]) == 1 - assert data["gpus"][0]["gpu_id"] == 0 - assert data["gpus"][0]["name"] == "RTX 4000" - assert data["gpus"][0]["vram_free_mb"] == 7168 - - -def test_evict_calls_executor(agent_client): - client, _, mock_executor = agent_client - mock_executor.evict_pid.return_value = EvictionResult( - success=True, method="sigterm", message="done" - ) - resp = client.post("/evict", json={"pid": 1234, "grace_period_s": 5.0}) - assert resp.status_code == 200 - assert resp.json()["success"] is True - mock_executor.evict_pid.assert_called_once_with(pid=1234, grace_period_s=5.0) - - -def test_evict_requires_pid(agent_client): - client, _, _ = agent_client - resp = client.post("/evict", json={"grace_period_s": 5.0}) - assert 
resp.status_code == 422 diff --git a/tests/test_resources/test_agent_supervisor.py b/tests/test_resources/test_agent_supervisor.py deleted file mode 100644 index f669b62..0000000 --- a/tests/test_resources/test_agent_supervisor.py +++ /dev/null @@ -1,93 +0,0 @@ -import asyncio -import time -import pytest -from unittest.mock import AsyncMock, MagicMock, patch -from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager -from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry, ServiceInstance - - -def test_build_idle_stop_config_empty_without_registry(): - lm = LeaseManager() - supervisor = AgentSupervisor(lease_manager=lm) - assert supervisor._build_idle_stop_config() == {} - - -def test_build_idle_stop_config_from_profiles(): - lm = LeaseManager() - mock_svc = MagicMock() - mock_svc.idle_stop_after_s = 600 - mock_profile = MagicMock() - mock_profile.services = {"vllm": mock_svc} - mock_profile_registry = MagicMock() - mock_profile_registry.list_public.return_value = [mock_profile] - - supervisor = AgentSupervisor(lease_manager=lm, profile_registry=mock_profile_registry) - config = supervisor._build_idle_stop_config() - assert config == {"vllm": 600} - - -@pytest.mark.asyncio -async def test_run_idle_sweep_posts_stop(): - lm = LeaseManager() - service_registry = ServiceRegistry() - - # Upsert instance as running, then allocate + release to transition it to idle - service_registry.upsert_instance( - service="vllm", - node_id="heimdall", - gpu_id=0, - state="running", - model="test-model", - url="http://heimdall:8000", - ) - alloc = service_registry.allocate( - service="vllm", - node_id="heimdall", - gpu_id=0, - model="test-model", - url="http://heimdall:8000", - caller="test", - ttl_s=300.0, - ) - service_registry.release(alloc.allocation_id) - - # Backdate idle_since so it exceeds the timeout - import dataclasses - key = 
"vllm:heimdall:0" - inst = service_registry._instances[key] - service_registry._instances[key] = dataclasses.replace(inst, idle_since=time.time() - 700) - - mock_profile_registry = MagicMock() - mock_svc = MagicMock() - mock_svc.idle_stop_after_s = 600 - mock_profile = MagicMock() - mock_profile.services = {"vllm": mock_svc} - mock_profile_registry.list_public.return_value = [mock_profile] - - supervisor = AgentSupervisor( - lease_manager=lm, - service_registry=service_registry, - profile_registry=mock_profile_registry, - ) - supervisor.register("heimdall", "http://heimdall:7701") - - posted_urls = [] - - async def fake_http_post(url: str) -> bool: - posted_urls.append(url) - return True - - supervisor._http_post = fake_http_post - await supervisor._run_idle_sweep() - - assert len(posted_urls) == 1 - assert posted_urls[0] == "http://heimdall:7701/services/vllm/stop" - - -@pytest.mark.asyncio -async def test_run_idle_sweep_skips_without_registry(): - lm = LeaseManager() - supervisor = AgentSupervisor(lease_manager=lm) - # Should return immediately without error - await supervisor._run_idle_sweep() diff --git a/tests/test_resources/test_agent_watchdog.py b/tests/test_resources/test_agent_watchdog.py deleted file mode 100644 index 78b6d09..0000000 --- a/tests/test_resources/test_agent_watchdog.py +++ /dev/null @@ -1,151 +0,0 @@ -# tests/test_resources/test_agent_watchdog.py -""" -Tests for AgentSupervisor watchdog behaviour: - - restore_from_store() reloads known nodes from NodeStore on startup - - register() persists to NodeStore - - restored nodes start offline and come online after a successful poll - - NodeStore=None path is a no-op (backwards compatibility) -""" -from __future__ import annotations - -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager 
-from circuitforge_core.resources.coordinator.node_store import NodeStore - - -# ── fixtures ────────────────────────────────────────────────────────────────── - -@pytest.fixture -def store(tmp_path: Path) -> NodeStore: - return NodeStore(db_path=tmp_path / "nodes.db") - - -@pytest.fixture -def supervisor(store: NodeStore) -> AgentSupervisor: - return AgentSupervisor(lease_manager=LeaseManager(), node_store=store) - - -@pytest.fixture -def supervisor_no_store() -> AgentSupervisor: - return AgentSupervisor(lease_manager=LeaseManager(), node_store=None) - - -# ── register() persists ─────────────────────────────────────────────────────── - -def test_register_persists_to_store(supervisor: AgentSupervisor, store: NodeStore) -> None: - supervisor.register("heimdall", "http://127.0.0.1:7701") - rows = store.all() - assert len(rows) == 1 - assert rows[0] == ("heimdall", "http://127.0.0.1:7701") - - -def test_register_updates_url_in_store(supervisor: AgentSupervisor, store: NodeStore) -> None: - supervisor.register("navi", "http://10.1.10.10:7701") - supervisor.register("navi", "http://10.1.10.10:9999") - rows = store.all() - assert len(rows) == 1 - assert rows[0][1] == "http://10.1.10.10:9999" - - -def test_register_without_store_does_not_crash(supervisor_no_store: AgentSupervisor) -> None: - supervisor_no_store.register("heimdall", "http://127.0.0.1:7701") - assert supervisor_no_store.get_node_info("heimdall") is not None - - -# ── restore_from_store() ────────────────────────────────────────────────────── - -def test_restore_loads_known_nodes(tmp_path: Path) -> None: - """Nodes written by a previous supervisor session are restored into a fresh one.""" - db = tmp_path / "nodes.db" - - # Session 1: register two nodes - s1 = NodeStore(db_path=db) - sup1 = AgentSupervisor(lease_manager=LeaseManager(), node_store=s1) - sup1.register("navi", "http://10.1.10.10:7701") - sup1.register("strahl", "http://10.1.10.20:7701") - - # Session 2: fresh supervisor, same DB - s2 = 
NodeStore(db_path=db) - sup2 = AgentSupervisor(lease_manager=LeaseManager(), node_store=s2) - restored = sup2.restore_from_store() - - assert restored == 2 - assert sup2.get_node_info("navi") is not None - assert sup2.get_node_info("strahl") is not None - - -def test_restore_marks_nodes_offline(tmp_path: Path) -> None: - """Restored nodes start offline — they haven't been polled yet.""" - db = tmp_path / "nodes.db" - - s1 = NodeStore(db_path=db) - AgentSupervisor(lease_manager=LeaseManager(), node_store=s1).register( - "navi", "http://10.1.10.10:7701" - ) - - s2 = NodeStore(db_path=db) - sup2 = AgentSupervisor(lease_manager=LeaseManager(), node_store=s2) - sup2.restore_from_store() - - assert sup2.online_agents() == {} - - -def test_restore_returns_zero_without_store() -> None: - sup = AgentSupervisor(lease_manager=LeaseManager(), node_store=None) - assert sup.restore_from_store() == 0 - - -def test_restore_skips_already_registered(tmp_path: Path) -> None: - """Nodes manually registered before restore_from_store() are not duplicated.""" - db = tmp_path / "nodes.db" - store = NodeStore(db_path=db) - store.upsert("heimdall", "http://127.0.0.1:7701") - - sup = AgentSupervisor(lease_manager=LeaseManager(), node_store=store) - sup.register("heimdall", "http://127.0.0.1:7701") # already in memory - restored = sup.restore_from_store() - - assert restored == 0 # already present, not double-counted - - -# ── restored node comes online after poll ───────────────────────────────────── - -@pytest.mark.asyncio -async def test_restored_node_comes_online_after_poll(tmp_path: Path) -> None: - """After restore, a successful poll_agent() brings the node online.""" - db = tmp_path / "nodes.db" - store = NodeStore(db_path=db) - store.upsert("navi", "http://10.1.10.10:7701") - - sup = AgentSupervisor(lease_manager=LeaseManager(), node_store=store) - sup.restore_from_store() - - # Stub poll_agent to succeed - gpu_payload = {"gpus": [{"gpu_id": 0, "name": "RTX 4000", - "vram_total_mb": 
8192, "vram_used_mb": 512, "vram_free_mb": 7680}]} - resident_payload = {"residents": []} - - mock_resp_gpu = MagicMock() - mock_resp_gpu.raise_for_status = MagicMock() - mock_resp_gpu.json.return_value = gpu_payload - - mock_resp_res = MagicMock() - mock_resp_res.is_success = True - mock_resp_res.json.return_value = resident_payload - - mock_client = AsyncMock() - mock_client.get = AsyncMock(side_effect=[mock_resp_gpu, mock_resp_res]) - mock_client.__aenter__ = AsyncMock(return_value=mock_client) - mock_client.__aexit__ = AsyncMock(return_value=False) - - with patch("circuitforge_core.resources.coordinator.agent_supervisor.httpx.AsyncClient", - return_value=mock_client): - result = await sup.poll_agent("navi") - - assert result is True - assert "navi" in sup.online_agents() diff --git a/tests/test_resources/test_cli.py b/tests/test_resources/test_cli.py deleted file mode 100644 index 5ceb715..0000000 --- a/tests/test_resources/test_cli.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from unittest.mock import patch - -from typer.testing import CliRunner - -from circuitforge_core.resources.cli import app - -runner = CliRunner() - - -def test_cli_help(): - result = runner.invoke(app, ["--help"]) - assert result.exit_code == 0 - assert "cf-orch" in result.output.lower() or "Usage" in result.output - - -def test_status_command_shows_no_coordinator_message(): - with patch("httpx.get", side_effect=ConnectionRefusedError("refused")): - result = runner.invoke(app, ["status"]) - assert result.exit_code != 0 or "unreachable" in result.output.lower() \ - or "coordinator" in result.output.lower() - - -def test_install_service_creates_systemd_unit(tmp_path: Path): - unit_path = tmp_path / "cf-orch.service" - with patch( - "circuitforge_core.resources.cli._SYSTEMD_UNIT_PATH", unit_path - ): - result = runner.invoke(app, ["install-service", "--dry-run"]) - assert result.exit_code == 0 - assert "cf-orch.service" in result.output 
or "systemd" in result.output.lower() diff --git a/tests/test_resources/test_client.py b/tests/test_resources/test_client.py deleted file mode 100644 index 288fb64..0000000 --- a/tests/test_resources/test_client.py +++ /dev/null @@ -1,94 +0,0 @@ -import json -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -import httpretty -from circuitforge_core.resources.client import CFOrchClient, Allocation - -_ALLOC_BODY = ( - '{"allocation_id":"abc123","service":"vllm","node_id":"heimdall",' - '"gpu_id":0,"model":"Ouro-1.4B","url":"http://heimdall:8000","started":false,"warm":true}' -) - - -@httpretty.activate -def test_sync_allocate_returns_allocation(): - httpretty.register_uri( - httpretty.POST, "http://orch:7700/api/services/vllm/allocate", - body=_ALLOC_BODY, content_type="application/json", - ) - httpretty.register_uri( - httpretty.DELETE, "http://orch:7700/api/services/vllm/allocations/abc123", - body='{"released":true}', content_type="application/json", - ) - client = CFOrchClient("http://orch:7700") - with client.allocate("vllm", model_candidates=["Ouro-1.4B"], caller="test") as alloc: - assert isinstance(alloc, Allocation) - assert alloc.url == "http://heimdall:8000" - assert alloc.model == "Ouro-1.4B" - assert alloc.allocation_id == "abc123" - assert httpretty.last_request().method == "DELETE" - - -@httpretty.activate -def test_sync_allocate_ignores_404_on_release(): - httpretty.register_uri( - httpretty.POST, "http://orch:7700/api/services/vllm/allocate", - body='{"allocation_id":"xyz","service":"vllm","node_id":"a","gpu_id":0,' - '"model":"m","url":"http://a:8000","started":false,"warm":false}', - content_type="application/json", - ) - httpretty.register_uri( - httpretty.DELETE, "http://orch:7700/api/services/vllm/allocations/xyz", - status=404, body='{"detail":"not found"}', content_type="application/json", - ) - client = CFOrchClient("http://orch:7700") - with client.allocate("vllm", model_candidates=["m"]) as alloc: - assert alloc.url 
== "http://a:8000" - # No exception raised — 404 on release is silently ignored - - -@httpretty.activate -def test_sync_allocate_raises_on_503(): - httpretty.register_uri( - httpretty.POST, "http://orch:7700/api/services/vllm/allocate", - status=503, body='{"detail":"no capacity"}', content_type="application/json", - ) - client = CFOrchClient("http://orch:7700") - with pytest.raises(RuntimeError, match="cf-orch allocation failed"): - with client.allocate("vllm", model_candidates=["m"]): - pass - - -async def test_async_allocate_works(): - # httpretty only patches stdlib sockets; httpx async uses anyio sockets so - # we mock httpx.AsyncClient directly instead. - alloc_data = { - "allocation_id": "a1", "service": "vllm", "node_id": "n", - "gpu_id": 0, "model": "m", "url": "http://n:8000", - "started": False, "warm": False, - } - release_data = {"released": True} - - def _make_response(data, status_code=200): - resp = MagicMock() - resp.is_success = status_code < 400 - resp.status_code = status_code - resp.json.return_value = data - return resp - - mock_post = AsyncMock(return_value=_make_response(alloc_data)) - mock_delete = AsyncMock(return_value=_make_response(release_data)) - - mock_async_client = MagicMock() - mock_async_client.post = mock_post - mock_async_client.delete = mock_delete - mock_async_client.__aenter__ = AsyncMock(return_value=mock_async_client) - mock_async_client.__aexit__ = AsyncMock(return_value=False) - - with patch("httpx.AsyncClient", return_value=mock_async_client): - client = CFOrchClient("http://orch:7700") - async with client.allocate_async("vllm", model_candidates=["m"]) as alloc: - assert alloc.url == "http://n:8000" - assert alloc.allocation_id == "a1" - mock_delete.assert_called_once() diff --git a/tests/test_resources/test_coordinator_allocate.py b/tests/test_resources/test_coordinator_allocate.py deleted file mode 100644 index eb9e078..0000000 --- a/tests/test_resources/test_coordinator_allocate.py +++ /dev/null @@ -1,132 +0,0 @@ 
-import pytest -from unittest.mock import AsyncMock, MagicMock, patch -from fastapi.testclient import TestClient -from circuitforge_core.resources.coordinator.app import create_coordinator_app -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager -from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry -from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry -from circuitforge_core.resources.coordinator.agent_supervisor import AgentRecord -from circuitforge_core.resources.models import GpuInfo, NodeInfo - - -def _make_supervisor_mock(online: bool = True): - sup = MagicMock() - record = AgentRecord(node_id="heimdall", agent_url="http://heimdall:7701") - record.gpus = [GpuInfo(0, "RTX 4000", 8192, 0, 8192)] - record.online = online - sup.online_agents.return_value = {"heimdall": record} if online else {} - sup.get_node_info.return_value = NodeInfo( - node_id="heimdall", - agent_url="http://heimdall:7701", - gpus=record.gpus, - last_heartbeat=0.0, - ) - return sup - - -@pytest.fixture -def alloc_client(): - lm = LeaseManager() - pr = ProfileRegistry() - sup = _make_supervisor_mock() - sr = ServiceRegistry() - app = create_coordinator_app(lease_manager=lm, profile_registry=pr, agent_supervisor=sup, service_registry=sr) - return TestClient(app), sup, sr - - -def test_allocate_returns_allocation_id_and_url(alloc_client): - client, sup, sr = alloc_client - with patch("httpx.AsyncClient") as mock_http: - mock_resp = MagicMock() - mock_resp.is_success = True - mock_resp.json.return_value = {"running": True, "url": "http://heimdall:8000"} - mock_http.return_value.__aenter__.return_value.post = AsyncMock(return_value=mock_resp) - - resp = client.post("/api/services/vllm/allocate", json={ - "model_candidates": ["Ouro-1.4B"], - "ttl_s": 300.0, - "caller": "test", - }) - - assert resp.status_code == 200 - data = resp.json() - assert "allocation_id" in data - assert data["service"] == "vllm" - 
assert data["node_id"] == "heimdall" - assert data["url"] == "http://heimdall:8000" - - -def test_allocate_returns_503_when_no_online_nodes(alloc_client): - client, sup, sr = alloc_client - sup.online_agents.return_value = {} - resp = client.post("/api/services/vllm/allocate", json={"model_candidates": ["Ouro-1.4B"]}) - assert resp.status_code == 503 - - -def test_allocate_returns_422_for_empty_candidates(alloc_client): - client, _, sr = alloc_client - resp = client.post("/api/services/vllm/allocate", json={"model_candidates": []}) - assert resp.status_code == 422 - - -def test_allocate_returns_422_for_unknown_service(alloc_client): - client, _, sr = alloc_client - resp = client.post("/api/services/cf-made-up/allocate", json={"model_candidates": ["x"]}) - assert resp.status_code == 422 - - -def test_allocate_records_in_registry(alloc_client): - client, sup, sr = alloc_client - with patch("httpx.AsyncClient") as mock_http: - mock_resp = MagicMock() - mock_resp.is_success = True - mock_resp.json.return_value = {"running": True, "url": "http://heimdall:8000"} - mock_http.return_value.__aenter__.return_value.post = AsyncMock(return_value=mock_resp) - - resp = client.post("/api/services/vllm/allocate", json={ - "model_candidates": ["Ouro-1.4B"], - "ttl_s": 300.0, - "caller": "test", - }) - - assert resp.status_code == 200 - allocation_id = resp.json()["allocation_id"] - - status_resp = client.get("/api/services/vllm/status") - assert status_resp.status_code == 200 - status_data = status_resp.json() - assert status_data["service"] == "vllm" - alloc_ids = [a["allocation_id"] for a in status_data["allocations"]] - assert allocation_id in alloc_ids - - -def test_release_allocation(alloc_client): - client, sup, sr = alloc_client - with patch("httpx.AsyncClient") as mock_http: - mock_resp = MagicMock() - mock_resp.is_success = True - mock_resp.json.return_value = {"running": True, "url": "http://heimdall:8000"} - mock_http.return_value.__aenter__.return_value.post = 
AsyncMock(return_value=mock_resp) - - resp = client.post("/api/services/vllm/allocate", json={ - "model_candidates": ["Ouro-1.4B"], - "ttl_s": 300.0, - "caller": "test", - }) - - assert resp.status_code == 200 - allocation_id = resp.json()["allocation_id"] - - del_resp = client.delete(f"/api/services/vllm/allocations/{allocation_id}") - assert del_resp.status_code == 200 - assert del_resp.json() == {"released": True, "allocation_id": allocation_id} - - status_resp = client.get("/api/services/vllm/status") - alloc_ids = [a["allocation_id"] for a in status_resp.json()["allocations"]] - assert allocation_id not in alloc_ids - - -def test_release_allocation_not_found(alloc_client): - client, _, sr = alloc_client - resp = client.delete("/api/services/vllm/allocations/bad-id") - assert resp.status_code == 404 diff --git a/tests/test_resources/test_coordinator_app.py b/tests/test_resources/test_coordinator_app.py deleted file mode 100644 index 598ea50..0000000 --- a/tests/test_resources/test_coordinator_app.py +++ /dev/null @@ -1,183 +0,0 @@ -import pytest -from unittest.mock import MagicMock -from pathlib import Path -from fastapi.testclient import TestClient -from circuitforge_core.resources.coordinator.app import create_coordinator_app -from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager -from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry -from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry -from circuitforge_core.resources.models import GpuInfo, NodeInfo -from circuitforge_core.resources.profiles.schema import load_profile - - -@pytest.fixture -def coordinator_client(): - lease_manager = LeaseManager() - lease_manager.register_gpu("heimdall", 0, 8192) - profile_registry = ProfileRegistry() - supervisor = MagicMock() - supervisor.all_nodes.return_value = [ - NodeInfo( - node_id="heimdall", - 
agent_url="http://localhost:7701", - gpus=[GpuInfo(gpu_id=0, name="RTX 4000", - vram_total_mb=8192, vram_used_mb=0, vram_free_mb=8192)], - last_heartbeat=0.0, - ) - ] - supervisor.get_node_info.return_value = NodeInfo( - node_id="heimdall", - agent_url="http://localhost:7701", - gpus=[], - last_heartbeat=0.0, - ) - app = create_coordinator_app( - lease_manager=lease_manager, - profile_registry=profile_registry, - agent_supervisor=supervisor, - service_registry=ServiceRegistry(), - ) - return TestClient(app), lease_manager - - -def test_health_returns_ok(coordinator_client): - client, _ = coordinator_client - resp = client.get("/api/health") - assert resp.status_code == 200 - assert resp.json()["status"] == "ok" - - -def test_get_nodes_returns_list(coordinator_client): - client, _ = coordinator_client - resp = client.get("/api/nodes") - assert resp.status_code == 200 - nodes = resp.json()["nodes"] - assert len(nodes) == 1 - assert nodes[0]["node_id"] == "heimdall" - - -def test_get_profiles_returns_public_profiles(coordinator_client): - client, _ = coordinator_client - resp = client.get("/api/profiles") - assert resp.status_code == 200 - names = [p["name"] for p in resp.json()["profiles"]] - assert "single-gpu-8gb" in names - - -def test_post_lease_grants_lease(coordinator_client): - client, _ = coordinator_client - resp = client.post("/api/leases", json={ - "node_id": "heimdall", "gpu_id": 0, - "mb": 2048, "service": "peregrine", "priority": 1, - }) - assert resp.status_code == 200 - data = resp.json() - assert data["lease"]["mb_granted"] == 2048 - assert data["lease"]["holder_service"] == "peregrine" - assert "lease_id" in data["lease"] - - -def test_delete_lease_releases_it(coordinator_client): - client, _ = coordinator_client - resp = client.post("/api/leases", json={ - "node_id": "heimdall", "gpu_id": 0, - "mb": 2048, "service": "peregrine", "priority": 1, - }) - lease_id = resp.json()["lease"]["lease_id"] - del_resp = client.delete(f"/api/leases/{lease_id}") - 
assert del_resp.status_code == 200 - assert del_resp.json()["released"] is True - - -def test_delete_unknown_lease_returns_404(coordinator_client): - client, _ = coordinator_client - resp = client.delete("/api/leases/nonexistent-id") - assert resp.status_code == 404 - - -def test_get_leases_returns_active_leases(coordinator_client): - client, _ = coordinator_client - client.post("/api/leases", json={ - "node_id": "heimdall", "gpu_id": 0, - "mb": 1024, "service": "kiwi", "priority": 2, - }) - resp = client.get("/api/leases") - assert resp.status_code == 200 - assert len(resp.json()["leases"]) == 1 - - -def test_dashboard_serves_html(coordinator_client): - """GET / returns the dashboard HTML page.""" - client, _ = coordinator_client - resp = client.get("/") - assert resp.status_code == 200 - assert "text/html" in resp.headers["content-type"] - # Verify key structural markers are present (without asserting exact markup) - assert "cf-orch" in resp.text - assert "/api/nodes" in resp.text - assert "/api/leases" in resp.text - - -def test_online_agents_excludes_offline(): - lm = LeaseManager() - sup = AgentSupervisor(lm) - sup.register("online_node", "http://a:7701") - sup.register("offline_node", "http://b:7701") - sup._agents["online_node"].online = True - sup._agents["offline_node"].online = False - result = sup.online_agents() - assert "online_node" in result - assert "offline_node" not in result - - -def test_resident_keys_returns_set_of_node_service(): - lm = LeaseManager() - lm.set_residents_for_node("heimdall", [("vllm", "Ouro-1.4B"), ("ollama", None)]) - keys = lm.resident_keys() - assert keys == {"heimdall:vllm", "heimdall:ollama"} - - -def test_single_gpu_8gb_profile_has_idle_stop_after_s(): - profile = load_profile( - Path("circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml") - ) - vllm_svc = profile.services.get("vllm") - assert vllm_svc is not None - assert hasattr(vllm_svc, "idle_stop_after_s") - assert vllm_svc.idle_stop_after_s == 600 - - 
-def test_ensure_service_returns_503_when_vram_too_low(): - """VRAM pre-flight guard fires before any HTTP request when free VRAM < service max_mb.""" - # Threshold = full max_mb (not half); 100 MB free on any profile triggers 503. - lease_manager = LeaseManager() - lease_manager.register_gpu("low-vram-node", 0, 512) - profile_registry = ProfileRegistry() - supervisor = MagicMock() - supervisor.get_node_info.return_value = NodeInfo( - node_id="low-vram-node", - agent_url="http://localhost:7701", - gpus=[GpuInfo(gpu_id=0, name="GTX 1050", - vram_total_mb=512, vram_used_mb=412, vram_free_mb=100)], - last_heartbeat=0.0, - ) - supervisor.all_nodes.return_value = [] - app = create_coordinator_app( - lease_manager=lease_manager, - profile_registry=profile_registry, - agent_supervisor=supervisor, - service_registry=ServiceRegistry(), - ) - client = TestClient(app) - - resp = client.post("/api/services/vllm/ensure", json={ - "node_id": "low-vram-node", - "gpu_id": 0, - "params": {"model": "some-model"}, - }) - - assert resp.status_code == 503 - assert "Insufficient VRAM" in resp.json()["detail"] - # Guard must fire before any agent HTTP call is attempted. 
- supervisor.get_node_info.assert_called_once_with("low-vram-node") diff --git a/tests/test_resources/test_coordinator_auth.py b/tests/test_resources/test_coordinator_auth.py deleted file mode 100644 index c31c9d0..0000000 --- a/tests/test_resources/test_coordinator_auth.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Tests for HeimdallAuthMiddleware — TTL cache and request gating.""" -import time -import pytest -from unittest.mock import patch, MagicMock -from fastapi import FastAPI -from fastapi.testclient import TestClient - -from circuitforge_core.resources.coordinator.auth import ( - HeimdallAuthMiddleware, - _ValidationCache, - CACHE_TTL_S, -) - - -# ── Cache unit tests ────────────────────────────────────────────────────────── - -def test_cache_miss_returns_none(): - cache = _ValidationCache() - assert cache.get("nonexistent") is None - - -def test_cache_stores_and_retrieves(): - cache = _ValidationCache() - cache.set("key1", valid=True, tier="paid", user_id="u1") - entry = cache.get("key1") - assert entry is not None - assert entry.valid is True - assert entry.tier == "paid" - - -def test_cache_entry_expires(): - cache = _ValidationCache(ttl_s=0.05) - cache.set("key1", valid=True, tier="paid", user_id="u1") - time.sleep(0.1) - assert cache.get("key1") is None - - -def test_cache_evict_removes_key(): - cache = _ValidationCache() - cache.set("key1", valid=True, tier="paid", user_id="u1") - cache.evict("key1") - assert cache.get("key1") is None - - -def test_cache_prune_removes_expired(): - cache = _ValidationCache(ttl_s=0.05) - cache.set("k1", valid=True, tier="paid", user_id="") - cache.set("k2", valid=True, tier="paid", user_id="") - time.sleep(0.1) - removed = cache.prune() - assert removed == 2 - - -# ── Middleware integration tests ────────────────────────────────────────────── - -def _make_app_with_auth(middleware: HeimdallAuthMiddleware) -> TestClient: - app = FastAPI() - app.middleware("http")(middleware) - - @app.get("/api/health") - def health(): - return 
{"status": "ok"} - - @app.post("/api/services/vllm/allocate") - def allocate(): - return {"allocation_id": "abc", "url": "http://gpu:8000"} - - return TestClient(app, raise_server_exceptions=False) - - -def _patched_middleware(valid: bool, tier: str = "paid") -> HeimdallAuthMiddleware: - """Return a middleware whose Heimdall call is pre-mocked.""" - mw = HeimdallAuthMiddleware( - heimdall_url="http://heimdall.test", - min_tier="paid", - ) - mw._validate_against_heimdall = MagicMock( # type: ignore[method-assign] - return_value=(valid, tier, "user-1" if valid else "") - ) - return mw - - -def test_health_exempt_no_auth_required(): - mw = _patched_middleware(valid=True) - client = _make_app_with_auth(mw) - resp = client.get("/api/health") - assert resp.status_code == 200 - - -def test_missing_auth_header_returns_401(): - mw = _patched_middleware(valid=True) - client = _make_app_with_auth(mw) - resp = client.post("/api/services/vllm/allocate") - assert resp.status_code == 401 - - -def test_invalid_key_returns_403(): - mw = _patched_middleware(valid=False) - client = _make_app_with_auth(mw) - resp = client.post( - "/api/services/vllm/allocate", - headers={"Authorization": "Bearer BAD-KEY"}, - ) - assert resp.status_code == 403 - - -def test_valid_paid_key_passes(): - mw = _patched_middleware(valid=True, tier="paid") - client = _make_app_with_auth(mw) - resp = client.post( - "/api/services/vllm/allocate", - headers={"Authorization": "Bearer CFG-KIWI-GOOD-GOOD-GOOD"}, - ) - assert resp.status_code == 200 - - -def test_free_tier_key_rejected_when_min_is_paid(): - mw = _patched_middleware(valid=True, tier="free") - client = _make_app_with_auth(mw) - resp = client.post( - "/api/services/vllm/allocate", - headers={"Authorization": "Bearer CFG-KIWI-FREE-FREE-FREE"}, - ) - assert resp.status_code == 403 - assert "paid" in resp.json()["detail"] - - -def test_cache_prevents_second_heimdall_call(): - mw = _patched_middleware(valid=True, tier="paid") - client = 
_make_app_with_auth(mw) - key = "CFG-KIWI-CACHED-KEY-1" - headers = {"Authorization": f"Bearer {key}"} - client.post("/api/services/vllm/allocate", headers=headers) - client.post("/api/services/vllm/allocate", headers=headers) - # Heimdall should only have been called once — second hit is from cache - assert mw._validate_against_heimdall.call_count == 1 # type: ignore[attr-defined] - - -def test_from_env_returns_none_without_heimdall_url(monkeypatch): - monkeypatch.delenv("HEIMDALL_URL", raising=False) - assert HeimdallAuthMiddleware.from_env() is None - - -def test_from_env_returns_middleware_when_set(monkeypatch): - monkeypatch.setenv("HEIMDALL_URL", "http://heimdall.test") - mw = HeimdallAuthMiddleware.from_env() - assert mw is not None - assert mw._heimdall == "http://heimdall.test" diff --git a/tests/test_resources/test_coordinator_probe.py b/tests/test_resources/test_coordinator_probe.py deleted file mode 100644 index 52a86f0..0000000 --- a/tests/test_resources/test_coordinator_probe.py +++ /dev/null @@ -1,215 +0,0 @@ -# tests/test_resources/test_coordinator_probe.py -""" -Unit tests for _run_instance_probe_loop in coordinator/app.py. 
- -Covers: - - healthy path: /health → 200 → state transitions starting → running - - timeout path: no healthy response within _PROBE_TIMEOUT_S → starting → stopped - - cleanup path: non-starting instance cleans up its start_times entry -""" -from __future__ import annotations - -import asyncio -from unittest.mock import MagicMock, patch - -import pytest - -from circuitforge_core.resources.coordinator.app import ( - _PROBE_TIMEOUT_S, - _run_instance_probe_loop, -) -from circuitforge_core.resources.coordinator.service_registry import ServiceInstance, ServiceRegistry - - -# ── helpers ────────────────────────────────────────────────────────────────── - -def _inst(**kwargs) -> ServiceInstance: - defaults = dict( - service="vllm", node_id="node1", gpu_id=0, - state="starting", model="qwen", url="http://localhost:8000", - ) - defaults.update(kwargs) - return ServiceInstance(**defaults) - - -def _registry(*instances: ServiceInstance) -> MagicMock: - reg = MagicMock(spec=ServiceRegistry) - reg.all_instances.return_value = list(instances) - return reg - - -def _health_resp(status: int = 200) -> MagicMock: - """Context-manager mock that simulates an HTTP response.""" - resp = MagicMock() - resp.status = status - resp.__enter__ = lambda s: resp - resp.__exit__ = MagicMock(return_value=False) - return resp - - -async def _one_tick(coro_fn, registry, *, time_val: float = 1000.0, **url_patch): - """ - Run the probe loop for exactly one iteration then cancel it. - - asyncio.sleep is patched to return immediately on the first call - and raise CancelledError on the second (ending the loop cleanly). 
- """ - calls = 0 - - async def _fake_sleep(_delay): - nonlocal calls - calls += 1 - if calls > 1: - raise asyncio.CancelledError() - - patches = [ - patch("asyncio.sleep", new=_fake_sleep), - patch("time.time", return_value=time_val), - ] - if url_patch: - patches.append(patch("urllib.request.urlopen", **url_patch)) - - ctx = [p.__enter__() for p in patches] - try: - await coro_fn(registry) - except asyncio.CancelledError: - pass - finally: - for p in reversed(patches): - p.__exit__(None, None, None) - - -# ── tests ──────────────────────────────────────────────────────────────────── - -@pytest.mark.asyncio -async def test_probe_transitions_starting_to_running(): - """GET /health → 200 while in starting state → upsert_instance(state='running').""" - reg = _registry(_inst(state="starting", url="http://localhost:8000")) - - calls = 0 - - async def fake_sleep(_delay): - nonlocal calls - calls += 1 - if calls > 1: - raise asyncio.CancelledError() - - with patch("asyncio.sleep", new=fake_sleep), \ - patch("time.time", return_value=1000.0), \ - patch("urllib.request.urlopen", return_value=_health_resp(200)): - try: - await _run_instance_probe_loop(reg) - except asyncio.CancelledError: - pass - - reg.upsert_instance.assert_called_once_with( - service="vllm", node_id="node1", gpu_id=0, - state="running", model="qwen", url="http://localhost:8000", - ) - - -@pytest.mark.asyncio -async def test_probe_transitions_starting_to_stopped_on_timeout(): - """No healthy response + time past _PROBE_TIMEOUT_S → upsert_instance(state='stopped'). 
- - Tick 1: seeds start_times[key] = 1000.0 - Tick 2: time has advanced past _PROBE_TIMEOUT_S → timeout fires → stopped - Tick 3: CancelledError exits the loop - """ - reg = _registry(_inst(state="starting", url="http://localhost:8000")) - - tick = 0 - # Tick 1: t=1000 (seed); Tick 2: t=far_future (timeout fires) - times = [1000.0, 1000.0 + _PROBE_TIMEOUT_S + 1.0] - - async def fake_sleep(_delay): - nonlocal tick - tick += 1 - if tick > 2: - raise asyncio.CancelledError() - - with patch("asyncio.sleep", new=fake_sleep), \ - patch("time.time", side_effect=times * 10), \ - patch("urllib.request.urlopen", side_effect=OSError("connection refused")): - try: - await _run_instance_probe_loop(reg) - except asyncio.CancelledError: - pass - - reg.upsert_instance.assert_called_once_with( - service="vllm", node_id="node1", gpu_id=0, - state="stopped", model="qwen", url="http://localhost:8000", - ) - - -@pytest.mark.asyncio -async def test_probe_cleans_up_start_times_for_non_starting(): - """ - An instance that is no longer in 'starting' state should not cause - upsert_instance to be called, and its key should be removed from start_times. - - We verify this indirectly: run two ticks — first with state='starting' (seeds - the key and transitions to running), second with the updated registry returning - state='running' (should not call upsert again). 
- """ - starting_inst = _inst(state="starting", url="http://localhost:8000") - running_inst = _inst(state="running", url="http://localhost:8000") - - tick = 0 - - # First tick: instance is starting → transitions to running - # Second tick: registry now returns running → no upsert - # Third tick: cancel - def instances_side_effect(): - if tick <= 1: - return [starting_inst] - return [running_inst] - - reg = MagicMock(spec=ServiceRegistry) - reg.all_instances.side_effect = instances_side_effect - - async def fake_sleep(_delay): - nonlocal tick - tick += 1 - if tick > 2: - raise asyncio.CancelledError() - - with patch("asyncio.sleep", new=fake_sleep), \ - patch("time.time", return_value=1000.0), \ - patch("urllib.request.urlopen", return_value=_health_resp(200)): - try: - await _run_instance_probe_loop(reg) - except asyncio.CancelledError: - pass - - # upsert should have been called exactly once (the starting→running transition) - assert reg.upsert_instance.call_count == 1 - reg.upsert_instance.assert_called_once_with( - service="vllm", node_id="node1", gpu_id=0, - state="running", model="qwen", url="http://localhost:8000", - ) - - -@pytest.mark.asyncio -async def test_probe_no_url_does_not_attempt_health_check(): - """Instance with no URL stays in starting state (no health check, no timeout yet).""" - reg = _registry(_inst(state="starting", url=None)) - - tick = 0 - - async def fake_sleep(_delay): - nonlocal tick - tick += 1 - if tick > 1: - raise asyncio.CancelledError() - - with patch("asyncio.sleep", new=fake_sleep), \ - patch("time.time", return_value=1000.0), \ - patch("urllib.request.urlopen") as mock_urlopen: - try: - await _run_instance_probe_loop(reg) - except asyncio.CancelledError: - pass - - mock_urlopen.assert_not_called() - reg.upsert_instance.assert_not_called() diff --git a/tests/test_resources/test_docuvision.py b/tests/test_resources/test_docuvision.py deleted file mode 100644 index 4b4f9f0..0000000 --- a/tests/test_resources/test_docuvision.py +++ 
/dev/null @@ -1,215 +0,0 @@ -# tests/test_resources/test_docuvision.py -""" -Unit tests for cf-docuvision FastAPI service (circuitforge_core/resources/docuvision/app.py). - -Covers: - - GET /health → status + model path - - POST /extract → image_b64, image_path, hint routing, metadata fields - - _parse_dolphin_output → JSON list path, table detection, plain-text fallback - - _image_from_request → missing both fields → 422; bad image_path → 404 -""" -from __future__ import annotations - -import base64 -import io -import json -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest -from fastapi.testclient import TestClient -from PIL import Image - -import circuitforge_core.resources.docuvision.app as docuvision_module -from circuitforge_core.resources.docuvision.app import ( - _parse_dolphin_output, - app, -) - - -# ── fixtures ────────────────────────────────────────────────────────────────── - -def _make_jpeg_b64(width: int = 10, height: int = 10) -> str: - """Return a base64-encoded 10x10 white JPEG.""" - img = Image.new("RGB", (width, height), color=(255, 255, 255)) - buf = io.BytesIO() - img.save(buf, format="JPEG") - return base64.b64encode(buf.getvalue()).decode() - - -@pytest.fixture(autouse=True) -def _reset_module_state(): - """Reset module-level model state between tests.""" - docuvision_module._model = None - docuvision_module._processor = None - docuvision_module._model_path = "/fake/model" - docuvision_module._device = "cpu" - yield - docuvision_module._model = None - docuvision_module._processor = None - - -@pytest.fixture -def mock_model(): - """ - Inject fake model + processor into the module so _load_model() is skipped. - - The processor returns a dict-like with 'input_ids'; the model generate() - returns a tensor-like whose decode produces a JSON string. 
- """ - fake_ids = MagicMock() - fake_ids.shape = [1, 5] # input_len = 5 - - fake_inputs = {"input_ids": fake_ids} - fake_inputs_obj = MagicMock() - fake_inputs_obj.__getitem__ = lambda self, k: fake_inputs[k] - fake_inputs_obj.to = lambda device: fake_inputs_obj - - fake_output = MagicMock() - fake_output.__getitem__ = lambda self, idx: MagicMock() # output_ids[0] - - fake_model = MagicMock() - fake_model.generate.return_value = fake_output - - fake_processor = MagicMock() - fake_processor.return_value = fake_inputs_obj - fake_processor.decode.return_value = json.dumps([ - {"type": "heading", "text": "Invoice", "bbox": [0.0, 0.0, 1.0, 0.1]}, - {"type": "table", "text": "row1", "html": "
row1
", - "bbox": [0.0, 0.1, 1.0, 0.5]}, - ]) - - docuvision_module._model = fake_model - docuvision_module._processor = fake_processor - return fake_model, fake_processor - - -@pytest.fixture -def client(): - return TestClient(app) - - -# ── health ──────────────────────────────────────────────────────────────────── - -def test_health_returns_ok(client): - resp = client.get("/health") - assert resp.status_code == 200 - data = resp.json() - assert data["status"] == "ok" - assert data["model"] == "/fake/model" - - -# ── _parse_dolphin_output ──────────────────────────────────────────────────── - -def test_parse_json_list_elements(): - raw = json.dumps([ - {"type": "heading", "text": "Title"}, - {"type": "paragraph", "text": "Body text"}, - ]) - elements, tables, raw_text = _parse_dolphin_output(raw) - assert len(elements) == 2 - assert elements[0].type == "heading" - assert elements[0].text == "Title" - assert elements[1].type == "paragraph" - assert raw_text == "Title\nBody text" - assert tables == [] - - -def test_parse_json_table_extracted(): - raw = json.dumps([ - {"type": "table", "text": "row", "html": "
A
", - "bbox": [0.0, 0.0, 1.0, 0.5]}, - ]) - elements, tables, raw_text = _parse_dolphin_output(raw) - assert len(tables) == 1 - assert tables[0].html == "
A
" - assert tables[0].bbox == [0.0, 0.0, 1.0, 0.5] - assert len(elements) == 1 - assert elements[0].type == "table" - - -def test_parse_plain_text_fallback(): - raw = "This is not JSON at all." - elements, tables, raw_text = _parse_dolphin_output(raw) - assert len(elements) == 1 - assert elements[0].type == "paragraph" - assert elements[0].text == raw - assert tables == [] - assert raw_text == raw - - -def test_parse_empty_string_fallback(): - elements, tables, raw_text = _parse_dolphin_output("") - assert len(elements) == 1 - assert elements[0].type == "paragraph" - assert elements[0].text == "" - - -def test_parse_json_missing_type_defaults_to_paragraph(): - raw = json.dumps([{"text": "no type field"}]) - elements, tables, _ = _parse_dolphin_output(raw) - assert elements[0].type == "paragraph" - - -# ── POST /extract ───────────────────────────────────────────────────────────── - -def test_extract_image_b64(client, mock_model): - resp = client.post("/extract", json={"image_b64": _make_jpeg_b64(), "hint": "auto"}) - assert resp.status_code == 200 - data = resp.json() - assert "elements" in data - assert "raw_text" in data - assert "tables" in data - assert data["metadata"]["hint"] == "auto" - assert data["metadata"]["model"] == "/fake/model" - assert data["metadata"]["width"] == 10 - assert data["metadata"]["height"] == 10 - - -def test_extract_hint_table_routes_correct_prompt(client, mock_model): - _, fake_processor = mock_model - resp = client.post("/extract", json={"image_b64": _make_jpeg_b64(), "hint": "table"}) - assert resp.status_code == 200 - # Verify processor was called with the table-specific prompt - call_kwargs = fake_processor.call_args - assert "table" in call_kwargs.kwargs.get("text", "") or \ - "table" in str(call_kwargs) - - -def test_extract_hint_unknown_falls_back_to_auto(client, mock_model): - """An unrecognised hint silently falls back to the 'auto' prompt.""" - resp = client.post("/extract", json={"image_b64": _make_jpeg_b64(), "hint": 
"nonsense"}) - assert resp.status_code == 200 - - -def test_extract_image_path(tmp_path, client, mock_model): - img_file = tmp_path / "doc.png" - Image.new("RGB", (8, 8), color=(0, 0, 0)).save(img_file) - resp = client.post("/extract", json={"image_path": str(img_file)}) - assert resp.status_code == 200 - assert resp.json()["metadata"]["width"] == 8 - - -def test_extract_image_path_not_found(client, mock_model): - resp = client.post("/extract", json={"image_path": "/nonexistent/path/img.png"}) - assert resp.status_code == 404 - - -def test_extract_no_image_raises_422(client, mock_model): - resp = client.post("/extract", json={"hint": "auto"}) - assert resp.status_code == 422 - - -def test_extract_response_includes_tables(client, mock_model): - """Verify table objects surface in response when model returns table elements.""" - resp = client.post("/extract", json={"image_b64": _make_jpeg_b64()}) - assert resp.status_code == 200 - data = resp.json() - assert len(data["tables"]) == 1 - assert "" in data["tables"][0]["html"] - - -def test_extract_device_in_metadata(client, mock_model): - resp = client.post("/extract", json={"image_b64": _make_jpeg_b64()}) - assert resp.status_code == 200 - assert "device" in resp.json()["metadata"] diff --git a/tests/test_resources/test_eviction_engine.py b/tests/test_resources/test_eviction_engine.py deleted file mode 100644 index d7051e3..0000000 --- a/tests/test_resources/test_eviction_engine.py +++ /dev/null @@ -1,67 +0,0 @@ -import asyncio -import pytest -from unittest.mock import AsyncMock, patch -from circuitforge_core.resources.coordinator.eviction_engine import EvictionEngine -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager - - -@pytest.fixture -def lease_manager(): - mgr = LeaseManager() - mgr.register_gpu("heimdall", 0, 8192) - return mgr - - -@pytest.fixture -def engine(lease_manager): - return EvictionEngine(lease_manager=lease_manager, eviction_timeout_s=0.1) - - -@pytest.mark.asyncio -async 
def test_request_lease_grants_when_vram_available(engine, lease_manager): - lease = await engine.request_lease( - node_id="heimdall", gpu_id=0, mb=4096, - service="peregrine", priority=1, - agent_url="http://localhost:7701", - ) - assert lease is not None - assert lease.mb_granted == 4096 - - -@pytest.mark.asyncio -async def test_request_lease_evicts_and_grants(engine, lease_manager): - # Pre-fill with a low-priority lease - big_lease = await lease_manager.try_grant( - "heimdall", 0, 7000, "comfyui", priority=4 - ) - assert big_lease is not None - - # Mock the agent eviction call - with patch( - "circuitforge_core.resources.coordinator.eviction_engine.EvictionEngine._call_agent_evict", - new_callable=AsyncMock, - ) as mock_evict: - mock_evict.return_value = True - # Simulate the comfyui lease being released (as if the agent evicted it) - asyncio.get_event_loop().call_later( - 0.05, lambda: asyncio.ensure_future(lease_manager.release(big_lease.lease_id)) - ) - lease = await engine.request_lease( - node_id="heimdall", gpu_id=0, mb=4096, - service="peregrine", priority=1, - agent_url="http://localhost:7701", - ) - assert lease is not None - assert lease.holder_service == "peregrine" - - -@pytest.mark.asyncio -async def test_request_lease_returns_none_when_no_eviction_candidates(engine): - await engine.lease_manager.try_grant("heimdall", 0, 6000, "vllm", priority=1) - # Requesting 4GB but no lower-priority leases exist - lease = await engine.request_lease( - node_id="heimdall", gpu_id=0, mb=4096, - service="kiwi", priority=2, - agent_url="http://localhost:7701", - ) - assert lease is None diff --git a/tests/test_resources/test_eviction_executor.py b/tests/test_resources/test_eviction_executor.py deleted file mode 100644 index d718732..0000000 --- a/tests/test_resources/test_eviction_executor.py +++ /dev/null @@ -1,43 +0,0 @@ -import signal -from unittest.mock import patch, call -import pytest -from circuitforge_core.resources.agent.eviction_executor import 
EvictionExecutor, EvictionResult - - -def test_evict_by_pid_sends_sigterm_then_sigkill(): - executor = EvictionExecutor(grace_period_s=0.01) - # pid_exists always True → grace period expires → SIGKILL fires - with patch("os.kill") as mock_kill, \ - patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil: - mock_psutil.pid_exists.return_value = True - result = executor.evict_pid(pid=1234, grace_period_s=0.01) - - assert result.success is True - calls = mock_kill.call_args_list - assert call(1234, signal.SIGTERM) in calls - assert call(1234, signal.SIGKILL) in calls - - -def test_evict_pid_succeeds_on_sigterm_alone(): - executor = EvictionExecutor(grace_period_s=0.1) - with patch("os.kill"), \ - patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil: - mock_psutil.pid_exists.side_effect = [True, False] # gone after SIGTERM - result = executor.evict_pid(pid=5678, grace_period_s=0.01) - assert result.success is True - assert result.method == "sigterm" - - -def test_evict_pid_not_found_returns_failure(): - executor = EvictionExecutor() - with patch("circuitforge_core.resources.agent.eviction_executor.psutil") as mock_psutil: - mock_psutil.pid_exists.return_value = False - result = executor.evict_pid(pid=9999) - assert result.success is False - assert "not found" in result.message.lower() - - -def test_eviction_result_is_immutable(): - result = EvictionResult(success=True, method="sigterm", message="ok") - with pytest.raises((AttributeError, TypeError)): - result.success = False # type: ignore diff --git a/tests/test_resources/test_gpu_monitor.py b/tests/test_resources/test_gpu_monitor.py deleted file mode 100644 index 617f592..0000000 --- a/tests/test_resources/test_gpu_monitor.py +++ /dev/null @@ -1,60 +0,0 @@ -from unittest.mock import patch -from circuitforge_core.resources.agent.gpu_monitor import GpuMonitor - - -SAMPLE_NVIDIA_SMI_OUTPUT = ( - "0, Quadro RTX 4000, 8192, 6843, 1349\n" - "1, Quadro RTX 4000, 8192, 
721, 7471\n" -) - - -def test_parse_returns_list_of_gpu_info(): - monitor = GpuMonitor() - with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run: - mock_run.return_value.returncode = 0 - mock_run.return_value.stdout = SAMPLE_NVIDIA_SMI_OUTPUT - gpus = monitor.poll() - assert len(gpus) == 2 - assert gpus[0].gpu_id == 0 - assert gpus[0].name == "Quadro RTX 4000" - assert gpus[0].vram_total_mb == 8192 - assert gpus[0].vram_used_mb == 6843 - assert gpus[0].vram_free_mb == 1349 - - -def test_parse_second_gpu(): - monitor = GpuMonitor() - with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run: - mock_run.return_value.returncode = 0 - mock_run.return_value.stdout = SAMPLE_NVIDIA_SMI_OUTPUT - gpus = monitor.poll() - assert gpus[1].gpu_id == 1 - assert gpus[1].vram_used_mb == 721 - assert gpus[1].vram_free_mb == 7471 - - -def test_poll_returns_empty_list_when_nvidia_smi_unavailable(): - monitor = GpuMonitor() - with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run", side_effect=FileNotFoundError): - gpus = monitor.poll() - assert gpus == [] - - -def test_poll_returns_empty_list_on_nonzero_exit(): - monitor = GpuMonitor() - with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run: - mock_run.return_value.returncode = 1 - mock_run.return_value.stdout = "" - gpus = monitor.poll() - assert gpus == [] - - -def test_poll_skips_malformed_lines(): - monitor = GpuMonitor() - malformed = "0, RTX 4000, 8192, not_a_number, 1024\n1, RTX 4000, 8192, 512, 7680\n" - with patch("circuitforge_core.resources.agent.gpu_monitor.subprocess.run") as mock_run: - mock_run.return_value.returncode = 0 - mock_run.return_value.stdout = malformed - gpus = monitor.poll() - assert len(gpus) == 1 - assert gpus[0].gpu_id == 1 diff --git a/tests/test_resources/test_integration.py b/tests/test_resources/test_integration.py deleted file mode 100644 index 8fa94ad..0000000 --- 
a/tests/test_resources/test_integration.py +++ /dev/null @@ -1,221 +0,0 @@ -"""Integration test: full lease → eviction → re-grant cycle. - -Runs coordinator in-process (no subprocesses, no real nvidia-smi). -Uses TestClient for HTTP, mocks AgentSupervisor to return fixed node state. -""" -import pytest -from unittest.mock import MagicMock -from fastapi.testclient import TestClient - -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager -from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry -from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor -from circuitforge_core.resources.coordinator.app import create_coordinator_app -from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry -from circuitforge_core.resources.models import GpuInfo, NodeInfo - - -@pytest.fixture -def system(): - """Create an in-process coordinator system with 8GB GPU and mock supervisor.""" - lease_manager = LeaseManager() - lease_manager.register_gpu("local", 0, 8192) - - mock_supervisor = MagicMock(spec=AgentSupervisor) - mock_supervisor.all_nodes.return_value = [ - NodeInfo( - node_id="local", - agent_url="http://localhost:7701", - gpus=[GpuInfo( - gpu_id=0, - name="RTX 4000", - vram_total_mb=8192, - vram_used_mb=0, - vram_free_mb=8192, - )], - last_heartbeat=0.0, - ) - ] - mock_supervisor.get_node_info.return_value = NodeInfo( - node_id="local", - agent_url="http://localhost:7701", - gpus=[], - last_heartbeat=0.0, - ) - - profile_registry = ProfileRegistry() - app = create_coordinator_app( - lease_manager=lease_manager, - profile_registry=profile_registry, - agent_supervisor=mock_supervisor, - service_registry=ServiceRegistry(), - ) - client = TestClient(app) - return client, lease_manager - - -def test_full_lease_cycle(system): - """Test: grant, verify, release, verify gone.""" - client, _ = system - - # Grant a lease - resp = client.post("/api/leases", json={ - "node_id": 
"local", - "gpu_id": 0, - "mb": 4096, - "service": "peregrine", - "priority": 1, - }) - assert resp.status_code == 200 - lease_data = resp.json()["lease"] - lease_id = lease_data["lease_id"] - assert lease_data["mb_granted"] == 4096 - assert lease_data["holder_service"] == "peregrine" - - # Verify it appears in active leases - resp = client.get("/api/leases") - assert resp.status_code == 200 - leases = resp.json()["leases"] - assert any(l["lease_id"] == lease_id for l in leases) - - # Release it - resp = client.delete(f"/api/leases/{lease_id}") - assert resp.status_code == 200 - assert resp.json()["released"] is True - - # Verify it's gone - resp = client.get("/api/leases") - assert resp.status_code == 200 - leases = resp.json()["leases"] - assert not any(l["lease_id"] == lease_id for l in leases) - - -def test_vram_exhaustion_returns_503(system): - """Test: fill GPU, then request with no eviction candidates returns 503.""" - client, _ = system - - # Fill GPU 0 with high-priority lease - resp = client.post("/api/leases", json={ - "node_id": "local", - "gpu_id": 0, - "mb": 8000, - "service": "vllm", - "priority": 1, - }) - assert resp.status_code == 200 - - # Try to get more VRAM with same priority (no eviction candidates) - resp = client.post("/api/leases", json={ - "node_id": "local", - "gpu_id": 0, - "mb": 2000, - "service": "kiwi", - "priority": 1, - }) - assert resp.status_code == 503 - assert "Insufficient VRAM" in resp.json()["detail"] - - -def test_auto_detect_profile_for_8gb(): - """Test: ProfileRegistry auto-detects single-gpu-8gb for 8GB GPU.""" - registry = ProfileRegistry() - gpu = GpuInfo( - gpu_id=0, - name="RTX 4000", - vram_total_mb=8192, - vram_used_mb=0, - vram_free_mb=8192, - ) - profile = registry.auto_detect([gpu]) - assert profile.name == "single-gpu-8gb" - # Verify profile has services configured - assert hasattr(profile, "services") - - -def test_node_endpoint_shows_nodes(system): - """Test: GET /api/nodes returns the mocked node.""" - 
client, _ = system - resp = client.get("/api/nodes") - assert resp.status_code == 200 - nodes = resp.json()["nodes"] - assert len(nodes) == 1 - assert nodes[0]["node_id"] == "local" - assert nodes[0]["agent_url"] == "http://localhost:7701" - assert len(nodes[0]["gpus"]) == 1 - assert nodes[0]["gpus"][0]["name"] == "RTX 4000" - - -def test_profiles_endpoint_returns_public_profiles(system): - """Test: GET /api/profiles returns standard public profiles.""" - client, _ = system - resp = client.get("/api/profiles") - assert resp.status_code == 200 - profiles = resp.json()["profiles"] - names = [p["name"] for p in profiles] - # Verify common public profiles are present - assert "single-gpu-8gb" in names - assert "single-gpu-6gb" in names - assert "single-gpu-2gb" in names - - -def test_multiple_leases_tracked_independently(system): - """Test: multiple active leases are tracked correctly.""" - client, _ = system - - # Grant lease 1 - resp1 = client.post("/api/leases", json={ - "node_id": "local", - "gpu_id": 0, - "mb": 2048, - "service": "peregrine", - "priority": 2, - }) - assert resp1.status_code == 200 - lease1_id = resp1.json()["lease"]["lease_id"] - - # Grant lease 2 - resp2 = client.post("/api/leases", json={ - "node_id": "local", - "gpu_id": 0, - "mb": 2048, - "service": "kiwi", - "priority": 2, - }) - assert resp2.status_code == 200 - lease2_id = resp2.json()["lease"]["lease_id"] - - # Both should be in active leases - resp = client.get("/api/leases") - leases = resp.json()["leases"] - lease_ids = [l["lease_id"] for l in leases] - assert lease1_id in lease_ids - assert lease2_id in lease_ids - assert len(leases) == 2 - - # Release lease 1 - resp = client.delete(f"/api/leases/{lease1_id}") - assert resp.status_code == 200 - - # Only lease 2 should remain - resp = client.get("/api/leases") - leases = resp.json()["leases"] - lease_ids = [l["lease_id"] for l in leases] - assert lease1_id not in lease_ids - assert lease2_id in lease_ids - assert len(leases) == 1 - - 
-def test_delete_nonexistent_lease_returns_404(system): - """Test: deleting a nonexistent lease returns 404.""" - client, _ = system - resp = client.delete("/api/leases/nonexistent-lease-id") - assert resp.status_code == 404 - assert "not found" in resp.json()["detail"] - - -def test_health_endpoint_returns_ok(system): - """Test: GET /api/health returns status ok.""" - client, _ = system - resp = client.get("/api/health") - assert resp.status_code == 200 - assert resp.json()["status"] == "ok" diff --git a/tests/test_resources/test_lease_manager.py b/tests/test_resources/test_lease_manager.py deleted file mode 100644 index cede687..0000000 --- a/tests/test_resources/test_lease_manager.py +++ /dev/null @@ -1,85 +0,0 @@ -import pytest -from circuitforge_core.resources.coordinator.lease_manager import LeaseManager - - -@pytest.fixture -def mgr(): - m = LeaseManager() - m.register_gpu(node_id="heimdall", gpu_id=0, total_mb=8192) - return m - - -@pytest.mark.asyncio -async def test_grant_succeeds_when_vram_available(mgr): - lease = await mgr.try_grant( - node_id="heimdall", gpu_id=0, mb=4096, - service="peregrine", priority=1 - ) - assert lease is not None - assert lease.mb_granted == 4096 - assert lease.node_id == "heimdall" - assert lease.gpu_id == 0 - - -@pytest.mark.asyncio -async def test_grant_fails_when_vram_insufficient(mgr): - await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=7000, - service="vllm", priority=1) - lease = await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=2000, - service="kiwi", priority=2) - assert lease is None - - -@pytest.mark.asyncio -async def test_release_frees_vram(mgr): - lease = await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=7000, - service="vllm", priority=1) - assert lease is not None - released = await mgr.release(lease.lease_id) - assert released is True - lease2 = await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=7000, - service="comfyui", priority=4) - assert lease2 is not None - - -@pytest.mark.asyncio -async def 
test_release_unknown_lease_returns_false(mgr): - result = await mgr.release("nonexistent-id") - assert result is False - - -@pytest.mark.asyncio -async def test_get_eviction_candidates_returns_lower_priority_leases(mgr): - await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=3000, - service="comfyui", priority=4) - await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=2000, - service="ollama", priority=1) - candidates = mgr.get_eviction_candidates( - node_id="heimdall", gpu_id=0, - needed_mb=3000, requester_priority=2 - ) - assert len(candidates) == 1 - assert candidates[0].holder_service == "comfyui" - - -@pytest.mark.asyncio -async def test_list_leases_for_gpu(mgr): - await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=1024, - service="peregrine", priority=1) - await mgr.try_grant(node_id="heimdall", gpu_id=0, mb=512, - service="kiwi", priority=2) - leases = mgr.list_leases(node_id="heimdall", gpu_id=0) - assert len(leases) == 2 - - -def test_register_gpu_sets_total(mgr): - assert mgr.gpu_total_mb("heimdall", 0) == 8192 - - -@pytest.mark.asyncio -async def test_used_mb_tracks_grants(): - mgr = LeaseManager() - mgr.register_gpu("heimdall", 0, 8192) - await mgr.try_grant("heimdall", 0, 3000, "a", 1) - await mgr.try_grant("heimdall", 0, 2000, "b", 2) - assert mgr.used_mb("heimdall", 0) == 5000 diff --git a/tests/test_resources/test_models.py b/tests/test_resources/test_models.py deleted file mode 100644 index c8e5ac4..0000000 --- a/tests/test_resources/test_models.py +++ /dev/null @@ -1,47 +0,0 @@ -import time -import pytest -from circuitforge_core.resources.models import VRAMLease, GpuInfo, NodeInfo - - -def test_vram_lease_create_assigns_unique_ids(): - lease_a = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=4096, - service="peregrine", priority=1) - lease_b = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=4096, - service="peregrine", priority=1) - assert lease_a.lease_id != lease_b.lease_id - - -def test_vram_lease_create_with_ttl_sets_expiry(): - before = 
time.time() - lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=2048, - service="kiwi", priority=2, ttl_s=60.0) - after = time.time() - assert before + 60.0 <= lease.expires_at <= after + 60.0 - - -def test_vram_lease_create_no_ttl_has_zero_expiry(): - lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=1024, - service="snipe", priority=2) - assert lease.expires_at == 0.0 - - -def test_vram_lease_is_immutable(): - lease = VRAMLease.create(gpu_id=0, node_id="heimdall", mb=1024, - service="snipe", priority=2) - with pytest.raises((AttributeError, TypeError)): - lease.mb_granted = 999 # type: ignore - - -def test_gpu_info_fields(): - info = GpuInfo(gpu_id=0, name="RTX 4000", vram_total_mb=8192, - vram_used_mb=2048, vram_free_mb=6144) - assert info.vram_free_mb == 6144 - - -def test_node_info_fields(): - gpu = GpuInfo(gpu_id=0, name="RTX 4000", vram_total_mb=8192, - vram_used_mb=0, vram_free_mb=8192) - node = NodeInfo(node_id="heimdall", agent_url="http://localhost:7701", - gpus=[gpu], last_heartbeat=time.time()) - assert node.node_id == "heimdall" - assert len(node.gpus) == 1 diff --git a/tests/test_resources/test_node_selector.py b/tests/test_resources/test_node_selector.py deleted file mode 100644 index 50b500e..0000000 --- a/tests/test_resources/test_node_selector.py +++ /dev/null @@ -1,82 +0,0 @@ -import pytest -from circuitforge_core.resources.coordinator.node_selector import select_node -from circuitforge_core.resources.coordinator.agent_supervisor import AgentRecord -from circuitforge_core.resources.models import GpuInfo -from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry - - -def _make_agent(node_id: str, free_mb: int, online: bool = True) -> AgentRecord: - r = AgentRecord(node_id=node_id, agent_url=f"http://{node_id}:7701") - r.gpus = [GpuInfo(gpu_id=0, name="RTX", vram_total_mb=8192, - vram_used_mb=8192 - free_mb, vram_free_mb=free_mb)] - r.online = online - return r - - -def 
test_selects_node_with_most_free_vram(): - agents = { - "a": _make_agent("a", free_mb=2000), - "b": _make_agent("b", free_mb=6000), - } - registry = ProfileRegistry() - result = select_node(agents, "vllm", registry, resident_keys=set()) - assert result == ("b", 0) - - -def test_prefers_warm_node_even_with_less_free_vram(): - agents = { - "a": _make_agent("a", free_mb=2000), - "b": _make_agent("b", free_mb=6000), - } - registry = ProfileRegistry() - result = select_node(agents, "vllm", registry, resident_keys={"a:vllm"}) - assert result == ("a", 0) - - -def test_excludes_offline_nodes(): - agents = { - "a": _make_agent("a", free_mb=8000, online=False), - "b": _make_agent("b", free_mb=2000, online=True), - } - registry = ProfileRegistry() - result = select_node(agents, "vllm", registry, resident_keys=set()) - assert result == ("b", 0) - - -def test_returns_none_when_no_node_has_profile_for_service(): - agents = {"a": _make_agent("a", free_mb=8000)} - registry = ProfileRegistry() - result = select_node(agents, "cf-nonexistent-service", registry, resident_keys=set()) - assert result is None - - -def test_returns_none_when_no_agents(): - registry = ProfileRegistry() - result = select_node({}, "vllm", registry, resident_keys=set()) - assert result is None - - -def test_prefers_node_that_fully_fits_service_over_one_that_does_not(): - """can_fit requires free_mb >= service max_mb (full ceiling, not half). - 9500 MB guarantees above all profile ceilings (max is 9000); 1000 MB is below all. 
- """ - agents = { - "a": _make_agent("a", free_mb=1000), - "b": _make_agent("b", free_mb=9500), - } - registry = ProfileRegistry() - result = select_node(agents, "vllm", registry, resident_keys=set()) - # "b" is the only node in the preferred (can_fit) pool - assert result == ("b", 0) - - -def test_falls_back_to_best_effort_when_no_node_fully_fits(): - """When nothing can_fit, select_node returns the best-VRAM node as fallback.""" - agents = { - "a": _make_agent("a", free_mb=1000), - "b": _make_agent("b", free_mb=2000), - } - registry = ProfileRegistry() - # Neither has enough free VRAM; fallback picks highest effective_free_mb - result = select_node(agents, "vllm", registry, resident_keys=set()) - assert result == ("b", 0) diff --git a/tests/test_resources/test_node_store.py b/tests/test_resources/test_node_store.py deleted file mode 100644 index 91b6e0c..0000000 --- a/tests/test_resources/test_node_store.py +++ /dev/null @@ -1,87 +0,0 @@ -# tests/test_resources/test_node_store.py -"""Unit tests for NodeStore — SQLite persistence layer for known agent nodes.""" -from __future__ import annotations - -import time -from pathlib import Path - -import pytest - -from circuitforge_core.resources.coordinator.node_store import NodeStore - - -@pytest.fixture -def store(tmp_path: Path) -> NodeStore: - return NodeStore(db_path=tmp_path / "test-nodes.db") - - -def test_upsert_and_all(store: NodeStore) -> None: - store.upsert("heimdall", "http://127.0.0.1:7701") - rows = store.all() - assert len(rows) == 1 - assert rows[0] == ("heimdall", "http://127.0.0.1:7701") - - -def test_upsert_updates_url(store: NodeStore) -> None: - store.upsert("navi", "http://10.1.10.10:7701") - store.upsert("navi", "http://10.1.10.10:7702") - rows = store.all() - assert len(rows) == 1 - assert rows[0][1] == "http://10.1.10.10:7702" - - -def test_multiple_nodes(store: NodeStore) -> None: - store.upsert("heimdall", "http://127.0.0.1:7701") - store.upsert("navi", "http://10.1.10.10:7701") - 
store.upsert("strahl", "http://10.1.10.20:7701") - assert len(store.all()) == 3 - - -def test_remove(store: NodeStore) -> None: - store.upsert("heimdall", "http://127.0.0.1:7701") - store.upsert("navi", "http://10.1.10.10:7701") - store.remove("navi") - ids = [r[0] for r in store.all()] - assert "navi" not in ids - assert "heimdall" in ids - - -def test_prune_stale_removes_old_entries(store: NodeStore) -> None: - # Insert a node with a last_seen in the distant past - store._conn.execute( - "INSERT INTO known_nodes (node_id, agent_url, last_seen) VALUES (?, ?, ?)", - ("ghost", "http://dead:7701", time.time() - 40 * 86400), - ) - store._conn.commit() - store.upsert("live", "http://live:7701") - - removed = store.prune_stale(max_age_days=30) - assert removed == 1 - ids = [r[0] for r in store.all()] - assert "ghost" not in ids - assert "live" in ids - - -def test_prune_stale_keeps_recent(store: NodeStore) -> None: - store.upsert("recent", "http://recent:7701") - removed = store.prune_stale(max_age_days=30) - assert removed == 0 - assert len(store.all()) == 1 - - -def test_all_empty(store: NodeStore) -> None: - assert store.all() == [] - - -def test_db_persists_across_instances(tmp_path: Path) -> None: - """Data written by one NodeStore instance is visible to a new one on the same file.""" - db = tmp_path / "shared.db" - s1 = NodeStore(db_path=db) - s1.upsert("navi", "http://10.1.10.10:7701") - s1.close() - - s2 = NodeStore(db_path=db) - rows = s2.all() - assert len(rows) == 1 - assert rows[0][0] == "navi" - s2.close() diff --git a/tests/test_resources/test_ollama_adopt.py b/tests/test_resources/test_ollama_adopt.py deleted file mode 100644 index ceaae12..0000000 --- a/tests/test_resources/test_ollama_adopt.py +++ /dev/null @@ -1,176 +0,0 @@ -# tests/test_resources/test_ollama_adopt.py -""" -Tests for the Ollama adopt-if-running path: - - ProcessSpec: adopt and health_path fields parsed from YAML - - ServiceManager.start(): adopt path claims running service; falls 
through if not running - - ServiceManager.is_running(): adopt path uses health probe, not proc table - - ServiceInstance.health_path persists through upsert_instance - - Probe loop uses inst.health_path instead of hardcoded /health -""" -from __future__ import annotations - -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -from circuitforge_core.resources.agent.service_manager import ServiceManager -from circuitforge_core.resources.coordinator.service_registry import ServiceRegistry -from circuitforge_core.resources.profiles.schema import GpuProfile, ProcessSpec, ServiceProfile, load_profile - - -# ── ProcessSpec schema ──────────────────────────────────────────────────────── - -def test_process_spec_defaults(): - spec = ProcessSpec(exec_path="/usr/local/bin/ollama") - assert spec.adopt is False - assert spec.health_path == "/health" - - -def test_process_spec_adopt_fields(): - spec = ProcessSpec( - exec_path="/usr/local/bin/ollama", - adopt=True, - health_path="/api/tags", - port=11434, - host_port=11434, - ) - assert spec.adopt is True - assert spec.health_path == "/api/tags" - - -def test_profile_yaml_parses_adopt(tmp_path: Path): - yaml_text = """\ -schema_version: 1 -name: test -services: - ollama: - max_mb: 4096 - priority: 1 - managed: - type: process - adopt: true - exec_path: /usr/local/bin/ollama - args_template: serve - port: 11434 - host_port: 11434 - health_path: /api/tags -""" - p = tmp_path / "profile.yaml" - p.write_text(yaml_text) - profile = load_profile(p) - spec = profile.services["ollama"].managed - assert isinstance(spec, ProcessSpec) - assert spec.adopt is True - assert spec.health_path == "/api/tags" - assert spec.host_port == 11434 - - -# ── ServiceManager adopt path ───────────────────────────────────────────────── - -def _make_manager_with_ollama(advertise_host: str = "127.0.0.1") -> ServiceManager: - profile = GpuProfile( - schema_version=1, - name="test", - services={ - "ollama": ServiceProfile( - 
max_mb=4096, - priority=1, - managed=ProcessSpec( - exec_path="/usr/local/bin/ollama", - args_template="serve", - port=11434, - host_port=11434, - adopt=True, - health_path="/api/tags", - ), - ) - }, - ) - return ServiceManager(node_id="heimdall", profile=profile, advertise_host=advertise_host) - - -def test_start_adopt_claims_running_service(): - """When Ollama is already healthy, start() returns its URL without spawning a process.""" - mgr = _make_manager_with_ollama() - with patch.object(mgr, "_probe_health", return_value=True) as mock_probe: - url = mgr.start("ollama", gpu_id=0, params={}) - assert url == "http://127.0.0.1:11434" - mock_probe.assert_called_once_with(11434, "/api/tags") - assert "ollama" not in mgr._procs # no subprocess spawned - - -def test_start_adopt_spawns_when_not_running(): - """When Ollama is not yet running, start() spawns it normally.""" - mgr = _make_manager_with_ollama() - mock_proc = MagicMock() - mock_proc.poll.return_value = None - - with patch.object(mgr, "_probe_health", return_value=False), \ - patch("subprocess.Popen", return_value=mock_proc) as mock_popen: - url = mgr.start("ollama", gpu_id=0, params={}) - - assert url == "http://127.0.0.1:11434" - mock_popen.assert_called_once() - assert "ollama" in mgr._procs - - -def test_is_running_adopt_uses_health_probe(): - """is_running() for adopt=True services checks the health endpoint, not the proc table.""" - mgr = _make_manager_with_ollama() - with patch.object(mgr, "_probe_health", return_value=True): - assert mgr.is_running("ollama") is True - with patch.object(mgr, "_probe_health", return_value=False): - assert mgr.is_running("ollama") is False - - -def test_probe_health_returns_true_on_200(): - mgr = _make_manager_with_ollama() - mock_resp = MagicMock() - mock_resp.status = 200 - mock_resp.__enter__ = lambda s: mock_resp - mock_resp.__exit__ = MagicMock(return_value=False) - - with patch("urllib.request.urlopen", return_value=mock_resp): - assert mgr._probe_health(11434, 
"/api/tags") is True - - -def test_probe_health_returns_false_on_connection_error(): - mgr = _make_manager_with_ollama() - with patch("urllib.request.urlopen", side_effect=OSError("refused")): - assert mgr._probe_health(11434, "/api/tags") is False - - -# ── ServiceRegistry health_path ─────────────────────────────────────────────── - -def test_upsert_instance_stores_health_path(): - reg = ServiceRegistry() - inst = reg.upsert_instance( - service="ollama", node_id="heimdall", gpu_id=0, - state="running", model=None, url="http://127.0.0.1:11434", - health_path="/api/tags", - ) - assert inst.health_path == "/api/tags" - - -def test_upsert_instance_default_health_path(): - reg = ServiceRegistry() - inst = reg.upsert_instance( - service="vllm", node_id="heimdall", gpu_id=0, - state="starting", model="qwen", url="http://127.0.0.1:8000", - ) - assert inst.health_path == "/health" - - -def test_all_gpu_profiles_have_ollama_managed_block(): - """Sanity check: all public GPU profiles now have a managed block for ollama.""" - from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry - registry = ProfileRegistry() - for profile in registry.list_public(): - svc = profile.services.get("ollama") - if svc is None: - continue # profile may not define ollama - assert svc.managed is not None, f"{profile.name}: ollama missing managed block" - assert isinstance(svc.managed, ProcessSpec) - assert svc.managed.adopt is True, f"{profile.name}: ollama adopt should be True" - assert svc.managed.health_path == "/api/tags", f"{profile.name}: wrong health_path" diff --git a/tests/test_resources/test_profile_registry.py b/tests/test_resources/test_profile_registry.py deleted file mode 100644 index e55bcfa..0000000 --- a/tests/test_resources/test_profile_registry.py +++ /dev/null @@ -1,101 +0,0 @@ -# tests/test_resources/test_profile_registry.py -import pytest -from unittest.mock import MagicMock - -from circuitforge_core.resources.profiles.schema import ( - 
GpuProfile, ServiceProfile, load_profile -) -from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry - - -def test_load_8gb_profile(tmp_path): - yaml_content = """ -schema_version: 1 -name: single-gpu-8gb -vram_total_mb: 8192 -eviction_timeout_s: 10.0 -services: - vllm: - max_mb: 5120 - priority: 1 - cf-vision: - max_mb: 2048 - priority: 2 - shared: true - max_concurrent: 3 -""" - profile_file = tmp_path / "test.yaml" - profile_file.write_text(yaml_content) - profile = load_profile(profile_file) - - assert profile.name == "single-gpu-8gb" - assert profile.schema_version == 1 - assert profile.vram_total_mb == 8192 - assert profile.eviction_timeout_s == 10.0 - assert "vllm" in profile.services - assert profile.services["vllm"].max_mb == 5120 - assert profile.services["vllm"].priority == 1 - assert profile.services["cf-vision"].shared is True - assert profile.services["cf-vision"].max_concurrent == 3 - - -def test_load_profile_rejects_wrong_schema_version(tmp_path): - yaml_content = "schema_version: 99\nname: future\n" - profile_file = tmp_path / "future.yaml" - profile_file.write_text(yaml_content) - with pytest.raises(ValueError, match="schema_version"): - load_profile(profile_file) - - -def test_service_profile_defaults(): - svc = ServiceProfile(max_mb=1024, priority=2) - assert svc.shared is False - assert svc.max_concurrent == 1 - assert svc.always_on is False - assert svc.backend is None - assert svc.consumers == [] - - -def test_profile_registry_loads_public_profiles(): - registry = ProfileRegistry() - profiles = registry.list_public() - names = [p.name for p in profiles] - assert "single-gpu-8gb" in names - assert "single-gpu-6gb" in names - assert "single-gpu-2gb" in names - - -def test_profile_registry_auto_detect_selects_8gb(): - registry = ProfileRegistry() - mock_gpus = [ - MagicMock(vram_total_mb=8192), - ] - profile = registry.auto_detect(mock_gpus) - assert profile.name == "single-gpu-8gb" - - -def 
test_profile_registry_auto_detect_selects_6gb(): - registry = ProfileRegistry() - mock_gpus = [MagicMock(vram_total_mb=6144)] - profile = registry.auto_detect(mock_gpus) - assert profile.name == "single-gpu-6gb" - - -def test_profile_registry_auto_detect_selects_2gb(): - registry = ProfileRegistry() - mock_gpus = [MagicMock(vram_total_mb=2048)] - profile = registry.auto_detect(mock_gpus) - assert profile.name == "single-gpu-2gb" - - -def test_profile_registry_load_from_path(tmp_path): - yaml_content = ( - "schema_version: 1\nname: custom\n" - "vram_total_mb: 12288\neviction_timeout_s: 5.0\n" - ) - p = tmp_path / "custom.yaml" - p.write_text(yaml_content) - registry = ProfileRegistry() - profile = registry.load(p) - assert profile.name == "custom" - assert profile.vram_total_mb == 12288 diff --git a/tests/test_resources/test_service_manager.py b/tests/test_resources/test_service_manager.py deleted file mode 100644 index a5c26eb..0000000 --- a/tests/test_resources/test_service_manager.py +++ /dev/null @@ -1,194 +0,0 @@ -"""Tests for ServiceManager ProcessSpec support.""" -from __future__ import annotations - -from unittest.mock import MagicMock, patch - -import pytest - -from circuitforge_core.resources.agent.service_manager import ServiceManager -from circuitforge_core.resources.profiles.schema import ( - GpuProfile, - ProcessSpec, - ServiceProfile, -) - - -def _make_profile(args_template: str = "--port {port} --gpu-id {gpu_id}") -> GpuProfile: - return GpuProfile( - schema_version=1, - name="test", - vram_total_mb=8192, - services={ - "vllm": ServiceProfile( - max_mb=5120, - priority=1, - managed=ProcessSpec( - exec_path="/usr/bin/python", - args_template=args_template, - port=8000, - host_port=8000, - cwd="/tmp", - ), - ), - "no_managed": ServiceProfile(max_mb=1024, priority=2), - }, - ) - - -@pytest.fixture -def manager(): - return ServiceManager(node_id="test-node", profile=_make_profile(), advertise_host="127.0.0.1") - - -# 
--------------------------------------------------------------------------- -# is_running -# --------------------------------------------------------------------------- - - -def test_is_running_returns_false_when_no_proc(manager): - assert manager.is_running("vllm") is False - - -def test_is_running_returns_false_when_proc_exited(manager): - mock_proc = MagicMock() - mock_proc.poll.return_value = 1 # exited - manager._procs["vllm"] = mock_proc - assert manager.is_running("vllm") is False - - -def test_is_running_returns_false_when_port_not_listening(manager): - mock_proc = MagicMock() - mock_proc.poll.return_value = None # still running - manager._procs["vllm"] = mock_proc - - with patch("socket.create_connection", side_effect=OSError("refused")): - assert manager.is_running("vllm") is False - - -def test_is_running_returns_true_when_proc_alive_and_port_open(manager): - mock_proc = MagicMock() - mock_proc.poll.return_value = None # still running - manager._procs["vllm"] = mock_proc - - mock_socket = MagicMock() - mock_socket.__enter__ = MagicMock(return_value=mock_socket) - mock_socket.__exit__ = MagicMock(return_value=False) - with patch("socket.create_connection", return_value=mock_socket): - assert manager.is_running("vllm") is True - - -def test_is_running_unknown_service_returns_false(manager): - assert manager.is_running("nonexistent") is False - - -def test_is_running_no_managed_spec_returns_false(manager): - assert manager.is_running("no_managed") is False - - -# --------------------------------------------------------------------------- -# start -# --------------------------------------------------------------------------- - - -def test_start_launches_process_and_returns_url(manager): - with patch("subprocess.Popen") as mock_popen, \ - patch.object(manager, "is_running", return_value=False): - mock_popen.return_value = MagicMock() - url = manager.start("vllm", gpu_id=0, params={"model": "mymodel"}) - - assert url == "http://127.0.0.1:8000" - 
mock_popen.assert_called_once() - call_args = mock_popen.call_args - cmd = call_args[0][0] - assert cmd[0] == "/usr/bin/python" - assert "--port" in cmd - assert "8000" in cmd - assert "--gpu-id" in cmd - assert "0" in cmd - - -def test_start_returns_url_immediately_when_already_running(manager): - with patch.object(manager, "is_running", return_value=True): - with patch("subprocess.Popen") as mock_popen: - url = manager.start("vllm", gpu_id=0, params={}) - - assert url == "http://127.0.0.1:8000" - mock_popen.assert_not_called() - - -def test_start_raises_for_unknown_service(manager): - with pytest.raises(ValueError, match="not in profile"): - manager.start("nonexistent", gpu_id=0, params={}) - - -def test_start_stores_proc_in_procs(manager): - mock_proc = MagicMock() - with patch("subprocess.Popen", return_value=mock_proc), \ - patch.object(manager, "is_running", return_value=False): - manager.start("vllm", gpu_id=0, params={}) - - assert manager._procs["vllm"] is mock_proc - - -# --------------------------------------------------------------------------- -# stop -# --------------------------------------------------------------------------- - - -def test_stop_terminates_running_process(manager): - mock_proc = MagicMock() - manager._procs["vllm"] = mock_proc - - result = manager.stop("vllm") - - assert result is True - mock_proc.terminate.assert_called_once() - mock_proc.wait.assert_called_once() - assert "vllm" not in manager._procs - - -def test_stop_kills_process_that_wont_terminate(manager): - mock_proc = MagicMock() - mock_proc.wait.side_effect = Exception("timeout") - manager._procs["vllm"] = mock_proc - - result = manager.stop("vllm") - - assert result is True - mock_proc.kill.assert_called_once() - - -def test_stop_returns_true_when_no_proc_tracked(manager): - # No proc in _procs — still returns True (idempotent stop) - result = manager.stop("vllm") - assert result is True - - -def test_stop_returns_false_for_unknown_service(manager): - result = 
manager.stop("nonexistent") - assert result is False - - -# --------------------------------------------------------------------------- -# list_running / get_url -# --------------------------------------------------------------------------- - - -def test_list_running_returns_running_services(manager): - def _is_running(svc: str) -> bool: - return svc == "vllm" - - with patch.object(manager, "is_running", side_effect=_is_running): - running = manager.list_running() - - assert running == ["vllm"] - - -def test_get_url_returns_none_when_not_running(manager): - with patch.object(manager, "is_running", return_value=False): - assert manager.get_url("vllm") is None - - -def test_get_url_returns_url_when_running(manager): - with patch.object(manager, "is_running", return_value=True): - assert manager.get_url("vllm") == "http://127.0.0.1:8000" diff --git a/tests/test_resources/test_service_registry.py b/tests/test_resources/test_service_registry.py deleted file mode 100644 index dc73a9c..0000000 --- a/tests/test_resources/test_service_registry.py +++ /dev/null @@ -1,86 +0,0 @@ -import time -import dataclasses -import pytest -from circuitforge_core.resources.coordinator.service_registry import ( - ServiceRegistry, ServiceAllocation, ServiceInstance, -) - - -@pytest.fixture -def registry(): - return ServiceRegistry() - - -def test_allocate_creates_allocation(registry): - alloc = registry.allocate( - service="vllm", node_id="heimdall", gpu_id=0, - model="Ouro-1.4B", url="http://heimdall:8000", - caller="test", ttl_s=300.0, - ) - assert alloc.service == "vllm" - assert alloc.node_id == "heimdall" - assert alloc.allocation_id # non-empty UUID string - - -def test_active_allocations_count(registry): - registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "a", 300.0) - registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "b", 300.0) - assert registry.active_allocations("vllm", "heimdall", 0) == 2 - - -def test_release_decrements_count(registry): - alloc = 
registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "a", 300.0) - registry.release(alloc.allocation_id) - assert registry.active_allocations("vllm", "heimdall", 0) == 0 - - -def test_release_nonexistent_returns_false(registry): - assert registry.release("nonexistent-id") is False - - -def test_upsert_instance_sets_running_state(registry): - registry.upsert_instance("vllm", "heimdall", 0, state="running", - model="Ouro-1.4B", url="http://heimdall:8000") - instances = registry.all_instances() - assert len(instances) == 1 - assert instances[0].state == "running" - - -def test_release_last_alloc_marks_instance_idle(registry): - registry.upsert_instance("vllm", "heimdall", 0, state="running", - model="Ouro-1.4B", url="http://heimdall:8000") - alloc = registry.allocate("vllm", "heimdall", 0, "Ouro-1.4B", "http://heimdall:8000", "a", 300.0) - registry.release(alloc.allocation_id) - instance = registry.all_instances()[0] - assert instance.state == "idle" - assert instance.idle_since is not None - - -def test_new_alloc_on_idle_instance_marks_it_running(registry): - registry.upsert_instance("vllm", "heimdall", 0, state="idle", - model="M", url="http://h:8000") - registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "x", 300.0) - assert registry.all_instances()[0].state == "running" - - -def test_sweep_expired_allocations(registry): - # Register a running instance so idle-transition logic has something to act on. - registry.upsert_instance("vllm", "heimdall", 0, state="running", - model="M", url="http://h:8000") - # Create an allocation with a very short TTL (1 second). - alloc = registry.allocate("vllm", "heimdall", 0, "M", "http://h:8000", "caller", ttl_s=1) - assert registry.active_allocations("vllm", "heimdall", 0) == 1 - - # Wait for TTL to elapse. - time.sleep(1.1) - - expired = registry.sweep_expired_allocations() - - # The allocation should have been swept. 
- assert alloc.allocation_id in expired - assert registry.active_allocations("vllm", "heimdall", 0) == 0 - - # The instance should have transitioned to idle since no allocations remain. - instance = registry.all_instances()[0] - assert instance.state == "idle" - assert instance.idle_since is not None