diff --git a/circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml
index 7ad59f9..84daf6a 100644
--- a/circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml
+++ b/circuitforge_core/resources/profiles/public/single-gpu-16gb.yaml
@@ -6,6 +6,7 @@ services:
   vllm:
     max_mb: 12288
     priority: 1
+    idle_stop_after_s: 600
   ollama:
     max_mb: 12288
     priority: 1
@@ -14,6 +15,11 @@ services:
     priority: 2
     shared: true
     max_concurrent: 4
+  cf-docuvision:
+    max_mb: 6144
+    priority: 2
+    shared: true
+    max_concurrent: 3
   cf-stt:
     max_mb: 1200
     priority: 2
diff --git a/circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml
index 4f98eb8..e0ca256 100644
--- a/circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml
+++ b/circuitforge_core/resources/profiles/public/single-gpu-24gb.yaml
@@ -6,6 +6,7 @@ services:
   vllm:
     max_mb: 20480
     priority: 1
+    idle_stop_after_s: 600
   ollama:
     max_mb: 18432
     priority: 1
@@ -14,6 +15,11 @@ services:
     priority: 2
     shared: true
     max_concurrent: 6
+  cf-docuvision:
+    max_mb: 8192
+    priority: 2
+    shared: true
+    max_concurrent: 4
   cf-stt:
     max_mb: 1200
     priority: 2
diff --git a/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
index 92168ef..1888aa4 100644
--- a/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
+++ b/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
@@ -6,6 +6,17 @@ services:
   vllm:
     max_mb: 4096
     priority: 1
+    idle_stop_after_s: 600
+    managed:
+      type: docker
+      image: "vllm/vllm-openai:v0.9.2"
+      port: 8000
+      host_port: 8000
+      command_template: "--model /models/{model} --trust-remote-code --max-model-len {max_model_len} --gpu-memory-utilization {gpu_mem_util} --enforce-eager --max-num-seqs 8"
+      volumes:
+        - "${VLLM_MODELS_DIR:-/Library/Assets/LLM/vllm/models}:/models"
+      runtime: nvidia
+      ipc: host
   ollama:
     max_mb: 3584
     priority: 1
@@ -14,6 +25,11 @@ services:
     priority: 2
     shared: true
     max_concurrent: 2
+  cf-docuvision:
+    max_mb: 3072
+    priority: 2
+    shared: true
+    max_concurrent: 1
   cf-stt:
     max_mb: 600
     priority: 2
diff --git a/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
index 7053419..614416d 100644
--- a/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
+++ b/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
@@ -6,6 +6,17 @@ services:
   vllm:
     max_mb: 5120
     priority: 1
+    idle_stop_after_s: 600
+    managed:
+      type: docker
+      image: "vllm/vllm-openai:v0.9.2"
+      port: 8000
+      host_port: 8000
+      command_template: "--model /models/{model} --trust-remote-code --max-model-len {max_model_len} --gpu-memory-utilization {gpu_mem_util} --enforce-eager --max-num-seqs 8"
+      volumes:
+        - "${VLLM_MODELS_DIR:-/Library/Assets/LLM/vllm/models}:/models"
+      runtime: nvidia
+      ipc: host
   ollama:
     max_mb: 4096
     priority: 1
@@ -14,6 +25,11 @@ services:
     priority: 2
     shared: true
     max_concurrent: 3
+  cf-docuvision:
+    max_mb: 4096
+    priority: 2
+    shared: true
+    max_concurrent: 2
   cf-stt:
     max_mb: 1200
     priority: 2
@@ -28,6 +44,13 @@ services:
   comfyui:
     max_mb: 6144
     priority: 4
+    managed:
+      type: process
+      exec_path: "/opt/miniconda3/envs/comfyui/bin/python"
+      args_template: "/opt/ComfyUI/main.py --listen 0.0.0.0 --port {port} --cuda-device {gpu_id}"
+      cwd: "/opt/ComfyUI"
+      port: 8188
+      host_port: 8188
 model_size_hints:
   llm_max_params: 8b
   image_gen_max: sdxl-fp8
diff --git a/circuitforge_core/resources/profiles/schema.py b/circuitforge_core/resources/profiles/schema.py
index ac59020..4439039 100644
--- a/circuitforge_core/resources/profiles/schema.py
+++ b/circuitforge_core/resources/profiles/schema.py
@@ -5,22 +5,71 @@ from pathlib import Path
 from typing import Any
 
 import yaml
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 
 SUPPORTED_SCHEMA_VERSION = 1
 
 
+class DockerSpec(BaseModel):
+    """Spec for a Docker-managed service."""
+
+    image: str
+    port: int
+    host_port: int
+    command_template: str = ""
+    volumes: list[str] = Field(default_factory=list)
+    env: dict[str, str] = Field(default_factory=dict)
+    runtime: str = "nvidia"
+    ipc: str = "host"
+
+    model_config = {"frozen": True}
+
+
+class ProcessSpec(BaseModel):
+    """Spec for a process-managed service (non-Docker, e.g. conda env)."""
+
+    exec_path: str
+    args_template: str = ""
+    cwd: str = ""
+    env: dict[str, str] = Field(default_factory=dict)
+    port: int = 0
+    host_port: int = 0
+
+    model_config = {"frozen": True}
+
+
 class ServiceProfile(BaseModel):
     max_mb: int
     priority: int
     shared: bool = False
     max_concurrent: int = 1
     always_on: bool = False
+    idle_stop_after_s: int = 0
     backend: str | None = None
     consumers: list[str] = Field(default_factory=list)
+    managed: DockerSpec | ProcessSpec | None = None
 
     model_config = {"frozen": True}
 
+    @model_validator(mode="before")
+    @classmethod
+    def _parse_managed(cls, values: Any) -> Any:
+        if not isinstance(values, dict):
+            return values
+        raw = values.get("managed")
+        if raw is None:
+            return values
+        if not isinstance(raw, dict):
+            return values
+        spec_type = raw.pop("type", None)
+        if spec_type == "docker":
+            values["managed"] = DockerSpec(**raw)
+        elif spec_type == "process":
+            values["managed"] = ProcessSpec(**raw)
+        else:
+            raise ValueError(f"Unknown managed service type: {spec_type!r}")
+        return values
+
 
 class GpuNodeEntry(BaseModel):
     id: int
diff --git a/tests/test_resources/test_coordinator_app.py b/tests/test_resources/test_coordinator_app.py
index 48c40f6..60d67c1 100644
--- a/tests/test_resources/test_coordinator_app.py
+++ b/tests/test_resources/test_coordinator_app.py
@@ -1,11 +1,13 @@
 import pytest
 from unittest.mock import MagicMock
+from pathlib import Path
 from fastapi.testclient import TestClient
 from circuitforge_core.resources.coordinator.app import create_coordinator_app
 from circuitforge_core.resources.coordinator.agent_supervisor import AgentSupervisor
 from circuitforge_core.resources.coordinator.lease_manager import LeaseManager
 from circuitforge_core.resources.coordinator.profile_registry import ProfileRegistry
 from circuitforge_core.resources.models import GpuInfo, NodeInfo
+from circuitforge_core.resources.profiles.schema import load_profile
 
 
 @pytest.fixture
@@ -132,3 +134,13 @@ def test_resident_keys_returns_set_of_node_service():
     lm.set_residents_for_node("heimdall", [("vllm", "Ouro-1.4B"), ("ollama", None)])
     keys = lm.resident_keys()
     assert keys == {"heimdall:vllm", "heimdall:ollama"}
+
+
+def test_single_gpu_8gb_profile_has_idle_stop_after_s():
+    profile = load_profile(
+        Path("circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml")
+    )
+    vllm_svc = profile.services.get("vllm")
+    assert vllm_svc is not None
+    assert hasattr(vllm_svc, "idle_stop_after_s")
+    assert vllm_svc.idle_stop_after_s == 600
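
Note for reviewers: a minimal sketch of how the new `managed` union is expected to behave under the `_parse_managed` validator added in schema.py above. It assumes the package is importable and that `ServiceProfile` and `DockerSpec` land as defined in this patch; the inline YAML is illustrative, not a shipped profile (the values mirror the single-gpu-8gb vllm entry).

    # Sketch: exercising the `managed` discriminator dispatch from schema.py.
    # Assumes circuitforge_core is on the path; inline YAML is illustrative only.
    import yaml

    from circuitforge_core.resources.profiles.schema import DockerSpec, ServiceProfile

    raw = yaml.safe_load(
        """
    max_mb: 5120
    priority: 1
    idle_stop_after_s: 600
    managed:
      type: docker
      image: "vllm/vllm-openai:v0.9.2"
      port: 8000
      host_port: 8000
    """
    )

    svc = ServiceProfile.model_validate(raw)

    # The mode="before" validator dispatches on the `type` key, so `managed`
    # arrives as a concrete DockerSpec rather than a bare dict.
    assert isinstance(svc.managed, DockerSpec)
    assert svc.managed.runtime == "nvidia"  # falls back to the field default
    assert svc.idle_stop_after_s == 600

    # An unknown discriminator is rejected (pydantic v2 wraps the ValueError
    # raised in the validator in a ValidationError, itself a ValueError).
    try:
        ServiceProfile.model_validate(
            {"max_mb": 1, "priority": 1, "managed": {"type": "k8s"}}
        )
    except ValueError as exc:
        print(exc)  # message includes: Unknown managed service type: 'k8s'

One design note: because the dispatch happens in a before-validator rather than via a pydantic discriminated union, profiles that omit `managed` entirely (every existing public profile except the ones touched here) pass through unchanged, and `ServiceProfile` stays frozen with `managed=None` as the default.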