From c6a58b6a379adaec036abcaf0a5d30e08a7664a0 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Mon, 30 Mar 2026 20:28:06 -0700
Subject: [PATCH] feat(resources): add GPU profile schema and public 8GB/6GB/2GB profiles

---
 .../resources/profiles/__init__.py            |  0
 .../profiles/public/single-gpu-2gb.yaml       | 22 +++++
 .../profiles/public/single-gpu-6gb.yaml       | 33 +++++++
 .../profiles/public/single-gpu-8gb.yaml       | 33 +++++++
 .../resources/profiles/schema.py              | 88 +++++++++++++++++++
 tests/test_resources/test_profile_registry.py | 63 +++++++++++++
 6 files changed, 239 insertions(+)
 create mode 100644 circuitforge_core/resources/profiles/__init__.py
 create mode 100644 circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml
 create mode 100644 circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
 create mode 100644 circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
 create mode 100644 circuitforge_core/resources/profiles/schema.py
 create mode 100644 tests/test_resources/test_profile_registry.py

diff --git a/circuitforge_core/resources/profiles/__init__.py b/circuitforge_core/resources/profiles/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml
new file mode 100644
index 0000000..d852eea
--- /dev/null
+++ b/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml
@@ -0,0 +1,22 @@
+schema_version: 1
+name: single-gpu-2gb
+vram_total_mb: 2048
+eviction_timeout_s: 15.0
+services:
+  ollama:
+    max_mb: 1536
+    priority: 1
+  cf-vision:
+    max_mb: 512
+    priority: 2
+    shared: true
+    max_concurrent: 1
+  cf-stt:
+    max_mb: 200
+    priority: 2
+    shared: true
+    max_concurrent: 1
+    backend: moonshine
+model_size_hints:
+  llm_max_params: 3b
+  image_gen_max: none
diff --git a/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
new file mode 100644
index 0000000..92168ef
--- /dev/null
+++ b/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
@@ -0,0 +1,33 @@
+schema_version: 1
+name: single-gpu-6gb
+vram_total_mb: 6144
+eviction_timeout_s: 10.0
+services:
+  vllm:
+    max_mb: 4096
+    priority: 1
+  ollama:
+    max_mb: 3584
+    priority: 1
+  cf-vision:
+    max_mb: 1536
+    priority: 2
+    shared: true
+    max_concurrent: 2
+  cf-stt:
+    max_mb: 600
+    priority: 2
+    shared: true
+    max_concurrent: 2
+    backend: faster-whisper
+  cf-tts:
+    max_mb: 768
+    priority: 2
+    shared: true
+    max_concurrent: 1
+  comfyui:
+    max_mb: 5120
+    priority: 4
+model_size_hints:
+  llm_max_params: 7b
+  image_gen_max: sd15
diff --git a/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml b/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
new file mode 100644
index 0000000..7053419
--- /dev/null
+++ b/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
@@ -0,0 +1,33 @@
+schema_version: 1
+name: single-gpu-8gb
+vram_total_mb: 8192
+eviction_timeout_s: 10.0
+services:
+  vllm:
+    max_mb: 5120
+    priority: 1
+  ollama:
+    max_mb: 4096
+    priority: 1
+  cf-vision:
+    max_mb: 2048
+    priority: 2
+    shared: true
+    max_concurrent: 3
+  cf-stt:
+    max_mb: 1200
+    priority: 2
+    shared: true
+    max_concurrent: 2
+    backend: parakeet-tdt
+  cf-tts:
+    max_mb: 1024
+    priority: 2
+    shared: true
+    max_concurrent: 2
+  comfyui:
+    max_mb: 6144
+    priority: 4
+model_size_hints:
+  llm_max_params: 8b
+  image_gen_max: sdxl-fp8
diff --git a/circuitforge_core/resources/profiles/schema.py b/circuitforge_core/resources/profiles/schema.py
new file mode 100644
index 0000000..f3cc808
--- /dev/null
+++ b/circuitforge_core/resources/profiles/schema.py
@@ -0,0 +1,88 @@
+# circuitforge_core/resources/profiles/schema.py
+"""Pydantic models and YAML loader for GPU resource profiles."""
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+from pydantic import BaseModel, Field
+
+SUPPORTED_SCHEMA_VERSION = 1
+
+
+class ServiceProfile(BaseModel):
+    """Settings for one entry in a profile's ``services`` mapping."""
+
+    max_mb: int
+    priority: int
+    shared: bool = False
+    max_concurrent: int = 1
+    always_on: bool = False
+    backend: str | None = None
+    consumers: list[str] = Field(default_factory=list)
+
+    model_config = {"frozen": True}
+
+
+class GpuNodeEntry(BaseModel):
+    """One GPU listed under a node's ``gpus``."""
+
+    id: int
+    vram_mb: int
+    role: str
+    card: str = "unknown"
+    always_on: bool = False
+    services: list[str] = Field(default_factory=list)
+
+    model_config = {"frozen": True}
+
+
+class NodeProfile(BaseModel):
+    """One entry in a profile's ``nodes`` mapping."""
+
+    gpus: list[GpuNodeEntry]
+    agent_url: str | None = None
+    nas_mount: str | None = None
+
+    model_config = {"frozen": True}
+
+
+class GpuProfile(BaseModel):
+    """Top-level, immutable model of a profile YAML document."""
+
+    schema_version: int
+    name: str
+    vram_total_mb: int | None = None
+    eviction_timeout_s: float = 10.0
+    services: dict[str, ServiceProfile] = Field(default_factory=dict)
+    model_size_hints: dict[str, str] = Field(default_factory=dict)
+    nodes: dict[str, NodeProfile] = Field(default_factory=dict)
+
+    model_config = {"frozen": True}
+
+
+def load_profile(path: Path) -> GpuProfile:
+    """Read the YAML file at *path* and validate it as a ``GpuProfile``.
+
+    Raises:
+        ValueError: If the document is not a mapping (for example an
+            empty file) or its ``schema_version`` is not
+            ``SUPPORTED_SCHEMA_VERSION``.
+    """
+    raw: Any = yaml.safe_load(path.read_text(encoding="utf-8"))
+    # safe_load returns None for an empty document and a list/scalar
+    # for non-mapping YAML; reject those up front so callers get a
+    # ValueError instead of an AttributeError from ``raw.get``.
+    if not isinstance(raw, dict):
+        raise ValueError(
+            f"Profile {path} must contain a YAML mapping, "
+            f"got {type(raw).__name__}."
+        )
+    version = raw.get("schema_version")
+    if version != SUPPORTED_SCHEMA_VERSION:
+        raise ValueError(
+            f"Unsupported schema_version {version!r} in {path}. "
+            f"Expected {SUPPORTED_SCHEMA_VERSION}."
+        )
+    return GpuProfile.model_validate(raw)
diff --git a/tests/test_resources/test_profile_registry.py b/tests/test_resources/test_profile_registry.py
new file mode 100644
index 0000000..4e808ff
--- /dev/null
+++ b/tests/test_resources/test_profile_registry.py
@@ -0,0 +1,63 @@
+# tests/test_resources/test_profile_registry.py
+import pytest
+from pathlib import Path
+from circuitforge_core.resources.profiles.schema import (
+    GpuProfile, ServiceProfile, load_profile
+)
+
+FIXTURES = Path(__file__).parent / "fixtures"
+
+
+def test_load_8gb_profile(tmp_path):
+    yaml_content = """
+schema_version: 1
+name: single-gpu-8gb
+vram_total_mb: 8192
+eviction_timeout_s: 10.0
+services:
+  vllm:
+    max_mb: 5120
+    priority: 1
+  cf-vision:
+    max_mb: 2048
+    priority: 2
+    shared: true
+    max_concurrent: 3
+"""
+    profile_file = tmp_path / "test.yaml"
+    profile_file.write_text(yaml_content)
+    profile = load_profile(profile_file)
+
+    assert profile.name == "single-gpu-8gb"
+    assert profile.schema_version == 1
+    assert profile.vram_total_mb == 8192
+    assert profile.eviction_timeout_s == 10.0
+    assert "vllm" in profile.services
+    assert profile.services["vllm"].max_mb == 5120
+    assert profile.services["vllm"].priority == 1
+    assert profile.services["cf-vision"].shared is True
+    assert profile.services["cf-vision"].max_concurrent == 3
+
+
+def test_load_profile_rejects_wrong_schema_version(tmp_path):
+    yaml_content = "schema_version: 99\nname: future\n"
+    profile_file = tmp_path / "future.yaml"
+    profile_file.write_text(yaml_content)
+    with pytest.raises(ValueError, match="schema_version"):
+        load_profile(profile_file)
+
+
+def test_load_profile_rejects_non_mapping_document(tmp_path):
+    profile_file = tmp_path / "empty.yaml"
+    profile_file.write_text("")
+    with pytest.raises(ValueError, match="YAML mapping"):
+        load_profile(profile_file)
+
+
+def test_service_profile_defaults():
+    svc = ServiceProfile(max_mb=1024, priority=2)
+    assert svc.shared is False
+    assert svc.max_concurrent == 1
+    assert svc.always_on is False
+    assert svc.backend is None
+    assert svc.consumers == []