feat(resources): add GPU profile schema and public 8GB/6GB/2GB profiles

2026-03-30 20:28:06 -07:00 · 2026-03-30 20:28:06 -07:00 · c6a58b6a37
commit c6a58b6a37
parent b774afb6b0
6 changed files with 208 additions and 0 deletions
--- a/circuitforge_core/resources/profiles/__init__.py
+++ b/circuitforge_core/resources/profiles/__init__.py
--- a/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml
+++ b/circuitforge_core/resources/profiles/public/single-gpu-2gb.yaml
@@ -0,0 +1,22 @@
+schema_version: 1  # load_profile() in profiles/schema.py rejects any value other than SUPPORTED_SCHEMA_VERSION (1)
+name: single-gpu-2gb
+vram_total_mb: 2048
+eviction_timeout_s: 15.0  # overrides the schema default of 10.0 used by the 6GB/8GB profiles
+services:  # max_mb values below sum to 2248 > vram_total_mb; presumably per-service caps, not simultaneous reservations (confirm)
+  ollama:
+    max_mb: 1536
+    priority: 1
+  cf-vision:
+    max_mb: 512
+    priority: 2
+    shared: true
+    max_concurrent: 1
+  cf-stt:
+    max_mb: 200
+    priority: 2
+    shared: true
+    max_concurrent: 1
+    backend: moonshine
+model_size_hints:  # validated as dict[str, str] by GpuProfile
+  llm_max_params: 3b
+  image_gen_max: none  # unquoted `none` is the YAML string "none", not null
--- a/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
+++ b/circuitforge_core/resources/profiles/public/single-gpu-6gb.yaml
@@ -0,0 +1,33 @@
+schema_version: 1  # load_profile() in profiles/schema.py rejects any value other than SUPPORTED_SCHEMA_VERSION (1)
+name: single-gpu-6gb
+vram_total_mb: 6144
+eviction_timeout_s: 10.0  # same as the schema default
+services:  # max_mb values below sum to 15704 > vram_total_mb; presumably per-service caps, not simultaneous reservations (confirm)
+  vllm:
+    max_mb: 4096
+    priority: 1
+  ollama:
+    max_mb: 3584
+    priority: 1
+  cf-vision:
+    max_mb: 1536
+    priority: 2
+    shared: true
+    max_concurrent: 2
+  cf-stt:
+    max_mb: 600
+    priority: 2
+    shared: true
+    max_concurrent: 2
+    backend: faster-whisper
+  cf-tts:
+    max_mb: 768
+    priority: 2
+    shared: true
+    max_concurrent: 1
+  comfyui:
+    max_mb: 5120
+    priority: 4
+model_size_hints:  # validated as dict[str, str] by GpuProfile
+  llm_max_params: 7b
+  image_gen_max: sd15
--- a/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
+++ b/circuitforge_core/resources/profiles/public/single-gpu-8gb.yaml
@@ -0,0 +1,33 @@
+schema_version: 1  # load_profile() in profiles/schema.py rejects any value other than SUPPORTED_SCHEMA_VERSION (1)
+name: single-gpu-8gb
+vram_total_mb: 8192
+eviction_timeout_s: 10.0  # same as the schema default
+services:  # max_mb values below sum to 19632 > vram_total_mb; presumably per-service caps, not simultaneous reservations (confirm)
+  vllm:
+    max_mb: 5120
+    priority: 1
+  ollama:
+    max_mb: 4096
+    priority: 1
+  cf-vision:
+    max_mb: 2048
+    priority: 2
+    shared: true
+    max_concurrent: 3
+  cf-stt:
+    max_mb: 1200
+    priority: 2
+    shared: true
+    max_concurrent: 2
+    backend: parakeet-tdt
+  cf-tts:
+    max_mb: 1024
+    priority: 2
+    shared: true
+    max_concurrent: 2
+  comfyui:
+    max_mb: 6144
+    priority: 4
+model_size_hints:  # validated as dict[str, str] by GpuProfile
+  llm_max_params: 8b
+  image_gen_max: sdxl-fp8
--- a/circuitforge_core/resources/profiles/schema.py
+++ b/circuitforge_core/resources/profiles/schema.py
@@ -0,0 +1,64 @@
+# circuitforge_core/resources/profiles/schema.py
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import yaml
+from pydantic import BaseModel, Field
+
+SUPPORTED_SCHEMA_VERSION = 1
+
+
+class ServiceProfile(BaseModel):
+    # One entry of GpuProfile.services (a value under `services:` in a
+    # profile YAML). Only max_mb and priority are required; everything else
+    # has a default.
+    max_mb: int  # presumably the service's VRAM cap in MB (TODO confirm with the consumer of this schema)
+    priority: int  # ordering semantics (whether lower or higher wins) are not defined in this module
+    shared: bool = False
+    max_concurrent: int = 1
+    always_on: bool = False
+    backend: str | None = None  # free-form string; the public profiles set it only for cf-stt
+    consumers: list[str] = Field(default_factory=list)  # fresh empty list per instance
+
+    # frozen: pydantic rejects attribute assignment on instances.
+    model_config = {"frozen": True}
+
+
+class GpuNodeEntry(BaseModel):
+    # One GPU inside NodeProfile.gpus. id, vram_mb and role are required.
+    id: int
+    vram_mb: int
+    role: str  # free-form; allowed values are not constrained here
+    card: str = "unknown"
+    always_on: bool = False
+    # presumably names matching keys of GpuProfile.services; not
+    # cross-validated by this schema (TODO confirm)
+    services: list[str] = Field(default_factory=list)
+
+    # frozen: pydantic rejects attribute assignment on instances.
+    model_config = {"frozen": True}
+
+
+class NodeProfile(BaseModel):
+    # One entry of GpuProfile.nodes: the GPUs of a node plus optional
+    # connection details. Only gpus is required.
+    gpus: list[GpuNodeEntry]
+    agent_url: str | None = None
+    nas_mount: str | None = None
+
+    # frozen: pydantic rejects attribute assignment on instances.
+    model_config = {"frozen": True}
+
+
+class GpuProfile(BaseModel):
+    # Top-level model for a profile YAML document; this is what
+    # load_profile() returns. Only schema_version and name are required.
+    schema_version: int  # load_profile() checks this against SUPPORTED_SCHEMA_VERSION before validating
+    name: str
+    vram_total_mb: int | None = None
+    eviction_timeout_s: float = 10.0
+    services: dict[str, ServiceProfile] = Field(default_factory=dict)  # keyed by service name
+    model_size_hints: dict[str, str] = Field(default_factory=dict)
+    nodes: dict[str, NodeProfile] = Field(default_factory=dict)  # not set by the public single-GPU profiles
+
+    # frozen: pydantic rejects attribute assignment on instances.
+    model_config = {"frozen": True}
+
+
+def load_profile(path: Path) -> GpuProfile:
+    raw: dict[str, Any] = yaml.safe_load(path.read_text())
+    version = raw.get("schema_version")
+    if version != SUPPORTED_SCHEMA_VERSION:
+        raise ValueError(
+            f"Unsupported schema_version {version!r} in {path}. "
+            f"Expected {SUPPORTED_SCHEMA_VERSION}."
+        )
+    return GpuProfile.model_validate(raw)
--- a/tests/test_resources/test_profile_registry.py
+++ b/tests/test_resources/test_profile_registry.py
@@ -0,0 +1,56 @@
+# tests/test_resources/test_profile_registry.py
+import pytest
+from pathlib import Path
+from circuitforge_core.resources.profiles.schema import (
+    GpuProfile, ServiceProfile, load_profile
+)
+
+FIXTURES = Path(__file__).parent / "fixtures"
+
+
+def test_load_8gb_profile(tmp_path):
+    yaml_content = """
+schema_version: 1
+name: single-gpu-8gb
+vram_total_mb: 8192
+eviction_timeout_s: 10.0
+services:
+  vllm:
+    max_mb: 5120
+    priority: 1
+  cf-vision:
+    max_mb: 2048
+    priority: 2
+    shared: true
+    max_concurrent: 3
+"""
+    profile_file = tmp_path / "test.yaml"
+    profile_file.write_text(yaml_content)
+    profile = load_profile(profile_file)
+
+    assert profile.name == "single-gpu-8gb"
+    assert profile.schema_version == 1
+    assert profile.vram_total_mb == 8192
+    assert profile.eviction_timeout_s == 10.0
+    assert "vllm" in profile.services
+    assert profile.services["vllm"].max_mb == 5120
+    assert profile.services["vllm"].priority == 1
+    assert profile.services["cf-vision"].shared is True
+    assert profile.services["cf-vision"].max_concurrent == 3
+
+
+def test_load_profile_rejects_wrong_schema_version(tmp_path):
+    yaml_content = "schema_version: 99\nname: future\n"
+    profile_file = tmp_path / "future.yaml"
+    profile_file.write_text(yaml_content)
+    with pytest.raises(ValueError, match="schema_version"):
+        load_profile(profile_file)
+
+
+def test_service_profile_defaults():
+    svc = ServiceProfile(max_mb=1024, priority=2)
+    assert svc.shared is False
+    assert svc.max_concurrent == 1
+    assert svc.always_on is False
+    assert svc.backend is None
+    assert svc.consumers == []