feat(resources): add GPU profile schema and public 8GB/6GB/2GB profiles
This commit is contained in:
parent
b774afb6b0
commit
c6a58b6a37
6 changed files with 208 additions and 0 deletions
0
circuitforge_core/resources/profiles/__init__.py
Normal file
0
circuitforge_core/resources/profiles/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
# GPU resource profile for a single 2 GB GPU.
# Loaded and validated by circuitforge_core.resources.profiles.schema.load_profile
# against GpuProfile (schema_version 1).
schema_version: 1
name: single-gpu-2gb
vram_total_mb: 2048
# Longer eviction timeout than the larger profiles: reloads are costlier
# when everything is squeezed into 2 GB.
eviction_timeout_s: 15.0
# NOTE(review): per-service max_mb budgets sum above vram_total_mb
# (1536 + 512 + 200 = 2248 > 2048); presumably shared services are evicted
# on demand rather than co-resident — confirm against the resource manager.
services:
  ollama:
    max_mb: 1536
    priority: 1
  cf-vision:
    max_mb: 512
    priority: 2
    shared: true
    max_concurrent: 1
  cf-stt:
    max_mb: 200
    priority: 2
    shared: true
    max_concurrent: 1
    backend: moonshine
# Advisory hints for model selection; free-form strings, not validated.
model_size_hints:
  llm_max_params: 3b
  image_gen_max: none
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
# GPU resource profile for a single 6 GB GPU.
# Loaded and validated by circuitforge_core.resources.profiles.schema.load_profile
# against GpuProfile (schema_version 1).
schema_version: 1
name: single-gpu-6gb
vram_total_mb: 6144
eviction_timeout_s: 10.0
# NOTE(review): vllm and ollama are both priority 1 and together exceed
# vram_total_mb — presumably only one LLM backend runs at a time; confirm
# against the resource manager.
services:
  vllm:
    max_mb: 4096
    priority: 1
  ollama:
    max_mb: 3584
    priority: 1
  cf-vision:
    max_mb: 1536
    priority: 2
    shared: true
    max_concurrent: 2
  cf-stt:
    max_mb: 600
    priority: 2
    shared: true
    max_concurrent: 2
    backend: faster-whisper
  cf-tts:
    max_mb: 768
    priority: 2
    shared: true
    max_concurrent: 1
  # Image generation gets the lowest priority and most of the card when active.
  comfyui:
    max_mb: 5120
    priority: 4
# Advisory hints for model selection; free-form strings, not validated.
model_size_hints:
  llm_max_params: 7b
  image_gen_max: sd15
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
# GPU resource profile for a single 8 GB GPU.
# Loaded and validated by circuitforge_core.resources.profiles.schema.load_profile
# against GpuProfile (schema_version 1).
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
# NOTE(review): vllm and ollama are both priority 1 and together exceed
# vram_total_mb — presumably only one LLM backend runs at a time; confirm
# against the resource manager.
services:
  vllm:
    max_mb: 5120
    priority: 1
  ollama:
    max_mb: 4096
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
  cf-stt:
    max_mb: 1200
    priority: 2
    shared: true
    max_concurrent: 2
    backend: parakeet-tdt
  cf-tts:
    max_mb: 1024
    priority: 2
    shared: true
    max_concurrent: 2
  # Image generation gets the lowest priority and most of the card when active.
  comfyui:
    max_mb: 6144
    priority: 4
# Advisory hints for model selection; free-form strings, not validated.
model_size_hints:
  llm_max_params: 8b
  image_gen_max: sdxl-fp8
|
||||||
64
circuitforge_core/resources/profiles/schema.py
Normal file
64
circuitforge_core/resources/profiles/schema.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
# circuitforge_core/resources/profiles/schema.py
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
SUPPORTED_SCHEMA_VERSION = 1
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceProfile(BaseModel):
    """Resource envelope for one GPU-backed service within a profile.

    Immutable after validation (``model_config`` sets ``frozen``).
    """

    # Maximum VRAM the service may occupy, in megabytes.
    max_mb: int
    # Relative scheduling priority. The bundled profiles use 1 for primary
    # LLM backends, 2 for shared auxiliary services, and 4 for comfyui;
    # the comparison semantics live in the resource manager, not here.
    priority: int
    # Whether the service participates in shared/evictable VRAM usage.
    # NOTE(review): exact sharing semantics are enforced elsewhere — confirm.
    shared: bool = False
    # Upper bound on concurrent sessions for a shared service.
    max_concurrent: int = 1
    # If true, the service is presumably never evicted to reclaim VRAM;
    # no consumer of this flag is visible in this module.
    always_on: bool = False
    # Optional implementation backend identifier (e.g. "moonshine",
    # "faster-whisper", "parakeet-tdt" in the bundled STT profiles).
    backend: str | None = None
    # Names of downstream components that consume this service.
    # NOTE(review): unused by the bundled profiles shown in this commit.
    consumers: list[str] = Field(default_factory=list)

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
class GpuNodeEntry(BaseModel):
    """Description of one physical GPU on a node (multi-node profiles).

    Immutable after validation.
    """

    # GPU index on the node. The field name mirrors the external schema,
    # hence the builtin-shadowing name "id".
    id: int
    # Total VRAM of this GPU, in megabytes.
    vram_mb: int
    # Free-form role label; allowed values are defined by the profile
    # documents, not validated here.
    role: str
    # Human-readable card model; "unknown" when unspecified.
    card: str = "unknown"
    # If true, services on this GPU are presumably kept resident;
    # no consumer of this flag is visible in this module.
    always_on: bool = False
    # Names of services pinned to this GPU.
    services: list[str] = Field(default_factory=list)

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
class NodeProfile(BaseModel):
    """Per-node configuration for multi-node GPU profiles.

    Immutable after validation.
    """

    # GPUs available on this node.
    gpus: list[GpuNodeEntry]
    # Base URL of the node's management agent, if remote control is used.
    agent_url: str | None = None
    # Mount point of shared NAS storage on this node, if any.
    nas_mount: str | None = None

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
class GpuProfile(BaseModel):
    """Top-level validated GPU resource profile (schema version 1).

    Loaded from YAML via :func:`load_profile`. Immutable after validation.
    """

    # Must equal SUPPORTED_SCHEMA_VERSION; checked by load_profile before
    # model validation, not by this model itself.
    schema_version: int
    # Profile identifier, e.g. "single-gpu-8gb".
    name: str
    # Total VRAM budget in megabytes for single-GPU profiles; None when the
    # profile describes capacity per node via `nodes` instead.
    vram_total_mb: int | None = None
    # Presumably the idle time in seconds before an evictable service is
    # torn down; eviction itself is implemented elsewhere — confirm.
    eviction_timeout_s: float = 10.0
    # Service name -> resource envelope.
    services: dict[str, ServiceProfile] = Field(default_factory=dict)
    # Advisory hints (e.g. llm_max_params, image_gen_max); free-form strings.
    model_size_hints: dict[str, str] = Field(default_factory=dict)
    # Node name -> node layout for multi-node profiles; empty for the
    # bundled single-GPU profiles.
    nodes: dict[str, NodeProfile] = Field(default_factory=dict)

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
def load_profile(path: Path) -> GpuProfile:
    """Load and validate a GPU resource profile from a YAML file.

    Args:
        path: Path to the profile YAML document.

    Returns:
        The validated, immutable ``GpuProfile``.

    Raises:
        ValueError: If the document is not a YAML mapping, or its
            ``schema_version`` differs from ``SUPPORTED_SCHEMA_VERSION``.
    """
    raw: Any = yaml.safe_load(path.read_text(encoding="utf-8"))
    # safe_load returns None for an empty file and scalars/lists for
    # non-mapping documents; fail with a clear message instead of an
    # AttributeError on .get() below.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Profile {path} must be a YAML mapping, "
            f"got {type(raw).__name__}."
        )
    version = raw.get("schema_version")
    if version != SUPPORTED_SCHEMA_VERSION:
        raise ValueError(
            f"Unsupported schema_version {version!r} in {path}. "
            f"Expected {SUPPORTED_SCHEMA_VERSION}."
        )
    return GpuProfile.model_validate(raw)
|
||||||
56
tests/test_resources/test_profile_registry.py
Normal file
56
tests/test_resources/test_profile_registry.py
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
# tests/test_resources/test_profile_registry.py
|
||||||
|
import pytest
|
||||||
|
from pathlib import Path
|
||||||
|
from circuitforge_core.resources.profiles.schema import (
|
||||||
|
GpuProfile, ServiceProfile, load_profile
|
||||||
|
)
|
||||||
|
|
||||||
|
FIXTURES = Path(__file__).parent / "fixtures"
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_8gb_profile(tmp_path):
    """A representative 8 GB profile document round-trips through load_profile."""
    document = """
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 5120
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
"""
    target = tmp_path / "test.yaml"
    target.write_text(document)

    loaded = load_profile(target)

    # Top-level scalar fields survive validation unchanged.
    assert loaded.schema_version == 1
    assert loaded.name == "single-gpu-8gb"
    assert loaded.vram_total_mb == 8192
    assert loaded.eviction_timeout_s == 10.0
    # Service mappings are parsed into ServiceProfile entries.
    assert "vllm" in loaded.services
    vllm = loaded.services["vllm"]
    assert (vllm.max_mb, vllm.priority) == (5120, 1)
    vision = loaded.services["cf-vision"]
    assert vision.shared is True
    assert vision.max_concurrent == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_profile_rejects_wrong_schema_version(tmp_path):
    """load_profile refuses documents whose schema_version is unknown."""
    bad_profile = tmp_path / "future.yaml"
    bad_profile.write_text("schema_version: 99\nname: future\n")
    with pytest.raises(ValueError, match="schema_version"):
        load_profile(bad_profile)
|
||||||
|
|
||||||
|
|
||||||
|
def test_service_profile_defaults():
    """A ServiceProfile built from only the required fields gets the defaults."""
    profile = ServiceProfile(max_mb=1024, priority=2)
    assert profile.consumers == []
    assert profile.backend is None
    assert profile.always_on is False
    assert profile.max_concurrent == 1
    assert profile.shared is False
|
||||||
Loading…
Reference in a new issue