feat(resources): add GPU profile schema and public 8GB/6GB/2GB profiles

This commit is contained in:
pyr0ball 2026-03-30 20:28:06 -07:00
parent b774afb6b0
commit c6a58b6a37
6 changed files with 208 additions and 0 deletions

View file

@ -0,0 +1,22 @@
# Minimal profile for a single 2 GB GPU: one small LLM runtime plus
# lightweight vision/STT sidecars. Sums of max_mb exceed vram_total_mb,
# so services are presumably swapped in/out by the scheduler — confirm.
schema_version: 1
name: single-gpu-2gb
vram_total_mb: 2048
# NOTE(review): presumably seconds an idle service may hold VRAM before
# eviction — confirm semantics against the scheduler.
eviction_timeout_s: 15.0
services:
  ollama:
    max_mb: 1536
    priority: 1
  cf-vision:
    max_mb: 512
    priority: 2
    shared: true
    max_concurrent: 1
  cf-stt:
    max_mb: 200
    priority: 2
    shared: true
    max_concurrent: 1
    backend: moonshine  # smallest STT backend of the three tiers
# Advisory sizing hints (free-form strings, see GpuProfile.model_size_hints).
model_size_hints:
  llm_max_params: 3b
  image_gen_max: none  # image generation disabled on this tier

View file

@ -0,0 +1,33 @@
# Profile for a single 6 GB GPU. Declares both vllm and ollama at
# priority 1 (4096 + 3584 MB > 6144 MB total), so the two LLM runtimes
# are presumably mutually exclusive at runtime — confirm with scheduler.
schema_version: 1
name: single-gpu-6gb
vram_total_mb: 6144
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 4096
    priority: 1
  ollama:
    max_mb: 3584
    priority: 1
  cf-vision:
    max_mb: 1536
    priority: 2
    shared: true
    max_concurrent: 2
  cf-stt:
    max_mb: 600
    priority: 2
    shared: true
    max_concurrent: 2
    backend: faster-whisper  # mid-tier STT backend
  cf-tts:
    max_mb: 768
    priority: 2
    shared: true
    max_concurrent: 1
  comfyui:
    max_mb: 5120
    priority: 4
# Advisory sizing hints consumed elsewhere (stored as plain strings).
model_size_hints:
  llm_max_params: 7b
  image_gen_max: sd15

View file

@ -0,0 +1,33 @@
# Profile for a single 8 GB GPU — largest of the three public tiers.
# Service budgets overlap vram_total_mb; the scheduler presumably
# time-shares the card (see eviction_timeout_s) — confirm.
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 5120
    priority: 1
  ollama:
    max_mb: 4096
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
  cf-stt:
    max_mb: 1200
    priority: 2
    shared: true
    max_concurrent: 2
    backend: parakeet-tdt  # highest-quality STT backend of the tiers
  cf-tts:
    max_mb: 1024
    priority: 2
    shared: true
    max_concurrent: 2
  comfyui:
    max_mb: 6144
    priority: 4
# Advisory sizing hints (free-form strings).
model_size_hints:
  llm_max_params: 8b
  image_gen_max: sdxl-fp8

View file

@ -0,0 +1,64 @@
# circuitforge_core/resources/profiles/schema.py
from __future__ import annotations
from pathlib import Path
from typing import Any
import yaml
from pydantic import BaseModel, Field
# The only profile schema version this loader accepts; load_profile()
# raises ValueError for any other declared schema_version.
SUPPORTED_SCHEMA_VERSION = 1
class ServiceProfile(BaseModel):
    """Resource limits and scheduling attributes for one GPU service."""

    max_mb: int  # VRAM budget for this service, in MiB
    priority: int  # scheduling priority — TODO confirm ordering semantics (lower vs higher wins)
    shared: bool = False  # NOTE(review): presumably may co-reside with other shared services — confirm
    max_concurrent: int = 1  # maximum simultaneous requests/instances — presumably; confirm
    always_on: bool = False  # if true, exempt from eviction — presumably; confirm
    backend: str | None = None  # optional backend implementation name (e.g. "moonshine")
    consumers: list[str] = Field(default_factory=list)  # names of dependent services — TODO confirm meaning

    # Frozen model: instances are immutable after validation.
    model_config = {"frozen": True}
class GpuNodeEntry(BaseModel):
    """One physical GPU as declared inside a NodeProfile."""

    id: int  # GPU index on the host
    vram_mb: int  # total VRAM of this card, in MiB
    role: str  # free-form role label — TODO confirm the expected vocabulary
    card: str = "unknown"  # human-readable card model name
    always_on: bool = False
    services: list[str] = Field(default_factory=list)  # service names assigned to this GPU — presumably; confirm

    # Frozen model: instances are immutable after validation.
    model_config = {"frozen": True}
class NodeProfile(BaseModel):
    """Per-host configuration for multi-node profile layouts."""

    gpus: list[GpuNodeEntry]  # GPUs available on this node
    agent_url: str | None = None  # URL of a remote agent for this node, if any — confirm protocol
    nas_mount: str | None = None  # shared-storage mount path, if any

    # Frozen model: instances are immutable after validation.
    model_config = {"frozen": True}
class GpuProfile(BaseModel):
    """Top-level GPU resource profile (schema_version 1)."""

    schema_version: int  # must equal SUPPORTED_SCHEMA_VERSION for load_profile() to accept it
    name: str  # profile identifier, e.g. "single-gpu-8gb"
    vram_total_mb: int | None = None  # total VRAM budget; None presumably when nodes declare their own — confirm
    eviction_timeout_s: float = 10.0  # NOTE(review): presumably idle seconds before eviction — confirm semantics
    services: dict[str, ServiceProfile] = Field(default_factory=dict)  # keyed by service name
    model_size_hints: dict[str, str] = Field(default_factory=dict)  # advisory free-form hints (e.g. llm_max_params)
    nodes: dict[str, NodeProfile] = Field(default_factory=dict)  # multi-node layouts; empty for single-GPU profiles

    # Frozen model: instances are immutable after validation.
    model_config = {"frozen": True}
def load_profile(path: Path) -> GpuProfile:
    """Load and validate a GPU profile from a YAML file.

    Args:
        path: Filesystem path of the profile YAML document.

    Returns:
        A validated, frozen ``GpuProfile`` instance.

    Raises:
        ValueError: If the document is not a YAML mapping, or declares a
            ``schema_version`` other than ``SUPPORTED_SCHEMA_VERSION``.
    """
    raw: Any = yaml.safe_load(path.read_text(encoding="utf-8"))
    # safe_load returns None for an empty file and scalars/lists for
    # non-mapping documents; guard so callers get a clear error instead
    # of an AttributeError from raw.get() below.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Profile {path} must be a YAML mapping, got {type(raw).__name__}."
        )
    version = raw.get("schema_version")
    if version != SUPPORTED_SCHEMA_VERSION:
        raise ValueError(
            f"Unsupported schema_version {version!r} in {path}. "
            f"Expected {SUPPORTED_SCHEMA_VERSION}."
        )
    return GpuProfile.model_validate(raw)

View file

@ -0,0 +1,56 @@
# tests/test_resources/test_profile_registry.py
import pytest
from pathlib import Path
from circuitforge_core.resources.profiles.schema import (
GpuProfile, ServiceProfile, load_profile
)
# Directory for on-disk profile fixtures; not referenced by the tests
# visible in this module.
FIXTURES = Path(__file__).parent / "fixtures"
def test_load_8gb_profile(tmp_path):
    """load_profile round-trips a valid v1 document into a GpuProfile."""
    yaml_content = """
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 5120
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
"""
    profile_file = tmp_path / "test.yaml"
    profile_file.write_text(yaml_content)
    profile = load_profile(profile_file)
    # Top-level scalars survive validation unchanged.
    assert profile.name == "single-gpu-8gb"
    assert profile.schema_version == 1
    assert profile.vram_total_mb == 8192
    assert profile.eviction_timeout_s == 10.0
    # Nested service entries are parsed into ServiceProfile models.
    assert "vllm" in profile.services
    assert profile.services["vllm"].max_mb == 5120
    assert profile.services["vllm"].priority == 1
    assert profile.services["cf-vision"].shared is True
    assert profile.services["cf-vision"].max_concurrent == 3
def test_load_profile_rejects_wrong_schema_version(tmp_path):
    """Documents declaring an unsupported schema_version raise ValueError."""
    yaml_content = "schema_version: 99\nname: future\n"
    profile_file = tmp_path / "future.yaml"
    profile_file.write_text(yaml_content)
    # The error message mentions the offending key, so match on it.
    with pytest.raises(ValueError, match="schema_version"):
        load_profile(profile_file)
def test_service_profile_defaults():
    """ServiceProfile fills documented defaults when only required fields are given."""
    svc = ServiceProfile(max_mb=1024, priority=2)
    assert svc.shared is False
    assert svc.max_concurrent == 1
    assert svc.always_on is False
    assert svc.backend is None
    assert svc.consumers == []