feat(resources): add GPU profile schema and public 8GB/6GB/2GB profiles
This commit is contained in:
parent
b774afb6b0
commit
c6a58b6a37
6 changed files with 208 additions and 0 deletions
0
circuitforge_core/resources/profiles/__init__.py
Normal file
0
circuitforge_core/resources/profiles/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-2gb
|
||||
vram_total_mb: 2048
|
||||
eviction_timeout_s: 15.0
|
||||
services:
|
||||
ollama:
|
||||
max_mb: 1536
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 512
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
cf-stt:
|
||||
max_mb: 200
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
backend: moonshine
|
||||
model_size_hints:
|
||||
llm_max_params: 3b
|
||||
image_gen_max: none
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-6gb
|
||||
vram_total_mb: 6144
|
||||
eviction_timeout_s: 10.0
|
||||
services:
|
||||
vllm:
|
||||
max_mb: 4096
|
||||
priority: 1
|
||||
ollama:
|
||||
max_mb: 3584
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 1536
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
cf-stt:
|
||||
max_mb: 600
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
backend: faster-whisper
|
||||
cf-tts:
|
||||
max_mb: 768
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 1
|
||||
comfyui:
|
||||
max_mb: 5120
|
||||
priority: 4
|
||||
model_size_hints:
|
||||
llm_max_params: 7b
|
||||
image_gen_max: sd15
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
schema_version: 1
|
||||
name: single-gpu-8gb
|
||||
vram_total_mb: 8192
|
||||
eviction_timeout_s: 10.0
|
||||
services:
|
||||
vllm:
|
||||
max_mb: 5120
|
||||
priority: 1
|
||||
ollama:
|
||||
max_mb: 4096
|
||||
priority: 1
|
||||
cf-vision:
|
||||
max_mb: 2048
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 3
|
||||
cf-stt:
|
||||
max_mb: 1200
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
backend: parakeet-tdt
|
||||
cf-tts:
|
||||
max_mb: 1024
|
||||
priority: 2
|
||||
shared: true
|
||||
max_concurrent: 2
|
||||
comfyui:
|
||||
max_mb: 6144
|
||||
priority: 4
|
||||
model_size_hints:
|
||||
llm_max_params: 8b
|
||||
image_gen_max: sdxl-fp8
|
||||
64
circuitforge_core/resources/profiles/schema.py
Normal file
64
circuitforge_core/resources/profiles/schema.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# circuitforge_core/resources/profiles/schema.py
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# The only profile ``schema_version`` value that ``load_profile`` accepts.
SUPPORTED_SCHEMA_VERSION = 1
|
||||
|
||||
|
||||
class ServiceProfile(BaseModel):
    """Settings for one named service inside a GPU profile.

    Instances are frozen (immutable after construction). ``max_mb`` and
    ``priority`` must be supplied; every other field has a default.
    """

    model_config = {"frozen": True}

    # --- required ---
    # NOTE(review): presumably the service's memory ceiling in MB -- confirm
    # against the code that consumes this value.
    max_mb: int
    # NOTE(review): ordering direction (low-wins vs high-wins) is not defined
    # in this module -- confirm with the consumer.
    priority: int

    # --- optional, with defaults ---
    shared: bool = Field(default=False)
    max_concurrent: int = Field(default=1)
    always_on: bool = Field(default=False)
    # Free-form backend identifier; None when the profile does not name one.
    backend: str | None = Field(default=None)
    consumers: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class GpuNodeEntry(BaseModel):
    """Description of a single GPU listed under a node.

    Instances are frozen. ``id``, ``vram_mb`` and ``role`` are required.
    """

    model_config = {"frozen": True}

    # --- required ---
    id: int
    # NOTE(review): presumably this GPU's memory in MB -- confirm.
    vram_mb: int
    role: str

    # --- optional, with defaults ---
    card: str = Field(default="unknown")
    always_on: bool = Field(default=False)
    # NOTE(review): presumably service names matching keys of a profile's
    # ``services`` mapping -- nothing here enforces that; confirm.
    services: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
class NodeProfile(BaseModel):
    """A node and the GPUs it carries.

    Instances are frozen. ``gpus`` is required; the remaining fields are
    optional and default to ``None``.
    """

    model_config = {"frozen": True}

    gpus: list[GpuNodeEntry]

    # Both are optional plain strings; no URL or path validation is applied.
    agent_url: str | None = Field(default=None)
    nas_mount: str | None = Field(default=None)
|
||||
|
||||
|
||||
class GpuProfile(BaseModel):
    """Top-level document model for a GPU profile.

    Instances are frozen. ``schema_version`` and ``name`` are required;
    all other sections are optional and default to empty/``None``.
    """

    model_config = {"frozen": True}

    # --- required header ---
    schema_version: int
    name: str

    # --- optional scalars ---
    vram_total_mb: int | None = Field(default=None)
    eviction_timeout_s: float = Field(default=10.0)

    # --- optional sections, each keyed by name ---
    services: dict[str, ServiceProfile] = Field(default_factory=dict)
    model_size_hints: dict[str, str] = Field(default_factory=dict)
    nodes: dict[str, NodeProfile] = Field(default_factory=dict)
|
||||
|
||||
|
||||
def load_profile(path: Path) -> GpuProfile:
    """Read a YAML profile from *path* and validate it as a ``GpuProfile``.

    The ``schema_version`` key is checked before model validation so that a
    profile written for a different schema fails with a targeted message
    rather than a list of field errors.

    Args:
        path: Location of the YAML profile file.

    Returns:
        The validated, frozen ``GpuProfile``.

    Raises:
        ValueError: If the document's top level is not a mapping (this
            includes an empty file), or if ``schema_version`` is missing or
            not equal to ``SUPPORTED_SCHEMA_VERSION``.

    Errors from reading the file, parsing the YAML, or
    ``GpuProfile.model_validate`` propagate unchanged.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    raw: Any = yaml.safe_load(path.read_text(encoding="utf-8"))

    # safe_load yields None for an empty document and a list/scalar for
    # non-mapping documents; reject those up front instead of failing with
    # an AttributeError on ``.get`` below.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Cannot read schema_version from {path}: expected a YAML "
            f"mapping at the top level, got {type(raw).__name__}."
        )

    version = raw.get("schema_version")
    if version != SUPPORTED_SCHEMA_VERSION:
        raise ValueError(
            f"Unsupported schema_version {version!r} in {path}. "
            f"Expected {SUPPORTED_SCHEMA_VERSION}."
        )
    return GpuProfile.model_validate(raw)
|
||||
56
tests/test_resources/test_profile_registry.py
Normal file
56
tests/test_resources/test_profile_registry.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# tests/test_resources/test_profile_registry.py
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from circuitforge_core.resources.profiles.schema import (
|
||||
GpuProfile, ServiceProfile, load_profile
|
||||
)
|
||||
|
||||
# Path to a "fixtures" directory beside this test module.
FIXTURES = Path(__file__).parent / "fixtures"
|
||||
|
||||
|
||||
def test_load_8gb_profile(tmp_path):
    """A profile written to disk is parsed field-for-field by load_profile."""
    profile_file = tmp_path / "test.yaml"
    profile_file.write_text(
        """
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 5120
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
"""
    )

    profile = load_profile(profile_file)

    # Top-level scalars.
    assert profile.name == "single-gpu-8gb"
    assert profile.schema_version == 1
    assert profile.vram_total_mb == 8192
    assert profile.eviction_timeout_s == 10.0

    # Per-service entries.
    assert "vllm" in profile.services
    vllm = profile.services["vllm"]
    assert vllm.max_mb == 5120
    assert vllm.priority == 1

    vision = profile.services["cf-vision"]
    assert vision.shared is True
    assert vision.max_concurrent == 3
|
||||
|
||||
|
||||
def test_load_profile_rejects_wrong_schema_version(tmp_path):
    """load_profile raises ValueError for a schema_version it does not support."""
    future_profile = tmp_path / "future.yaml"
    future_profile.write_text("schema_version: 99\nname: future\n")

    with pytest.raises(ValueError, match="schema_version"):
        load_profile(future_profile)
|
||||
|
||||
|
||||
def test_service_profile_defaults():
    """Fields omitted when building a ServiceProfile take their declared defaults."""
    service = ServiceProfile(max_mb=1024, priority=2)

    # Boolean flags default to off.
    assert service.shared is False
    assert service.always_on is False

    # Remaining optional fields.
    assert service.max_concurrent == 1
    assert service.backend is None
    assert service.consumers == []
|
||||
Loading…
Reference in a new issue