feat(resources): add GPU profile schema and public 8GB/6GB/2GB profiles
This commit is contained in:
parent
b774afb6b0
commit
c6a58b6a37
6 changed files with 208 additions and 0 deletions
0
circuitforge_core/resources/profiles/__init__.py
Normal file
0
circuitforge_core/resources/profiles/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
# GPU resource profile for a single 2 GB GPU.
# Loaded and validated by circuitforge_core.resources.profiles.schema.load_profile
# against GpuProfile (schema_version 1).
schema_version: 1
name: single-gpu-2gb
vram_total_mb: 2048
# Longer eviction timeout than the larger profiles: reloads are costlier
# when everything is squeezed into 2 GB.
eviction_timeout_s: 15.0
# NOTE(review): per-service max_mb budgets sum above vram_total_mb
# (1536 + 512 + 200 = 2248 > 2048); presumably shared services are evicted
# on demand rather than co-resident — confirm against the resource manager.
services:
  ollama:
    max_mb: 1536
    priority: 1
  cf-vision:
    max_mb: 512
    priority: 2
    shared: true
    max_concurrent: 1
  cf-stt:
    max_mb: 200
    priority: 2
    shared: true
    max_concurrent: 1
    backend: moonshine
# Advisory hints for model selection; free-form strings, not validated.
model_size_hints:
  llm_max_params: 3b
  image_gen_max: none
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
# GPU resource profile for a single 6 GB GPU.
# Loaded and validated by circuitforge_core.resources.profiles.schema.load_profile
# against GpuProfile (schema_version 1).
schema_version: 1
name: single-gpu-6gb
vram_total_mb: 6144
eviction_timeout_s: 10.0
# NOTE(review): vllm and ollama are both priority 1 and together exceed
# vram_total_mb — presumably only one LLM backend runs at a time; confirm
# against the resource manager.
services:
  vllm:
    max_mb: 4096
    priority: 1
  ollama:
    max_mb: 3584
    priority: 1
  cf-vision:
    max_mb: 1536
    priority: 2
    shared: true
    max_concurrent: 2
  cf-stt:
    max_mb: 600
    priority: 2
    shared: true
    max_concurrent: 2
    backend: faster-whisper
  cf-tts:
    max_mb: 768
    priority: 2
    shared: true
    max_concurrent: 1
  # Image generation gets the lowest priority and most of the card when active.
  comfyui:
    max_mb: 5120
    priority: 4
# Advisory hints for model selection; free-form strings, not validated.
model_size_hints:
  llm_max_params: 7b
  image_gen_max: sd15
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
# GPU resource profile for a single 8 GB GPU.
# Loaded and validated by circuitforge_core.resources.profiles.schema.load_profile
# against GpuProfile (schema_version 1).
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
# NOTE(review): vllm and ollama are both priority 1 and together exceed
# vram_total_mb — presumably only one LLM backend runs at a time; confirm
# against the resource manager.
services:
  vllm:
    max_mb: 5120
    priority: 1
  ollama:
    max_mb: 4096
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
  cf-stt:
    max_mb: 1200
    priority: 2
    shared: true
    max_concurrent: 2
    backend: parakeet-tdt
  cf-tts:
    max_mb: 1024
    priority: 2
    shared: true
    max_concurrent: 2
  # Image generation gets the lowest priority and most of the card when active.
  comfyui:
    max_mb: 6144
    priority: 4
# Advisory hints for model selection; free-form strings, not validated.
model_size_hints:
  llm_max_params: 8b
  image_gen_max: sdxl-fp8
|
||||||
64
circuitforge_core/resources/profiles/schema.py
Normal file
64
circuitforge_core/resources/profiles/schema.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
# circuitforge_core/resources/profiles/schema.py
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
SUPPORTED_SCHEMA_VERSION = 1
|
||||||
|
|
||||||
|
|
||||||
|
class ServiceProfile(BaseModel):
    """Resource envelope for one GPU-backed service within a profile.

    Immutable after validation (``model_config`` sets ``frozen``).
    """

    # Maximum VRAM the service may occupy, in megabytes.
    max_mb: int
    # Relative scheduling priority. The bundled profiles use 1 for primary
    # LLM backends, 2 for shared auxiliary services, and 4 for comfyui;
    # the comparison semantics live in the resource manager, not here.
    priority: int
    # Whether the service participates in shared/evictable VRAM usage.
    # NOTE(review): exact sharing semantics are enforced elsewhere — confirm.
    shared: bool = False
    # Upper bound on concurrent sessions for a shared service.
    max_concurrent: int = 1
    # If true, the service is presumably never evicted to reclaim VRAM;
    # no consumer of this flag is visible in this module.
    always_on: bool = False
    # Optional implementation backend identifier (e.g. "moonshine",
    # "faster-whisper", "parakeet-tdt" in the bundled STT profiles).
    backend: str | None = None
    # Names of downstream components that consume this service.
    # NOTE(review): unused by the bundled profiles shown in this commit.
    consumers: list[str] = Field(default_factory=list)

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
class GpuNodeEntry(BaseModel):
    """Description of one physical GPU on a node (multi-node profiles).

    Immutable after validation.
    """

    # GPU index on the node. The field name mirrors the external schema,
    # hence the builtin-shadowing name "id".
    id: int
    # Total VRAM of this GPU, in megabytes.
    vram_mb: int
    # Free-form role label; allowed values are defined by the profile
    # documents, not validated here.
    role: str
    # Human-readable card model; "unknown" when unspecified.
    card: str = "unknown"
    # If true, services on this GPU are presumably kept resident;
    # no consumer of this flag is visible in this module.
    always_on: bool = False
    # Names of services pinned to this GPU.
    services: list[str] = Field(default_factory=list)

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
class NodeProfile(BaseModel):
    """Per-node configuration for multi-node GPU profiles.

    Immutable after validation.
    """

    # GPUs available on this node.
    gpus: list[GpuNodeEntry]
    # Base URL of the node's management agent, if remote control is used.
    agent_url: str | None = None
    # Mount point of shared NAS storage on this node, if any.
    nas_mount: str | None = None

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
class GpuProfile(BaseModel):
    """Top-level validated GPU resource profile (schema version 1).

    Loaded from YAML via :func:`load_profile`. Immutable after validation.
    """

    # Must equal SUPPORTED_SCHEMA_VERSION; checked by load_profile before
    # model validation, not by this model itself.
    schema_version: int
    # Profile identifier, e.g. "single-gpu-8gb".
    name: str
    # Total VRAM budget in megabytes for single-GPU profiles; None when the
    # profile describes capacity per node via `nodes` instead.
    vram_total_mb: int | None = None
    # Presumably the idle time in seconds before an evictable service is
    # torn down; eviction itself is implemented elsewhere — confirm.
    eviction_timeout_s: float = 10.0
    # Service name -> resource envelope.
    services: dict[str, ServiceProfile] = Field(default_factory=dict)
    # Advisory hints (e.g. llm_max_params, image_gen_max); free-form strings.
    model_size_hints: dict[str, str] = Field(default_factory=dict)
    # Node name -> node layout for multi-node profiles; empty for the
    # bundled single-GPU profiles.
    nodes: dict[str, NodeProfile] = Field(default_factory=dict)

    model_config = {"frozen": True}
|
||||||
|
|
||||||
|
|
||||||
|
def load_profile(path: Path) -> GpuProfile:
    """Load and validate a GPU resource profile from a YAML file.

    Args:
        path: Path to the profile YAML document.

    Returns:
        The validated, immutable ``GpuProfile``.

    Raises:
        ValueError: If the document is not a YAML mapping, or its
            ``schema_version`` differs from ``SUPPORTED_SCHEMA_VERSION``.
    """
    raw: Any = yaml.safe_load(path.read_text(encoding="utf-8"))
    # safe_load returns None for an empty file and scalars/lists for
    # non-mapping documents; fail with a clear message instead of an
    # AttributeError on .get() below.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Profile {path} must be a YAML mapping, "
            f"got {type(raw).__name__}."
        )
    version = raw.get("schema_version")
    if version != SUPPORTED_SCHEMA_VERSION:
        raise ValueError(
            f"Unsupported schema_version {version!r} in {path}. "
            f"Expected {SUPPORTED_SCHEMA_VERSION}."
        )
    return GpuProfile.model_validate(raw)
|
||||||
56
tests/test_resources/test_profile_registry.py
Normal file
56
tests/test_resources/test_profile_registry.py
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
# tests/test_resources/test_profile_registry.py
|
||||||
|
import pytest
|
||||||
|
from pathlib import Path
|
||||||
|
from circuitforge_core.resources.profiles.schema import (
|
||||||
|
GpuProfile, ServiceProfile, load_profile
|
||||||
|
)
|
||||||
|
|
||||||
|
FIXTURES = Path(__file__).parent / "fixtures"
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_8gb_profile(tmp_path):
    """A representative 8 GB profile document round-trips through load_profile."""
    document = """
schema_version: 1
name: single-gpu-8gb
vram_total_mb: 8192
eviction_timeout_s: 10.0
services:
  vllm:
    max_mb: 5120
    priority: 1
  cf-vision:
    max_mb: 2048
    priority: 2
    shared: true
    max_concurrent: 3
"""
    target = tmp_path / "test.yaml"
    target.write_text(document)

    loaded = load_profile(target)

    # Top-level scalar fields survive validation unchanged.
    assert loaded.schema_version == 1
    assert loaded.name == "single-gpu-8gb"
    assert loaded.vram_total_mb == 8192
    assert loaded.eviction_timeout_s == 10.0
    # Service mappings are parsed into ServiceProfile entries.
    assert "vllm" in loaded.services
    vllm = loaded.services["vllm"]
    assert (vllm.max_mb, vllm.priority) == (5120, 1)
    vision = loaded.services["cf-vision"]
    assert vision.shared is True
    assert vision.max_concurrent == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_profile_rejects_wrong_schema_version(tmp_path):
    """load_profile refuses documents whose schema_version is unknown."""
    bad_profile = tmp_path / "future.yaml"
    bad_profile.write_text("schema_version: 99\nname: future\n")
    with pytest.raises(ValueError, match="schema_version"):
        load_profile(bad_profile)
|
||||||
|
|
||||||
|
|
||||||
|
def test_service_profile_defaults():
    """A ServiceProfile built from only the required fields gets the defaults."""
    profile = ServiceProfile(max_mb=1024, priority=2)
    assert profile.consumers == []
    assert profile.backend is None
    assert profile.always_on is False
    assert profile.max_concurrent == 1
    assert profile.shared is False
|
||||||
Loading…
Reference in a new issue