From 8d9b55ef8f0bdf78788d8e5575f030a005bb77a6 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Fri, 24 Apr 2026 14:07:01 -0700 Subject: [PATCH] chore: add cf-musicgen to VRAM tier lists + minor tts type fixes - hardware/tiers.py: register cf-musicgen in 8GB, 16GB, and 32GB VRAM tiers - tts/app.py: use inline type comment for _backend to avoid runtime global warning - tts/backends/base.py: fall back to wav when ogg/vorbis encoding is unavailable in the torchaudio build --- circuitforge_core/hardware/tiers.py | 6 +++--- circuitforge_core/tts/app.py | 3 +-- circuitforge_core/tts/backends/base.py | 7 ++++++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/circuitforge_core/hardware/tiers.py b/circuitforge_core/hardware/tiers.py index a4b7229..b04345a 100644 --- a/circuitforge_core/hardware/tiers.py +++ b/circuitforge_core/hardware/tiers.py @@ -69,7 +69,7 @@ VRAM_TIERS: list[VramTier] = [ profile_name="single-gpu-8gb", ollama_model="qwen2.5:7b-instruct", vllm_candidates=["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"], - services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts"], + services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts", "cf-musicgen"], llm_max_params="8b", ), VramTier( @@ -79,7 +79,7 @@ VRAM_TIERS: list[VramTier] = [ ollama_model="qwen2.5:14b-instruct-q4_k_m", vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"], services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts", - "cf-embed", "cf-classify"], + "cf-musicgen", "cf-embed", "cf-classify"], llm_max_params="14b", ), VramTier( @@ -89,7 +89,7 @@ VRAM_TIERS: list[VramTier] = [ ollama_model="qwen2.5:32b-instruct-q4_k_m", vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"], services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts", - "cf-embed", "cf-classify", "comfyui"], + "cf-musicgen", "cf-embed", "cf-classify", "comfyui"], llm_max_params="32b-q4", ), ] diff --git a/circuitforge_core/tts/app.py 
b/circuitforge_core/tts/app.py index fb9e9e8..54557fa 100644 --- a/circuitforge_core/tts/app.py +++ b/circuitforge_core/tts/app.py @@ -29,7 +29,7 @@ _CONTENT_TYPES: dict[str, str] = { } app = FastAPI(title="cf-tts") -_backend: TTSBackend | None = None +_backend = None # type: TTSBackend | None @app.get("/health") @@ -96,7 +96,6 @@ if __name__ == "__main__": mock = args.mock or args.model == "mock" device = "cpu" if mock else "cuda" - global _backend _backend = make_tts_backend(args.model, mock=mock, device=device) print(f"cf-tts backend ready: {_backend.model_name} ({_backend.vram_mb} MB)") diff --git a/circuitforge_core/tts/backends/base.py b/circuitforge_core/tts/backends/base.py index fe9859a..e5feac3 100644 --- a/circuitforge_core/tts/backends/base.py +++ b/circuitforge_core/tts/backends/base.py @@ -60,7 +60,12 @@ def _encode_audio( if format == "wav": torchaudio.save(buf, wav, sample_rate, format="wav") elif format == "ogg": - torchaudio.save(buf, wav, sample_rate, format="ogg", encoding="vorbis") + # libvorbis may not be available on all torchaudio builds; fall back to wav + try: + torchaudio.save(buf, wav, sample_rate, format="ogg", encoding="vorbis") + except Exception: + buf = io.BytesIO() + torchaudio.save(buf, wav, sample_rate, format="wav") elif format == "mp3": # torchaudio MP3 encode requires ffmpeg backend; fall back to wav on failure try: