Compare commits
No commits in common. "383897f990b8fb8f5d9aa53274eac40a059b8682" and "82f0b4c3d04672f63a65c06af4bbac14fa00cc84" have entirely different histories.
383897f990
...
82f0b4c3d0
43 changed files with 16 additions and 2904 deletions
|
|
@ -1,42 +0,0 @@
|
||||||
-- 005_recipe_tags.sql
|
|
||||||
-- Community-contributed recipe subcategory tags.
|
|
||||||
--
|
|
||||||
-- Users can tag corpus recipes (from a product's local recipe dataset) with a
|
|
||||||
-- domain/category/subcategory from that product's browse taxonomy. Tags are
|
|
||||||
-- keyed by (recipe_source, recipe_ref) so a single table serves all CF products
|
|
||||||
-- that have a recipe corpus (currently: kiwi).
|
|
||||||
--
|
|
||||||
-- Acceptance threshold: upvotes >= 2 (submitter's implicit vote counts as 1,
|
|
||||||
-- so one additional voter is enough to publish). Browse counts caches merge
|
|
||||||
-- accepted tags into subcategory totals on each nightly refresh.
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS recipe_tags (
    id              BIGSERIAL   PRIMARY KEY,
    recipe_source   TEXT        NOT NULL CHECK (recipe_source IN ('corpus')),
    recipe_ref      TEXT        NOT NULL,  -- corpus integer recipe ID stored as text
    domain          TEXT        NOT NULL,
    category        TEXT        NOT NULL,
    subcategory     TEXT,                  -- NULL = category-level tag (no subcategory)
    pseudonym       TEXT        NOT NULL,
    upvotes         INTEGER     NOT NULL DEFAULT 1,  -- starts at 1 (submitter's own vote)
    source_product  TEXT        NOT NULL DEFAULT 'kiwi',
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- one tag per (recipe, location, user) — prevents submitting the same tag twice
    -- when a subcategory is given (NULL subcategories are covered by the partial
    -- unique index below).
    UNIQUE (recipe_source, recipe_ref, domain, category, subcategory, pseudonym)
);

-- A UNIQUE constraint treats NULLs as distinct, so the constraint above does
-- not stop a user from submitting the same category-level tag
-- (subcategory IS NULL) more than once. This partial index closes that gap.
CREATE UNIQUE INDEX IF NOT EXISTS uq_recipe_tags_category_level
    ON recipe_tags (recipe_source, recipe_ref, domain, category, pseudonym)
    WHERE subcategory IS NULL;

-- Serves lookups of accepted tags (upvotes >= 2) by browse location.
CREATE INDEX IF NOT EXISTS idx_recipe_tags_lookup
    ON recipe_tags (source_product, domain, category, subcategory)
    WHERE upvotes >= 2;

-- Serves "all tags for this recipe" lookups.
CREATE INDEX IF NOT EXISTS idx_recipe_tags_recipe
    ON recipe_tags (recipe_source, recipe_ref);

-- Tracks who voted on which tag to prevent double-voting.
-- The submitter's self-vote is inserted here at submission time.
CREATE TABLE IF NOT EXISTS recipe_tag_votes (
    tag_id    BIGINT      NOT NULL REFERENCES recipe_tags(id) ON DELETE CASCADE,
    pseudonym TEXT        NOT NULL,
    voted_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (tag_id, pseudonym)
);
|
|
||||||
|
|
@ -207,170 +207,3 @@ class SharedStore:
|
||||||
raise
|
raise
|
||||||
finally:
|
finally:
|
||||||
self._db.putconn(conn)
|
self._db.putconn(conn)
|
||||||
|
|
||||||
# ── Recipe tags ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def submit_recipe_tag(
    self,
    recipe_id: int,
    domain: str,
    category: str,
    subcategory: str | None,
    pseudonym: str,
    source_product: str = "kiwi",
) -> dict:
    """Create a community tag for a corpus recipe and record the self-vote.

    The tag row is inserted with ``upvotes=1`` and a matching row is written
    to ``recipe_tag_votes`` for the submitter, both in one transaction.

    Returns:
        The columns named in the RETURNING clause, as a dict.

    Raises:
        Any database error is re-raised after a rollback. The original
        contract names psycopg2.errors.UniqueViolation for a duplicate tag
        by the same user; the caller is expected to handle it.
    """
    insert_tag_sql = """
        INSERT INTO recipe_tags
            (recipe_source, recipe_ref, domain, category, subcategory,
             pseudonym, upvotes, source_product)
        VALUES ('corpus', %s, %s, %s, %s, %s, 1, %s)
        RETURNING id, recipe_ref, domain, category, subcategory,
                  pseudonym, upvotes, created_at
    """
    insert_vote_sql = "INSERT INTO recipe_tag_votes (tag_id, pseudonym) VALUES (%s, %s)"
    tag_params = (
        str(recipe_id), domain, category, subcategory,
        pseudonym, source_product,
    )

    connection = self._db.getconn()
    try:
        with connection.cursor() as cursor:
            cursor.execute(insert_tag_sql, tag_params)
            column_names = [column[0] for column in cursor.description]
            tag = dict(zip(column_names, cursor.fetchone()))
            # The submitter counts as the first voter.
            cursor.execute(insert_vote_sql, (tag["id"], pseudonym))
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    else:
        return tag
    finally:
        # Always hand the connection back to the pool.
        self._db.putconn(connection)
|
|
||||||
|
|
||||||
def upvote_recipe_tag(self, tag_id: int, pseudonym: str) -> int:
    """Add an upvote to a tag on behalf of ``pseudonym``.

    Args:
        tag_id: Primary key of the ``recipe_tags`` row to upvote.
        pseudonym: Identity of the voter.

    Returns:
        The tag's upvote count after this vote.

    Raises:
        ValueError: If no tag with ``tag_id`` exists.
        Exception: Any database error is re-raised after a rollback
            (the original contract names psycopg2.errors.UniqueViolation
            when this pseudonym has already voted).
    """
    conn = self._db.getconn()
    try:
        with conn.cursor() as cur:
            # Bump the counter first. If the tag does not exist the UPDATE
            # matches no row and we can raise the documented ValueError;
            # inserting the vote first would instead fail on the foreign
            # key to recipe_tags before this check was ever reached.
            cur.execute(
                "UPDATE recipe_tags SET upvotes = upvotes + 1 WHERE id = %s"
                " RETURNING upvotes",
                (tag_id,),
            )
            row = cur.fetchone()
            if row is None:
                raise ValueError(f"recipe_tag {tag_id} not found")
            # The (tag_id, pseudonym) primary key rejects a second vote; the
            # rollback below then also undoes the counter bump.
            cur.execute(
                "INSERT INTO recipe_tag_votes (tag_id, pseudonym) VALUES (%s, %s)",
                (tag_id, pseudonym),
            )
        conn.commit()
        return row[0]
    except Exception:
        conn.rollback()
        raise
    finally:
        self._db.putconn(conn)
|
|
||||||
|
|
||||||
def get_recipe_tag_by_id(self, tag_id: int) -> dict | None:
    """Look up one ``recipe_tags`` row by primary key.

    Returns:
        A dict keyed by the selected column names, or None when no row
        has the given ID.
    """
    query = """
        SELECT id, recipe_ref, domain, category, subcategory,
               pseudonym, upvotes, created_at
        FROM recipe_tags WHERE id = %s
    """
    connection = self._db.getconn()
    try:
        with connection.cursor() as cursor:
            cursor.execute(query, (tag_id,))
            record = cursor.fetchone()
            if record is not None:
                names = [column[0] for column in cursor.description]
                return dict(zip(names, record))
            return None
    finally:
        # Return the connection to the pool on every path.
        self._db.putconn(connection)
|
|
||||||
|
|
||||||
def list_tags_for_recipe(
    self,
    recipe_id: int,
    source_product: str = "kiwi",
) -> list[dict]:
    """List every tag on a corpus recipe, whether accepted or not.

    Rows are ordered by upvotes (highest first), with ties broken by
    creation time (newest first).

    Returns:
        One dict per tag, keyed by the selected column names.
    """
    query = """
        SELECT id, domain, category, subcategory, pseudonym,
               upvotes, created_at
        FROM recipe_tags
        WHERE recipe_source = 'corpus'
          AND recipe_ref = %s
          AND source_product = %s
        ORDER BY upvotes DESC, created_at DESC
    """
    connection = self._db.getconn()
    try:
        with connection.cursor() as cursor:
            # recipe_ref is a TEXT column, so the integer ID is passed as text.
            cursor.execute(query, (str(recipe_id), source_product))
            names = [column[0] for column in cursor.description]
            tags = []
            for record in cursor.fetchall():
                tags.append(dict(zip(names, record)))
            return tags
    finally:
        self._db.putconn(connection)
|
|
||||||
|
|
||||||
def get_accepted_recipe_ids_for_subcategory(
    self,
    domain: str,
    category: str,
    subcategory: str | None,
    source_product: str = "kiwi",
    threshold: int = 2,
) -> list[int]:
    """Return corpus recipe IDs whose community tag for a location is accepted.

    A tag counts as accepted once its upvotes reach ``threshold``. Passing
    ``subcategory=None`` selects category-level tags (rows whose
    subcategory is NULL) rather than matching any subcategory.

    Returns:
        Distinct recipe IDs, cast from the text ``recipe_ref`` column.
    """
    # NULL needs IS NULL rather than "= %s", hence two query variants.
    if subcategory is None:
        query = """
            SELECT DISTINCT recipe_ref::INTEGER
            FROM recipe_tags
            WHERE source_product = %s
              AND domain = %s AND category = %s
              AND subcategory IS NULL
              AND upvotes >= %s
        """
        params = (source_product, domain, category, threshold)
    else:
        query = """
            SELECT DISTINCT recipe_ref::INTEGER
            FROM recipe_tags
            WHERE source_product = %s
              AND domain = %s AND category = %s
              AND subcategory = %s
              AND upvotes >= %s
        """
        params = (source_product, domain, category, subcategory, threshold)

    connection = self._db.getconn()
    try:
        with connection.cursor() as cursor:
            cursor.execute(query, params)
            return [record[0] for record in cursor.fetchall()]
    finally:
        self._db.putconn(connection)
|
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,7 @@ VRAM_TIERS: list[VramTier] = [
|
||||||
profile_name="single-gpu-8gb",
|
profile_name="single-gpu-8gb",
|
||||||
ollama_model="qwen2.5:7b-instruct",
|
ollama_model="qwen2.5:7b-instruct",
|
||||||
vllm_candidates=["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
|
vllm_candidates=["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
|
||||||
services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts", "cf-musicgen"],
|
services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts"],
|
||||||
llm_max_params="8b",
|
llm_max_params="8b",
|
||||||
),
|
),
|
||||||
VramTier(
|
VramTier(
|
||||||
|
|
@ -79,7 +79,7 @@ VRAM_TIERS: list[VramTier] = [
|
||||||
ollama_model="qwen2.5:14b-instruct-q4_k_m",
|
ollama_model="qwen2.5:14b-instruct-q4_k_m",
|
||||||
vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
|
vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
|
||||||
services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts",
|
services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts",
|
||||||
"cf-musicgen", "cf-embed", "cf-classify"],
|
"cf-embed", "cf-classify"],
|
||||||
llm_max_params="14b",
|
llm_max_params="14b",
|
||||||
),
|
),
|
||||||
VramTier(
|
VramTier(
|
||||||
|
|
@ -89,7 +89,7 @@ VRAM_TIERS: list[VramTier] = [
|
||||||
ollama_model="qwen2.5:32b-instruct-q4_k_m",
|
ollama_model="qwen2.5:32b-instruct-q4_k_m",
|
||||||
vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
|
vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
|
||||||
services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts",
|
services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts",
|
||||||
"cf-musicgen", "cf-embed", "cf-classify", "comfyui"],
|
"cf-embed", "cf-classify", "comfyui"],
|
||||||
llm_max_params="32b-q4",
|
llm_max_params="32b-q4",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
"""circuitforge_core.musicgen — music continuation service (BSL 1.1)."""
|
|
||||||
|
|
@ -1,138 +0,0 @@
|
||||||
"""
|
|
||||||
cf-musicgen FastAPI service — managed by cf-orch.
|
|
||||||
|
|
||||||
Endpoints:
|
|
||||||
GET /health -> {"status": "ok", "model": str, "vram_mb": int}
|
|
||||||
POST /continue -> audio bytes (Content-Type: audio/wav or audio/mpeg)
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
python -m circuitforge_core.musicgen.app \
|
|
||||||
--model facebook/musicgen-melody \
|
|
||||||
--port 8006 \
|
|
||||||
--gpu-id 0
|
|
||||||
|
|
||||||
The service streams back raw audio bytes. Headers include:
|
|
||||||
X-Duration-S generated duration in seconds
|
|
||||||
X-Prompt-Duration-S how many seconds of the input were used as prompt
|
|
||||||
X-Model model name
|
|
||||||
X-Sample-Rate output sample rate (32000 for all MusicGen variants)
|
|
||||||
|
|
||||||
Model weights are cached at /Library/Assets/LLM/musicgen/.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
from typing import Annotated
|
|
||||||
|
|
||||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
|
||||||
from fastapi.responses import Response
|
|
||||||
|
|
||||||
from circuitforge_core.musicgen.backends.base import (
|
|
||||||
MODEL_MELODY,
|
|
||||||
MODEL_SMALL,
|
|
||||||
AudioFormat,
|
|
||||||
MusicGenBackend,
|
|
||||||
make_musicgen_backend,
|
|
||||||
)
|
|
||||||
|
|
||||||
_CONTENT_TYPES: dict[str, str] = {
|
|
||||||
"wav": "audio/wav",
|
|
||||||
"mp3": "audio/mpeg",
|
|
||||||
}
|
|
||||||
|
|
||||||
app = FastAPI(title="cf-musicgen", version="0.1.0")
|
|
||||||
_backend: MusicGenBackend | None = None
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
def health() -> dict:
    """Report service liveness plus the loaded model and its VRAM estimate.

    Responds with HTTP 503 while no backend has been assigned to the
    module-level ``_backend``.
    """
    backend = _backend
    if backend is not None:
        return {
            "status": "ok",
            "model": backend.model_name,
            "vram_mb": backend.vram_mb,
        }
    raise HTTPException(503, detail="backend not initialised")
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/continue")
async def continue_audio(
    audio: UploadFile = File(..., description="Audio file (WAV, MP3, FLAC, OGG, ...)"),
    description: Annotated[str | None, Form()] = None,
    duration_s: Annotated[float, Form()] = 15.0,
    prompt_duration_s: Annotated[float, Form()] = 10.0,
    format: Annotated[AudioFormat, Form()] = "wav",
) -> Response:
    """Generate a continuation of the uploaded audio and return it as bytes.

    Form fields:
        audio: The clip to continue.
        description: Optional text passed to the backend; an empty string is
            treated the same as no description.
        duration_s: Requested length, must satisfy 0 < duration_s <= 60.
        prompt_duration_s: Prompt length, must satisfy
            0 < prompt_duration_s <= 30.
        format: Requested output encoding.

    Returns:
        The generated audio, with X-Duration-S, X-Prompt-Duration-S, X-Model
        and X-Sample-Rate headers taken from the backend result.

    Raises:
        HTTPException: 503 if no backend is loaded, 422 for out-of-range
            durations, 400 for an empty upload, 500 if generation fails.
    """
    if _backend is None:
        raise HTTPException(503, detail="backend not initialised")
    # Chained comparisons are False for NaN, so a "nan" form value is rejected
    # here with a 422 instead of slipping through and failing in the backend.
    if not (0 < duration_s <= 60):
        raise HTTPException(422, detail="duration_s must be between 0 and 60")
    if not (0 < prompt_duration_s <= 30):
        raise HTTPException(422, detail="prompt_duration_s must be between 0 and 30")

    audio_bytes = await audio.read()
    if not audio_bytes:
        raise HTTPException(400, detail="Empty audio file")

    try:
        # NOTE(review): this is a synchronous call made inside an async
        # handler, so it runs on the event loop for the whole generation.
        result = _backend.continue_audio(
            audio_bytes,
            description=description or None,
            duration_s=duration_s,
            prompt_duration_s=prompt_duration_s,
            format=format,
        )
    except Exception as exc:
        logging.exception("Music continuation failed")
        raise HTTPException(500, detail=str(exc)) from exc

    return Response(
        content=result.audio_bytes,
        # The label follows the format the backend actually produced, which
        # may differ from the one requested.
        media_type=_CONTENT_TYPES.get(result.format, "audio/wav"),
        headers={
            "X-Duration-S": str(round(result.duration_s, 3)),
            "X-Prompt-Duration-S": str(round(result.prompt_duration_s, 3)),
            "X-Model": result.model,
            "X-Sample-Rate": str(result.sample_rate),
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_args() -> argparse.Namespace:
    """Parse the cf-musicgen command-line options."""
    parser = argparse.ArgumentParser(description="cf-musicgen service")
    model_choices = [
        MODEL_MELODY,
        MODEL_SMALL,
        "facebook/musicgen-medium",
        "facebook/musicgen-large",
    ]
    parser.add_argument(
        "--model",
        default=MODEL_MELODY,
        choices=model_choices,
        help="MusicGen model variant",
    )
    parser.add_argument("--port", type=int, default=8006)
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument(
        "--gpu-id",
        type=int,
        default=0,
        help="CUDA device index (sets CUDA_VISIBLE_DEVICES)",
    )
    parser.add_argument("--device", default="cuda", choices=["cuda", "cpu"])
    parser.add_argument(
        "--mock",
        action="store_true",
        help="Run with mock backend (no GPU, for testing)",
    )
    return parser.parse_args()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import uvicorn

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
    )
    args = _parse_args()

    # A model name of "mock" also selects the mock backend.
    use_mock = args.mock or args.model == "mock"

    # Pin the process to the requested GPU unless the environment already
    # chose one (setdefault leaves an existing CUDA_VISIBLE_DEVICES alone).
    if args.device == "cuda" and not args.mock:
        os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(args.gpu_id))

    # The mock backend never needs a GPU, so it always runs on CPU.
    _backend = make_musicgen_backend(
        model_name=args.model,
        mock=use_mock,
        device="cpu" if use_mock else args.device,
    )

    uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
"""MusicGen backend implementations."""
|
|
||||||
|
|
@ -1,128 +0,0 @@
|
||||||
"""
|
|
||||||
AudioCraft MusicGen backend — music continuation via Meta's MusicGen.
|
|
||||||
|
|
||||||
Models are downloaded to /Library/Assets/LLM/musicgen/ (HF hub cache).
|
|
||||||
The melody model (~8 GB VRAM) is the default; small (~1.5 GB) is available
|
|
||||||
for lower-VRAM nodes.
|
|
||||||
|
|
||||||
Continuation workflow:
|
|
||||||
1. Decode input audio with torchaudio (any format ffmpeg understands)
|
|
||||||
2. Trim to the last `prompt_duration_s` seconds — this anchors the generation
|
|
||||||
3. Call model.generate_continuation(prompt_waveform, prompt_sample_rate, ...)
|
|
||||||
4. Output tensor is the NEW audio only (not prompt + continuation)
|
|
||||||
5. Encode to the requested format and return
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
|
|
||||||
from circuitforge_core.musicgen.backends.base import (
|
|
||||||
AudioFormat,
|
|
||||||
MusicContinueResult,
|
|
||||||
decode_audio,
|
|
||||||
encode_audio,
|
|
||||||
)
|
|
||||||
|
|
||||||
# All MusicGen/AudioCraft weights land here — consistent with other CF model dirs.
|
|
||||||
_MUSICGEN_CACHE = "/Library/Assets/LLM/musicgen"
|
|
||||||
|
|
||||||
# VRAM estimates (MB) per model variant
|
|
||||||
_VRAM_MB: dict[str, int] = {
|
|
||||||
"facebook/musicgen-small": 1500,
|
|
||||||
"facebook/musicgen-medium": 4500,
|
|
||||||
"facebook/musicgen-melody": 8000,
|
|
||||||
"facebook/musicgen-large": 8500,
|
|
||||||
}
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class AudioCraftBackend:
    """MusicGen backend using Meta's AudioCraft library."""

    def __init__(self, model_name: str = "facebook/musicgen-melody", device: str = "cuda") -> None:
        """Load the pretrained MusicGen model onto ``device``.

        Args:
            model_name: Model identifier passed to ``MusicGen.get_pretrained``.
            device: Device string passed to the model loader and used for
                prompt tensors.
        """
        # Redirect HF hub cache before the first import so weights go to /Library/Assets
        os.environ.setdefault("HF_HOME", _MUSICGEN_CACHE)
        os.makedirs(_MUSICGEN_CACHE, exist_ok=True)

        from audiocraft.models import MusicGen  # noqa: PLC0415

        logger.info("Loading MusicGen model: %s on %s", model_name, device)
        self._model = MusicGen.get_pretrained(model_name, device=device)
        self._model_name = model_name
        self._device = device
        logger.info("MusicGen ready: %s", model_name)

    @property
    def model_name(self) -> str:
        """Name of the loaded model."""
        return self._model_name

    @property
    def vram_mb(self) -> int:
        """Estimated VRAM use in MB; unknown models fall back to 8000."""
        return _VRAM_MB.get(self._model_name, 8000)

    def continue_audio(
        self,
        audio_bytes: bytes,
        *,
        description: str | None = None,
        duration_s: float = 15.0,
        prompt_duration_s: float = 10.0,
        format: AudioFormat = "wav",
    ) -> MusicContinueResult:
        """Generate a continuation conditioned on the end of the input audio.

        Args:
            audio_bytes: Encoded input audio, decoded with ``decode_audio``.
            description: Optional text description; empty means none.
            duration_s: Value passed as the model's ``duration`` parameter.
            prompt_duration_s: Maximum number of seconds, taken from the end
                of the input, to use as the prompt.
            format: Output encoding handed to ``encode_audio``.

        Returns:
            A MusicContinueResult whose ``prompt_duration_s`` is the number of
            seconds of input actually used, which is shorter than requested
            when the input clip itself is shorter.
        """
        import torch

        # Decode input audio -> [C, T] tensor
        wav, sr = decode_audio(audio_bytes)

        # Trim to the last `prompt_duration_s` seconds to form the conditioning prompt.
        # Using the end of the track (not the beginning) gives the model the musical
        # context closest to where we want to continue.
        # Clamp to at least one sample: a count of 0 would make the `-0:` slice
        # keep the whole clip, and a negative count would trim from the front.
        max_prompt_samples = max(1, int(prompt_duration_s * sr))
        if wav.shape[-1] > max_prompt_samples:
            wav = wav[..., -max_prompt_samples:]

        # Report what was really used, not what was asked for.
        used_prompt_s = wav.shape[-1] / sr

        # MusicGen expects [batch, channels, time]
        prompt_tensor = wav.unsqueeze(0).to(self._device)

        # Build descriptions list — one entry per batch item (batch=1 here)
        descriptions = [description] if description else [None]

        self._model.set_generation_params(
            duration=duration_s,
            top_k=250,
            temperature=1.0,
            cfg_coef=3.0,
        )

        logger.info(
            "Generating %.1fs continuation (prompt=%.1fs) model=%s",
            duration_s,
            used_prompt_s,
            self._model_name,
        )

        with torch.no_grad():
            output = self._model.generate_continuation(
                prompt=prompt_tensor,
                prompt_sample_rate=sr,
                descriptions=descriptions,
                progress=True,
            )

        # output: [batch, channels, time] at the model's sample rate.
        # NOTE(review): confirm against the AudioCraft docs whether this
        # tensor contains only new audio or the prompt followed by the
        # continuation; the duration reported below is simply its length.
        output_wav = output[0]  # [C, T]
        model_sr = self._model.sample_rate

        actual_duration_s = output_wav.shape[-1] / model_sr
        audio_bytes_out = encode_audio(output_wav, model_sr, format)

        return MusicContinueResult(
            audio_bytes=audio_bytes_out,
            sample_rate=model_sr,
            duration_s=actual_duration_s,
            format=format,
            model=self._model_name,
            prompt_duration_s=used_prompt_s,
        )
|
|
||||||
|
|
@ -1,97 +0,0 @@
|
||||||
"""
|
|
||||||
MusicGenBackend Protocol — backend-agnostic music continuation interface.
|
|
||||||
|
|
||||||
All backends accept an audio prompt (raw bytes, any ffmpeg-readable format) and
|
|
||||||
return MusicContinueResult with the generated continuation as audio bytes.
|
|
||||||
|
|
||||||
The continuation is the *new* audio only (not prompt + continuation). Callers
|
|
||||||
that want a seamless joined file can concatenate the original + result themselves.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import io
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Literal, Protocol, runtime_checkable
|
|
||||||
|
|
||||||
AudioFormat = Literal["wav", "mp3"]
|
|
||||||
|
|
||||||
MODEL_SMALL = "facebook/musicgen-small"
|
|
||||||
MODEL_MELODY = "facebook/musicgen-melody"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class MusicContinueResult:
    """Immutable result of one music continuation request."""

    audio_bytes: bytes  # encoded audio payload
    sample_rate: int  # sample rate of the encoded audio
    duration_s: float  # length of the returned audio, in seconds
    format: AudioFormat  # encoding of audio_bytes ("wav" or "mp3")
    model: str  # name of the model that produced the audio
    prompt_duration_s: float  # seconds of input used as the prompt
|
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
class MusicGenBackend(Protocol):
    """Structural interface every music continuation backend must satisfy."""

    def continue_audio(
        self,
        audio_bytes: bytes,
        *,
        description: str | None = None,
        duration_s: float = 15.0,
        prompt_duration_s: float = 10.0,
        format: AudioFormat = "wav",
    ) -> MusicContinueResult: ...

    # Name of the model this backend serves.
    @property
    def model_name(self) -> str: ...

    # VRAM figure for the backend, in MB.
    @property
    def vram_mb(self) -> int: ...
|
|
||||||
|
|
||||||
|
|
||||||
def encode_audio(wav_tensor, sample_rate: int, format: AudioFormat) -> bytes:
    """Encode a [C, T] or [1, C, T] torch tensor to audio bytes.

    A 1-D [T] tensor is also accepted and treated as a single channel.

    Args:
        wav_tensor: Waveform tensor to encode.
        sample_rate: Sample rate written into the encoded audio.
        format: Target encoding, "wav" or "mp3".

    Returns:
        The encoded audio. If MP3 encoding fails, the bytes are WAV-encoded
        instead and a warning is logged.

    Raises:
        ValueError: If ``format`` is neither "wav" nor "mp3".
    """
    # Validate before importing the heavy audio stack; an unknown format used
    # to fall through both branches and silently return empty bytes.
    if format not in ("wav", "mp3"):
        raise ValueError(f"unsupported audio format: {format!r}")

    import io
    import logging

    import torch
    import torchaudio

    wav = wav_tensor
    if wav.dim() == 3:
        wav = wav.squeeze(0)  # [1, C, T] -> [C, T]
    if wav.dim() == 1:
        wav = wav.unsqueeze(0)  # [T] -> [1, T]
    wav = wav.to(torch.float32).cpu()

    if format == "mp3":
        buf = io.BytesIO()
        try:
            torchaudio.save(buf, wav, sample_rate, format="mp3")
            return buf.getvalue()
        except Exception:
            # Best-effort fallback (e.g. no MP3-capable backend available);
            # log it so the format mismatch is visible to operators.
            logging.getLogger(__name__).warning(
                "MP3 encoding failed; falling back to WAV", exc_info=True
            )

    # Fresh buffer: a failed MP3 attempt may have written partial data.
    buf = io.BytesIO()
    torchaudio.save(buf, wav, sample_rate, format="wav")
    return buf.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
def decode_audio(audio_bytes: bytes) -> tuple:
    """Decode encoded audio bytes into ``(waveform, sample_rate)``.

    The bytes are wrapped in an in-memory buffer and handed to
    ``torchaudio.load``.
    """
    import io

    import torchaudio

    waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    return waveform, sample_rate
|
|
||||||
|
|
||||||
|
|
||||||
def make_musicgen_backend(
    model_name: str = MODEL_MELODY,
    *,
    mock: bool = False,
    device: str = "cuda",
) -> MusicGenBackend:
    """Build a backend: the mock when ``mock`` is set, AudioCraft otherwise.

    Backend modules are imported lazily so that only the selected one is
    loaded. ``model_name`` and ``device`` are ignored for the mock.
    """
    if not mock:
        from circuitforge_core.musicgen.backends.audiocraft import AudioCraftBackend

        return AudioCraftBackend(model_name=model_name, device=device)

    from circuitforge_core.musicgen.backends.mock import MockMusicGenBackend

    return MockMusicGenBackend()
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
"""
|
|
||||||
Mock MusicGenBackend — returns silent WAV audio; no GPU required.
|
|
||||||
|
|
||||||
Used in unit tests and CI where GPU is unavailable.
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import io
|
|
||||||
import struct
|
|
||||||
import wave
|
|
||||||
|
|
||||||
from circuitforge_core.musicgen.backends.base import AudioFormat, MusicContinueResult
|
|
||||||
|
|
||||||
|
|
||||||
class MockMusicGenBackend:
    """Backend stand-in that produces silence instead of generated music."""

    @property
    def model_name(self) -> str:
        """Fixed model identifier for the mock."""
        return "mock"

    @property
    def vram_mb(self) -> int:
        """The mock uses no VRAM."""
        return 0

    def continue_audio(
        self,
        audio_bytes: bytes,
        *,
        description: str | None = None,
        duration_s: float = 15.0,
        prompt_duration_s: float = 10.0,
        format: AudioFormat = "wav",
    ) -> MusicContinueResult:
        """Return ``duration_s`` seconds of mono 16-bit silence as WAV.

        The input audio, the description and the requested ``format`` are
        ignored; the result is always labelled "wav".
        """
        rate = 32000
        frame_count = int(duration_s * rate)

        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, "wb") as writer:
            writer.setnchannels(1)
            writer.setsampwidth(2)
            writer.setframerate(rate)
            # Two zero bytes per frame = 16-bit PCM silence.
            writer.writeframes(bytes(2) * frame_count)

        return MusicContinueResult(
            audio_bytes=wav_buffer.getvalue(),
            sample_rate=rate,
            duration_s=duration_s,
            format="wav",
            model="mock",
            prompt_duration_s=prompt_duration_s,
        )
|
|
||||||
|
|
@ -1,183 +0,0 @@
|
||||||
"""eBay OAuth Authorization Code flow — user-level token manager.
|
|
||||||
|
|
||||||
Implements the Authorization Code Grant for eBay's Trading API.
|
|
||||||
App-level client credentials (Browse API) are handled separately in
|
|
||||||
the product-level EbayTokenManager (snipe/app/platforms/ebay/auth.py).
|
|
||||||
|
|
||||||
Usage (Snipe):
|
|
||||||
manager = EbayUserTokenManager(
|
|
||||||
client_id=app_id,
|
|
||||||
client_secret=cert_id,
|
|
||||||
runame=runame,
|
|
||||||
redirect_uri=redirect_uri,
|
|
||||||
env="production",
|
|
||||||
)
|
|
||||||
|
|
||||||
# 1. Send user to eBay
|
|
||||||
url = manager.get_authorization_url(state="csrf-token-here")
|
|
||||||
redirect(url)
|
|
||||||
|
|
||||||
# 2. Handle callback
|
|
||||||
tokens = manager.exchange_code(code) # returns EbayUserTokens
|
|
||||||
# store tokens.access_token, tokens.refresh_token, tokens.expires_at
|
|
||||||
|
|
||||||
# 3. Get a fresh access token for API calls
|
|
||||||
tokens = manager.refresh(stored_refresh_token)  # returns EbayUserTokens; use tokens.access_token
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import time
|
|
||||||
import urllib.parse
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
EBAY_AUTH_URLS = {
|
|
||||||
"production": "https://auth.ebay.com/oauth2/authorize",
|
|
||||||
"sandbox": "https://auth.sandbox.ebay.com/oauth2/authorize",
|
|
||||||
}
|
|
||||||
|
|
||||||
EBAY_TOKEN_URLS = {
|
|
||||||
"production": "https://api.ebay.com/identity/v1/oauth2/token",
|
|
||||||
"sandbox": "https://api.sandbox.ebay.com/identity/v1/oauth2/token",
|
|
||||||
}
|
|
||||||
|
|
||||||
# Scopes needed for Trading API GetUser (account age + category feedback).
|
|
||||||
# https://developer.ebay.com/api-docs/static/oauth-scopes.html
|
|
||||||
DEFAULT_SCOPES = [
|
|
||||||
"https://api.ebay.com/oauth/api_scope",
|
|
||||||
"https://api.ebay.com/oauth/api_scope/sell.account.readonly",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class EbayUserTokens:
    """Token set returned by the eBay OAuth token endpoint for one user."""

    access_token: str  # bearer token for API calls
    refresh_token: str  # token used to obtain a new access token
    expires_at: float  # epoch seconds
    scopes: list[str]  # scopes reported in the token response
|
|
||||||
|
|
||||||
|
|
||||||
class EbayUserTokenManager:
|
|
||||||
"""Manages eBay Authorization Code OAuth tokens for a single user.
|
|
||||||
|
|
||||||
One instance per user session. Does NOT persist tokens — callers are
|
|
||||||
responsible for storing/loading tokens via the DB migration
|
|
||||||
013_ebay_user_tokens.sql.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
client_id: str,
|
|
||||||
client_secret: str,
|
|
||||||
runame: str,
|
|
||||||
redirect_uri: str,
|
|
||||||
env: str = "production",
|
|
||||||
scopes: Optional[list[str]] = None,
|
|
||||||
):
|
|
||||||
self._client_id = client_id
|
|
||||||
self._client_secret = client_secret
|
|
||||||
self._runame = runame
|
|
||||||
self._redirect_uri = redirect_uri
|
|
||||||
self._auth_url = EBAY_AUTH_URLS[env]
|
|
||||||
self._token_url = EBAY_TOKEN_URLS[env]
|
|
||||||
self._scopes = scopes or DEFAULT_SCOPES
|
|
||||||
|
|
||||||
# ── Authorization URL ──────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def get_authorization_url(self, state: str = "") -> str:
|
|
||||||
"""Build the eBay OAuth authorization URL to redirect the user to.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
state: CSRF token or opaque value passed through unchanged.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Full URL string to redirect the user's browser to.
|
|
||||||
"""
|
|
||||||
params = {
|
|
||||||
"client_id": self._client_id,
|
|
||||||
"response_type": "code",
|
|
||||||
"redirect_uri": self._runame, # eBay uses RuName, not the raw URI
|
|
||||||
"scope": " ".join(self._scopes),
|
|
||||||
}
|
|
||||||
if state:
|
|
||||||
params["state"] = state
|
|
||||||
return f"{self._auth_url}?{urllib.parse.urlencode(params)}"
|
|
||||||
|
|
||||||
# ── Code exchange ──────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def exchange_code(self, code: str) -> EbayUserTokens:
|
|
||||||
"""Exchange an authorization code for access + refresh tokens.
|
|
||||||
|
|
||||||
Called from the OAuth callback endpoint after eBay redirects back.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
requests.HTTPError on non-2xx eBay response.
|
|
||||||
KeyError if eBay response is missing expected fields.
|
|
||||||
"""
|
|
||||||
resp = requests.post(
|
|
||||||
self._token_url,
|
|
||||||
headers={
|
|
||||||
"Authorization": f"Basic {self._credentials_b64()}",
|
|
||||||
"Content-Type": "application/x-www-form-urlencoded",
|
|
||||||
},
|
|
||||||
data={
|
|
||||||
"grant_type": "authorization_code",
|
|
||||||
"code": code,
|
|
||||||
"redirect_uri": self._runame,
|
|
||||||
},
|
|
||||||
timeout=15,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
return self._parse_token_response(resp.json())
|
|
||||||
|
|
||||||
# ── Token refresh ──────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def refresh(self, refresh_token: str) -> EbayUserTokens:
    """Obtain a fresh access token using a stored refresh token.

    eBay refresh tokens are valid for 18 months; access tokens last 2h.
    Call this before making Trading API requests when the stored token
    is within 60 seconds of expiry.

    Raises:
        requests.HTTPError: the refresh token is expired or revoked.
    """
    request_headers = {
        "Authorization": f"Basic {self._credentials_b64()}",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    form = {
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
        "scope": " ".join(self._scopes),
    }
    response = requests.post(
        self._token_url,
        headers=request_headers,
        data=form,
        timeout=15,
    )
    response.raise_for_status()

    # The refresh response carries no new refresh_token, so the caller's
    # original one is reused in the returned token set.
    body = response.json()
    new_access_token = body["access_token"]
    expiry = time.time() + body["expires_in"]
    granted_scopes = body.get("scope", "").split()
    return EbayUserTokens(
        access_token=new_access_token,
        refresh_token=refresh_token,
        expires_at=expiry,
        scopes=granted_scopes,
    )
|
|
||||||
|
|
||||||
# ── Helpers ────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _credentials_b64(self) -> str:
    """Return ``client_id:client_secret`` base64-encoded, as used in the Basic auth header."""
    pair = "{}:{}".format(self._client_id, self._client_secret)
    encoded = base64.b64encode(pair.encode())
    return encoded.decode()
|
|
||||||
|
|
||||||
def _parse_token_response(self, data: dict) -> EbayUserTokens:
    """Build an ``EbayUserTokens`` from a decoded token-endpoint JSON body.

    ``access_token``, ``refresh_token`` and ``expires_in`` are required
    (a missing one raises ``KeyError``); ``scope`` is optional and is split
    on whitespace into a list.
    """
    # Required fields are read in the same order as the constructor
    # arguments so the first missing key is the one reported.
    access = data["access_token"]
    renewal = data["refresh_token"]
    expiry = time.time() + data["expires_in"]
    granted_scopes = data.get("scope", "").split()
    return EbayUserTokens(
        access_token=access,
        refresh_token=renewal,
        expires_at=expiry,
        scopes=granted_scopes,
    )
|
|
||||||
|
|
@ -16,12 +16,6 @@ Usage:
|
||||||
--port 8006 \
|
--port 8006 \
|
||||||
--gpu-id 0
|
--gpu-id 0
|
||||||
|
|
||||||
Multi-GPU (spans two GPUs via CUDA_VISIBLE_DEVICES, device_map=auto):
|
|
||||||
python -m circuitforge_core.text.app \
|
|
||||||
--model /Library/Assets/LLM/deepseek-14b \
|
|
||||||
--port 8006 \
|
|
||||||
--gpu-ids 0,1
|
|
||||||
|
|
||||||
Mock mode (no model or GPU required):
|
Mock mode (no model or GPU required):
|
||||||
CF_TEXT_MOCK=1 python -m circuitforge_core.text.app --port 8006
|
CF_TEXT_MOCK=1 python -m circuitforge_core.text.app --port 8006
|
||||||
"""
|
"""
|
||||||
|
|
@ -117,17 +111,9 @@ class OAIChatResponse(BaseModel):
|
||||||
def create_app(
|
def create_app(
|
||||||
model_path: str,
|
model_path: str,
|
||||||
gpu_id: int = 0,
|
gpu_id: int = 0,
|
||||||
gpu_ids: str | None = None,
|
|
||||||
backend: str | None = None,
|
backend: str | None = None,
|
||||||
mock: bool = False,
|
mock: bool = False,
|
||||||
) -> FastAPI:
|
) -> FastAPI:
|
||||||
"""Start the cf-text FastAPI app.
|
|
||||||
|
|
||||||
``gpu_ids``: comma-separated CUDA device indices for multi-GPU spanning
|
|
||||||
(e.g. "0,1"). When set, overrides ``gpu_id`` and sets
|
|
||||||
``CUDA_VISIBLE_DEVICES`` to the full list so HuggingFace Accelerate's
|
|
||||||
``device_map="auto"`` can shard the model across all listed devices.
|
|
||||||
"""
|
|
||||||
global _backend
|
global _backend
|
||||||
|
|
||||||
if not mock and not model_path:
|
if not mock and not model_path:
|
||||||
|
|
@ -136,8 +122,7 @@ def create_app(
|
||||||
"Pass a GGUF path, a HuggingFace model ID, or set CF_TEXT_MOCK=1 for mock mode."
|
"Pass a GGUF path, a HuggingFace model ID, or set CF_TEXT_MOCK=1 for mock mode."
|
||||||
)
|
)
|
||||||
|
|
||||||
visible = gpu_ids if gpu_ids else str(gpu_id)
|
os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(gpu_id))
|
||||||
os.environ.setdefault("CUDA_VISIBLE_DEVICES", visible)
|
|
||||||
|
|
||||||
_backend = make_text_backend(model_path, backend=backend, mock=mock)
|
_backend = make_text_backend(model_path, backend=backend, mock=mock)
|
||||||
logger.info("cf-text ready: model=%r vram=%dMB", _backend.model_name, _backend.vram_mb)
|
logger.info("cf-text ready: model=%r vram=%dMB", _backend.model_name, _backend.vram_mb)
|
||||||
|
|
@ -226,10 +211,7 @@ def _parse_args() -> argparse.Namespace:
|
||||||
parser.add_argument("--port", type=int, default=8006)
|
parser.add_argument("--port", type=int, default=8006)
|
||||||
parser.add_argument("--host", default="0.0.0.0")
|
parser.add_argument("--host", default="0.0.0.0")
|
||||||
parser.add_argument("--gpu-id", type=int, default=0,
|
parser.add_argument("--gpu-id", type=int, default=0,
|
||||||
help="CUDA device index to use (single GPU)")
|
help="CUDA device index to use")
|
||||||
parser.add_argument("--gpu-ids", default=None,
|
|
||||||
help="Comma-separated CUDA device indices for multi-GPU spanning "
|
|
||||||
"(e.g. '0,1'). Overrides --gpu-id when set.")
|
|
||||||
parser.add_argument("--backend", choices=["llamacpp", "transformers"], default=None)
|
parser.add_argument("--backend", choices=["llamacpp", "transformers"], default=None)
|
||||||
parser.add_argument("--mock", action="store_true",
|
parser.add_argument("--mock", action="store_true",
|
||||||
help="Run in mock mode (no model or GPU needed)")
|
help="Run in mock mode (no model or GPU needed)")
|
||||||
|
|
@ -244,7 +226,6 @@ if __name__ == "__main__":
|
||||||
app = create_app(
|
app = create_app(
|
||||||
model_path=args.model,
|
model_path=args.model,
|
||||||
gpu_id=args.gpu_id,
|
gpu_id=args.gpu_id,
|
||||||
gpu_ids=args.gpu_ids,
|
|
||||||
backend=args.backend,
|
backend=args.backend,
|
||||||
mock=mock,
|
mock=mock,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -121,19 +121,17 @@ class TextBackend(Protocol):
|
||||||
|
|
||||||
def _select_backend(model_path: str, backend: str | None) -> str:
|
def _select_backend(model_path: str, backend: str | None) -> str:
|
||||||
"""
|
"""
|
||||||
Return "llamacpp", "transformers", "ollama", or "vllm" for the given model path.
|
Return "llamacpp" or "transformers" for the given model path.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
model_path Path to the model file, HuggingFace repo ID, "ollama://<name>",
|
model_path Path to the model file or HuggingFace repo ID (e.g. "Qwen/Qwen2.5-3B").
|
||||||
or "vllm://<model-id>".
|
backend Explicit override from the caller ("llamacpp" | "transformers" | None).
|
||||||
backend Explicit override from the caller
|
|
||||||
("llamacpp" | "transformers" | "ollama" | "vllm" | None).
|
|
||||||
When provided, trust it without inspection.
|
When provided, trust it without inspection.
|
||||||
|
|
||||||
Raise ValueError for unrecognised override values.
|
Return "llamacpp" or "transformers". Raise ValueError for unrecognised values.
|
||||||
"""
|
"""
|
||||||
_VALID = ("llamacpp", "transformers", "ollama", "vllm")
|
_VALID = ("llamacpp", "transformers")
|
||||||
|
|
||||||
# 1. Caller-supplied override — highest trust, no inspection needed.
|
# 1. Caller-supplied override — highest trust, no inspection needed.
|
||||||
resolved = backend or os.environ.get("CF_TEXT_BACKEND")
|
resolved = backend or os.environ.get("CF_TEXT_BACKEND")
|
||||||
|
|
@ -144,17 +142,11 @@ def _select_backend(model_path: str, backend: str | None) -> str:
|
||||||
)
|
)
|
||||||
return resolved
|
return resolved
|
||||||
|
|
||||||
# 2. Proxy prefixes — unambiguous routing regardless of model name format.
|
# 2. Format detection — GGUF files are unambiguously llama-cpp territory.
|
||||||
if model_path.startswith("ollama://"):
|
|
||||||
return "ollama"
|
|
||||||
if model_path.startswith("vllm://"):
|
|
||||||
return "vllm"
|
|
||||||
|
|
||||||
# 3. Format detection — GGUF files are unambiguously llama-cpp territory.
|
|
||||||
if model_path.lower().endswith(".gguf"):
|
if model_path.lower().endswith(".gguf"):
|
||||||
return "llamacpp"
|
return "llamacpp"
|
||||||
|
|
||||||
# 4. Safe default — transformers covers HF repo IDs and safetensors dirs.
|
# 3. Safe default — transformers covers HF repo IDs and safetensors dirs.
|
||||||
return "transformers"
|
return "transformers"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -187,12 +179,4 @@ def make_text_backend(
|
||||||
from circuitforge_core.text.backends.transformers import TransformersBackend
|
from circuitforge_core.text.backends.transformers import TransformersBackend
|
||||||
return TransformersBackend(model_path=model_path)
|
return TransformersBackend(model_path=model_path)
|
||||||
|
|
||||||
if resolved == "ollama":
|
raise ValueError(f"Unknown backend {resolved!r}. Expected 'llamacpp' or 'transformers'.")
|
||||||
from circuitforge_core.text.backends.ollama import OllamaBackend
|
|
||||||
return OllamaBackend(model_path=model_path)
|
|
||||||
|
|
||||||
if resolved == "vllm":
|
|
||||||
from circuitforge_core.text.backends.vllm import VllmBackend
|
|
||||||
return VllmBackend(model_path=model_path)
|
|
||||||
|
|
||||||
raise ValueError(f"Unknown backend {resolved!r}. Expected 'llamacpp', 'transformers', 'ollama', or 'vllm'.")
|
|
||||||
|
|
|
||||||
|
|
@ -1,201 +0,0 @@
|
||||||
# circuitforge_core/text/backends/ollama.py — Ollama proxy backend for cf-text
|
|
||||||
#
|
|
||||||
# Routes inference requests to a running Ollama instance via its HTTP API.
|
|
||||||
# cf-text itself holds no GPU memory; Ollama manages the model and VRAM.
|
|
||||||
#
|
|
||||||
# Model path format: "ollama://<model-name>" e.g. "ollama://llama3.1:8b"
|
|
||||||
# The "ollama://" prefix is stripped before forwarding to the API.
|
|
||||||
#
|
|
||||||
# Environment:
|
|
||||||
# CF_TEXT_OLLAMA_URL Base URL of the Ollama server (default: http://localhost:11434)
|
|
||||||
#
|
|
||||||
# MIT licensed.
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json as _json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from typing import AsyncIterator, Iterator
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from circuitforge_core.text.backends.base import GenerateResult
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_DEFAULT_OLLAMA_URL = "http://localhost:11434"
|
|
||||||
|
|
||||||
|
|
||||||
class OllamaBackend:
    """
    cf-text backend that proxies inference to a local Ollama instance.

    This backend holds no GPU memory itself — Ollama owns the model and VRAM.
    vram_mb therefore defaults to 0 so cf-orch does not double-count VRAM
    against the separate ollama service budget.

    Prompt generation is offered in blocking and streaming form, both sync
    and async; chat is offered in blocking sync form only.
    """

    # Per-request timeout (seconds) applied to every Ollama HTTP call.
    _TIMEOUT_S = 180.0

    def __init__(self, model_path: str, *, vram_mb: int = 0) -> None:
        """Configure the proxy.

        Args:
            model_path: Model name, optionally prefixed with "ollama://".
            vram_mb: Value reported by the ``vram_mb`` property.
        """
        # Strip the "ollama://" prefix from catalog paths
        self._model = model_path.removeprefix("ollama://")
        self._url = os.environ.get("CF_TEXT_OLLAMA_URL", _DEFAULT_OLLAMA_URL).rstrip("/")
        self._vram_mb = vram_mb
        logger.info("OllamaBackend: model=%r url=%r", self._model, self._url)

    # ── Protocol properties ───────────────────────────────────────────────────

    @property
    def model_name(self) -> str:
        """Model name as sent to Ollama (prefix already stripped)."""
        return self._model

    @property
    def vram_mb(self) -> int:
        """VRAM attributed to this backend; Ollama manages its own VRAM."""
        return self._vram_mb

    # ── Internal helpers ──────────────────────────────────────────────────────

    def _generate_payload(
        self,
        prompt: str,
        *,
        max_tokens: int,
        temperature: float,
        stop: list[str] | None,
        stream: bool,
    ) -> dict:
        """Build the JSON body for POST /api/generate."""
        payload: dict = {
            "model": self._model,
            "prompt": prompt,
            "stream": stream,
            "options": {"temperature": temperature, "num_predict": max_tokens},
        }
        if stop:
            payload["options"]["stop"] = stop
        return payload

    def _generate_result(self, data: dict) -> GenerateResult:
        """Convert a non-streaming /api/generate response body into a result."""
        return GenerateResult(
            text=data.get("response", ""),
            tokens_used=data.get("eval_count", 0),
            model=self._model,
        )

    @staticmethod
    def _parse_stream_line(line: str) -> tuple[str, bool]:
        """Decode one streamed JSON line into ``(token, done)``."""
        chunk = _json.loads(line)
        return chunk.get("response", ""), bool(chunk.get("done"))

    # ── Synchronous interface ─────────────────────────────────────────────────

    def generate(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """Run a blocking completion for ``prompt`` via /api/generate.

        Raises:
            httpx.HTTPStatusError: Ollama answered with an error status.
        """
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        with httpx.Client(timeout=self._TIMEOUT_S) as client:
            resp = client.post(f"{self._url}/api/generate", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return self._generate_result(data)

    def generate_stream(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> Iterator[str]:
        """Yield non-empty response fragments for ``prompt`` as Ollama streams them.

        Iteration ends when a chunk reports ``done`` or the response ends.
        """
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        with httpx.Client(timeout=self._TIMEOUT_S) as client:
            with client.stream("POST", f"{self._url}/api/generate", json=payload) as resp:
                resp.raise_for_status()
                for line in resp.iter_lines():
                    if not line:
                        continue
                    token, done = self._parse_stream_line(line)
                    if token:
                        yield token
                    if done:
                        break

    def chat(
        self,
        messages: list[dict],
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
    ) -> GenerateResult:
        """Run a blocking chat completion for ``messages`` via /api/chat.

        Raises:
            httpx.HTTPStatusError: Ollama answered with an error status.
        """
        payload: dict = {
            "model": self._model,
            "messages": messages,
            "stream": False,
            "options": {"temperature": temperature, "num_predict": max_tokens},
        }
        with httpx.Client(timeout=self._TIMEOUT_S) as client:
            resp = client.post(f"{self._url}/api/chat", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return GenerateResult(
            text=data.get("message", {}).get("content", ""),
            tokens_used=data.get("eval_count", 0),
            model=self._model,
        )

    # ── Async interface ───────────────────────────────────────────────────────

    async def generate_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """Async counterpart of :meth:`generate`."""
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        async with httpx.AsyncClient(timeout=self._TIMEOUT_S) as client:
            resp = await client.post(f"{self._url}/api/generate", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return self._generate_result(data)

    async def generate_stream_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> AsyncIterator[str]:
        """Async counterpart of :meth:`generate_stream`."""
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        async with httpx.AsyncClient(timeout=self._TIMEOUT_S) as client:
            async with client.stream("POST", f"{self._url}/api/generate", json=payload) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    token, done = self._parse_stream_line(line)
                    if token:
                        yield token
                    if done:
                        break
|
|
||||||
|
|
@ -1,213 +0,0 @@
|
||||||
# circuitforge_core/text/backends/vllm.py — vllm proxy backend for cf-text
|
|
||||||
#
|
|
||||||
# Routes inference requests to a running vllm instance via its OpenAI-compatible
|
|
||||||
# HTTP API (/v1/chat/completions, /v1/completions).
|
|
||||||
# cf-text itself holds no GPU memory; vllm manages the model and VRAM.
|
|
||||||
#
|
|
||||||
# Model path format: "vllm://<model-id>" e.g. "vllm://Qwen/Qwen2.5-7B-Instruct"
|
|
||||||
# The "vllm://" prefix is stripped; the remainder is the model_id sent to vllm.
|
|
||||||
#
|
|
||||||
# Environment:
|
|
||||||
# CF_TEXT_VLLM_URL Base URL of the vllm server (default: http://localhost:8000)
|
|
||||||
#
|
|
||||||
# MIT licensed.
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json as _json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from typing import AsyncIterator, Iterator
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
from circuitforge_core.text.backends.base import ChatMessage, GenerateResult
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_DEFAULT_VLLM_URL = "http://localhost:8000"
|
|
||||||
|
|
||||||
|
|
||||||
class VllmBackend:
    """
    cf-text backend that proxies inference to a local vllm instance.

    vllm exposes an OpenAI-compatible API (/v1/chat/completions).
    This backend holds no GPU memory — vllm owns the model and VRAM.
    vram_mb defaults to 0 so cf-orch does not double-count VRAM
    against the separate vllm service budget.

    Plain prompts are sent as a single user message to the chat endpoint.
    """

    # Per-request timeout (seconds) applied to every vllm HTTP call.
    _TIMEOUT_S = 180.0
    # OpenAI-compatible endpoint used for both prompt and chat requests.
    _CHAT_PATH = "/v1/chat/completions"

    def __init__(self, model_path: str, *, vram_mb: int = 0) -> None:
        """Configure the proxy.

        Args:
            model_path: Model id, optionally prefixed with "vllm://".
            vram_mb: Value reported by the ``vram_mb`` property.
        """
        # Strip the "vllm://" prefix from catalog paths
        self._model = model_path.removeprefix("vllm://")
        self._url = os.environ.get("CF_TEXT_VLLM_URL", _DEFAULT_VLLM_URL).rstrip("/")
        self._vram_mb = vram_mb
        logger.info("VllmBackend: model=%r url=%r", self._model, self._url)

    # ── Protocol properties ───────────────────────────────────────────────────

    @property
    def model_name(self) -> str:
        """Model id as sent to vllm (prefix already stripped)."""
        return self._model

    @property
    def vram_mb(self) -> int:
        """VRAM attributed to this backend; vllm manages its own VRAM."""
        return self._vram_mb

    # ── Internal helpers ──────────────────────────────────────────────────────

    def _chat_payload(
        self,
        messages: list[dict],
        *,
        max_tokens: int,
        temperature: float,
        stop: list[str] | None,
        stream: bool,
    ) -> dict:
        """Build the JSON body for POST /v1/chat/completions."""
        payload: dict = {
            "model": self._model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": stream,
        }
        if stop:
            payload["stop"] = stop
        return payload

    def _prompt_as_messages(self, prompt: str) -> list[dict]:
        """Wrap a bare prompt as a one-element user-message list."""
        return [{"role": "user", "content": prompt}]

    def _completion_result(self, data: dict) -> GenerateResult:
        """Convert a non-streaming chat-completions body into a result.

        Raises KeyError/IndexError when the body has no first choice message.
        """
        return GenerateResult(
            text=data["choices"][0]["message"]["content"],
            tokens_used=data.get("usage", {}).get("completion_tokens", 0),
            model=self._model,
        )

    def _post_chat(self, payload: dict) -> GenerateResult:
        """POST ``payload`` synchronously and parse the completion."""
        with httpx.Client(timeout=self._TIMEOUT_S) as client:
            resp = client.post(f"{self._url}{self._CHAT_PATH}", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return self._completion_result(data)

    # ── Synchronous interface ─────────────────────────────────────────────────

    def generate(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """Run a blocking completion for ``prompt``.

        Raises:
            httpx.HTTPStatusError: vllm answered with an error status.
        """
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        return self._post_chat(payload)

    def generate_stream(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> Iterator[str]:
        """Yield non-empty content tokens for ``prompt`` as vllm streams them."""
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        with httpx.Client(timeout=self._TIMEOUT_S) as client:
            with client.stream("POST", f"{self._url}{self._CHAT_PATH}", json=payload) as resp:
                resp.raise_for_status()
                for line in resp.iter_lines():
                    token = _parse_sse_token(line)
                    if token:
                        yield token

    def chat(
        self,
        messages: list[ChatMessage],
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
    ) -> GenerateResult:
        """Run a blocking chat completion.

        Each message is converted with ``to_dict()`` when it has that
        method and is otherwise forwarded unchanged.
        """
        dicts = [m.to_dict() if hasattr(m, "to_dict") else m for m in messages]
        payload = self._chat_payload(
            dicts, max_tokens=max_tokens, temperature=temperature, stop=None, stream=False,
        )
        return self._post_chat(payload)

    # ── Async interface ───────────────────────────────────────────────────────

    async def generate_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """Async counterpart of :meth:`generate`."""
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        async with httpx.AsyncClient(timeout=self._TIMEOUT_S) as client:
            resp = await client.post(f"{self._url}{self._CHAT_PATH}", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return self._completion_result(data)

    async def generate_stream_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> AsyncIterator[str]:
        """Async counterpart of :meth:`generate_stream`."""
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        async with httpx.AsyncClient(timeout=self._TIMEOUT_S) as client:
            async with client.stream("POST", f"{self._url}{self._CHAT_PATH}", json=payload) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    token = _parse_sse_token(line)
                    if token:
                        yield token
|
|
||||||
|
|
||||||
|
|
||||||
# ── SSE parser (OpenAI/vllm format) ──────────────────────────────────────────
|
|
||||||
|
|
||||||
def _parse_sse_token(line: str) -> str:
    """Extract content token from an OpenAI-format SSE line.

    Lines look like:  data: {"choices": [{"delta": {"content": "word"}}]}
    Terminal line:    data: [DONE]

    Returns the token string, or "" for empty/done/non-data lines and for
    any data line whose JSON is malformed or does not have the expected
    ``choices[0].delta`` shape.
    """
    if not line.startswith("data:"):
        return ""
    payload = line[5:].strip()
    if not payload or payload == "[DONE]":
        return ""
    try:
        chunk = _json.loads(payload)
        return chunk["choices"][0]["delta"].get("content", "") or ""
    except (KeyError, IndexError, TypeError, AttributeError, _json.JSONDecodeError):
        # TypeError/AttributeError cover valid JSON of the wrong shape
        # (e.g. `null`, a number, or a null delta); such a chunk is skipped
        # instead of aborting the whole stream.
        return ""
|
|
||||||
|
|
@ -29,7 +29,7 @@ _CONTENT_TYPES: dict[str, str] = {
|
||||||
}
|
}
|
||||||
|
|
||||||
app = FastAPI(title="cf-tts")
|
app = FastAPI(title="cf-tts")
|
||||||
_backend = None # type: TTSBackend | None
|
_backend: TTSBackend | None = None
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
|
|
@ -96,6 +96,7 @@ if __name__ == "__main__":
|
||||||
mock = args.mock or args.model == "mock"
|
mock = args.mock or args.model == "mock"
|
||||||
device = "cpu" if mock else "cuda"
|
device = "cpu" if mock else "cuda"
|
||||||
|
|
||||||
|
global _backend
|
||||||
_backend = make_tts_backend(args.model, mock=mock, device=device)
|
_backend = make_tts_backend(args.model, mock=mock, device=device)
|
||||||
print(f"cf-tts backend ready: {_backend.model_name} ({_backend.vram_mb} MB)")
|
print(f"cf-tts backend ready: {_backend.model_name} ({_backend.vram_mb} MB)")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,12 +60,7 @@ def _encode_audio(
|
||||||
if format == "wav":
|
if format == "wav":
|
||||||
torchaudio.save(buf, wav, sample_rate, format="wav")
|
torchaudio.save(buf, wav, sample_rate, format="wav")
|
||||||
elif format == "ogg":
|
elif format == "ogg":
|
||||||
# libvorbis may not be available on all torchaudio builds; fall back to wav
|
torchaudio.save(buf, wav, sample_rate, format="ogg", encoding="vorbis")
|
||||||
try:
|
|
||||||
torchaudio.save(buf, wav, sample_rate, format="ogg", encoding="vorbis")
|
|
||||||
except Exception:
|
|
||||||
buf = io.BytesIO()
|
|
||||||
torchaudio.save(buf, wav, sample_rate, format="wav")
|
|
||||||
elif format == "mp3":
|
elif format == "mp3":
|
||||||
# torchaudio MP3 encode requires ffmpeg backend; fall back to wav on failure
|
# torchaudio MP3 encode requires ffmpeg backend; fall back to wav on failure
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -1,129 +0,0 @@
|
||||||
# Adding a Module to cf-core
|
|
||||||
|
|
||||||
This guide walks through extracting a pattern from a product into a shared cf-core module. The goal is to move battle-tested implementations here once they've stabilized in at least two products.
|
|
||||||
|
|
||||||
## When to add a module
|
|
||||||
|
|
||||||
Add a module when:
|
|
||||||
- The same pattern exists in two or more products with minor variations
|
|
||||||
- The interface is stable enough that changing it would require coordinated updates across products
|
|
||||||
- The code has no product-specific business logic baked in
|
|
||||||
|
|
||||||
Do not add a module for:
|
|
||||||
- One-off utilities that only one product needs
|
|
||||||
- Anything still in active design flux
|
|
||||||
- Product-specific configuration or policy decisions
|
|
||||||
|
|
||||||
## Module structure
|
|
||||||
|
|
||||||
```
|
|
||||||
circuitforge_core/
|
|
||||||
└── mymodule/
|
|
||||||
├── __init__.py # Public API — what products import
|
|
||||||
├── base.py # Core implementation
|
|
||||||
└── backends/ # Optional: pluggable backends
|
|
||||||
├── __init__.py
|
|
||||||
├── local.py
|
|
||||||
└── cloud.py
|
|
||||||
```
|
|
||||||
|
|
||||||
Keep the public API in `__init__.py` clean. Products should import from `circuitforge_core.mymodule`, not from internal submodules.
|
|
||||||
|
|
||||||
## Step 1: Define the interface
|
|
||||||
|
|
||||||
Write the public interface first — the classes and functions products will call. Get this right before implementing, because changing it requires updating every product shim.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# circuitforge_core/mymodule/__init__.py
|
|
||||||
|
|
||||||
from .base import MyThing, get_my_thing
|
|
||||||
|
|
||||||
__all__ = ["MyThing", "get_my_thing"]
|
|
||||||
```
|
|
||||||
|
|
||||||
## Step 2: Implement with a stub
|
|
||||||
|
|
||||||
Start with a minimal working implementation. Stub out anything uncertain:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# circuitforge_core/mymodule/base.py
|
|
||||||
|
|
||||||
class MyThing:
|
|
||||||
def __init__(self, config: dict):
|
|
||||||
self._config = config
|
|
||||||
|
|
||||||
def do_thing(self, input: str) -> str:
|
|
||||||
raise NotImplementedError("Override in product or backend")
|
|
||||||
```
|
|
||||||
|
|
||||||
## Step 3: Write tests
|
|
||||||
|
|
||||||
Tests go in `circuitforge_core/tests/test_mymodule.py`. Use `pytest`. The cf env has pytest installed.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
conda run -n cf python -m pytest tests/test_mymodule.py -v
|
|
||||||
```
|
|
||||||
|
|
||||||
Cover:
|
|
||||||
- Happy path with realistic input
|
|
||||||
- Missing config / bad input (fail loudly, not silently)
|
|
||||||
- Cloud vs local mode if applicable
|
|
||||||
|
|
||||||
## Step 4: Update `pyproject.toml`
|
|
||||||
|
|
||||||
Add any new dependencies:
|
|
||||||
|
|
||||||
```toml
|
|
||||||
[project.optional-dependencies]
|
|
||||||
mymodule = ["some-dep>=1.0"]
|
|
||||||
```
|
|
||||||
|
|
||||||
Use optional dependency groups so products that don't use the module don't pay the install cost.
|
|
||||||
|
|
||||||
## Step 5: Write the docs page
|
|
||||||
|
|
||||||
Add `docs/modules/mymodule.md` following the pattern of the existing module docs. Include:
|
|
||||||
- Import path
|
|
||||||
- Why this module exists / design rationale
|
|
||||||
- Full public API with examples
|
|
||||||
- Any gotchas or non-obvious behavior
|
|
||||||
- Status (Stable / Stub)
|
|
||||||
|
|
||||||
Update `docs/modules/index.md` and `mkdocs.yml` to include the new page.
|
|
||||||
|
|
||||||
## Step 6: Update products
|
|
||||||
|
|
||||||
In each product that uses the pattern:
|
|
||||||
1. Add a shim if the product needs to override behavior
|
|
||||||
2. Replace the inline implementation with imports from cf-core
|
|
||||||
3. Run the product's tests
|
|
||||||
|
|
||||||
The shim pattern:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# myproduct/app/mything.py
|
|
||||||
from circuitforge_core.mymodule import get_my_thing as _base_get_my_thing
|
|
||||||
from .config import get_settings
|
|
||||||
|
|
||||||
def get_my_thing():
|
|
||||||
settings = get_settings()
|
|
||||||
return _base_get_my_thing(config=settings.mything_config)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Licensing boundary
|
|
||||||
|
|
||||||
The module's license depends on what it does:
|
|
||||||
|
|
||||||
| Code | License |
|
|
||||||
|------|---------|
|
|
||||||
| Discovery, pipeline, data access | **MIT** |
|
|
||||||
| LLM inference, AI features, fine-tuned model access | **BSL 1.1** |
|
|
||||||
| Anything that would give SaaS competitors a free AI product | **BSL 1.1** |
|
|
||||||
|
|
||||||
When in doubt, BSL 1.1. See the [licensing guide](licensing.md) for the full decision tree.
|
|
||||||
|
|
||||||
## Versioning
|
|
||||||
|
|
||||||
cf-core uses semantic versioning. Adding a new module with a stable API is a **minor** version bump. Breaking an existing interface is a **major** bump and requires coordinated updates to all products.
|
|
||||||
|
|
||||||
Update `pyproject.toml` and `CHANGELOG.md` before merging.
|
|
||||||
|
|
@ -1,74 +0,0 @@
|
||||||
# Editable Install Pattern
|
|
||||||
|
|
||||||
CircuitForge products depend on cf-core via `pip install -e` (editable install) from a local clone, not from a package registry. This is a deliberate architectural choice that makes the development loop fast and the dependency relationship explicit.
|
|
||||||
|
|
||||||
## How it works
|
|
||||||
|
|
||||||
`pip install -e /path/to/circuitforge-core` installs the package in "editable" mode: instead of copying files into `site-packages`, pip creates a `.pth` file pointing at the source directory. Python imports resolve directly from the cloned repo.
|
|
||||||
|
|
||||||
This means:
|
|
||||||
- Changes to cf-core source take effect immediately in all products — no reinstall needed
|
|
||||||
- Restarting the product process (or Docker container) is sufficient to pick up changes
|
|
||||||
- `git pull` in the cf-core repo automatically affects all products using it
|
|
||||||
|
|
||||||
## Docker considerations
|
|
||||||
|
|
||||||
In Docker, editable install requires the cf-core source to be present inside the container at build time. Two patterns:
|
|
||||||
|
|
||||||
**Pattern A: COPY at build time (production)**
|
|
||||||
|
|
||||||
```dockerfile
|
|
||||||
COPY circuitforge-core/ /circuitforge-core/
|
|
||||||
RUN pip install -e /circuitforge-core
|
|
||||||
```
|
|
||||||
|
|
||||||
The build context must include the cf-core directory. `compose.yml` sets the build context to the parent directory:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
services:
|
|
||||||
api:
|
|
||||||
build:
|
|
||||||
context: .. # parent of both product and cf-core
|
|
||||||
dockerfile: myproduct/Dockerfile
|
|
||||||
```
|
|
||||||
|
|
||||||
**Pattern B: Bind-mount for dev**
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# compose.override.yml (dev only, gitignored)
|
|
||||||
services:
|
|
||||||
api:
|
|
||||||
volumes:
|
|
||||||
- ../circuitforge-core:/circuitforge-core:ro
|
|
||||||
```
|
|
||||||
|
|
||||||
This lets you edit cf-core and restart the container without rebuilding the image.
|
|
||||||
|
|
||||||
## Python `.pyc` cache gotcha
|
|
||||||
|
|
||||||
Python caches compiled bytecode in `__pycache__/` directories and `.pyc` files. When cf-core source is updated but the product hasn't been restarted, the old `.pyc` files can serve stale code even with the bind-mount in place.
|
|
||||||
|
|
||||||
Fix: delete `.pyc` files and restart:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
find /path/to/circuitforge-core -name "*.pyc" -delete
|
|
||||||
docker compose restart api
|
|
||||||
```
|
|
||||||
|
|
||||||
This is especially common when fixing an import error — the old `ImportError` may persist even after the fix if the bytecode cache isn't cleared.
|
|
||||||
|
|
||||||
## When to reinstall
|
|
||||||
|
|
||||||
A full `pip install -e .` reinstall is needed when:
|
|
||||||
- `pyproject.toml` changes (new dependencies, entry points, package metadata)
|
|
||||||
- A new subpackage directory is added (pip needs to discover it)
|
|
||||||
- The `.egg-info` directory gets corrupted (delete it and reinstall)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Reinstall in the cf env
|
|
||||||
conda run -n cf pip install -e /Library/Development/CircuitForge/circuitforge-core
|
|
||||||
```
|
|
||||||
|
|
||||||
## Future: Forgejo Packages
|
|
||||||
|
|
||||||
When cf-core reaches a stable enough interface (currently targeting "third product shipped"), it will be published to the Circuit-Forge Forgejo private PyPI registry. Products will then depend on it via version pin, and the editable install will be for development only. The shim pattern is designed to make this transition smooth — product code stays the same; only where the package is installed from changes.
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
# BSL vs MIT — Licensing Boundaries
|
|
||||||
|
|
||||||
circuitforge-core contains both MIT and BSL 1.1 licensed code. Understanding the boundary matters for contributors and for deciding where new modules belong.
|
|
||||||
|
|
||||||
## The rule
|
|
||||||
|
|
||||||
| Code category | License |
|
|
||||||
|---------------|---------|
|
|
||||||
| Discovery, ingestion, data pipeline | **MIT** |
|
|
||||||
| LLM inference, AI generation, fine-tuned model access | **BSL 1.1** |
|
|
||||||
| UI scaffolding, process management | **MIT** |
|
|
||||||
| Tier gates, license validation | **BSL 1.1** |
|
|
||||||
| Database, storage, configuration | **MIT** |
|
|
||||||
|
|
||||||
**Heuristic:** If a competitor could use the module to build a commercial AI product without building the hard parts themselves, it's BSL 1.1. If it's plumbing that any software project might need, it's MIT.
|
|
||||||
|
|
||||||
## BSL 1.1 in practice
|
|
||||||
|
|
||||||
BSL 1.1 means:
|
|
||||||
- Free for personal non-commercial self-hosting
|
|
||||||
- Free for internal business use (using the software, not selling it)
|
|
||||||
- Commercial SaaS re-hosting requires a paid license from Circuit Forge LLC
|
|
||||||
- Converts to MIT after 4 years
|
|
||||||
|
|
||||||
"Commercial SaaS re-hosting" means: taking cf-core's AI features and building a competing product that charges users for them without a license. BSL 1.1 does NOT restrict:
|
|
||||||
- Running cf-core on your own server for your own use
|
|
||||||
- Modifying cf-core for personal use
|
|
||||||
- Contributing back to cf-core
|
|
||||||
|
|
||||||
## What this means for contributors
|
|
||||||
|
|
||||||
If you're adding a module:
|
|
||||||
- Add MIT code to the `MIT` section of `pyproject.toml`
|
|
||||||
- Add BSL 1.1 code to the `BSL` section
|
|
||||||
- Don't mix MIT and BSL code in the same module
|
|
||||||
- If uncertain, ask before submitting — wrong license on a module causes legal headaches
|
|
||||||
|
|
||||||
## The `Co-Authored-By` policy
|
|
||||||
|
|
||||||
Do NOT add `Co-Authored-By: Claude` (or any AI attribution trailer) to commits in CircuitForge repos. This is required for BSL 1.1 commercial viability — AI-assisted code with attribution claims can complicate licensing in ways that affect the ability to enforce BSL terms.
|
|
||||||
|
|
||||||
This is not about hiding AI use. It's a legal precaution for a company that depends on BSL enforcement to fund its mission.
|
|
||||||
|
|
||||||
## BSL conversion timeline
|
|
||||||
|
|
||||||
| Module | BSL since | MIT date |
|
|
||||||
|--------|-----------|----------|
|
|
||||||
| `tiers` | 2025-01-01 | 2029-01-01 |
|
|
||||||
| `llm` | 2025-01-01 | 2029-01-01 |
|
|
||||||
|
|
||||||
The conversion dates are tracked in `LICENSE` and will be updated as modules are added.
|
|
||||||
|
|
@ -1,58 +0,0 @@
|
||||||
# Installation
|
|
||||||
|
|
||||||
circuitforge-core is distributed as an editable install from a local clone. It is not yet on PyPI.
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
- Python 3.11+
|
|
||||||
- A conda environment (CircuitForge uses `cf` by convention; older envs may be named `job-seeker`)
|
|
||||||
- The `circuitforge-core` repo cloned alongside your product repo
|
|
||||||
|
|
||||||
## Typical layout
|
|
||||||
|
|
||||||
```
|
|
||||||
/Library/Development/CircuitForge/
|
|
||||||
├── circuitforge-core/ ← this repo
|
|
||||||
├── kiwi/
|
|
||||||
├── peregrine/
|
|
||||||
├── snipe/
|
|
||||||
└── ...
|
|
||||||
```
|
|
||||||
|
|
||||||
## Install
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# From inside a product repo, assuming circuitforge-core is a sibling
|
|
||||||
conda run -n cf pip install -e ../circuitforge-core
|
|
||||||
|
|
||||||
# Or activate first, then install
|
|
||||||
conda activate cf
|
|
||||||
pip install -e ../circuitforge-core
|
|
||||||
```
|
|
||||||
|
|
||||||
The editable install means changes to circuitforge-core source are picked up by all products without reinstalling. You only need to restart the product's process (or its Docker container, if running in Docker) after making changes.
|
|
||||||
|
|
||||||
## Verify
|
|
||||||
|
|
||||||
```python
|
|
||||||
import circuitforge_core
|
|
||||||
print(circuitforge_core.__version__) # 0.9.0
|
|
||||||
```
|
|
||||||
|
|
||||||
## Inside Docker
|
|
||||||
|
|
||||||
Product Dockerfiles copy or mount both the product source and cf-core:
|
|
||||||
|
|
||||||
```dockerfile
|
|
||||||
# Copy cf-core alongside product source
|
|
||||||
COPY --from=build /circuitforge-core /circuitforge-core
|
|
||||||
RUN pip install -e /circuitforge-core
|
|
||||||
```
|
|
||||||
|
|
||||||
The `compose.yml` for each product typically bind-mounts both directories in dev mode so live edits propagate without rebuilding the image.
|
|
||||||
|
|
||||||
## Upgrading
|
|
||||||
|
|
||||||
cf-core follows semantic versioning. Since it's an editable install, `git pull` in the cf-core repo followed by a restart of the product process is sufficient — no reinstall needed for pure Python changes.
|
|
||||||
|
|
||||||
For schema changes (new migrations) or new module dependencies, check the CHANGELOG for any additional steps.
|
|
||||||
|
|
@ -1,89 +0,0 @@
|
||||||
# Using cf-core in a Product
|
|
||||||
|
|
||||||
After [installation](installation.md), import modules directly from the package. Each module is independent — import only what you need.
|
|
||||||
|
|
||||||
## Minimal wiring example
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.config import Settings
|
|
||||||
from circuitforge_core.db import get_db
|
|
||||||
from circuitforge_core.tiers import require_tier
|
|
||||||
from circuitforge_core.llm import LLMRouter
|
|
||||||
|
|
||||||
settings = Settings()
|
|
||||||
db = get_db(settings.db_path)
|
|
||||||
router = LLMRouter(settings)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Module shim pattern
|
|
||||||
|
|
||||||
Products that need to extend or override cf-core behavior use a shim module. This is the recommended pattern — it keeps product-specific config resolution separate from the shared implementation.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# myproduct/app/llm_router.py — shim
|
|
||||||
from circuitforge_core.llm.router import LLMRouter as _BaseLLMRouter
|
|
||||||
from .config import get_settings
|
|
||||||
|
|
||||||
class LLMRouter(_BaseLLMRouter):
|
|
||||||
def __init__(self):
|
|
||||||
settings = get_settings()
|
|
||||||
super().__init__(
|
|
||||||
config_path=settings.llm_config_path,
|
|
||||||
cloud_mode=settings.cloud_mode,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Product code then imports from the shim, never directly from cf-core. This means tri-level config resolution (env → config file → defaults) and cloud mode wiring stay in one place.
|
|
||||||
|
|
||||||
!!! warning "Never import cf-core modules directly in scripts"
|
|
||||||
    Always import from the product shim. Bypassing the shim silently breaks cloud mode and config resolution. See [Peregrine's `llm_router` shim](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine) for the reference implementation.
|
|
||||||
|
|
||||||
## Per-user isolation (cloud mode)
|
|
||||||
|
|
||||||
When `CLOUD_MODE=true`, products use per-user SQLite trees rather than a shared database. cf-core's `db` module provides the factory; products implement their own `cloud_session.py` to resolve the per-user path from the `X-CF-Session` JWT header.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# In a FastAPI endpoint with cloud mode
|
|
||||||
from .cloud_session import get_user_db_path
|
|
||||||
from circuitforge_core.db import get_db
|
|
||||||
|
|
||||||
@router.get("/items")
|
|
||||||
async def list_items(request: Request):
|
|
||||||
db_path = get_user_db_path(request)
|
|
||||||
db = get_db(db_path)
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
## Tier gates
|
|
||||||
|
|
||||||
Apply the `@require_tier` decorator to any endpoint or function that should be restricted:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.tiers import require_tier
|
|
||||||
|
|
||||||
@router.post("/suggest")
|
|
||||||
@require_tier("paid")
|
|
||||||
async def suggest_recipe(request: Request):
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
The decorator reads the user's tier from the request context (via Heimdall JWT validation) and raises an HTTP `403` error if the tier is insufficient.
|
|
||||||
|
|
||||||
## Background tasks with VRAM awareness
|
|
||||||
|
|
||||||
Use `TaskScheduler` for any LLM inference that should be queued rather than run inline:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.tasks import TaskScheduler
|
|
||||||
|
|
||||||
scheduler = TaskScheduler(service_name="myproduct", coordinator_url=settings.coordinator_url)
|
|
||||||
|
|
||||||
async def enqueue_generation(item_id: str):
|
|
||||||
await scheduler.submit(
|
|
||||||
task_type="generate",
|
|
||||||
payload={"item_id": item_id},
|
|
||||||
vram_gb=4.0,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
See the [tasks module reference](../modules/tasks.md) for the full API.
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
# circuitforge-core
|
|
||||||
|
|
||||||
Shared scaffold for all CircuitForge products. Every product in the menagerie depends on it via editable install.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -e ../circuitforge-core
|
|
||||||
# or inside conda:
|
|
||||||
conda run -n cf pip install -e ../circuitforge-core
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## What it provides
|
|
||||||
|
|
||||||
circuitforge-core gives every product the same foundation so patterns proven in one product propagate to all others automatically. The 17 modules cover the full stack from database access to LLM routing to tier gates.
|
|
||||||
|
|
||||||
```
|
|
||||||
circuitforge_core/
|
|
||||||
├── db/ SQLite factory + migration runner
|
|
||||||
├── llm/ LLM router with fallback chain
|
|
||||||
├── tiers/ Tier gates — free / paid / premium / ultra
|
|
||||||
├── config/ Env-driven settings + .env loader
|
|
||||||
├── hardware/ GPU/CPU detection + VRAM profile generation
|
|
||||||
├── documents/ PDF, DOCX, image OCR → StructuredDocument
|
|
||||||
├── affiliates/ URL wrapping with opt-out + BYOK user IDs
|
|
||||||
├── preferences/ Per-user YAML preference store (dot-path API)
|
|
||||||
├── tasks/ VRAM-aware background task scheduler
|
|
||||||
├── manage/ Cross-platform process manager (Docker + native)
|
|
||||||
├── resources/ VRAM allocation + eviction engine
|
|
||||||
├── text/ Text processing utilities
|
|
||||||
├── stt/ Speech-to-text router (stub)
|
|
||||||
├── tts/ Text-to-speech router (stub)
|
|
||||||
├── pipeline/ Staging queue base — StagingDB (stub)
|
|
||||||
├── vision/ Vision router base class (stub)
|
|
||||||
└── wizard/ First-run wizard base class (stub)
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Module status
|
|
||||||
|
|
||||||
| Module | Status | Purpose |
|
|
||||||
|--------|--------|---------|
|
|
||||||
| `db` | Stable | SQLite connection factory, migration runner |
|
|
||||||
| `llm` | Stable | LLM fallback router (Ollama, vLLM, Anthropic, OpenAI-compatible) |
|
|
||||||
| `tiers` | Stable | `@require_tier()` decorator, BYOK unlock logic |
|
|
||||||
| `config` | Stable | Env-driven settings, `.env` loader |
|
|
||||||
| `hardware` | Stable | GPU enumeration, VRAM tier profiling |
|
|
||||||
| `documents` | Stable | PDF/DOCX/image ingestion → `StructuredDocument` |
|
|
||||||
| `affiliates` | Stable | `wrap_url()` with opt-out and BYOK user IDs |
|
|
||||||
| `preferences` | Stable | Dot-path `get()`/`set()` over local YAML; pluggable backend |
|
|
||||||
| `tasks` | Stable | `TaskScheduler` — VRAM-aware slot management |
|
|
||||||
| `manage` | Stable | `manage.sh` scaffolding for Docker and native processes |
|
|
||||||
| `resources` | Stable | VRAM allocation, eviction engine, GPU profile registry |
|
|
||||||
| `text` | Stable | Text normalization, truncation, chunking utilities |
|
|
||||||
| `stt` | Stub | Speech-to-text router (planned: whisper.cpp / faster-whisper) |
|
|
||||||
| `tts` | Stub | Text-to-speech router (planned: piper / espeak) |
|
|
||||||
| `pipeline` | Stub | `StagingDB` base — products provide concrete schema |
|
|
||||||
| `vision` | Stub | Vision router base class (moondream2 / Claude dispatch) |
|
|
||||||
| `wizard` | Stub | `BaseWizard` — products subclass for first-run setup |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Version
|
|
||||||
|
|
||||||
**v0.9.0** — MIT licensed for discovery/pipeline layers, BSL 1.1 for AI features.
|
|
||||||
|
|
||||||
See the [developer guide](developer/adding-module.md) to add a new module.
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
||||||
# affiliates
|
|
||||||
|
|
||||||
Affiliate URL wrapping with user opt-out and BYOK user IDs. Shared across all CircuitForge products that surface external purchase or listing links.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.affiliates import wrap_url
|
|
||||||
```
|
|
||||||
|
|
||||||
## Design principle
|
|
||||||
|
|
||||||
Affiliate links are disclosed to users and opt-out is always one click away. CF earns a small commission when users buy through wrapped links; this is the primary monetization path for free-tier products. The implementation is transparent: no dark patterns, no hidden redirects.
|
|
||||||
|
|
||||||
## `wrap_url(url, user_id=None, product=None) -> str`
|
|
||||||
|
|
||||||
Wraps a URL with the configured affiliate parameters. Returns the original URL unchanged if:
|
|
||||||
- Affiliate links are disabled globally (`CF_AFFILIATES_ENABLED=false`)
|
|
||||||
- The user has opted out (`preferences.get("affiliates.opted_out")`)
|
|
||||||
- The domain is not in the supported affiliate network list
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.affiliates import wrap_url
|
|
||||||
|
|
||||||
wrapped = wrap_url(
|
|
||||||
"https://www.ebay.com/itm/123456",
|
|
||||||
user_id="user_abc123",
|
|
||||||
product="snipe",
|
|
||||||
)
|
|
||||||
# → "https://www.ebay.com/itm/123456?mkrid=711-53200-19255-0&campid=CF_SNIPE_abc123&..."
|
|
||||||
```
|
|
||||||
|
|
||||||
## User opt-out
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.preferences import get_prefs
|
|
||||||
|
|
||||||
prefs = get_prefs(user_id)
|
|
||||||
prefs.set("affiliates.opted_out", True)
|
|
||||||
```
|
|
||||||
|
|
||||||
When `opted_out` is `True`, `wrap_url()` returns the bare URL. The UI should surface this setting prominently — never bury it.
|
|
||||||
|
|
||||||
## BYOK user IDs
|
|
||||||
|
|
||||||
BYOK (bring-your-own-key) users — those with their own license key or API key — get a unique affiliate sub-ID so their contributions are tracked separately. This is handled automatically when a `user_id` is passed.
|
|
||||||
|
|
||||||
## Supported networks
|
|
||||||
|
|
||||||
| Product | Network | Notes |
|
|
||||||
|---------|---------|-------|
|
|
||||||
| Snipe | eBay Partner Network | `campid` encodes product + user |
|
|
||||||
| Kiwi | Amazon Associates (planned) | For pantry staples / equipment |
|
|
||||||
| Waxwing | Various garden suppliers (planned) | |
|
|
||||||
|
|
||||||
## Environment variables
|
|
||||||
|
|
||||||
```bash
|
|
||||||
CF_AFFILIATES_ENABLED=true # global kill switch
|
|
||||||
CF_EBAY_CAMPAIGN_ID=your_campaign # eBay Partner Network campaign ID
|
|
||||||
CF_AMAZON_ASSOCIATE_TAG=your_tag # Amazon Associates tag
|
|
||||||
```
|
|
||||||
|
|
@ -1,80 +0,0 @@
|
||||||
# config
|
|
||||||
|
|
||||||
Env-driven settings with `.env` file loading. Provides a base `Settings` class that products subclass to add their own fields.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.config import Settings
|
|
||||||
```
|
|
||||||
|
|
||||||
## Design
|
|
||||||
|
|
||||||
Configuration follows a strict priority order: **environment variables > `.env` file > defaults**. This means Docker compose `environment:` overrides always win, which is essential for cloud vs local deployment switching without image rebuilds.
|
|
||||||
|
|
||||||
## Base Settings
|
|
||||||
|
|
||||||
```python
|
|
||||||
class Settings(BaseSettings):
|
|
||||||
# Database
|
|
||||||
db_path: str = "data/app.db"
|
|
||||||
|
|
||||||
# LLM
|
|
||||||
llm_config_path: str = "config/llm.yaml"
|
|
||||||
|
|
||||||
# Tier system
|
|
||||||
license_key: str | None = None
|
|
||||||
cloud_mode: bool = False
|
|
||||||
|
|
||||||
# Cloud
|
|
||||||
cloud_data_root: str = "/devl/app-cloud-data"
|
|
||||||
cloud_auth_bypass_ips: list[str] = []
|
|
||||||
coordinator_url: str = "http://10.1.10.71:7700"
|
|
||||||
|
|
||||||
class Config:
|
|
||||||
env_file = ".env"
|
|
||||||
env_file_encoding = "utf-8"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Extending in a product
|
|
||||||
|
|
||||||
```python
|
|
||||||
# myproduct/app/core/config.py
|
|
||||||
from circuitforge_core.config import Settings as _BaseSettings
|
|
||||||
|
|
||||||
class Settings(_BaseSettings):
|
|
||||||
# Product-specific settings
|
|
||||||
max_pantry_items: int = 500
|
|
||||||
barcode_timeout_ms: int = 5000
|
|
||||||
recipe_corpus_path: str = "data/recipes.db"
|
|
||||||
|
|
||||||
class Config(_BaseSettings.Config):
|
|
||||||
env_prefix = "MYPRODUCT_"
|
|
||||||
```
|
|
||||||
|
|
||||||
## `.env` file
|
|
||||||
|
|
||||||
Each product ships a `.env.example` (committed) and a `.env` (gitignored). The `.env` file is loaded automatically by the `Settings` class.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# .env.example
|
|
||||||
DB_PATH=data/app.db
|
|
||||||
CLOUD_MODE=false
|
|
||||||
LICENSE_KEY=
|
|
||||||
```
|
|
||||||
|
|
||||||
!!! tip "Never commit `.env`"
|
|
||||||
`.env` files contain secrets and environment-specific paths. Always commit `.env.example` instead.
|
|
||||||
|
|
||||||
## Singleton pattern
|
|
||||||
|
|
||||||
Products typically expose a cached `get_settings()` function:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from functools import lru_cache
|
|
||||||
from .config import Settings
|
|
||||||
|
|
||||||
@lru_cache(maxsize=1)
|
|
||||||
def get_settings() -> Settings:
|
|
||||||
return Settings()
|
|
||||||
```
|
|
||||||
|
|
||||||
This ensures the `.env` file is read only once (on the first call to `get_settings()`), and all modules share the same settings instance.
|
|
||||||
|
|
@ -1,56 +0,0 @@
|
||||||
# db
|
|
||||||
|
|
||||||
SQLite connection factory and migration runner. Every CircuitForge product uses this for all persistent storage.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.db import get_db, run_migrations
|
|
||||||
```
|
|
||||||
|
|
||||||
## Why SQLite
|
|
||||||
|
|
||||||
SQLite is local-first by nature — no server process, no network dependency, trivially backed up, and fast enough for single-user workloads. circuitforge-core's `db` module adds migration management and connection pooling on top.
|
|
||||||
|
|
||||||
## API
|
|
||||||
|
|
||||||
### `get_db(path: str | Path) -> Connection`
|
|
||||||
|
|
||||||
Returns a SQLite connection to the database at `path`. Creates the file if it doesn't exist. Enables WAL mode, foreign keys, and sets a sensible busy timeout by default.
|
|
||||||
|
|
||||||
```python
|
|
||||||
db = get_db("/devl/kiwi-data/kiwi.db")
|
|
||||||
```
|
|
||||||
|
|
||||||
In cloud mode, the path comes from the per-user session resolver — never hardcode `DB_PATH` directly in endpoints. Use `_request_db.get() or DB_PATH` or a product shim.
|
|
||||||
|
|
||||||
### `run_migrations(db: Connection, migrations_dir: str | Path)`
|
|
||||||
|
|
||||||
Discovers and applies all `.sql` files in `migrations_dir` that haven't yet been applied, in filename order. Migration state is tracked in a `_migrations` table created on first run.
|
|
||||||
|
|
||||||
```python
|
|
||||||
run_migrations(db, "app/db/migrations/")
|
|
||||||
```
|
|
||||||
|
|
||||||
**Migration file naming:** `001_initial.sql`, `002_add_column.sql`, etc. Always prefix with zero-padded integers. Never renumber or delete applied migrations.
|
|
||||||
|
|
||||||
### `RETURNING *` gotcha
|
|
||||||
|
|
||||||
SQLite added `RETURNING *` in version 3.35 (2021). When using it, fetch the returned row before committing:
|
|
||||||
|
|
||||||
```python
|
|
||||||
cursor = db.execute("INSERT INTO items (...) VALUES (?) RETURNING *", (...,))
|
|
||||||
row = cursor.fetchone() # fetch BEFORE commit — row disappears after commit
|
|
||||||
db.commit()
|
|
||||||
```
|
|
||||||
|
|
||||||
This is a known SQLite behavior that differs from PostgreSQL. cf-core does not paper over it; fetch before committing.
|
|
||||||
|
|
||||||
## Migration conventions
|
|
||||||
|
|
||||||
- Files go in `app/db/migrations/` inside each product repo
|
|
||||||
- One concern per file — don't combine unrelated schema changes
|
|
||||||
- Never use `ALTER TABLE` to rename columns (not supported in SQLite < 3.25); add a new column and migrate data instead
|
|
||||||
- Use `IF NOT EXISTS` and `IF EXISTS` guards to make migrations idempotent
|
|
||||||
|
|
||||||
## Cloud mode
|
|
||||||
|
|
||||||
In cloud mode, each user gets their own SQLite file under `CLOUD_DATA_ROOT`. The `db` module is unaware of this; the product's `cloud_session.py` resolves the per-user path before calling `get_db()`.
|
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
# documents
|
|
||||||
|
|
||||||
Document ingestion pipeline. Converts PDF, DOCX, ODT, and images into a normalized `StructuredDocument` for downstream processing.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.documents import ingest, StructuredDocument
|
|
||||||
```
|
|
||||||
|
|
||||||
## Supported formats
|
|
||||||
|
|
||||||
| Format | Method | Notes |
|
|
||||||
|--------|--------|-------|
|
|
||||||
| PDF | `pdfplumber` | Two-column detection via gutter analysis |
|
|
||||||
| DOCX | `python-docx` | Paragraph and table extraction |
|
|
||||||
| ODT | stdlib `zipfile` + `ElementTree` | No external deps required |
|
|
||||||
| PNG/JPG | cf-docuvision fast-path, local fallback | OCR via vision router |
|
|
||||||
|
|
||||||
## `ingest(path: str | Path) -> StructuredDocument`
|
|
||||||
|
|
||||||
Main entry point. Detects format by file extension and routes to the appropriate parser.
|
|
||||||
|
|
||||||
```python
|
|
||||||
doc = ingest("/tmp/invoice.pdf")
|
|
||||||
print(doc.text) # full extracted text
|
|
||||||
print(doc.pages) # list of per-page content
|
|
||||||
print(doc.metadata) # title, author, creation date if available
|
|
||||||
```
|
|
||||||
|
|
||||||
## StructuredDocument
|
|
||||||
|
|
||||||
```python
|
|
||||||
@dataclass
|
|
||||||
class StructuredDocument:
|
|
||||||
text: str # full plain text
|
|
||||||
pages: list[str] # per-page text (PDFs)
|
|
||||||
sections: dict[str, str] # named sections if detected
|
|
||||||
metadata: dict[str, Any] # format-specific metadata
|
|
||||||
source_path: str
|
|
||||||
format: str # "pdf" | "docx" | "odt" | "image"
|
|
||||||
```
|
|
||||||
|
|
||||||
## PDF specifics
|
|
||||||
|
|
||||||
Two-column PDFs (common in resumes and academic papers) are handled by `_find_column_split()`, which detects the gutter via word x-positions and extracts left and right columns separately before merging.
|
|
||||||
|
|
||||||
CID glyph references (`(cid:NNN)`) from fonts re-embedded by applicant tracking systems (ATS) are stripped automatically. Common bullet CIDs (127, 149, 183) are mapped to `•`.
|
|
||||||
|
|
||||||
## OCR path
|
|
||||||
|
|
||||||
Image inputs go through the vision router (see the [vision module](vision.md)). In practice this means:
|
|
||||||
|
|
||||||
1. cf-docuvision fast-path (if available on the cf-orch coordinator)
|
|
||||||
2. Local moondream2 fallback
|
|
||||||
|
|
||||||
OCR results are treated as unstructured text — no section detection is attempted.
|
|
||||||
|
|
||||||
## ATS gotcha
|
|
||||||
|
|
||||||
Some ATS-exported PDFs embed fonts in ways that cause `pdfplumber` to extract garbled text. If `doc.text` looks corrupted (common with Oracle Taleo exports), force the image/OCR fallback by passing `force_ocr=True`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
doc = ingest(path, force_ocr=True)
|
|
||||||
```
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
# hardware
|
|
||||||
|
|
||||||
GPU enumeration and VRAM-tier profile generation. Used by `manage.sh` at startup to recommend a Docker Compose profile and by the cf-orch coordinator for resource allocation.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.hardware import get_gpus, recommend_profile, HardwareProfile
|
|
||||||
```
|
|
||||||
|
|
||||||
## GPU detection
|
|
||||||
|
|
||||||
`get_gpus()` returns a list of detected GPUs with their VRAM capacity. Detection strategy:
|
|
||||||
|
|
||||||
1. Try `nvidia-smi` (Linux/Windows NVIDIA)
|
|
||||||
2. Fall back to `system_profiler SPDisplaysDataType` on Darwin when `hw.optional.arm64=1` (Apple Silicon)
|
|
||||||
3. Return CPU-only profile if neither succeeds
|
|
||||||
|
|
||||||
```python
|
|
||||||
gpus = get_gpus()
|
|
||||||
# [{"name": "RTX 4090", "vram_gb": 24.0, "type": "nvidia"},
|
|
||||||
# {"name": "Apple M2 Max", "vram_gb": 32.0, "type": "apple_silicon"}]
|
|
||||||
```
|
|
||||||
|
|
||||||
## Compose profile recommendation
|
|
||||||
|
|
||||||
```python
|
|
||||||
profile = recommend_profile(gpus)
|
|
||||||
# "single-gpu" | "dual-gpu" | "cpu" | "remote"
|
|
||||||
```
|
|
||||||
|
|
||||||
Profile selection rules:
|
|
||||||
- `single-gpu`: one NVIDIA GPU with >= 8 GB VRAM
|
|
||||||
- `dual-gpu`: two or more NVIDIA GPUs
|
|
||||||
- `cpu`: no NVIDIA GPU (Apple Silicon uses `cpu` since Docker on Mac has no Metal passthrough)
|
|
||||||
- `remote`: explicitly requested or when local inference would exceed available VRAM
|
|
||||||
|
|
||||||
!!! note "Apple Silicon"
|
|
||||||
    Apple Silicon Macs should run Ollama natively (outside Docker) for Metal-accelerated inference. Docker on macOS runs in a Linux VM with no Metal passthrough. `preflight.py` in each product detects a native Ollama instance listening on port 11434 and adopts it automatically.
|
|
||||||
|
|
||||||
## VRAM tiers
|
|
||||||
|
|
||||||
| VRAM | Models that fit |
|
|
||||||
|------|----------------|
|
|
||||||
| < 4 GB | Quantized 1B–3B models (Phi-3 mini, Llama 3.2 3B Q4) |
|
|
||||||
| 4–8 GB | 7B–8B models Q4 (Llama 3.1 8B, Mistral 7B) |
|
|
||||||
| 8–16 GB | 13B–14B models Q4, 7B models in full precision |
|
|
||||||
| 16–24 GB | 30B models Q4, 13B full precision |
|
|
||||||
| 24 GB+ | 70B models Q4 |
|
|
||||||
|
|
||||||
## HardwareProfile
|
|
||||||
|
|
||||||
The `HardwareProfile` dataclass is written to `compose.override.yml` by `preflight.py` at product startup, making GPU capabilities available to Docker Compose without hardcoding.
|
|
||||||
|
|
@ -1,23 +0,0 @@
|
||||||
# Module Reference
|
|
||||||
|
|
||||||
All circuitforge-core modules live under the `circuitforge_core` package. Each is independently importable.
|
|
||||||
|
|
||||||
| Module | Import | Status | One-line summary |
|
|
||||||
|--------|--------|--------|-----------------|
|
|
||||||
| [db](db.md) | `circuitforge_core.db` | Stable | SQLite connection factory + migration runner |
|
|
||||||
| [llm](llm.md) | `circuitforge_core.llm` | Stable | LLM router with fallback chain |
|
|
||||||
| [tiers](tiers.md) | `circuitforge_core.tiers` | Stable | `@require_tier()` decorator, BYOK unlock |
|
|
||||||
| [config](config.md) | `circuitforge_core.config` | Stable | Env-driven settings, `.env` loader |
|
|
||||||
| [hardware](hardware.md) | `circuitforge_core.hardware` | Stable | GPU/CPU detection, VRAM profile generation |
|
|
||||||
| [documents](documents.md) | `circuitforge_core.documents` | Stable | Document ingestion → `StructuredDocument` |
|
|
||||||
| [affiliates](affiliates.md) | `circuitforge_core.affiliates` | Stable | `wrap_url()` with opt-out + BYOK user IDs |
|
|
||||||
| [preferences](preferences.md) | `circuitforge_core.preferences` | Stable | Dot-path preference store over local YAML |
|
|
||||||
| [tasks](tasks.md) | `circuitforge_core.tasks` | Stable | VRAM-aware background task scheduler |
|
|
||||||
| [manage](manage.md) | `circuitforge_core.manage` | Stable | `manage.sh` scaffolding, Docker + native |
|
|
||||||
| [resources](resources.md) | `circuitforge_core.resources` | Stable | VRAM allocation + eviction engine |
|
|
||||||
| [text](text.md) | `circuitforge_core.text` | Stable | Text normalization, chunking utilities |
|
|
||||||
| [stt](stt.md) | `circuitforge_core.stt` | Stub | Speech-to-text router |
|
|
||||||
| [tts](tts.md) | `circuitforge_core.tts` | Stub | Text-to-speech router |
|
|
||||||
| [pipeline](pipeline.md) | `circuitforge_core.pipeline` | Stub | `StagingDB` base class |
|
|
||||||
| [vision](vision.md) | `circuitforge_core.vision` | Stub | Vision router base class |
|
|
||||||
| [wizard](wizard.md) | `circuitforge_core.wizard` | Stub | First-run wizard base class |
|
|
||||||
|
|
@ -1,88 +0,0 @@
|
||||||
# llm
|
|
||||||
|
|
||||||
LLM router with a configurable fallback chain. Abstracts over Ollama, vLLM, Anthropic, and any OpenAI-compatible backend. Products never talk to a specific LLM backend directly.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.llm import LLMRouter
|
|
||||||
```
|
|
||||||
|
|
||||||
## Design principle
|
|
||||||
|
|
||||||
The router implements "local inference first." Cloud backends sit at the end of the fallback chain. A product configured with only Ollama will never silently fall through to a paid API.
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
|
|
||||||
The router reads `config/llm.yaml` from the product's working directory (or the path passed to the constructor). Each product maintains its own `llm.yaml`; cf-core provides the router, not the config.
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# config/llm.yaml example
|
|
||||||
fallback_order:
|
|
||||||
- ollama
|
|
||||||
- vllm
|
|
||||||
- anthropic
|
|
||||||
|
|
||||||
ollama:
|
|
||||||
enabled: true
|
|
||||||
base_url: http://localhost:11434
|
|
||||||
model: llama3.2:3b
|
|
||||||
|
|
||||||
vllm:
|
|
||||||
enabled: false
|
|
||||||
base_url: http://localhost:8000
|
|
||||||
|
|
||||||
anthropic:
|
|
||||||
enabled: false
|
|
||||||
api_key_env: ANTHROPIC_API_KEY
|
|
||||||
```
|
|
||||||
|
|
||||||
## API
|
|
||||||
|
|
||||||
### `LLMRouter(config_path=None, cloud_mode=False)`
|
|
||||||
|
|
||||||
Instantiate the router. In most products, instantiation happens inside a shim that injects product-specific config resolution.
|
|
||||||
|
|
||||||
### `router.complete(prompt, system=None, images=None, fallback_order=None) -> str`
|
|
||||||
|
|
||||||
Send a completion request. Tries backends in order; falls through on error or unavailability.
|
|
||||||
|
|
||||||
```python
|
|
||||||
router = LLMRouter()
|
|
||||||
response = router.complete(
|
|
||||||
prompt="Summarize this recipe in one sentence.",
|
|
||||||
system="You are a cooking assistant.",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Pass `images: list[str]` (base64-encoded) for vision requests — non-vision backends are automatically skipped when images are present.
|
|
||||||
|
|
||||||
Pass `fallback_order=["vllm", "anthropic"]` to override the config chain for a specific call (useful for task-specific routing).
|
|
||||||
|
|
||||||
### `router.stream(prompt, system=None) -> Iterator[str]`
|
|
||||||
|
|
||||||
Streaming variant. Yields token chunks as they arrive. Not all backends support streaming; the router logs a warning and falls back to a non-streaming backend if needed.
|
|
||||||
|
|
||||||
## Shim requirement
|
|
||||||
|
|
||||||
!!! warning "Always use the product shim"
|
|
||||||
Scripts and endpoints must import `LLMRouter` from the product shim (`scripts/llm_router.py` or `app/llm_router.py`), never directly from `circuitforge_core.llm.router`. The shim handles tri-level config resolution (env vars override the config file, which in turn overrides the defaults) and cloud-mode wiring. Bypassing it silently breaks cloud deployments.
|
|
||||||
|
|
||||||
## Backends
|
|
||||||
|
|
||||||
| Backend | Type | Notes |
|
|
||||||
|---------|------|-------|
|
|
||||||
| `ollama` | Local | Preferred default; model names from `config/llm.yaml` |
|
|
||||||
| `vllm` | Local GPU | For high-throughput or large models |
|
|
||||||
| `anthropic` | Cloud | Requires `ANTHROPIC_API_KEY` env var |
|
|
||||||
| `openai` | Cloud | Any OpenAI-compatible endpoint |
|
|
||||||
| `claude_code` | Local wrapper | claude-bridge OpenAI-compatible wrapper on :3009 |
|
|
||||||
|
|
||||||
## Vision routing
|
|
||||||
|
|
||||||
When images are included in a `complete()` call, the router checks each backend's vision capability before trying it. Configure vision priority separately:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
vision_fallback_order:
|
|
||||||
- vision_service # local moondream2 via FastAPI on :8002
|
|
||||||
- claude_code
|
|
||||||
- anthropic
|
|
||||||
```
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
# manage
|
|
||||||
|
|
||||||
`manage.sh` scaffolding for cross-platform product process management. Every CircuitForge product ships a `manage.sh` generated from this module.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.manage import generate_manage_sh, ProcessManager
|
|
||||||
```
|
|
||||||
|
|
||||||
## Purpose
|
|
||||||
|
|
||||||
`manage.sh` is the single entry point for starting, stopping, restarting, and checking the status of a product. It abstracts over Docker Compose (production) and native Python processes (development without Docker).
|
|
||||||
|
|
||||||
## Commands
|
|
||||||
|
|
||||||
Every product's `manage.sh` supports:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bash manage.sh start # Start all services
|
|
||||||
bash manage.sh stop # Stop all services
|
|
||||||
bash manage.sh restart # Stop then start
|
|
||||||
bash manage.sh status # Print running state
|
|
||||||
bash manage.sh logs # Tail logs
|
|
||||||
bash manage.sh open # Open the product UI in a browser
|
|
||||||
bash manage.sh update # Pull latest and restart
|
|
||||||
```
|
|
||||||
|
|
||||||
Products add their own subcommands by extending the base script.
|
|
||||||
|
|
||||||
## Docker mode (production)
|
|
||||||
|
|
||||||
In Docker mode, `manage.sh` delegates to `docker compose`. The script auto-detects whether Docker is available and falls back to native mode if not.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# manage.sh internals (Docker mode)
|
|
||||||
docker compose -f compose.yml up -d
|
|
||||||
docker compose -f compose.yml logs -f
|
|
||||||
```
|
|
||||||
|
|
||||||
For cloud deployments, products have a `compose.cloud.yml` that's overlaid:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker compose -f compose.yml -f compose.cloud.yml up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
## Preflight
|
|
||||||
|
|
||||||
`manage.sh start` calls `preflight.py` before launching containers. Preflight:
|
|
||||||
1. Enumerates GPUs and writes a Docker Compose profile recommendation
|
|
||||||
2. Checks for port conflicts and auto-increments if needed
|
|
||||||
3. Detects external services (Ollama, vLLM, SearXNG) already running and adopts them via `compose.override.yml`
|
|
||||||
4. Writes the final `.env` for the current session
|
|
||||||
|
|
||||||
## Extending manage.sh
|
|
||||||
|
|
||||||
Products add subcommands by checking `$1` before the default case:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
case "$1" in
|
|
||||||
backfill)
|
|
||||||
conda run -n cf python scripts/backfill_keywords.py
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
# Default manage.sh handling
|
|
||||||
...
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
```
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
||||||
# pipeline
|
|
||||||
|
|
||||||
Staging queue base class. **Stub — partially implemented.**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.pipeline import StagingDB # base class
|
|
||||||
```
|
|
||||||
|
|
||||||
## Purpose
|
|
||||||
|
|
||||||
`StagingDB` is the base class for the staging layer that sits between discovery/ingestion and the main product workflow. Products subclass it to add their concrete schema.
|
|
||||||
|
|
||||||
The pattern:
|
|
||||||
```
|
|
||||||
Source (scraper / scan / upload)
|
|
||||||
→ StagingDB (raw, unreviewed records)
|
|
||||||
→ Human review / approval
|
|
||||||
→ Main product DB (approved records)
|
|
||||||
```
|
|
||||||
|
|
||||||
This is explicit in Peregrine (jobs go from `pending` → `approved` → `applied`) and analogous in Kiwi (receipts go from `uploaded` → `parsed` → `pantry`).
|
|
||||||
|
|
||||||
## Crystallization engine
|
|
||||||
|
|
||||||
The pipeline module also contains the crystallization engine: a system for promoting AI-generated drafts through a series of structured human-approval checkpoints before the output "crystallizes" into a permanent record.
|
|
||||||
|
|
||||||
Each stage in the pipeline has:
|
|
||||||
- An **AI step** that produces a draft
|
|
||||||
- A **human approval gate** that must be explicitly cleared
|
|
||||||
- A **rollback path** back to the previous stage if rejected
|
|
||||||
|
|
||||||
This is the architectural embodiment of the "LLMs as drafts, never decisions" principle.
|
|
||||||
|
|
||||||
## Current status
|
|
||||||
|
|
||||||
`StagingDB` base class exists and is used by Peregrine's job pipeline. The crystallization engine design is documented in `circuitforge-plans/shared/superpowers/specs/` and is being extracted into this module as it stabilizes across products.
|
|
||||||
|
|
||||||
## `StagingDB` base class
|
|
||||||
|
|
||||||
```python
|
|
||||||
class StagingDB:
|
|
||||||
def __init__(self, db: Connection):
|
|
||||||
self.db = db
|
|
||||||
|
|
||||||
def stage(self, record: dict) -> str:
|
|
||||||
"""Insert a record into staging. Returns record ID."""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def approve(self, record_id: str, reviewer_id: str | None = None):
|
|
||||||
"""Promote a record past the approval gate."""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def reject(self, record_id: str, reason: str | None = None):
|
|
||||||
"""Mark a record as rejected."""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def pending(self) -> list[dict]:
|
|
||||||
"""Return all records awaiting review."""
|
|
||||||
raise NotImplementedError
|
|
||||||
```
|
|
||||||
|
|
@ -1,76 +0,0 @@
|
||||||
# preferences
|
|
||||||
|
|
||||||
Per-user preference store. Provides a dot-path `get()`/`set()` API over a local YAML file, with a pluggable backend for cloud deployments.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.preferences import get_prefs, UserPreferences
|
|
||||||
```
|
|
||||||
|
|
||||||
## API
|
|
||||||
|
|
||||||
### `get_prefs(user_id: str | None = None) -> UserPreferences`
|
|
||||||
|
|
||||||
Returns the preference store for the given user. In local mode, `user_id` is ignored and a shared local file is used. In cloud mode, each user gets an isolated preference file under `CLOUD_DATA_ROOT`.
|
|
||||||
|
|
||||||
### `prefs.get(key: str, default=None) -> Any`
|
|
||||||
|
|
||||||
Dot-path key access. Returns `default` if the key doesn't exist.
|
|
||||||
|
|
||||||
```python
|
|
||||||
prefs = get_prefs()
|
|
||||||
theme = prefs.get("ui.theme", "light")
|
|
||||||
opted_out = prefs.get("affiliates.opted_out", False)
|
|
||||||
```
|
|
||||||
|
|
||||||
### `prefs.set(key: str, value: Any)`
|
|
||||||
|
|
||||||
Sets a value at the dot path. Creates intermediate keys as needed. Persists immediately.
|
|
||||||
|
|
||||||
```python
|
|
||||||
prefs.set("ui.theme", "dark")
|
|
||||||
prefs.set("dietary.restrictions", ["vegan", "gluten-free"])
|
|
||||||
```
|
|
||||||
|
|
||||||
### `prefs.delete(key: str)`
|
|
||||||
|
|
||||||
Removes a key. No-ops silently if the key doesn't exist.
|
|
||||||
|
|
||||||
## Accessibility preferences
|
|
||||||
|
|
||||||
The `preferences` module includes first-class support for accessibility needs under the `accessibility.*` namespace. These are surfaced in product settings UIs and respected throughout the UI layer.
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# Stored in user preferences
|
|
||||||
accessibility:
|
|
||||||
reduce_motion: true # No animations or transitions
|
|
||||||
high_contrast: false
|
|
||||||
font_size: large # small | medium | large | x-large
|
|
||||||
screen_reader_hints: true # Extra ARIA labels and descriptions
|
|
||||||
plain_language: true # Simplified text throughout UI
|
|
||||||
extra_confirmation_steps: true # Additional "are you sure?" prompts
|
|
||||||
```
|
|
||||||
|
|
||||||
Products should read these at render time and pass them to UI components. See the design philosophy for why users with ND/adaptive needs are a primary audience.
|
|
||||||
|
|
||||||
## Pluggable backend
|
|
||||||
|
|
||||||
The default backend is a local YAML file. Products can substitute a database backend for cloud deployments:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.preferences import get_prefs, SQLitePreferenceBackend
|
|
||||||
|
|
||||||
prefs = get_prefs(user_id, backend=SQLitePreferenceBackend(db_path))
|
|
||||||
```
|
|
||||||
|
|
||||||
## Storage format
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# ~/.local/share/circuitforge/myproduct/prefs.yaml (or per-user cloud path)
|
|
||||||
ui:
|
|
||||||
theme: dark
|
|
||||||
affiliates:
|
|
||||||
opted_out: false
|
|
||||||
dietary:
|
|
||||||
restrictions:
|
|
||||||
- vegan
|
|
||||||
```
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
# resources
|
|
||||||
|
|
||||||
VRAM allocation engine and GPU profile registry. Works alongside the [tasks](tasks.md) module to prevent GPU OOM errors across concurrent LLM workloads.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.resources import ResourceCoordinator, VRAMSlot
|
|
||||||
```
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
The resource coordinator runs as a sidecar alongside each product (via `compose.override.yml`) and registers with the cf-orch coordinator at `http://10.1.10.71:7700`. The coordinator maintains a global view of VRAM allocation across all products and all GPUs.
|
|
||||||
|
|
||||||
```
|
|
||||||
Product A (kiwi) ─┐
|
|
||||||
Product B (peregrine) ─┤ → cf-orch coordinator → GPU 0 (24GB)
|
|
||||||
Product C (snipe) ─┘ → GPU 1 (8GB)
|
|
||||||
```
|
|
||||||
|
|
||||||
## VRAM allocation
|
|
||||||
|
|
||||||
`VRAMSlot` represents a lease on a fixed VRAM budget:
|
|
||||||
|
|
||||||
```python
|
|
||||||
slot = VRAMSlot(service="kiwi", task_type="recipe_llm", vram_gb=4.0)
|
|
||||||
async with coordinator.lease(slot):
|
|
||||||
result = await run_inference(prompt)
|
|
||||||
# VRAM released automatically on context exit
|
|
||||||
```
|
|
||||||
|
|
||||||
If the requested VRAM is not available, the coordinator queues the request. Tasks are executed in FIFO order within each priority class.
|
|
||||||
|
|
||||||
## Eviction engine
|
|
||||||
|
|
||||||
When a high-priority task needs VRAM that is held by a lower-priority task, the eviction engine signals the lower-priority task to checkpoint and pause. Eviction is cooperative, not forced — tasks must implement the `checkpoint()` callback.
|
|
||||||
|
|
||||||
## GPU profile registry
|
|
||||||
|
|
||||||
The registry maps GPU models to capability profiles:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.resources import get_gpu_profile
|
|
||||||
|
|
||||||
profile = get_gpu_profile("RTX 4090")
|
|
||||||
# GpuProfile(vram_gb=24.0, fp16=True, int8=True, int4=True, max_batch=32)
|
|
||||||
```
|
|
||||||
|
|
||||||
Profiles are used by the LLM router to determine which model quantizations a GPU can run.
|
|
||||||
|
|
||||||
## Local fallback
|
|
||||||
|
|
||||||
When the cf-orch coordinator is not reachable (local dev without the sidecar), the resource coordinator falls back to a local-only mode: tasks run sequentially with no cross-product coordination. This is safe for development but should not be used in production if multiple products are running concurrently on the same GPU.
|
|
||||||
|
|
@ -1,27 +0,0 @@
|
||||||
# stt
|
|
||||||
|
|
||||||
Speech-to-text router. **Stub — not yet implemented.**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.stt import STTRouter # planned
|
|
||||||
```
|
|
||||||
|
|
||||||
## Planned design
|
|
||||||
|
|
||||||
The STT module will provide a unified interface over local speech-to-text backends, following the same fallback-chain pattern as the [LLM router](llm.md).
|
|
||||||
|
|
||||||
**Planned backends:**
|
|
||||||
- `whisper_cpp` — local, CPU/GPU, various model sizes
|
|
||||||
- `faster_whisper` — local, GPU-accelerated, CTranslate2 backend
|
|
||||||
- `whisper_openai` — cloud, requires `OPENAI_API_KEY`
|
|
||||||
|
|
||||||
**Planned use cases across the menagerie:**
|
|
||||||
- Osprey: transcribe hold music + IVR menu audio for navigation
|
|
||||||
- Linnet: real-time speech annotation (tone classification requires transcript)
|
|
||||||
- Peregrine: interview practice sessions
|
|
||||||
|
|
||||||
## Current status
|
|
||||||
|
|
||||||
The `circuitforge_core.stt` directory exists in-tree with a stub `__init__.py`. No working implementation yet. Planned for the milestone after Osprey reaches beta.
|
|
||||||
|
|
||||||
If you need STT before this module ships, use `faster-whisper` directly in the product and plan to migrate to this interface once it stabilizes.
|
|
||||||
|
|
@ -1,78 +0,0 @@
|
||||||
# tasks
|
|
||||||
|
|
||||||
VRAM-aware background task scheduler. Manages a queue of LLM inference jobs and coordinates VRAM allocation with the cf-orch coordinator before executing each task.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.tasks import TaskScheduler, get_scheduler, reset_scheduler
|
|
||||||
```
|
|
||||||
|
|
||||||
## Why VRAM-aware scheduling
|
|
||||||
|
|
||||||
Running multiple LLM inference jobs concurrently on a single GPU causes OOM errors and corrupted outputs. The scheduler serializes LLM work per service and negotiates with the cf-orch coordinator so tasks across multiple products don't compete for the same VRAM budget.
|
|
||||||
|
|
||||||
## Core API
|
|
||||||
|
|
||||||
### `get_scheduler() -> TaskScheduler`
|
|
||||||
|
|
||||||
Returns the singleton scheduler for the current process. Creates it on first call.
|
|
||||||
|
|
||||||
### `reset_scheduler()`
|
|
||||||
|
|
||||||
Tears down the scheduler (releases VRAM leases, cancels pending tasks). Called during FastAPI lifespan teardown.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# In FastAPI lifespan
|
|
||||||
from circuitforge_core.tasks import get_scheduler, reset_scheduler
|
|
||||||
|
|
||||||
@asynccontextmanager
|
|
||||||
async def lifespan(app: FastAPI):
|
|
||||||
scheduler = get_scheduler()
|
|
||||||
yield
|
|
||||||
reset_scheduler()
|
|
||||||
```
|
|
||||||
|
|
||||||
### `scheduler.submit(task_type, payload, vram_gb) -> str`
|
|
||||||
|
|
||||||
Enqueues a task. Returns the task ID. The scheduler acquires a VRAM lease from the coordinator before executing.
|
|
||||||
|
|
||||||
```python
|
|
||||||
task_id = await scheduler.submit(
|
|
||||||
task_type="recipe_llm",
|
|
||||||
payload={"pantry_ids": [1, 2, 3]},
|
|
||||||
vram_gb=4.0,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### `scheduler.result(task_id) -> TaskResult | None`
|
|
||||||
|
|
||||||
Polls for a completed result. Returns `None` if still running.
|
|
||||||
|
|
||||||
## VRAM budgets
|
|
||||||
|
|
||||||
Each product defines its VRAM budgets in `compose.yml` / `compose.override.yml`:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
environment:
|
|
||||||
VRAM_BUDGET_RECIPE_LLM: "4.0"
|
|
||||||
VRAM_BUDGET_EXPIRY_LLM: "2.0"
|
|
||||||
```
|
|
||||||
|
|
||||||
These map to task types in the scheduler. If the coordinator is unavailable (local dev without cf-orch), the scheduler falls back to sequential local execution.
|
|
||||||
|
|
||||||
## Shim pattern
|
|
||||||
|
|
||||||
Products that need to re-export scheduler functions for backward compatibility use a shim:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# myproduct/app/tasks/scheduler.py
|
|
||||||
from circuitforge_core.tasks.scheduler import (
|
|
||||||
get_scheduler as _base_get_scheduler,
|
|
||||||
reset_scheduler, # re-export for lifespan teardown
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_scheduler():
|
|
||||||
"""Product-specific scheduler with service name injected."""
|
|
||||||
return _base_get_scheduler(service_name="myproduct")
|
|
||||||
```
|
|
||||||
|
|
||||||
Always re-export `reset_scheduler` from the shim so the FastAPI lifespan can import it from one place.
|
|
||||||
|
|
@ -1,57 +0,0 @@
|
||||||
# text
|
|
||||||
|
|
||||||
Text processing utilities. Normalization, truncation, chunking, and token estimation — shared across all products that manipulate text before or after LLM inference.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.text import normalize, chunk, truncate, estimate_tokens
|
|
||||||
```
|
|
||||||
|
|
||||||
## `normalize(text: str) -> str`
|
|
||||||
|
|
||||||
Strips excess whitespace, normalizes unicode (NFC), and removes null bytes and control characters that can cause downstream issues with SQLite FTS5 or LLM tokenizers.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.text import normalize
|
|
||||||
|
|
||||||
clean = normalize(" Hello\u00a0world\x00 ")
|
|
||||||
# → "Hello world"
|
|
||||||
```
|
|
||||||
|
|
||||||
## `truncate(text: str, max_tokens: int, model: str = "default") -> str`
|
|
||||||
|
|
||||||
Truncates text to approximately `max_tokens` tokens, breaking at sentence or paragraph boundaries where possible. Uses a simple byte-based heuristic (1 token ≈ 4 bytes) unless a specific model tokenizer is requested.
|
|
||||||
|
|
||||||
```python
|
|
||||||
excerpt = truncate(long_doc, max_tokens=2048)
|
|
||||||
```
|
|
||||||
|
|
||||||
## `chunk(text: str, chunk_size: int, overlap: int = 0) -> list[str]`
|
|
||||||
|
|
||||||
Splits text into chunks for RAG (retrieval-augmented generation) pipelines. Consecutive chunks overlap by `overlap` (default `0`, i.e. no overlap). Respects paragraph boundaries.
|
|
||||||
|
|
||||||
```python
|
|
||||||
chunks = chunk(article_text, chunk_size=512, overlap=64)
|
|
||||||
```
|
|
||||||
|
|
||||||
## `estimate_tokens(text: str, model: str = "default") -> int`
|
|
||||||
|
|
||||||
Estimates token count without loading a full tokenizer. Accurate enough for context window budget planning (within ~10%).
|
|
||||||
|
|
||||||
## FTS5 helpers
|
|
||||||
|
|
||||||
SQLite FTS5 has quirks with special characters in MATCH expressions. The `text` module provides helpers used by the recipe engine and other FTS5 consumers:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.text import fts_quote, strip_apostrophes
|
|
||||||
|
|
||||||
# Always double-quote FTS5 terms — bare tokens break on brand names
|
|
||||||
query = " ".join(fts_quote(term) for term in tokens)
|
|
||||||
# → '"chicken" "breast" "lemon"'
|
|
||||||
|
|
||||||
# Strip apostrophes before FTS5 queries
|
|
||||||
clean = strip_apostrophes("O'Doul's")
|
|
||||||
# → "ODouls"
|
|
||||||
```
|
|
||||||
|
|
||||||
!!! warning "FTS5 gotcha"
|
|
||||||
Always quote ALL terms in MATCH expressions. Bare tokens break on brand names (e.g., `O'Doul's`), plant-based ingredient names, and anything with punctuation.
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
# tiers
|
|
||||||
|
|
||||||
Tier system implementation. Provides the `@require_tier()` decorator used on FastAPI endpoints and the BYOK (bring your own key) unlock logic.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.tiers import require_tier, TierLevel
|
|
||||||
```
|
|
||||||
|
|
||||||
## Tier levels
|
|
||||||
|
|
||||||
| Tier | Constant | What it unlocks |
|
|
||||||
|------|----------|----------------|
|
|
||||||
| Free | `TierLevel.FREE` | Core pipeline, basic AI assist, local LLM only |
|
|
||||||
| Paid | `TierLevel.PAID` | Cloud LLM, integrations, full AI generation suite |
|
|
||||||
| Premium | `TierLevel.PREMIUM` | Fine-tuned models, multi-user, advanced analytics |
|
|
||||||
| Ultra | `TierLevel.ULTRA` | Human-in-the-loop operator execution |
|
|
||||||
|
|
||||||
## BYOK unlocks
|
|
||||||
|
|
||||||
Users who configure their own LLM backend (via `config/llm.yaml`) can unlock features that would otherwise require a paid tier. The `tiers` module checks for configured BYOK backends before enforcing tier gates.
|
|
||||||
|
|
||||||
This is intentional: privacy-preserving self-hosting is rewarded, not penalized. A user running their own Ollama instance gets AI features without a subscription.
|
|
||||||
|
|
||||||
## `@require_tier(tier: str)`
|
|
||||||
|
|
||||||
Decorator for FastAPI route handlers. Resolves the calling user's tier from the request context (Heimdall JWT, validated by Caddy) and raises HTTP 403 if the user's tier is below the required tier.
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.tiers import require_tier
|
|
||||||
|
|
||||||
@router.post("/recipes/suggest")
|
|
||||||
@require_tier("paid")
|
|
||||||
async def suggest_recipes(request: Request, body: SuggestRequest):
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
In local (non-cloud) mode with no license configured, all users default to `free`. BYOK detection runs first — if a local LLM backend is configured, relevant paid features unlock regardless of license tier.
|
|
||||||
|
|
||||||
## Per-product overrides
|
|
||||||
|
|
||||||
Products define which specific features are gated at which tier in their own `app/tiers.py`, using the cf-core decorators as building blocks. The cf-core `tiers` module provides the mechanism; the product owns the policy.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# kiwi/app/tiers.py
|
|
||||||
from circuitforge_core.tiers import require_tier
|
|
||||||
|
|
||||||
# Re-export with product-specific names if desired
|
|
||||||
require_paid = require_tier("paid")
|
|
||||||
require_premium = require_tier("premium")
|
|
||||||
|
|
||||||
# BYOK unlockable features — defined per product
|
|
||||||
BYOK_UNLOCKABLE = [
|
|
||||||
"recipe_suggestion_l3",
|
|
||||||
"receipt_ocr",
|
|
||||||
"expiry_llm_fallback",
|
|
||||||
]
|
|
||||||
```
|
|
||||||
|
|
||||||
## Checking tier in non-endpoint code
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.tiers import get_user_tier, TierLevel
|
|
||||||
|
|
||||||
tier = get_user_tier(user_id)
|
|
||||||
if tier >= TierLevel.PAID:
|
|
||||||
# run AI feature
|
|
||||||
```
|
|
||||||
|
|
@ -1,27 +0,0 @@
|
||||||
# tts
|
|
||||||
|
|
||||||
Text-to-speech router. **Stub — not yet implemented.**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.tts import TTSRouter # planned
|
|
||||||
```
|
|
||||||
|
|
||||||
## Planned design
|
|
||||||
|
|
||||||
The TTS module will mirror the [LLM router](llm.md) pattern: a configurable fallback chain over local and cloud TTS backends.
|
|
||||||
|
|
||||||
**Planned backends:**
|
|
||||||
- `piper` — local, fast, offline-capable; excellent quality for a neural TTS
|
|
||||||
- `espeak` — local, minimal resource use, robotic but reliable fallback
|
|
||||||
- `openai_tts` — cloud, `tts-1` and `tts-1-hd`; requires `OPENAI_API_KEY`
|
|
||||||
|
|
||||||
**Planned use cases:**
|
|
||||||
- Osprey: reading back IVR menus aloud; accessibility for users who can't monitor hold music
|
|
||||||
- Linnet: speaking annotated tone labels alongside the original audio
|
|
||||||
- Any product: accessible audio output for users with print disabilities
|
|
||||||
|
|
||||||
## Current status
|
|
||||||
|
|
||||||
Stub only. Planned to ship alongside or shortly after the STT module, as most use cases need both.
|
|
||||||
|
|
||||||
**Piper** is the recommended path when this lands: it runs locally at 10–20x real-time on CPU, supports 40+ language/speaker models, and has no API key requirement. See [rhasspy/piper](https://github.com/rhasspy/piper) for model downloads.
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
# vision
|
|
||||||
|
|
||||||
Vision router base class. **Stub — partially implemented.**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.vision import VisionRouter # base class
|
|
||||||
```
|
|
||||||
|
|
||||||
## Planned design
|
|
||||||
|
|
||||||
The vision module mirrors the [LLM router](llm.md) pattern for multimodal inputs. Products subclass `VisionRouter` and configure a fallback chain over vision-capable backends.
|
|
||||||
|
|
||||||
**Planned backends:**
|
|
||||||
- `moondream2` — local, 1.8GB, fast; via the vision service FastAPI sidecar on :8002
|
|
||||||
- `claude_code` — local wrapper with vision capability
|
|
||||||
- `anthropic` — cloud, Claude's vision models
|
|
||||||
- `openai` — cloud, GPT-4o vision
|
|
||||||
|
|
||||||
## Current usage
|
|
||||||
|
|
||||||
The vision service (`scripts/vision_service/main.py` in Peregrine, and the cf-docuvision path in Kiwi) currently implements vision routing directly without going through this module. This module is being designed to absorb those implementations once the interface stabilizes.
|
|
||||||
|
|
||||||
## `VisionRouter` base class
|
|
||||||
|
|
||||||
```python
|
|
||||||
class VisionRouter:
|
|
||||||
def analyze(
|
|
||||||
self,
|
|
||||||
images: list[str], # base64-encoded
|
|
||||||
prompt: str,
|
|
||||||
max_tokens: int = 1024,
|
|
||||||
) -> str:
|
|
||||||
"""Run vision inference. Returns text response."""
|
|
||||||
raise NotImplementedError
|
|
||||||
```
|
|
||||||
|
|
||||||
## moondream2 specifics
|
|
||||||
|
|
||||||
moondream2 is the preferred local vision model — it's small enough for CPU use (1.8GB download) and fast enough for interactive use on GPU. Products using it:
|
|
||||||
|
|
||||||
- **Peregrine**: survey screenshot analysis (culture-fit survey assistant)
|
|
||||||
- **Kiwi**: receipt OCR fast-path, barcode label reading
|
|
||||||
|
|
||||||
!!! note "VRAM requirement"
|
|
||||||
moondream2 uses ~1.5GB VRAM in 4-bit quantization. Stop the main LLM service before starting the vision service if you're on a card with < 6GB VRAM.
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
# wizard
|
|
||||||
|
|
||||||
First-run wizard base class. **Stub.**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from circuitforge_core.wizard import BaseWizard # planned
|
|
||||||
```
|
|
||||||
|
|
||||||
## Purpose
|
|
||||||
|
|
||||||
`BaseWizard` provides a standard scaffold for first-run product setup. Every CircuitForge product has a first-run wizard that:
|
|
||||||
|
|
||||||
1. Validates prerequisites (Docker, required ports, disk space)
|
|
||||||
2. Configures the LLM backend (local Ollama / vLLM / BYOK cloud)
|
|
||||||
3. Sets user preferences and accessibility options
|
|
||||||
4. Issues or validates a license key
|
|
||||||
5. Runs a smoke test and confirms everything is working
|
|
||||||
|
|
||||||
## Existing implementations
|
|
||||||
|
|
||||||
Each product currently implements its own wizard:
|
|
||||||
|
|
||||||
- **Peregrine**: `app/pages/0_Setup.py` (Streamlit) — gates app until `config/user.yaml` exists
|
|
||||||
- **Kiwi**: Vue 3 wizard component with step-by-step hardware detection, LLM config, dietary preferences
|
|
||||||
|
|
||||||
These will be refactored to share the `BaseWizard` scaffold once the interface stabilizes.
|
|
||||||
|
|
||||||
## Planned `BaseWizard` API
|
|
||||||
|
|
||||||
```python
|
|
||||||
class BaseWizard:
|
|
||||||
steps: list[WizardStep] # ordered list of setup steps
|
|
||||||
|
|
||||||
def run(self) -> WizardResult:
|
|
||||||
"""Execute all steps in order. Returns result with completion status."""
|
|
||||||
...
|
|
||||||
|
|
||||||
def resume(self, from_step: int) -> WizardResult:
|
|
||||||
"""Resume from a specific step (e.g., after fixing a failed prereq)."""
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
## Accessibility in the wizard
|
|
||||||
|
|
||||||
The wizard is the first thing new users see. It must meet CF's accessibility standards:
|
|
||||||
|
|
||||||
- All steps must be completable with keyboard only
|
|
||||||
- No time limits on any step
|
|
||||||
- Plain-language instructions throughout (no jargon)
|
|
||||||
- Accessibility preferences collected early (step 2 or 3) so the rest of the wizard can immediately adapt
|
|
||||||
- Progress saved after each step so users can pause and return
|
|
||||||
|
|
@ -1,129 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
Standalone music continuation test — no service required.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
conda run -n cf python scripts/test_musicgen.py \
|
|
||||||
--input "/Library/Audio/Music/KAESUL/Schedule I - Original Soundtrack (2025)/KAESUL - Schedule I - Original Soundtrack - 17 - the life i lead (reveal trailer).mp3"
|
|
||||||
|
|
||||||
Options:
|
|
||||||
--input PATH Audio file to continue (any ffmpeg-readable format)
|
|
||||||
--output PATH Output WAV path (default: /tmp/continuation_output.wav)
|
|
||||||
--model MODEL MusicGen variant (default: facebook/musicgen-melody)
|
|
||||||
--duration SECS Seconds of new audio to generate (default: 30)
|
|
||||||
--prompt-duration SECS Seconds from end of song to condition on (default: 10)
|
|
||||||
--description TEXT Optional style hint, e.g. "dark ambient electronic"
|
|
||||||
--device DEVICE cuda or cpu (default: cuda)
|
|
||||||
--join Concatenate original prompt segment + continuation in output
|
|
||||||
|
|
||||||
The generated file is saved to --output. Open it in any audio player to listen.
|
|
||||||
Model weights download to /Library/Assets/LLM/musicgen/ on first run (~8 GB for melody).
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
# The Hugging Face cache location must be set before audiocraft is imported
# (it is imported lazily inside main()). An HF_HOME already exported by the
# caller is left untouched.
if "HF_HOME" not in os.environ:
    os.environ["HF_HOME"] = "/Library/Assets/LLM/musicgen"

# Timestamped INFO-level logging for the whole script.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)

log = logging.getLogger("test_musicgen")
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
|
||||||
p = argparse.ArgumentParser(description="cf-musicgen standalone test")
|
|
||||||
p.add_argument("--input", required=True, help="Input audio file path")
|
|
||||||
p.add_argument("--output", default="/tmp/continuation_output.wav")
|
|
||||||
p.add_argument("--model", default="facebook/musicgen-melody")
|
|
||||||
p.add_argument("--duration", type=float, default=30.0,
|
|
||||||
help="Seconds of new audio to generate")
|
|
||||||
p.add_argument("--prompt-duration", type=float, default=10.0,
|
|
||||||
help="Seconds from end of song used as prompt")
|
|
||||||
p.add_argument("--description", default=None,
|
|
||||||
help="Optional text description to guide the style")
|
|
||||||
p.add_argument("--device", default="cuda", choices=["cuda", "cpu"])
|
|
||||||
p.add_argument("--join", action="store_true",
|
|
||||||
help="Prepend the prompt segment to the output file")
|
|
||||||
return p.parse_args()
|
|
||||||
|
|
||||||
|
|
||||||
def _match_channels(wav: "torch.Tensor", channels: int) -> "torch.Tensor":
    """Return *wav* (laid out as [C, T]) with exactly *channels* channels.

    Multi-channel audio is averaged down to mono first; the mono signal is
    then repeated across the requested number of channels.
    """
    if wav.shape[0] == channels:
        return wav
    if wav.shape[0] > 1:
        wav = wav.mean(dim=0, keepdim=True)
    # Broadcast only the singleton channel axis; -1 keeps the time axis as is.
    # (expand_as(target) fails here because prompt and output differ in length.)
    return wav.expand(channels, -1)


def main() -> None:
    """Continue the tail of an audio file with MusicGen and save it as WAV.

    Validates the CLI input, loads the model, uses the last
    ``--prompt-duration`` seconds of the input as the prompt, generates the
    continuation, optionally prepends the prompt (``--join``) and writes the
    result to ``--output``. Exits with status 1 on invalid input.
    """
    args = parse_args()

    # isfile (not exists): a directory path would only fail later in the loader.
    if not os.path.isfile(args.input):
        log.error("Input file not found: %s", args.input)
        sys.exit(1)

    # Written as "not (> 0)" so NaN is rejected too. A zero prompt length would
    # make the ``-n:`` slice below select the entire track.
    if not (args.duration > 0 and args.prompt_duration > 0):
        log.error("--duration and --prompt-duration must be greater than zero")
        sys.exit(1)

    log.info("Input: %s", args.input)
    log.info("Model: %s", args.model)
    log.info("Duration: %.1fs | Prompt: %.1fs", args.duration, args.prompt_duration)
    if args.description:
        log.info("Style hint: %s", args.description)

    # Heavy imports are deferred so argument errors are reported instantly.
    import torch
    import torchaudio

    log.info("Loading model (weights -> /Library/Assets/LLM/musicgen/)")
    from audiocraft.models import MusicGen

    model = MusicGen.get_pretrained(args.model, device=args.device)
    model.set_generation_params(duration=args.duration, top_k=250, temperature=1.0, cfg_coef=3.0)

    # Load input audio
    wav, sr = torchaudio.load(args.input)
    log.info("Loaded audio: %.1fs @ %d Hz (%d ch)", wav.shape[-1] / sr, sr, wav.shape[0])

    # Keep only the last --prompt-duration seconds (at least one sample).
    max_prompt_samples = max(1, int(args.prompt_duration * sr))
    prompt_wav = wav[..., -max_prompt_samples:] if wav.shape[-1] > max_prompt_samples else wav
    log.info("Using %.1fs prompt from end of track", prompt_wav.shape[-1] / sr)

    # MusicGen expects [batch, channels, time]
    prompt_tensor = prompt_wav.unsqueeze(0).to(args.device)

    log.info("Generating %.1fs of continuation ...", args.duration)
    t0 = time.time()

    with torch.no_grad():
        output = model.generate_continuation(
            prompt=prompt_tensor,
            prompt_sample_rate=sr,
            descriptions=[args.description],
            progress=True,
        )

    elapsed = time.time() - t0
    model_sr = model.sample_rate
    output_wav = output[0].cpu()  # [C, T]
    actual_s = output_wav.shape[-1] / model_sr
    log.info("Done in %.1fs -> %.1fs of audio at %d Hz", elapsed, actual_s, model_sr)

    if args.join:
        # NOTE(review): audiocraft's generate_continuation may already return
        # the prompt as the start of its output (and count it toward
        # `duration`); if so, --join duplicates the prompt. TODO confirm.
        # Resample prompt to model sample rate so concatenation is seamless
        prompt_resampled = torchaudio.functional.resample(prompt_wav, sr, model_sr)
        # Reconcile channel count so cat doesn't blow up: the prompt may be
        # stereo while the model output is typically mono (or the reverse).
        prompt_resampled = _match_channels(prompt_resampled, output_wav.shape[0])
        output_wav = torch.cat([prompt_resampled, output_wav], dim=-1)
        total_s = output_wav.shape[-1] / model_sr
        log.info("Joined prompt + continuation: %.1fs total", total_s)

    # abspath first: dirname of a bare filename is "", which makedirs rejects.
    os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)
    torchaudio.save(args.output, output_wav, model_sr)
    log.info("Saved: %s", args.output)
    log.info("Play: ffplay %r (or open in any audio player)", args.output)


if __name__ == "__main__":
    main()
|
|
||||||
Loading…
Reference in a new issue