43 changed files with 16 additions and 2904 deletions
--- a/circuitforge_core/community/migrations/005_recipe_tags.sql
+++ b/circuitforge_core/community/migrations/005_recipe_tags.sql
@ -1,42 +0,0 @@
 -- 005_recipe_tags.sql
 -- Community-contributed recipe subcategory tags.
 --
 -- Users can tag corpus recipes (from a product's local recipe dataset) with a
 -- domain/category/subcategory from that product's browse taxonomy. Tags are
 -- keyed by (recipe_source, recipe_ref) so a single table serves all CF products
 -- that have a recipe corpus (currently: kiwi).
 --
 -- Acceptance threshold: upvotes >= 2 (submitter's implicit vote counts as 1,
 -- so one additional voter is enough to publish). The browse counts cache merges
 -- accepted tags into subcategory totals on each nightly refresh.
 CREATE TABLE IF NOT EXISTS recipe_tags (
    id              BIGSERIAL PRIMARY KEY,
    recipe_source   TEXT NOT NULL CHECK (recipe_source IN ('corpus')),
    recipe_ref      TEXT NOT NULL,      -- corpus integer recipe ID stored as text
    domain          TEXT NOT NULL,
    category        TEXT NOT NULL,
    subcategory     TEXT,               -- NULL = category-level tag (no subcategory)
    pseudonym       TEXT NOT NULL,
    upvotes         INTEGER NOT NULL DEFAULT 1,  -- starts at 1 (submitter's own vote)
    source_product  TEXT NOT NULL DEFAULT 'kiwi',
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- one tag per (recipe, location, user) — prevents submitting the same tag twice.
    -- Only effective when subcategory IS NOT NULL: a UNIQUE constraint treats NULLs
    -- as distinct, so category-level tags are covered by the partial index below.
    UNIQUE (recipe_source, recipe_ref, domain, category, subcategory, pseudonym)
 );
 -- Enforces the same "one tag per (recipe, location, user)" rule for category-level
 -- tags, where subcategory is NULL and the table-level UNIQUE constraint does not
 -- detect duplicates.
 CREATE UNIQUE INDEX IF NOT EXISTS idx_recipe_tags_unique_category_level
    ON recipe_tags (recipe_source, recipe_ref, domain, category, pseudonym)
    WHERE subcategory IS NULL;
 -- Partial index for looking up tags by product and taxonomy location.
 -- Only rows with upvotes >= 2 are indexed, i.e. the acceptance threshold
 -- described in the header comment of this migration.
 CREATE INDEX IF NOT EXISTS idx_recipe_tags_lookup
    ON recipe_tags (source_product, domain, category, subcategory)
    WHERE upvotes >= 2;
 -- Index for fetching every tag (accepted or not) attached to a single recipe.
 CREATE INDEX IF NOT EXISTS idx_recipe_tags_recipe
    ON recipe_tags (recipe_source, recipe_ref);
 -- Tracks who voted on which tag to prevent double-voting.
 -- The submitter's self-vote is inserted here at submission time.
 CREATE TABLE IF NOT EXISTS recipe_tag_votes (
    -- Tag being voted on; deleting the tag removes its votes (ON DELETE CASCADE).
    tag_id      BIGINT NOT NULL REFERENCES recipe_tags(id) ON DELETE CASCADE,
    -- Voter identity; same pseudonym space as recipe_tags.pseudonym.
    pseudonym   TEXT NOT NULL,
    voted_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- At most one vote per pseudonym per tag; a second insert violates this key.
    PRIMARY KEY (tag_id, pseudonym)
 );
--- a/circuitforge_core/community/store.py
+++ b/circuitforge_core/community/store.py
@ -207,170 +207,3 @@ class SharedStore:
            raise
        finally:
            self._db.putconn(conn)
    # ── Recipe tags ───────────────────────────────────────────────────────────
    def submit_recipe_tag(
        self,
        recipe_id: int,
        domain: str,
        category: str,
        subcategory: str | None,
        pseudonym: str,
        source_product: str = "kiwi",
    ) -> dict:
        """Create a community tag for a corpus recipe and return the stored row.

        The tag is inserted with ``upvotes=1`` and, in the same transaction, the
        submitter's own vote is written to ``recipe_tag_votes``.

        Returns:
            A dict keyed by the columns of the ``RETURNING`` clause (id,
            recipe_ref, domain, category, subcategory, pseudonym, upvotes,
            created_at).

        Raises:
            Any database error is re-raised after the transaction is rolled
            back; per the schema this includes a unique violation when the same
            user has already tagged this recipe to this location. The caller is
            expected to handle it.
        """
        connection = self._db.getconn()
        try:
            with connection.cursor() as cursor:
                tag_values = (
                    str(recipe_id), domain, category, subcategory,
                    pseudonym, source_product,
                )
                cursor.execute(
                    """
                    INSERT INTO recipe_tags
                        (recipe_source, recipe_ref, domain, category, subcategory,
                         pseudonym, upvotes, source_product)
                    VALUES ('corpus', %s, %s, %s, %s, %s, 1, %s)
                    RETURNING id, recipe_ref, domain, category, subcategory,
                              pseudonym, upvotes, created_at
                    """,
                    tag_values,
                )
                column_names = [column[0] for column in cursor.description]
                tag = dict(zip(column_names, cursor.fetchone()))
                # The submitter implicitly votes for their own tag.
                cursor.execute(
                    "INSERT INTO recipe_tag_votes (tag_id, pseudonym) VALUES (%s, %s)",
                    (tag["id"], pseudonym),
                )
                connection.commit()
                return tag
        except Exception:
            connection.rollback()
            raise
        finally:
            # Always hand the connection back to the pool.
            self._db.putconn(connection)
    def upvote_recipe_tag(self, tag_id: int, pseudonym: str) -> int:
        """Add an upvote to a tag from ``pseudonym`` and return the new count.

        The counter is bumped first so that a missing tag is detected before
        the vote row is inserted. Inserting the vote first would trip the
        foreign key on ``recipe_tag_votes.tag_id`` and surface as a database
        error instead of the documented ``ValueError``.

        Both statements run in one transaction: if the vote insert fails (for
        example because this pseudonym already voted), the counter increment is
        rolled back as well.

        Raises:
            ValueError: the tag does not exist.
            psycopg2.errors.UniqueViolation: this pseudonym already voted
                (raised by the primary key on ``recipe_tag_votes``).
        """
        conn = self._db.getconn()
        try:
            with conn.cursor() as cur:
                cur.execute(
                    "UPDATE recipe_tags SET upvotes = upvotes + 1 WHERE id = %s"
                    " RETURNING upvotes",
                    (tag_id,),
                )
                row = cur.fetchone()
                if row is None:
                    raise ValueError(f"recipe_tag {tag_id} not found")
                # Record the vote; the (tag_id, pseudonym) key rejects double votes.
                cur.execute(
                    "INSERT INTO recipe_tag_votes (tag_id, pseudonym) VALUES (%s, %s)",
                    (tag_id, pseudonym),
                )
                conn.commit()
                return row[0]
        except Exception:
            conn.rollback()
            raise
        finally:
            self._db.putconn(conn)
    def get_recipe_tag_by_id(self, tag_id: int) -> dict | None:
        """Look up one ``recipe_tags`` row by primary key.

        Returns:
            A dict keyed by the selected column names, or ``None`` when no row
            has the given id.
        """
        connection = self._db.getconn()
        try:
            with connection.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT id, recipe_ref, domain, category, subcategory,
                           pseudonym, upvotes, created_at
                    FROM recipe_tags WHERE id = %s
                    """,
                    (tag_id,),
                )
                record = cursor.fetchone()
                if record is not None:
                    column_names = [column[0] for column in cursor.description]
                    return dict(zip(column_names, record))
                return None
        finally:
            # Read-only path: just return the connection to the pool.
            self._db.putconn(connection)
    def list_tags_for_recipe(
        self,
        recipe_id: int,
        source_product: str = "kiwi",
    ) -> list[dict]:
        """List every tag on a corpus recipe, whether accepted or not.

        Rows are ordered by upvote count (highest first) and, within the same
        count, by creation time (newest first).

        Returns:
            One dict per tag, keyed by the selected column names; an empty list
            when the recipe has no tags.
        """
        connection = self._db.getconn()
        try:
            with connection.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT id, domain, category, subcategory, pseudonym,
                           upvotes, created_at
                    FROM recipe_tags
                    WHERE recipe_source = 'corpus'
                      AND recipe_ref = %s
                      AND source_product = %s
                    ORDER BY upvotes DESC, created_at DESC
                    """,
                    (str(recipe_id), source_product),
                )
                column_names = [column[0] for column in cursor.description]
                tags = []
                for record in cursor.fetchall():
                    tags.append(dict(zip(column_names, record)))
                return tags
        finally:
            self._db.putconn(connection)
    def get_accepted_recipe_ids_for_subcategory(
        self,
        domain: str,
        category: str,
        subcategory: str | None,
        source_product: str = "kiwi",
        threshold: int = 2,
    ) -> list[int]:
        """Collect corpus recipe IDs whose community tags are accepted for a location.

        A tag counts as accepted once its ``upvotes`` reach ``threshold``.
        Passing ``subcategory=None`` selects category-level tags (rows whose
        subcategory is NULL); otherwise the subcategory must match exactly.

        Returns:
            Distinct recipe IDs, cast from the stored text reference to integers.
        """
        connection = self._db.getconn()
        try:
            with connection.cursor() as cursor:
                # NULL needs IS NULL rather than "= %s", hence two statements.
                if subcategory is not None:
                    cursor.execute(
                        """
                        SELECT DISTINCT recipe_ref::INTEGER
                        FROM recipe_tags
                        WHERE source_product = %s
                          AND domain = %s AND category = %s
                          AND subcategory = %s
                          AND upvotes >= %s
                        """,
                        (source_product, domain, category, subcategory, threshold),
                    )
                else:
                    cursor.execute(
                        """
                        SELECT DISTINCT recipe_ref::INTEGER
                        FROM recipe_tags
                        WHERE source_product = %s
                          AND domain = %s AND category = %s
                          AND subcategory IS NULL
                          AND upvotes >= %s
                        """,
                        (source_product, domain, category, threshold),
                    )
                recipe_ids = []
                for record in cursor.fetchall():
                    recipe_ids.append(record[0])
                return recipe_ids
        finally:
            self._db.putconn(connection)
--- a/circuitforge_core/hardware/tiers.py
+++ b/circuitforge_core/hardware/tiers.py
@ -69,7 +69,7 @@ VRAM_TIERS: list[VramTier] = [
        profile_name="single-gpu-8gb",
        ollama_model="qwen2.5:7b-instruct",
        vllm_candidates=["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
-        services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts", "cf-musicgen"],
+        services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts"],
        llm_max_params="8b",
    ),
    VramTier(
@ -79,7 +79,7 @@ VRAM_TIERS: list[VramTier] = [
        ollama_model="qwen2.5:14b-instruct-q4_k_m",
        vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
        services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts",
-                  "cf-musicgen", "cf-embed", "cf-classify"],
+                  "cf-embed", "cf-classify"],
        llm_max_params="14b",
    ),
    VramTier(
@ -89,7 +89,7 @@ VRAM_TIERS: list[VramTier] = [
        ollama_model="qwen2.5:32b-instruct-q4_k_m",
        vllm_candidates=["Qwen2.5-14B-Instruct", "Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"],
        services=["ollama", "vllm", "cf-vision", "cf-docuvision", "cf-stt", "cf-tts",
-                  "cf-musicgen", "cf-embed", "cf-classify", "comfyui"],
+                  "cf-embed", "cf-classify", "comfyui"],
        llm_max_params="32b-q4",
    ),
 ]
--- a/circuitforge_core/musicgen/init.py
+++ b/circuitforge_core/musicgen/init.py
@ -1 +0,0 @@
 """circuitforge_core.musicgen — music continuation service (BSL 1.1)."""
--- a/circuitforge_core/musicgen/app.py
+++ b/circuitforge_core/musicgen/app.py
@ -1,138 +0,0 @@
 """
 cf-musicgen FastAPI service — managed by cf-orch.
 Endpoints:
  GET  /health     -> {"status": "ok", "model": str, "vram_mb": int}
  POST /continue   -> audio bytes (Content-Type: audio/wav or audio/mpeg)
 Usage:
    python -m circuitforge_core.musicgen.app \
        --model facebook/musicgen-melody \
        --port 8006 \
        --gpu-id 0
 The service streams back raw audio bytes. Headers include:
  X-Duration-S      generated duration in seconds
  X-Prompt-Duration-S   how many seconds of the input were used as prompt
  X-Model           model name
  X-Sample-Rate     output sample rate (32000 for all MusicGen variants)
 Model weights are cached at /Library/Assets/LLM/musicgen/.
 """
 from __future__ import annotations
 import argparse
 import logging
 import os
 from typing import Annotated
 from fastapi import FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.responses import Response
 from circuitforge_core.musicgen.backends.base import (
    MODEL_MELODY,
    MODEL_SMALL,
    AudioFormat,
    MusicGenBackend,
    make_musicgen_backend,
 )
 _CONTENT_TYPES: dict[str, str] = {
    "wav": "audio/wav",
    "mp3": "audio/mpeg",
 }
 app = FastAPI(title="cf-musicgen", version="0.1.0")
 _backend: MusicGenBackend | None = None
@app.get("/health")
 def health() -> dict:
    """Report service status, the loaded model name and its VRAM estimate.

    Responds with HTTP 503 while no backend has been assigned to the
    module-level ``_backend``.
    """
    backend = _backend
    if backend is None:
        raise HTTPException(503, detail="backend not initialised")
    return dict(
        status="ok",
        model=backend.model_name,
        vram_mb=backend.vram_mb,
    )
@app.post("/continue")
 async def continue_audio(
    audio: UploadFile = File(..., description="Audio file (WAV, MP3, FLAC, OGG, ...)"),
    description: Annotated[str | None, Form()] = None,
    duration_s: Annotated[float, Form()] = 15.0,
    prompt_duration_s: Annotated[float, Form()] = 10.0,
    format: Annotated[AudioFormat, Form()] = "wav",
 ) -> Response:
    """Generate a continuation for the uploaded audio and return it as raw bytes.

    Form fields:
        audio: the audio file to continue.
        description: optional text passed to the backend; an empty string is
            treated the same as not sending the field.
        duration_s: requested length of the generated audio, in (0, 60].
        prompt_duration_s: seconds of input used as the prompt, in (0, 30].
        format: requested output encoding.

    Responses:
        200 with the audio bytes plus X-Duration-S, X-Prompt-Duration-S,
        X-Model and X-Sample-Rate headers.
        400 if the upload is empty, 422 if a duration is out of range,
        500 if the backend raises, 503 if no backend is loaded.
    """
    if _backend is None:
        raise HTTPException(503, detail="backend not initialised")
    # Chained comparisons are False for NaN, so "not (lo < x <= hi)" also rejects
    # NaN; the previous "x <= 0 or x > hi" form let NaN through to the backend.
    if not (0 < duration_s <= 60):
        raise HTTPException(422, detail="duration_s must be between 0 and 60")
    if not (0 < prompt_duration_s <= 30):
        raise HTTPException(422, detail="prompt_duration_s must be between 0 and 30")
    audio_bytes = await audio.read()
    if not audio_bytes:
        raise HTTPException(400, detail="Empty audio file")
    try:
        result = _backend.continue_audio(
            audio_bytes,
            description=description or None,
            duration_s=duration_s,
            prompt_duration_s=prompt_duration_s,
            format=format,
        )
    except Exception as exc:
        logging.exception("Music continuation failed")
        raise HTTPException(500, detail=str(exc)) from exc
    return Response(
        content=result.audio_bytes,
        # Content type follows the format the backend actually produced.
        media_type=_CONTENT_TYPES.get(result.format, "audio/wav"),
        headers={
            "X-Duration-S": str(round(result.duration_s, 3)),
            "X-Prompt-Duration-S": str(round(result.prompt_duration_s, 3)),
            "X-Model": result.model,
            "X-Sample-Rate": str(result.sample_rate),
        },
    )
 def _parse_args() -> argparse.Namespace:
    """Parse the cf-musicgen command-line options.

    ``--model mock`` is accepted as a choice because the ``__main__`` block
    treats that value as a request for the mock backend; without it argparse
    rejects the value before that check can run.
    """
    p = argparse.ArgumentParser(description="cf-musicgen service")
    p.add_argument(
        "--model",
        default=MODEL_MELODY,
        choices=[
            MODEL_MELODY,
            MODEL_SMALL,
            "facebook/musicgen-medium",
            "facebook/musicgen-large",
            "mock",
        ],
        help="MusicGen model variant",
    )
    p.add_argument("--port", type=int, default=8006)
    p.add_argument("--host", default="0.0.0.0")
    p.add_argument("--gpu-id", type=int, default=0,
                   help="CUDA device index (sets CUDA_VISIBLE_DEVICES)")
    p.add_argument("--device", default="cuda", choices=["cuda", "cpu"])
    p.add_argument("--mock", action="store_true",
                   help="Run with mock backend (no GPU, for testing)")
    return p.parse_args()
 if __name__ == "__main__":
    # Script entry point: configure logging, build the backend, then serve.
    import uvicorn
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s  %(message)s",
    )
    args = _parse_args()
    # Set before the backend is created. setdefault means an already-exported
    # CUDA_VISIBLE_DEVICES takes precedence over --gpu-id.
    if args.device == "cuda" and not args.mock:
        os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(args.gpu_id))
    # Mock mode can be requested with --mock or with a model named "mock".
    mock = args.mock or args.model == "mock"
    # The mock backend is always given device "cpu".
    device = "cpu" if mock else args.device
    # Assigns the module-level backend read by the /health and /continue handlers.
    _backend = make_musicgen_backend(model_name=args.model, mock=mock, device=device)
    uvicorn.run(app, host=args.host, port=args.port, log_level="info")
--- a/circuitforge_core/musicgen/backends/init.py
+++ b/circuitforge_core/musicgen/backends/init.py
@ -1 +0,0 @@
 """MusicGen backend implementations."""
--- a/circuitforge_core/musicgen/backends/audiocraft.py
+++ b/circuitforge_core/musicgen/backends/audiocraft.py
@ -1,128 +0,0 @@
 """
 AudioCraft MusicGen backend — music continuation via Meta's MusicGen.
 Models are downloaded to /Library/Assets/LLM/musicgen/ (HF hub cache).
 The melody model (~8 GB VRAM) is the default; small (~1.5 GB) is available
 for lower-VRAM nodes.
 Continuation workflow:
  1. Decode input audio with torchaudio (any format ffmpeg understands)
  2. Trim to the last `prompt_duration_s` seconds — this anchors the generation
  3. Call model.generate_continuation(prompt_waveform, prompt_sample_rate, ...)
  4. Output tensor is the NEW audio only (not prompt + continuation)
  5. Encode to the requested format and return
 """
 from __future__ import annotations
 import logging
 import os
 from circuitforge_core.musicgen.backends.base import (
    AudioFormat,
    MusicContinueResult,
    decode_audio,
    encode_audio,
 )
 # All MusicGen/AudioCraft weights land here — consistent with other CF model dirs.
 _MUSICGEN_CACHE = "/Library/Assets/LLM/musicgen"
 # VRAM estimates (MB) per model variant
 _VRAM_MB: dict[str, int] = {
    "facebook/musicgen-small": 1500,
    "facebook/musicgen-medium": 4500,
    "facebook/musicgen-melody": 8000,
    "facebook/musicgen-large": 8500,
 }
 logger = logging.getLogger(__name__)
 class AudioCraftBackend:
    """MusicGen backend using Meta's AudioCraft library."""
    def __init__(self, model_name: str = "facebook/musicgen-melody", device: str = "cuda") -> None:
        """Load ``model_name`` onto ``device``.

        ``HF_HOME`` is pointed at the MusicGen cache directory unless it is
        already set, and the directory is created if missing. AudioCraft is
        imported lazily so importing this module does not require it.
        """
        # Redirect HF hub cache before the first import so weights go to /Library/Assets
        os.environ.setdefault("HF_HOME", _MUSICGEN_CACHE)
        os.makedirs(_MUSICGEN_CACHE, exist_ok=True)
        from audiocraft.models import MusicGen  # noqa: PLC0415
        logger.info("Loading MusicGen model: %s on %s", model_name, device)
        self._model = MusicGen.get_pretrained(model_name, device=device)
        self._model_name = model_name
        self._device = device
        logger.info("MusicGen ready: %s", model_name)
    @property
    def model_name(self) -> str:
        """Name of the loaded model variant."""
        return self._model_name
    @property
    def vram_mb(self) -> int:
        """Static VRAM estimate for the loaded variant (8000 for unknown names)."""
        return _VRAM_MB.get(self._model_name, 8000)
    def continue_audio(
        self,
        audio_bytes: bytes,
        *,
        description: str | None = None,
        duration_s: float = 15.0,
        prompt_duration_s: float = 10.0,
        format: AudioFormat = "wav",
    ) -> MusicContinueResult:
        """Generate audio conditioned on the tail of ``audio_bytes``.

        Args:
            audio_bytes: encoded input audio, decoded via ``decode_audio``.
            description: optional text conditioning for the model.
            duration_s: value passed to the model as its ``duration`` setting.
            prompt_duration_s: maximum number of seconds taken from the end of
                the input to use as the prompt.
            format: requested output encoding.

        Returns:
            MusicContinueResult whose ``duration_s`` is measured from the model
            output and whose ``prompt_duration_s`` is the number of seconds of
            input actually used. That is less than the requested value when the
            input is shorter than ``prompt_duration_s``.
        """
        import torch
        # Decode input audio -> [C, T] tensor
        wav, sr = decode_audio(audio_bytes)
        # Trim to the last `prompt_duration_s` seconds to form the conditioning prompt.
        # Using the end of the track (not the beginning) gives the model the musical
        # context closest to where we want to continue.
        max_prompt_samples = int(prompt_duration_s * sr)
        if wav.shape[-1] > max_prompt_samples:
            wav = wav[..., -max_prompt_samples:]
        # Report what was really used: a short input yields a shorter prompt than
        # the caller asked for.
        used_prompt_s = wav.shape[-1] / sr
        # MusicGen expects [batch, channels, time]
        prompt_tensor = wav.unsqueeze(0).to(self._device)
        # Build descriptions list — one entry per batch item (batch=1 here)
        descriptions = [description] if description else [None]
        self._model.set_generation_params(
            duration=duration_s,
            top_k=250,
            temperature=1.0,
            cfg_coef=3.0,
        )
        logger.info(
            "Generating %.1fs continuation (prompt=%.1fs) model=%s",
            duration_s,
            used_prompt_s,
            self._model_name,
        )
        # NOTE(review): upstream AudioCraft appears to return prompt + continuation
        # from generate_continuation and to treat `duration` as the total length.
        # Confirm against the installed version; if so, the prompt portion should
        # be trimmed from `output` to match the "new audio only" contract.
        with torch.no_grad():
            output = self._model.generate_continuation(
                prompt=prompt_tensor,
                prompt_sample_rate=sr,
                descriptions=descriptions,
                progress=True,
            )
        # output: [batch, channels, time] at model sample rate (32 kHz)
        output_wav = output[0]  # [C, T]
        model_sr = self._model.sample_rate
        actual_duration_s = output_wav.shape[-1] / model_sr
        audio_bytes_out = encode_audio(output_wav, model_sr, format)
        return MusicContinueResult(
            audio_bytes=audio_bytes_out,
            sample_rate=model_sr,
            duration_s=actual_duration_s,
            format=format,
            model=self._model_name,
            prompt_duration_s=used_prompt_s,
        )
--- a/circuitforge_core/musicgen/backends/base.py
+++ b/circuitforge_core/musicgen/backends/base.py
@ -1,97 +0,0 @@
 """
 MusicGenBackend Protocol — backend-agnostic music continuation interface.
 All backends accept an audio prompt (raw bytes, any ffmpeg-readable format) and
 return MusicContinueResult with the generated continuation as audio bytes.
 The continuation is the *new* audio only (not prompt + continuation). Callers
 that want a seamless joined file can concatenate the original + result themselves.
 """
 from __future__ import annotations
 import io
 from dataclasses import dataclass
 from typing import Literal, Protocol, runtime_checkable
 AudioFormat = Literal["wav", "mp3"]
 MODEL_SMALL = "facebook/musicgen-small"
 MODEL_MELODY = "facebook/musicgen-melody"
@dataclass(frozen=True)
 class MusicContinueResult:
    """Immutable result of one music-continuation request."""
    # Encoded audio payload returned to the caller.
    audio_bytes: bytes
    # Sample rate of the encoded audio, in Hz.
    sample_rate: int
    # Duration of the returned audio, in seconds.
    duration_s: float
    # Encoding label for audio_bytes ("wav" or "mp3").
    format: AudioFormat
    # Name of the model that produced the audio.
    model: str
    # Seconds of input audio associated with the prompt.
    prompt_duration_s: float
@runtime_checkable
 class MusicGenBackend(Protocol):
    """Structural interface every music-continuation backend must provide.

    Decorated with ``runtime_checkable`` so ``isinstance`` checks are possible.
    """
    # Generate a continuation for `audio_bytes`; all options are keyword-only.
    def continue_audio(
        self,
        audio_bytes: bytes,
        *,
        description: str | None = None,
        duration_s: float = 15.0,
        prompt_duration_s: float = 10.0,
        format: AudioFormat = "wav",
    ) -> MusicContinueResult: ...
    # Name of the model served by this backend.
    @property
    def model_name(self) -> str: ...
    # VRAM figure for the backend, in MB.
    @property
    def vram_mb(self) -> int: ...
 def encode_audio(wav_tensor, sample_rate: int, format: AudioFormat) -> bytes:
    """Encode a [C, T] or [1, C, T] torch tensor to audio bytes.

    A 1-D tensor is treated as a single channel. The tensor is converted to
    float32 on the CPU before encoding.

    Args:
        wav_tensor: waveform tensor to encode.
        sample_rate: sample rate written to the output, in Hz.
        format: "wav" or "mp3".

    Returns:
        The encoded bytes. If MP3 encoding fails, WAV bytes are returned
        instead and a warning is logged, since the byte payload alone does not
        tell the caller which encoding it got.

    Raises:
        ValueError: ``format`` is not a supported value. Previously this
            silently produced an empty byte string.
    """
    # Validate before the heavy imports so bad input fails fast.
    if format not in ("wav", "mp3"):
        raise ValueError(f"Unsupported audio format: {format!r}")
    import io
    import logging
    import torch
    import torchaudio
    wav = wav_tensor
    if wav.dim() == 3:
        wav = wav.squeeze(0)          # [1, C, T] -> [C, T]
    if wav.dim() == 1:
        wav = wav.unsqueeze(0)        # [T] -> [1, T]
    wav = wav.to(torch.float32).cpu()
    if format == "mp3":
        buf = io.BytesIO()
        try:
            torchaudio.save(buf, wav, sample_rate, format="mp3")
            return buf.getvalue()
        except Exception:
            # Best-effort fallback (e.g. ffmpeg backend not available): start
            # from a fresh buffer so no partial MP3 data leaks into the WAV.
            logging.getLogger(__name__).warning(
                "MP3 encoding failed; falling back to WAV", exc_info=True
            )
    buf = io.BytesIO()
    torchaudio.save(buf, wav, sample_rate, format="wav")
    return buf.getvalue()
 def decode_audio(audio_bytes: bytes) -> tuple:
    """Load encoded audio bytes with torchaudio.

    Returns:
        ``(waveform, sample_rate)`` exactly as produced by ``torchaudio.load``;
        the waveform is expected to be shaped [C, T].
    """
    import io
    import torchaudio
    waveform, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    return waveform, sample_rate
 def make_musicgen_backend(
    model_name: str = MODEL_MELODY,
    *,
    mock: bool = False,
    device: str = "cuda",
 ) -> MusicGenBackend:
    """Build a music-continuation backend.

    With ``mock=True`` a ``MockMusicGenBackend`` is returned and ``model_name``
    and ``device`` are ignored; otherwise an ``AudioCraftBackend`` is created
    for the given model and device. Backend modules are imported lazily so
    only the selected one is loaded.
    """
    if not mock:
        from circuitforge_core.musicgen.backends.audiocraft import AudioCraftBackend
        return AudioCraftBackend(model_name=model_name, device=device)
    from circuitforge_core.musicgen.backends.mock import MockMusicGenBackend
    return MockMusicGenBackend()
--- a/circuitforge_core/musicgen/backends/mock.py
+++ b/circuitforge_core/musicgen/backends/mock.py
@ -1,53 +0,0 @@
 """
 Mock MusicGenBackend — returns silent WAV audio; no GPU required.
 Used in unit tests and CI where GPU is unavailable.
 """
 from __future__ import annotations
 import io
 import struct
 import wave
 from circuitforge_core.musicgen.backends.base import AudioFormat, MusicContinueResult
 class MockMusicGenBackend:
    """Backend stand-in that produces silence instead of generated music."""
    @property
    def model_name(self) -> str:
        """Fixed model identifier for the mock."""
        return "mock"
    @property
    def vram_mb(self) -> int:
        """The mock reports zero VRAM."""
        return 0
    def continue_audio(
        self,
        audio_bytes: bytes,
        *,
        description: str | None = None,
        duration_s: float = 15.0,
        prompt_duration_s: float = 10.0,
        format: AudioFormat = "wav",
    ) -> MusicContinueResult:
        """Return ``duration_s`` seconds of mono 16-bit silence at 32 kHz.

        The input audio, description and requested format are ignored; the
        result is always WAV and echoes back ``duration_s`` and
        ``prompt_duration_s`` unchanged.
        """
        rate_hz = 32000
        frame_count = int(duration_s * rate_hz)
        # Two zero bytes per frame = one 16-bit PCM sample of silence.
        silence = b"\x00\x00" * frame_count
        out = io.BytesIO()
        with wave.open(out, "wb") as writer:
            writer.setnchannels(1)
            writer.setsampwidth(2)
            writer.setframerate(rate_hz)
            writer.writeframes(silence)
        return MusicContinueResult(
            audio_bytes=out.getvalue(),
            sample_rate=rate_hz,
            duration_s=duration_s,
            format="wav",
            model="mock",
            prompt_duration_s=prompt_duration_s,
        )
--- a/circuitforge_core/platforms/init.py
+++ b/circuitforge_core/platforms/init.py
--- a/circuitforge_core/platforms/ebay/init.py
+++ b/circuitforge_core/platforms/ebay/init.py
--- a/circuitforge_core/platforms/ebay/oauth.py
+++ b/circuitforge_core/platforms/ebay/oauth.py
@ -1,183 +0,0 @@
 """eBay OAuth Authorization Code flow — user-level token manager.
 Implements the Authorization Code Grant for eBay's Trading API.
 App-level client credentials (Browse API) are handled separately in
 the product-level EbayTokenManager (snipe/app/platforms/ebay/auth.py).
 Usage (Snipe):
    manager = EbayUserTokenManager(
        client_id=app_id,
        client_secret=cert_id,
        runame=runame,
        redirect_uri=redirect_uri,
        env="production",
    )
    # 1. Send user to eBay
    url = manager.get_authorization_url(state="csrf-token-here")
    redirect(url)
    # 2. Handle callback
    tokens = manager.exchange_code(code)   # returns EbayUserTokens
    # store tokens.access_token, tokens.refresh_token, tokens.expires_at
    # 3. Get a fresh access token for API calls
    tokens = manager.refresh(stored_refresh_token)   # returns EbayUserTokens; use tokens.access_token
 """
 from __future__ import annotations
 import base64
 import time
 import urllib.parse
 from dataclasses import dataclass
 from typing import Optional
 import requests
 EBAY_AUTH_URLS = {
    "production": "https://auth.ebay.com/oauth2/authorize",
    "sandbox":    "https://auth.sandbox.ebay.com/oauth2/authorize",
 }
 EBAY_TOKEN_URLS = {
    "production": "https://api.ebay.com/identity/v1/oauth2/token",
    "sandbox":    "https://api.sandbox.ebay.com/identity/v1/oauth2/token",
 }
 # Scopes needed for Trading API GetUser (account age + category feedback).
 # https://developer.ebay.com/api-docs/static/oauth-scopes.html
 DEFAULT_SCOPES = [
    "https://api.ebay.com/oauth/api_scope",
    "https://api.ebay.com/oauth/api_scope/sell.account.readonly",
 ]
@dataclass
 class EbayUserTokens:
    """Token set obtained from eBay's OAuth token endpoint for one user."""
    # Bearer token for API calls.
    access_token: str
    # Long-lived token used to obtain new access tokens.
    refresh_token: str
    # Absolute expiry of access_token.
    expires_at: float       # epoch seconds
    # Scopes reported in the token response (may be empty if none were reported).
    scopes: list[str]
 class EbayUserTokenManager:
    """Authorization Code OAuth helper for a single eBay user.

    Builds the authorization URL, exchanges the callback code for tokens and
    refreshes access tokens. Nothing is persisted here: callers store and load
    tokens themselves (see DB migration 013_ebay_user_tokens.sql).
    """
    def __init__(
        self,
        client_id: str,
        client_secret: str,
        runame: str,
        redirect_uri: str,
        env: str = "production",
        scopes: Optional[list[str]] = None,
    ):
        """Store credentials and resolve the endpoints for ``env``.

        ``env`` must be a key of ``EBAY_AUTH_URLS`` / ``EBAY_TOKEN_URLS``
        (a ``KeyError`` is raised otherwise). ``scopes`` falls back to
        ``DEFAULT_SCOPES`` when omitted or empty.
        """
        self._client_id = client_id
        self._client_secret = client_secret
        self._runame = runame
        self._redirect_uri = redirect_uri
        self._auth_url = EBAY_AUTH_URLS[env]
        self._token_url = EBAY_TOKEN_URLS[env]
        self._scopes = scopes if scopes else DEFAULT_SCOPES
    # ── Authorization URL ──────────────────────────────────────────────────────
    def get_authorization_url(self, state: str = "") -> str:
        """Return the URL the user's browser should be redirected to.

        Args:
            state: CSRF token or other opaque value echoed back by eBay; it is
                left out of the URL when empty.
        """
        query = [
            ("client_id", self._client_id),
            ("response_type", "code"),
            # eBay expects the RuName here rather than the raw redirect URI.
            ("redirect_uri", self._runame),
            ("scope", " ".join(self._scopes)),
        ]
        if state:
            query.append(("state", state))
        return f"{self._auth_url}?{urllib.parse.urlencode(query)}"
    # ── Code exchange ──────────────────────────────────────────────────────────
    def exchange_code(self, code: str) -> EbayUserTokens:
        """Trade the authorization code from the OAuth callback for tokens.

        Raises:
            requests.HTTPError: eBay answered with a non-2xx status.
            KeyError: the response lacks an expected field.
        """
        payload = self._post_token_request(
            {
                "grant_type": "authorization_code",
                "code": code,
                "redirect_uri": self._runame,
            }
        )
        return self._parse_token_response(payload)
    # ── Token refresh ──────────────────────────────────────────────────────────
    def refresh(self, refresh_token: str) -> EbayUserTokens:
        """Obtain a new access token from a stored refresh token.

        eBay refresh tokens are valid for 18 months and access tokens for 2h,
        so call this when the stored access token is within about 60 seconds
        of expiring. The returned object carries the same ``refresh_token``
        that was passed in, because refresh responses do not include a new one.

        Raises:
            requests.HTTPError: the refresh token is expired or revoked.
        """
        payload = self._post_token_request(
            {
                "grant_type": "refresh_token",
                "refresh_token": refresh_token,
                "scope": " ".join(self._scopes),
            }
        )
        granted_scopes = payload.get("scope", "").split()
        return EbayUserTokens(
            access_token=payload["access_token"],
            refresh_token=refresh_token,    # unchanged
            expires_at=time.time() + payload["expires_in"],
            scopes=granted_scopes,
        )
    # ── Helpers ────────────────────────────────────────────────────────────────
    def _post_token_request(self, form: dict) -> dict:
        """POST ``form`` to the token endpoint with Basic auth; return the JSON body."""
        response = requests.post(
            self._token_url,
            headers={
                "Authorization": f"Basic {self._credentials_b64()}",
                "Content-Type": "application/x-www-form-urlencoded",
            },
            data=form,
            timeout=15,
        )
        response.raise_for_status()
        return response.json()
    def _credentials_b64(self) -> str:
        """Base64-encode ``client_id:client_secret`` for the Basic auth header."""
        pair = f"{self._client_id}:{self._client_secret}".encode()
        return base64.b64encode(pair).decode()
    def _parse_token_response(self, data: dict) -> EbayUserTokens:
        """Convert a full token response (including refresh token) to EbayUserTokens."""
        granted_scopes = data.get("scope", "").split()
        return EbayUserTokens(
            access_token=data["access_token"],
            refresh_token=data["refresh_token"],
            expires_at=time.time() + data["expires_in"],
            scopes=granted_scopes,
        )
--- a/circuitforge_core/text/app.py
+++ b/circuitforge_core/text/app.py
@ -16,12 +16,6 @@ Usage:
        --port 8006 \
        --gpu-id 0
 Multi-GPU (spans two GPUs via CUDA_VISIBLE_DEVICES, device_map=auto):
    python -m circuitforge_core.text.app \
        --model /Library/Assets/LLM/deepseek-14b \
        --port 8006 \
        --gpu-ids 0,1
 Mock mode (no model or GPU required):
    CF_TEXT_MOCK=1 python -m circuitforge_core.text.app --port 8006
 """
@ -117,17 +111,9 @@ class OAIChatResponse(BaseModel):
 def create_app(
    model_path: str,
    gpu_id: int = 0,
    gpu_ids: str | None = None,
    backend: str | None = None,
    mock: bool = False,
 ) -> FastAPI:
    """Start the cf-text FastAPI app.
    ``gpu_ids``: comma-separated CUDA device indices for multi-GPU spanning
    (e.g. "0,1"). When set, overrides ``gpu_id`` and sets
    ``CUDA_VISIBLE_DEVICES`` to the full list so HuggingFace Accelerate's
    ``device_map="auto"`` can shard the model across all listed devices.
    """
    global _backend
    if not mock and not model_path:
@ -136,8 +122,7 @@ def create_app(
            "Pass a GGUF path, a HuggingFace model ID, or set CF_TEXT_MOCK=1 for mock mode."
        )
-    visible = gpu_ids if gpu_ids else str(gpu_id)
+    os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(gpu_id))
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", visible)
    _backend = make_text_backend(model_path, backend=backend, mock=mock)
    logger.info("cf-text ready: model=%r vram=%dMB", _backend.model_name, _backend.vram_mb)
@ -226,10 +211,7 @@ def _parse_args() -> argparse.Namespace:
    parser.add_argument("--port", type=int, default=8006)
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--gpu-id", type=int, default=0,
-                        help="CUDA device index to use (single GPU)")
+                        help="CUDA device index to use")
    parser.add_argument("--gpu-ids", default=None,
                        help="Comma-separated CUDA device indices for multi-GPU spanning "
                             "(e.g. '0,1'). Overrides --gpu-id when set.")
    parser.add_argument("--backend", choices=["llamacpp", "transformers"], default=None)
    parser.add_argument("--mock", action="store_true",
                        help="Run in mock mode (no model or GPU needed)")
@ -244,7 +226,6 @@ if __name__ == "__main__":
    app = create_app(
        model_path=args.model,
        gpu_id=args.gpu_id,
        gpu_ids=args.gpu_ids,
        backend=args.backend,
        mock=mock,
    )
--- a/circuitforge_core/text/backends/base.py
+++ b/circuitforge_core/text/backends/base.py
@ -121,19 +121,17 @@ class TextBackend(Protocol):
 def _select_backend(model_path: str, backend: str | None) -> str:
    """
-    Return "llamacpp", "transformers", "ollama", or "vllm" for the given model path.
+    Return "llamacpp" or "transformers" for the given model path.
    Parameters
    ----------
-    model_path  Path to the model file, HuggingFace repo ID, "ollama://<name>",
+    model_path  Path to the model file or HuggingFace repo ID (e.g. "Qwen/Qwen2.5-3B").
-                or "vllm://<model-id>".
+    backend     Explicit override from the caller ("llamacpp" | "transformers" | None).
    backend     Explicit override from the caller
                ("llamacpp" | "transformers" | "ollama" | "vllm" | None).
                When provided, trust it without inspection.
-    Raise ValueError for unrecognised override values.
+    Return "llamacpp" or "transformers". Raise ValueError for unrecognised values.
    """
-    _VALID = ("llamacpp", "transformers", "ollama", "vllm")
+    _VALID = ("llamacpp", "transformers")
    # 1. Caller-supplied override — highest trust, no inspection needed.
    resolved = backend or os.environ.get("CF_TEXT_BACKEND")
@ -144,17 +142,11 @@ def _select_backend(model_path: str, backend: str | None) -> str:
            )
        return resolved
-    # 2. Proxy prefixes — unambiguous routing regardless of model name format.
+    # 2. Format detection — GGUF files are unambiguously llama-cpp territory.
    if model_path.startswith("ollama://"):
        return "ollama"
    if model_path.startswith("vllm://"):
        return "vllm"
    # 3. Format detection — GGUF files are unambiguously llama-cpp territory.
    if model_path.lower().endswith(".gguf"):
        return "llamacpp"
-    # 4. Safe default — transformers covers HF repo IDs and safetensors dirs.
+    # 3. Safe default — transformers covers HF repo IDs and safetensors dirs.
    return "transformers"
@ -187,12 +179,4 @@ def make_text_backend(
        from circuitforge_core.text.backends.transformers import TransformersBackend
        return TransformersBackend(model_path=model_path)
-    if resolved == "ollama":
+    raise ValueError(f"Unknown backend {resolved!r}. Expected 'llamacpp' or 'transformers'.")
        from circuitforge_core.text.backends.ollama import OllamaBackend
        return OllamaBackend(model_path=model_path)
    if resolved == "vllm":
        from circuitforge_core.text.backends.vllm import VllmBackend
        return VllmBackend(model_path=model_path)
    raise ValueError(f"Unknown backend {resolved!r}. Expected 'llamacpp', 'transformers', 'ollama', or 'vllm'.")
--- a/circuitforge_core/text/backends/ollama.py
+++ b/circuitforge_core/text/backends/ollama.py
@ -1,201 +0,0 @@
 # circuitforge_core/text/backends/ollama.py — Ollama proxy backend for cf-text
 #
 # Routes inference requests to a running Ollama instance via its HTTP API.
 # cf-text itself holds no GPU memory; Ollama manages the model and VRAM.
 #
 # Model path format: "ollama://<model-name>"  e.g. "ollama://llama3.1:8b"
 # The "ollama://" prefix is stripped before forwarding to the API.
 #
 # Environment:
 #   CF_TEXT_OLLAMA_URL   Base URL of the Ollama server (default: http://localhost:11434)
 #
 # MIT licensed.
 from __future__ import annotations
 import json as _json
 import logging
 import os
 import time
 from typing import AsyncIterator, Iterator
 import httpx
 from circuitforge_core.text.backends.base import GenerateResult
 logger = logging.getLogger(__name__)
 _DEFAULT_OLLAMA_URL = "http://localhost:11434"
 class OllamaBackend:
    """
    cf-text backend that proxies inference to a local Ollama instance.
    This backend holds no GPU memory itself — Ollama owns the model and VRAM.
    vram_mb therefore defaults to 0 so cf-orch does not double-count VRAM
    against the separate ollama service budget.
    Supports /generate, /chat, and /v1/chat/completions (via generate/chat).
    Streaming is implemented for the generate variants (sync and async);
    chat is non-streaming only.
    """
    def __init__(self, model_path: str, *, vram_mb: int = 0) -> None:
        """
        Parameters
        ----------
        model_path  Catalog path; a leading "ollama://" is stripped and the
                    remainder is sent to Ollama as the model name.
        vram_mb     Value reported by the ``vram_mb`` property (default 0).
        The server base URL is read from CF_TEXT_OLLAMA_URL, falling back to
        the module default; any trailing "/" is removed.
        """
        # Strip the "ollama://" prefix from catalog paths
        self._model = model_path.removeprefix("ollama://")
        self._url = os.environ.get("CF_TEXT_OLLAMA_URL", _DEFAULT_OLLAMA_URL).rstrip("/")
        self._vram_mb = vram_mb
        logger.info("OllamaBackend: model=%r url=%r", self._model, self._url)
    # ── Protocol properties ───────────────────────────────────────────────────
    @property
    def model_name(self) -> str:
        """Model name forwarded to Ollama (catalog path without the prefix)."""
        return self._model
    @property
    def vram_mb(self) -> int:
        """VRAM attributed to this backend, as passed to the constructor."""
        # Ollama manages its own VRAM; cf-text holds nothing.
        return self._vram_mb
    # ── Internal helpers ──────────────────────────────────────────────────────
    def _generate_payload(
        self,
        prompt: str,
        *,
        max_tokens: int,
        temperature: float,
        stop: list[str] | None,
        stream: bool,
    ) -> dict:
        """Build the JSON body shared by every POST to /api/generate."""
        options: dict = {"temperature": temperature, "num_predict": max_tokens}
        # "stop" is only sent when the caller supplied a non-empty list.
        if stop:
            options["stop"] = stop
        return {
            "model":   self._model,
            "prompt":  prompt,
            "stream":  stream,
            "options": options,
        }
    # ── Synchronous interface ─────────────────────────────────────────────────
    def generate(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """
        Run a blocking, non-streaming completion via POST /api/generate.
        Returns a GenerateResult built from the response's "response" and
        "eval_count" fields (defaulting to "" and 0 when absent).
        Raises httpx.HTTPStatusError on a 4xx/5xx response.
        """
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        with httpx.Client(timeout=180.0) as client:
            resp = client.post(f"{self._url}/api/generate", json=payload)
            resp.raise_for_status()
        data = resp.json()
        return GenerateResult(
            text=data.get("response", ""),
            tokens_used=data.get("eval_count", 0),
            model=self._model,
        )
    def generate_stream(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> Iterator[str]:
        """
        Stream a completion via POST /api/generate, yielding text fragments.
        Each non-empty response line is parsed as one JSON object; its
        "response" value is yielded when non-empty, and iteration stops at
        the first object whose "done" field is truthy.
        Raises httpx.HTTPStatusError on a 4xx/5xx response.
        """
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        with httpx.Client(timeout=180.0) as client:
            with client.stream("POST", f"{self._url}/api/generate", json=payload) as resp:
                resp.raise_for_status()
                for line in resp.iter_lines():
                    if not line:
                        continue
                    chunk = _json.loads(line)
                    token = chunk.get("response", "")
                    if token:
                        yield token
                    if chunk.get("done"):
                        break
    def chat(
        self,
        messages: list[dict],
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
    ) -> GenerateResult:
        """
        Run a blocking, non-streaming chat turn via POST /api/chat.
        ``messages`` is forwarded unchanged as the request's "messages" list.
        Returns a GenerateResult built from "message.content" and
        "eval_count" (defaulting to "" and 0 when absent).
        Raises httpx.HTTPStatusError on a 4xx/5xx response.
        """
        payload: dict = {
            "model":    self._model,
            "messages": messages,
            "stream":   False,
            "options":  {"temperature": temperature, "num_predict": max_tokens},
        }
        with httpx.Client(timeout=180.0) as client:
            resp = client.post(f"{self._url}/api/chat", json=payload)
            resp.raise_for_status()
        data = resp.json()
        return GenerateResult(
            text=data.get("message", {}).get("content", ""),
            tokens_used=data.get("eval_count", 0),
            model=self._model,
        )
    # ── Async interface ───────────────────────────────────────────────────────
    async def generate_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """Async counterpart of ``generate``; same request and result shape."""
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        async with httpx.AsyncClient(timeout=180.0) as client:
            resp = await client.post(f"{self._url}/api/generate", json=payload)
            resp.raise_for_status()
        data = resp.json()
        return GenerateResult(
            text=data.get("response", ""),
            tokens_used=data.get("eval_count", 0),
            model=self._model,
        )
    async def generate_stream_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> AsyncIterator[str]:
        """Async counterpart of ``generate_stream``; same line-by-line parsing."""
        payload = self._generate_payload(
            prompt, max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        async with httpx.AsyncClient(timeout=180.0) as client:
            async with client.stream("POST", f"{self._url}/api/generate", json=payload) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    chunk = _json.loads(line)
                    token = chunk.get("response", "")
                    if token:
                        yield token
                    if chunk.get("done"):
                        break
--- a/circuitforge_core/text/backends/vllm.py
+++ b/circuitforge_core/text/backends/vllm.py
@ -1,213 +0,0 @@
 # circuitforge_core/text/backends/vllm.py — vllm proxy backend for cf-text
 #
 # Routes inference requests to a running vllm instance via its OpenAI-compatible
 # HTTP API (/v1/chat/completions, /v1/completions).
 # cf-text itself holds no GPU memory; vllm manages the model and VRAM.
 #
 # Model path format: "vllm://<model-id>"  e.g. "vllm://Qwen/Qwen2.5-7B-Instruct"
 # The "vllm://" prefix is stripped; the remainder is the model_id sent to vllm.
 #
 # Environment:
 #   CF_TEXT_VLLM_URL   Base URL of the vllm server (default: http://localhost:8000)
 #
 # MIT licensed.
 from __future__ import annotations
 import json as _json
 import logging
 import os
 import time
 from typing import AsyncIterator, Iterator
 import httpx
 from circuitforge_core.text.backends.base import ChatMessage, GenerateResult
 logger = logging.getLogger(__name__)
 _DEFAULT_VLLM_URL = "http://localhost:8000"
 class VllmBackend:
    """
    cf-text backend that proxies inference to a local vllm instance.
    vllm exposes an OpenAI-compatible API (/v1/chat/completions).
    This backend holds no GPU memory — vllm owns the model and VRAM.
    vram_mb defaults to 0 so cf-orch does not double-count VRAM
    against the separate vllm service budget.
    Every method, including the prompt-based ``generate*`` family, goes
    through /v1/chat/completions: a bare prompt is wrapped as a single
    user message.
    """
    def __init__(self, model_path: str, *, vram_mb: int = 0) -> None:
        """
        Parameters
        ----------
        model_path  Catalog path; a leading "vllm://" is stripped and the
                    remainder is sent to vllm as the model id.
        vram_mb     Value reported by the ``vram_mb`` property (default 0).
        The server base URL is read from CF_TEXT_VLLM_URL, falling back to
        the module default; any trailing "/" is removed.
        """
        # Strip the "vllm://" prefix from catalog paths
        self._model = model_path.removeprefix("vllm://")
        self._url = os.environ.get("CF_TEXT_VLLM_URL", _DEFAULT_VLLM_URL).rstrip("/")
        self._vram_mb = vram_mb
        logger.info("VllmBackend: model=%r url=%r", self._model, self._url)
    # ── Protocol properties ───────────────────────────────────────────────────
    @property
    def model_name(self) -> str:
        """Model id forwarded to vllm (catalog path without the prefix)."""
        return self._model
    @property
    def vram_mb(self) -> int:
        """VRAM attributed to this backend, as passed to the constructor."""
        # vllm manages its own VRAM; cf-text holds nothing.
        return self._vram_mb
    # ── Internal helpers ──────────────────────────────────────────────────────
    def _chat_payload(
        self,
        messages: list[dict],
        *,
        max_tokens: int,
        temperature: float,
        stop: list[str] | None,
        stream: bool,
    ) -> dict:
        """Build the JSON body for POST /v1/chat/completions."""
        payload: dict = {
            "model":       self._model,
            "messages":    messages,
            "max_tokens":  max_tokens,
            "temperature": temperature,
            "stream":      stream,
        }
        # "stop" is only sent when the caller supplied a non-empty list.
        if stop:
            payload["stop"] = stop
        return payload
    def _prompt_as_messages(self, prompt: str) -> list[dict]:
        """Wrap a bare prompt as a one-element chat history (role "user")."""
        return [{"role": "user", "content": prompt}]
    def _result_from_response(self, data: dict) -> GenerateResult:
        """
        Convert a non-streaming chat-completions response body to a GenerateResult.
        Raises KeyError/IndexError when "choices[0].message.content" is absent.
        """
        # Coalesce a null/empty content to "" so ``text`` is always a str —
        # the same normalisation _parse_sse_token applies to streamed deltas.
        content = data["choices"][0]["message"]["content"] or ""
        return GenerateResult(
            text=content,
            tokens_used=data.get("usage", {}).get("completion_tokens", 0),
            model=self._model,
        )
    # ── Synchronous interface ─────────────────────────────────────────────────
    def generate(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """
        Run a blocking, non-streaming completion for a single user prompt.
        Raises httpx.HTTPStatusError on a 4xx/5xx response.
        """
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        with httpx.Client(timeout=180.0) as client:
            resp = client.post(f"{self._url}/v1/chat/completions", json=payload)
            resp.raise_for_status()
        return self._result_from_response(resp.json())
    def generate_stream(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> Iterator[str]:
        """
        Stream a completion for a single user prompt, yielding text fragments.
        Each response line is passed to _parse_sse_token; empty results
        (blank lines, non-data lines, "[DONE]") are skipped.
        Raises httpx.HTTPStatusError on a 4xx/5xx response.
        """
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        with httpx.Client(timeout=180.0) as client:
            with client.stream("POST", f"{self._url}/v1/chat/completions", json=payload) as resp:
                resp.raise_for_status()
                for line in resp.iter_lines():
                    token = _parse_sse_token(line)
                    if token:
                        yield token
    def chat(
        self,
        messages: list[ChatMessage],
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
    ) -> GenerateResult:
        """
        Run a blocking, non-streaming chat turn.
        Items exposing ``to_dict()`` are converted with it; anything else is
        forwarded as-is (so plain dicts are accepted too). No stop sequences
        are sent. Raises httpx.HTTPStatusError on a 4xx/5xx response.
        """
        dicts = [m.to_dict() if hasattr(m, "to_dict") else m for m in messages]
        payload = self._chat_payload(
            dicts, max_tokens=max_tokens, temperature=temperature, stop=None, stream=False,
        )
        with httpx.Client(timeout=180.0) as client:
            resp = client.post(f"{self._url}/v1/chat/completions", json=payload)
            resp.raise_for_status()
        return self._result_from_response(resp.json())
    # ── Async interface ───────────────────────────────────────────────────────
    async def generate_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> GenerateResult:
        """Async counterpart of ``generate``; same request and result shape."""
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=False,
        )
        async with httpx.AsyncClient(timeout=180.0) as client:
            resp = await client.post(f"{self._url}/v1/chat/completions", json=payload)
            resp.raise_for_status()
        return self._result_from_response(resp.json())
    async def generate_stream_async(
        self,
        prompt: str,
        *,
        max_tokens: int = 512,
        temperature: float = 0.7,
        stop: list[str] | None = None,
    ) -> AsyncIterator[str]:
        """Async counterpart of ``generate_stream``; same per-line parsing."""
        payload = self._chat_payload(
            self._prompt_as_messages(prompt),
            max_tokens=max_tokens, temperature=temperature, stop=stop, stream=True,
        )
        async with httpx.AsyncClient(timeout=180.0) as client:
            async with client.stream("POST", f"{self._url}/v1/chat/completions", json=payload) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    token = _parse_sse_token(line)
                    if token:
                        yield token
 # ── SSE parser (OpenAI/vllm format) ──────────────────────────────────────────
 def _parse_sse_token(line: str) -> str:
    """Extract content token from an OpenAI-format SSE line.
    Lines look like:  data: {"choices": [{"delta": {"content": "word"}}]}
    Terminal line:    data: [DONE]
    Returns the token string, or "" for empty/done/non-data lines.
    """
    if not line.startswith("data:"):
        return ""
    payload = line[5:].strip()
    if payload == "[DONE]":
        return ""
    try:
        chunk = _json.loads(payload)
        return chunk["choices"][0]["delta"].get("content", "") or ""
    except (KeyError, IndexError, _json.JSONDecodeError):
        return ""
--- a/circuitforge_core/tts/app.py
+++ b/circuitforge_core/tts/app.py
@ -29,7 +29,7 @@ _CONTENT_TYPES: dict[str, str] = {
 }
 app = FastAPI(title="cf-tts")
-_backend = None  # type: TTSBackend | None
+_backend: TTSBackend | None = None
@app.get("/health")
@ -96,6 +96,7 @@ if __name__ == "__main__":
    mock = args.mock or args.model == "mock"
    device = "cpu" if mock else "cuda"
    global _backend
    _backend = make_tts_backend(args.model, mock=mock, device=device)
    print(f"cf-tts backend ready: {_backend.model_name} ({_backend.vram_mb} MB)")
--- a/circuitforge_core/tts/backends/base.py
+++ b/circuitforge_core/tts/backends/base.py
@ -60,12 +60,7 @@ def _encode_audio(
    if format == "wav":
        torchaudio.save(buf, wav, sample_rate, format="wav")
    elif format == "ogg":
-        # libvorbis may not be available on all torchaudio builds; fall back to wav
+        torchaudio.save(buf, wav, sample_rate, format="ogg", encoding="vorbis")
        try:
            torchaudio.save(buf, wav, sample_rate, format="ogg", encoding="vorbis")
        except Exception:
            buf = io.BytesIO()
            torchaudio.save(buf, wav, sample_rate, format="wav")
    elif format == "mp3":
        # torchaudio MP3 encode requires ffmpeg backend; fall back to wav on failure
        try:
--- a/docs/developer/adding-module.md
+++ b/docs/developer/adding-module.md
@ -1,129 +0,0 @@
 # Adding a Module to cf-core
 This guide walks through extracting a pattern from a product into a shared cf-core module. The goal is to move battle-tested implementations here once they've stabilized in at least two products.
 ## When to add a module
 Add a module when:
 - The same pattern exists in two or more products with minor variations
 - The interface is stable enough that changing it would require coordinated updates across products
 - The code has no product-specific business logic baked in
 Do not add a module for:
 - One-off utilities that only one product needs
 - Anything still in active design flux
 - Product-specific configuration or policy decisions
 ## Module structure
 ```
 circuitforge_core/
 └── mymodule/
    ├── __init__.py        # Public API — what products import
    ├── base.py            # Core implementation
    └── backends/          # Optional: pluggable backends
        ├── __init__.py
        ├── local.py
        └── cloud.py
 ```
 Keep the public API in `__init__.py` clean. Products should import from `circuitforge_core.mymodule`, not from internal submodules.
 ## Step 1: Define the interface
 Write the public interface first — the classes and functions products will call. Get this right before implementing, because changing it requires updating every product shim.
 ```python
 # circuitforge_core/mymodule/__init__.py
 from .base import MyThing, get_my_thing
 __all__ = ["MyThing", "get_my_thing"]
 ```
 ## Step 2: Implement with a stub
 Start with a minimal working implementation. Stub out anything uncertain:
 ```python
 # circuitforge_core/mymodule/base.py
 class MyThing:
    def __init__(self, config: dict):
        self._config = config
    def do_thing(self, input: str) -> str:
        raise NotImplementedError("Override in product or backend")
 ```
 ## Step 3: Write tests
 Tests go in `circuitforge_core/tests/test_mymodule.py`. Use `pytest`. The cf env has pytest installed.
 ```bash
 conda run -n cf python -m pytest tests/test_mymodule.py -v
 ```
 Cover:
 - Happy path with realistic input
 - Missing config / bad input (fail loudly, not silently)
 - Cloud vs local mode if applicable
 ## Step 4: Update `pyproject.toml`
 Add any new dependencies:
 ```toml
 [project.optional-dependencies]
 mymodule = ["some-dep>=1.0"]
 ```
 Use optional dependency groups so products that don't use the module don't pay the install cost.
 ## Step 5: Write the docs page
 Add `docs/modules/mymodule.md` following the pattern of the existing module docs. Include:
 - Import path
 - Why this module exists / design rationale
 - Full public API with examples
 - Any gotchas or non-obvious behavior
 - Status (Stable / Stub)
 Update `docs/modules/index.md` and `mkdocs.yml` to include the new page.
 ## Step 6: Update products
 In each product that uses the pattern:
 1. Add a shim if the product needs to override behavior
 2. Replace the inline implementation with imports from cf-core
 3. Run the product's tests
 The shim pattern:
 ```python
 # myproduct/app/mything.py
 from circuitforge_core.mymodule import get_my_thing as _base_get_my_thing
 from .config import get_settings
 def get_my_thing():
    settings = get_settings()
    return _base_get_my_thing(config=settings.mything_config)
 ```
 ## Licensing boundary
 The module's license depends on what it does:
 | Code | License |
 |------|---------|
 | Discovery, pipeline, data access | **MIT** |
 | LLM inference, AI features, fine-tuned model access | **BSL 1.1** |
 | Anything that would give SaaS competitors a free AI product | **BSL 1.1** |
 When in doubt, BSL 1.1. See the [licensing guide](licensing.md) for the full decision tree.
 ## Versioning
 cf-core uses semantic versioning. Adding a new module with a stable API is a **minor** version bump. Breaking an existing interface is a **major** bump and requires coordinated updates to all products.
 Update `pyproject.toml` and `CHANGELOG.md` before merging.
--- a/docs/developer/editable-install.md
+++ b/docs/developer/editable-install.md
@ -1,74 +0,0 @@
 # Editable Install Pattern
 CircuitForge products depend on cf-core via `pip install -e` (editable install) from a local clone, not from a package registry. This is a deliberate architectural choice that makes the development loop fast and the dependency relationship explicit.
 ## How it works
 `pip install -e /path/to/circuitforge-core` installs the package in "editable" mode: instead of copying files into `site-packages`, pip creates a `.pth` file pointing at the source directory. Python imports resolve directly from the cloned repo.
 This means:
 - Changes to cf-core source are picked up by every product the next time it starts — no reinstall needed
 - Restarting the product process (or Docker container) is sufficient to pick up changes
 - `git pull` in the cf-core repo automatically affects all products using it
 ## Docker considerations
 In Docker, editable install requires the cf-core source to be present inside the container at build time. Two patterns:
 **Pattern A: COPY at build time (production)**
 ```dockerfile
 COPY circuitforge-core/ /circuitforge-core/
 RUN pip install -e /circuitforge-core
 ```
 The build context must include the cf-core directory. `compose.yml` sets the build context to the parent directory:
 ```yaml
 services:
  api:
    build:
      context: ..          # parent of both product and cf-core
      dockerfile: myproduct/Dockerfile
 ```
 **Pattern B: Bind-mount for dev**
 ```yaml
 # compose.override.yml (dev only, gitignored)
 services:
  api:
    volumes:
      - ../circuitforge-core:/circuitforge-core:ro
 ```
 This lets you edit cf-core and restart the container without rebuilding the image.
 ## Python `.pyc` cache gotcha
 Python caches compiled bytecode in `__pycache__/` directories and `.pyc` files. When cf-core source is updated but the product hasn't been restarted, the old `.pyc` files can serve stale code even with the bind-mount in place.
 Fix: delete `.pyc` files and restart:
 ```bash
 find /path/to/circuitforge-core -name "*.pyc" -delete
 docker compose restart api
 ```
 This is especially common when fixing an import error — the old `ImportError` may persist even after the fix if the bytecode cache isn't cleared.
 ## When to reinstall
 A full `pip install -e .` reinstall is needed when:
 - `pyproject.toml` changes (new dependencies, entry points, package metadata)
 - A new subpackage directory is added (pip needs to discover it)
 - The `.egg-info` directory gets corrupted (delete it and reinstall)
 ```bash
 # Reinstall in the cf env
 conda run -n cf pip install -e /Library/Development/CircuitForge/circuitforge-core
 ```
 ## Future: Forgejo Packages
 When cf-core reaches a stable enough interface (currently targeting "third product shipped"), it will be published to the Circuit-Forge Forgejo private PyPI registry. Products will then depend on it via version pin, and the editable install will be for development only. The shim pattern is designed to make this transition smooth — product code stays the same, only the import source changes.
--- a/docs/developer/licensing.md
+++ b/docs/developer/licensing.md
@ -1,51 +0,0 @@
 # BSL vs MIT — Licensing Boundaries
 circuitforge-core contains both MIT and BSL 1.1 licensed code. Understanding the boundary matters for contributors and for deciding where new modules belong.
 ## The rule
 | Code category | License |
 |---------------|---------|
 | Discovery, ingestion, data pipeline | **MIT** |
 | LLM inference, AI generation, fine-tuned model access | **BSL 1.1** |
 | UI scaffolding, process management | **MIT** |
 | Tier gates, license validation | **BSL 1.1** |
 | Database, storage, configuration | **MIT** |
 **Heuristic:** If a competitor could use the module to build a commercial AI product without building the hard parts themselves, it's BSL 1.1. If it's plumbing that any software project might need, it's MIT.
 ## BSL 1.1 in practice
 BSL 1.1 means:
 - Free for personal non-commercial self-hosting
 - Free for internal business use (using the software, not selling it)
 - Commercial SaaS re-hosting requires a paid license from Circuit Forge LLC
 - Converts to MIT after 4 years
 "Commercial SaaS re-hosting" means: taking cf-core's AI features and building a competing product that charges users for them without a license. It does NOT restrict:
 - Running cf-core on your own server for your own use
 - Modifying cf-core for personal use
 - Contributing back to cf-core
 ## What this means for contributors
 If you're adding a module:
 - Add MIT code to the `MIT` section of `pyproject.toml`
 - Add BSL 1.1 code to the `BSL` section
 - Don't mix MIT and BSL code in the same module
 - If uncertain, ask before submitting — wrong license on a module causes legal headaches
 ## The `Co-Authored-By` policy
 Do NOT add `Co-Authored-By: Claude` (or any AI attribution trailer) to commits in CircuitForge repos. This is required for BSL 1.1 commercial viability — AI-assisted code with attribution claims can complicate licensing in ways that affect the ability to enforce BSL terms.
 This is not about hiding AI use. It's a legal precaution for a company that depends on BSL enforcement to fund its mission.
 ## BSL conversion timeline
 | Module | BSL since | MIT date |
 |--------|-----------|----------|
 | `tiers` | 2025-01-01 | 2029-01-01 |
 | `llm` | 2025-01-01 | 2029-01-01 |
 The conversion dates are tracked in `LICENSE` and will be updated as modules are added.
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@ -1,58 +0,0 @@
 # Installation
 circuitforge-core is distributed as an editable install from a local clone. It is not yet on PyPI.
 ## Prerequisites
 - Python 3.11+
 - A conda environment (CircuitForge uses `cf` by convention; older envs may be named `job-seeker`)
 - The `circuitforge-core` repo cloned alongside your product repo
 ## Typical layout
 ```
 /Library/Development/CircuitForge/
 ├── circuitforge-core/   ← this repo
 ├── kiwi/
 ├── peregrine/
 ├── snipe/
 └── ...
 ```
 ## Install
 ```bash
 # From inside a product repo, assuming circuitforge-core is a sibling
 conda run -n cf pip install -e ../circuitforge-core
 # Or activate first, then install
 conda activate cf
 pip install -e ../circuitforge-core
 ```
 The editable install means changes to circuitforge-core source are picked up by all products without reinstalling. After a change, you only need to restart the product's process (or its Docker container, if it runs in Docker).
 ## Verify
 ```python
 import circuitforge_core
 print(circuitforge_core.__version__)  # 0.9.0
 ```
 ## Inside Docker
 Product Dockerfiles copy or mount both the product source and cf-core:
 ```dockerfile
 # Copy cf-core alongside product source
 COPY --from=build /circuitforge-core /circuitforge-core
 RUN pip install -e /circuitforge-core
 ```
 The `compose.yml` for each product typically bind-mounts both directories in dev mode so live edits propagate without rebuilding the image.
 ## Upgrading
 cf-core follows semantic versioning. Since it's an editable install, `git pull` in the cf-core repo is sufficient — no reinstall needed for pure Python changes.
 For schema changes (new migrations) or new module dependencies, check the CHANGELOG for any additional steps.
--- a/docs/getting-started/using-in-product.md
+++ b/docs/getting-started/using-in-product.md
@ -1,89 +0,0 @@
 # Using cf-core in a Product
 After [installation](installation.md), import modules directly from the package. Each module is independent — import only what you need.
 ## Minimal wiring example
 ```python
 from circuitforge_core.config import Settings
 from circuitforge_core.db import get_db
 from circuitforge_core.tiers import require_tier
 from circuitforge_core.llm import LLMRouter
 settings = Settings()
 db = get_db(settings.db_path)
 router = LLMRouter(settings)
 ```
 ## Module shim pattern
 Products that need to extend or override cf-core behavior use a shim module. This is the recommended pattern — it keeps product-specific config resolution separate from the shared implementation.
 ```python
 # myproduct/app/llm_router.py  — shim
 from circuitforge_core.llm.router import LLMRouter as _BaseLLMRouter
 from .config import get_settings
 class LLMRouter(_BaseLLMRouter):
    def __init__(self):
        settings = get_settings()
        super().__init__(
            config_path=settings.llm_config_path,
            cloud_mode=settings.cloud_mode,
        )
 ```
 Product code then imports from the shim, never directly from cf-core. This means tri-level config resolution (env → config file → defaults) and cloud mode wiring stay in one place.
 !!! warning "Never import cf-core modules directly in scripts"
    Always import from the product shim. Bypassing the shim silently breaks cloud mode and config resolution. See [Peregrine's llm_router shim](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine) for the reference implementation.
 ## Per-user isolation (cloud mode)
 When `CLOUD_MODE=true`, products use per-user SQLite trees rather than a shared database. cf-core's `db` module provides the factory; products implement their own `cloud_session.py` to resolve the per-user path from the `X-CF-Session` JWT header.
 ```python
 # In a FastAPI endpoint with cloud mode
 from .cloud_session import get_user_db_path
 from circuitforge_core.db import get_db
@router.get("/items")
 async def list_items(request: Request):
    db_path = get_user_db_path(request)
    db = get_db(db_path)
    ...
 ```
 ## Tier gates
 Apply the `@require_tier` decorator to any endpoint or function that should be restricted:
 ```python
 from circuitforge_core.tiers import require_tier
@router.post("/suggest")
@require_tier("paid")
 async def suggest_recipe(request: Request):
    ...
 ```
 The decorator reads the user's tier from the request context (via Heimdall JWT validation) and raises `403` if the tier is insufficient.
 ## Background tasks with VRAM awareness
 Use `TaskScheduler` for any LLM inference that should be queued rather than run inline:
 ```python
 from circuitforge_core.tasks import TaskScheduler
 scheduler = TaskScheduler(service_name="myproduct", coordinator_url=settings.coordinator_url)
 async def enqueue_generation(item_id: str):
    await scheduler.submit(
        task_type="generate",
        payload={"item_id": item_id},
        vram_gb=4.0,
    )
 ```
 See the [tasks module reference](../modules/tasks.md) for the full API.
--- a/docs/index.md
+++ b/docs/index.md
@ -1,68 +0,0 @@
 # circuitforge-core
 Shared scaffold for all CircuitForge products. Every product in the menagerie depends on it via editable install.
 ```bash
 pip install -e ../circuitforge-core
 # or inside conda:
 conda run -n cf pip install -e ../circuitforge-core
 ```
 ---
 ## What it provides
 circuitforge-core gives every product the same foundation so patterns proven in one product propagate to all others automatically. The 17 modules cover the full stack from database access to LLM routing to tier gates.
 ```
 circuitforge_core/
 ├── db/          SQLite factory + migration runner
 ├── llm/         LLM router with fallback chain
 ├── tiers/       Tier gates — free / paid / premium / ultra
 ├── config/      Env-driven settings + .env loader
 ├── hardware/    GPU/CPU detection + VRAM profile generation
 ├── documents/   PDF, DOCX, image OCR → StructuredDocument
 ├── affiliates/  URL wrapping with opt-out + BYOK user IDs
 ├── preferences/ Per-user YAML preference store (dot-path API)
 ├── tasks/       VRAM-aware background task scheduler
 ├── manage/      Cross-platform process manager (Docker + native)
 ├── resources/   VRAM allocation + eviction engine
 ├── text/        Text processing utilities
 ├── stt/         Speech-to-text router (stub)
 ├── tts/         Text-to-speech router (stub)
 ├── pipeline/    Staging queue base — StagingDB (stub)
 ├── vision/      Vision router base class (stub)
 └── wizard/      First-run wizard base class (stub)
 ```
 ---
 ## Module status
 | Module | Status | Purpose |
 |--------|--------|---------|
 | `db` | Stable | SQLite connection factory, migration runner |
 | `llm` | Stable | LLM fallback router (Ollama, vLLM, Anthropic, OpenAI-compatible) |
 | `tiers` | Stable | `@require_tier()` decorator, BYOK unlock logic |
 | `config` | Stable | Env-driven settings, `.env` loader |
 | `hardware` | Stable | GPU enumeration, VRAM tier profiling |
 | `documents` | Stable | PDF/DOCX/image ingestion → `StructuredDocument` |
 | `affiliates` | Stable | `wrap_url()` with opt-out and BYOK user IDs |
 | `preferences` | Stable | Dot-path `get()`/`set()` over local YAML; pluggable backend |
 | `tasks` | Stable | `TaskScheduler` — VRAM-aware slot management |
 | `manage` | Stable | `manage.sh` scaffolding for Docker and native processes |
 | `resources` | Stable | VRAM allocation, eviction engine, GPU profile registry |
 | `text` | Stable | Text normalization, truncation, chunking utilities |
 | `stt` | Stub | Speech-to-text router (planned: whisper.cpp / faster-whisper) |
 | `tts` | Stub | Text-to-speech router (planned: piper / espeak) |
 | `pipeline` | Stub | `StagingDB` base — products provide concrete schema |
 | `vision` | Stub | Vision router base class (moondream2 / Claude dispatch) |
 | `wizard` | Stub | `BaseWizard` — products subclass for first-run setup |
 ---
 ## Version
 **v0.9.0** — MIT licensed for discovery/pipeline layers, BSL 1.1 for AI features.
 See the [developer guide](developer/adding-module.md) to add a new module.
--- a/docs/modules/affiliates.md
+++ b/docs/modules/affiliates.md
@ -1,60 +0,0 @@
 # affiliates
 Affiliate URL wrapping with user opt-out and BYOK user IDs. Shared across all CircuitForge products that surface external purchase or listing links.
 ```python
 from circuitforge_core.affiliates import wrap_url
 ```
 ## Design principle
 Affiliate links are disclosed to users and opt-out is always one click away. CF earns a small commission when users buy through wrapped links; this is the primary monetization path for free-tier products. The implementation is transparent: no dark patterns, no hidden redirects.
 ## `wrap_url(url, user_id=None, product=None) -> str`
 Wraps a URL with the configured affiliate parameters. Returns the original URL unchanged if:
 - Affiliate links are disabled globally (`CF_AFFILIATES_ENABLED=false`)
 - The user has opted out (`preferences.get("affiliates.opted_out")`)
 - The domain is not in the supported affiliate network list
 ```python
 from circuitforge_core.affiliates import wrap_url
 wrapped = wrap_url(
    "https://www.ebay.com/itm/123456",
    user_id="user_abc123",
    product="snipe",
 )
 # → "https://www.ebay.com/itm/123456?mkrid=711-53200-19255-0&campid=CF_SNIPE_abc123&..."
 ```
 ## User opt-out
 ```python
 from circuitforge_core.preferences import get_prefs
 prefs = get_prefs(user_id)
 prefs.set("affiliates.opted_out", True)
 ```
 When `opted_out` is `True`, `wrap_url()` returns the bare URL. The UI should surface this setting prominently — never bury it.
 ## BYOK user IDs
 BYOK users (those with their own license key or API key) get a unique affiliate sub-ID so their contributions are tracked separately. This is handled automatically when a `user_id` is passed.
 ## Supported networks
 | Product | Network | Notes |
 |---------|---------|-------|
 | Snipe | eBay Partner Network | `campid` encodes product + user |
 | Kiwi | Amazon Associates (planned) | For pantry staples / equipment |
 | Waxwing | Various garden suppliers (planned) | |
 ## Environment variables
 ```bash
 CF_AFFILIATES_ENABLED=true          # global kill switch
 CF_EBAY_CAMPAIGN_ID=your_campaign   # eBay Partner Network campaign ID
 CF_AMAZON_ASSOCIATE_TAG=your_tag    # Amazon Associates tag
 ```
--- a/docs/modules/config.md
+++ b/docs/modules/config.md
@ -1,80 +0,0 @@
 # config
 Env-driven settings with `.env` file loading. Provides a base `Settings` class that products subclass to add their own fields.
 ```python
 from circuitforge_core.config import Settings
 ```
 ## Design
 Configuration follows a strict priority order: **environment variables > `.env` file > defaults**. This means Docker Compose `environment:` overrides always win, which is essential for cloud vs local deployment switching without image rebuilds.
 ## Base Settings
 ```python
 class Settings(BaseSettings):
    # Database
    db_path: str = "data/app.db"
    # LLM
    llm_config_path: str = "config/llm.yaml"
    # Tier system
    license_key: str | None = None
    cloud_mode: bool = False
    # Cloud
    cloud_data_root: str = "/devl/app-cloud-data"
    cloud_auth_bypass_ips: list[str] = []
    coordinator_url: str = "http://10.1.10.71:7700"
    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
 ```
 ## Extending in a product
 ```python
 # myproduct/app/core/config.py
 from circuitforge_core.config import Settings as _BaseSettings
 class Settings(_BaseSettings):
    # Product-specific settings
    max_pantry_items: int = 500
    barcode_timeout_ms: int = 5000
    recipe_corpus_path: str = "data/recipes.db"
    class Config(_BaseSettings.Config):
        env_prefix = "MYPRODUCT_"
 ```
 ## `.env` file
 Each product ships a `.env.example` (committed) and a `.env` (gitignored). The `.env` file is loaded automatically by the `Settings` class.
 ```bash
 # .env.example
 DB_PATH=data/app.db
 CLOUD_MODE=false
 LICENSE_KEY=
 ```
 !!! tip "Never commit `.env`"
    `.env` files contain secrets and environment-specific paths. Always commit `.env.example` instead.
 ## Singleton pattern
 Products typically expose a cached `get_settings()` function:
 ```python
 from functools import lru_cache
 from .config import Settings
@lru_cache(maxsize=1)
 def get_settings() -> Settings:
    return Settings()
 ```
 This ensures the `.env` file is read only once — on the first `get_settings()` call — and all modules share the same settings instance.
--- a/docs/modules/db.md
+++ b/docs/modules/db.md
@ -1,56 +0,0 @@
 # db
 SQLite connection factory and migration runner. Every CircuitForge product uses this for all persistent storage.
 ```python
 from circuitforge_core.db import get_db, run_migrations
 ```
 ## Why SQLite
 SQLite is local-first by nature — no server process, no network dependency, trivially backed up, and fast enough for single-user workloads. circuitforge-core's `db` module adds migration management and connection pooling on top.
 ## API
 ### `get_db(path: str | Path) -> Connection`
 Returns a SQLite connection to the database at `path`. Creates the file if it doesn't exist. Enables WAL mode, foreign keys, and sets a sensible busy timeout by default.
 ```python
 db = get_db("/devl/kiwi-data/kiwi.db")
 ```
 In cloud mode, the path comes from the per-user session resolver — never hardcode `DB_PATH` directly in endpoints. Use `_request_db.get() or DB_PATH` or a product shim.
 ### `run_migrations(db: Connection, migrations_dir: str | Path)`
 Discovers and applies all `.sql` files in `migrations_dir` that haven't yet been applied, in filename order. Migration state is tracked in a `_migrations` table created on first run.
 ```python
 run_migrations(db, "app/db/migrations/")
 ```
 **Migration file naming:** `001_initial.sql`, `002_add_column.sql`, etc. Always prefix with zero-padded integers. Never renumber or delete applied migrations.
 ### `RETURNING *` gotcha
 SQLite added `RETURNING *` in version 3.35 (2021). When using it:
 ```python
 cursor = db.execute("INSERT INTO items (...) VALUES (?) RETURNING *", (...,))
 row = cursor.fetchone()   # fetch BEFORE commit — row disappears after commit
 db.commit()
 ```
 This is a known SQLite behavior that differs from PostgreSQL. cf-core does not paper over it; fetch before committing.
 ## Migration conventions
 - Files go in `app/db/migrations/` inside each product repo
 - One concern per file — don't combine unrelated schema changes
 - Never use `ALTER TABLE` to rename columns (not supported in SQLite < 3.25); add a new column and migrate data instead
 - `IF NOT EXISTS` and `IF EXISTS` guards make migrations idempotent
 ## Cloud mode
 In cloud mode, each user gets their own SQLite file under `CLOUD_DATA_ROOT`. The `db` module is unaware of this; the product's `cloud_session.py` resolves the per-user path before calling `get_db()`.
--- a/docs/modules/documents.md
+++ b/docs/modules/documents.md
@ -1,63 +0,0 @@
 # documents
 Document ingestion pipeline. Converts PDF, DOCX, ODT, and images into a normalized `StructuredDocument` for downstream processing.
 ```python
 from circuitforge_core.documents import ingest, StructuredDocument
 ```
 ## Supported formats
 | Format | Method | Notes |
 |--------|--------|-------|
 | PDF | `pdfplumber` | Two-column detection via gutter analysis |
 | DOCX | `python-docx` | Paragraph and table extraction |
 | ODT | stdlib `zipfile` + `ElementTree` | No external deps required |
 | PNG/JPG | cf-docuvision fast-path, local fallback | OCR via vision router |
 ## `ingest(path: str | Path) -> StructuredDocument`
 Main entry point. Detects format by file extension and routes to the appropriate parser.
 ```python
 doc = ingest("/tmp/invoice.pdf")
 print(doc.text)       # full extracted text
 print(doc.pages)      # list of per-page content
 print(doc.metadata)   # title, author, creation date if available
 ```
 ## StructuredDocument
 ```python
@dataclass
 class StructuredDocument:
    text: str                        # full plain text
    pages: list[str]                 # per-page text (PDFs)
    sections: dict[str, str]         # named sections if detected
    metadata: dict[str, Any]         # format-specific metadata
    source_path: str
    format: str                      # "pdf" | "docx" | "odt" | "image"
 ```
 ## PDF specifics
 Two-column PDFs (common in resumes and academic papers) are handled by `_find_column_split()`, which detects the gutter via word x-positions and extracts left and right columns separately before merging.
 CID glyph references (`(cid:NNN)`) from fonts re-embedded by applicant tracking systems (ATS) are stripped automatically. Common bullet CIDs (127, 149, 183) are mapped to `•`.
 ## OCR path
 Image inputs go through the vision router (see the [vision module](vision.md)). In practice this means:
 1. cf-docuvision fast-path (if available on the cf-orch coordinator)
 2. Local moondream2 fallback
 OCR results are treated as unstructured text — no section detection is attempted.
 ## ATS gotcha
 Some ATS-exported PDFs embed fonts in ways that cause `pdfplumber` to extract garbled text. If `doc.text` looks corrupted (common with Oracle Taleo exports), try the image fallback:
 ```python
 doc = ingest(path, force_ocr=True)
 ```
--- a/docs/modules/hardware.md
+++ b/docs/modules/hardware.md
@ -1,51 +0,0 @@
 # hardware
 GPU enumeration and VRAM-tier profile generation. Used by `manage.sh` at startup to recommend a Docker Compose profile and by the cf-orch coordinator for resource allocation.
 ```python
 from circuitforge_core.hardware import get_gpus, recommend_profile, HardwareProfile
 ```
 ## GPU detection
 `get_gpus()` returns a list of detected GPUs with their VRAM capacity. Detection strategy:
 1. Try `nvidia-smi` (Linux/Windows NVIDIA)
 2. Fall back to `system_profiler SPDisplaysDataType` on Darwin when `hw.optional.arm64=1` (Apple Silicon)
 3. Return CPU-only profile if neither succeeds
 ```python
 gpus = get_gpus()
 # [{"name": "RTX 4090", "vram_gb": 24.0, "type": "nvidia"},
 #  {"name": "Apple M2 Max", "vram_gb": 32.0, "type": "apple_silicon"}]
 ```
 ## Compose profile recommendation
 ```python
 profile = recommend_profile(gpus)
 # "single-gpu" | "dual-gpu" | "cpu" | "remote"
 ```
 Profile selection rules:
 - `single-gpu`: one NVIDIA GPU with >= 8GB VRAM
 - `dual-gpu`: two or more NVIDIA GPUs
 - `cpu`: no NVIDIA GPU (Apple Silicon uses `cpu` since Docker on Mac has no Metal passthrough)
 - `remote`: explicitly requested or when local inference would exceed available VRAM
 !!! note "Apple Silicon"
    Apple Silicon Macs should run Ollama natively (outside Docker) for Metal-accelerated inference. Docker on macOS runs in a Linux VM with no Metal passthrough. `preflight.py` in each product detects native Ollama on :11434 and adopts it automatically.
 ## VRAM tiers
 | VRAM | Models that fit |
 |------|----------------|
 | < 4 GB | Quantized 1B–3B models (Phi-3 mini, Llama 3.2 3B Q4) |
 | 4–8 GB | 7B–8B models Q4 (Llama 3.1 8B, Mistral 7B) |
 | 8–16 GB | 13B–14B models Q4, 7B models in full precision |
 | 16–24 GB | 30B models Q4, 13B full precision |
 | 24 GB+ | 70B models Q4 |
 ## HardwareProfile
 The `HardwareProfile` dataclass is written to `compose.override.yml` by `preflight.py` at product startup, making GPU capabilities available to Docker Compose without hardcoding.
--- a/docs/modules/index.md
+++ b/docs/modules/index.md
@ -1,23 +0,0 @@
 # Module Reference
 All circuitforge-core modules live under the `circuitforge_core` package. Each is independently importable.
 | Module | Import | Status | One-line summary |
 |--------|--------|--------|-----------------|
 | [db](db.md) | `circuitforge_core.db` | Stable | SQLite connection factory + migration runner |
 | [llm](llm.md) | `circuitforge_core.llm` | Stable | LLM router with fallback chain |
 | [tiers](tiers.md) | `circuitforge_core.tiers` | Stable | `@require_tier()` decorator, BYOK unlock |
 | [config](config.md) | `circuitforge_core.config` | Stable | Env-driven settings, `.env` loader |
 | [hardware](hardware.md) | `circuitforge_core.hardware` | Stable | GPU/CPU detection, VRAM profile generation |
 | [documents](documents.md) | `circuitforge_core.documents` | Stable | Document ingestion → `StructuredDocument` |
 | [affiliates](affiliates.md) | `circuitforge_core.affiliates` | Stable | `wrap_url()` with opt-out + BYOK user IDs |
 | [preferences](preferences.md) | `circuitforge_core.preferences` | Stable | Dot-path preference store over local YAML |
 | [tasks](tasks.md) | `circuitforge_core.tasks` | Stable | VRAM-aware background task scheduler |
 | [manage](manage.md) | `circuitforge_core.manage` | Stable | `manage.sh` scaffolding, Docker + native |
 | [resources](resources.md) | `circuitforge_core.resources` | Stable | VRAM allocation + eviction engine |
 | [text](text.md) | `circuitforge_core.text` | Stable | Text normalization, chunking utilities |
 | [stt](stt.md) | `circuitforge_core.stt` | Stub | Speech-to-text router |
 | [tts](tts.md) | `circuitforge_core.tts` | Stub | Text-to-speech router |
 | [pipeline](pipeline.md) | `circuitforge_core.pipeline` | Stub | `StagingDB` base class |
 | [vision](vision.md) | `circuitforge_core.vision` | Stub | Vision router base class |
 | [wizard](wizard.md) | `circuitforge_core.wizard` | Stub | First-run wizard base class |
--- a/docs/modules/llm.md
+++ b/docs/modules/llm.md
@ -1,88 +0,0 @@
 # llm
 LLM router with a configurable fallback chain. Abstracts over Ollama, vLLM, Anthropic, and any OpenAI-compatible backend. Products never talk to a specific LLM backend directly.
 ```python
 from circuitforge_core.llm import LLMRouter
 ```
 ## Design principle
 The router implements "local inference first." Cloud backends sit at the end of the fallback chain. A product configured with only Ollama will never silently fall through to a paid API.
 ## Configuration
 The router reads `config/llm.yaml` from the product's working directory (or the path passed to the constructor). Each product maintains its own `llm.yaml`; cf-core provides the router, not the config.
 ```yaml
 # config/llm.yaml example
 fallback_order:
  - ollama
  - vllm
  - anthropic
 ollama:
  enabled: true
  base_url: http://localhost:11434
  model: llama3.2:3b
 vllm:
  enabled: false
  base_url: http://localhost:8000
 anthropic:
  enabled: false
  api_key_env: ANTHROPIC_API_KEY
 ```
 ## API
 ### `LLMRouter(config_path=None, cloud_mode=False)`
 Instantiate the router. In most products, instantiation happens inside a shim that injects product-specific config resolution.
 ### `router.complete(prompt, system=None, images=None, fallback_order=None) -> str`
 Send a completion request. Tries backends in order; falls through on error or unavailability.
 ```python
 router = LLMRouter()
 response = router.complete(
    prompt="Summarize this recipe in one sentence.",
    system="You are a cooking assistant.",
 )
 ```
 Pass `images: list[str]` (base64-encoded) for vision requests — non-vision backends are automatically skipped when images are present.
 Pass `fallback_order=["vllm", "anthropic"]` to override the config chain for a specific call (useful for task-specific routing).
 ### `router.stream(prompt, system=None) -> Iterator[str]`
 Streaming variant. Yields token chunks as they arrive. Not all backends support streaming; the router logs a warning and falls back to a non-streaming backend if needed.
 ## Shim requirement
 !!! warning "Always use the product shim"
    Scripts and endpoints must import `LLMRouter` from the product shim (`scripts/llm_router.py` or `app/llm_router.py`), never directly from `circuitforge_core.llm.router`. The shim handles tri-level config resolution (env vars override the config file, which overrides defaults) and cloud mode wiring. Bypassing it breaks cloud deployments silently.
 ## Backends
 | Backend | Type | Notes |
 |---------|------|-------|
 | `ollama` | Local | Preferred default; model names from `config/llm.yaml` |
 | `vllm` | Local GPU | For high-throughput or large models |
 | `anthropic` | Cloud | Requires `ANTHROPIC_API_KEY` env var |
 | `openai` | Cloud | Any OpenAI-compatible endpoint |
 | `claude_code` | Local wrapper | claude-bridge OpenAI-compatible wrapper on :3009 |
 ## Vision routing
 When images are included in a `complete()` call, the router checks each backend's vision capability before trying it. Configure vision priority separately:
 ```yaml
 vision_fallback_order:
  - vision_service   # local moondream2 via FastAPI on :8002
  - claude_code
  - anthropic
 ```
--- a/docs/modules/manage.md
+++ b/docs/modules/manage.md
@ -1,67 +0,0 @@
 # manage
 `manage.sh` scaffolding for cross-platform product process management. Every CircuitForge product ships a `manage.sh` generated from this module.
 ```python
 from circuitforge_core.manage import generate_manage_sh, ProcessManager
 ```
 ## Purpose
 `manage.sh` is the single entry point for starting, stopping, restarting, and checking the status of a product. It abstracts over Docker Compose (production) and native Python processes (development without Docker).
 ## Commands
 Every product's `manage.sh` supports:
 ```bash
 bash manage.sh start          # Start all services
 bash manage.sh stop           # Stop all services
 bash manage.sh restart        # Stop then start
 bash manage.sh status         # Print running state
 bash manage.sh logs           # Tail logs
 bash manage.sh open           # Open the product UI in a browser
 bash manage.sh update         # Pull latest and restart
 ```
 Products add their own subcommands by extending the base script.
 ## Docker mode (production)
 In Docker mode, `manage.sh` delegates to `docker compose`. The script auto-detects whether Docker is available and falls back to native mode if not.
 ```bash
 # manage.sh internals (Docker mode)
 docker compose -f compose.yml up -d
 docker compose -f compose.yml logs -f
 ```
 For cloud deployments, products have a `compose.cloud.yml` that's overlaid:
 ```bash
 docker compose -f compose.yml -f compose.cloud.yml up -d
 ```
 ## Preflight
 `manage.sh start` calls `preflight.py` before launching containers. Preflight:
 1. Enumerates GPUs and writes a Docker Compose profile recommendation
 2. Checks for port conflicts and auto-increments the port number if needed
 3. Detects external services (Ollama, vLLM, SearXNG) already running and adopts them via `compose.override.yml`
 4. Writes the final `.env` for the current session
 ## Extending manage.sh
 Products add subcommands by checking `$1` before the default case:
 ```bash
 case "$1" in
  backfill)
    conda run -n cf python scripts/backfill_keywords.py
    ;;
  *)
    # Default manage.sh handling
    ...
    ;;
 esac
 ```
--- a/docs/modules/pipeline.md
+++ b/docs/modules/pipeline.md
@ -1,60 +0,0 @@
 # pipeline
 Staging queue base class. **Stub — partially implemented.**
 ```python
 from circuitforge_core.pipeline import StagingDB  # base class
 ```
 ## Purpose
 `StagingDB` is the base class for the staging layer that sits between discovery/ingestion and the main product workflow. Products subclass it to add their concrete schema.
 The pattern:
 ```
 Source (scraper / scan / upload)
    → StagingDB (raw, unreviewed records)
    → Human review / approval
    → Main product DB (approved records)
 ```
 This is explicit in Peregrine (jobs go from `pending` → `approved` → `applied`) and analogous in Kiwi (receipts go from `uploaded` → `parsed` → `pantry`).
 ## Crystallization engine
 The pipeline module also contains the crystallization engine: a system for promoting AI-generated drafts through a series of structured human-approval checkpoints before the output "crystallizes" into a permanent record.
 Each stage in the pipeline has:
 - An **AI step** that produces a draft
 - A **human approval gate** that must be explicitly cleared
 - A **rollback path** back to the previous stage if rejected
 This is the architectural embodiment of the "LLMs as drafts, never decisions" principle.
 ## Current status
 `StagingDB` base class exists and is used by Peregrine's job pipeline. The crystallization engine design is documented in `circuitforge-plans/shared/superpowers/specs/` and is being extracted into this module as it stabilizes across products.
 ## `StagingDB` base class
 ```python
 class StagingDB:
    def __init__(self, db: Connection):
        self.db = db
    def stage(self, record: dict) -> str:
        """Insert a record into staging. Returns record ID."""
        raise NotImplementedError
    def approve(self, record_id: str, reviewer_id: str | None = None):
        """Promote a record past the approval gate."""
        raise NotImplementedError
    def reject(self, record_id: str, reason: str | None = None):
        """Mark a record as rejected."""
        raise NotImplementedError
    def pending(self) -> list[dict]:
        """Return all records awaiting review."""
        raise NotImplementedError
 ```
--- a/docs/modules/preferences.md
+++ b/docs/modules/preferences.md
@ -1,76 +0,0 @@
 # preferences
 Per-user preference store. Provides a dot-path `get()`/`set()` API over a local YAML file, with a pluggable backend for cloud deployments.
 ```python
 from circuitforge_core.preferences import get_prefs, UserPreferences
 ```
 ## API
 ### `get_prefs(user_id: str | None = None) -> UserPreferences`
 Returns the preference store for the given user. In local mode, `user_id` is ignored and a shared local file is used. In cloud mode, each user gets an isolated preference file under `CLOUD_DATA_ROOT`.
 ### `prefs.get(key: str, default=None) -> Any`
 Dot-path key access. Returns `default` if the key doesn't exist.
 ```python
 prefs = get_prefs()
 theme = prefs.get("ui.theme", "light")
 opted_out = prefs.get("affiliates.opted_out", False)
 ```
 ### `prefs.set(key: str, value: Any)`
 Sets a value at the dot path. Creates intermediate keys as needed. Persists immediately.
 ```python
 prefs.set("ui.theme", "dark")
 prefs.set("dietary.restrictions", ["vegan", "gluten-free"])
 ```
 ### `prefs.delete(key: str)`
 Removes a key. No-ops silently if the key doesn't exist.
 ## Accessibility preferences
 The `preferences` module includes first-class support for accessibility needs under the `accessibility.*` namespace. These are surfaced in product settings UIs and respected throughout the UI layer.
 ```yaml
 # Stored in user preferences
 accessibility:
  reduce_motion: true           # No animations or transitions
  high_contrast: false
  font_size: large              # small | medium | large | x-large
  screen_reader_hints: true     # Extra ARIA labels and descriptions
  plain_language: true          # Simplified text throughout UI
  extra_confirmation_steps: true # Additional "are you sure?" prompts
 ```
 Products should read these at render time and pass them to UI components. See the design philosophy for why neurodivergent (ND) users and users with adaptive needs are a primary audience.
 ## Pluggable backend
 The default backend is a local YAML file. Products can substitute a database backend for cloud deployments:
 ```python
 from circuitforge_core.preferences import get_prefs, SQLitePreferenceBackend
 prefs = get_prefs(user_id, backend=SQLitePreferenceBackend(db_path))
 ```
 ## Storage format
 ```yaml
 # ~/.local/share/circuitforge/myproduct/prefs.yaml (or per-user cloud path)
 ui:
  theme: dark
 affiliates:
  opted_out: false
 dietary:
  restrictions:
    - vegan
 ```
--- a/docs/modules/resources.md
+++ b/docs/modules/resources.md
@ -1,51 +0,0 @@
 # resources
 VRAM allocation engine and GPU profile registry. Works alongside the [tasks](tasks.md) module to prevent GPU OOM errors across concurrent LLM workloads.
 ```python
 from circuitforge_core.resources import ResourceCoordinator, VRAMSlot
 ```
 ## Architecture
 The resource coordinator runs as a sidecar alongside each product (via `compose.override.yml`) and registers with the cf-orch coordinator at `http://10.1.10.71:7700`. The coordinator maintains a global view of VRAM allocation across all products and all GPUs.
 ```
 Product A (kiwi)     ─┐
 Product B (peregrine) ─┤ → cf-orch coordinator → GPU 0 (24GB)
 Product C (snipe)    ─┘                        → GPU 1 (8GB)
 ```
 ## VRAM allocation
 `VRAMSlot` represents a lease on a fixed VRAM budget:
 ```python
 slot = VRAMSlot(service="kiwi", task_type="recipe_llm", vram_gb=4.0)
 async with coordinator.lease(slot):
    result = await run_inference(prompt)
 # VRAM released automatically on context exit
 ```
 If the requested VRAM is not available, the coordinator queues the request. Tasks are executed in FIFO order within each priority class.
 ## Eviction engine
 When a high-priority task needs VRAM that is held by a lower-priority task, the eviction engine signals the lower-priority task to checkpoint and pause. Eviction is cooperative, not forced — tasks must implement the `checkpoint()` callback.
 ## GPU profile registry
 The registry maps GPU models to capability profiles:
 ```python
 from circuitforge_core.resources import get_gpu_profile
 profile = get_gpu_profile("RTX 4090")
 # GpuProfile(vram_gb=24.0, fp16=True, int8=True, int4=True, max_batch=32)
 ```
 Profiles are used by the LLM router to determine which model quantizations a GPU can run.
 ## Local fallback
 When the cf-orch coordinator is not reachable (local dev without the sidecar), the resource coordinator falls back to a local-only mode: tasks run sequentially with no cross-product coordination. This is safe for development but should not be used in production if multiple products are running concurrently on the same GPU.
--- a/docs/modules/stt.md
+++ b/docs/modules/stt.md
@ -1,27 +0,0 @@
 # stt
 Speech-to-text router. **Stub — not yet implemented.**
 ```python
 from circuitforge_core.stt import STTRouter  # planned
 ```
 ## Planned design
 The STT module will provide a unified interface over local speech-to-text backends, following the same fallback-chain pattern as the [LLM router](llm.md).
 **Planned backends:**
 - `whisper_cpp` — local, CPU/GPU, various model sizes
 - `faster_whisper` — local, GPU-accelerated, CTranslate2 backend
 - `whisper_openai` — cloud, requires `OPENAI_API_KEY`
 **Planned use cases across the menagerie:**
 - Osprey: transcribe hold music + IVR menu audio for navigation
 - Linnet: real-time speech annotation (tone classification requires transcript)
 - Peregrine: interview practice sessions
 ## Current status
 The `circuitforge_core.stt` directory exists in-tree with a stub `__init__.py`. No working implementation yet. Planned for the milestone after Osprey reaches beta.
 If you need STT before this module ships, use `faster-whisper` directly in the product and plan to migrate to this interface once it stabilizes.
--- a/docs/modules/tasks.md
+++ b/docs/modules/tasks.md
@ -1,78 +0,0 @@
 # tasks
 VRAM-aware background task scheduler. Manages a queue of LLM inference jobs and coordinates VRAM allocation with the cf-orch coordinator before executing each task.
 ```python
 from circuitforge_core.tasks import TaskScheduler, get_scheduler, reset_scheduler
 ```
 ## Why VRAM-aware scheduling
 Running multiple LLM inference jobs concurrently on a single GPU causes OOM errors and corrupted outputs. The scheduler serializes LLM work per service and negotiates with the cf-orch coordinator so tasks across multiple products don't compete for the same VRAM budget.
 ## Core API
 ### `get_scheduler() -> TaskScheduler`
 Returns the singleton scheduler for the current process. Creates it on first call.
 ### `reset_scheduler()`
 Tears down the scheduler (releases VRAM leases, cancels pending tasks). Called during FastAPI lifespan teardown.
 ```python
 # In FastAPI lifespan
 from circuitforge_core.tasks import get_scheduler, reset_scheduler
@asynccontextmanager
 async def lifespan(app: FastAPI):
    scheduler = get_scheduler()
    yield
    reset_scheduler()
 ```
 ### `scheduler.submit(task_type, payload, vram_gb) -> str`
 Enqueues a task. Returns the task ID. The scheduler acquires a VRAM lease from the coordinator before executing.
 ```python
 task_id = await scheduler.submit(
    task_type="recipe_llm",
    payload={"pantry_ids": [1, 2, 3]},
    vram_gb=4.0,
 )
 ```
 ### `scheduler.result(task_id) -> TaskResult | None`
 Polls for a completed result. Returns `None` if still running.
 ## VRAM budgets
 Each product defines its VRAM budgets in `compose.yml` / `compose.override.yml`:
 ```yaml
 environment:
  VRAM_BUDGET_RECIPE_LLM: "4.0"
  VRAM_BUDGET_EXPIRY_LLM: "2.0"
 ```
 These map to task types in the scheduler. If the coordinator is unavailable (local dev without cf-orch), the scheduler falls back to sequential local execution.
 ## Shim pattern
 Products that need to re-export scheduler functions for backward compatibility use a shim:
 ```python
 # myproduct/app/tasks/scheduler.py
 from circuitforge_core.tasks.scheduler import (
    get_scheduler as _base_get_scheduler,
    reset_scheduler,          # re-export for lifespan teardown
 )
 def get_scheduler():
    """Product-specific scheduler with service name injected."""
    return _base_get_scheduler(service_name="myproduct")
 ```
 Always re-export `reset_scheduler` from the shim so the FastAPI lifespan can import it from one place.
--- a/docs/modules/text.md
+++ b/docs/modules/text.md
@ -1,57 +0,0 @@
 # text
 Text processing utilities. Normalization, truncation, chunking, and token estimation — shared across all products that manipulate text before or after LLM inference.
 ```python
 from circuitforge_core.text import normalize, chunk, truncate, estimate_tokens
 ```
 ## `normalize(text: str) -> str`
 Strips excess whitespace, normalizes unicode (NFC), and removes null bytes and control characters that can cause downstream issues with SQLite FTS5 or LLM tokenizers.
 ```python
 from circuitforge_core.text import normalize
 clean = normalize("  Hello\u00a0world\x00  ")
 # → "Hello world"
 ```
 ## `truncate(text: str, max_tokens: int, model: str = "default") -> str`
 Truncates text to approximately `max_tokens` tokens, breaking at sentence or paragraph boundaries where possible. Uses a simple byte-based heuristic (1 token ≈ 4 bytes) unless a specific model tokenizer is requested.
 ```python
 excerpt = truncate(long_doc, max_tokens=2048)
 ```
 ## `chunk(text: str, chunk_size: int, overlap: int = 0) -> list[str]`
 Splits text into overlapping chunks for RAG (retrieval-augmented generation) pipelines. Respects paragraph boundaries.
 ```python
 chunks = chunk(article_text, chunk_size=512, overlap=64)
 ```
 ## `estimate_tokens(text: str, model: str = "default") -> int`
 Estimates token count without loading a full tokenizer. Accurate enough for context window budget planning (within ~10%).
 ## FTS5 helpers
 SQLite FTS5 has quirks with special characters in MATCH expressions. The `text` module provides helpers used by the recipe engine and other FTS5 consumers:
 ```python
 from circuitforge_core.text import fts_quote, strip_apostrophes
 # Always double-quote FTS5 terms — bare tokens break on brand names
 query = " ".join(fts_quote(term) for term in tokens)
 # → '"chicken" "breast" "lemon"'
 # Strip apostrophes before FTS5 queries
 clean = strip_apostrophes("O'Doul's")
 # → "ODouls"
 ```
 !!! warning "FTS5 gotcha"
    Always quote ALL terms in MATCH expressions. Bare tokens break on brand names (e.g., `O'Doul's`), plant-based ingredient names, and anything with punctuation.
--- a/docs/modules/tiers.md
+++ b/docs/modules/tiers.md
@ -1,67 +0,0 @@
 # tiers
 Tier system implementation. Provides the `@require_tier()` decorator used on FastAPI endpoints and the BYOK (bring your own key) unlock logic.
 ```python
 from circuitforge_core.tiers import require_tier, TierLevel
 ```
 ## Tier levels
 | Tier | Constant | What it unlocks |
 |------|----------|----------------|
 | Free | `TierLevel.FREE` | Core pipeline, basic AI assist, local LLM only |
 | Paid | `TierLevel.PAID` | Cloud LLM, integrations, full AI generation suite |
 | Premium | `TierLevel.PREMIUM` | Fine-tuned models, multi-user, advanced analytics |
 | Ultra | `TierLevel.ULTRA` | Human-in-the-loop operator execution |
 ## BYOK unlocks
 Users who configure their own LLM backend (via `config/llm.yaml`) can unlock features that would otherwise require a paid tier. The `tiers` module checks for configured BYOK backends before enforcing tier gates.
 This is intentional: privacy-preserving self-hosting is rewarded, not penalized. A user running their own Ollama instance gets AI features without a subscription.
 ## `@require_tier(tier: str)`
 Decorator for FastAPI route handlers. Resolves the calling user's tier from the request context (Heimdall JWT, validated by Caddy) and raises HTTP 403 if insufficient.
 ```python
 from circuitforge_core.tiers import require_tier
@router.post("/recipes/suggest")
@require_tier("paid")
 async def suggest_recipes(request: Request, body: SuggestRequest):
    ...
 ```
 In local (non-cloud) mode with no license configured, all users default to `free`. BYOK detection runs first — if a local LLM backend is configured, relevant paid features unlock regardless of license tier.
 ## Per-product overrides
 Products define which specific features are gated at which tier in their own `app/tiers.py`, using the cf-core decorators as building blocks. The cf-core `tiers` module provides the mechanism; the product owns the policy.
 ```python
 # kiwi/app/tiers.py
 from circuitforge_core.tiers import require_tier
 # Re-export with product-specific names if desired
 require_paid = require_tier("paid")
 require_premium = require_tier("premium")
 # BYOK unlockable features — defined per product
 BYOK_UNLOCKABLE = [
    "recipe_suggestion_l3",
    "receipt_ocr",
    "expiry_llm_fallback",
 ]
 ```
 ## Checking tier in non-endpoint code
 ```python
 from circuitforge_core.tiers import get_user_tier, TierLevel
 tier = get_user_tier(user_id)
 if tier >= TierLevel.PAID:
    # run AI feature
 ```
--- a/docs/modules/tts.md
+++ b/docs/modules/tts.md
@ -1,27 +0,0 @@
 # tts
 Text-to-speech router. **Stub — not yet implemented.**
 ```python
 from circuitforge_core.tts import TTSRouter  # planned
 ```
 ## Planned design
 The TTS module will mirror the [LLM router](llm.md) pattern: a configurable fallback chain over local and cloud TTS backends.
 **Planned backends:**
 - `piper` — local, fast, offline-capable; excellent quality for a neural TTS
 - `espeak` — local, minimal resource use, robotic but reliable fallback
 - `openai_tts` — cloud, `tts-1` and `tts-1-hd`; requires `OPENAI_API_KEY`
 **Planned use cases:**
 - Osprey: reading back IVR menus aloud; accessibility for users who can't monitor hold music
 - Linnet: speaking annotated tone labels alongside the original audio
 - Any product: accessible audio output for users with print disabilities
 ## Current status
 Stub only. Planned to ship alongside or shortly after the STT module, as most use cases need both.
 **Piper** is the recommended path when this lands: it runs locally at 10–20x real-time on CPU, supports 40+ language/speaker models, and has no API key requirement. See [rhasspy/piper](https://github.com/rhasspy/piper) for model downloads.
--- a/docs/modules/vision.md
+++ b/docs/modules/vision.md
@ -1,45 +0,0 @@
 # vision
 Vision router base class. **Stub — partially implemented.**
 ```python
 from circuitforge_core.vision import VisionRouter  # base class
 ```
 ## Planned design
 The vision module mirrors the [LLM router](llm.md) pattern for multimodal inputs. Products subclass `VisionRouter` and configure a fallback chain over vision-capable backends.
 **Planned backends:**
 - `moondream2` — local, 1.8GB, fast; via the vision service FastAPI sidecar on :8002
 - `claude_code` — local wrapper with vision capability
 - `anthropic` — cloud, Claude's vision models
 - `openai` — cloud, GPT-4o vision
 ## Current usage
 The vision service (`scripts/vision_service/main.py` in Peregrine, and the cf-docuvision path in Kiwi) currently implements vision routing directly without going through this module. This module is being designed to absorb those implementations once the interface stabilizes.
 ## `VisionRouter` base class
 ```python
 class VisionRouter:
    def analyze(
        self,
        images: list[str],        # base64-encoded
        prompt: str,
        max_tokens: int = 1024,
    ) -> str:
        """Run vision inference. Returns text response."""
        raise NotImplementedError
 ```
 ## moondream2 specifics
 moondream2 is the preferred local vision model — it's small enough for CPU use (1.8GB download) and fast enough for interactive use on GPU. Products using it:
 - **Peregrine**: survey screenshot analysis (culture-fit survey assistant)
 - **Kiwi**: receipt OCR fast-path, barcode label reading
 !!! note "VRAM requirement"
    moondream2 uses ~1.5GB VRAM in 4-bit quantization. Stop the main LLM service before starting the vision service if you're on a card with < 6GB VRAM.
--- a/docs/modules/wizard.md
+++ b/docs/modules/wizard.md
@ -1,51 +0,0 @@
 # wizard
 First-run wizard base class. **Stub.**
 ```python
 from circuitforge_core.wizard import BaseWizard  # planned
 ```
 ## Purpose
 `BaseWizard` provides a standard scaffold for first-run product setup. Every CircuitForge product has a first-run wizard that:
 1. Validates prerequisites (Docker, required ports, disk space)
 2. Configures the LLM backend (local Ollama / vLLM / BYOK cloud)
 3. Sets user preferences and accessibility options
 4. Issues or validates a license key
 5. Runs a smoke test and confirms everything is working
 ## Existing implementations
 Each product currently implements its own wizard:
 - **Peregrine**: `app/pages/0_Setup.py` (Streamlit) — gates app until `config/user.yaml` exists
 - **Kiwi**: Vue 3 wizard component with step-by-step hardware detection, LLM config, dietary preferences
 These will be refactored to share the `BaseWizard` scaffold once the interface stabilizes.
 ## Planned `BaseWizard` API
 ```python
 class BaseWizard:
    steps: list[WizardStep]         # ordered list of setup steps
    def run(self) -> WizardResult:
        """Execute all steps in order. Returns result with completion status."""
        ...
    def resume(self, from_step: int) -> WizardResult:
        """Resume from a specific step (e.g., after fixing a failed prereq)."""
        ...
 ```
 ## Accessibility in the wizard
 The wizard is the first thing new users see. It must meet CF's accessibility standards:
 - All steps must be completable with keyboard only
 - No time limits on any step
 - Plain-language instructions throughout (no jargon)
 - Accessibility preferences collected early (step 2 or 3) so the rest of the wizard can immediately adapt
 - Progress saved after each step so users can pause and return
--- a/scripts/test_musicgen.py
+++ b/scripts/test_musicgen.py
@ -1,129 +0,0 @@
 #!/usr/bin/env python
 """
 Standalone music continuation test — no service required.
 Usage:
    conda run -n cf python scripts/test_musicgen.py \
        --input "/Library/Audio/Music/KAESUL/Schedule I - Original Soundtrack (2025)/KAESUL - Schedule I - Original Soundtrack - 17 - the life i lead (reveal trailer).mp3"
 Options:
    --input PATH          Audio file to continue (any ffmpeg-readable format)
    --output PATH         Output WAV path (default: /tmp/continuation_output.wav)
    --model MODEL         MusicGen variant (default: facebook/musicgen-melody)
    --duration SECS       Seconds of new audio to generate (default: 30)
    --prompt-duration SECS  Seconds from end of song to condition on (default: 10)
    --description TEXT    Optional style hint, e.g. "dark ambient electronic"
    --device DEVICE       cuda or cpu (default: cuda)
    --join                Concatenate original prompt segment + continuation in output
 The generated file is saved to --output. Open it in any audio player to listen.
 Model weights download to /Library/Assets/LLM/musicgen/ on first run (~8 GB for melody).
 """
 from __future__ import annotations
 import argparse
 import logging
 import os
 import sys
 import time
 # Redirect HF cache before any audiocraft import
 os.environ.setdefault("HF_HOME", "/Library/Assets/LLM/musicgen")
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s  %(message)s",
 )
 log = logging.getLogger("test_musicgen")
 def parse_args() -> argparse.Namespace:
    """Parse the script's command-line options from ``sys.argv``.
    Only ``--input`` is mandatory; every other option falls back to the
    default listed in the table below.
    Returns:
        The populated ``argparse.Namespace`` (``--prompt-duration`` is
        exposed as ``prompt_duration``).
    """
    # (flag, add_argument keyword options), kept in the order shown by --help.
    option_table = (
        ("--input", {"required": True, "help": "Input audio file path"}),
        ("--output", {"default": "/tmp/continuation_output.wav"}),
        ("--model", {"default": "facebook/musicgen-melody"}),
        ("--duration", {"type": float, "default": 30.0,
                        "help": "Seconds of new audio to generate"}),
        ("--prompt-duration", {"type": float, "default": 10.0,
                               "help": "Seconds from end of song used as prompt"}),
        ("--description", {"default": None,
                           "help": "Optional text description to guide the style"}),
        ("--device", {"default": "cuda", "choices": ["cuda", "cpu"]}),
        ("--join", {"action": "store_true",
                    "help": "Prepend the prompt segment to the output file"}),
    )
    parser = argparse.ArgumentParser(description="cf-musicgen standalone test")
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()
 def main() -> None:
    """Run a one-off MusicGen continuation from the command line.
    Loads ``--input``, conditions the model on the final ``--prompt-duration``
    seconds of it, generates audio with ``generate_continuation``, optionally
    prepends the prompt segment (``--join``), and saves the result to
    ``--output``.
    Exits with status 1 when the input file does not exist or when
    ``--prompt-duration`` is not a positive number.
    """
    args = parse_args()
    if not os.path.exists(args.input):
        log.error("Input file not found: %s", args.input)
        sys.exit(1)
    if args.prompt_duration <= 0:
        # The tail slice below is only meaningful for a positive length:
        # ``-0:`` selects the whole track and a negative value would trim
        # from the start of the track instead of the end.
        log.error("--prompt-duration must be positive, got %s", args.prompt_duration)
        sys.exit(1)
    log.info("Input:  %s", args.input)
    log.info("Model:  %s", args.model)
    log.info("Duration: %.1fs  |  Prompt: %.1fs", args.duration, args.prompt_duration)
    if args.description:
        log.info("Style hint: %s", args.description)
    # Heavy imports are deferred so argument errors are reported quickly.
    import torch
    import torchaudio
    log.info("Loading model (weights -> /Library/Assets/LLM/musicgen/)")
    from audiocraft.models import MusicGen
    model = MusicGen.get_pretrained(args.model, device=args.device)
    model.set_generation_params(duration=args.duration, top_k=250, temperature=1.0, cfg_coef=3.0)
    # Load input audio
    wav, sr = torchaudio.load(args.input)
    log.info("Loaded audio: %.1fs @ %d Hz (%d ch)", wav.shape[-1] / sr, sr, wav.shape[0])
    # Trim to the last --prompt-duration seconds. Clamp to at least one sample
    # so a tiny positive duration cannot round down to the ``-0:`` slice.
    max_prompt_samples = max(1, int(args.prompt_duration * sr))
    prompt_wav = wav[..., -max_prompt_samples:] if wav.shape[-1] > max_prompt_samples else wav
    log.info("Using %.1fs prompt from end of track", prompt_wav.shape[-1] / sr)
    # MusicGen expects [batch, channels, time]
    prompt_tensor = prompt_wav.unsqueeze(0).to(args.device)
    log.info("Generating %.1fs of continuation ...", args.duration)
    t0 = time.time()
    with torch.no_grad():
        output = model.generate_continuation(
            prompt=prompt_tensor,
            prompt_sample_rate=sr,
            descriptions=[args.description],
            progress=True,
        )
    elapsed = time.time() - t0
    model_sr = model.sample_rate
    output_wav = output[0].cpu()  # [C, T]
    actual_s = output_wav.shape[-1] / model_sr
    log.info("Done in %.1fs  ->  %.1fs of audio at %d Hz", elapsed, actual_s, model_sr)
    if args.join:
        # Resample prompt to model sample rate so concatenation is seamless
        prompt_resampled = torchaudio.functional.resample(prompt_wav, sr, model_sr)
        # Reconcile channel count: MusicGen outputs 1ch; prompt may be stereo.
        # Convert to mono by averaging if needed so cat doesn't blow up.
        if prompt_resampled.shape[0] != output_wav.shape[0]:
            if output_wav.shape[0] == 1 and prompt_resampled.shape[0] > 1:
                prompt_resampled = prompt_resampled.mean(dim=0, keepdim=True)
            elif prompt_resampled.shape[0] == 1 and output_wav.shape[0] > 1:
                # Broadcast only the channel axis. expand_as(output_wav) would
                # also demand equal lengths and raise, because the prompt and
                # the generated audio generally differ in duration.
                prompt_resampled = prompt_resampled.expand(output_wav.shape[0], -1)
        output_wav = torch.cat([prompt_resampled, output_wav], dim=-1)
        total_s = output_wav.shape[-1] / model_sr
        log.info("Joined prompt + continuation: %.1fs total", total_s)
    # abspath guards against dirname() returning "" for a bare filename.
    os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)
    torchaudio.save(args.output, output_wav, model_sr)
    log.info("Saved: %s", args.output)
    log.info("Play:  ffplay %r  (or open in any audio player)", args.output)
 if __name__ == "__main__":
    main()
		`@ -1 +0,0 @@`
			`"""circuitforge_core.musicgen — music continuation service (BSL 1.1)."""`