From 5eab4c43a434c0a038470d57d2d81edd9273c8f2 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Fri, 5 Jun 2026 11:59:48 -0700 Subject: [PATCH] docs: self-hoster service docs for text, video, and mqtt modules text.md: add LLM inference service section with three-path decision table (GGUF/transformers/VLM mmproj/classifier), multimodal content- block API, mock mode, CF_TEXT_URL wiring. video.md: new file covering Marlin-2B service, server-local video_path callout, CUDA 13 nightly path, trust_remote_code note, MIT/BSL boundary (current wrapper is MIT; special sauce pipelines go in separate BSL module, not cf-core). mqtt.md: new file covering broker vs serial decision tree, MQTTClient usage, TopicRouter.matches() NotImplementedError with workaround, install extras. --- docs/modules/mqtt.md | 193 +++++++++++++++++++++++++++++++++++++++ docs/modules/text.md | 166 +++++++++++++++++++++++++++++++++ docs/modules/video.md | 208 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 567 insertions(+) create mode 100644 docs/modules/mqtt.md create mode 100644 docs/modules/video.md diff --git a/docs/modules/mqtt.md b/docs/modules/mqtt.md new file mode 100644 index 0000000..6b59957 --- /dev/null +++ b/docs/modules/mqtt.md @@ -0,0 +1,193 @@ +# circuitforge_core.mqtt + +Async MQTT messaging and Meshtastic mesh radio integration. MIT licensed. + +## What are you connecting to? + +Choose your backend before installing: + +| Backend | When to use | +|---|---| +| MQTT broker | You have a running MQTT broker (Mosquitto, HiveMQ, etc.) and want to send/receive structured messages over TCP | +| Meshtastic serial | You have a Meshtastic-compatible radio connected via USB and want to send messages over LoRa mesh | + +These are independent backends, not sequential steps. Pick one. 
+ +--- + +## MQTT broker path + +### Install + +```bash +pip install "circuitforge-core[mqtt]" +``` + +### Quick start + +```python +import asyncio +from circuitforge_core.mqtt.client import MQTTClient +from circuitforge_core.mqtt.models import MQTTConfig + +cfg = MQTTConfig(host="localhost", port=1883) +client = MQTTClient(cfg) + +@client.on("sensor/#") +async def handle_sensor(msg): + print(msg.topic, msg.text()) + +asyncio.run(client.run()) +``` + +`client.run()` subscribes to all registered patterns and reconnects automatically if the connection drops. + +### Iterating raw messages + +```python +from circuitforge_core.mqtt.client import MQTTClient +from circuitforge_core.mqtt.models import MQTTConfig + +cfg = MQTTConfig(host="localhost") +client = MQTTClient(cfg) + +async with client as messages: + async for msg in messages: + print(msg.topic, msg.payload) +``` + +### MQTTConfig + +```python +from circuitforge_core.mqtt.models import MQTTConfig + +cfg = MQTTConfig( + host="localhost", # required + port=1883, # default + username=None, # optional + password=None, # optional + tls=False, # set True for port 8883 + client_id=None, # auto-generated if None +) +``` + +### Publishing + +```python +await client.publish("sensor/room1/temp", payload=b"22.5") +``` + +--- + +## Meshtastic serial path + +### Hardware required + +A Meshtastic-compatible LoRa radio connected via USB serial. Supported boards include T-Beam, T-Echo, Heltec V3, RAK4631, and others listed at [meshtastic.org/docs/hardware](https://meshtastic.org/docs/hardware/). 
+ +### Install + +```bash +pip install "circuitforge-core[meshtastic-serial]" +``` + +### Quick start + +```python +import asyncio +from circuitforge_core.mqtt.meshtastic import MeshtasticSerialClient + +async def main(): + async with MeshtasticSerialClient(port="/dev/ttyUSB0") as mesh: + await mesh.send_text("hello mesh", channel=0) + async for packet in mesh.packets(): + print(packet) + +asyncio.run(main()) +``` + +### Port detection + +If you are unsure of the device path: + +```bash +ls /dev/ttyUSB* /dev/ttyACM* +# or on macOS: +ls /dev/cu.* +``` + +--- + +## TopicRouter + +`TopicRouter` lets you register pattern-matched handlers for MQTT topics. + +```python +from circuitforge_core.mqtt.router import TopicRouter + +router = TopicRouter() + +@router.on("sensor/+/temp") +async def handle_temp(msg): + print(msg.topic, msg.text()) + +@router.on("alerts/#") +async def handle_alert(msg): + print("alert:", msg.text()) +``` + +Pass the router to `MQTTClient`: + +```python +client = MQTTClient(cfg, router=router) +await client.run() +``` + +!!! warning "Known issue: `matches()` not yet implemented" + The `matches()` function used internally by `TopicRouter` to route messages to handlers raises `NotImplementedError`. Dispatching to handlers via pattern matching will fail at runtime. + + **Workaround:** Use the raw message iteration path (`async with client as messages`) and match topics manually: + + ```python + async with client as messages: + async for msg in messages: + if msg.topic.startswith("sensor/"): + await handle_sensor(msg) + ``` + + Tracked at [circuitforge-core#TBD] — `matches()` is marked TODO in `router.py`. 
+ +--- + +## MQTTMessage + +```python +from circuitforge_core.mqtt.models import MQTTMessage + +msg.topic # str — full topic string +msg.payload # bytes — raw payload +msg.text() # str — payload decoded as UTF-8 +msg.json() # Any — payload parsed as JSON +msg.received_at # datetime — UTC timestamp +``` + +--- + +## Install extras + +| Extra | What it installs | +|---|---| +| `mqtt` | `aiomqtt` — MQTT broker connectivity | +| `meshtastic-serial` | `meshtastic`, `pypubsub` — USB serial radio | +| `meshtastic-service` | Both of the above + FastAPI + uvicorn | + +```bash +# MQTT broker only +pip install "circuitforge-core[mqtt]" + +# Meshtastic serial only +pip install "circuitforge-core[meshtastic-serial]" + +# Both + FastAPI service layer +pip install "circuitforge-core[meshtastic-service]" +``` diff --git a/docs/modules/text.md b/docs/modules/text.md index 823bb0d..4caf927 100644 --- a/docs/modules/text.md +++ b/docs/modules/text.md @@ -55,3 +55,169 @@ clean = strip_apostrophes("O'Doul's") !!! warning "FTS5 gotcha" Always quote ALL terms in MATCH expressions. Bare tokens break on brand names (e.g., `O'Doul's`), plant-based ingredient names, and anything with punctuation. + +--- + +## LLM inference service + +`circuitforge_core.text.app` is a self-contained FastAPI inference server. It exposes a local LLM (or PII classifier) over HTTP so that products can call it via `CF_TEXT_URL` without bundling heavy ML dependencies themselves. + +### What are you running? 
+ +Three independent paths — pick one before installing: + +| Path | Use case | Extra | +|---|---|---| +| **LLM inference** | Chat, completion, summarisation using a GGUF or HuggingFace model | `text-llamacpp` or `text-transformers` | +| **VLM inference** | Vision-language model that accepts images alongside text | `text-llamacpp` (GGUF with `--mmproj`) or `text-transformers` | +| **Classifier / PII filter** | NER-based PII detection and redaction | `text-transformers` | + +--- + +### LLM inference (GGUF via llama.cpp) + +```bash +pip install "circuitforge-core[text-llamacpp]" +``` + +```bash +python -m circuitforge_core.text.app \ + --model /path/to/model.gguf \ + --port 8006 \ + --gpu-id 0 +``` + +4-bit quantisation (GGUF files ending in `q4_k_m`, `q4_0`, etc.) runs well on 6–8GB VRAM. Full-precision (`f16`) requires more. + +Multi-GPU (splits across two GPUs via `device_map=auto`): + +```bash +python -m circuitforge_core.text.app \ + --model /path/to/large-model \ + --port 8006 \ + --gpu-ids 0,1 +``` + +--- + +### LLM inference (HuggingFace transformers) + +```bash +pip install "circuitforge-core[text-transformers]" +# 4-bit quantisation (bitsandbytes): +pip install "circuitforge-core[text-transformers-4bit]" +``` + +```bash +python -m circuitforge_core.text.app \ + --model /path/to/model-or-hf-repo \ + --backend transformers \ + --port 8006 +``` + +--- + +### VLM inference (GGUF with mmproj) + +LLaVA-style models (LLaVA, BakLLaVA, llava-phi) require a separate projector file (`--mmproj`): + +```bash +python -m circuitforge_core.text.app \ + --model /path/to/llava-model.gguf \ + --mmproj /path/to/mmproj.gguf \ + --port 8006 \ + --gpu-id 0 +``` + +Embedded VLMs (Qwen2-VL, MiniCPM-V, Moondream) have the projector baked in — no `--mmproj` needed. 
+ +Sending images via the multimodal API: + +```json +POST /chat +{ + "messages": [ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,<BASE64_IMAGE_DATA>"}}, + {"type": "text", "text": "What is in this document?"} + ] + } + ] +} +``` + +Sending an image to a text-only model returns HTTP 422. + +--- + +### Classifier / PII filter + +```bash +pip install "circuitforge-core[text-transformers]" +``` + +```bash +python -m circuitforge_core.text.app \ + --backend classifier \ + --model dslim/bert-base-NER \ + --port 8006 +``` + +Recommended model for English PII detection: `dslim/bert-base-NER`. Substituting other HuggingFace NER models is supported. + +Calling the filter endpoint: + +```json +POST /filter +{ + "text": "Please contact John Smith at john@example.com.", + "mode": "redact" +} +``` + +Modes: `redact` (replace spans with `[REDACTED]`), `detect` (return boolean), `spans` (return span list with labels and confidence). + +--- + +### Mock mode (no model required) + +```bash +CF_TEXT_MOCK=1 python -m circuitforge_core.text.app --port 8006 +``` + +Returns deterministic canned responses for all endpoints. No GPU, no model download. Suitable for CI and integration testing. + +--- + +### Configuration + +| Variable | Default | Description | +|---|---|---| +| `CF_TEXT_URL` | — | URL products use to reach cf-text (e.g. `http://localhost:8006`) | +| `CF_TEXT_MOCK` | — | Set to `1` to enable mock mode | + +CLI flags: `--model`, `--backend` (`llamacpp`/`transformers`/`classifier`/`mock`), `--port`, `--gpu-id`, `--gpu-ids`, `--mmproj`. 
+ +--- + +### API endpoints + +| Endpoint | Backend | Description | +|---|---|---| +| `GET /health` | all | `{"status":"ok","model":str,"backend":str,"vram_mb":int}` | +| `POST /generate` | text-gen | Single prompt completion | +| `POST /chat` | text-gen | OpenAI-compatible chat (supports multimodal content blocks) | +| `POST /v1/chat/completions` | text-gen | OpenAI-compatible alias for `/chat` | +| `POST /filter` | classifier | PII detection and redaction | + +--- + +### Connecting from a product + +```bash +CF_TEXT_URL=http://localhost:8006 +``` + +Products using cf-core's LLM router pick this up automatically when the `text` backend is enabled in `config/llm.yaml`. diff --git a/docs/modules/video.md b/docs/modules/video.md new file mode 100644 index 0000000..ec9f5ef --- /dev/null +++ b/docs/modules/video.md @@ -0,0 +1,208 @@ +# circuitforge_core.video + +Video captioning and temporal grounding service using [Marlin-2B](https://huggingface.co/NemoStation/Marlin-2B) (Apache 2.0). + +## What it does + +- **Caption:** Produces a scene summary and a timestamped list of detected events for a video file. +- **Find:** Grounds a natural-language event description to a time span within the video. + +## Prerequisites + +### Hardware + +| GPU VRAM | Result | +|----------|--------| +| 16GB+ | Recommended for full-precision inference | +| 12GB | Minimum for most videos | +| Under 12GB | OOM likely on longer clips | + +CPU mode is not supported — Marlin-2B requires a CUDA-capable GPU. + +### CUDA version + +```bash +nvidia-smi | grep "CUDA Version" +``` + +| CUDA version | Install path | +|---|---| +| 12.x or earlier | Standard install — see below | +| 13.x (RTX 50-series / Blackwell) | PyTorch nightly required — see below | + +### Security note + +Marlin-2B requires `trust_remote_code=True`. Review the model's `modeling_marlin.py` on HuggingFace before deploying on a production node. 
The model is Apache 2.0 and the source is auditable at [huggingface.co/NemoStation/Marlin-2B](https://huggingface.co/NemoStation/Marlin-2B). + +--- + +## Install + +Standard (CUDA 12.x): + +```bash +pip install "circuitforge-core[video-service]" +``` + +CUDA 13.x (RTX 50-series / Blackwell) — PyTorch nightly required: + +```bash +pip install --index-url https://download.pytorch.org/whl/nightly/cu130 torch torchvision +pip install "circuitforge-core[video-service]" --no-deps +pip install "transformers>=5.7.0" torchcodec "qwen-vl-utils>=0.0.14" av Pillow accelerate fastapi "uvicorn[standard]" +``` + +--- + +## Running the service + +Download the model to a local path first (one-time, approximately 4–6 GB): + +```bash +huggingface-cli download NemoStation/Marlin-2B --local-dir /path/to/models/Marlin-2B +``` + +Start the service: + +```bash +CUDA_DEVICE_ORDER=PCI_BUS_ID python -m circuitforge_core.video.app \ + --model /path/to/models/Marlin-2B \ + --port 8016 \ + --gpu-id 0 +``` + +The service blocks at startup until the model is loaded, then prints ready status. Confirm: + +```bash +curl http://localhost:8016/health +# {"status": "ok", "model": "/path/to/models/Marlin-2B", "vram_mb": ...} +``` + +Point products at the service with: + +```bash +CF_VIDEO_URL=http://localhost:8016 +``` + +--- + +## API reference + +### `GET /health` + +Returns 200 when model is loaded. `vram_mb` is the GPU memory in use. + +```json +{"status": "ok", "model": "/models/Marlin-2B", "vram_mb": 4200} +``` + +### `POST /caption` + +Generate a scene summary and timestamped events for a video. + +> **Important:** `video_path` must be an absolute path on the machine running cf-video — not the calling machine. If cf-video runs in Docker, mount your video directory into the container and use the container-side path. 
+ +**Request:** + +```json +{ + "video_path": "/absolute/path/to/video.mp4", + "max_new_tokens": 2048 +} +``` + +**Response:** + +```json +{ + "scene": "A kitchen scene where someone prepares pasta.", + "events": [ + {"start": 0.0, "end": 4.5, "description": "Filling pot with water"}, + {"start": 4.5, "end": 12.0, "description": "Boiling water on stovetop"} + ], + "caption": "Kitchen cooking scene with pasta preparation steps.", + "model": "/models/Marlin-2B" +} +``` + +### `POST /find` + +Ground a natural-language event description to a time span. + +**Request:** + +```json +{ + "video_path": "/absolute/path/to/video.mp4", + "event": "person adds salt to the water", + "max_new_tokens": 256 +} +``` + +**Response:** + +```json +{ + "span": [8.2, 10.6], + "format_ok": true, + "raw": "[8.2, 10.6]", + "model": "/models/Marlin-2B" +} +``` + +`span` is `null` when the model cannot ground the event in the video. `format_ok` indicates whether the model produced a parseable time range. + +--- + +## Docker Compose setup + +```yaml +# compose.yml excerpt +services: + cf-video: + image: ghcr.io/circuit-forge/cf-video:latest # or build locally + network_mode: host + environment: + CF_VIDEO_MODEL: /models/Marlin-2B + CF_VIDEO_PORT: "8016" + volumes: + - /path/to/models/Marlin-2B:/models/Marlin-2B:ro + - /path/to/your/videos:/videos:ro # mount video storage + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped +``` + +Pass the container-side absolute path (under the `/videos` mount), not the host path: + +```json +{"video_path": "/videos/my-video.mp4", "event": "person enters room"} +``` + +--- + +## Troubleshooting + +**`CUDA out of memory`** +Marlin-2B requires 12GB+ VRAM. No CPU fallback is available. + +**`No such file or directory: /home/user/video.mp4`** +`video_path` is resolved on the server, not the client. 
If cf-video runs in Docker, you must mount the directory containing the video into the container and use the container-side path. + +**CUDA version mismatch** +RTX 50-series (Blackwell) cards use CUDA 13. Standard PyTorch stable does not support CUDA 13 — install PyTorch nightly as described in the Install section. + +**`trust_remote_code` errors** +Make sure `transformers >= 5.7.0` is installed. Older versions do not support the Marlin architecture registration. + +--- + +## License + +- cf-video service code: MIT — CircuitForge LLC +- Marlin-2B model: [Apache 2.0](https://huggingface.co/NemoStation/Marlin-2B) — NemoStation