cf-vision/cf_vision/router.py
pyr0ball 353525c1f4 feat: initial cf-vision scaffold — ImageFrame API, stub inference modules
- cf_vision/models.py: ImageFrame + ImageElement + BoundingBox (MIT)
  Full Dolphin-v2 element taxonomy (21 types), convenience accessors
  (text_blocks, barcodes, tables, full_text)
- cf_vision/router.py: VisionRouter — mock + real paths, task routing
  (document, barcode, receipt, general)
- cf_vision/barcode.py: BarcodeScanner — pyzbar wrapper, CPU-only, MIT
- cf_vision/ocr.py: DolphinOCR — ByteDance/Dolphin-v2 async stub (BSL 1.1)
- cf_vision/receipt.py: ReceiptParser stub — Kiwi Phase 2 target (BSL 1.1)
- cf_vision/camera.py: CameraCapture — OpenCV single-frame capture (MIT)
- pyproject.toml: inference / barcode / camera optional extras
- .env.example: HF_TOKEN, CF_VISION_DEVICE, CF_VISION_MOCK
- README: module map, ImageFrame API reference, consumer roadmap
- tests: 6 passing (ImageFrame accessors, VisionRouter mock/real)

Extracted from circuitforge_core.vision per cf-core#36.
2026-04-06 17:59:00 -07:00

107 lines
3.6 KiB
Python

# cf_vision/router.py — VisionRouter, the primary consumer API
#
# BSL 1.1 when real inference models are integrated (Dolphin-v2, Claude vision).
# Currently a stub: analyze() raises NotImplementedError unless mock=True.
from __future__ import annotations
import os
from typing import Literal
from cf_vision.models import ImageFrame, ImageElement, BoundingBox
# Canned elements returned by mock analysis for every task except "barcode".
# Each row is (element_type, text, confidence, bbox y, bbox height); all boxes
# share the same x offset (0.05) and width (0.9).
_MOCK_ELEMENTS = [
    ImageElement(
        element_type=kind,
        text=content,
        confidence=score,
        bbox=BoundingBox(x=0.05, y=top, width=0.9, height=box_height),
    )
    for kind, content, score, top, box_height in (
        ("title", "[Mock document title]", 0.99, 0.02, 0.06),
        (
            "plain_text",
            "[Mock paragraph — real content requires cf-vision[inference] and a vision model.]",
            0.95,
            0.12,
            0.08,
        ),
    )
]
class VisionRouter:
    """
    Routes image analysis requests to local or cloud vision models.

    Local models (Free tier):
      - Dolphin-v2 (ByteDance) — universal document parser, 21 element types
      - pyzbar — barcode / QR code scanning (no GPU required)

    Cloud fallback (Paid tier):
      - Claude vision API — general-purpose image understanding

    Currently a stub: only mock mode produces frames; the real inference path
    raises NotImplementedError.

    Usage
    -----
        router = VisionRouter.from_env()
        frame = router.analyze(image_bytes, task="document")
        for element in frame.elements:
            print(element.element_type, element.text)
    """

    # Spellings of CF_VISION_MOCK that enable mock mode. Compared after
    # stripping whitespace and lower-casing, so "1", "True" and " yes " match.
    _TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})

    def __init__(
        self,
        mock: bool = False,
        device: str = "auto",
    ) -> None:
        """
        Store the router configuration.

        mock:   when True, analyze() returns synthetic frames instead of
                raising NotImplementedError.
        device: device selector string; stored as-is and not otherwise used
                by the current stub.
        """
        self._mock = mock
        self._device = device

    @classmethod
    def from_env(cls) -> "VisionRouter":
        """
        Construct from CF_VISION_MOCK and CF_VISION_DEVICE env vars.

        CF_VISION_MOCK enables mock mode when set to "1", "true", "yes" or
        "on" (case-insensitive, surrounding whitespace ignored). Any other
        value, or an unset variable, leaves mock mode off.

        CF_VISION_DEVICE is passed through as the device string; an unset,
        empty or whitespace-only value falls back to "auto".
        """
        raw_mock = os.environ.get("CF_VISION_MOCK", "")
        mock = raw_mock.strip().lower() in cls._TRUTHY_ENV_VALUES
        # `or "auto"` covers both the unset case and an empty assignment such
        # as `CF_VISION_DEVICE=` in a .env file.
        device = os.environ.get("CF_VISION_DEVICE", "").strip() or "auto"
        return cls(mock=mock, device=device)

    def analyze(
        self,
        image_bytes: bytes,
        task: Literal["document", "barcode", "receipt", "general"] = "document",
        prompt: str = "",
    ) -> ImageFrame:
        """
        Analyse image_bytes and return a structured ImageFrame.

        task:
            "document" — full document parsing via Dolphin-v2 (all 21 element types)
            "barcode"  — barcode / QR code extraction via pyzbar (lightweight)
            "receipt"  — receipt line-item extraction (Dolphin-v2 + post-processing)
            "general"  — general image understanding via Claude vision (cloud, Paid tier)

        prompt is accepted for forward compatibility; the current stub does
        not read it.

        Stub: raises NotImplementedError unless CF_VISION_MOCK=1 or mock=True.
        Real implementation lands with Kiwi Phase 2 (cf_vision.ocr, cf_vision.barcode).
        """
        if self._mock:
            return self._mock_frame(image_bytes, task)
        raise NotImplementedError(
            "VisionRouter real inference is not yet implemented. "
            "Set CF_VISION_MOCK=1 or mock=True to use synthetic frames. "
            "Real analysis requires: pip install cf-vision[inference]"
        )

    def _mock_frame(self, image_bytes: bytes, task: str) -> ImageFrame:
        """
        Build a synthetic ImageFrame for mock mode.

        The "barcode" task yields a single EAN13 barcode element; every other
        task yields the canned document elements from _MOCK_ELEMENTS.
        image_bytes is not inspected and is not attached to the frame
        (the frame is created with image_bytes=None).
        """
        if task == "barcode":
            elements = [
                ImageElement(
                    element_type="barcode",
                    text="0123456789012",
                    confidence=0.99,
                    metadata={"format": "EAN13"},
                )
            ]
        else:
            # Shallow copy: callers may mutate the returned list without
            # touching the module-level template list. The element objects
            # themselves are shared between frames.
            elements = list(_MOCK_ELEMENTS)
        return ImageFrame(
            source="mock",
            image_bytes=None,
            elements=elements,
            model="mock",
        )