- cf_vision/models.py: ImageFrame + ImageElement + BoundingBox (MIT) Full Dolphin-v2 element taxonomy (21 types), convenience accessors (text_blocks, barcodes, tables, full_text) - cf_vision/router.py: VisionRouter — mock + real paths, task routing (document, barcode, receipt, general) - cf_vision/barcode.py: BarcodeScanner — pyzbar wrapper, CPU-only, MIT - cf_vision/ocr.py: DolphinOCR — ByteDance/Dolphin-v2 async stub (BSL 1.1) - cf_vision/receipt.py: ReceiptParser stub — Kiwi Phase 2 target (BSL 1.1) - cf_vision/camera.py: CameraCapture — OpenCV single-frame capture (MIT) - pyproject.toml: inference / barcode / camera optional extras - .env.example: HF_TOKEN, CF_VISION_DEVICE, CF_VISION_MOCK - README: module map, ImageFrame API reference, consumer roadmap - tests: 6 passing (ImageFrame accessors, VisionRouter mock/real) Extracted from circuitforge_core.vision per cf-core#36.
107 lines
3.6 KiB
Python
107 lines
3.6 KiB
Python
# cf_vision/router.py — VisionRouter, the primary consumer API
|
|
#
|
|
# BSL 1.1 when real inference models are integrated (Dolphin-v2, Claude vision).
|
|
# Currently a stub: analyze() raises NotImplementedError unless mock=True.
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from typing import Literal
|
|
|
|
from cf_vision.models import ImageFrame, ImageElement, BoundingBox
|
|
|
|
# Synthetic elements returned for every non-barcode task while the router runs
# in mock mode: one title followed by one paragraph. The text is deliberately
# bracketed so it cannot be mistaken for real model output.
_MOCK_ELEMENTS = [
    ImageElement(
        element_type="title",
        text="[Mock document title]",
        confidence=0.99,
        bbox=BoundingBox(x=0.05, y=0.02, width=0.9, height=0.06),
    ),
    ImageElement(
        element_type="plain_text",
        text="[Mock paragraph — real content requires cf-vision[inference] and a vision model.]",
        confidence=0.95,
        bbox=BoundingBox(x=0.05, y=0.12, width=0.9, height=0.08),
    ),
]
|
|
|
|
|
|
class VisionRouter:
    """
    Routes image analysis requests to local or cloud vision models.

    Local models (Free tier):
      - Dolphin-v2 (ByteDance) — universal document parser, 21 element types
      - pyzbar — barcode / QR code scanning (no GPU required)

    Cloud fallback (Paid tier):
      - Claude vision API — general-purpose image understanding

    The router is currently a stub: only mock mode produces frames, and every
    non-mock call to analyze() raises NotImplementedError.

    Usage
    -----
        router = VisionRouter.from_env()
        frame = router.analyze(image_bytes, task="document")
        for element in frame.elements:
            print(element.element_type, element.text)
    """

    # Task names accepted by analyze(). Kept in sync with the Literal in its
    # signature, which type checkers enforce but the runtime does not.
    _TASKS = ("document", "barcode", "receipt", "general")

    def __init__(
        self,
        mock: bool = False,
        device: str = "auto",
    ) -> None:
        """
        Store the router configuration.

        mock:   when True, analyze() returns synthetic frames instead of
                running a model.
        device: device selector, stored as given. Nothing in this class
                interprets it yet.
        """
        self._mock = mock
        self._device = device

    @classmethod
    def from_env(cls) -> "VisionRouter":
        """
        Construct from CF_VISION_MOCK and CF_VISION_DEVICE env vars.

        Mock mode is enabled only when CF_VISION_MOCK is exactly "1"
        (surrounding whitespace is ignored). CF_VISION_DEVICE falls back to
        "auto" when it is unset, empty, or whitespace-only, so a blank entry
        in a .env file does not yield an empty device string.
        """
        mock = os.environ.get("CF_VISION_MOCK", "").strip() == "1"
        device = os.environ.get("CF_VISION_DEVICE", "").strip() or "auto"
        return cls(mock=mock, device=device)

    def analyze(
        self,
        image_bytes: bytes,
        task: Literal["document", "barcode", "receipt", "general"] = "document",
        prompt: str = "",
    ) -> ImageFrame:
        """
        Analyse image_bytes and return a structured ImageFrame.

        task:
          "document" — full document parsing via Dolphin-v2 (all 21 element types)
          "barcode"  — barcode / QR code extraction via pyzbar (lightweight)
          "receipt"  — receipt line-item extraction (Dolphin-v2 + post-processing)
          "general"  — general image understanding via Claude vision (cloud, Paid tier)

        prompt is accepted for interface stability but is not used by the
        current stub.

        Raises:
          ValueError          — task is not one of the four names above.
          NotImplementedError — the router is not in mock mode; real
                                inference has not been implemented yet.

        Stub: raises NotImplementedError unless CF_VISION_MOCK=1 or mock=True.
        Real implementation lands with Kiwi Phase 2 (cf_vision.ocr, cf_vision.barcode).
        """
        # Reject unknown tasks up front; otherwise a typo would silently be
        # served the generic document mock.
        if task not in self._TASKS:
            raise ValueError(
                f"Unknown task {task!r}; expected one of: {', '.join(self._TASKS)}"
            )

        if self._mock:
            return self._mock_frame(image_bytes, task)

        raise NotImplementedError(
            "VisionRouter real inference is not yet implemented. "
            "Set CF_VISION_MOCK=1 or mock=True to use synthetic frames. "
            "Real analysis requires: pip install cf-vision[inference]"
        )

    def _mock_frame(self, image_bytes: bytes, task: str) -> ImageFrame:
        """
        Build a synthetic ImageFrame for mock mode.

        "barcode" yields a single EAN13 barcode element; every other task
        yields a copy of the module-level _MOCK_ELEMENTS list. image_bytes is
        neither inspected nor stored — the returned frame carries
        image_bytes=None.
        """
        if task == "barcode":
            elements = [
                ImageElement(
                    element_type="barcode",
                    text="0123456789012",
                    confidence=0.99,
                    metadata={"format": "EAN13"},
                )
            ]
        else:
            # Copy the list so callers can append/remove without altering the
            # shared module-level template (the elements themselves are shared).
            elements = list(_MOCK_ELEMENTS)
        return ImageFrame(
            source="mock",
            image_bytes=None,
            elements=elements,
            model="mock",
        )
|