# cf_vision/router.py — VisionRouter, the primary consumer API # # BSL 1.1 when real inference models are integrated (Dolphin-v2, Claude vision). # Currently a stub: analyze() raises NotImplementedError unless mock=True. from __future__ import annotations import os from typing import Literal from cf_vision.models import ImageFrame, ImageElement, BoundingBox _MOCK_ELEMENTS = [ ImageElement( element_type="title", text="[Mock document title]", confidence=0.99, bbox=BoundingBox(x=0.05, y=0.02, width=0.9, height=0.06), ), ImageElement( element_type="plain_text", text="[Mock paragraph — real content requires cf-vision[inference] and a vision model.]", confidence=0.95, bbox=BoundingBox(x=0.05, y=0.12, width=0.9, height=0.08), ), ] class VisionRouter: """ Routes image analysis requests to local or cloud vision models. Local models (Free tier): - Dolphin-v2 (ByteDance) — universal document parser, 21 element types - pyzbar — barcode / QR code scanning (no GPU required) Cloud fallback (Paid tier): - Claude vision API — general-purpose image understanding Usage ----- router = VisionRouter.from_env() frame = router.analyze(image_bytes, task="document") for element in frame.elements: print(element.element_type, element.text) """ def __init__( self, mock: bool = False, device: str = "auto", ) -> None: self._mock = mock self._device = device @classmethod def from_env(cls) -> "VisionRouter": """Construct from CF_VISION_MOCK and CF_VISION_DEVICE env vars.""" mock = os.environ.get("CF_VISION_MOCK", "") == "1" device = os.environ.get("CF_VISION_DEVICE", "auto") return cls(mock=mock, device=device) def analyze( self, image_bytes: bytes, task: Literal["document", "barcode", "receipt", "general"] = "document", prompt: str = "", ) -> ImageFrame: """ Analyse image_bytes and return a structured ImageFrame. task: "document" — full document parsing via Dolphin-v2 (all 21 element types) "barcode" — barcode / QR code extraction via pyzbar (lightweight) "receipt" — receipt line-item extraction (Dolphin-v2 + post-processing) "general" — general image understanding via Claude vision (cloud, Paid tier) Stub: raises NotImplementedError unless CF_VISION_MOCK=1 or mock=True. Real implementation lands with Kiwi Phase 2 (cf_vision.ocr, cf_vision.barcode). """ if self._mock: return self._mock_frame(image_bytes, task) raise NotImplementedError( "VisionRouter real inference is not yet implemented. " "Set CF_VISION_MOCK=1 or mock=True to use synthetic frames. " "Real analysis requires: pip install cf-vision[inference]" ) def _mock_frame(self, image_bytes: bytes, task: str) -> ImageFrame: from cf_vision.models import ImageElement, BoundingBox if task == "barcode": elements = [ ImageElement( element_type="barcode", text="0123456789012", confidence=0.99, metadata={"format": "EAN13"}, ) ] else: elements = list(_MOCK_ELEMENTS) return ImageFrame( source="mock", image_bytes=None, elements=elements, model="mock", )