- cf_vision/models.py: ImageFrame + ImageElement + BoundingBox (MIT) Full Dolphin-v2 element taxonomy (21 types), convenience accessors (text_blocks, barcodes, tables, full_text) - cf_vision/router.py: VisionRouter — mock + real paths, task routing (document, barcode, receipt, general) - cf_vision/barcode.py: BarcodeScanner — pyzbar wrapper, CPU-only, MIT - cf_vision/ocr.py: DolphinOCR — ByteDance/Dolphin-v2 async stub (BSL 1.1) - cf_vision/receipt.py: ReceiptParser stub — Kiwi Phase 2 target (BSL 1.1) - cf_vision/camera.py: CameraCapture — OpenCV single-frame capture (MIT) - pyproject.toml: inference / barcode / camera optional extras - .env.example: HF_TOKEN, CF_VISION_DEVICE, CF_VISION_MOCK - README: module map, ImageFrame API reference, consumer roadmap - tests: 6 passing (ImageFrame accessors, VisionRouter mock/real) Extracted from circuitforge_core.vision per cf-core#36.
44 lines
1.5 KiB
Python
44 lines
1.5 KiB
Python
# cf_vision/receipt.py — receipt line-item extraction
#
# BSL 1.1: real inference. Dolphin-v2 + post-processing.
# Stub: raises NotImplementedError until Kiwi Phase 2.
#
# Planned pipeline:
#   DolphinOCR.parse(image_bytes) → ImageFrame with table/text elements
#   ReceiptParser.extract(frame)  → list[LineItem]
#   ProductResolver.resolve(items) → matched pantry items (Kiwi-specific)
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
@dataclass
class LineItem:
    """A single line item extracted from a receipt.

    Only ``name`` is required; every other field has a default so a
    partially recognised row can still be represented.
    """

    # Item name as extracted from the receipt row.
    name: str
    # Number of units; defaults to a single unit.
    quantity: float = 1.0
    # Unit of measure: "g", "ml", "oz", "each", etc. Empty when unspecified.
    unit: str = ""
    # Price for the row, or None when no price is available.
    # NOTE(review): currency and unit-vs-total semantics are not defined here.
    price: float | None = None
    # Barcode associated with the item, or None when there is none.
    barcode: str | None = None
    # Extraction confidence score; defaults to 0.0.
    # NOTE(review): presumably in the range 0.0-1.0 — confirm with the producer.
    confidence: float = 0.0
|
|
|
|
|
|
class ReceiptParser:
    """
    Extract line items from a receipt ImageFrame.

    Stub: raises NotImplementedError until Kiwi Phase 2.
    Consumer: Kiwi Phase 2 pantry auto-population from receipt photos.

    Real pipeline:
      1. DolphinOCR produces an ImageFrame with table rows and text blocks
      2. ReceiptParser identifies the items section (skip header/footer/totals)
      3. Per-row NLP extracts name, quantity, unit, price
      4. Optional: barcode lookup if any barcode elements present
    """

    def extract(self, frame: "ImageFrame") -> list[LineItem]:  # type: ignore[name-defined]
        """Return the line items found in *frame*.

        Not implemented yet: the method never inspects ``frame`` and always
        raises, so callers must be prepared for ``NotImplementedError``.

        Args:
            frame: The receipt ImageFrame to parse. Currently unused.

        Returns:
            A list of ``LineItem`` once implemented; nothing is returned today.

        Raises:
            NotImplementedError: Always, until the real pipeline exists.
        """
        raise NotImplementedError(
            "ReceiptParser.extract() is not yet implemented. "
            "Tracking: Kiwi Phase 2 / cf-vision#TBD"
        )
|