# cf-vision/cf_vision/receipt.py

# cf_vision/receipt.py — receipt line-item extraction
#
# BSL 1.1: real inference. Dolphin-v2 + post-processing.
# Stub: raises NotImplementedError until Kiwi Phase 2.
#
# Planned pipeline:
#   DolphinOCR.parse(image_bytes)   → ImageFrame with table/text elements
#   ReceiptParser.extract(frame)    → list[LineItem]
#   ProductResolver.resolve(items)  → matched pantry items (Kiwi-specific)
from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class LineItem:
    """One entry extracted from a receipt.

    Only ``name`` must be supplied; every other field has a default.
    """
    # Item name; the sole required field.
    name: str
    # Quantity for this entry; defaults to 1.0.
    quantity: float = 1.0
    # Unit label, e.g. "g", "ml", "oz", "each"; empty string by default.
    unit: str = ""
    # Price for this entry, or None when not set.
    price: float | None = None
    # Barcode string, or None when not set.
    barcode: str | None = None
    # Confidence score; defaults to 0.0.
    # NOTE(review): value range is not defined here — confirm with producer.
    confidence: float = 0.0


class ReceiptParser:
    """
    Placeholder for turning a receipt ImageFrame into LineItem records.

    Status: not implemented. ``extract`` unconditionally raises
    NotImplementedError until Kiwi Phase 2.
    Intended consumer: Kiwi Phase 2 pantry auto-population from receipt
    photos.

    Planned steps once implemented:
        1. DolphinOCR yields an ImageFrame holding table rows and text blocks
        2. The items section is located (header, footer and totals skipped)
        3. Each row is run through NLP to pull out name, quantity, unit, price
        4. Optionally, barcodes are looked up when barcode elements exist
    """

    def extract(self, frame: "ImageFrame") -> list[LineItem]:  # type: ignore[name-defined]
        """Extract line items from ``frame``.

        Currently a stub: ``frame`` is never inspected.

        Raises:
            NotImplementedError: always, until the real pipeline exists.
        """
        what = "ReceiptParser.extract() is not yet implemented. "
        where = "Tracking: Kiwi Phase 2 / cf-vision#TBD"
        raise NotImplementedError(what + where)