"""Visual label capture service for unenriched products (kiwi#79).

Wraps the cf-core VisionRouter to extract structured nutrition data from a
photographed nutrition facts panel. When the VisionRouter is not yet wired
(NotImplementedError) the service falls back to a mock extraction so the
barcode scan flow can be exercised end-to-end in development.

JSON contract returned by the vision model (and mock):

{
  "product_name": str | null,
  "brand": str | null,
  "serving_size_g": number | null,
  "calories": number | null,
  "fat_g": number | null,
  "saturated_fat_g": number | null,
  "carbs_g": number | null,
  "sugar_g": number | null,
  "fiber_g": number | null,
  "protein_g": number | null,
  "sodium_mg": number | null,
  "ingredient_names": [str],
  "allergens": [str],
  "confidence": number (0.0–1.0)
}
"""

from __future__ import annotations

import copy
import json
import logging
import math
import os
from typing import Any

log = logging.getLogger(__name__)

# Confidence below this threshold surfaces amber highlights in the UI.
REVIEW_THRESHOLD = 0.7

# Template for the zero-confidence fallback result. Never hand this object (or
# a shallow copy of it) to callers: the list values are mutable and would be
# shared between results. Use _mock_extraction() instead.
_MOCK_EXTRACTION: dict[str, Any] = {
    "product_name": "Unknown Product",
    "brand": None,
    "serving_size_g": None,
    "calories": None,
    "fat_g": None,
    "saturated_fat_g": None,
    "carbs_g": None,
    "sugar_g": None,
    "fiber_g": None,
    "protein_g": None,
    "sodium_mg": None,
    "ingredient_names": [],
    "allergens": [],
    "confidence": 0.0,
}

# Each field carries an explicit type hint so the model knows what to put in
# the slot; the hints mirror the JSON contract in the module docstring.
_EXTRACTION_PROMPT = """You are reading a nutrition facts label photograph.
Extract the following fields as a JSON object with no extra text:

{
  "product_name": <string or null>,
  "brand": <string or null>,
  "serving_size_g": <number or null>,
  "calories": <number or null>,
  "fat_g": <number or null>,
  "saturated_fat_g": <number or null>,
  "carbs_g": <number or null>,
  "sugar_g": <number or null>,
  "fiber_g": <number or null>,
  "protein_g": <number or null>,
  "sodium_mg": <number or null>,
  "ingredient_names": [list of individual ingredients as strings],
  "allergens": [list of allergens explicitly stated on label],
  "confidence": <number between 0.0 and 1.0>
}

Use null for any field you cannot read clearly. Do not guess values.
Respond with JSON only."""


def _mock_extraction() -> dict[str, Any]:
    """Return a fresh, independent copy of the zero-confidence mock result."""
    # Deep copy so the nested lists are not shared with the module-level
    # template or with any previously returned result.
    return copy.deepcopy(_MOCK_EXTRACTION)


def extract_label(image_bytes: bytes) -> dict[str, Any]:
    """Run vision model extraction on raw label image bytes.

    Args:
        image_bytes: Raw bytes of the photographed nutrition facts panel.

    Returns:
        A dict matching the nutrition JSON contract in the module docstring.
        Falls back to a zero-confidence mock if the vision call fails for any
        reason (including the VisionRouter stub not being implemented yet) or
        if the model returns unparseable output. This function does not raise
        for vision-side failures.
    """
    # Allow unit tests to bypass the vision model entirely.
    if os.environ.get("KIWI_LABEL_CAPTURE_MOCK") == "1":
        log.debug("label_capture: mock mode active")
        return _mock_extraction()

    try:
        # Imported lazily, inside the try, so a missing or unwired vision
        # backend degrades to the mock instead of breaking module import.
        from circuitforge_core.vision import caption as vision_caption

        result = vision_caption(image_bytes, prompt=_EXTRACTION_PROMPT)
        raw = result.caption or ""
        return _parse_extraction(raw)
    except Exception as exc:
        # Deliberate best-effort boundary: any failure here must not break
        # the scan flow, so log it and return the mock.
        log.warning("label_capture: extraction failed (%s) — returning mock extraction", exc)
        return _mock_extraction()


def _strip_code_fences(text: str) -> str:
    """Remove a leading ```/```json fence line and a trailing ``` line."""
    if not text.startswith("```"):
        return text
    lines = text.splitlines()
    # Drop the opening fence line; drop the last line only if it is a fence.
    body = lines[1:-1] if lines[-1].strip() == "```" else lines[1:]
    return "\n".join(body)


def _load_embedded_object(text: str) -> Any:
    """Parse the outermost ``{...}`` span in *text*; return None on failure."""
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        return json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None


def _parse_extraction(raw: str) -> dict[str, Any]:
    """Parse the JSON string returned by the vision model.

    Strips markdown code fences if present and, if the text as a whole is not
    valid JSON, retries on the outermost ``{...}`` span so replies that wrap
    the object in prose are still usable.

    Args:
        raw: The model's text response.

    Returns:
        The parsed dict with ``ingredient_names`` / ``allergens`` normalised
        to lists and ``confidence`` normalised to a float in [0.0, 1.0].
        Returns a fresh mock extraction when the response cannot be parsed or
        is not a JSON object.
    """
    text = _strip_code_fences(raw.strip())

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        data = _load_embedded_object(text)
        if data is None:
            log.warning("label_capture: could not parse vision response: %s", exc)
            return _mock_extraction()

    if not isinstance(data, dict):
        log.warning("label_capture: vision response is not a dict")
        return _mock_extraction()

    # Normalise list fields — model may return None instead of []
    for list_key in ("ingredient_names", "allergens"):
        if not isinstance(data.get(list_key), list):
            data[list_key] = []

    # Clamp confidence to [0, 1]. Anything that is not a real finite number
    # becomes 0.0 so it forces a review rather than suppressing one:
    #   - bool is an int subclass, so JSON `true` would otherwise become 1.0;
    #   - json.loads accepts NaN, and max(0.0, min(1.0, nan)) evaluates to 1.0.
    confidence = data.get("confidence")
    if (
        isinstance(confidence, bool)
        or not isinstance(confidence, (int, float))
        or not math.isfinite(confidence)
    ):
        confidence = 0.0
    data["confidence"] = max(0.0, min(1.0, float(confidence)))

    return data


def needs_review(extraction: dict[str, Any]) -> bool:
    """Return True when the extraction confidence is below REVIEW_THRESHOLD.

    A missing, null, non-numeric or NaN confidence is treated as needing
    review instead of raising.
    """
    try:
        confidence = float(extraction.get("confidence", 0.0))
    except (TypeError, ValueError):
        # e.g. an explicit None or a non-numeric string
        return True
    # NaN compares False against everything, which would read as "confident".
    if math.isnan(confidence):
        return True
    return confidence < REVIEW_THRESHOLD