kiwi/app/services/openfoodfacts.py
pyr0ball 0de6182f48 feat(scan): barcode miss fallback chain — Open Beauty Facts + Open Products Facts
When a barcode is not found in Open Food Facts, the service now tries
Open Beauty Facts and Open Products Facts before giving up. All three
share the same API format; only the host URL differs.

When all databases miss, the scan endpoint sets needs_manual_entry=true
in the result. The frontend detects this, shows a calm informational
message, and switches to manual entry mode automatically.

Also fixes a latent bug where not-found scans showed 'Added: item to
pantry' due to the success condition checking barcodes_found (always 1)
instead of added_to_inventory.

Closes #65
2026-04-16 08:30:49 -07:00

279 lines
9.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
OpenFoodFacts API integration service.
This module provides functionality to look up product information
from the OpenFoodFacts database using barcodes (UPC/EAN).
"""
import httpx
from typing import Optional, Dict, Any
from app.core.config import settings
import logging
logger = logging.getLogger(__name__)
class OpenFoodFactsService:
    """
    Service for interacting with the Open*Facts family of databases.

    Primary: OpenFoodFacts (food products).
    Fallback chain: Open Beauty Facts (personal care) → Open Products Facts (household).
    All three databases share the same API path and JSON format.

    Every network-facing method is best-effort: transport errors and
    malformed payloads are logged and reported as "no result" rather than
    raised to the caller.
    """

    BASE_URL = "https://world.openfoodfacts.org/api/v2"
    USER_AGENT = "Kiwi/0.1.0 (https://circuitforge.tech)"

    # Per-request timeout in seconds. Note that a full fallback chain issues
    # up to three sequential requests, so the worst case is 3x this value.
    REQUEST_TIMEOUT = 10.0

    # Fallback databases tried in order when OFFs returns no match.
    # Same API format as OFFs — only the host differs.
    _FALLBACK_DATABASES = [
        "https://world.openbeautyfacts.org/api/v2",
        "https://world.openproductsfacts.org/api/v2",
    ]

    @staticmethod
    def _to_float(value: Any) -> Optional[float]:
        """Coerce an API value to a finite float, or return None.

        Open*Facts payloads are crowd-sourced, so numeric fields may arrive
        as numbers, numeric strings, empty strings or null.
        """
        import math

        # bool is an int subclass; True/False are never meaningful quantities.
        if value is None or isinstance(value, bool):
            return None
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return None
        return number if math.isfinite(number) else None

    async def _lookup_in_database(self, barcode: str, base_url: str) -> Optional[Dict[str, Any]]:
        """Try one Open*Facts database.

        Args:
            barcode: Barcode to look up; surrounding whitespace is ignored.
            base_url: API root of the database to query (no trailing slash).

        Returns:
            Parsed product dict, or None when the barcode is blank, the
            product is not found, or the request/parsing fails.
        """
        from urllib.parse import quote

        code = barcode.strip() if isinstance(barcode, str) else ""
        if not code:
            # A blank barcode would request "/product/.json"; skip the round trip.
            return None

        # The barcode originates from scanner/user input: escape it so that
        # characters such as "/", "?" or ".." cannot alter the request path.
        url = f"{base_url}/product/{quote(code, safe='')}.json"

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    url,
                    headers={"User-Agent": self.USER_AGENT},
                    timeout=self.REQUEST_TIMEOUT,
                )
            if response.status_code == 404:
                return None
            response.raise_for_status()
            data = response.json()
            # The API signals "product found" with status == 1.
            if data.get("status") != 1:
                return None
            return self._parse_product_data(data, barcode)
        except httpx.HTTPError as e:
            logger.debug("HTTP error for %s at %s: %s", barcode, base_url, e)
            return None
        except Exception as e:
            # Best-effort boundary: malformed JSON or an unexpected payload
            # shape is treated as a miss so the fallback chain can continue.
            logger.debug("Lookup failed for %s at %s: %s", barcode, base_url, e)
            return None

    async def lookup_product(self, barcode: str) -> Optional[Dict[str, Any]]:
        """
        Look up a product by barcode, trying OFFs then fallback databases.

        Args:
            barcode: UPC/EAN barcode (8-13 digits)

        Returns:
            Dictionary with product information, or None if not found in any database.
        """
        result = await self._lookup_in_database(barcode, self.BASE_URL)
        if result:
            return result

        for db_url in self._FALLBACK_DATABASES:
            result = await self._lookup_in_database(barcode, db_url)
            if result:
                logger.info("Barcode %s found in fallback database: %s", barcode, db_url)
                return result

        logger.info("Barcode %s not found in any Open*Facts database", barcode)
        return None

    def _parse_product_data(self, data: Dict[str, Any], barcode: str) -> Dict[str, Any]:
        """
        Parse an Open*Facts API response into our product format.

        Missing, null or wrongly-typed fields degrade to None / empty
        containers instead of raising, so one sloppy record cannot turn a
        successful lookup into a miss.

        Args:
            data: Raw API response; the product record is read from ``data["product"]``
            barcode: Original barcode (used for the placeholder name)

        Returns:
            Parsed product dictionary
        """
        product = data.get("product")
        if not isinstance(product, dict):
            product = {}

        # Extract basic info
        name = (
            product.get("product_name")
            or product.get("product_name_en")
            or f"Unknown Product ({barcode})"
        )

        # Brands is a comma-separated string; keep only the first entry.
        brands = product.get("brands")
        brand = None
        if isinstance(brands, str):
            brand = brands.split(",")[0].strip() or None

        # Categories (comma-separated string to list)
        categories_str = product.get("categories")
        if isinstance(categories_str, str):
            categories = [c.strip() for c in categories_str.split(",") if c.strip()]
        else:
            categories = []
        category = categories[0] if categories else None

        # Description
        description = product.get("generic_name") or product.get("generic_name_en")

        # Image
        image_url = product.get("image_url") or product.get("image_front_url")

        # Nutrition data
        nutrition_data = self._extract_nutrition_data(product)

        # Allergens and dietary info (null is normalised to an empty list)
        allergens = product.get("allergens_tags") or []
        labels = product.get("labels_tags") or []

        # Pack size detection: explicit unit count first, then the quantity string
        pack_quantity, pack_unit = self._extract_pack_size(product)

        return {
            "name": name,
            "brand": brand,
            "category": category,
            "categories": categories,
            "description": description,
            "image_url": image_url,
            "nutrition_data": nutrition_data,
            "allergens": allergens,
            "labels": labels,
            "pack_quantity": pack_quantity,
            "pack_unit": pack_unit,
            "raw_data": product,  # Store full response for debugging
        }

    def _extract_pack_size(self, product: Dict[str, Any]) -> tuple[Optional[float], Optional[str]]:
        """Return (quantity, unit) for multi-pack products, or (None, None).

        Fields tried in order:
        1. `number_of_units` (explicit count, highest confidence); the unit
           label is `serving_size_unit` when present, otherwise "unit".
        2. `quantity` string with a leading "N x ..." pattern, e.g.
           "4 x 113 g" -> (4.0, "g").

        Returns None, None when data is absent, malformed, or single-unit
        (count <= 1).
        """
        import re

        # Field 1: explicit unit count
        unit_count = self._to_float(product.get("number_of_units"))
        if unit_count is not None and unit_count > 1:
            return unit_count, product.get("serving_size_unit") or "unit"

        # Field 2: parse quantity string for "N x ..." pattern
        qty_raw = product.get("quantity")
        if not isinstance(qty_raw, str):
            return None, None

        # Strip once and use the stripped text for BOTH the match and the
        # remainder slice, so match offsets always refer to the same string.
        qty_str = qty_raw.strip()
        m = re.match(r"^(\d+(?:\.\d+)?)\s*[xX×]\s*", qty_str)
        if not m:
            return None, None

        n = float(m.group(1))
        if n <= 1:
            return None, None

        # Derive a sub-unit label from what follows the multiplier by
        # dropping digits, separators and whitespace ("113 g" -> "g").
        rest = qty_str[m.end():].strip()
        unit_label = re.sub(r"[\d.,\s]+", "", rest).strip()[:20] or "unit"
        return n, unit_label

    def _extract_nutrition_data(self, product: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract nutrition facts from product data.

        Values present under their direct key are passed through unchanged.
        Derived values (kJ -> kcal, sodium -> salt) are only emitted when the
        source value is numeric; otherwise the key is omitted.

        Args:
            product: Product data from OpenFoodFacts

        Returns:
            Dictionary of nutrition facts
        """
        nutriments = product.get("nutriments")
        if not isinstance(nutriments, dict):
            nutriments = {}

        # Extract common nutrients (per 100g)
        nutrition: Dict[str, Any] = {}

        # Energy
        if "energy-kcal_100g" in nutriments:
            nutrition["calories"] = nutriments["energy-kcal_100g"]
        elif "energy_100g" in nutriments:
            # Convert kJ to kcal (1 kcal = 4.184 kJ)
            energy_kj = self._to_float(nutriments["energy_100g"])
            if energy_kj is not None:
                nutrition["calories"] = round(energy_kj / 4.184, 1)

        # Macronutrients: (source key, output key), copied verbatim
        direct_fields = (
            ("fat_100g", "fat_g"),
            ("saturated-fat_100g", "saturated_fat_g"),
            ("carbohydrates_100g", "carbohydrates_g"),
            ("sugars_100g", "sugars_g"),
            ("fiber_100g", "fiber_g"),
            ("proteins_100g", "protein_g"),
        )
        for source_key, target_key in direct_fields:
            if source_key in nutriments:
                nutrition[target_key] = nutriments[source_key]

        # Minerals
        if "salt_100g" in nutriments:
            nutrition["salt_g"] = nutriments["salt_100g"]
        elif "sodium_100g" in nutriments:
            # Convert sodium to salt (1g sodium = 2.5g salt)
            sodium = self._to_float(nutriments["sodium_100g"])
            if sodium is not None:
                nutrition["salt_g"] = round(sodium * 2.5, 2)

        # Serving size
        if "serving_size" in product:
            nutrition["serving_size"] = product["serving_size"]

        return nutrition

    async def search_products(
        self,
        query: str,
        page: int = 1,
        page_size: int = 20
    ) -> Dict[str, Any]:
        """
        Search for products by name in OpenFoodFacts.

        Only the primary database is searched; the fallback databases are
        not consulted.

        Args:
            query: Search query
            page: Page number (1-indexed)
            page_size: Number of results per page

        Returns:
            Dictionary with keys "products", "count", "page" and "page_size".
            On any failure an empty result set for the requested page is
            returned and the error is logged.
        """
        try:
            async with httpx.AsyncClient() as client:
                url = f"{self.BASE_URL}/search"
                # NOTE(review): the OFF v2 search endpoint is documented as
                # tag-based; confirm that "search_terms" full-text search is
                # honoured here rather than silently ignored.
                response = await client.get(
                    url,
                    params={
                        "search_terms": query,
                        "page": page,
                        "page_size": page_size,
                        "json": 1,
                    },
                    headers={"User-Agent": self.USER_AGENT},
                    timeout=self.REQUEST_TIMEOUT,
                )
            response.raise_for_status()
            data = response.json()

            # Skip entries that are not product records instead of letting
            # one malformed item discard the whole page.
            products = [
                self._parse_product_data({"product": p}, p.get("code", ""))
                for p in data.get("products") or []
                if isinstance(p, dict)
            ]
            return {
                "products": products,
                "count": data.get("count", 0),
                "page": data.get("page", page),
                "page_size": data.get("page_size", page_size),
            }
        except Exception as e:
            # Best-effort boundary: search failures degrade to "no results".
            logger.error("Error searching OpenFoodFacts: %s", e)
            return {
                "products": [],
                "count": 0,
                "page": page,
                "page_size": page_size,
            }