kiwi/app/services/openfoodfacts.py
pyr0ball 9a277f9b42 fix: barcode scan performance + timeout + success message
- Refactor _lookup_in_database to accept a shared httpx.AsyncClient so
  all three Open*Facts database attempts reuse one TLS connection instead
  of opening a new one per call; restores pre-fallback scan speed
- Increase recipe suggest timeout to 120s (was 30s) to survive cf-orch
  model cold-start on first request of a session
- Include product brand in barcode scan success message so the user can
  clearly see what was found (e.g. "Added: Cheerios (General Mills) to pantry")
2026-04-16 09:57:53 -07:00

284 lines
9.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Open*Facts API integration service.
This module looks up product information by barcode (UPC/EAN), querying
OpenFoodFacts first and then the Open Beauty Facts and Open Products Facts
fallback databases. It also supports searching OpenFoodFacts by product name.
"""
import httpx
from typing import Optional, Dict, Any
from app.core.config import settings
import logging
logger = logging.getLogger(__name__)
class OpenFoodFactsService:
"""
Service for interacting with the Open*Facts family of databases.
Primary: OpenFoodFacts (food products).
Fallback chain: Open Beauty Facts (personal care) → Open Products Facts (household).
All three databases share the same API path and JSON format.
"""
BASE_URL = "https://world.openfoodfacts.org/api/v2"
USER_AGENT = "Kiwi/0.1.0 (https://circuitforge.tech)"
# Fallback databases tried in order when OFFs returns no match.
# Same API format as OFFs — only the host differs.
_FALLBACK_DATABASES = [
"https://world.openbeautyfacts.org/api/v2",
"https://world.openproductsfacts.org/api/v2",
]
async def _lookup_in_database(
self, barcode: str, base_url: str, client: httpx.AsyncClient
) -> Optional[Dict[str, Any]]:
"""Try one Open*Facts database using an existing client. Returns parsed product dict or None."""
try:
response = await client.get(
f"{base_url}/product/{barcode}.json",
headers={"User-Agent": self.USER_AGENT},
timeout=10.0,
)
if response.status_code == 404:
return None
response.raise_for_status()
data = response.json()
if data.get("status") != 1:
return None
return self._parse_product_data(data, barcode)
except httpx.HTTPError as e:
logger.debug("HTTP error for %s at %s: %s", barcode, base_url, e)
return None
except Exception as e:
logger.debug("Lookup failed for %s at %s: %s", barcode, base_url, e)
return None
async def lookup_product(self, barcode: str) -> Optional[Dict[str, Any]]:
"""
Look up a product by barcode, trying OFFs then fallback databases.
A single httpx.AsyncClient is created for the whole lookup chain so that
connection pooling and TLS session reuse apply across all database attempts.
Args:
barcode: UPC/EAN barcode (8-13 digits)
Returns:
Dictionary with product information, or None if not found in any database.
"""
async with httpx.AsyncClient() as client:
result = await self._lookup_in_database(barcode, self.BASE_URL, client)
if result:
return result
for db_url in self._FALLBACK_DATABASES:
result = await self._lookup_in_database(barcode, db_url, client)
if result:
logger.info("Barcode %s found in fallback database: %s", barcode, db_url)
return result
logger.info("Barcode %s not found in any Open*Facts database", barcode)
return None
def _parse_product_data(self, data: Dict[str, Any], barcode: str) -> Dict[str, Any]:
"""
Parse OpenFoodFacts API response into our product format.
Args:
data: Raw API response
barcode: Original barcode
Returns:
Parsed product dictionary
"""
product = data.get("product", {})
# Extract basic info
name = (
product.get("product_name")
or product.get("product_name_en")
or f"Unknown Product ({barcode})"
)
brand = product.get("brands", "").split(",")[0].strip() if product.get("brands") else None
# Categories (comma-separated string to list)
categories_str = product.get("categories", "")
categories = [c.strip() for c in categories_str.split(",") if c.strip()]
category = categories[0] if categories else None
# Description
description = product.get("generic_name") or product.get("generic_name_en")
# Image
image_url = product.get("image_url") or product.get("image_front_url")
# Nutrition data
nutrition_data = self._extract_nutrition_data(product)
# Allergens and dietary info
allergens = product.get("allergens_tags", [])
labels = product.get("labels_tags", [])
# Pack size detection: prefer explicit unit_count, fall back to serving count
pack_quantity, pack_unit = self._extract_pack_size(product)
return {
"name": name,
"brand": brand,
"category": category,
"categories": categories,
"description": description,
"image_url": image_url,
"nutrition_data": nutrition_data,
"allergens": allergens,
"labels": labels,
"pack_quantity": pack_quantity,
"pack_unit": pack_unit,
"raw_data": product, # Store full response for debugging
}
def _extract_pack_size(self, product: Dict[str, Any]) -> tuple[float | None, str | None]:
"""Return (quantity, unit) for multi-pack products, or (None, None).
OFFs fields tried in order:
1. `number_of_units` (explicit count, highest confidence)
2. `serving_quantity` + `product_quantity_unit` (e.g. 6 x 150g yoghurt)
3. Parse `quantity` string like "4 x 113 g" or "6 pack"
Returns None, None when data is absent, ambiguous, or single-unit.
"""
import re
# Field 1: explicit unit count
unit_count = product.get("number_of_units")
if unit_count:
try:
n = float(unit_count)
if n > 1:
return n, product.get("serving_size_unit") or "unit"
except (ValueError, TypeError):
pass
# Field 2: parse quantity string for "N x ..." pattern
qty_str = product.get("quantity", "")
if qty_str:
m = re.match(r"^(\d+(?:\.\d+)?)\s*[xX×]\s*", qty_str.strip())
if m:
n = float(m.group(1))
if n > 1:
# Try to get a sensible sub-unit label from the rest
rest = qty_str[m.end():].strip()
unit_label = re.sub(r"[\d.,\s]+", "", rest).strip()[:20] or "unit"
return n, unit_label
return None, None
def _extract_nutrition_data(self, product: Dict[str, Any]) -> Dict[str, Any]:
"""
Extract nutrition facts from product data.
Args:
product: Product data from OpenFoodFacts
Returns:
Dictionary of nutrition facts
"""
nutriments = product.get("nutriments", {})
# Extract common nutrients (per 100g)
nutrition = {}
# Energy
if "energy-kcal_100g" in nutriments:
nutrition["calories"] = nutriments["energy-kcal_100g"]
elif "energy_100g" in nutriments:
# Convert kJ to kcal (1 kcal = 4.184 kJ)
nutrition["calories"] = round(nutriments["energy_100g"] / 4.184, 1)
# Macronutrients
if "fat_100g" in nutriments:
nutrition["fat_g"] = nutriments["fat_100g"]
if "saturated-fat_100g" in nutriments:
nutrition["saturated_fat_g"] = nutriments["saturated-fat_100g"]
if "carbohydrates_100g" in nutriments:
nutrition["carbohydrates_g"] = nutriments["carbohydrates_100g"]
if "sugars_100g" in nutriments:
nutrition["sugars_g"] = nutriments["sugars_100g"]
if "fiber_100g" in nutriments:
nutrition["fiber_g"] = nutriments["fiber_100g"]
if "proteins_100g" in nutriments:
nutrition["protein_g"] = nutriments["proteins_100g"]
# Minerals
if "salt_100g" in nutriments:
nutrition["salt_g"] = nutriments["salt_100g"]
elif "sodium_100g" in nutriments:
# Convert sodium to salt (1g sodium = 2.5g salt)
nutrition["salt_g"] = round(nutriments["sodium_100g"] * 2.5, 2)
# Serving size
if "serving_size" in product:
nutrition["serving_size"] = product["serving_size"]
return nutrition
async def search_products(
self,
query: str,
page: int = 1,
page_size: int = 20
) -> Dict[str, Any]:
"""
Search for products by name in OpenFoodFacts.
Args:
query: Search query
page: Page number (1-indexed)
page_size: Number of results per page
Returns:
Dictionary with search results and metadata
"""
try:
async with httpx.AsyncClient() as client:
url = f"{self.BASE_URL}/search"
response = await client.get(
url,
params={
"search_terms": query,
"page": page,
"page_size": page_size,
"json": 1,
},
headers={"User-Agent": self.USER_AGENT},
timeout=10.0,
)
response.raise_for_status()
data = response.json()
products = [
self._parse_product_data({"product": p}, p.get("code", ""))
for p in data.get("products", [])
]
return {
"products": products,
"count": data.get("count", 0),
"page": data.get("page", page),
"page_size": data.get("page_size", page_size),
}
except Exception as e:
logger.error(f"Error searching OpenFoodFacts: {e}")
return {
"products": [],
"count": 0,
"page": page,
"page_size": page_size,
}