kiwi/app/services/expiration_predictor.py

450 lines
21 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Expiration Date Prediction Service.
Predicts expiration dates for food items based on category and storage location.
Fast path: deterministic lookup table (USDA FoodKeeper / FDA guidelines).
Fallback path: LLMRouter — only fires for unknown products when tier allows it
and a LLM backend is configured.
"""
import logging
import re
from datetime import date, timedelta
from typing import Optional, List
from circuitforge_core.llm.router import LLMRouter
from app.tiers import can_use
logger = logging.getLogger(__name__)
class ExpirationPredictor:
"""Predict expiration dates based on product category and storage location."""
# Canonical location names and their aliases.
# All location strings are normalised through this before table lookup.
LOCATION_ALIASES: dict[str, str] = {
'garage_freezer': 'freezer',
'chest_freezer': 'freezer',
'deep_freezer': 'freezer',
'upright_freezer': 'freezer',
'refrigerator': 'fridge',
'frig': 'fridge',
'cupboard': 'cabinet',
'shelf': 'pantry',
'counter': 'pantry',
}
# When a category has no entry for the requested location, try these
# alternatives in order — prioritising same-temperature storage first.
LOCATION_FALLBACK: dict[str, tuple[str, ...]] = {
'freezer': ('freezer', 'fridge', 'pantry', 'cabinet'),
'fridge': ('fridge', 'pantry', 'cabinet', 'freezer'),
'pantry': ('pantry', 'cabinet', 'fridge', 'freezer'),
'cabinet': ('cabinet', 'pantry', 'fridge', 'freezer'),
}
# Default shelf life in days by category and location
# Sources: USDA FoodKeeper app, FDA guidelines
SHELF_LIFE = {
# Dairy
'dairy': {'fridge': 7, 'freezer': 90},
'milk': {'fridge': 7, 'freezer': 90},
'cheese': {'fridge': 21, 'freezer': 180},
'yogurt': {'fridge': 14, 'freezer': 60},
'butter': {'fridge': 30, 'freezer': 365},
'cream': {'fridge': 5, 'freezer': 60},
# Meat & Poultry
'meat': {'fridge': 3, 'freezer': 180},
'beef': {'fridge': 3, 'freezer': 270},
'pork': {'fridge': 3, 'freezer': 180},
'lamb': {'fridge': 3, 'freezer': 270},
'poultry': {'fridge': 2, 'freezer': 270},
'chicken': {'fridge': 2, 'freezer': 270},
'turkey': {'fridge': 2, 'freezer': 270},
'tempeh': {'fridge': 10, 'freezer': 365},
'tofu': {'fridge': 5, 'freezer': 180},
'ground_meat': {'fridge': 2, 'freezer': 120},
# Seafood
'fish': {'fridge': 2, 'freezer': 180},
'seafood': {'fridge': 2, 'freezer': 180},
'shrimp': {'fridge': 2, 'freezer': 180},
'salmon': {'fridge': 2, 'freezer': 180},
# Eggs
'eggs': {'fridge': 35, 'freezer': None},
# Produce
'vegetables': {'fridge': 7, 'pantry': 5, 'freezer': 270},
'fruits': {'fridge': 7, 'pantry': 5, 'freezer': 365},
'leafy_greens': {'fridge': 5, 'freezer': 270},
'berries': {'fridge': 5, 'freezer': 270},
'apples': {'fridge': 30, 'pantry': 14},
'bananas': {'pantry': 5, 'fridge': 7},
'citrus': {'fridge': 21, 'pantry': 7},
# Bread & Bakery
'bread': {'pantry': 5, 'freezer': 90},
'bakery': {'pantry': 3, 'fridge': 7, 'freezer': 90},
# Frozen
'frozen_foods': {'freezer': 180, 'fridge': 3},
'frozen_vegetables': {'freezer': 270, 'fridge': 4},
'frozen_fruit': {'freezer': 365, 'fridge': 4},
'ice_cream': {'freezer': 60},
# Pantry Staples
'canned_goods': {'pantry': 730, 'cabinet': 730},
'dry_goods': {'pantry': 365, 'cabinet': 365},
'pasta': {'pantry': 730, 'cabinet': 730},
'rice': {'pantry': 730, 'cabinet': 730},
'flour': {'pantry': 180, 'cabinet': 180},
'sugar': {'pantry': 730, 'cabinet': 730},
'cereal': {'pantry': 180, 'cabinet': 180},
'chips': {'pantry': 90, 'cabinet': 90},
'cookies': {'pantry': 90, 'cabinet': 90},
# Condiments
'condiments': {'fridge': 90, 'pantry': 180},
'ketchup': {'fridge': 180, 'pantry': 365},
'mustard': {'fridge': 365, 'pantry': 365},
'mayo': {'fridge': 60, 'pantry': 180},
'salad_dressing': {'fridge': 90, 'pantry': 180},
'soy_sauce': {'fridge': 730, 'pantry': 730},
# Beverages
'beverages': {'fridge': 14, 'pantry': 180},
'juice': {'fridge': 7, 'freezer': 90},
'soda': {'fridge': 270, 'pantry': 270},
'water': {'fridge': 365, 'pantry': 365},
# Other
'deli_meat': {'fridge': 5, 'freezer': 60},
'leftovers': {'fridge': 4, 'freezer': 90},
'prepared_foods': {'fridge': 4, 'freezer': 90},
}
# Keyword lists are checked in declaration order — most specific first.
# Rules:
# - canned/processed goods BEFORE raw-meat terms (canned chicken != raw chicken)
# - frozen prepared foods BEFORE generic protein terms
# - multi-word phrases before single words where ambiguity exists
CATEGORY_KEYWORDS = {
# ── Frozen prepared foods ─────────────────────────────────────────────
# Before raw protein entries so plant-based frozen products don't
# inherit 23 day raw-meat shelf lives.
'ice_cream': ['ice cream', 'gelato', 'frozen yogurt', 'sorbet', 'sherbet'],
'frozen_fruit': [
'frozen berries', 'frozen mango', 'frozen strawberries',
'frozen blueberries', 'frozen raspberries', 'frozen peaches',
'frozen fruit', 'frozen cherries',
],
'frozen_vegetables': [
'frozen veg', 'frozen corn', 'frozen peas', 'frozen broccoli',
'frozen spinach', 'frozen edamame', 'frozen green beans',
'frozen mixed vegetables', 'frozen carrots',
'peas & carrots', 'peas and carrots', 'mixed vegetables',
'spring rolls', 'vegetable spring rolls',
],
'frozen_foods': [
'plant-based', 'plant based', 'meatless', 'impossible',
"chik'n", 'chikn', 'veggie burger', 'veggie patty',
'nugget', 'tater tot', 'waffle fries', 'hash brown',
'onion ring', 'fish stick', 'fish fillet', 'potsticker',
'dumpling', 'egg roll', 'empanada', 'tamale', 'falafel',
'mac & cheese bite', 'cauliflower wing', 'ranchero potato',
],
# ── Canned / shelf-stable processed goods ─────────────────────────────
# Before raw protein keywords so "canned chicken", "cream of chicken",
# and "lentil soup" resolve here rather than to raw chicken/cream.
'canned_goods': [
'canned', 'can of', 'tin of', 'tinned',
'cream of ', 'condensed soup', 'condensed cream',
'baked beans', 'refried beans',
'canned beans', 'canned tomatoes', 'canned corn', 'canned peas',
'canned soup', 'canned tuna', 'canned salmon', 'canned chicken',
'canned fruit', 'canned peaches', 'canned pears',
'enchilada sauce', 'tomato sauce', 'tomato paste',
'lentil soup', 'bean soup', 'chicken noodle soup',
],
# ── Condiments & brined items ─────────────────────────────────────────
# Before produce/protein terms so brined olives, jarred peppers, etc.
# don't inherit raw vegetable shelf lives.
'ketchup': ['ketchup', 'catsup'],
'mustard': ['mustard', 'dijon', 'dijion', 'stoneground mustard'],
'mayo': ['mayo', 'mayonnaise', 'miracle whip'],
'soy_sauce': ['soy sauce', 'tamari', 'shoyu'],
'salad_dressing': ['salad dressing', 'ranch', 'italian dressing', 'vinaigrette'],
'condiments': [
# brined / jarred items
'dill chips', 'hamburger chips', 'gherkin',
'olive', 'capers', 'jalapeño', 'jalapeno', 'pepperoncini',
'pimiento', 'banana pepper', 'cornichon',
# sauces
'hot sauce', 'hot pepper sauce', 'sriracha', 'cholula',
'worcestershire', 'barbecue sauce', 'bbq sauce',
'chipotle sauce', 'chipotle mayo', 'chipotle creamy',
'salsa', 'chutney', 'relish',
'teriyaki', 'hoisin', 'oyster sauce', 'fish sauce',
'miso', 'ssamjang', 'gochujang', 'doenjang',
'soybean paste', 'fermented soybean',
# nut butters / spreads
'peanut butter', 'almond butter', 'tahini', 'hummus',
# seasoning mixes
'seasoning', 'spice blend', 'borracho',
# other shelf-stable sauces
'yuzu', 'ponzu', 'lizano',
],
# ── Soy / fermented proteins ──────────────────────────────────────────
'tempeh': ['tempeh'],
'tofu': ['tofu', 'bean curd'],
# ── Dairy ─────────────────────────────────────────────────────────────
'milk': ['milk', 'whole milk', '2% milk', 'skim milk', 'almond milk', 'oat milk', 'soy milk'],
'cheese': ['cheese', 'cheddar', 'mozzarella', 'swiss', 'parmesan', 'feta', 'gouda', 'velveeta'],
'yogurt': ['yogurt', 'greek yogurt', 'yoghurt'],
'butter': ['butter', 'margarine'],
# Bare 'cream' removed — "cream of X" is canned_goods (matched above).
'cream': ['heavy cream', 'whipping cream', 'sour cream', 'crème fraîche',
'cream cheese', 'whipped topping', 'whipped cream'],
'eggs': ['eggs', 'egg'],
# ── Raw proteins ──────────────────────────────────────────────────────
# After canned/frozen so "canned chicken" is already resolved above.
'salmon': ['salmon'],
'shrimp': ['shrimp', 'prawns'],
'fish': ['fish', 'cod', 'tilapia', 'halibut', 'pollock'],
# Specific chicken cuts only — bare 'chicken' handled in generic fallback
'chicken': ['chicken breast', 'chicken thigh', 'chicken wings', 'chicken leg',
'whole chicken', 'rotisserie chicken', 'raw chicken'],
'turkey': ['turkey breast', 'whole turkey'],
'ground_meat': ['ground beef', 'ground pork', 'ground chicken', 'ground turkey',
'ground lamb', 'ground bison'],
'pork': ['pork', 'bacon', 'ham', 'pork chop', 'pork loin'],
'beef': ['beef', 'steak', 'brisket', 'ribeye', 'sirloin', 'roast beef'],
'deli_meat': ['deli', 'sliced turkey', 'sliced ham', 'lunch meat', 'cold cuts',
'prosciutto', 'salami', 'pepperoni'],
# ── Produce ───────────────────────────────────────────────────────────
'leafy_greens': ['lettuce', 'spinach', 'kale', 'arugula', 'mixed greens'],
'berries': ['strawberries', 'blueberries', 'raspberries', 'blackberries'],
'apples': ['apple', 'apples'],
'bananas': ['banana', 'bananas'],
'citrus': ['orange', 'lemon', 'lime', 'grapefruit', 'tangerine'],
# ── Bakery ────────────────────────────────────────────────────────────
'bakery': [
'muffin', 'croissant', 'donut', 'danish', 'puff pastry', 'pastry puff',
'cinnamon roll', 'dinner roll', 'parkerhouse roll', 'scone',
],
'bread': ['bread', 'loaf', 'baguette', 'bagel', 'bun', 'pita', 'naan',
'english muffin', 'sourdough'],
# ── Dry pantry staples ────────────────────────────────────────────────
'pasta': ['pasta', 'spaghetti', 'penne', 'macaroni', 'noodles', 'couscous', 'orzo'],
'rice': ['rice', 'brown rice', 'white rice', 'jasmine rice', 'basmati',
'spanish rice', 'rice mix'],
'cereal': ['cereal', 'granola', 'oatmeal'],
'chips': ['chips', 'crisps', 'tortilla chips', 'pretzel', 'popcorn'],
'cookies': ['cookies', 'biscuits', 'crackers', 'graham cracker', 'wafer'],
# ── Beverages ─────────────────────────────────────────────────────────
'juice': ['juice', 'orange juice', 'apple juice', 'lemonade'],
'soda': ['soda', 'cola', 'sprite', 'pepsi', 'coke', 'carbonated soft drink'],
}
def __init__(self) -> None:
self._router: Optional[LLMRouter] = None
try:
self._router = LLMRouter()
except FileNotFoundError:
logger.debug("LLM config not found — expiry LLM fallback disabled")
except Exception as e:
logger.warning("LLMRouter init failed (%s) — expiry LLM fallback disabled", e)
# ── Public API ────────────────────────────────────────────────────────────
def predict_expiration(
self,
category: Optional[str],
location: str,
purchase_date: Optional[date] = None,
product_name: Optional[str] = None,
tier: str = "free",
has_byok: bool = False,
) -> Optional[date]:
"""
Predict expiration date.
Fast path: deterministic lookup table.
Fallback: LLM query when table has no match, tier allows it, and a
backend is configured. Returns None rather than crashing if
inference fails.
"""
if not purchase_date:
purchase_date = date.today()
days = self._lookup_days(category, location)
if days is None and product_name and self._router and can_use("expiry_llm_matching", tier, has_byok):
days = self._llm_predict_days(product_name, category, location)
if days is None:
return None
return purchase_date + timedelta(days=days)
def get_category_from_product(
self,
product_name: str,
product_category: Optional[str] = None,
tags: Optional[List[str]] = None,
location: Optional[str] = None,
) -> Optional[str]:
"""Determine category from product name, existing category, and tags.
location is used as a last-resort hint: unknown items in the freezer
default to frozen_foods rather than dry_goods.
"""
if product_category:
cat = product_category.lower().strip()
if cat in self.SHELF_LIFE:
return cat
for key in self.SHELF_LIFE:
if key in cat or cat in key:
return key
if tags:
for tag in tags:
t = tag.lower().strip()
if t in self.SHELF_LIFE:
return t
name = product_name.lower().strip()
for category, keywords in self.CATEGORY_KEYWORDS.items():
if any(kw in name for kw in keywords):
return category
# Generic single-word fallbacks — checked after the keyword dict so
# multi-word phrases (e.g. "canned chicken") already matched above.
for words, fallback in [
(['frozen'], 'frozen_foods'),
(['canned', 'tinned'], 'canned_goods'),
# bare 'chicken' / 'sausage' / 'ham' kept here so raw-meat names
# that don't appear in the specific keyword lists still resolve.
(['chicken', 'turkey'], 'poultry'),
(['sausage', 'ham', 'bacon'], 'pork'),
(['beef', 'steak'], 'beef'),
(['meat', 'pork'], 'meat'),
(['vegetable', 'veggie', 'produce'], 'vegetables'),
(['fruit'], 'fruits'),
(['dairy'], 'dairy'),
]:
if any(w in name for w in words):
return fallback
# Location-aware final fallback: unknown item in a freezer → frozen_foods.
# This handles unlabelled frozen products (e.g. "Birthday Littles",
# "Pulled BBQ Crumbles") without requiring every brand name to be listed.
canon_loc = self._normalize_location(location or '')
if canon_loc == 'freezer':
return 'frozen_foods'
return 'dry_goods'
def get_shelf_life_info(self, category: str, location: str) -> Optional[int]:
"""Shelf life in days for a given category + location, or None."""
return self._lookup_days(category, location)
def list_categories(self) -> List[str]:
return list(self.SHELF_LIFE.keys())
def list_locations(self) -> List[str]:
locations: set[str] = set()
for shelf_life in self.SHELF_LIFE.values():
locations.update(shelf_life.keys())
return sorted(locations)
# ── Private helpers ───────────────────────────────────────────────────────
def _normalize_location(self, location: str) -> str:
"""Resolve location aliases to canonical names."""
loc = location.lower().strip()
return self.LOCATION_ALIASES.get(loc, loc)
def _lookup_days(self, category: Optional[str], location: str) -> Optional[int]:
"""Pure deterministic lookup — no I/O.
Normalises location aliases (e.g. garage_freezer → freezer) and uses
a context-aware fallback order so pantry items don't accidentally get
fridge shelf-life and vice versa.
"""
if not category:
return None
cat = category.lower().strip()
if cat not in self.SHELF_LIFE:
for key in self.SHELF_LIFE:
if key in cat or cat in key:
cat = key
break
else:
return None
canon_loc = self._normalize_location(location)
shelf = self.SHELF_LIFE[cat]
# Try the canonical location first, then work through the
# context-aware fallback chain for that location type.
fallback_order = self.LOCATION_FALLBACK.get(
canon_loc, (canon_loc, 'pantry', 'fridge', 'cabinet', 'freezer')
)
for loc in fallback_order:
days = shelf.get(loc)
if days is not None:
return days
return None
def _llm_predict_days(
self,
product_name: str,
category: Optional[str],
location: str,
) -> Optional[int]:
"""
Ask the LLM how many days this product keeps in the given location.
TODO: Fill in the prompts below. Good prompts should:
- Give enough context for the LLM to reason about food safety
- Specify output format clearly (just an integer — nothing else)
- Err conservative (shorter shelf life) when uncertain
- Stay concise — this fires on every unknown barcode scan
Parameters available:
product_name — e.g. "Trader Joe's Organic Tempeh"
category — best-guess from get_category_from_product(), may be None
location — "fridge" | "freezer" | "pantry" | "cabinet"
"""
assert self._router is not None
system = (
"You are a food safety expert. Given a food product name, an optional "
"category hint, and a storage location, respond with ONLY a single "
"integer: the number of days the product typically remains safe to eat "
"from purchase when stored as specified. No explanation, no units, no "
"punctuation — just the integer. When uncertain, give the conservative "
"(shorter) estimate."
)
parts = [f"Product: {product_name}"]
if category:
parts.append(f"Category: {category}")
parts.append(f"Storage location: {location}")
parts.append("Days until expiry from purchase:")
prompt = "\n".join(parts)
try:
raw = self._router.complete(prompt, system=system, max_tokens=16)
match = re.search(r'\b(\d+)\b', raw)
if match:
days = int(match.group(1))
# Sanity cap: >5 years is implausible for a perishable unknown to
# the deterministic table. If the LLM returns something absurd,
# fall back to None rather than storing a misleading date.
if days > 1825:
logger.warning(
"LLM returned implausible shelf life (%d days) for %r — discarding",
days, product_name,
)
return None
logger.debug(
"LLM shelf life for %r in %s: %d days", product_name, location, days
)
return days
except Exception as e:
logger.warning("LLM expiry prediction failed for %r: %s", product_name, e)
return None