kiwi/app/services/expiration_predictor.py
pyr0ball e45b07c203 feat: expand secondary use windows + dietary constraint filter (kiwi#110)
Adds 10 new secondary use entries and corrects all 8 existing ones.
New: apples/soft, leafy_greens/wilting, tomatoes/soft, cooked_pasta/day-old,
cooked_potatoes/day-old, yogurt/tangy, cream/sour, wine/open,
cooked_beans/day-old, cooked_meat/leftover.

Corrections: milk uses (specific recipes, not 'baking'/'sauces'); dairy uses
expanded; cheese label well-aged→rind-ready with named dishes (minestrone,
ribollita); rice uses (onigiri, arancini, congee); tortillas warning added;
bakery uses and synonyms expanded to named pastries; bananas synonyms
(spotty/brown/black/mushy); rice synonyms (old rice).

New fields on every SECONDARY_WINDOW entry:
- discard_signs: qualitative cues for when the item has gone past its
  secondary window (shown in UI alongside uses)
- constraints_exclude: dietary labels that suppress the entry entirely
  (wine suppressed for halal/alcohol-free)

ExpirationPredictor.filter_secondary_by_constraints() applies constraint
suppression; _enrich_item() now accepts user_constraints and passes
secondary_discard_signs through to the API response.
2026-04-24 17:08:45 -07:00

714 lines
33 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Expiration Date Prediction Service.
Predicts expiration dates for food items based on category and storage location.
Fast path: deterministic lookup table (USDA FoodKeeper / FDA guidelines).
Fallback path: LLMRouter — only fires for unknown products when the tier allows
it and an LLM backend is configured.
"""
import logging
import re
from datetime import date, timedelta
from typing import Optional, List
from circuitforge_core.llm.router import LLMRouter
from app.tiers import can_use
logger = logging.getLogger(__name__)
class ExpirationPredictor:
    """Predict expiration dates based on product category and storage location.

    Shelf-life figures come from the class-level lookup tables (SHELF_LIFE,
    SHELF_LIFE_AFTER_OPENING, SECONDARY_WINDOW); product names are mapped to
    categories through CATEGORY_KEYWORDS. When the tables have no answer,
    predict_expiration() may fall back to an LLM query, provided a router was
    created in __init__ and the caller's tier permits it.
    """
# Canonical location names and their aliases.
# All location strings are normalised through this before table lookup.
# Keys are compared after lower-casing and stripping (see
# _normalize_location); a location that is not listed here is passed
# through unchanged.
LOCATION_ALIASES: dict[str, str] = {
'garage_freezer': 'freezer',
'chest_freezer': 'freezer',
'deep_freezer': 'freezer',
'upright_freezer': 'freezer',
'refrigerator': 'fridge',
'frig': 'fridge',
'cupboard': 'cabinet',
'shelf': 'pantry',
'counter': 'pantry',
}
# When a category has no entry for the requested location, try these
# alternatives in order — prioritising same-temperature storage first.
# Each tuple starts with the requested location itself, so one loop in
# _lookup_days() covers both the direct hit and the fallbacks.
# A canonical location with no entry here falls back to
# (location, 'pantry', 'fridge', 'cabinet', 'freezer') in _lookup_days().
LOCATION_FALLBACK: dict[str, tuple[str, ...]] = {
'freezer': ('freezer', 'fridge', 'pantry', 'cabinet'),
'fridge': ('fridge', 'pantry', 'cabinet', 'freezer'),
'pantry': ('pantry', 'cabinet', 'fridge', 'freezer'),
'cabinet': ('cabinet', 'pantry', 'fridge', 'freezer'),
}
# Default shelf life in days by category and location
# Sources: USDA FoodKeeper app, FDA guidelines
#
# Notes for maintainers:
# - A value of None (e.g. eggs/freezer) is treated like a missing entry:
#   _lookup_days() skips it and tries the next location in the fallback chain.
# - Key order matters. get_category_from_product() and _lookup_days() fall
#   back to a substring match over these keys and take the first hit in
#   declaration order.
SHELF_LIFE = {
# Dairy
'dairy': {'fridge': 7, 'freezer': 90},
'milk': {'fridge': 7, 'freezer': 90},
'cheese': {'fridge': 21, 'freezer': 180},
'yogurt': {'fridge': 14, 'freezer': 60},
'butter': {'fridge': 30, 'freezer': 365},
'cream': {'fridge': 5, 'freezer': 60},
# Meat & Poultry
'meat': {'fridge': 3, 'freezer': 180},
'beef': {'fridge': 3, 'freezer': 270},
'pork': {'fridge': 3, 'freezer': 180},
'lamb': {'fridge': 3, 'freezer': 270},
'poultry': {'fridge': 2, 'freezer': 270},
'chicken': {'fridge': 2, 'freezer': 270},
'turkey': {'fridge': 2, 'freezer': 270},
'tempeh': {'fridge': 10, 'freezer': 365},
'tofu': {'fridge': 5, 'freezer': 180},
'ground_meat': {'fridge': 2, 'freezer': 120},
# Seafood
'fish': {'fridge': 2, 'freezer': 180},
'seafood': {'fridge': 2, 'freezer': 180},
'shrimp': {'fridge': 2, 'freezer': 180},
'salmon': {'fridge': 2, 'freezer': 180},
# Eggs
'eggs': {'fridge': 35, 'freezer': None},
# Produce
'vegetables': {'fridge': 7, 'pantry': 5, 'freezer': 270},
'fruits': {'fridge': 7, 'pantry': 5, 'freezer': 365},
'leafy_greens': {'fridge': 5, 'freezer': 270},
'berries': {'fridge': 5, 'freezer': 270},
'apples': {'fridge': 30, 'pantry': 14},
'bananas': {'pantry': 5, 'fridge': 7},
'citrus': {'fridge': 21, 'pantry': 7},
# Bread & Bakery
'bread': {'pantry': 5, 'freezer': 90},
'bakery': {'pantry': 3, 'fridge': 7, 'freezer': 90},
# Frozen
'frozen_foods': {'freezer': 180, 'fridge': 3},
'frozen_vegetables': {'freezer': 270, 'fridge': 4},
'frozen_fruit': {'freezer': 365, 'fridge': 4},
'ice_cream': {'freezer': 60},
# Pantry Staples
'canned_goods': {'pantry': 730, 'cabinet': 730},
'dry_goods': {'pantry': 365, 'cabinet': 365},
'pasta': {'pantry': 730, 'cabinet': 730},
'rice': {'pantry': 730, 'cabinet': 730},
'flour': {'pantry': 180, 'cabinet': 180},
'sugar': {'pantry': 730, 'cabinet': 730},
'cereal': {'pantry': 180, 'cabinet': 180},
'chips': {'pantry': 90, 'cabinet': 90},
'cookies': {'pantry': 90, 'cabinet': 90},
# Condiments
'condiments': {'fridge': 90, 'pantry': 180},
'ketchup': {'fridge': 180, 'pantry': 365},
'mustard': {'fridge': 365, 'pantry': 365},
'mayo': {'fridge': 60, 'pantry': 180},
'salad_dressing': {'fridge': 90, 'pantry': 180},
'soy_sauce': {'fridge': 730, 'pantry': 730},
# Beverages
'beverages': {'fridge': 14, 'pantry': 180},
'juice': {'fridge': 7, 'freezer': 90},
'soda': {'fridge': 270, 'pantry': 270},
'water': {'fridge': 365, 'pantry': 365},
# Other
'deli_meat': {'fridge': 5, 'freezer': 60},
'leftovers': {'fridge': 4, 'freezer': 90},
'prepared_foods': {'fridge': 4, 'freezer': 90},
}
# Secondary shelf life in days after a package is opened.
# Sources: USDA FoodKeeper app, FDA consumer guides.
# Only categories where opening significantly shortens shelf life are listed.
# Items not listed default to None (no secondary window tracked).
# Read by days_after_opening(), which looks the category up by its
# lower-cased name; unlike SHELF_LIFE, values are not split by location.
SHELF_LIFE_AFTER_OPENING: dict[str, int] = {
# Dairy — once opened, clock ticks fast
'dairy': 5,
'milk': 5,
'cream': 3,
'yogurt': 7,
'cheese': 14,
'butter': 30,
# Condiments — refrigerated after opening
'condiments': 30,
'ketchup': 30,
'mustard': 30,
'mayo': 14,
'salad_dressing': 30,
'soy_sauce': 90,
# Canned goods — once opened, very short
'canned_goods': 4,
# Beverages
'juice': 7,
'soda': 4,
# Bread / Bakery
'bread': 5,
'bakery': 3,
# Produce
'leafy_greens': 3,
'berries': 3,
# Pantry staples (open bag)
'chips': 14,
'cookies': 14,
'cereal': 30,
'flour': 90,
}
# Post-expiry secondary use window.
# These are NOT spoilage extensions — they describe a qualitative state
# change where the ingredient is specifically suited for certain preparations.
# Sources: USDA FoodKeeper, food science, culinary tradition.
#
# Fields:
#   window_days         — days past nominal expiry still usable in secondary state
#   label               — short UI label for the state
#   uses                — recipe contexts suited to this state (shown in UI)
#   warning             — safety note, calm tone, None if none needed
#   discard_signs       — qualitative signs the item has gone past the secondary window
#   constraints_exclude — dietary constraint labels that suppress this entry entirely
#                         (e.g. alcohol-containing items suppressed for halal/alcohol-free)
#
# Keys are category names in lower case; secondary_state() looks entries up by
# the lower-cased category and copies the list-valued fields before returning.
SECONDARY_WINDOW: dict[str, dict] = {
    'bread': {
        'window_days': 5,
        'label': 'stale',
        'uses': ['croutons', 'stuffing', 'bread pudding', 'French toast', 'panzanella'],
        'warning': 'Check for mold before use — discard if any is visible.',
        'discard_signs': 'Visible mold (any colour), or unpleasant smell beyond dry/yeasty.',
        'constraints_exclude': [],
    },
    'bakery': {
        'window_days': 3,
        'label': 'day-old',
        'uses': ['French toast', 'bread pudding', 'crumbles', 'trifle base', 'cake pops', 'streusel topping', 'bread crumbs'],
        'warning': 'Check for mold before use — discard if any is visible.',
        'discard_signs': 'Visible mold, sliminess, or strong sour smell.',
        'constraints_exclude': [],
    },
    'bananas': {
        'window_days': 5,
        'label': 'overripe',
        'uses': ['banana bread', 'smoothies', 'pancakes', 'muffins'],
        'warning': None,
        'discard_signs': 'Leaking liquid, fermented smell, or mold on skin.',
        'constraints_exclude': [],
    },
    'milk': {
        'window_days': 3,
        'label': 'sour',
        'uses': ['pancakes', 'scones', 'waffles', 'muffins', 'quick breads', 'béchamel', 'baked mac and cheese'],
        'warning': 'Use only in cooked recipes — do not drink.',
        'discard_signs': 'Chunky texture, strong unpleasant smell beyond tangy, or visible separation with grey colour.',
        'constraints_exclude': [],
    },
    'dairy': {
        'window_days': 2,
        'label': 'sour',
        'uses': ['pancakes', 'scones', 'quick breads', 'muffins', 'waffles'],
        'warning': 'Use only in cooked recipes — do not drink.',
        'discard_signs': 'Strong unpleasant smell, unusual colour, or chunky texture.',
        'constraints_exclude': [],
    },
    'cheese': {
        'window_days': 14,
        'label': 'rind-ready',
        'uses': ['parmesan broth', 'minestrone', 'ribollita', 'risotto', 'polenta', 'bean soups', 'gratins'],
        'warning': None,
        'discard_signs': 'Soft or wet texture on hard cheese, pink or black mold (white/green surface mold on hard cheese can be cut off with 1cm margin).',
        'constraints_exclude': [],
    },
    'rice': {
        'window_days': 2,
        'label': 'day-old',
        'uses': ['fried rice', 'onigiri', 'rice porridge', 'congee', 'arancini', 'stuffed peppers', 'rice fritters'],
        'warning': 'Refrigerate immediately after cooking — do not leave at room temp.',
        'discard_signs': 'Slimy texture, unusual smell, or more than 4 days since cooking.',
        'constraints_exclude': [],
    },
    'tortillas': {
        'window_days': 5,
        'label': 'stale',
        'uses': ['chilaquiles', 'migas', 'tortilla soup', 'casserole'],
        'warning': 'Check for mold, especially if stored in a sealed bag — discard if any is visible.',
        'discard_signs': 'Visible mold (check seams and edges), or strong sour smell.',
        'constraints_exclude': [],
    },
    # ── New entries ──────────────────────────────────────────────────────
    'apples': {
        'window_days': 7,
        'label': 'soft',
        'uses': ['applesauce', 'apple butter', 'baked apples', 'apple crisp', 'smoothies', 'chutney'],
        'warning': None,
        'discard_signs': 'Large bruised areas with fermented smell, visible mold, or liquid leaking from skin.',
        'constraints_exclude': [],
    },
    'leafy_greens': {
        'window_days': 2,
        'label': 'wilting',
        'uses': ['sautéed greens', 'soups', 'smoothies', 'frittata', 'pasta add-in', 'stir fry'],
        'warning': None,
        'discard_signs': 'Slimy texture, strong unpleasant smell, or yellowed and mushy leaves.',
        'constraints_exclude': [],
    },
    'tomatoes': {
        'window_days': 4,
        'label': 'soft',
        'uses': ['roasted tomatoes', 'tomato sauce', 'shakshuka', 'bruschetta', 'soup', 'salsa'],
        'warning': None,
        'discard_signs': 'Broken skin with liquid pooling, mold, or fermented smell.',
        'constraints_exclude': [],
    },
    'cooked_pasta': {
        'window_days': 3,
        'label': 'day-old',
        'uses': ['pasta frittata', 'pasta salad', 'baked pasta', 'soup add-in', 'fried pasta cakes'],
        'warning': 'Refrigerate within 2 hours of cooking.',
        'discard_signs': 'Slimy texture, off smell, or more than 4 days since cooking.',
        'constraints_exclude': [],
    },
    'cooked_potatoes': {
        'window_days': 3,
        'label': 'day-old',
        'uses': ['potato pancakes', 'hash browns', 'potato soup', 'gnocchi', 'twice-baked potatoes', 'croquettes'],
        'warning': 'Refrigerate within 2 hours of cooking.',
        'discard_signs': 'Slimy texture, off smell, or more than 4 days since cooking.',
        'constraints_exclude': [],
    },
    'yogurt': {
        'window_days': 7,
        'label': 'tangy',
        'uses': ['marinades', 'flatbreads', 'smoothies', 'tzatziki', 'baked goods', 'salad dressings'],
        'warning': None,
        'discard_signs': 'Pink or orange discolouration, visible mold, or strongly unpleasant smell (not just tangy).',
        'constraints_exclude': [],
    },
    'cream': {
        'window_days': 2,
        'label': 'sour',
        'uses': ['soups', 'sauces', 'scones', 'quick breads', 'mashed potatoes'],
        'warning': 'Use in cooked recipes only. Discard if the smell is strongly unpleasant rather than tangy.',
        'discard_signs': 'Strong unpleasant smell beyond tangy, unusual colour, or chunky texture.',
        'constraints_exclude': [],
    },
    'wine': {
        'window_days': 4,
        'label': 'open',
        'uses': ['pan sauces', 'braises', 'risotto', 'marinades', 'poaching liquid', 'wine reduction'],
        'warning': None,
        'discard_signs': 'Strong vinegar smell (still usable in braises/marinades), or visible cloudiness with off-smell.',
        'constraints_exclude': ['halal', 'alcohol-free'],
    },
    'cooked_beans': {
        'window_days': 3,
        'label': 'day-old',
        'uses': ['refried beans', 'bean soup', 'bean fritters', 'hummus', 'bean dip', 'grain bowls'],
        'warning': 'Refrigerate within 2 hours of cooking.',
        'discard_signs': 'Slimy texture, off smell, or more than 4 days since cooking.',
        'constraints_exclude': [],
    },
    'cooked_meat': {
        'window_days': 2,
        'label': 'leftover',
        'uses': ['grain bowls', 'tacos', 'soups', 'fried rice', 'sandwiches', 'hash', 'pasta add-in'],
        'warning': 'Refrigerate within 2 hours of cooking.',
        # Was "34 days": the en-dash in "3–4" had been dropped, which turned a
        # 3-to-4-day limit into advice to keep cooked meat for over a month.
        'discard_signs': 'Off smell, slimy texture, or more than 3–4 days since cooking.',
        'constraints_exclude': [],
    },
}
def days_after_opening(self, category: str | None) -> int | None:
"""Return days of shelf life remaining once a package is opened.
Returns None if the category is unknown or not tracked after opening
(e.g. frozen items, raw meat — category check irrelevant once opened).
"""
if not category:
return None
return self.SHELF_LIFE_AFTER_OPENING.get(category.lower())
def secondary_state(
self, category: str | None, expiry_date: str | None
) -> dict | None:
"""Return secondary use info if the item is in its post-expiry secondary window.
Returns a dict with label, uses, warning, discard_signs, constraints_exclude,
days_past, and window_days when the item is past its nominal expiry date but
still within the secondary use window.
Returns None in all other cases (unknown category, no window defined, not yet
expired, or past the secondary window).
Callers should apply constraints_exclude against user dietary constraints
and suppress the result entirely if any excluded constraint is active.
See filter_secondary_by_constraints().
"""
if not category or not expiry_date:
return None
entry = self.SECONDARY_WINDOW.get(category.lower())
if not entry:
return None
try:
from datetime import date
today = date.today()
exp = date.fromisoformat(expiry_date)
days_past = (today - exp).days
if 0 <= days_past <= entry['window_days']:
return {
'label': entry['label'],
'uses': list(entry['uses']),
'warning': entry['warning'],
'discard_signs': entry.get('discard_signs'),
'constraints_exclude': list(entry.get('constraints_exclude') or []),
'days_past': days_past,
'window_days': entry['window_days'],
}
except ValueError:
pass
return None
@staticmethod
def filter_secondary_by_constraints(
sec: dict | None,
user_constraints: list[str],
) -> dict | None:
"""Suppress secondary state entirely if any excluded constraint is active.
Call after secondary_state() when user dietary constraints are available.
Returns sec unchanged when no constraints match, or None when suppressed.
"""
if sec is None:
return None
excluded = sec.get('constraints_exclude') or []
if any(c.lower() in [e.lower() for e in excluded] for c in user_constraints):
return None
return sec
# Keyword lists are checked in declaration order — most specific first.
# Matching is plain substring containment against the lower-cased product
# name, and the first category with any matching keyword wins.
# Rules:
#   - canned/processed goods BEFORE raw-meat terms (canned chicken != raw chicken)
#   - frozen prepared foods BEFORE generic protein terms
#   - multi-word phrases before single words where ambiguity exists
#   - a category whose keywords CONTAIN another category's keyword must come
#     first (e.g. 'orange juice' before 'orange', 'cream cheese' before
#     'cheese', 'english muffin' before 'muffin'), otherwise it is unreachable
#
# NOTE(review): short keywords still over-match as substrings — 'ham' hits
# "graham cracker"/"hamburger", 'egg' hits "eggplant", 'cola' hits
# "chocolate". Fixing that needs word-boundary matching in
# get_category_from_product(), not a reorder here.
CATEGORY_KEYWORDS = {
    # ── Frozen prepared foods ─────────────────────────────────────────────
    # Before raw protein entries so plant-based frozen products don't
    # inherit 2–3 day raw-meat shelf lives.
    'ice_cream': ['ice cream', 'gelato', 'frozen yogurt', 'sorbet', 'sherbet'],
    'frozen_fruit': [
        'frozen berries', 'frozen mango', 'frozen strawberries',
        'frozen blueberries', 'frozen raspberries', 'frozen peaches',
        'frozen fruit', 'frozen cherries',
    ],
    'frozen_vegetables': [
        'frozen veg', 'frozen corn', 'frozen peas', 'frozen broccoli',
        'frozen spinach', 'frozen edamame', 'frozen green beans',
        'frozen mixed vegetables', 'frozen carrots',
        'peas & carrots', 'peas and carrots', 'mixed vegetables',
        'spring rolls', 'vegetable spring rolls',
    ],
    'frozen_foods': [
        'plant-based', 'plant based', 'meatless', 'impossible',
        "chik'n", 'chikn', 'veggie burger', 'veggie patty',
        'nugget', 'tater tot', 'waffle fries', 'hash brown',
        'onion ring', 'fish stick', 'fish fillet', 'potsticker',
        'dumpling', 'egg roll', 'empanada', 'tamale', 'falafel',
        'mac & cheese bite', 'cauliflower wing', 'ranchero potato',
    ],
    # ── Canned / shelf-stable processed goods ─────────────────────────────
    # Before raw protein keywords so "canned chicken", "cream of chicken",
    # and "lentil soup" resolve here rather than to raw chicken/cream.
    'canned_goods': [
        'canned', 'can of', 'tin of', 'tinned',
        'cream of ', 'condensed soup', 'condensed cream',
        'baked beans', 'refried beans',
        'canned beans', 'canned tomatoes', 'canned corn', 'canned peas',
        'canned soup', 'canned tuna', 'canned salmon', 'canned chicken',
        'canned fruit', 'canned peaches', 'canned pears',
        'enchilada sauce', 'tomato sauce', 'tomato paste',
        'lentil soup', 'bean soup', 'chicken noodle soup',
    ],
    # ── Condiments & brined items ─────────────────────────────────────────
    # Before produce/protein terms so brined olives, jarred peppers, etc.
    # don't inherit raw vegetable shelf lives.
    'ketchup': ['ketchup', 'catsup'],
    'mustard': ['mustard', 'dijon', 'dijion', 'stoneground mustard'],
    'mayo': ['mayo', 'mayonnaise', 'miracle whip'],
    'soy_sauce': ['soy sauce', 'tamari', 'shoyu'],
    'salad_dressing': ['salad dressing', 'ranch', 'italian dressing', 'vinaigrette'],
    'condiments': [
        # brined / jarred items
        'dill chips', 'hamburger chips', 'gherkin',
        'olive', 'capers', 'jalapeño', 'jalapeno', 'pepperoncini',
        'pimiento', 'banana pepper', 'cornichon',
        # sauces
        'hot sauce', 'hot pepper sauce', 'sriracha', 'cholula',
        'worcestershire', 'barbecue sauce', 'bbq sauce',
        'chipotle sauce', 'chipotle mayo', 'chipotle creamy',
        'salsa', 'chutney', 'relish',
        'teriyaki', 'hoisin', 'oyster sauce', 'fish sauce',
        'miso', 'ssamjang', 'gochujang', 'doenjang',
        'soybean paste', 'fermented soybean',
        # nut butters / spreads
        'peanut butter', 'almond butter', 'tahini', 'hummus',
        # seasoning mixes
        'seasoning', 'spice blend', 'borracho',
        # other shelf-stable sauces
        'yuzu', 'ponzu', 'lizano',
    ],
    # ── Soy / fermented proteins ──────────────────────────────────────────
    'tempeh': ['tempeh'],
    'tofu': ['tofu', 'bean curd'],
    # ── Dairy ─────────────────────────────────────────────────────────────
    # 'cream' comes first: all of its keywords are multi-word phrases, and
    # 'cream cheese' would otherwise be swallowed by the bare 'cheese' keyword.
    # Bare 'cream' removed — "cream of X" is canned_goods (matched above).
    'cream': ['heavy cream', 'whipping cream', 'sour cream', 'crème fraîche',
              'cream cheese', 'whipped topping', 'whipped cream'],
    'milk': ['milk', 'whole milk', '2% milk', 'skim milk', 'almond milk', 'oat milk', 'soy milk'],
    'cheese': ['cheese', 'cheddar', 'mozzarella', 'swiss', 'parmesan', 'feta', 'gouda', 'velveeta'],
    'yogurt': ['yogurt', 'greek yogurt', 'yoghurt'],
    'butter': ['butter', 'margarine'],
    'eggs': ['eggs', 'egg'],
    # ── Raw proteins ──────────────────────────────────────────────────────
    # After canned/frozen so "canned chicken" is already resolved above.
    'salmon': ['salmon'],
    'shrimp': ['shrimp', 'prawns'],
    'fish': ['fish', 'cod', 'tilapia', 'halibut', 'pollock'],
    # Specific chicken cuts only — bare 'chicken' handled in generic fallback
    'chicken': ['chicken breast', 'chicken thigh', 'chicken wings', 'chicken leg',
                'whole chicken', 'rotisserie chicken', 'raw chicken'],
    'turkey': ['turkey breast', 'whole turkey'],
    'ground_meat': ['ground beef', 'ground pork', 'ground chicken', 'ground turkey',
                    'ground lamb', 'ground bison'],
    'pork': ['pork', 'bacon', 'ham', 'pork chop', 'pork loin'],
    'beef': ['beef', 'steak', 'brisket', 'ribeye', 'sirloin', 'roast beef'],
    'deli_meat': ['deli', 'sliced turkey', 'sliced ham', 'lunch meat', 'cold cuts',
                  'prosciutto', 'salami', 'pepperoni'],
    # ── Juice ─────────────────────────────────────────────────────────────
    # Before produce so "orange juice", "apple juice" and "lemonade" are not
    # captured by the 'orange' / 'apple' / 'lemon' produce keywords.
    'juice': ['juice', 'orange juice', 'apple juice', 'lemonade'],
    # ── Produce ───────────────────────────────────────────────────────────
    'leafy_greens': ['lettuce', 'spinach', 'kale', 'arugula', 'mixed greens'],
    'berries': ['strawberries', 'blueberries', 'raspberries', 'blackberries'],
    'apples': ['apple', 'apples'],
    'bananas': ['banana', 'bananas'],
    'citrus': ['orange', 'lemon', 'lime', 'grapefruit', 'tangerine'],
    # ── Bread & bakery ────────────────────────────────────────────────────
    # 'bread' before 'bakery' so "english muffin" is not captured by the
    # bakery keyword 'muffin'.
    'bread': ['bread', 'loaf', 'baguette', 'bagel', 'bun', 'pita', 'naan',
              'english muffin', 'sourdough'],
    'bakery': [
        'muffin', 'croissant', 'donut', 'danish', 'puff pastry', 'pastry puff',
        'cinnamon roll', 'dinner roll', 'parkerhouse roll', 'scone',
    ],
    # ── Dry pantry staples ────────────────────────────────────────────────
    'pasta': ['pasta', 'spaghetti', 'penne', 'macaroni', 'noodles', 'couscous', 'orzo'],
    'rice': ['rice', 'brown rice', 'white rice', 'jasmine rice', 'basmati',
             'spanish rice', 'rice mix'],
    'cereal': ['cereal', 'granola', 'oatmeal'],
    'chips': ['chips', 'crisps', 'tortilla chips', 'pretzel', 'popcorn'],
    'cookies': ['cookies', 'biscuits', 'crackers', 'graham cracker', 'wafer'],
    # ── Other beverages ───────────────────────────────────────────────────
    # Kept last: 'cola' is a substring of "chocolate", so moving this earlier
    # would misfile chocolate cookies, cereal, etc.
    'soda': ['soda', 'cola', 'sprite', 'pepsi', 'coke', 'carbonated soft drink'],
}
def __init__(self) -> None:
    """Create the predictor and try to attach an LLM router.

    Router construction is best-effort: a missing config file is logged at
    debug level, any other failure at warning level, and in both cases
    self._router stays None so the LLM fallback is simply disabled.
    """
    self._router: Optional[LLMRouter] = None
    try:
        router = LLMRouter()
    except FileNotFoundError:
        logger.debug("LLM config not found — expiry LLM fallback disabled")
    except Exception as exc:
        logger.warning("LLMRouter init failed (%s) — expiry LLM fallback disabled", exc)
    else:
        self._router = router
# ── Public API ────────────────────────────────────────────────────────────
def predict_expiration(
self,
category: Optional[str],
location: str,
purchase_date: Optional[date] = None,
product_name: Optional[str] = None,
tier: str = "free",
has_byok: bool = False,
) -> Optional[date]:
"""
Predict expiration date.
Fast path: deterministic lookup table.
Fallback: LLM query when table has no match, tier allows it, and a
backend is configured. Returns None rather than crashing if
inference fails.
"""
if not purchase_date:
purchase_date = date.today()
days = self._lookup_days(category, location)
if days is None and product_name and self._router and can_use("expiry_llm_matching", tier, has_byok):
days = self._llm_predict_days(product_name, category, location)
if days is None:
return None
return purchase_date + timedelta(days=days)
def get_category_from_product(
self,
product_name: str,
product_category: Optional[str] = None,
tags: Optional[List[str]] = None,
location: Optional[str] = None,
) -> Optional[str]:
"""Determine category from product name, existing category, and tags.
location is used as a last-resort hint: unknown items in the freezer
default to frozen_foods rather than dry_goods.
"""
if product_category:
cat = product_category.lower().strip()
if cat in self.SHELF_LIFE:
return cat
for key in self.SHELF_LIFE:
if key in cat or cat in key:
return key
if tags:
for tag in tags:
t = tag.lower().strip()
if t in self.SHELF_LIFE:
return t
name = product_name.lower().strip()
for category, keywords in self.CATEGORY_KEYWORDS.items():
if any(kw in name for kw in keywords):
return category
# Generic single-word fallbacks — checked after the keyword dict so
# multi-word phrases (e.g. "canned chicken") already matched above.
for words, fallback in [
(['frozen'], 'frozen_foods'),
(['canned', 'tinned'], 'canned_goods'),
# bare 'chicken' / 'sausage' / 'ham' kept here so raw-meat names
# that don't appear in the specific keyword lists still resolve.
(['chicken', 'turkey'], 'poultry'),
(['sausage', 'ham', 'bacon'], 'pork'),
(['beef', 'steak'], 'beef'),
(['meat', 'pork'], 'meat'),
(['vegetable', 'veggie', 'produce'], 'vegetables'),
(['fruit'], 'fruits'),
(['dairy'], 'dairy'),
]:
if any(w in name for w in words):
return fallback
# Location-aware final fallback: unknown item in a freezer → frozen_foods.
# This handles unlabelled frozen products (e.g. "Birthday Littles",
# "Pulled BBQ Crumbles") without requiring every brand name to be listed.
canon_loc = self._normalize_location(location or '')
if canon_loc == 'freezer':
return 'frozen_foods'
return 'dry_goods'
def get_shelf_life_info(self, category: str, location: str) -> Optional[int]:
"""Shelf life in days for a given category + location, or None."""
return self._lookup_days(category, location)
def list_categories(self) -> List[str]:
return list(self.SHELF_LIFE.keys())
def list_locations(self) -> List[str]:
locations: set[str] = set()
for shelf_life in self.SHELF_LIFE.values():
locations.update(shelf_life.keys())
return sorted(locations)
# ── Private helpers ───────────────────────────────────────────────────────
def _normalize_location(self, location: str) -> str:
"""Resolve location aliases to canonical names."""
loc = location.lower().strip()
return self.LOCATION_ALIASES.get(loc, loc)
def _lookup_days(self, category: Optional[str], location: str) -> Optional[int]:
"""Pure deterministic lookup — no I/O.
Normalises location aliases (e.g. garage_freezer → freezer) and uses
a context-aware fallback order so pantry items don't accidentally get
fridge shelf-life and vice versa.
"""
if not category:
return None
cat = category.lower().strip()
if cat not in self.SHELF_LIFE:
for key in self.SHELF_LIFE:
if key in cat or cat in key:
cat = key
break
else:
return None
canon_loc = self._normalize_location(location)
shelf = self.SHELF_LIFE[cat]
# Try the canonical location first, then work through the
# context-aware fallback chain for that location type.
fallback_order = self.LOCATION_FALLBACK.get(
canon_loc, (canon_loc, 'pantry', 'fridge', 'cabinet', 'freezer')
)
for loc in fallback_order:
days = shelf.get(loc)
if days is not None:
return days
return None
def _llm_predict_days(
self,
product_name: str,
category: Optional[str],
location: str,
) -> Optional[int]:
"""
Ask the LLM how many days this product keeps in the given location.
TODO: Fill in the prompts below. Good prompts should:
- Give enough context for the LLM to reason about food safety
- Specify output format clearly (just an integer — nothing else)
- Err conservative (shorter shelf life) when uncertain
- Stay concise — this fires on every unknown barcode scan
Parameters available:
product_name — e.g. "Trader Joe's Organic Tempeh"
category — best-guess from get_category_from_product(), may be None
location — "fridge" | "freezer" | "pantry" | "cabinet"
"""
assert self._router is not None
system = (
"You are a food safety expert. Given a food product name, an optional "
"category hint, and a storage location, respond with ONLY a single "
"integer: the number of days the product typically remains safe to eat "
"from purchase when stored as specified. No explanation, no units, no "
"punctuation — just the integer. When uncertain, give the conservative "
"(shorter) estimate."
)
parts = [f"Product: {product_name}"]
if category:
parts.append(f"Category: {category}")
parts.append(f"Storage location: {location}")
parts.append("Days until expiry from purchase:")
prompt = "\n".join(parts)
try:
raw = self._router.complete(prompt, system=system, max_tokens=16)
match = re.search(r'\b(\d+)\b', raw)
if match:
days = int(match.group(1))
# Sanity cap: >5 years is implausible for a perishable unknown to
# the deterministic table. If the LLM returns something absurd,
# fall back to None rather than storing a misleading date.
if days > 1825:
logger.warning(
"LLM returned implausible shelf life (%d days) for %r — discarding",
days, product_name,
)
return None
logger.debug(
"LLM shelf life for %r in %s: %d days", product_name, location, days
)
return days
except Exception as e:
logger.warning("LLM expiry prediction failed for %r: %s", product_name, e)
return None