kiwi/app/services/expiration_predictor.py
pyr0ball c166e5216a chore: initial commit — kiwi Phase 2 complete
Pantry tracker app with:
- FastAPI backend + Vue 3 SPA frontend
- SQLite via circuitforge-core (migrations 001-005)
- Inventory CRUD, barcode scan, receipt OCR pipeline
- Expiry prediction (deterministic + LLM fallback)
- CF-core tier system integration
- Cloud session support (menagerie)
2026-03-30 22:20:48 -07:00

306 lines
13 KiB
Python

"""
Expiration Date Prediction Service.
Predicts expiration dates for food items based on category and storage location.
Fast path: deterministic lookup table (USDA FoodKeeper / FDA guidelines).
Fallback path: LLMRouter — only fires for unknown products when tier allows it
and a LLM backend is configured.
"""
import logging
import re
from datetime import date, timedelta
from typing import Optional, List
from circuitforge_core.llm.router import LLMRouter
from app.tiers import can_use
logger = logging.getLogger(__name__)
class ExpirationPredictor:
"""Predict expiration dates based on product category and storage location."""
# Default shelf life in days by category and location
# Sources: USDA FoodKeeper app, FDA guidelines
SHELF_LIFE = {
# Dairy
'dairy': {'fridge': 7, 'freezer': 90},
'milk': {'fridge': 7, 'freezer': 90},
'cheese': {'fridge': 21, 'freezer': 180},
'yogurt': {'fridge': 14, 'freezer': 60},
'butter': {'fridge': 30, 'freezer': 365},
'cream': {'fridge': 5, 'freezer': 60},
# Meat & Poultry
'meat': {'fridge': 3, 'freezer': 180},
'beef': {'fridge': 3, 'freezer': 270},
'pork': {'fridge': 3, 'freezer': 180},
'lamb': {'fridge': 3, 'freezer': 270},
'poultry': {'fridge': 2, 'freezer': 270},
'chicken': {'fridge': 2, 'freezer': 270},
'turkey': {'fridge': 2, 'freezer': 270},
'ground_meat': {'fridge': 2, 'freezer': 120},
# Seafood
'fish': {'fridge': 2, 'freezer': 180},
'seafood': {'fridge': 2, 'freezer': 180},
'shrimp': {'fridge': 2, 'freezer': 180},
'salmon': {'fridge': 2, 'freezer': 180},
# Eggs
'eggs': {'fridge': 35, 'freezer': None},
# Produce
'vegetables': {'fridge': 7, 'pantry': 5, 'freezer': 270},
'fruits': {'fridge': 7, 'pantry': 5, 'freezer': 365},
'leafy_greens': {'fridge': 5, 'freezer': 270},
'berries': {'fridge': 5, 'freezer': 270},
'apples': {'fridge': 30, 'pantry': 14},
'bananas': {'pantry': 5, 'fridge': 7},
'citrus': {'fridge': 21, 'pantry': 7},
# Bread & Bakery
'bread': {'pantry': 5, 'freezer': 90},
'bakery': {'pantry': 3, 'fridge': 7, 'freezer': 90},
# Frozen
'frozen_foods': {'freezer': 180},
'frozen_vegetables': {'freezer': 270},
'frozen_fruit': {'freezer': 365},
'ice_cream': {'freezer': 60},
# Pantry Staples
'canned_goods': {'pantry': 730, 'cabinet': 730},
'dry_goods': {'pantry': 365, 'cabinet': 365},
'pasta': {'pantry': 730, 'cabinet': 730},
'rice': {'pantry': 730, 'cabinet': 730},
'flour': {'pantry': 180, 'cabinet': 180},
'sugar': {'pantry': 730, 'cabinet': 730},
'cereal': {'pantry': 180, 'cabinet': 180},
'chips': {'pantry': 90, 'cabinet': 90},
'cookies': {'pantry': 90, 'cabinet': 90},
# Condiments
'condiments': {'fridge': 90, 'pantry': 180},
'ketchup': {'fridge': 180, 'pantry': 365},
'mustard': {'fridge': 365, 'pantry': 365},
'mayo': {'fridge': 60, 'pantry': 180},
'salad_dressing': {'fridge': 90, 'pantry': 180},
'soy_sauce': {'fridge': 730, 'pantry': 730},
# Beverages
'beverages': {'fridge': 14, 'pantry': 180},
'juice': {'fridge': 7, 'freezer': 90},
'soda': {'fridge': 270, 'pantry': 270},
'water': {'fridge': 365, 'pantry': 365},
# Other
'deli_meat': {'fridge': 5, 'freezer': 60},
'leftovers': {'fridge': 4, 'freezer': 90},
'prepared_foods': {'fridge': 4, 'freezer': 90},
}
CATEGORY_KEYWORDS = {
'milk': ['milk', 'whole milk', '2% milk', 'skim milk', 'almond milk', 'oat milk', 'soy milk'],
'cheese': ['cheese', 'cheddar', 'mozzarella', 'swiss', 'parmesan', 'feta', 'gouda'],
'yogurt': ['yogurt', 'greek yogurt', 'yoghurt'],
'butter': ['butter', 'margarine'],
'cream': ['cream', 'heavy cream', 'whipping cream', 'sour cream'],
'eggs': ['eggs', 'egg'],
'beef': ['beef', 'steak', 'roast', 'brisket', 'ribeye', 'sirloin'],
'pork': ['pork', 'bacon', 'ham', 'sausage', 'pork chop'],
'chicken': ['chicken', 'chicken breast', 'chicken thigh', 'chicken wings'],
'turkey': ['turkey', 'turkey breast', 'ground turkey'],
'ground_meat': ['ground beef', 'ground pork', 'ground chicken', 'hamburger'],
'fish': ['fish', 'cod', 'tilapia', 'halibut'],
'salmon': ['salmon'],
'shrimp': ['shrimp', 'prawns'],
'leafy_greens': ['lettuce', 'spinach', 'kale', 'arugula', 'mixed greens', 'salad'],
'berries': ['strawberries', 'blueberries', 'raspberries', 'blackberries'],
'apples': ['apple', 'apples'],
'bananas': ['banana', 'bananas'],
'citrus': ['orange', 'lemon', 'lime', 'grapefruit', 'tangerine'],
'bread': ['bread', 'loaf', 'baguette', 'roll', 'bagel', 'bun'],
'bakery': ['muffin', 'croissant', 'donut', 'danish', 'pastry'],
'deli_meat': ['deli', 'sliced turkey', 'sliced ham', 'lunch meat', 'cold cuts'],
'frozen_vegetables': ['frozen veg', 'frozen corn', 'frozen peas', 'frozen broccoli'],
'frozen_fruit': ['frozen berries', 'frozen mango', 'frozen strawberries'],
'ice_cream': ['ice cream', 'gelato', 'frozen yogurt'],
'pasta': ['pasta', 'spaghetti', 'penne', 'macaroni', 'noodles'],
'rice': ['rice', 'brown rice', 'white rice', 'jasmine'],
'cereal': ['cereal', 'granola', 'oatmeal'],
'chips': ['chips', 'crisps', 'tortilla chips'],
'cookies': ['cookies', 'biscuits', 'crackers'],
'ketchup': ['ketchup', 'catsup'],
'mustard': ['mustard'],
'mayo': ['mayo', 'mayonnaise', 'miracle whip'],
'salad_dressing': ['salad dressing', 'ranch', 'italian dressing', 'vinaigrette'],
'soy_sauce': ['soy sauce', 'tamari'],
'juice': ['juice', 'orange juice', 'apple juice'],
'soda': ['soda', 'pop', 'cola', 'sprite', 'pepsi', 'coke'],
}
def __init__(self) -> None:
self._router: Optional[LLMRouter] = None
try:
self._router = LLMRouter()
except FileNotFoundError:
logger.debug("LLM config not found — expiry LLM fallback disabled")
except Exception as e:
logger.warning("LLMRouter init failed (%s) — expiry LLM fallback disabled", e)
# ── Public API ────────────────────────────────────────────────────────────
def predict_expiration(
self,
category: Optional[str],
location: str,
purchase_date: Optional[date] = None,
product_name: Optional[str] = None,
tier: str = "free",
has_byok: bool = False,
) -> Optional[date]:
"""
Predict expiration date.
Fast path: deterministic lookup table.
Fallback: LLM query when table has no match, tier allows it, and a
backend is configured. Returns None rather than crashing if
inference fails.
"""
if not purchase_date:
purchase_date = date.today()
days = self._lookup_days(category, location)
if days is None and product_name and self._router and can_use("expiry_llm_matching", tier, has_byok):
days = self._llm_predict_days(product_name, category, location)
if days is None:
return None
return purchase_date + timedelta(days=days)
def get_category_from_product(
self,
product_name: str,
product_category: Optional[str] = None,
tags: Optional[List[str]] = None,
) -> Optional[str]:
"""Determine category from product name, existing category, and tags."""
if product_category:
cat = product_category.lower().strip()
if cat in self.SHELF_LIFE:
return cat
for key in self.SHELF_LIFE:
if key in cat or cat in key:
return key
if tags:
for tag in tags:
t = tag.lower().strip()
if t in self.SHELF_LIFE:
return t
name = product_name.lower().strip()
for category, keywords in self.CATEGORY_KEYWORDS.items():
if any(kw in name for kw in keywords):
return category
for words, fallback in [
(['meat', 'beef', 'pork', 'chicken'], 'meat'),
(['vegetable', 'veggie', 'produce'], 'vegetables'),
(['fruit'], 'fruits'),
(['dairy'], 'dairy'),
(['frozen'], 'frozen_foods'),
]:
if any(w in name for w in words):
return fallback
return 'dry_goods'
def get_shelf_life_info(self, category: str, location: str) -> Optional[int]:
"""Shelf life in days for a given category + location, or None."""
return self.SHELF_LIFE.get(category.lower().strip(), {}).get(location)
def list_categories(self) -> List[str]:
return list(self.SHELF_LIFE.keys())
def list_locations(self) -> List[str]:
locations: set[str] = set()
for shelf_life in self.SHELF_LIFE.values():
locations.update(shelf_life.keys())
return sorted(locations)
# ── Private helpers ───────────────────────────────────────────────────────
def _lookup_days(self, category: Optional[str], location: str) -> Optional[int]:
"""Pure deterministic lookup — no I/O."""
if not category:
return None
cat = category.lower().strip()
if cat not in self.SHELF_LIFE:
for key in self.SHELF_LIFE:
if key in cat or cat in key:
cat = key
break
else:
return None
days = self.SHELF_LIFE[cat].get(location)
if days is None:
for loc in ('fridge', 'pantry', 'freezer', 'cabinet'):
days = self.SHELF_LIFE[cat].get(loc)
if days is not None:
break
return days
def _llm_predict_days(
self,
product_name: str,
category: Optional[str],
location: str,
) -> Optional[int]:
"""
Ask the LLM how many days this product keeps in the given location.
TODO: Fill in the prompts below. Good prompts should:
- Give enough context for the LLM to reason about food safety
- Specify output format clearly (just an integer — nothing else)
- Err conservative (shorter shelf life) when uncertain
- Stay concise — this fires on every unknown barcode scan
Parameters available:
product_name — e.g. "Trader Joe's Organic Tempeh"
category — best-guess from get_category_from_product(), may be None
location — "fridge" | "freezer" | "pantry" | "cabinet"
"""
assert self._router is not None
system = (
"You are a food safety expert. Given a food product name, an optional "
"category hint, and a storage location, respond with ONLY a single "
"integer: the number of days the product typically remains safe to eat "
"from purchase when stored as specified. No explanation, no units, no "
"punctuation — just the integer. When uncertain, give the conservative "
"(shorter) estimate."
)
parts = [f"Product: {product_name}"]
if category:
parts.append(f"Category: {category}")
parts.append(f"Storage location: {location}")
parts.append("Days until expiry from purchase:")
prompt = "\n".join(parts)
try:
raw = self._router.complete(prompt, system=system, max_tokens=16)
match = re.search(r'\b(\d+)\b', raw)
if match:
days = int(match.group(1))
# Sanity cap: >5 years is implausible for a perishable unknown to
# the deterministic table. If the LLM returns something absurd,
# fall back to None rather than storing a misleading date.
if days > 1825:
logger.warning(
"LLM returned implausible shelf life (%d days) for %r — discarding",
days, product_name,
)
return None
logger.debug(
"LLM shelf life for %r in %s: %d days", product_name, location, days
)
return days
except Exception as e:
logger.warning("LLM expiry prediction failed for %r: %s", product_name, e)
return None