kiwi/app/services/recipe/recipe_engine.py
pyr0ball 1a493e0ad9 feat: recipe engine — assembly templates, prep notes, FTS fixes, texture backfill
- Assembly template system (13 templates: burrito, fried rice, omelette, stir fry,
  pasta, sandwich, grain bowl, soup/stew, casserole, pancakes, porridge, pie, pudding)
  with role-based matching, whole-word single-keyword guard, deterministic titles
  via MD5 pantry hash
- Prep-state stripping: strips 'melted butter' → 'butter' for coverage checks;
  reconstructs actionable states as 'Before you start:' cooking instructions
  (NutritionPanel prep_notes field + RecipesView.vue display block)
- FTS5 fixes: always double-quote all terms; strip apostrophes to prevent
  syntax errors on brands like "Stouffer's"; 'plant-based' → bare 'based' crash
- Bidirectional synonym expansion: alt-meat, alt-chicken, alt-beef, alt-pork
  mapped to canonical texture class; pantry expansion covers 'hamburger' from
  'burger patties' etc.
- Texture profile backfill script (378K ingredient_profiles rows) with macro-derived
  classification in priority order (fatty → creamy → starchy → firm → fibrous →
  tender → liquid → neutral); oats/legumes starchy-first fix
- LLM prompt: ban flavoured/sweetened ingredients (vanilla yoghurt) from savoury
- Migrations 014 (nutrition macros) + 015 (recipe FTS index)
- Nutrition estimation pipeline script
- gitignore MagicMock sqlite test artifacts
2026-04-02 22:12:35 -07:00

583 lines
22 KiB
Python

"""
RecipeEngine — orchestrates the four creativity levels.
Level 1: corpus lookup ranked by ingredient match + expiry urgency
Level 2: Level 1 + deterministic substitution swaps
Level 3: element scaffold → LLM constrained prompt (see llm_recipe.py)
Level 4: wildcard LLM (see llm_recipe.py)
Amendments:
- max_missing: filter to recipes missing ≤ N pantry items
- hard_day_mode: filter to easy-method recipes only
- grocery_list: aggregated missing ingredients across suggestions
"""
from __future__ import annotations
import json
import re
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from app.db.store import Store
from app.models.schemas.recipe import GroceryLink, NutritionPanel, RecipeRequest, RecipeResult, RecipeSuggestion, SwapCandidate
from app.services.recipe.assembly_recipes import match_assembly_templates
from app.services.recipe.element_classifier import ElementClassifier
from app.services.recipe.grocery_links import GroceryLinkBuilder
from app.services.recipe.substitution_engine import SubstitutionEngine
_LEFTOVER_DAILY_MAX_FREE = 5
# Words that carry no ingredient-identity signal — stripped before overlap scoring
_SWAP_STOPWORDS = frozenset({
"a", "an", "the", "of", "in", "for", "with", "and", "or",
"to", "from", "at", "by", "as", "on",
})
# Maps product-label substrings to recipe-corpus canonical terms.
# Kept in sync with Store._FTS_SYNONYMS — both must agree on canonical names.
# Used to expand pantry_set so single-word recipe ingredients can match
# multi-word product names (e.g. "hamburger" satisfied by "burger patties").
_PANTRY_LABEL_SYNONYMS: dict[str, str] = {
"burger patt": "hamburger",
"beef patt": "hamburger",
"ground beef": "hamburger",
"ground chuck": "hamburger",
"ground round": "hamburger",
"mince": "hamburger",
"veggie burger": "hamburger",
"beyond burger": "hamburger",
"impossible burger": "hamburger",
"plant burger": "hamburger",
"chicken patt": "chicken patty",
"kielbasa": "sausage",
"bratwurst": "sausage",
"frankfurter": "hotdog",
"wiener": "hotdog",
"chicken breast": "chicken",
"chicken thigh": "chicken",
"chicken drumstick": "chicken",
"chicken wing": "chicken",
"rotisserie chicken": "chicken",
"chicken tender": "chicken",
"chicken strip": "chicken",
"chicken piece": "chicken",
"fake chicken": "chicken",
"plant chicken": "chicken",
"vegan chicken": "chicken",
"daring": "chicken",
"gardein chick": "chicken",
"quorn chick": "chicken",
"chick'n": "chicken",
"chikn": "chicken",
"not-chicken": "chicken",
"no-chicken": "chicken",
# Plant-based beef subs → broad "beef" (strips ≠ ground; texture matters)
"not-beef": "beef",
"no-beef": "beef",
"plant beef": "beef",
"vegan beef": "beef",
# Plant-based pork subs
"not-pork": "pork",
"no-pork": "pork",
"plant pork": "pork",
"vegan pork": "pork",
"omnipork": "pork",
"omni pork": "pork",
# Generic alt-meat catch-alls → broad "beef"
"fake meat": "beef",
"plant meat": "beef",
"vegan meat": "beef",
"meat-free": "beef",
"meatless": "beef",
"pork chop": "pork",
"pork loin": "pork",
"pork tenderloin": "pork",
"marinara": "tomato sauce",
"pasta sauce": "tomato sauce",
"spaghetti sauce": "tomato sauce",
"pizza sauce": "tomato sauce",
"macaroni": "pasta",
"noodles": "pasta",
"spaghetti": "pasta",
"penne": "pasta",
"fettuccine": "pasta",
"rigatoni": "pasta",
"linguine": "pasta",
"rotini": "pasta",
"farfalle": "pasta",
"shredded cheese": "cheese",
"sliced cheese": "cheese",
"american cheese": "cheese",
"cheddar": "cheese",
"mozzarella": "cheese",
"heavy cream": "cream",
"whipping cream": "cream",
"half and half": "cream",
"burger bun": "buns",
"hamburger bun": "buns",
"hot dog bun": "buns",
"bread roll": "buns",
"dinner roll": "buns",
# Tortillas / wraps — assembly dishes (burritos, tacos, quesadillas)
"flour tortilla": "tortillas",
"corn tortilla": "tortillas",
"tortilla wrap": "tortillas",
"soft taco shell": "tortillas",
"taco shell": "taco shells",
"pita bread": "pita",
"flatbread": "flatbread",
# Canned beans — extremely interchangeable in assembly dishes
"black bean": "beans",
"pinto bean": "beans",
"kidney bean": "beans",
"refried bean": "beans",
"chickpea": "beans",
"garbanzo": "beans",
# Rice variants
"white rice": "rice",
"brown rice": "rice",
"jasmine rice": "rice",
"basmati rice": "rice",
"instant rice": "rice",
"microwavable rice": "rice",
# Salsa / hot sauce
"hot sauce": "salsa",
"taco sauce": "salsa",
"enchilada sauce": "salsa",
# Sour cream / Greek yogurt — functional substitutes
"greek yogurt": "sour cream",
# Frozen/prepackaged meal token extraction — handled by individual token
# fallback in _normalize_for_fts; these are the most common single-serve meal types
"lean cuisine": "casserole",
"stouffer": "casserole",
"healthy choice": "casserole",
"marie callender": "casserole",
}
# Matches leading quantity/unit prefixes in recipe ingredient strings,
# e.g. "2 cups flour" → "flour", "1/2 c. ketchup" → "ketchup",
# "3 oz. butter" → "butter"
_QUANTITY_PREFIX = re.compile(
r"^\s*(?:\d+(?:[./]\d+)?\s*)?" # optional leading number (1, 1/2, 2.5)
r"(?:to\s+\d+\s*)?" # optional "to N" range
r"(?:c\.|cup|cups|tbsp|tsp|oz|lb|lbs|g|kg|ml|l|"
r"can|cans|pkg|pkg\.|package|slice|slices|clove|cloves|"
r"small|medium|large|bunch|head|piece|pieces|"
r"pinch|dash|handful|sprig|sprigs)\s*\b",
re.IGNORECASE,
)
# Preparation-state words that modify an ingredient without changing what it is.
# Stripped from both ends so "melted butter", "butter, melted" both → "butter".
_PREP_STATES = re.compile(
r"\b(melted|softened|cold|warm|hot|room.temperature|"
r"diced|sliced|chopped|minced|grated|shredded|shredded|beaten|whipped|"
r"cooked|raw|frozen|canned|dried|dehydrated|marinated|seasoned|"
r"roasted|toasted|ground|crushed|pressed|peeled|seeded|pitted|"
r"boneless|skinless|trimmed|halved|quartered|julienned|"
r"thinly|finely|roughly|coarsely|freshly|lightly|"
r"packed|heaping|level|sifted|divided|optional)\b",
re.IGNORECASE,
)
# Trailing comma + optional prep state (e.g. "butter, melted")
_TRAILING_PREP = re.compile(r",\s*\w+$")
# Maps prep-state words to human-readable instruction templates.
# {ingredient} is replaced with the actual ingredient name.
# None means the state is passive (frozen, canned) — no note needed.
_PREP_INSTRUCTIONS: dict[str, str | None] = {
"melted": "Melt the {ingredient} before starting.",
"softened": "Let the {ingredient} soften to room temperature before using.",
"room temperature": "Bring the {ingredient} to room temperature before using.",
"beaten": "Beat the {ingredient} lightly before adding.",
"whipped": "Whip the {ingredient} until soft peaks form.",
"sifted": "Sift the {ingredient} before measuring.",
"toasted": "Toast the {ingredient} in a dry pan until fragrant.",
"roasted": "Roast the {ingredient} before using.",
"pressed": "Press the {ingredient} to remove excess moisture.",
"diced": "Dice the {ingredient} into small pieces.",
"sliced": "Slice the {ingredient} thinly.",
"chopped": "Chop the {ingredient} roughly.",
"minced": "Mince the {ingredient} finely.",
"grated": "Grate the {ingredient}.",
"shredded": "Shred the {ingredient}.",
"ground": "Grind the {ingredient}.",
"crushed": "Crush the {ingredient}.",
"peeled": "Peel the {ingredient} before use.",
"seeded": "Remove seeds from the {ingredient}.",
"pitted": "Pit the {ingredient} before use.",
"trimmed": "Trim any excess from the {ingredient}.",
"julienned": "Cut the {ingredient} into thin matchstick strips.",
"cooked": "Pre-cook the {ingredient} before adding.",
# Passive states — ingredient is used as-is, no prep note needed
"cold": None,
"warm": None,
"hot": None,
"raw": None,
"frozen": None,
"canned": None,
"dried": None,
"dehydrated": None,
"marinated": None,
"seasoned": None,
"boneless": None,
"skinless": None,
"divided": None,
"optional": None,
"fresh": None,
"freshly": None,
"thinly": None,
"finely": None,
"roughly": None,
"coarsely": None,
"lightly": None,
"packed": None,
"heaping": None,
"level": None,
}
# Finds the first actionable prep state in an ingredient string
_PREP_STATE_SEARCH = re.compile(
r"\b(" + "|".join(re.escape(k) for k in _PREP_INSTRUCTIONS) + r")\b",
re.IGNORECASE,
)
def _strip_quantity(ingredient: str) -> str:
"""Remove leading quantity/unit and preparation-state words from a recipe ingredient.
e.g. "2 tbsp melted butter""butter"
"butter, melted""butter"
"1/4 cup flour, sifted""flour"
"""
stripped = _QUANTITY_PREFIX.sub("", ingredient).strip()
# Strip any remaining leading number (e.g. "3 eggs" → "eggs")
stripped = re.sub(r"^\d+\s+", "", stripped)
# Strip trailing ", prep_state"
stripped = _TRAILING_PREP.sub("", stripped).strip()
# Strip prep-state words (may be leading or embedded)
stripped = _PREP_STATES.sub("", stripped).strip()
# Clean up any double spaces left behind
stripped = re.sub(r"\s{2,}", " ", stripped).strip()
return stripped or ingredient
def _prep_note_for(ingredient: str) -> str | None:
"""Return a human-readable prep instruction for this ingredient string, or None.
e.g. "2 tbsp melted butter""Melt the butter before starting."
"onion, diced""Dice the onion into small pieces."
"frozen peas" → None (passive state, no action needed)
"""
match = _PREP_STATE_SEARCH.search(ingredient)
if not match:
return None
state = match.group(1).lower()
template = _PREP_INSTRUCTIONS.get(state)
if not template:
return None
# Use the stripped ingredient name as the subject
ingredient_name = _strip_quantity(ingredient)
return template.format(ingredient=ingredient_name)
def _expand_pantry_set(pantry_items: list[str]) -> set[str]:
"""Return pantry_set expanded with canonical recipe-corpus synonyms.
For each pantry item, checks _PANTRY_LABEL_SYNONYMS for substring matches
and adds the canonical form. This lets single-word recipe ingredients
("hamburger", "chicken") match product-label pantry entries
("burger patties", "rotisserie chicken").
"""
expanded: set[str] = set()
for item in pantry_items:
lower = item.lower().strip()
expanded.add(lower)
for pattern, canonical in _PANTRY_LABEL_SYNONYMS.items():
if pattern in lower:
expanded.add(canonical)
return expanded
def _ingredient_in_pantry(ingredient: str, pantry_set: set[str]) -> bool:
"""Return True if the recipe ingredient is satisfied by the pantry.
Checks three layers in order:
1. Exact match after quantity stripping
2. Synonym lookup: ingredient → canonical → in pantry_set
(handles "ground beef" matched by "burger patties" via shared canonical)
3. Token subset: all content tokens of the ingredient appear in pantry
(handles "diced onions" when "onions" is in pantry)
"""
clean = _strip_quantity(ingredient).lower()
if clean in pantry_set:
return True
# Check if this recipe ingredient maps to a canonical that's in pantry
for pattern, canonical in _PANTRY_LABEL_SYNONYMS.items():
if pattern in clean and canonical in pantry_set:
return True
# Single-token ingredient whose token appears in pantry (e.g. "ketchup" in "c. ketchup")
tokens = [t for t in clean.split() if t not in _SWAP_STOPWORDS and len(t) > 2]
if tokens and all(t in pantry_set for t in tokens):
return True
return False
def _content_tokens(text: str) -> frozenset[str]:
return frozenset(
w for w in text.lower().split()
if w not in _SWAP_STOPWORDS and len(w) > 1
)
def _pantry_creative_swap(required: str, pantry_items: set[str]) -> str | None:
"""Return a pantry item that's a plausible creative substitute, or None.
Requires ≥2 shared content tokens AND ≥50% bidirectional overlap so that
single-word differences (cream-of-mushroom vs cream-of-potato) qualify while
single-word ingredients (butter, flour) don't accidentally match supersets
(peanut butter, bread flour).
"""
req_tokens = _content_tokens(required)
if len(req_tokens) < 2:
return None # single-word ingredients must already be in pantry_set
best: str | None = None
best_score = 0.0
for item in pantry_items:
if item.lower() == required.lower():
continue
pan_tokens = _content_tokens(item)
if not pan_tokens:
continue
overlap = len(req_tokens & pan_tokens)
if overlap < 2:
continue
score = min(overlap / len(req_tokens), overlap / len(pan_tokens))
if score >= 0.5 and score > best_score:
best_score = score
best = item
return best
# Method complexity classification patterns
_EASY_METHODS = re.compile(
r"\b(microwave|mix|stir|blend|toast|assemble|heat)\b", re.IGNORECASE
)
_INVOLVED_METHODS = re.compile(
r"\b(braise|roast|knead|deep.?fry|fry|sauté|saute|bake|boil)\b", re.IGNORECASE
)
def _classify_method_complexity(
directions: list[str],
available_equipment: list[str] | None = None,
) -> str:
"""Classify recipe method complexity from direction strings.
Returns 'easy', 'moderate', or 'involved'.
available_equipment can expand the easy set (e.g. ['toaster', 'air fryer']).
"""
text = " ".join(directions).lower()
equipment_set = {e.lower() for e in (available_equipment or [])}
if _INVOLVED_METHODS.search(text):
return "involved"
if _EASY_METHODS.search(text):
return "easy"
# Check equipment-specific easy methods
for equip in equipment_set:
if equip in text:
return "easy"
return "moderate"
class RecipeEngine:
def __init__(self, store: "Store") -> None:
self._store = store
self._classifier = ElementClassifier(store)
self._substitution = SubstitutionEngine(store)
def suggest(
self,
req: RecipeRequest,
available_equipment: list[str] | None = None,
) -> RecipeResult:
# Load cooking equipment from user settings when hard_day_mode is active
if req.hard_day_mode and available_equipment is None:
equipment_json = self._store.get_setting("cooking_equipment")
if equipment_json:
try:
available_equipment = json.loads(equipment_json)
except (json.JSONDecodeError, TypeError):
available_equipment = []
else:
available_equipment = []
# Rate-limit leftover mode for free tier
if req.expiry_first and req.tier == "free":
allowed, count = self._store.check_and_increment_rate_limit(
"leftover_mode", _LEFTOVER_DAILY_MAX_FREE
)
if not allowed:
return RecipeResult(
suggestions=[], element_gaps=[], rate_limited=True, rate_limit_count=count
)
profiles = self._classifier.classify_batch(req.pantry_items)
gaps = self._classifier.identify_gaps(profiles)
pantry_set = _expand_pantry_set(req.pantry_items)
if req.level >= 3:
from app.services.recipe.llm_recipe import LLMRecipeGenerator
gen = LLMRecipeGenerator(self._store)
return gen.generate(req, profiles, gaps)
# Level 1 & 2: deterministic path
nf = req.nutrition_filters
rows = self._store.search_recipes_by_ingredients(
req.pantry_items,
limit=20,
category=req.category or None,
max_calories=nf.max_calories,
max_sugar_g=nf.max_sugar_g,
max_carbs_g=nf.max_carbs_g,
max_sodium_mg=nf.max_sodium_mg,
excluded_ids=req.excluded_ids or [],
)
suggestions = []
for row in rows:
ingredient_names: list[str] = row.get("ingredient_names") or []
if isinstance(ingredient_names, str):
try:
ingredient_names = json.loads(ingredient_names)
except Exception:
ingredient_names = []
# Compute missing ingredients, detecting pantry coverage first.
# When covered, collect any prep-state annotations (e.g. "melted butter"
# → note "Melt the butter before starting.") to surface separately.
swap_candidates: list[SwapCandidate] = []
missing: list[str] = []
prep_note_set: set[str] = set()
for n in ingredient_names:
if _ingredient_in_pantry(n, pantry_set):
note = _prep_note_for(n)
if note:
prep_note_set.add(note)
continue
swap_item = _pantry_creative_swap(n, pantry_set)
if swap_item:
swap_candidates.append(SwapCandidate(
original_name=n,
substitute_name=swap_item,
constraint_label="pantry_swap",
explanation=f"You have {swap_item} — use it in place of {n}.",
compensation_hints=[],
))
else:
missing.append(n)
# Filter by max_missing (pantry swaps don't count as missing)
if req.max_missing is not None and len(missing) > req.max_missing:
continue
# Filter by hard_day_mode
if req.hard_day_mode:
directions: list[str] = row.get("directions") or []
if isinstance(directions, str):
try:
directions = json.loads(directions)
except Exception:
directions = [directions]
complexity = _classify_method_complexity(directions, available_equipment)
if complexity == "involved":
continue
# Level 2: also add dietary constraint swaps from substitution_pairs
if req.level == 2 and req.constraints:
for ing in ingredient_names:
for constraint in req.constraints:
swaps = self._substitution.find_substitutes(ing, constraint)
for swap in swaps[:1]:
swap_candidates.append(SwapCandidate(
original_name=swap.original_name,
substitute_name=swap.substitute_name,
constraint_label=swap.constraint_label,
explanation=swap.explanation,
compensation_hints=swap.compensation_hints,
))
coverage_raw = row.get("element_coverage") or {}
if isinstance(coverage_raw, str):
try:
coverage_raw = json.loads(coverage_raw)
except Exception:
coverage_raw = {}
servings = row.get("servings") or None
nutrition = NutritionPanel(
calories=row.get("calories"),
fat_g=row.get("fat_g"),
protein_g=row.get("protein_g"),
carbs_g=row.get("carbs_g"),
fiber_g=row.get("fiber_g"),
sugar_g=row.get("sugar_g"),
sodium_mg=row.get("sodium_mg"),
servings=servings,
estimated=bool(row.get("nutrition_estimated", 0)),
)
has_nutrition = any(
v is not None
for v in (nutrition.calories, nutrition.sugar_g, nutrition.carbs_g)
)
suggestions.append(RecipeSuggestion(
id=row["id"],
title=row["title"],
match_count=int(row.get("match_count") or 0),
element_coverage=coverage_raw,
swap_candidates=swap_candidates,
missing_ingredients=missing,
prep_notes=sorted(prep_note_set),
level=req.level,
nutrition=nutrition if has_nutrition else None,
))
# Prepend assembly-dish templates (burrito, stir fry, omelette, etc.)
# These fire regardless of corpus coverage — any pantry can make a burrito.
assembly = match_assembly_templates(
pantry_items=req.pantry_items,
pantry_set=pantry_set,
excluded_ids=req.excluded_ids or [],
)
suggestions = assembly + suggestions
# Build grocery list — deduplicated union of all missing ingredients
seen: set[str] = set()
grocery_list: list[str] = []
for s in suggestions:
for item in s.missing_ingredients:
if item not in seen:
grocery_list.append(item)
seen.add(item)
# Build grocery links — affiliate deeplinks for each missing ingredient
link_builder = GroceryLinkBuilder(tier=req.tier, has_byok=req.has_byok)
grocery_links = link_builder.build_all(grocery_list)
return RecipeResult(
suggestions=suggestions,
element_gaps=gaps,
grocery_list=grocery_list,
grocery_links=grocery_links,
)