feat: recipe engine — assembly templates, prep notes, FTS fixes, texture backfill
- Assembly template system (13 templates: burrito, fried rice, omelette, stir fry, pasta, sandwich, grain bowl, soup/stew, casserole, pancakes, porridge, pie, pudding) with role-based matching, whole-word single-keyword guard, deterministic titles via MD5 pantry hash - Prep-state stripping: strips 'melted butter' → 'butter' for coverage checks; reconstructs actionable states as 'Before you start:' cooking instructions (NutritionPanel prep_notes field + RecipesView.vue display block) - FTS5 fixes: always double-quote all terms; strip apostrophes to prevent syntax errors on brands like "Stouffer's"; 'plant-based' → bare 'based' crash - Bidirectional synonym expansion: alt-meat, alt-chicken, alt-beef, alt-pork mapped to canonical texture class; pantry expansion covers 'hamburger' from 'burger patties' etc. - Texture profile backfill script (378K ingredient_profiles rows) with macro-derived classification in priority order (fatty → creamy → starchy → firm → fibrous → tender → liquid → neutral); oats/legumes starchy-first fix - LLM prompt: ban flavoured/sweetened ingredients (vanilla yoghurt) from savoury - Migrations 014 (nutrition macros) + 015 (recipe FTS index) - Nutrition estimation pipeline script - gitignore MagicMock sqlite test artifacts
This commit is contained in:
parent
b9c308ab28
commit
1a493e0ad9
11 changed files with 1888 additions and 49 deletions
18
app/db/migrations/014_nutrition_macros.sql
Normal file
18
app/db/migrations/014_nutrition_macros.sql
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
-- Migration 014: Add macro nutrition columns to recipes and ingredient_profiles.
--
-- recipes: sugar, carbs, fiber, servings, and an estimated flag.
-- ingredient_profiles: carbs, fiber, calories, sugar per 100g (for estimation fallback).

-- Recipe-level macros. No DEFAULT is given, so existing rows get NULL
-- in each of these columns.
ALTER TABLE recipes ADD COLUMN sugar_g REAL;
ALTER TABLE recipes ADD COLUMN carbs_g REAL;
ALTER TABLE recipes ADD COLUMN fiber_g REAL;
ALTER TABLE recipes ADD COLUMN servings REAL;
-- Integer flag, 0 for all existing rows.
-- NOTE(review): presumably set to 1 when the macros were estimated rather
-- than sourced — confirm against the estimation pipeline.
ALTER TABLE recipes ADD COLUMN nutrition_estimated INTEGER NOT NULL DEFAULT 0;

-- Per-100g ingredient macros.
-- NOTE(review): unlike the recipes columns these default to 0.0, so an
-- ingredient with unknown macros is indistinguishable from one that truly
-- contains none — confirm this is intended.
ALTER TABLE ingredient_profiles ADD COLUMN carbs_g_per_100g REAL DEFAULT 0.0;
ALTER TABLE ingredient_profiles ADD COLUMN fiber_g_per_100g REAL DEFAULT 0.0;
ALTER TABLE ingredient_profiles ADD COLUMN calories_per_100g REAL DEFAULT 0.0;
ALTER TABLE ingredient_profiles ADD COLUMN sugar_g_per_100g REAL DEFAULT 0.0;

-- Indexes on the two new recipe macro columns sugar_g and carbs_g.
CREATE INDEX idx_recipes_sugar_g ON recipes (sugar_g);
CREATE INDEX idx_recipes_carbs_g ON recipes (carbs_g);
|
||||
16
app/db/migrations/015_recipe_fts.sql
Normal file
16
app/db/migrations/015_recipe_fts.sql
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
-- Migration 015: FTS5 inverted index for recipe ingredient lookup.
--
-- Content table backed by `recipes` — stores only the inverted index, no text duplication.
-- MATCH queries replace O(N) LIKE scans with O(log N) token lookups.
--
-- One-time rebuild cost on 3.2M rows: ~15-30 seconds at startup.
-- Subsequent startups skip this migration entirely.

-- External-content FTS5 table: the indexed text is read from
-- recipes.ingredient_names and FTS rowids are taken from recipes.id.
-- NOTE(review): FTS5 does not keep an external-content index in sync by
-- itself and this migration defines no triggers, so recipes inserted or
-- edited afterwards stay unindexed until another 'rebuild' — confirm this
-- is handled elsewhere.
CREATE VIRTUAL TABLE IF NOT EXISTS recipes_fts USING fts5(
    ingredient_names,
    content=recipes,
    content_rowid=id,
    tokenize="unicode61"
);

-- Special FTS5 command: (re)build the whole index from the content table.
INSERT INTO recipes_fts(recipes_fts) VALUES('rebuild');
|
||||
384
app/db/store.py
384
app/db/store.py
|
|
@ -232,6 +232,72 @@ class Store:
|
|||
(str(days),),
|
||||
)
|
||||
|
||||
def recalculate_expiry(
    self,
    tier: str = "local",
    has_byok: bool = False,
) -> tuple[int, int]:
    """Re-run the expiration predictor over all available inventory items.

    Every item with status 'available' is re-predicted from its stored
    purchase_date (today when the value is NULL or cannot be parsed) and
    its current location ("pantry" when the location is empty).

    Items whose expiration_date came from a receipt or was entered by the
    user cannot be distinguished from auto-predicted ones, so nothing is
    preserved: all available items are recalculated.

    Returns (updated_count, skipped_count). An item is counted as skipped
    when the predictor returns no date for it.
    """
    from datetime import date

    from app.services.expiration_predictor import ExpirationPredictor

    def _purchase_date_or_today(raw) -> date:
        # NULL/empty and malformed values both fall back to today.
        if not raw:
            return date.today()
        try:
            return date.fromisoformat(raw)
        except (ValueError, TypeError):
            return date.today()

    predictor = ExpirationPredictor()
    items = self._fetch_all(
        """SELECT i.id, i.location, i.purchase_date,
                  p.name AS product_name, p.category AS product_category
           FROM inventory_items i
           JOIN products p ON p.id = i.product_id
           WHERE i.status = 'available'""",
        (),
    )

    updated = 0
    skipped = 0
    for item in items:
        category = predictor.get_category_from_product(
            item["product_name"] or "",
            product_category=item.get("product_category"),
            location=item.get("location"),
        )
        bought_on = _purchase_date_or_today(item.get("purchase_date"))

        predicted = predictor.predict_expiration(
            category,
            item["location"] or "pantry",
            purchase_date=bought_on,
            product_name=item["product_name"],
            tier=tier,
            has_byok=has_byok,
        )
        if predicted is None:
            skipped += 1
            continue

        self.conn.execute(
            "UPDATE inventory_items SET expiration_date = ?, updated_at = datetime('now') WHERE id = ?",
            (str(predicted), item["id"]),
        )
        updated += 1

    # Single commit after the loop so the whole pass lands together.
    self.conn.commit()
    return updated, skipped
|
||||
|
||||
# ── receipt_data ──────────────────────────────────────────────────────
|
||||
|
||||
def upsert_receipt_data(self, receipt_id: int, data: dict) -> dict[str, Any]:
|
||||
|
|
@ -266,16 +332,323 @@ class Store:
|
|||
|
||||
# ── recipes ───────────────────────────────────────────────────────────
|
||||
|
||||
def _fts_ready(self) -> bool:
|
||||
"""Return True if the recipes_fts virtual table exists."""
|
||||
row = self._fetch_one(
|
||||
"SELECT 1 FROM sqlite_master WHERE type='table' AND name='recipes_fts'"
|
||||
)
|
||||
return row is not None
|
||||
|
||||
# Words that carry no recipe-ingredient signal and should be filtered
|
||||
# out when tokenising multi-word product names for FTS expansion.
|
||||
_FTS_TOKEN_STOPWORDS: frozenset[str] = frozenset({
|
||||
# Common English stopwords
|
||||
"a", "an", "the", "of", "in", "for", "with", "and", "or", "to",
|
||||
"from", "at", "by", "as", "on", "into",
|
||||
# Brand / marketing words that appear in product names
|
||||
"lean", "cuisine", "healthy", "choice", "stouffer", "original",
|
||||
"classic", "deluxe", "homestyle", "family", "style", "grade",
|
||||
"premium", "select", "natural", "organic", "fresh", "lite",
|
||||
"ready", "quick", "easy", "instant", "microwave", "frozen",
|
||||
"brand", "size", "large", "small", "medium", "extra",
|
||||
# Plant-based / alt-meat brand names
|
||||
"daring", "gardein", "morningstar", "lightlife", "tofurky",
|
||||
"quorn", "omni", "nuggs", "simulate", "simulate",
|
||||
# Preparation states — "cut up chicken" is still chicken
|
||||
"cut", "diced", "sliced", "chopped", "minced", "shredded",
|
||||
"cooked", "raw", "whole", "boneless", "skinless", "trimmed",
|
||||
"pre", "prepared", "marinated", "seasoned", "breaded", "battered",
|
||||
"grilled", "roasted", "smoked", "canned", "dried", "dehydrated",
|
||||
"pieces", "piece", "strips", "strip", "chunks", "chunk",
|
||||
"fillets", "fillet", "cutlets", "cutlet", "tenders", "nuggets",
|
||||
# Units / packaging
|
||||
"oz", "lb", "lbs", "pkg", "pack", "box", "can", "bag", "jar",
|
||||
})
|
||||
|
||||
# Maps substrings found in product-label names to canonical recipe-corpus
# ingredient terms. Checked as substring matches against the lower-cased
# full product name, then against each individual token.
#
# Because the full-name check is a plain substring test, every key also
# fires inside longer words; several keys are deliberate stems
# ("burger patt" covers "patty" and "patties"). More than one key can match
# the same product name, in which case each canonical term is contributed.
_FTS_SYNONYMS: dict[str, str] = {
    # Ground / minced beef
    "burger patt": "hamburger",
    "beef patt": "hamburger",
    "ground beef": "hamburger",
    "ground chuck": "hamburger",
    "ground round": "hamburger",
    # NOTE(review): as a substring this also matches "minced ..." names
    # (e.g. "minced garlic" → "hamburger") — confirm this is acceptable.
    "mince": "hamburger",
    "veggie burger": "hamburger",
    "beyond burger": "hamburger",
    "impossible burger": "hamburger",
    "plant burger": "hamburger",
    "chicken patt": "hamburger",  # FTS match only — recipe scoring still works
    # Sausages
    "kielbasa": "sausage",
    "bratwurst": "sausage",
    # NOTE(review): the trailing space means this only matches when another
    # word follows "brat"; a name that ends in "brat" will not match.
    "brat ": "sausage",
    "frankfurter": "hotdog",
    "wiener": "hotdog",
    # Chicken cuts + plant-based chicken → generic chicken for broader matching
    "chicken breast": "chicken",
    "chicken thigh": "chicken",
    "chicken drumstick": "chicken",
    "chicken wing": "chicken",
    "rotisserie chicken": "chicken",
    "chicken tender": "chicken",
    "chicken strip": "chicken",
    "chicken piece": "chicken",
    "fake chicken": "chicken",
    "plant chicken": "chicken",
    "vegan chicken": "chicken",
    "daring": "chicken",  # Daring Foods brand
    "gardein chick": "chicken",
    "quorn chick": "chicken",
    "chick'n": "chicken",
    "chikn": "chicken",
    "not-chicken": "chicken",
    "no-chicken": "chicken",
    # Plant-based beef subs — map to broad "beef" not "hamburger"
    # (texture varies: strips ≠ ground; let corpus handle the specific form)
    "not-beef": "beef",
    "no-beef": "beef",
    "plant beef": "beef",
    "vegan beef": "beef",
    # Plant-based pork subs
    "not-pork": "pork",
    "no-pork": "pork",
    "plant pork": "pork",
    "vegan pork": "pork",
    "omnipork": "pork",
    "omni pork": "pork",
    # Generic alt-meat catch-alls → broad "beef" (safer than hamburger)
    "fake meat": "beef",
    "plant meat": "beef",
    "vegan meat": "beef",
    "meat-free": "beef",
    "meatless": "beef",
    # Pork cuts
    "pork chop": "pork",
    "pork loin": "pork",
    "pork tenderloin": "pork",
    # Tomato-based sauces
    "marinara": "tomato sauce",
    "pasta sauce": "tomato sauce",
    "spaghetti sauce": "tomato sauce",
    "pizza sauce": "tomato sauce",
    # Pasta shapes — map to generic "pasta" so FTS finds any pasta recipe
    "macaroni": "pasta",
    "noodles": "pasta",
    "spaghetti": "pasta",
    "penne": "pasta",
    "fettuccine": "pasta",
    "rigatoni": "pasta",
    "linguine": "pasta",
    "rotini": "pasta",
    "farfalle": "pasta",
    # Cheese variants → "cheese" for broad matching
    "shredded cheese": "cheese",
    "sliced cheese": "cheese",
    "american cheese": "cheese",
    "cheddar": "cheese",
    "mozzarella": "cheese",
    # Cream variants
    "heavy cream": "cream",
    "whipping cream": "cream",
    "half and half": "cream",
    # Buns / rolls
    "burger bun": "buns",
    "hamburger bun": "buns",
    "hot dog bun": "buns",
    "bread roll": "buns",
    "dinner roll": "buns",
    # Tortillas / wraps
    "flour tortilla": "tortillas",
    "corn tortilla": "tortillas",
    "tortilla wrap": "tortillas",
    # "taco shell" below is a substring of "soft taco shell", so a soft taco
    # shell product matches both keys.
    "soft taco shell": "tortillas",
    "taco shell": "taco shells",
    "pita bread": "pita",
    # NOTE(review): identity mapping — adds nothing beyond the name itself.
    "flatbread": "flatbread",
    # Canned beans
    "black bean": "beans",
    "pinto bean": "beans",
    "kidney bean": "beans",
    "refried bean": "beans",
    "chickpea": "beans",
    "garbanzo": "beans",
    # Rice variants
    "white rice": "rice",
    "brown rice": "rice",
    "jasmine rice": "rice",
    "basmati rice": "rice",
    "instant rice": "rice",
    "microwavable rice": "rice",
    # Salsa / hot sauce
    "hot sauce": "salsa",
    "taco sauce": "salsa",
    "enchilada sauce": "salsa",
    # Sour cream substitute
    "greek yogurt": "sour cream",
    # Prepackaged meals
    "lean cuisine": "casserole",
    "stouffer": "casserole",
    "healthy choice": "casserole",
    "marie callender": "casserole",
}
|
||||
|
||||
@staticmethod
def _normalize_for_fts(name: str) -> list[str]:
    """Expand one pantry item to all FTS search terms it should contribute.

    Returns, in order and without duplicates:
    - the lower-cased, stripped name itself
    - every synonym-map canonical whose key is a substring of the name
      (handles product-label → corpus name)
    - for multi-word names, each individual token longer than three
      characters that is not a stopword, plus that token's canonical if
      the token is itself a synonym key
      (handles packaged meals like "Lean Cuisine Chicken Alfredo" → also
      searches for "chicken" and "alfredo" independently)

    An empty or whitespace-only name yields an empty list.
    """
    lower = name.lower().strip()
    if not lower:
        return []

    terms: list[str] = [lower]

    def _add(term: str) -> None:
        # Keep first-seen order while never emitting the same term twice.
        if term not in terms:
            terms.append(term)

    # Substring synonym check on full name. Several keys can share one
    # canonical (e.g. "burger bun" and "hamburger bun" → "buns"), so this
    # branch has to de-duplicate just like the token branch below.
    for pattern, canonical in Store._FTS_SYNONYMS.items():
        if pattern in lower:
            _add(canonical)

    # For multi-word product names, also add individual significant tokens
    if " " in lower:
        for token in lower.split():
            if len(token) <= 3 or token in Store._FTS_TOKEN_STOPWORDS:
                continue
            _add(token)
            # Synonym-expand individual tokens too
            canonical = Store._FTS_SYNONYMS.get(token)
            if canonical is not None:
                _add(canonical)

    return terms
|
||||
|
||||
@staticmethod
def _build_fts_query(ingredient_names: list[str]) -> str:
    """Build an FTS5 MATCH expression that ORs together all ingredient terms.

    Every pantry item is first expanded with _normalize_for_fts, so a
    product-label name (e.g. "burger patties") also searches for its
    recipe-corpus equivalent (e.g. "hamburger") and multi-word packaged
    product names contribute their individual ingredient tokens.

    Each term is emitted once, wrapped in double quotes, in first-seen
    order. Returns an empty string when no usable term remains.
    """
    # dict keys give ordered de-duplication of the quoted terms.
    quoted_terms: dict[str, None] = {}
    for pantry_name in ingredient_names:
        for term in Store._normalize_for_fts(pantry_name):
            # Strip characters that break FTS5 query syntax
            bare = term.replace('"', "").replace("'", "")
            if bare:
                quoted_terms.setdefault(f'"{bare}"', None)
    return " OR ".join(quoted_terms)
|
||||
|
||||
def search_recipes_by_ingredients(
    self,
    ingredient_names: list[str],
    limit: int = 20,
    category: str | None = None,
    max_calories: float | None = None,
    max_sugar_g: float | None = None,
    max_carbs_g: float | None = None,
    max_sodium_mg: float | None = None,
    excluded_ids: list[int] | None = None,
) -> list[dict]:
    """Find recipes containing any of the given ingredient names.

    Scores by match count and returns highest-scoring first.

    Uses FTS5 index (migration 015) when available — O(log N) per query.
    Falls back to LIKE scans on older databases.

    Nutrition filters use NULL-passthrough: rows without nutrition data
    always pass (they may be estimated or absent entirely).

    Args:
        ingredient_names: pantry item names to search for; an empty list
            returns [] without touching the database.
        limit: maximum number of recipes to return.
        category: when truthy, only recipes with exactly this category.
        max_calories, max_sugar_g, max_carbs_g, max_sodium_mg: inclusive
            upper bounds; None disables the corresponding filter.
        excluded_ids: recipe ids that must not be returned.
    """
    if not ingredient_names:
        return []

    extra_clauses: list[str] = []
    extra_params: list = []

    if category:
        extra_clauses.append("r.category = ?")
        extra_params.append(category)

    # One NULL-passthrough clause per active nutrition bound. The order here
    # fixes the order of the bound parameters, so keep both in step.
    nutrition_bounds = (
        ("calories", max_calories),
        ("sugar_g", max_sugar_g),
        ("carbs_g", max_carbs_g),
        ("sodium_mg", max_sodium_mg),
    )
    for column, bound in nutrition_bounds:
        if bound is not None:
            extra_clauses.append(f"(r.{column} IS NULL OR r.{column} <= ?)")
            extra_params.append(bound)

    if excluded_ids:
        placeholders = ",".join("?" * len(excluded_ids))
        extra_clauses.append(f"r.id NOT IN ({placeholders})")
        extra_params.extend(excluded_ids)

    where_extra = (" AND " + " AND ".join(extra_clauses)) if extra_clauses else ""

    search = self._search_recipes_fts if self._fts_ready() else self._search_recipes_like
    return search(ingredient_names, limit, where_extra, extra_params)
|
||||
|
||||
def _search_recipes_fts(
|
||||
self,
|
||||
ingredient_names: list[str],
|
||||
limit: int,
|
||||
where_extra: str,
|
||||
extra_params: list,
|
||||
) -> list[dict]:
|
||||
"""FTS5-backed ingredient search. Candidates fetched via inverted index;
|
||||
match_count computed in Python over the small candidate set."""
|
||||
fts_query = self._build_fts_query(ingredient_names)
|
||||
if not fts_query:
|
||||
return []
|
||||
|
||||
# Pull up to 10× limit candidates so ranking has enough headroom.
|
||||
sql = f"""
|
||||
SELECT r.*
|
||||
FROM recipes_fts
|
||||
JOIN recipes r ON r.id = recipes_fts.rowid
|
||||
WHERE recipes_fts MATCH ?
|
||||
{where_extra}
|
||||
LIMIT ?
|
||||
"""
|
||||
rows = self._fetch_all(sql, (fts_query, *extra_params, limit * 10))
|
||||
|
||||
pantry_set = {n.lower().strip() for n in ingredient_names}
|
||||
scored: list[dict] = []
|
||||
for row in rows:
|
||||
raw = row.get("ingredient_names") or []
|
||||
names: list[str] = raw if isinstance(raw, list) else json.loads(raw or "[]")
|
||||
match_count = sum(1 for n in names if n.lower() in pantry_set)
|
||||
scored.append({**row, "match_count": match_count})
|
||||
|
||||
scored.sort(key=lambda r: (-r["match_count"], r["id"]))
|
||||
return scored[:limit]
|
||||
|
||||
def _search_recipes_like(
|
||||
self,
|
||||
ingredient_names: list[str],
|
||||
limit: int,
|
||||
where_extra: str,
|
||||
extra_params: list,
|
||||
) -> list[dict]:
|
||||
"""Legacy LIKE-based ingredient search (O(N×rows) — slow on large corpora)."""
|
||||
like_params = [f'%"{n}"%' for n in ingredient_names]
|
||||
like_clauses = " OR ".join(
|
||||
"r.ingredient_names LIKE ?" for _ in ingredient_names
|
||||
|
|
@ -284,20 +657,15 @@ class Store:
|
|||
"CASE WHEN r.ingredient_names LIKE ? THEN 1 ELSE 0 END"
|
||||
for _ in ingredient_names
|
||||
)
|
||||
category_clause = ""
|
||||
category_params: list = []
|
||||
if category:
|
||||
category_clause = "AND r.category = ?"
|
||||
category_params = [category]
|
||||
sql = f"""
|
||||
SELECT r.*, ({match_score}) AS match_count
|
||||
FROM recipes r
|
||||
WHERE ({like_clauses})
|
||||
{category_clause}
|
||||
{where_extra}
|
||||
ORDER BY match_count DESC, r.id ASC
|
||||
LIMIT ?
|
||||
"""
|
||||
all_params = like_params + like_params + category_params + [limit]
|
||||
all_params = like_params + like_params + extra_params + [limit]
|
||||
return self._fetch_all(sql, tuple(all_params))
|
||||
|
||||
def get_recipe(self, recipe_id: int) -> dict | None:
|
||||
|
|
|
|||
|
|
@ -12,6 +12,20 @@ class SwapCandidate(BaseModel):
|
|||
compensation_hints: list[dict] = Field(default_factory=list)
|
||||
|
||||
|
||||
class NutritionPanel(BaseModel):
    """Per-recipe macro summary.

    All values are per-serving when servings is known, otherwise for the
    full recipe. None means data is unavailable.
    """
    # Energy — NOTE(review): presumably kcal; the unit is not stated here.
    calories: float | None = None
    # Macros in grams (per the _g suffix).
    fat_g: float | None = None
    protein_g: float | None = None
    carbs_g: float | None = None
    fiber_g: float | None = None
    sugar_g: float | None = None
    # Sodium in milligrams (per the _mg suffix).
    sodium_mg: float | None = None
    # Number of servings; None when unknown, in which case the values above
    # describe the full recipe.
    servings: float | None = None
    estimated: bool = False  # True when nutrition was inferred from ingredient profiles
|
||||
|
||||
|
||||
class RecipeSuggestion(BaseModel):
|
||||
id: int
|
||||
title: str
|
||||
|
|
@ -20,9 +34,11 @@ class RecipeSuggestion(BaseModel):
|
|||
swap_candidates: list[SwapCandidate] = Field(default_factory=list)
|
||||
missing_ingredients: list[str] = Field(default_factory=list)
|
||||
directions: list[str] = Field(default_factory=list)
|
||||
prep_notes: list[str] = Field(default_factory=list)
|
||||
notes: str = ""
|
||||
level: int = 1
|
||||
is_wildcard: bool = False
|
||||
nutrition: NutritionPanel | None = None
|
||||
|
||||
|
||||
class GroceryLink(BaseModel):
|
||||
|
|
@ -40,6 +56,14 @@ class RecipeResult(BaseModel):
|
|||
rate_limit_count: int = 0
|
||||
|
||||
|
||||
class NutritionFilters(BaseModel):
    """Optional per-serving upper bounds for macro filtering. None = no filter.

    Every field defaults to None, so a default-constructed instance applies
    no filtering at all.
    """
    # Upper bound on calories.
    max_calories: float | None = None
    # Upper bound on sugar, in grams.
    max_sugar_g: float | None = None
    # Upper bound on carbohydrates, in grams.
    max_carbs_g: float | None = None
    # Upper bound on sodium, in milligrams.
    max_sodium_mg: float | None = None
|
||||
|
||||
|
||||
class RecipeRequest(BaseModel):
|
||||
pantry_items: list[str]
|
||||
level: int = Field(default=1, ge=1, le=4)
|
||||
|
|
@ -48,7 +72,10 @@ class RecipeRequest(BaseModel):
|
|||
hard_day_mode: bool = False
|
||||
max_missing: int | None = None
|
||||
style_id: str | None = None
|
||||
category: str | None = None
|
||||
tier: str = "free"
|
||||
has_byok: bool = False
|
||||
wildcard_confirmed: bool = False
|
||||
allergies: list[str] = Field(default_factory=list)
|
||||
nutrition_filters: NutritionFilters = Field(default_factory=NutritionFilters)
|
||||
excluded_ids: list[int] = Field(default_factory=list)
|
||||
|
|
|
|||
647
app/services/recipe/assembly_recipes.py
Normal file
647
app/services/recipe/assembly_recipes.py
Normal file
|
|
@ -0,0 +1,647 @@
|
|||
"""
|
||||
Assembly-dish template matcher for Level 1/2.
|
||||
|
||||
Assembly dishes (burritos, stir fry, fried rice, omelettes, sandwiches, etc.)
|
||||
are defined by structural roles -- container + filler + sauce -- not by a fixed
|
||||
ingredient list. The corpus can never fully cover them.
|
||||
|
||||
This module fires when the pantry covers all *required* roles of a template.
|
||||
Results are injected at the top of the Level 1/2 suggestion list with negative
|
||||
ids (client displays them identically to corpus recipes).
|
||||
|
||||
Templates define:
|
||||
- required: list of role sets -- ALL must have at least one pantry match
|
||||
- optional: role sets whose matched items are shown as extras
|
||||
- directions: short cooking instructions
|
||||
- notes: serving suggestions / variations
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from dataclasses import dataclass
|
||||
|
||||
from app.models.schemas.recipe import RecipeSuggestion
|
||||
|
||||
|
||||
# IDs in range -100..-1 are reserved for assembly-generated suggestions
|
||||
_ASSEMBLY_ID_START = -1
|
||||
|
||||
|
||||
@dataclass
class AssemblyRole:
    """One role in a template (e.g. 'protein').

    display: human-readable role label
    keywords: lowercase terms matched against lowercased pantry items.
        _matches_role treats a single-word keyword as a whole-word match
        (it must equal one whitespace-separated token of the item) and a
        keyword containing a space as a plain substring match, so stems
        such as 'burger patt' only work in the multi-word form.
    """
    display: str
    keywords: list[str]
|
||||
|
||||
|
||||
@dataclass
class AssemblyTemplate:
    """A template assembly dish.

    id: template identifier (the templates in this module use negative ids)
    title: base dish name, e.g. "Burrito / Taco"
    required: roles that must ALL have at least one pantry match
    optional: roles whose matched items are shown as extras
    directions: short cooking instructions
    notes: serving suggestions / variations
    """
    id: int
    title: str
    required: list[AssemblyRole]
    optional: list[AssemblyRole]
    directions: list[str]
    notes: str = ""
|
||||
|
||||
|
||||
def _matches_role(role: AssemblyRole, pantry_set: set[str]) -> list[str]:
|
||||
"""Return pantry items that satisfy this role.
|
||||
|
||||
Single-word keywords use whole-word matching (word must appear as a
|
||||
discrete token) so short words like 'pea', 'ham', 'egg' don't false-match
|
||||
inside longer words like 'peanut', 'hamburger', 'eggnog'.
|
||||
Multi-word keywords (e.g. 'burger patt') use substring matching.
|
||||
"""
|
||||
hits: list[str] = []
|
||||
for item in pantry_set:
|
||||
item_lower = item.lower()
|
||||
item_words = set(item_lower.split())
|
||||
for kw in role.keywords:
|
||||
if " " in kw:
|
||||
# Multi-word: substring match
|
||||
if kw in item_lower:
|
||||
hits.append(item)
|
||||
break
|
||||
else:
|
||||
# Single-word: whole-word match only
|
||||
if kw in item_words:
|
||||
hits.append(item)
|
||||
break
|
||||
return hits
|
||||
|
||||
|
||||
def _pick_one(items: list[str], seed: int) -> str:
|
||||
"""Deterministically pick one item from a list using a seed."""
|
||||
return sorted(items)[seed % len(items)]
|
||||
|
||||
|
||||
def _pantry_hash(pantry_set: set[str]) -> int:
|
||||
"""Stable integer derived from pantry contents — used for deterministic picks."""
|
||||
key = ",".join(sorted(pantry_set))
|
||||
return int(hashlib.md5(key.encode()).hexdigest(), 16) # noqa: S324 — non-crypto use
|
||||
|
||||
|
||||
def _keyword_label(item: str, role: AssemblyRole) -> str:
|
||||
"""Return a short, clean label derived from the keyword that matched.
|
||||
|
||||
Uses the longest matching keyword (most specific) as the base label,
|
||||
then title-cases it. This avoids pasting full raw pantry names like
|
||||
'Organic Extra Firm Tofu' into titles — just 'Tofu' instead.
|
||||
"""
|
||||
lower = item.lower()
|
||||
best_kw = ""
|
||||
for kw in role.keywords:
|
||||
if kw in lower and len(kw) > len(best_kw):
|
||||
best_kw = kw
|
||||
label = (best_kw or item).strip().title()
|
||||
# Drop trailing 's' from keywords like "beans" → "Bean" when it reads better
|
||||
return label
|
||||
|
||||
|
||||
def _personalized_title(tmpl: AssemblyTemplate, pantry_set: set[str], seed: int) -> str:
    """Build a specific title using actual pantry items, e.g. 'Chicken & Broccoli Burrito'.

    Uses the matched keyword as the label (not the full pantry item name) so
    'Organic Extra Firm Tofu Block' → 'Tofu' in the title.
    Picks at most two labels; optional roles are consulted in the order
    protein, vegetables, sauce base, cheese.

    Args:
        tmpl: Template whose optional roles supply the candidate labels.
        pantry_set: Pantry item names to match against the roles.
        seed: Integer used to pick deterministically among matching items.

    Returns:
        ``"<label> & <label> <template title>"`` (or a single label) when any
        prioritised optional role is matched, otherwise the template's plain
        title.
    """
    priority_displays = ["protein", "vegetables", "sauce base", "cheese"]
    max_labels = 2

    picked: list[str] = []
    for display in priority_displays:
        for role in tmpl.optional:
            if role.display != display:
                continue
            hits = _matches_role(role, pantry_set)
            if not hits:
                continue
            label = _keyword_label(_pick_one(hits, seed), role)
            if label not in picked:
                picked.append(label)
            # Check the cap after every addition so a template that lists
            # several roles under one display name cannot exceed two labels.
            if len(picked) >= max_labels:
                break
        if len(picked) >= max_labels:
            break

    if not picked:
        return tmpl.title
    return f"{' & '.join(picked)} {tmpl.title}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Template definitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Each template names the roles that must be present (required), the roles
# that improve the dish (optional), plus fixed directions and notes.
# Role keywords are lower-case; a keyword containing a space is matched as a
# substring of the pantry item, a single-word keyword must equal a whole token.
# NOTE(review): ids are negative, presumably to keep templates distinct from
# database recipe ids -- confirm against the caller.
ASSEMBLY_TEMPLATES: list[AssemblyTemplate] = [
    # Burrito / taco: needs only a tortilla-like wrapper.
    AssemblyTemplate(
        id=-1,
        title="Burrito / Taco",
        required=[
            AssemblyRole("tortilla or wrap", [
                "tortilla", "wrap", "taco shell", "flatbread", "pita",
            ]),
        ],
        optional=[
            AssemblyRole("protein", [
                "chicken", "beef", "steak", "pork", "sausage", "hamburger",
                "burger patt", "shrimp", "egg", "tofu", "beans", "bean",
            ]),
            AssemblyRole("rice or starch", ["rice", "quinoa", "potato"]),
            AssemblyRole("cheese", [
                "cheese", "cheddar", "mozzarella", "monterey", "queso",
            ]),
            AssemblyRole("salsa or sauce", [
                "salsa", "hot sauce", "taco sauce", "enchilada", "guacamole",
            ]),
            AssemblyRole("sour cream or yogurt", ["sour cream", "greek yogurt", "crema"]),
            AssemblyRole("vegetables", [
                "pepper", "onion", "tomato", "lettuce", "corn", "avocado",
                "spinach", "broccoli", "zucchini",
            ]),
        ],
        directions=[
            "Warm the tortilla in a dry skillet or microwave for 20 seconds.",
            "Heat any proteins or vegetables in a pan until cooked through.",
            "Layer ingredients down the center: rice first, then protein, then vegetables.",
            "Add cheese, salsa, and sour cream last so they stay cool.",
            "Fold in the sides and roll tightly. Optionally toast seam-side down 1-2 minutes.",
        ],
        notes="Works as a burrito (rolled), taco (folded), or quesadilla (cheese only, pressed flat).",
    ),
    # Fried rice: needs rice.
    AssemblyTemplate(
        id=-2,
        title="Fried Rice",
        required=[
            AssemblyRole("cooked rice", [
                "rice", "leftover rice", "instant rice", "microwavable rice",
            ]),
        ],
        optional=[
            AssemblyRole("protein", [
                "chicken", "beef", "pork", "shrimp", "egg", "tofu",
                "sausage", "ham", "spam",
            ]),
            AssemblyRole("soy sauce or seasoning", [
                "soy sauce", "tamari", "teriyaki", "oyster sauce", "fish sauce",
            ]),
            AssemblyRole("oil", ["oil", "butter", "sesame"]),
            AssemblyRole("egg", ["egg"]),
            AssemblyRole("vegetables", [
                "carrot", "peas", "corn", "onion", "scallion", "green onion",
                "broccoli", "bok choy", "bean sprout", "zucchini", "spinach",
            ]),
            AssemblyRole("garlic or ginger", ["garlic", "ginger"]),
        ],
        directions=[
            "Use day-old cold rice if available -- it fries better than fresh.",
            "Heat oil in a large skillet or wok over high heat.",
            "Add garlic/ginger and any raw vegetables; stir fry 2-3 minutes.",
            "Push to the side, scramble eggs in the same pan if using.",
            "Add protein (pre-cooked or raw) and cook through.",
            "Add rice, breaking up clumps. Stir fry until heated and lightly toasted.",
            "Season with soy sauce and any other sauces. Toss to combine.",
        ],
        notes="Add a fried egg on top. A drizzle of sesame oil at the end adds a lot.",
    ),
    # Omelette / scramble: needs eggs.
    AssemblyTemplate(
        id=-3,
        title="Omelette / Scramble",
        required=[
            AssemblyRole("eggs", ["egg"]),
        ],
        optional=[
            AssemblyRole("cheese", [
                "cheese", "cheddar", "mozzarella", "feta", "parmesan",
            ]),
            AssemblyRole("vegetables", [
                "pepper", "onion", "tomato", "spinach", "mushroom",
                "broccoli", "zucchini", "scallion", "avocado",
            ]),
            AssemblyRole("protein", [
                "ham", "bacon", "sausage", "chicken", "turkey",
                "smoked salmon",
            ]),
            AssemblyRole("herbs or seasoning", [
                "herb", "basil", "chive", "parsley", "salt", "pepper",
                "hot sauce", "salsa",
            ]),
        ],
        directions=[
            "Beat eggs with a splash of water or milk and a pinch of salt.",
            "Saute any vegetables and proteins in butter or oil over medium heat until softened.",
            "Pour eggs over fillings (scramble) or pour into a clean buttered pan (omelette).",
            "For omelette: cook until nearly set, add fillings to one side, fold over.",
            "For scramble: stir gently over medium-low heat until just set.",
            "Season and serve immediately.",
        ],
        notes="Works for breakfast, lunch, or a quick dinner. Any leftover vegetables work well.",
    ),
    # Stir fry: needs at least one vegetable.
    AssemblyTemplate(
        id=-4,
        title="Stir Fry",
        required=[
            AssemblyRole("vegetables", [
                "pepper", "broccoli", "carrot", "snap pea", "bok choy",
                "zucchini", "mushroom", "corn", "onion", "bean sprout",
                "cabbage", "spinach", "asparagus",
            ]),
        ],
        optional=[
            AssemblyRole("protein", [
                "chicken", "beef", "pork", "shrimp", "tofu", "egg",
            ]),
            AssemblyRole("sauce", [
                "soy sauce", "teriyaki", "oyster sauce", "hoisin",
                "stir fry sauce", "sesame",
            ]),
            AssemblyRole("starch base", ["rice", "noodle", "pasta", "ramen"]),
            AssemblyRole("garlic or ginger", ["garlic", "ginger"]),
            AssemblyRole("oil", ["oil", "sesame"]),
        ],
        directions=[
            "Cut all proteins and vegetables into similar-sized pieces for even cooking.",
            "Heat oil in a wok or large skillet over the highest heat your stove allows.",
            "Cook protein first until nearly done; remove and set aside.",
            "Add dense vegetables (carrots, broccoli) first; quick-cooking veg last.",
            "Return protein, add sauce, and toss everything together for 1-2 minutes.",
            "Serve over rice or noodles.",
        ],
        notes="High heat is the key. Do not crowd the pan -- cook in batches if needed.",
    ),
    # Pasta: needs any pasta shape or noodle.
    AssemblyTemplate(
        id=-5,
        title="Pasta with Whatever You Have",
        required=[
            AssemblyRole("pasta", [
                "pasta", "spaghetti", "penne", "fettuccine", "rigatoni",
                "linguine", "rotini", "farfalle", "macaroni", "noodle",
            ]),
        ],
        optional=[
            AssemblyRole("sauce base", [
                "tomato", "marinara", "pasta sauce", "cream", "butter",
                "olive oil", "pesto",
            ]),
            AssemblyRole("protein", [
                "chicken", "beef", "pork", "shrimp", "sausage", "bacon",
                "ham", "tuna", "canned fish",
            ]),
            AssemblyRole("cheese", [
                "parmesan", "romano", "mozzarella", "ricotta", "feta",
            ]),
            AssemblyRole("vegetables", [
                "tomato", "spinach", "mushroom", "pepper", "zucchini",
                "broccoli", "artichoke", "olive", "onion",
            ]),
            AssemblyRole("garlic", ["garlic"]),
        ],
        directions=[
            "Cook pasta in well-salted boiling water until al dente. Reserve 1 cup pasta water.",
            "While pasta cooks, saute garlic in olive oil over medium heat.",
            "Add proteins and cook through; add vegetables until tender.",
            "Add sauce base and simmer 5 minutes. Add pasta water to loosen if needed.",
            "Toss cooked pasta with sauce. Finish with cheese if using.",
        ],
        notes="Pasta water is the secret -- the starch thickens and binds any sauce.",
    ),
    # Sandwich / wrap: needs a bread-like carrier.
    AssemblyTemplate(
        id=-6,
        title="Sandwich / Wrap",
        required=[
            AssemblyRole("bread or wrap", [
                "bread", "roll", "bun", "wrap", "tortilla", "pita",
                "bagel", "english muffin", "croissant", "flatbread",
            ]),
        ],
        optional=[
            AssemblyRole("protein", [
                "chicken", "turkey", "ham", "roast beef", "tuna", "egg",
                "bacon", "salami", "pepperoni", "tofu", "tempeh",
            ]),
            AssemblyRole("cheese", [
                "cheese", "cheddar", "swiss", "provolone", "mozzarella",
            ]),
            AssemblyRole("condiment", [
                "mayo", "mustard", "ketchup", "hot sauce", "ranch",
                "hummus", "pesto", "aioli",
            ]),
            AssemblyRole("vegetables", [
                "lettuce", "tomato", "onion", "cucumber", "avocado",
                "pepper", "sprout", "arugula",
            ]),
        ],
        directions=[
            "Toast bread if desired.",
            "Spread condiments on both inner surfaces.",
            "Layer protein first, then cheese, then vegetables.",
            "Press together and cut diagonally.",
        ],
        notes="Leftovers, deli meat, canned fish -- nearly anything works between bread.",
    ),
    # Grain bowl: needs a cooked-grain base.
    AssemblyTemplate(
        id=-7,
        title="Grain Bowl",
        required=[
            AssemblyRole("grain base", [
                "rice", "quinoa", "farro", "barley", "couscous",
                "bulgur", "freekeh", "polenta",
            ]),
        ],
        optional=[
            AssemblyRole("protein", [
                "chicken", "beef", "pork", "tofu", "egg", "shrimp",
                "beans", "bean", "lentil", "chickpea",
            ]),
            AssemblyRole("vegetables", [
                "roasted", "broccoli", "carrot", "kale", "spinach",
                "cucumber", "tomato", "corn", "edamame", "avocado",
                "beet", "sweet potato",
            ]),
            AssemblyRole("dressing or sauce", [
                "dressing", "tahini", "vinaigrette", "sauce",
                "olive oil", "lemon", "soy sauce",
            ]),
            AssemblyRole("toppings", [
                "nut", "seed", "feta", "parmesan", "herb",
            ]),
        ],
        directions=[
            "Cook grain base according to package directions; season with salt.",
            "Roast or saute vegetables with oil, salt, and pepper until tender.",
            "Cook or slice protein.",
            "Arrange grain in a bowl, top with protein and vegetables.",
            "Drizzle with dressing and add toppings.",
        ],
        notes="Great for meal prep -- cook grains and proteins in bulk, assemble bowls all week.",
    ),
    # Soup / stew: needs a liquid base.
    AssemblyTemplate(
        id=-8,
        title="Soup / Stew",
        required=[
            AssemblyRole("broth or liquid base", [
                "broth", "stock", "bouillon",
                "tomato sauce", "coconut milk", "cream of",
            ]),
        ],
        optional=[
            AssemblyRole("protein", [
                "chicken", "beef", "pork", "sausage", "shrimp",
                "beans", "bean", "lentil", "tofu",
            ]),
            AssemblyRole("vegetables", [
                "carrot", "celery", "onion", "potato", "tomato",
                "spinach", "kale", "corn", "pea", "zucchini",
            ]),
            AssemblyRole("starch thickener", [
                "potato", "pasta", "noodle", "rice", "barley",
                "flour", "cornstarch",
            ]),
            AssemblyRole("seasoning", [
                "garlic", "herb", "bay leaf", "thyme", "rosemary",
                "cumin", "paprika", "chili",
            ]),
        ],
        directions=[
            "Saute onion, celery, and garlic in oil until softened, about 5 minutes.",
            "Add any raw proteins and cook until browned.",
            "Add broth or liquid base and bring to a simmer.",
            "Add dense vegetables (carrots, potatoes) first; quick-cooking veg in the last 10 minutes.",
            "Add starches and cook until tender.",
            "Season to taste and simmer at least 20 minutes for flavors to develop.",
        ],
        notes="Soups and stews improve overnight in the fridge. Almost any combination works.",
    ),
    # Casserole / bake: needs both a starch base and a binder.
    AssemblyTemplate(
        id=-9,
        title="Casserole / Bake",
        required=[
            AssemblyRole("starch or base", [
                "pasta", "rice", "potato", "noodle", "bread",
                "tortilla", "polenta", "grits", "macaroni",
            ]),
            AssemblyRole("binder or sauce", [
                "cream of", "cheese", "cream cheese", "sour cream",
                "soup mix", "gravy", "tomato sauce", "marinara",
                "broth", "stock", "milk", "cream",
            ]),
        ],
        optional=[
            AssemblyRole("protein", [
                "chicken", "beef", "pork", "tuna", "ham", "sausage",
                "ground", "shrimp", "beans", "bean", "lentil",
            ]),
            AssemblyRole("vegetables", [
                "broccoli", "corn", "pea", "onion", "mushroom",
                "spinach", "zucchini", "tomato", "pepper", "carrot",
            ]),
            AssemblyRole("cheese topping", [
                "cheddar", "mozzarella", "parmesan", "swiss",
                "cheese", "breadcrumb",
            ]),
            AssemblyRole("seasoning", [
                "garlic", "herb", "thyme", "rosemary", "paprika",
                "onion powder", "salt", "pepper",
            ]),
        ],
        directions=[
            "Preheat oven to 375 F (190 C). Grease a 9x13 baking dish.",
            "Cook starch base (pasta, rice, potato) until just underdone -- it finishes in the oven.",
            "Mix cooked starch with sauce/binder, protein, and vegetables in the dish.",
            "Season generously -- casseroles need salt.",
            "Top with cheese or breadcrumbs if using.",
            "Bake covered 25 minutes, then uncovered 15 minutes until golden and bubbly.",
        ],
        notes="Classic pantry dump dinner. Cream of anything soup is the universal binder.",
    ),
    # Pancakes / waffles / quick bread: needs flour (or mix) and a leavener or egg.
    AssemblyTemplate(
        id=-10,
        title="Pancakes / Waffles / Quick Bread",
        required=[
            AssemblyRole("flour or baking mix", [
                "flour", "bisquick", "pancake mix", "waffle mix",
                "baking mix", "cornmeal", "oats",
            ]),
            AssemblyRole("leavening or egg", [
                "egg", "baking powder", "baking soda", "yeast",
            ]),
        ],
        optional=[
            AssemblyRole("liquid", [
                "milk", "buttermilk", "water", "juice",
                "almond milk", "oat milk", "sour cream",
            ]),
            AssemblyRole("fat", [
                "butter", "oil", "margarine",
            ]),
            AssemblyRole("sweetener", [
                "sugar", "honey", "maple syrup", "brown sugar",
            ]),
            AssemblyRole("mix-ins", [
                # Full spellings: a single-word stem such as "blueberr" can
                # never equal a whole pantry token, so it would never match.
                "blueberry", "blueberries", "banana", "apple", "chocolate chip",
                "nut", "berry", "cinnamon", "vanilla",
            ]),
        ],
        directions=[
            "Whisk dry ingredients (flour, leavening, sugar, salt) together in a bowl.",
            "Whisk wet ingredients (egg, milk, melted butter) in a separate bowl.",
            "Fold wet into dry until just combined -- lumps are fine, do not overmix.",
            "For pancakes: cook on a buttered griddle over medium heat, flip when bubbles form.",
            "For waffles: pour into preheated waffle iron according to manufacturer instructions.",
            "For muffins or quick bread: pour into greased pan, bake at 375 F until a toothpick comes out clean.",
        ],
        notes="Overmixing develops gluten and makes pancakes tough. Stop when just combined.",
    ),
    # Porridge / oatmeal: needs oats or another porridge grain.
    AssemblyTemplate(
        id=-11,
        title="Porridge / Oatmeal",
        required=[
            AssemblyRole("oats or grain porridge", [
                # "oats" and "oatmeal" are listed explicitly because the
                # whole-word rule means "oat" alone does not cover them.
                "oat", "oats", "oatmeal", "porridge", "grits", "semolina",
                "cream of wheat", "polenta", "congee", "rice porridge",
            ]),
        ],
        optional=[
            AssemblyRole("liquid", ["milk", "water", "almond milk", "oat milk", "coconut milk"]),
            AssemblyRole("sweetener", ["sugar", "honey", "maple syrup", "brown sugar", "agave"]),
            AssemblyRole("fruit", ["banana", "berry", "apple", "raisin", "date", "mango"]),
            AssemblyRole("toppings", ["nut", "seed", "granola", "coconut", "chocolate"]),
            AssemblyRole("spice", ["cinnamon", "nutmeg", "vanilla", "cardamom"]),
        ],
        directions=[
            "Combine oats with liquid in a pot — typically 1 part oats to 2 parts liquid.",
            "Bring to a gentle simmer over medium heat, stirring occasionally.",
            "Cook 5 minutes (rolled oats) or 2 minutes (quick oats) until thickened to your liking.",
            "Stir in sweetener and spices.",
            "Top with fruit, nuts, or seeds and serve immediately.",
        ],
        notes="Overnight oats: skip cooking — soak oats in cold milk overnight in the fridge.",
    ),
    # Pie / pot pie: needs pastry or a crust.
    AssemblyTemplate(
        id=-12,
        title="Pie / Pot Pie",
        required=[
            AssemblyRole("pastry or crust", [
                "pastry", "puff pastry", "pie crust", "shortcrust",
                "pie shell", "phyllo", "filo", "biscuit",
            ]),
        ],
        optional=[
            AssemblyRole("protein filling", [
                "chicken", "beef", "pork", "lamb", "turkey", "tofu",
                "mushroom", "beans", "bean", "lentil", "tuna", "salmon",
            ]),
            AssemblyRole("vegetables", [
                "carrot", "pea", "corn", "potato", "onion", "leek",
                "broccoli", "spinach", "mushroom", "parsnip", "swede",
            ]),
            AssemblyRole("sauce or binder", [
                "gravy", "cream of", "stock", "broth", "cream",
                "white sauce", "bechamel", "cheese sauce",
            ]),
            AssemblyRole("seasoning", [
                "thyme", "rosemary", "sage", "garlic", "herb",
                "mustard", "worcestershire",
            ]),
            AssemblyRole("sweet filling", [
                "apple", "berry", "cherry", "pear", "peach",
                "rhubarb", "plum", "custard",
            ]),
        ],
        directions=[
            "For pot pie: make a sauce by combining stock or cream-of-something with cooked vegetables and protein.",
            "Season generously — fillings need more salt than you think.",
            "Pour filling into a baking dish and top with pastry, pressing edges to seal.",
            "Cut a few slits in the top to release steam. Brush with egg wash or milk if available.",
            "Bake at 400 F (200 C) for 25-35 minutes until pastry is golden brown.",
            "For sweet pie: fill unbaked crust with fruit filling, top with second crust or crumble, bake similarly.",
        ],
        notes="Puff pastry from the freezer is the shortcut to impressive pot pies. Thaw in the fridge overnight.",
    ),
    # Pudding / custard: needs a milk and a thickener.
    AssemblyTemplate(
        id=-13,
        title="Pudding / Custard",
        required=[
            AssemblyRole("dairy or dairy-free milk", [
                "milk", "cream", "oat milk", "almond milk",
                "soy milk", "coconut milk",
            ]),
            AssemblyRole("thickener or set", [
                "egg", "cornstarch", "custard powder", "gelatin",
                "agar", "tapioca", "arrowroot",
            ]),
        ],
        optional=[
            AssemblyRole("sweetener", ["sugar", "honey", "maple syrup", "condensed milk"]),
            AssemblyRole("flavouring", [
                "vanilla", "chocolate", "cocoa", "caramel",
                "lemon", "orange", "cinnamon", "nutmeg",
            ]),
            AssemblyRole("starchy base", [
                "rice", "bread", "sponge", "cake", "biscuit",
            ]),
            AssemblyRole("fruit", ["raisin", "sultana", "berry", "banana", "apple"]),
        ],
        directions=[
            "For stovetop custard: whisk eggs and sugar together, heat milk until steaming.",
            "Slowly pour hot milk into egg mixture while whisking constantly (tempering).",
            "Return to low heat and stir until mixture coats the back of a spoon.",
            "For cornstarch pudding: whisk cornstarch into cold milk first, then heat while stirring.",
            "Add flavourings (vanilla, cocoa) once off heat.",
            "Pour into dishes and refrigerate at least 2 hours to set.",
        ],
        notes="UK-style pudding is broad — bread pudding, rice pudding, spotted dick, treacle sponge all count.",
    ),
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def match_assembly_templates(
    pantry_items: list[str],
    pantry_set: set[str],
    excluded_ids: list[int],
) -> list[RecipeSuggestion]:
    """Suggest every assembly dish whose required roles the pantry covers.

    A template qualifies only when each of its required roles matches at
    least one pantry item. Its title is personalised from the pantry using a
    seed derived from the pantry contents, so an identical pantry always
    yields identical titles. Templates whose id appears in ``excluded_ids``
    are skipped (dismiss/load-more support).

    Args:
        pantry_items: Accepted for interface compatibility; not read here.
        pantry_set: Pantry item names used for all role matching.
        excluded_ids: Template ids to leave out of the result.

    Returns:
        Suggestions ordered by ``match_count`` (required roles plus matched
        optional roles), highest first; ties keep template definition order.
    """
    skip_ids = set(excluded_ids)
    pantry_seed = _pantry_hash(pantry_set)

    suggestions: list[RecipeSuggestion] = []
    for template in ASSEMBLY_TEMPLATES:
        if template.id in skip_ids:
            continue

        # Every required role needs at least one matching pantry item.
        if not all(_matches_role(role, pantry_set) for role in template.required):
            continue

        matched_optional = [
            role for role in template.optional if _matches_role(role, pantry_set)
        ]

        suggestions.append(RecipeSuggestion(
            id=template.id,
            title=_personalized_title(template, pantry_set, pantry_seed + template.id),
            match_count=len(template.required) + len(matched_optional),
            element_coverage={},
            swap_candidates=[],
            missing_ingredients=[],
            directions=template.directions,
            notes=template.notes,
            level=1,
            is_wildcard=False,
            nutrition=None,
        ))

    # Best-covered templates first; sorted() is stable, like list.sort().
    return sorted(suggestions, key=lambda s: s.match_count, reverse=True)
|
||||
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from contextlib import nullcontext
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
|
@ -54,6 +55,9 @@ class LLMRecipeGenerator:
|
|||
|
||||
lines: list[str] = [
|
||||
"You are a creative chef. Generate a recipe using the ingredients below.",
|
||||
"IMPORTANT: When you use a pantry item, list it in Ingredients using its exact name from the pantry list. Do not add adjectives, quantities, or cooking states (e.g. use 'butter', not 'unsalted butter' or '2 tbsp butter').",
|
||||
"IMPORTANT: Only use pantry items that make culinary sense for the dish. Do NOT force flavoured/sweetened items (vanilla yoghurt, fruit yoghurt, jam, dessert sauces, flavoured syrups) into savoury dishes. Plain yoghurt, plain cream, and plain dairy are fine in savoury cooking.",
|
||||
"IMPORTANT: Do not default to the same ingredient repeatedly across dishes. If a pantry item does not genuinely improve this specific dish, leave it out.",
|
||||
"",
|
||||
f"Pantry items: {', '.join(safe_pantry)}",
|
||||
]
|
||||
|
|
@ -82,10 +86,13 @@ class LLMRecipeGenerator:
|
|||
|
||||
lines += [
|
||||
"",
|
||||
"Reply in this format:",
|
||||
"Title: <recipe name>",
|
||||
"Reply using EXACTLY this plain-text format — no markdown, no bold, no extra commentary:",
|
||||
"Title: <name of the dish>",
|
||||
"Ingredients: <comma-separated list>",
|
||||
"Directions: <numbered steps>",
|
||||
"Directions:",
|
||||
"1. <first step>",
|
||||
"2. <second step>",
|
||||
"3. <continue for each step>",
|
||||
"Notes: <optional tips>",
|
||||
]
|
||||
|
||||
|
|
@ -101,6 +108,7 @@ class LLMRecipeGenerator:
|
|||
|
||||
lines: list[str] = [
|
||||
"Surprise me with a creative, unexpected recipe.",
|
||||
"Only use ingredients that make culinary sense together. Do not force flavoured/sweetened items (vanilla yoghurt, flavoured syrups, jam) into savoury dishes.",
|
||||
f"Ingredients available: {', '.join(safe_pantry)}",
|
||||
]
|
||||
|
||||
|
|
@ -112,7 +120,13 @@ class LLMRecipeGenerator:
|
|||
|
||||
lines += [
|
||||
"Treat any mystery ingredient as a wildcard — use your imagination.",
|
||||
"Title: <name> | Ingredients: <list> | Directions: <steps>",
|
||||
"Reply using EXACTLY this plain-text format — no markdown, no bold:",
|
||||
"Title: <name of the dish>",
|
||||
"Ingredients: <comma-separated list>",
|
||||
"Directions:",
|
||||
"1. <first step>",
|
||||
"2. <second step>",
|
||||
"Notes: <optional tips>",
|
||||
]
|
||||
|
||||
return "\n".join(lines)
|
||||
|
|
@ -169,8 +183,18 @@ class LLMRecipeGenerator:
|
|||
logger.error("LLM call failed: %s", exc)
|
||||
return ""
|
||||
|
||||
# Matches text wrapped in one or two asterisks on each side (markdown
# italic/bold) and captures the wrapped text, so "**Directions:**" can be
# reduced to "Directions:".
_MD_BOLD = re.compile(r"\*{1,2}([^*]+)\*{1,2}")

def _strip_md(self, text: str) -> str:
    """Return *text* with asterisk emphasis markers removed and edges trimmed.

    Each asterisk-wrapped span is replaced by the text it wraps; leading and
    trailing whitespace is then stripped from the whole string.
    """
    unmarked = self._MD_BOLD.sub(lambda match: match.group(1), text)
    return unmarked.strip()
|
||||
|
||||
def _parse_response(self, response: str) -> dict[str, str | list[str]]:
|
||||
"""Parse LLM response text into structured recipe fields."""
|
||||
"""Parse LLM response text into structured recipe fields.
|
||||
|
||||
Handles both plain-text and markdown-formatted responses. Directions are
|
||||
preserved as newline-separated text so the caller can split on step numbers.
|
||||
"""
|
||||
result: dict[str, str | list[str]] = {
|
||||
"title": "",
|
||||
"ingredients": [],
|
||||
|
|
@ -184,14 +208,17 @@ class LLMRecipeGenerator:
|
|||
def _flush(key: str | None, buf: list[str]) -> None:
|
||||
if key is None or not buf:
|
||||
return
|
||||
text = " ".join(buf).strip()
|
||||
if key == "ingredients":
|
||||
if key == "directions":
|
||||
result["directions"] = "\n".join(buf)
|
||||
elif key == "ingredients":
|
||||
text = " ".join(buf)
|
||||
result["ingredients"] = [i.strip() for i in text.split(",") if i.strip()]
|
||||
else:
|
||||
result[key] = text
|
||||
result[key] = " ".join(buf).strip()
|
||||
|
||||
for line in response.splitlines():
|
||||
lower = line.lower().strip()
|
||||
for raw_line in response.splitlines():
|
||||
line = self._strip_md(raw_line)
|
||||
lower = line.lower()
|
||||
if lower.startswith("title:"):
|
||||
_flush(current_key, buffer)
|
||||
current_key, buffer = "title", [line.split(":", 1)[1].strip()]
|
||||
|
|
@ -200,12 +227,18 @@ class LLMRecipeGenerator:
|
|||
current_key, buffer = "ingredients", [line.split(":", 1)[1].strip()]
|
||||
elif lower.startswith("directions:"):
|
||||
_flush(current_key, buffer)
|
||||
current_key, buffer = "directions", [line.split(":", 1)[1].strip()]
|
||||
rest = line.split(":", 1)[1].strip()
|
||||
current_key, buffer = "directions", ([rest] if rest else [])
|
||||
elif lower.startswith("notes:"):
|
||||
_flush(current_key, buffer)
|
||||
current_key, buffer = "notes", [line.split(":", 1)[1].strip()]
|
||||
elif current_key and line.strip():
|
||||
buffer.append(line.strip())
|
||||
elif current_key is None and line.strip() and ":" not in line:
|
||||
# Before any section header: a 2-10 word colon-free line is the dish name
|
||||
words = line.split()
|
||||
if 2 <= len(words) <= 10 and not result["title"]:
|
||||
result["title"] = line.strip()
|
||||
|
||||
_flush(current_key, buffer)
|
||||
return result
|
||||
|
|
@ -230,17 +263,37 @@ class LLMRecipeGenerator:
|
|||
parsed = self._parse_response(response)
|
||||
|
||||
raw_directions = parsed.get("directions", "")
|
||||
directions_list: list[str] = (
|
||||
[s.strip() for s in raw_directions.split(".") if s.strip()]
|
||||
if isinstance(raw_directions, str)
|
||||
else list(raw_directions)
|
||||
)
|
||||
if isinstance(raw_directions, str):
|
||||
# Split on newlines; strip leading step numbers ("1.", "2.", "- ", "* ")
|
||||
_step_prefix = re.compile(r"^\s*(?:\d+[.)]\s*|[-*]\s+)")
|
||||
directions_list = [
|
||||
_step_prefix.sub("", s).strip()
|
||||
for s in raw_directions.splitlines()
|
||||
if s.strip()
|
||||
]
|
||||
else:
|
||||
directions_list = list(raw_directions)
|
||||
raw_notes = parsed.get("notes", "")
|
||||
notes_str: str = raw_notes if isinstance(raw_notes, str) else ""
|
||||
|
||||
all_ingredients: list[str] = list(parsed.get("ingredients", []))
|
||||
pantry_set = {item.lower() for item in (req.pantry_items or [])}
|
||||
missing = [i for i in all_ingredients if i.lower() not in pantry_set]
|
||||
|
||||
# Strip leading quantities/units (e.g. "2 cups rice" → "rice") before
|
||||
# checking against pantry, since LLMs return formatted ingredient strings.
|
||||
_qty_re = re.compile(
|
||||
r"^\s*[\d½¼¾⅓⅔]+[\s/\-]*" # leading digits or fractions
|
||||
r"(?:cup|cups|tbsp|tsp|tablespoon|teaspoon|oz|lb|lbs|g|kg|"
|
||||
r"can|cans|clove|cloves|bunch|package|pkg|slice|slices|"
|
||||
r"piece|pieces|pinch|dash|handful|head|heads|large|small|medium"
|
||||
r")s?\b[,\s]*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
missing = []
|
||||
for ing in all_ingredients:
|
||||
bare = _qty_re.sub("", ing).strip().lower()
|
||||
if bare not in pantry_set and ing.lower() not in pantry_set:
|
||||
missing.append(bare or ing)
|
||||
|
||||
suggestion = RecipeSuggestion(
|
||||
id=0,
|
||||
|
|
|
|||
|
|
@ -20,13 +20,353 @@ from typing import TYPE_CHECKING
|
|||
if TYPE_CHECKING:
|
||||
from app.db.store import Store
|
||||
|
||||
from app.models.schemas.recipe import GroceryLink, RecipeRequest, RecipeResult, RecipeSuggestion, SwapCandidate
|
||||
from app.models.schemas.recipe import GroceryLink, NutritionPanel, RecipeRequest, RecipeResult, RecipeSuggestion, SwapCandidate
|
||||
from app.services.recipe.assembly_recipes import match_assembly_templates
|
||||
from app.services.recipe.element_classifier import ElementClassifier
|
||||
from app.services.recipe.grocery_links import GroceryLinkBuilder
|
||||
from app.services.recipe.substitution_engine import SubstitutionEngine
|
||||
|
||||
_LEFTOVER_DAILY_MAX_FREE = 5
|
||||
|
||||
# Words that carry no ingredient-identity signal — stripped before overlap scoring
|
||||
_SWAP_STOPWORDS = frozenset({
|
||||
"a", "an", "the", "of", "in", "for", "with", "and", "or",
|
||||
"to", "from", "at", "by", "as", "on",
|
||||
})
|
||||
|
||||
# Maps product-label substrings to recipe-corpus canonical terms.
|
||||
# Kept in sync with Store._FTS_SYNONYMS — both must agree on canonical names.
|
||||
# Used to expand pantry_set so single-word recipe ingredients can match
|
||||
# multi-word product names (e.g. "hamburger" satisfied by "burger patties").
|
||||
_PANTRY_LABEL_SYNONYMS: dict[str, str] = {
|
||||
"burger patt": "hamburger",
|
||||
"beef patt": "hamburger",
|
||||
"ground beef": "hamburger",
|
||||
"ground chuck": "hamburger",
|
||||
"ground round": "hamburger",
|
||||
"mince": "hamburger",
|
||||
"veggie burger": "hamburger",
|
||||
"beyond burger": "hamburger",
|
||||
"impossible burger": "hamburger",
|
||||
"plant burger": "hamburger",
|
||||
"chicken patt": "chicken patty",
|
||||
"kielbasa": "sausage",
|
||||
"bratwurst": "sausage",
|
||||
"frankfurter": "hotdog",
|
||||
"wiener": "hotdog",
|
||||
"chicken breast": "chicken",
|
||||
"chicken thigh": "chicken",
|
||||
"chicken drumstick": "chicken",
|
||||
"chicken wing": "chicken",
|
||||
"rotisserie chicken": "chicken",
|
||||
"chicken tender": "chicken",
|
||||
"chicken strip": "chicken",
|
||||
"chicken piece": "chicken",
|
||||
"fake chicken": "chicken",
|
||||
"plant chicken": "chicken",
|
||||
"vegan chicken": "chicken",
|
||||
"daring": "chicken",
|
||||
"gardein chick": "chicken",
|
||||
"quorn chick": "chicken",
|
||||
"chick'n": "chicken",
|
||||
"chikn": "chicken",
|
||||
"not-chicken": "chicken",
|
||||
"no-chicken": "chicken",
|
||||
# Plant-based beef subs → broad "beef" (strips ≠ ground; texture matters)
|
||||
"not-beef": "beef",
|
||||
"no-beef": "beef",
|
||||
"plant beef": "beef",
|
||||
"vegan beef": "beef",
|
||||
# Plant-based pork subs
|
||||
"not-pork": "pork",
|
||||
"no-pork": "pork",
|
||||
"plant pork": "pork",
|
||||
"vegan pork": "pork",
|
||||
"omnipork": "pork",
|
||||
"omni pork": "pork",
|
||||
# Generic alt-meat catch-alls → broad "beef"
|
||||
"fake meat": "beef",
|
||||
"plant meat": "beef",
|
||||
"vegan meat": "beef",
|
||||
"meat-free": "beef",
|
||||
"meatless": "beef",
|
||||
"pork chop": "pork",
|
||||
"pork loin": "pork",
|
||||
"pork tenderloin": "pork",
|
||||
"marinara": "tomato sauce",
|
||||
"pasta sauce": "tomato sauce",
|
||||
"spaghetti sauce": "tomato sauce",
|
||||
"pizza sauce": "tomato sauce",
|
||||
"macaroni": "pasta",
|
||||
"noodles": "pasta",
|
||||
"spaghetti": "pasta",
|
||||
"penne": "pasta",
|
||||
"fettuccine": "pasta",
|
||||
"rigatoni": "pasta",
|
||||
"linguine": "pasta",
|
||||
"rotini": "pasta",
|
||||
"farfalle": "pasta",
|
||||
"shredded cheese": "cheese",
|
||||
"sliced cheese": "cheese",
|
||||
"american cheese": "cheese",
|
||||
"cheddar": "cheese",
|
||||
"mozzarella": "cheese",
|
||||
"heavy cream": "cream",
|
||||
"whipping cream": "cream",
|
||||
"half and half": "cream",
|
||||
"burger bun": "buns",
|
||||
"hamburger bun": "buns",
|
||||
"hot dog bun": "buns",
|
||||
"bread roll": "buns",
|
||||
"dinner roll": "buns",
|
||||
# Tortillas / wraps — assembly dishes (burritos, tacos, quesadillas)
|
||||
"flour tortilla": "tortillas",
|
||||
"corn tortilla": "tortillas",
|
||||
"tortilla wrap": "tortillas",
|
||||
"soft taco shell": "tortillas",
|
||||
"taco shell": "taco shells",
|
||||
"pita bread": "pita",
|
||||
"flatbread": "flatbread",
|
||||
# Canned beans — extremely interchangeable in assembly dishes
|
||||
"black bean": "beans",
|
||||
"pinto bean": "beans",
|
||||
"kidney bean": "beans",
|
||||
"refried bean": "beans",
|
||||
"chickpea": "beans",
|
||||
"garbanzo": "beans",
|
||||
# Rice variants
|
||||
"white rice": "rice",
|
||||
"brown rice": "rice",
|
||||
"jasmine rice": "rice",
|
||||
"basmati rice": "rice",
|
||||
"instant rice": "rice",
|
||||
"microwavable rice": "rice",
|
||||
# Salsa / hot sauce
|
||||
"hot sauce": "salsa",
|
||||
"taco sauce": "salsa",
|
||||
"enchilada sauce": "salsa",
|
||||
# Sour cream / Greek yogurt — functional substitutes
|
||||
"greek yogurt": "sour cream",
|
||||
# Frozen/prepackaged meal token extraction — handled by individual token
|
||||
# fallback in _normalize_for_fts; these are the most common single-serve meal types
|
||||
"lean cuisine": "casserole",
|
||||
"stouffer": "casserole",
|
||||
"healthy choice": "casserole",
|
||||
"marie callender": "casserole",
|
||||
}
|
||||
|
||||
|
||||
# Matches leading quantity/unit prefixes in recipe ingredient strings,
|
||||
# e.g. "2 cups flour" → "flour", "1/2 c. ketchup" → "ketchup",
|
||||
# "3 oz. butter" → "butter"
|
||||
_QUANTITY_PREFIX = re.compile(
|
||||
r"^\s*(?:\d+(?:[./]\d+)?\s*)?" # optional leading number (1, 1/2, 2.5)
|
||||
r"(?:to\s+\d+\s*)?" # optional "to N" range
|
||||
r"(?:c\.|cup|cups|tbsp|tsp|oz|lb|lbs|g|kg|ml|l|"
|
||||
r"can|cans|pkg|pkg\.|package|slice|slices|clove|cloves|"
|
||||
r"small|medium|large|bunch|head|piece|pieces|"
|
||||
r"pinch|dash|handful|sprig|sprigs)\s*\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
# Preparation-state words that modify an ingredient without changing what it is.
|
||||
# Stripped from both ends so "melted butter", "butter, melted" both → "butter".
|
||||
_PREP_STATES = re.compile(
|
||||
r"\b(melted|softened|cold|warm|hot|room.temperature|"
|
||||
r"diced|sliced|chopped|minced|grated|shredded|shredded|beaten|whipped|"
|
||||
r"cooked|raw|frozen|canned|dried|dehydrated|marinated|seasoned|"
|
||||
r"roasted|toasted|ground|crushed|pressed|peeled|seeded|pitted|"
|
||||
r"boneless|skinless|trimmed|halved|quartered|julienned|"
|
||||
r"thinly|finely|roughly|coarsely|freshly|lightly|"
|
||||
r"packed|heaping|level|sifted|divided|optional)\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
# Trailing comma + optional prep state (e.g. "butter, melted")
|
||||
_TRAILING_PREP = re.compile(r",\s*\w+$")
|
||||
|
||||
|
||||
# Maps prep-state words to human-readable instruction templates.
|
||||
# {ingredient} is replaced with the actual ingredient name.
|
||||
# None means the state is passive (frozen, canned) — no note needed.
|
||||
_PREP_INSTRUCTIONS: dict[str, str | None] = {
|
||||
"melted": "Melt the {ingredient} before starting.",
|
||||
"softened": "Let the {ingredient} soften to room temperature before using.",
|
||||
"room temperature": "Bring the {ingredient} to room temperature before using.",
|
||||
"beaten": "Beat the {ingredient} lightly before adding.",
|
||||
"whipped": "Whip the {ingredient} until soft peaks form.",
|
||||
"sifted": "Sift the {ingredient} before measuring.",
|
||||
"toasted": "Toast the {ingredient} in a dry pan until fragrant.",
|
||||
"roasted": "Roast the {ingredient} before using.",
|
||||
"pressed": "Press the {ingredient} to remove excess moisture.",
|
||||
"diced": "Dice the {ingredient} into small pieces.",
|
||||
"sliced": "Slice the {ingredient} thinly.",
|
||||
"chopped": "Chop the {ingredient} roughly.",
|
||||
"minced": "Mince the {ingredient} finely.",
|
||||
"grated": "Grate the {ingredient}.",
|
||||
"shredded": "Shred the {ingredient}.",
|
||||
"ground": "Grind the {ingredient}.",
|
||||
"crushed": "Crush the {ingredient}.",
|
||||
"peeled": "Peel the {ingredient} before use.",
|
||||
"seeded": "Remove seeds from the {ingredient}.",
|
||||
"pitted": "Pit the {ingredient} before use.",
|
||||
"trimmed": "Trim any excess from the {ingredient}.",
|
||||
"julienned": "Cut the {ingredient} into thin matchstick strips.",
|
||||
"cooked": "Pre-cook the {ingredient} before adding.",
|
||||
# Passive states — ingredient is used as-is, no prep note needed
|
||||
"cold": None,
|
||||
"warm": None,
|
||||
"hot": None,
|
||||
"raw": None,
|
||||
"frozen": None,
|
||||
"canned": None,
|
||||
"dried": None,
|
||||
"dehydrated": None,
|
||||
"marinated": None,
|
||||
"seasoned": None,
|
||||
"boneless": None,
|
||||
"skinless": None,
|
||||
"divided": None,
|
||||
"optional": None,
|
||||
"fresh": None,
|
||||
"freshly": None,
|
||||
"thinly": None,
|
||||
"finely": None,
|
||||
"roughly": None,
|
||||
"coarsely": None,
|
||||
"lightly": None,
|
||||
"packed": None,
|
||||
"heaping": None,
|
||||
"level": None,
|
||||
}
|
||||
|
||||
# Matches any known prep-state word (actionable or passive) in an ingredient string
|
||||
_PREP_STATE_SEARCH = re.compile(
|
||||
r"\b(" + "|".join(re.escape(k) for k in _PREP_INSTRUCTIONS) + r")\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _strip_quantity(ingredient: str) -> str:
    """Reduce a recipe ingredient string to its bare ingredient name.

    Removes, in order: a leading quantity/unit prefix, any remaining leading
    count (integer, decimal or fraction), one trailing ", <word>" qualifier,
    and prep-state words anywhere in the string.

    e.g. "2 tbsp melted butter"  → "butter"
         "butter, melted"        → "butter"
         "1/4 cup flour, sifted" → "flour"
         "1/2 onion"             → "onion"

    Returns the original string unchanged when stripping would leave nothing
    (for example when the input consists only of a prep-state word).
    """
    stripped = _QUANTITY_PREFIX.sub("", ingredient).strip()
    # _QUANTITY_PREFIX only fires when a unit is present, so a bare count is
    # still in place here ("3 eggs", "1/2 onion", "1.5 lemons") — drop it.
    stripped = re.sub(r"^\d+(?:[./]\d+)?\s+", "", stripped)
    # Strip a trailing ", word" qualifier (typically a prep state: "butter, melted")
    stripped = _TRAILING_PREP.sub("", stripped).strip()
    # Strip prep-state words (may be leading or embedded)
    stripped = _PREP_STATES.sub("", stripped).strip()
    # Collapse the whitespace runs left behind by the removals above
    stripped = re.sub(r"\s{2,}", " ", stripped).strip()
    return stripped or ingredient
|
||||
|
||||
|
||||
def _prep_note_for(ingredient: str) -> str | None:
    """Return a human-readable prep instruction for this ingredient string, or None.

    Scans every prep-state word in the string, left to right, and uses the
    first one that has an instruction template. Passive states and adverbs
    (mapped to None in _PREP_INSTRUCTIONS) are skipped rather than ending the
    search, so "finely chopped onion" still produces the "chopped" note.

    e.g. "2 tbsp melted butter" → "Melt the butter before starting."
         "onion, diced"         → "Dice the onion into small pieces."
         "thinly sliced onion"  → "Slice the onion thinly."
         "frozen peas"          → None  (passive state, no action needed)
    """
    for match in _PREP_STATE_SEARCH.finditer(ingredient):
        template = _PREP_INSTRUCTIONS.get(match.group(1).lower())
        if template:
            # Use the stripped ingredient name as the subject of the sentence
            return template.format(ingredient=_strip_quantity(ingredient))
    return None
|
||||
|
||||
|
||||
def _expand_pantry_set(pantry_items: list[str]) -> set[str]:
    """Build the lookup set used for pantry coverage checks.

    The result contains every pantry item lowercased and trimmed, plus the
    canonical term of each _PANTRY_LABEL_SYNONYMS pattern that occurs as a
    substring of an item. This is what lets a single-word recipe ingredient
    ("hamburger", "chicken") be satisfied by a product-label pantry entry
    ("burger patties", "rotisserie chicken").

    NOTE(review): matching is plain substring containment, so a short pattern
    can hit inside a longer word (e.g. "mince" inside "minced garlic") —
    confirm whether that is intended.
    """
    labels = [entry.lower().strip() for entry in pantry_items]
    result: set[str] = set(labels)
    result.update(
        canonical
        for label in labels
        for stem, canonical in _PANTRY_LABEL_SYNONYMS.items()
        if stem in label
    )
    return result
|
||||
|
||||
|
||||
def _ingredient_in_pantry(ingredient: str, pantry_set: set[str]) -> bool:
    """Return True if the recipe ingredient is satisfied by the pantry.

    ``pantry_set`` is expected to hold lowercased pantry labels plus their
    canonical synonyms (see _expand_pantry_set).

    Checks three layers in order:
    1. Exact match, either of the name after quantity/prep stripping or of the
       lowercased ingredient as written (so "frozen peas" matches a pantry
       entry "frozen peas" even though stripping reduces it to "peas").
    2. Synonym lookup: a _PANTRY_LABEL_SYNONYMS pattern occurs in the
       ingredient and its canonical term is in pantry_set
       (handles "ground beef" matched by "burger patties" via shared canonical).
    3. Token subset: every content token of the stripped name is itself an
       entry of pantry_set (e.g. "salt and pepper" when both are stocked).
    """
    raw = ingredient.lower().strip()
    clean = _strip_quantity(ingredient).lower()
    if clean in pantry_set or raw in pantry_set:
        return True

    # Check if this recipe ingredient maps to a canonical that's in pantry.
    for pattern, canonical in _PANTRY_LABEL_SYNONYMS.items():
        if canonical not in pantry_set:
            continue
        # Prep-state stripping can delete a word that is part of a multi-word
        # pattern ("ground beef" → "beef", "hot sauce" → "sauce"), so those are
        # also tested against the unstripped text. Single-word patterns stay on
        # the stripped name only, so a stem such as "mince" cannot hit inside
        # the prep word "minced".
        if pattern in clean or (" " in pattern and pattern in raw):
            return True

    # All remaining content tokens must individually be pantry entries.
    tokens = [t for t in clean.split() if t not in _SWAP_STOPWORDS and len(t) > 2]
    return bool(tokens) and all(t in pantry_set for t in tokens)
|
||||
|
||||
|
||||
def _content_tokens(text: str) -> frozenset[str]:
    """Return the distinct lowercase words of *text* used for overlap scoring.

    Words are split on whitespace; stopwords (_SWAP_STOPWORDS) and
    single-character words are dropped.
    """
    kept: set[str] = set()
    for word in text.lower().split():
        if len(word) <= 1 or word in _SWAP_STOPWORDS:
            continue
        kept.add(word)
    return frozenset(kept)
|
||||
|
||||
|
||||
def _pantry_creative_swap(required: str, pantry_items: set[str]) -> str | None:
    """Return a pantry item that's a plausible creative substitute, or None.

    Requires ≥2 shared content tokens AND ≥50% bidirectional overlap so that
    near-identical multi-word items ("cream of mushroom soup" vs
    "cream of potato soup") qualify while single-word ingredients
    (butter, flour) don't accidentally match supersets
    (peanut butter, bread flour).

    The best-scoring item wins. Ties go to the alphabetically first item, so
    the result does not depend on set iteration order.
    """
    req_tokens = _content_tokens(required)
    if len(req_tokens) < 2:
        return None  # single-word ingredients must already be in pantry_set

    required_lower = required.lower()
    best: str | None = None
    best_score = 0.0
    # Iterate in sorted order: the order of a set of strings can change from
    # one interpreter run to the next, which would make tie-breaking (and so
    # the suggested swap) non-reproducible.
    for item in sorted(pantry_items):
        if item.lower() == required_lower:
            continue
        pan_tokens = _content_tokens(item)
        overlap = len(req_tokens & pan_tokens)
        if overlap < 2:
            continue  # also guarantees pan_tokens is non-empty below
        # Bidirectional: the shared tokens must cover at least half of BOTH names.
        score = min(overlap / len(req_tokens), overlap / len(pan_tokens))
        if score >= 0.5 and score > best_score:
            best_score = score
            best = item
    return best
|
||||
|
||||
|
||||
# Method complexity classification patterns
|
||||
_EASY_METHODS = re.compile(
|
||||
r"\b(microwave|mix|stir|blend|toast|assemble|heat)\b", re.IGNORECASE
|
||||
|
|
@ -95,7 +435,7 @@ class RecipeEngine:
|
|||
|
||||
profiles = self._classifier.classify_batch(req.pantry_items)
|
||||
gaps = self._classifier.identify_gaps(profiles)
|
||||
pantry_set = {item.lower().strip() for item in req.pantry_items}
|
||||
pantry_set = _expand_pantry_set(req.pantry_items)
|
||||
|
||||
if req.level >= 3:
|
||||
from app.services.recipe.llm_recipe import LLMRecipeGenerator
|
||||
|
|
@ -103,7 +443,17 @@ class RecipeEngine:
|
|||
return gen.generate(req, profiles, gaps)
|
||||
|
||||
# Level 1 & 2: deterministic path
|
||||
rows = self._store.search_recipes_by_ingredients(req.pantry_items, limit=20)
|
||||
nf = req.nutrition_filters
|
||||
rows = self._store.search_recipes_by_ingredients(
|
||||
req.pantry_items,
|
||||
limit=20,
|
||||
category=req.category or None,
|
||||
max_calories=nf.max_calories,
|
||||
max_sugar_g=nf.max_sugar_g,
|
||||
max_carbs_g=nf.max_carbs_g,
|
||||
max_sodium_mg=nf.max_sodium_mg,
|
||||
excluded_ids=req.excluded_ids or [],
|
||||
)
|
||||
suggestions = []
|
||||
|
||||
for row in rows:
|
||||
|
|
@ -114,10 +464,31 @@ class RecipeEngine:
|
|||
except Exception:
|
||||
ingredient_names = []
|
||||
|
||||
# Compute missing ingredients
|
||||
missing = [n for n in ingredient_names if n.lower() not in pantry_set]
|
||||
# Compute missing ingredients, detecting pantry coverage first.
|
||||
# When covered, collect any prep-state annotations (e.g. "melted butter"
|
||||
# → note "Melt the butter before starting.") to surface separately.
|
||||
swap_candidates: list[SwapCandidate] = []
|
||||
missing: list[str] = []
|
||||
prep_note_set: set[str] = set()
|
||||
for n in ingredient_names:
|
||||
if _ingredient_in_pantry(n, pantry_set):
|
||||
note = _prep_note_for(n)
|
||||
if note:
|
||||
prep_note_set.add(note)
|
||||
continue
|
||||
swap_item = _pantry_creative_swap(n, pantry_set)
|
||||
if swap_item:
|
||||
swap_candidates.append(SwapCandidate(
|
||||
original_name=n,
|
||||
substitute_name=swap_item,
|
||||
constraint_label="pantry_swap",
|
||||
explanation=f"You have {swap_item} — use it in place of {n}.",
|
||||
compensation_hints=[],
|
||||
))
|
||||
else:
|
||||
missing.append(n)
|
||||
|
||||
# Filter by max_missing
|
||||
# Filter by max_missing (pantry swaps don't count as missing)
|
||||
if req.max_missing is not None and len(missing) > req.max_missing:
|
||||
continue
|
||||
|
||||
|
|
@ -133,8 +504,7 @@ class RecipeEngine:
|
|||
if complexity == "involved":
|
||||
continue
|
||||
|
||||
# Build swap candidates for Level 2
|
||||
swap_candidates: list[SwapCandidate] = []
|
||||
# Level 2: also add dietary constraint swaps from substitution_pairs
|
||||
if req.level == 2 and req.constraints:
|
||||
for ing in ingredient_names:
|
||||
for constraint in req.constraints:
|
||||
|
|
@ -155,6 +525,22 @@ class RecipeEngine:
|
|||
except Exception:
|
||||
coverage_raw = {}
|
||||
|
||||
servings = row.get("servings") or None
|
||||
nutrition = NutritionPanel(
|
||||
calories=row.get("calories"),
|
||||
fat_g=row.get("fat_g"),
|
||||
protein_g=row.get("protein_g"),
|
||||
carbs_g=row.get("carbs_g"),
|
||||
fiber_g=row.get("fiber_g"),
|
||||
sugar_g=row.get("sugar_g"),
|
||||
sodium_mg=row.get("sodium_mg"),
|
||||
servings=servings,
|
||||
estimated=bool(row.get("nutrition_estimated", 0)),
|
||||
)
|
||||
has_nutrition = any(
|
||||
v is not None
|
||||
for v in (nutrition.calories, nutrition.sugar_g, nutrition.carbs_g)
|
||||
)
|
||||
suggestions.append(RecipeSuggestion(
|
||||
id=row["id"],
|
||||
title=row["title"],
|
||||
|
|
@ -162,9 +548,20 @@ class RecipeEngine:
|
|||
element_coverage=coverage_raw,
|
||||
swap_candidates=swap_candidates,
|
||||
missing_ingredients=missing,
|
||||
prep_notes=sorted(prep_note_set),
|
||||
level=req.level,
|
||||
nutrition=nutrition if has_nutrition else None,
|
||||
))
|
||||
|
||||
# Prepend assembly-dish templates (burrito, stir fry, omelette, etc.)
|
||||
# These fire regardless of corpus coverage — any pantry can make a burrito.
|
||||
assembly = match_assembly_templates(
|
||||
pantry_items=req.pantry_items,
|
||||
pantry_set=pantry_set,
|
||||
excluded_ids=req.excluded_ids or [],
|
||||
)
|
||||
suggestions = assembly + suggestions
|
||||
|
||||
# Build grocery list — deduplicated union of all missing ingredients
|
||||
seen: set[str] = set()
|
||||
grocery_list: list[str] = []
|
||||
|
|
|
|||
134
scripts/backfill_texture_profiles.py
Normal file
134
scripts/backfill_texture_profiles.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Backfill texture_profile in ingredient_profiles from existing macro data.
|
||||
|
||||
Texture categories and their macro signatures (all values g/100g):
|
||||
fatty - fat > 60 (oils, lard, pure butter)
|
||||
creamy - fat 15-60 (cream, cheese, fatty meats, nut butter)
|
||||
firm - protein > 7, fat < 12, carbs < 20 (lean meats, fish, firm tofu)
|
||||
starchy - carbs > 40, fat < 10 (flour, oats, rice, bread, potatoes)
|
||||
fibrous - fiber > 4, carbs < 40 (brassicas, leafy greens, whole grains)
|
||||
tender - protein 2-15, fat < 10, (soft veg, eggs, soft tofu, cooked beans)
|
||||
carbs < 40
|
||||
liquid - calories < 25, fat < 1, (broth, juice, dilute sauces)
|
||||
protein < 3
|
||||
neutral - fallthrough default
|
||||
|
||||
Rules are applied in priority order: fatty → creamy → starchy → firm →
|
||||
fibrous → tender → liquid → neutral.
|
||||
|
||||
Run:
|
||||
python scripts/backfill_texture_profiles.py [path/to/kiwi.db]
|
||||
|
||||
Or inside the container:
|
||||
docker exec kiwi-cloud-api-1 python /app/kiwi/scripts/backfill_texture_profiles.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Default DB paths to try
|
||||
_DEFAULT_PATHS = [
|
||||
"/devl/kiwi-cloud-data/local-dev/kiwi.db",
|
||||
"/devl/kiwi-data/kiwi.db",
|
||||
]
|
||||
|
||||
BATCH_SIZE = 5_000
|
||||
|
||||
|
||||
def _classify(fat: float, protein: float, carbs: float,
|
||||
fiber: float, calories: float) -> str:
|
||||
# Cap runaway values — data quality issue in some branded entries
|
||||
fat = min(fat or 0.0, 100.0)
|
||||
protein = min(protein or 0.0, 100.0)
|
||||
carbs = min(carbs or 0.0, 100.0)
|
||||
fiber = min(fiber or 0.0, 50.0)
|
||||
calories = min(calories or 0.0, 900.0)
|
||||
|
||||
if fat > 60:
|
||||
return "fatty"
|
||||
if fat > 15:
|
||||
return "creamy"
|
||||
# Starchy before firm: oats/legumes have high protein AND high carbs — carbs win
|
||||
if carbs > 40 and fat < 10:
|
||||
return "starchy"
|
||||
# Firm: lean proteins with low carbs (meats, fish, hard tofu)
|
||||
# Lower protein threshold (>7) catches tofu (9%) and similar plant proteins
|
||||
if protein > 7 and fat < 12 and carbs < 20:
|
||||
return "firm"
|
||||
if fiber > 4 and carbs < 40:
|
||||
return "fibrous"
|
||||
if 2 < protein <= 15 and fat < 10 and carbs < 40:
|
||||
return "tender"
|
||||
if calories < 25 and fat < 1 and protein < 3:
|
||||
return "liquid"
|
||||
return "neutral"
|
||||
|
||||
|
||||
def backfill(db_path: str) -> None:
    """Recompute ``texture_profile`` for every row of ``ingredient_profiles``.

    Rows are read in pages of BATCH_SIZE, classified by _classify() from their
    macro columns, and written back with one commit per page. Progress and the
    final texture distribution are printed to stdout.

    Args:
        db_path: Path of the SQLite database file to update in place.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row

        total = conn.execute("SELECT COUNT(*) FROM ingredient_profiles").fetchone()[0]
        print(f"Total rows: {total:,}")

        updated = 0
        offset = 0
        counts: dict[str, int] = {}

        while True:
            # ORDER BY makes LIMIT/OFFSET paging well-defined: without it the
            # row order of a SELECT is unspecified, so pages could overlap or
            # skip rows.
            rows = conn.execute(
                """SELECT id, fat_pct, protein_pct, carbs_g_per_100g,
                          fiber_g_per_100g, calories_per_100g
                   FROM ingredient_profiles
                   ORDER BY id
                   LIMIT ? OFFSET ?""",
                (BATCH_SIZE, offset),
            ).fetchall()

            if not rows:
                break

            batch: list[tuple[str, int]] = []
            for row in rows:
                texture = _classify(
                    row["fat_pct"],
                    row["protein_pct"],
                    row["carbs_g_per_100g"],
                    row["fiber_g_per_100g"],
                    row["calories_per_100g"],
                )
                counts[texture] = counts.get(texture, 0) + 1
                batch.append((texture, row["id"]))

            conn.executemany(
                "UPDATE ingredient_profiles SET texture_profile = ? WHERE id = ?",
                batch,
            )
            conn.commit()

            updated += len(batch)
            offset += BATCH_SIZE
            print(f"  {updated:,} / {total:,} updated...", end="\r")

        print(f"\nDone. {updated:,} rows updated.\n")
        print("Texture distribution:")
        # counts is empty when nothing was updated, so the division is safe.
        for texture, count in sorted(counts.items(), key=lambda x: -x[1]):
            pct = count / updated * 100
            print(f"  {texture:10s} {count:8,}  ({pct:.1f}%)")
    finally:
        # Release the database handle even if a query or classification fails.
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # An explicit CLI argument wins; otherwise use the first default path that exists.
    cli_args = sys.argv[1:]
    if cli_args:
        path = cli_args[0]
    else:
        existing = [candidate for candidate in _DEFAULT_PATHS if Path(candidate).exists()]
        path = existing[0] if existing else None
        if not path:
            print(f"No DB found. Pass path as argument or create one of: {_DEFAULT_PATHS}")
            sys.exit(1)

    print(f"Backfilling texture profiles in: {path}")
    backfill(path)
|
||||
|
|
@ -83,9 +83,30 @@ def build(db_path: Path, usda_fdc_path: Path, usda_branded_path: Path) -> None:
|
|||
"Fiber, total dietary": "fiber_pct",
|
||||
"Sodium, Na": "sodium_mg_per_100g",
|
||||
"Water": "moisture_pct",
|
||||
"Energy": "calories_per_100g",
|
||||
}
|
||||
df = df_fdc.rename(columns={k: v for k, v in fdc_col_map.items() if k in df_fdc.columns})
|
||||
|
||||
# Build a sugar lookup from the branded parquet (keyed by normalized name).
|
||||
# usda_branded has SUGARS, TOTAL (G) for processed/packaged foods.
|
||||
branded_col_map = {
|
||||
"FOOD_NAME": "name",
|
||||
"SUGARS, TOTAL (G)": "sugar_g_per_100g",
|
||||
}
|
||||
df_branded_slim = df_branded.rename(
|
||||
columns={k: v for k, v in branded_col_map.items() if k in df_branded.columns}
|
||||
)[list(set(branded_col_map.values()) & set(df_branded.rename(columns=branded_col_map).columns))]
|
||||
sugar_lookup: dict[str, float] = {}
|
||||
for _, brow in df_branded_slim.iterrows():
|
||||
bname = normalize_name(str(brow.get("name", "")))
|
||||
val = brow.get("sugar_g_per_100g")
|
||||
try:
|
||||
fval = float(val) # type: ignore[arg-type]
|
||||
if fval > 0 and bname not in sugar_lookup:
|
||||
sugar_lookup[bname] = fval
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
inserted = 0
|
||||
for _, row in df.iterrows():
|
||||
name = normalize_name(str(row.get("name", "")))
|
||||
|
|
@ -98,25 +119,40 @@ def build(db_path: Path, usda_fdc_path: Path, usda_branded_path: Path) -> None:
|
|||
"moisture_pct": float(row.get("moisture_pct") or 0),
|
||||
"sodium_mg_per_100g": float(row.get("sodium_mg_per_100g") or 0),
|
||||
"starch_pct": 0.0,
|
||||
"carbs_g_per_100g": float(row.get("carb_pct") or 0),
|
||||
"fiber_g_per_100g": float(row.get("fiber_pct") or 0),
|
||||
"calories_per_100g": float(row.get("calories_per_100g") or 0),
|
||||
"sugar_g_per_100g": sugar_lookup.get(name, 0.0),
|
||||
}
|
||||
r["binding_score"] = derive_binding_score(r)
|
||||
r["elements"] = derive_elements(r)
|
||||
r["is_fermented"] = int(any(k in name for k in _FERMENTED_KEYWORDS))
|
||||
|
||||
try:
|
||||
# Insert new profile or update macro columns on existing one.
|
||||
conn.execute("""
|
||||
INSERT OR IGNORE INTO ingredient_profiles
|
||||
INSERT INTO ingredient_profiles
|
||||
(name, elements, fat_pct, fat_saturated_pct, moisture_pct,
|
||||
protein_pct, starch_pct, binding_score, sodium_mg_per_100g,
|
||||
is_fermented, source)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?)
|
||||
is_fermented,
|
||||
carbs_g_per_100g, fiber_g_per_100g, calories_per_100g, sugar_g_per_100g,
|
||||
source)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
ON CONFLICT(name) DO UPDATE SET
|
||||
carbs_g_per_100g = excluded.carbs_g_per_100g,
|
||||
fiber_g_per_100g = excluded.fiber_g_per_100g,
|
||||
calories_per_100g = excluded.calories_per_100g,
|
||||
sugar_g_per_100g = excluded.sugar_g_per_100g
|
||||
""", (
|
||||
r["name"], json.dumps(r["elements"]),
|
||||
r["fat_pct"], 0.0, r["moisture_pct"],
|
||||
r["protein_pct"], r["starch_pct"], r["binding_score"],
|
||||
r["sodium_mg_per_100g"], r["is_fermented"], "usda_fdc",
|
||||
r["sodium_mg_per_100g"], r["is_fermented"],
|
||||
r["carbs_g_per_100g"], r["fiber_g_per_100g"],
|
||||
r["calories_per_100g"], r["sugar_g_per_100g"],
|
||||
"usda_fdc",
|
||||
))
|
||||
inserted += conn.execute("SELECT changes()").fetchone()[0]
|
||||
inserted += 1
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,30 @@ _TRAILING_QUALIFIER = re.compile(
|
|||
_QUOTED = re.compile(r'"([^"]*)"')
|
||||
|
||||
|
||||
def _float_or_none(val: object) -> float | None:
|
||||
"""Return float > 0, or None for missing / zero values."""
|
||||
try:
|
||||
v = float(val) # type: ignore[arg-type]
|
||||
return v if v > 0 else None
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _safe_list(val: object) -> list:
|
||||
"""Convert a value to a list, handling NaN/float/None gracefully."""
|
||||
if val is None:
|
||||
return []
|
||||
try:
|
||||
import math
|
||||
if isinstance(val, float) and math.isnan(val):
|
||||
return []
|
||||
except Exception:
|
||||
pass
|
||||
if isinstance(val, list):
|
||||
return val
|
||||
return []
|
||||
|
||||
|
||||
def _parse_r_vector(s: str) -> list[str]:
    """Extract the double-quoted items of an R character vector.

    e.g. 'c("a", "b")' -> ["a", "b"]. Text outside double quotes is ignored;
    a string with no quoted items gives an empty list.
    """
    return [quoted.group(1) for quoted in _QUOTED.finditer(s)]
|
||||
|
|
@ -93,14 +117,14 @@ def _row_to_fields(row: pd.Series) -> tuple[str, str, list[str], list[str]]:
|
|||
if isinstance(raw_parts, str):
|
||||
parsed = _parse_r_vector(raw_parts)
|
||||
raw_parts = parsed if parsed else [raw_parts]
|
||||
raw_ingredients = [str(i) for i in (raw_parts or [])]
|
||||
raw_ingredients = [str(i) for i in (_safe_list(raw_parts))]
|
||||
|
||||
raw_dirs = row.get("RecipeInstructions", [])
|
||||
if isinstance(raw_dirs, str):
|
||||
parsed_dirs = _parse_r_vector(raw_dirs)
|
||||
directions = parsed_dirs if parsed_dirs else [raw_dirs]
|
||||
else:
|
||||
directions = [str(d) for d in (raw_dirs or [])]
|
||||
directions = [str(d) for d in (_safe_list(raw_dirs))]
|
||||
|
||||
title = str(row.get("Name", ""))[:500]
|
||||
external_id = str(row.get("RecipeId", ""))
|
||||
|
|
@ -144,12 +168,18 @@ def build(db_path: Path, recipes_path: Path, batch_size: int = 10000) -> None:
|
|||
json.dumps(ingredient_names),
|
||||
json.dumps(directions),
|
||||
str(row.get("RecipeCategory", "") or ""),
|
||||
json.dumps(list(row.get("Keywords", []) or [])),
|
||||
float(row.get("Calories") or 0) or None,
|
||||
float(row.get("FatContent") or 0) or None,
|
||||
float(row.get("ProteinContent") or 0) or None,
|
||||
float(row.get("SodiumContent") or 0) or None,
|
||||
json.dumps(_safe_list(row.get("Keywords"))),
|
||||
_float_or_none(row.get("Calories")),
|
||||
_float_or_none(row.get("FatContent")),
|
||||
_float_or_none(row.get("ProteinContent")),
|
||||
_float_or_none(row.get("SodiumContent")),
|
||||
json.dumps(coverage),
|
||||
# New macro columns (migration 014)
|
||||
_float_or_none(row.get("SugarContent")),
|
||||
_float_or_none(row.get("CarbohydrateContent")),
|
||||
_float_or_none(row.get("FiberContent")),
|
||||
_float_or_none(row.get("RecipeServings")),
|
||||
0, # nutrition_estimated — food.com direct data is authoritative
|
||||
))
|
||||
|
||||
if len(batch) >= batch_size:
|
||||
|
|
@ -157,8 +187,10 @@ def build(db_path: Path, recipes_path: Path, batch_size: int = 10000) -> None:
|
|||
conn.executemany("""
|
||||
INSERT OR REPLACE INTO recipes
|
||||
(external_id, title, ingredients, ingredient_names, directions,
|
||||
category, keywords, calories, fat_g, protein_g, sodium_mg, element_coverage)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
category, keywords, calories, fat_g, protein_g, sodium_mg,
|
||||
element_coverage,
|
||||
sugar_g, carbs_g, fiber_g, servings, nutrition_estimated)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
""", batch)
|
||||
conn.commit()
|
||||
inserted += conn.total_changes - before
|
||||
|
|
@ -170,8 +202,10 @@ def build(db_path: Path, recipes_path: Path, batch_size: int = 10000) -> None:
|
|||
conn.executemany("""
|
||||
INSERT OR REPLACE INTO recipes
|
||||
(external_id, title, ingredients, ingredient_names, directions,
|
||||
category, keywords, calories, fat_g, protein_g, sodium_mg, element_coverage)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
category, keywords, calories, fat_g, protein_g, sodium_mg,
|
||||
element_coverage,
|
||||
sugar_g, carbs_g, fiber_g, servings, nutrition_estimated)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
""", batch)
|
||||
conn.commit()
|
||||
inserted += conn.total_changes - before
|
||||
|
|
|
|||
109
scripts/pipeline/estimate_recipe_nutrition.py
Normal file
109
scripts/pipeline/estimate_recipe_nutrition.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
"""
|
||||
Estimate macro nutrition for recipes that have no direct data.
|
||||
|
||||
For each recipe where sugar_g, carbs_g and fiber_g are all NULL,
|
||||
look up the matched ingredient_profiles and average their per-100g values,
|
||||
then scale by a rough 150g-per-ingredient portion assumption.
|
||||
|
||||
Mark such rows with nutrition_estimated=1 so the UI can display a disclaimer.
|
||||
Recipes with food.com direct data (nutrition_estimated=0 and values set) are untouched.
|
||||
|
||||
Usage:
|
||||
conda run -n job-seeker python scripts/pipeline/estimate_recipe_nutrition.py \
|
||||
--db /path/to/kiwi.db
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
# Rough grams per ingredient when no quantity data is available.
_GRAMS_PER_INGREDIENT = 150.0


def _parse_ingredient_names(raw):
    """Decode a recipe's ``ingredient_names`` JSON into a list of strings.

    Returns an empty list for NULL, malformed JSON, or JSON that is not a
    list. Non-string items are dropped so they can never reach a dict lookup
    (an unhashable item such as a nested list would raise ``TypeError``).
    """
    try:
        parsed = json.loads(raw or "[]")
    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
        return []
    if not isinstance(parsed, list):
        return []
    return [item for item in parsed if isinstance(item, str)]


def _flush_estimates(conn, batch):
    """Write one batch of ``(calories, carbs, fiber, sugar, id)`` rows and commit.

    Returns the number of rows in the batch.
    """
    conn.executemany(
        # COALESCE keeps a calories value the recipe already has: rows are
        # selected only on sugar/carbs/fiber being NULL, so calories may hold
        # directly sourced data that must not be replaced by an estimate.
        "UPDATE recipes SET calories=COALESCE(calories, ?), carbs_g=?, fiber_g=?, sugar_g=?, "
        "nutrition_estimated=1 WHERE id=?",
        batch,
    )
    conn.commit()
    return len(batch)


def estimate(db_path: Path) -> None:
    """Fill in estimated macros for recipes that have no sugar/carbs/fiber data.

    For every recipe whose ``sugar_g``, ``carbs_g`` and ``fiber_g`` are all
    NULL, the ingredient names are matched exactly against
    ``ingredient_profiles.name``. The per-100g values of the matched profiles
    are summed and scaled by ``_GRAMS_PER_INGREDIENT / 100`` (i.e. every
    matched ingredient is assumed to weigh ``_GRAMS_PER_INGREDIENT`` grams).
    Updated rows get ``nutrition_estimated=1``. Recipes with no matched
    ingredient are left untouched, and an existing ``calories`` value is
    never overwritten.

    Args:
        db_path: Path to the SQLite database containing the ``recipes`` and
            ``ingredient_profiles`` tables.
    """
    macro_keys = ("calories", "carbs", "fiber", "sugar")
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")

        # Load ingredient_profiles macro data into memory for fast lookup.
        # NULL macro values are treated as 0.
        profile_macros: dict[str, dict[str, float]] = {}
        for name, cal, carbs, fiber, sugar in conn.execute(
            "SELECT name, calories_per_100g, carbs_g_per_100g, fiber_g_per_100g, sugar_g_per_100g "
            "FROM ingredient_profiles"
        ):
            if name:
                profile_macros[name] = {
                    "calories": float(cal or 0),
                    "carbs": float(carbs or 0),
                    "fiber": float(fiber or 0),
                    "sugar": float(sugar or 0),
                }

        # Select recipes with no direct nutrition data.
        rows = conn.execute(
            "SELECT id, ingredient_names FROM recipes "
            "WHERE sugar_g IS NULL AND carbs_g IS NULL AND fiber_g IS NULL"
        ).fetchall()

        portion_factor = _GRAMS_PER_INGREDIENT / 100.0
        updated = 0
        batch: list[tuple] = []

        for recipe_id, ingredient_names_json in rows:
            matched = [
                profile_macros[name]
                for name in _parse_ingredient_names(ingredient_names_json)
                if name in profile_macros
            ]
            if not matched:
                continue

            # Sum of per-100g values times the assumed portion weight. This
            # equals "average * portion * count" without the redundant
            # divide-then-multiply by the ingredient count.
            totals = {
                key: sum(macros[key] for macros in matched) * portion_factor
                for key in macro_keys
            }

            # "or None": a total that rounds to 0 is stored as NULL, not 0.
            batch.append((
                round(totals["calories"], 1) or None,
                round(totals["carbs"], 2) or None,
                round(totals["fiber"], 2) or None,
                round(totals["sugar"], 2) or None,
                recipe_id,
            ))

            if len(batch) >= 5000:
                updated += _flush_estimates(conn, batch)
                print(f" {updated} recipes estimated...")
                batch = []

        if batch:
            updated += _flush_estimates(conn, batch)
    finally:
        # Release the database handle even if a query or update fails.
        conn.close()

    print(f"Total: {updated} recipes received estimated nutrition")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: --db is the required path to the SQLite
    # database whose recipes should receive estimated nutrition.
    cli = argparse.ArgumentParser()
    cli.add_argument("--db", type=Path, required=True)
    estimate(cli.parse_args().db)
|
||||
Loading…
Reference in a new issue