From c9fcfde6949ad7fe4a423338399e341cfeab3212 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 27 Apr 2026 07:13:12 -0700 Subject: [PATCH] feat(browse): active time estimation, prep scaling, required-ingredient filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Time effort (time_effort.py): - Passive defaults per cooking technique (bake 30 min, slow cook 300 min, etc.) - Prep action detection with n^0.75 quantity scaling for prep-needing ingredients - Cross-reference ingredients/ingredient_names arrays to distribute quantity across steps - Effort label now time-based (quick ≤20 min, moderate ≤45 min, involved >45 min) - prep_min field added to StepAnalysis schema and Pydantic model - All parse_time_effort call sites updated to pass ingredients + ingredient_names Browse required-ingredient filter: - New required_ingredient query param on GET /recipes/browse/{domain}/{category} - Enter-to-commit input in RecipeBrowserPanel with auto-clear-on-empty watch - Token-prefix match (not arbitrary substring) via FTS5 ingredient_names column filter - FTS5 replaces LIKE '%X%' throughout browse_recipes and _browse_by_match - _all + required_ingredient: 8.4s → 74ms; category + required_ingredient: 2s → 35ms - _ingredient_fts_term() helper builds 'ingredient_names : "X"*' prefix queries - Combined keywords + ingredient into single FTS MATCH to avoid secondary scans Tests: 369/369 passing --- .gitignore | 3 + app/api/endpoints/recipes.py | 43 +- app/db/store.py | 131 +++- app/models/schemas/recipe.py | 1 + app/services/recipe/recipe_engine.py | 7 +- app/services/recipe/time_effort.py | 593 +++++++++++++++--- .../src/components/RecipeBrowserPanel.vue | 26 + frontend/src/services/api.ts | 1 + tests/api/test_browse_time_effort.py | 11 +- tests/test_services/test_time_effort.py | 37 +- 10 files changed, 718 insertions(+), 135 deletions(-) diff --git a/.gitignore b/.gitignore index ee14ab5..b00d5e9 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,9 @@ 
dist/ # Data directories data/ +# Local dev database +*.db + # Test artifacts (MagicMock sqlite files from pytest) dict: """Return a paginated list of recipes for a domain/category. @@ -335,6 +336,7 @@ async def browse_recipes( Pass subcategory to narrow within a category that has subcategories. Pass q to filter by title substring. Pass sort for ordering (default/alpha/alpha_desc/match). sort=match orders by pantry coverage DESC; falls back to default when no pantry_items. + Pass required_ingredient to restrict results to recipes that must include that ingredient. """ if domain not in DOMAINS: raise HTTPException(status_code=404, detail=f"Unknown domain '{domain}'.") @@ -377,6 +379,7 @@ async def browse_recipes( q=q or None, sort=sort, sensory_exclude=sensory_exclude, + required_ingredient=required_ingredient or None, ) # ── Attach time/effort signals to each browse result ──────────────── @@ -389,7 +392,11 @@ async def browse_recipes( except Exception: directions_raw = [] if directions_raw: - _profile = parse_time_effort(directions_raw) + _profile = parse_time_effort( + directions_raw, + ingredients=recipe_row.get("ingredients") or [], + ingredient_names=recipe_row.get("ingredient_names") or [], + ) recipe_row["active_min"] = _profile.active_min recipe_row["passive_min"] = _profile.passive_min else: @@ -424,7 +431,11 @@ async def browse_recipes( except Exception: directions_raw = [] if directions_raw: - _profile = parse_time_effort(directions_raw) + _profile = parse_time_effort( + directions_raw, + ingredients=recipe_row.get("ingredients") or [], + ingredient_names=recipe_row.get("ingredient_names") or [], + ) recipe_row["active_min"] = _profile.active_min recipe_row["passive_min"] = _profile.passive_min else: @@ -574,8 +585,28 @@ async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session)) except Exception: _directions_for_te = [] + _ingredients_for_te = recipe.get("ingredients") or [] + if isinstance(_ingredients_for_te, str): + import json as 
_json3 + try: + _ingredients_for_te = _json3.loads(_ingredients_for_te) + except Exception: + _ingredients_for_te = [] + + _ingredient_names_for_te = recipe.get("ingredient_names") or [] + if isinstance(_ingredient_names_for_te, str): + import json as _json4 + try: + _ingredient_names_for_te = _json4.loads(_ingredient_names_for_te) + except Exception: + _ingredient_names_for_te = [] + if _directions_for_te: - _te = parse_time_effort(_directions_for_te) + _te = parse_time_effort( + _directions_for_te, + ingredients=_ingredients_for_te, + ingredient_names=_ingredient_names_for_te, + ) _time_effort_out: dict | None = { "active_min": _te.active_min, "passive_min": _te.passive_min, @@ -583,7 +614,11 @@ async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session)) "effort_label": _te.effort_label, "equipment": _te.equipment, "step_analyses": [ - {"is_passive": sa.is_passive, "detected_minutes": sa.detected_minutes} + { + "is_passive": sa.is_passive, + "detected_minutes": sa.detected_minutes, + "prep_min": sa.prep_min, + } for sa in _te.step_analyses ], } diff --git a/app/db/store.py b/app/db/store.py index 818ca6c..85291af 100644 --- a/app/db/store.py +++ b/app/db/store.py @@ -1129,6 +1129,19 @@ class Store: phrases = ['"' + kw.replace('"', '""') + '"' for kw in keywords] return " OR ".join(phrases) + @staticmethod + def _ingredient_fts_term(ingredient: str) -> str: + """Build an FTS5 ingredient_names column prefix-filter. + + Returns e.g. 'ingredient_names : "potato"*' which matches any recipe whose + ingredient_names column contains a token starting with that word. Prefix + matching (*) means "potato" also matches "potatoes", "sweet potato", etc. + Apostrophes are stripped because the FTS5 tokenizer drops them. 
+ """ + cleaned = ingredient.replace("'", "").strip() + escaped = cleaned.replace('"', '""') + return f'ingredient_names : "{escaped}"*' + def _count_recipes_for_keywords(self, keywords: list[str]) -> int: if not keywords: return 0 @@ -1157,6 +1170,7 @@ class Store: q: str | None = None, sort: str = "default", sensory_exclude: SensoryExclude | None = None, + required_ingredient: str | None = None, ) -> dict: """Return a page of recipes matching the keyword set. @@ -1165,9 +1179,11 @@ class Store: is provided. match_pct is the fraction of ingredient_names covered by the pantry set — computed deterministically, no LLM needed. - q: optional title substring filter (case-insensitive LIKE). - sort: "default" (corpus order) | "alpha" (A→Z) | "alpha_desc" (Z→A) - | "match" (pantry coverage DESC — falls back to default when no pantry). + q: optional title substring filter (case-insensitive LIKE). + sort: "default" (corpus order) | "alpha" (A→Z) | "alpha_desc" (Z→A) + | "match" (pantry coverage DESC — falls back to default when no pantry). + required_ingredient: when set, only return recipes whose ingredient_names contain + this substring (case-insensitive). "must include" filter. """ if keywords is not None and not keywords: return {"recipes": [], "total": 0, "page": page} @@ -1186,20 +1202,48 @@ class Store: q_param = f"%{q.strip()}%" if q and q.strip() else None + # ── required-ingredient FTS filter (must-include) ───────────────────── + # FTS5 column prefix-filter avoids the full table scan that LIKE '%X%' would do. 
+ req_fts_term = ( + self._ingredient_fts_term(required_ingredient) if required_ingredient else "" + ) + # ── match sort: push match_pct computation into SQL so ORDER BY works ── if effective_sort == "match" and pantry_set: return self._browse_by_match( keywords, page, page_size, offset, pantry_set, q_param, c, sensory_exclude=sensory_exclude, + required_ingredient=required_ingredient, ) cols = ( f"SELECT id, title, category, keywords, ingredient_names," f" calories, fat_g, protein_g, sodium_mg, directions, sensory_tags FROM {c}recipes" ) + fts_sub = f"id IN (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)" if keywords is None: - if q_param: + if req_fts_term: + # Ingredient filter: use FTS index — much faster than LIKE on full table + if q_param: + total = self.conn.execute( + f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?)", + (req_fts_term, q_param), + ).fetchone()[0] + rows = self._fetch_all( + f"{cols} WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?) {order_clause} LIMIT ? OFFSET ?", + (req_fts_term, q_param, page_size, offset), + ) + else: + total = self.conn.execute( + f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub}", + (req_fts_term,), + ).fetchone()[0] + rows = self._fetch_all( + f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? 
OFFSET ?", + (req_fts_term, page_size, offset), + ) + elif q_param: total = self.conn.execute( f"SELECT COUNT(*) FROM {c}recipes WHERE LOWER(title) LIKE LOWER(?)", (q_param,), @@ -1215,23 +1259,32 @@ class Store: (page_size, offset), ) else: - match_expr = self._browser_fts_query(keywords) - fts_sub = f"id IN (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)" + keywords_expr = self._browser_fts_query(keywords) + # Combine keywords + ingredient into one FTS MATCH to use a single index pass + combined_match = ( + f"({keywords_expr}) AND {req_fts_term}" if req_fts_term else keywords_expr + ) if q_param: total = self.conn.execute( f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?)", - (match_expr, q_param), + (combined_match, q_param), ).fetchone()[0] rows = self._fetch_all( f"{cols} WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?) {order_clause} LIMIT ? OFFSET ?", - (match_expr, q_param, page_size, offset), + (combined_match, q_param, page_size, offset), ) else: - # Reuse cached count — avoids a second index scan on every page turn. - total = self._count_recipes_for_keywords(keywords) + if required_ingredient: + total = self.conn.execute( + f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub}", + (combined_match,), + ).fetchone()[0] + else: + # Reuse cached count — avoids a second index scan on every page turn. + total = self._count_recipes_for_keywords(keywords) rows = self._fetch_all( f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? OFFSET ?", - (match_expr, page_size, offset), + (combined_match, page_size, offset), ) # Community tag fallback: if FTS found nothing, check whether # community-tagged recipe IDs exist for this keyword context. @@ -1313,6 +1366,7 @@ class Store: q_param: str | None, c: str, sensory_exclude: SensoryExclude | None = None, + required_ingredient: str | None = None, ) -> dict: """Browse recipes sorted by pantry match percentage. 
@@ -1327,16 +1381,48 @@ class Store: pantry_lower = {p.lower() for p in pantry_set} + # ── required-ingredient FTS filter (must-include) ───────────────────── + req_fts_term = ( + self._ingredient_fts_term(required_ingredient) if required_ingredient else "" + ) + # ── Fetch candidate pool from FTS ──────────────────────────────────── base_cols = ( f"SELECT r.id, r.title, r.category, r.ingredient_names, r.directions, r.sensory_tags" f" FROM {c}recipes r" ) + fts_sub = ( + f"r.id IN (SELECT rowid FROM {c}recipe_browser_fts" + f" WHERE recipe_browser_fts MATCH ?)" + ) self.conn.row_factory = sqlite3.Row if keywords is None: - if q_param: + if req_fts_term: + if q_param: + total = self.conn.execute( + f"SELECT COUNT(*) FROM {c}recipes WHERE id IN" + f" (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)" + f" AND LOWER(title) LIKE LOWER(?)", + (req_fts_term, q_param), + ).fetchone()[0] + rows = self.conn.execute( + f"{base_cols} WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)" + f" ORDER BY r.id ASC LIMIT ?", + (req_fts_term, q_param, self._MATCH_POOL_SIZE), + ).fetchall() + else: + total = self.conn.execute( + f"SELECT COUNT(*) FROM {c}recipes WHERE id IN" + f" (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)", + (req_fts_term,), + ).fetchone()[0] + rows = self.conn.execute( + f"{base_cols} WHERE {fts_sub} ORDER BY r.id ASC LIMIT ?", + (req_fts_term, self._MATCH_POOL_SIZE), + ).fetchall() + elif q_param: total = self.conn.execute( f"SELECT COUNT(*) FROM {c}recipes WHERE LOWER(title) LIKE LOWER(?)", (q_param,), @@ -1355,27 +1441,32 @@ class Store: (self._MATCH_POOL_SIZE,), ).fetchall() else: - match_expr = self._browser_fts_query(keywords) - fts_sub = ( - f"r.id IN (SELECT rowid FROM {c}recipe_browser_fts" - f" WHERE recipe_browser_fts MATCH ?)" + keywords_expr = self._browser_fts_query(keywords) + combined_match = ( + f"({keywords_expr}) AND {req_fts_term}" if req_fts_term else keywords_expr ) if q_param: total = 
self.conn.execute( f"SELECT COUNT(*) FROM {c}recipes r" f" WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)", - (match_expr, q_param), + (combined_match, q_param), ).fetchone()[0] rows = self.conn.execute( f"{base_cols} WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)" f" ORDER BY r.id ASC LIMIT ?", - (match_expr, q_param, self._MATCH_POOL_SIZE), + (combined_match, q_param, self._MATCH_POOL_SIZE), ).fetchall() else: - total = self._count_recipes_for_keywords(keywords) + if required_ingredient: + total = self.conn.execute( + f"SELECT COUNT(*) FROM {c}recipes r WHERE {fts_sub}", + (combined_match,), + ).fetchone()[0] + else: + total = self._count_recipes_for_keywords(keywords) rows = self.conn.execute( f"{base_cols} WHERE {fts_sub} ORDER BY r.id ASC LIMIT ?", - (match_expr, self._MATCH_POOL_SIZE), + (combined_match, self._MATCH_POOL_SIZE), ).fetchall() # ── Score in Python, sort, paginate ────────────────────────────────── diff --git a/app/models/schemas/recipe.py b/app/models/schemas/recipe.py index 4eb6ddd..c0b434d 100644 --- a/app/models/schemas/recipe.py +++ b/app/models/schemas/recipe.py @@ -16,6 +16,7 @@ class StepAnalysis(BaseModel): """Active/passive classification for one direction step.""" is_passive: bool detected_minutes: int | None = None + prep_min: int | None = None # estimated physical prep time (action detection) class TimeEffortProfile(BaseModel): diff --git a/app/services/recipe/recipe_engine.py b/app/services/recipe/recipe_engine.py index 50686fe..817479b 100644 --- a/app/services/recipe/recipe_engine.py +++ b/app/services/recipe/recipe_engine.py @@ -883,7 +883,11 @@ class RecipeEngine: # Compute complexity + parse time effort once — reused for filters and response. 
row_complexity = _classify_method_complexity(directions, available_equipment) row_time_min = _estimate_time_min(directions, row_complexity) - row_time_effort = parse_time_effort(directions) + row_time_effort = parse_time_effort( + directions, + ingredients=row.get("ingredients") or [], + ingredient_names=row.get("ingredient_names") or [], + ) # Filter and tier-rank by hard_day_mode if req.hard_day_mode: @@ -961,6 +965,7 @@ class RecipeEngine: StepAnalysis( is_passive=sa.is_passive, detected_minutes=sa.detected_minutes, + prep_min=sa.prep_min, ) for sa in row_time_effort.step_analyses ], diff --git a/app/services/recipe/time_effort.py b/app/services/recipe/time_effort.py index 9f403fc..1c8dedb 100644 --- a/app/services/recipe/time_effort.py +++ b/app/services/recipe/time_effort.py @@ -1,17 +1,27 @@ """ -Runtime parser for active/passive time split and equipment detection. +Runtime parser for active/passive time split, prep effort, and equipment detection. -Operates over a list of direction strings. No I/O — pure Python functions. -Sub-millisecond for up to 20 recipes (20 × ~10 steps each = 200 regex calls). +Operates over a list of direction strings plus an optional ingredient list. +No I/O — pure Python functions. Sub-millisecond for up to 20 recipes. + +Time estimation strategy (in priority order): +1. Explicit time mention in step text ("simmer for 20 minutes") +2. Passive keyword + per-technique default ("bake until golden" → 30 min) +3. Prep action + ingredient quantity scaling ("dice 2 lbs potatoes" → ~5 min) +4. Fallback active default (assembly/misc steps → 2 min each) + +Quantity scaling uses n^0.75 (sub-linear, matching human batch-work curves). +Pass `ingredients` + `ingredient_names` to enable cross-referenced scaling. +Without them, prep actions use base times only (no scaling). 
""" from __future__ import annotations import math import re -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Final -# ── Passive step keywords (whole-word, case-insensitive) ────────────────── +# ── Passive step keywords ───────────────────────────────────────────────── _PASSIVE_PATTERNS: Final[list[str]] = [ "simmer", "bake", "roast", "broil", "refrigerate", "marinate", @@ -20,19 +30,39 @@ _PASSIVE_PATTERNS: Final[list[str]] = [ r"slow\s+cook", r"pressure\s+cook", ] -# Pre-compiled as a single alternation — avoids re-compiling on every call. _PASSIVE_RE: re.Pattern[str] = re.compile( r"\b(?:" + "|".join(_PASSIVE_PATTERNS) + r")\b", re.IGNORECASE, ) -# ── Time extraction regex ───────────────────────────────────────────────── +# Per-technique passive defaults (minutes) — used when no explicit time found. +# Calibrated to conservative midpoints from USDA FoodKeeper + culinary practice. +_PASSIVE_DEFAULTS: Final[list[tuple[re.Pattern[str], int]]] = [ + # Multi-word first (longer match wins) + (re.compile(r"\bslow\s+cook\b", re.IGNORECASE), 300), # 5 hr crockpot default + (re.compile(r"\bpressure\s+cook\b", re.IGNORECASE), 15), + (re.compile(r"\bovernight\b", re.IGNORECASE), 480), # 8 hr + # Single-word + (re.compile(r"\bbraise\b", re.IGNORECASE), 90), + (re.compile(r"\bmarinate\b", re.IGNORECASE), 60), + (re.compile(r"\brefrigerate\b", re.IGNORECASE), 120), + (re.compile(r"\bproof\b|\brise\b", re.IGNORECASE), 60), + (re.compile(r"\bsoak\b", re.IGNORECASE), 30), + (re.compile(r"\bfreeze\b", re.IGNORECASE), 120), + (re.compile(r"\bchill\b", re.IGNORECASE), 60), + (re.compile(r"\broast\b", re.IGNORECASE), 40), + (re.compile(r"\bbake\b", re.IGNORECASE), 30), + (re.compile(r"\bbroil\b", re.IGNORECASE), 8), + (re.compile(r"\bsimmer\b", re.IGNORECASE), 20), + (re.compile(r"\bset\b", re.IGNORECASE), 30), # gelatin / custard set + (re.compile(r"\bsteep\b", re.IGNORECASE), 5), + (re.compile(r"\brest\b|\bstand\b", re.IGNORECASE), 
10), + (re.compile(r"\bcool\b", re.IGNORECASE), 15), + (re.compile(r"\bwait\b|\blet\b", re.IGNORECASE), 5), +] + +# ── Explicit time extraction ────────────────────────────────────────────── -# Two-branch pattern: -# Branch A (groups 1-3): range "15-20 minutes", "15–20 min" -# Branch B (groups 4-5): single "10 minutes", "2 hours", "30 sec" -# -# Separator characters: plain hyphen (-), en-dash (–), or literal "-to-" _TIME_RE: re.Pattern[str] = re.compile( r"(\d+)\s*(?:[-\u2013]|-to-)\s*(\d+)\s*(hour|hr|minute|min|second|sec)s?" r"|" @@ -40,9 +70,242 @@ _TIME_RE: re.Pattern[str] = re.compile( re.IGNORECASE, ) -_MAX_MINUTES_PER_STEP: Final[int] = 480 # 8 hours sanity cap +_MAX_MINUTES_PER_STEP: Final[int] = 480 # 8-hour sanity cap -# ── Equipment detection (keyword → label, in detection priority order) ──── +# ── Prep action detection ───────────────────────────────────────────────── + +# Base times (minutes) per prep action, calibrated to ~3 items / 0.5 lb reference. +# These are starting points — flagged for calibration against real recipe timing data. +_PREP_ACTION_BASES: Final[dict[str, float]] = { + # Peeling / stripping + "peel": 1.5, + "pare": 1.5, + "hull": 1.5, + "pit": 2.0, # cherries, avocados + "core": 1.0, + "stem": 1.0, + "trim": 1.0, + # Cutting + "chop": 2.0, + "cut": 1.5, + "dice": 2.5, # more precise than chop + "mince": 2.0, + "slice": 1.5, + "julienne": 4.0, + "cube": 2.0, + "quarter": 1.0, + "halve": 0.5, + "shred": 2.0, + # Grating / zesting + "grate": 3.0, + "zest": 2.0, + # Crushing + "crush": 0.5, + "smash": 0.5, + "crack": 0.5, + # Mixing / assembly (lower base — less physical effort) + "knead": 8.0, # bread dough: consistent regardless of quantity + "whisk": 1.5, + "beat": 2.0, + "cream": 3.0, # butter + sugar until fluffy + "fold": 1.5, + "stir": 0.5, + "combine": 0.5, + "mix": 1.0, + "season": 0.5, +} + +# Compiled regex — longer patterns first to avoid partial matches. 
+_PREP_RE: re.Pattern[str] = re.compile( + r"\b(?:" + "|".join( + re.escape(k) for k in sorted(_PREP_ACTION_BASES, key=len, reverse=True) + ) + r")\b", + re.IGNORECASE, +) + +# Default active time per step when no explicit time and no prep action detected. +_ACTIVE_STEP_DEFAULT_MIN: Final[float] = 2.0 + +# ── Prep-needing ingredient classification ──────────────────────────────── +# +# Only ingredients in this set get quantity-scaled prep time. +# Liquids, spices, canned goods, and dry staples are excluded — they require +# no physical prep beyond measuring. + +_PREP_NEEDING: Final[frozenset[str]] = frozenset({ + # Alliums + "onion", "shallot", "leek", "scallion", "green onion", "chive", "garlic", + # Root / stem vegetables + "ginger", "carrot", "celery", "potato", "sweet potato", "yam", + "beet", "turnip", "parsnip", "radish", "fennel", "celeriac", + # Squash / gourd family + "zucchini", "squash", "pumpkin", "cucumber", + # Peppers + "pepper", "bell pepper", "jalapeño", "jalapeno", "chili", "chile", + # Brassicas + "broccoli", "cauliflower", "cabbage", "kale", "chard", "spinach", + "brussels sprout", + # Other vegetables + "tomato", "eggplant", "aubergine", "corn", "artichoke", "asparagus", + "green bean", "snow pea", "snap pea", "mushroom", "lettuce", + # Fruits + "apple", "pear", "peach", "nectarine", "plum", "apricot", + "mango", "papaya", "pineapple", "melon", "watermelon", "cantaloupe", + "avocado", "banana", + "strawberry", "raspberry", "blackberry", "blueberry", "cherry", + "citrus", "lemon", "lime", "orange", "grapefruit", + # Protein (trimming / portioning) + "chicken", "turkey", "duck", + "beef", "pork", "lamb", "veal", + "fish", "salmon", "tuna", "cod", "tilapia", "halibut", "shrimp", + "scallop", "crab", "lobster", + # Dairy requiring active prep + "cheese", + # Nuts / seeds (chopping) + "almond", "walnut", "pecan", "cashew", "peanut", "hazelnut", + "pistachio", "macadamia", "nut", + # Fresh herbs (chopping / tearing) + "basil", "parsley", "cilantro", 
"thyme", "rosemary", "sage", + "dill", "mint", "tarragon", + # Other + "bread", +}) + + +def _is_prep_needing(name: str) -> bool: + """True if the normalized ingredient name contains any prep-needing keyword.""" + nl = name.lower() + return any(kw in nl for kw in _PREP_NEEDING) + + +# ── Quantity extraction ─────────────────────────────────────────────────── + +_FRAC_RE: re.Pattern[str] = re.compile(r"(\d+)\s*/\s*(\d+)") + +# Weight units → converted to pounds internally +_WEIGHT_RE: re.Pattern[str] = re.compile( + r"(\d+(?:\.\d+)?|\d+\s*/\s*\d+)\s*" + r"(pound|lb|ounce|oz|gram|g(?![a-z])|kilogram|kg)\s*s?\b", + re.IGNORECASE, +) + +# Volume (cups only — the common recipe unit for quantity scaling) +_VOLUME_CUP_RE: re.Pattern[str] = re.compile( + r"(\d+(?:\.\d+)?|\d+\s*/\s*\d+)\s*cups?\b", + re.IGNORECASE, +) + +# Count — bare integer or decimal followed by optional size/unit word +_COUNT_RE: re.Pattern[str] = re.compile( + r"(? float: + m = _FRAC_RE.search(s) + if m: + try: + return float(m.group(1)) / float(m.group(2)) + except (ValueError, ZeroDivisionError): + return 1.0 + try: + return float(s.replace(" ", "")) + except ValueError: + return 1.0 + + +def _extract_qty(text: str) -> tuple[float, str] | None: + """Return (quantity_in_canonical_units, unit_type) or None. + + Unit types: "lb" (weight in pounds), "cup", "count". + All weights are normalised to pounds. 
+ """ + # Weight (most specific — check first) + m = _WEIGHT_RE.search(text) + if m: + qty = _parse_fraction(m.group(1)) + u = m.group(2).lower().rstrip("s") + if u in ("pound", "lb"): + return (qty, "lb") + if u in ("ounce", "oz"): + return (qty / 16.0, "lb") + if u in ("gram", "g"): + return (qty / 453.6, "lb") + if u in ("kilogram", "kg"): + return (qty * 2.205, "lb") + + # Volume (cups) + m = _VOLUME_CUP_RE.search(text) + if m: + return (_parse_fraction(m.group(1)), "cup") + + # Count — only accept values in a sane range to avoid false positives + m = _COUNT_RE.search(text) + if m: + qty = float(m.group(1)) + if 0 < qty <= 24: + return (qty, "count") + + return None + + +def _extract_inline_qty_for(text: str, ing_name: str) -> tuple[float, str] | None: + """Extract the quantity specifically associated with `ing_name` in a direction step. + + Looks for a number immediately before the ingredient name (plus optional size/unit + words). Falls back to None if the pattern does not match. + + Example: "Dice 2 large onions and 3 carrots" → for "onion" returns (2.0, "count"). 
+ """ + pattern = re.compile( + r"(\d+(?:\.\d+)?|\d+\s*/\s*\d+)\s*" + r"(?:large|medium|small|whole|" + r"(?:pound|lb|ounce|oz|gram|g|kilogram|kg|cup|clove|cloves|" + r"head|heads|fillet|fillets|breast|breasts|piece|pieces)s?)??\s*" + + re.escape(ing_name) + r"(?:es|s)?\b", + re.IGNORECASE, + ) + m = pattern.search(text) + if m: + # Re-extract with _extract_qty on the full matched span to get unit too + span = text[m.start(): m.end()] + result = _extract_qty(span) + if result: + return result + # Fallback: bare count + try: + return (_parse_fraction(m.group(1)), "count") + except Exception: + pass + return None + + +def _quantity_scale(qty: float, unit: str) -> float: + """Apply n^0.75 scaling relative to unit reference, clamped to [MIN, MAX].""" + ref = _QTY_REFS.get(unit, 1.0) + if ref <= 0 or qty <= 0: + return 1.0 + raw = (qty / ref) ** _SCALE_POWER + return max(_MIN_SCALE, min(_MAX_SCALE, raw)) + + +# ── Equipment detection ─────────────────────────────────────────────────── _EQUIPMENT_RULES: Final[list[tuple[re.Pattern[str], str]]] = [ (re.compile(r"\b(?:chop|dice|mince|slice|julienne)\b", re.IGNORECASE), "Knife"), @@ -58,74 +321,8 @@ _EQUIPMENT_RULES: Final[list[tuple[re.Pattern[str], str]]] = [ (re.compile(r"\b(?:drain|strain|colander|rinse pasta)\b", re.IGNORECASE), "Colander"), ] -# ── Dataclasses ─────────────────────────────────────────────────────────── - - -@dataclass(frozen=True) -class StepAnalysis: - """Analysis result for a single direction step.""" - is_passive: bool - detected_minutes: int | None # None when no time mention found in text - - -@dataclass(frozen=True) -class TimeEffortProfile: - """Aggregated time and effort profile for a full recipe.""" - active_min: int # total minutes requiring active attention - passive_min: int # total minutes the cook can step away - total_min: int # active_min + passive_min - step_analyses: list[StepAnalysis] # one entry per direction step - equipment: list[str] # ordered, deduplicated equipment labels - 
effort_label: str # "quick" | "moderate" | "involved" - - -# ── Core parsing logic ──────────────────────────────────────────────────── - - -def _extract_minutes(text: str) -> int | None: - """Return the number of minutes mentioned in text, or None. - - Range values (e.g. "15-20 minutes") return the integer midpoint. - Hours are converted to minutes. Seconds are rounded up to 1 minute minimum. - Result is capped at _MAX_MINUTES_PER_STEP. - """ - m = _TIME_RE.search(text) - if m is None: - return None - - if m.group(1) is not None: - # Branch A: range match (e.g. "15-20 minutes") - low = int(m.group(1)) - high = int(m.group(2)) - unit = m.group(3).lower() - raw_value: float = (low + high) / 2 - else: - # Branch B: single value match (e.g. "10 minutes") - low = int(m.group(4)) - unit = m.group(5).lower() - raw_value = float(low) - - if unit in ("hour", "hr"): - minutes: float = raw_value * 60 - elif unit in ("second", "sec"): - minutes = max(1.0, math.ceil(raw_value / 60)) - else: - minutes = raw_value - - return min(int(minutes), _MAX_MINUTES_PER_STEP) - - -def _classify_passive(text: str) -> bool: - """Return True if the step text matches any passive keyword (whole-word).""" - return _PASSIVE_RE.search(text) is not None - def _detect_equipment(all_text: str, has_passive: bool) -> list[str]: - """Return ordered, deduplicated list of equipment labels detected in text. - - all_text should be all direction steps joined with spaces. - has_passive controls whether 'Timer' is appended at the end. - """ seen: set[str] = set() result: list[str] = [] for pattern, label in _EQUIPMENT_RULES: @@ -137,8 +334,172 @@ def _detect_equipment(all_text: str, has_passive: bool) -> list[str]: return result -def _effort_label(step_count: int) -> str: - """Derive effort label from step count.""" +# ── Ingredient–step cross-reference ────────────────────────────────────── + +def _ingredient_mentioned(text: str, name: str) -> bool: + """True if `name` appears in `text` as a whole word. 
+ + Handles both regular plurals (onion → onions) and -es plurals + (potato → potatoes, tomato → tomatoes). + """ + pattern = re.compile(r"\b" + re.escape(name.lower()) + r"(?:es|s)?\b", re.IGNORECASE) + return bool(pattern.search(text)) + + +def _build_step_ingredient_qtys( + ingredients: list[str], + ingredient_names: list[str], + directions: list[str], +) -> list[dict[str, tuple[float, str]]]: + """Return, for each direction step, {ing_name: (qty_for_this_step, unit)}. + + Strategy: + - Filter ingredient pairs to prep-needing items only. + - Parse total quantities from the raw ingredient strings. + - For each step, try to find an inline quantity tied to that ingredient name. + - If no inline quantity, distribute the total evenly across all steps that + mention the ingredient (handles "3 onions" split across 2 steps). + """ + # Build total qty map for prep-needing ingredients + total_qtys: dict[str, tuple[float, str]] = {} + for raw, name in zip(ingredients, ingredient_names): + base = name.lower().strip() + if not _is_prep_needing(base): + continue + result = _extract_qty(raw) + if result is not None: + total_qtys[base] = result + + if not total_qtys: + return [{} for _ in directions] + + # Count how many steps mention each ingredient + step_counts: dict[str, int] = {n: 0 for n in total_qtys} + for step in directions: + for name in total_qtys: + if _ingredient_mentioned(step, name): + step_counts[name] += 1 + + # Build per-step qty maps + per_step: list[dict[str, tuple[float, str]]] = [] + for step in directions: + step_map: dict[str, tuple[float, str]] = {} + for name, (total, unit) in total_qtys.items(): + if not _ingredient_mentioned(step, name): + continue + # Try ingredient-specific inline quantity first + inline = _extract_inline_qty_for(step, name) + if inline is not None: + step_map[name] = inline + else: + # Distribute total across steps that reference this ingredient + n = max(step_counts.get(name, 1), 1) + step_map[name] = (total / n, unit) + 
per_step.append(step_map) + + return per_step + + +# ── Dataclasses ─────────────────────────────────────────────────────────── + + +@dataclass(frozen=True) +class StepAnalysis: + """Analysis result for a single direction step.""" + is_passive: bool + detected_minutes: int | None # explicit or estimated time (None = no signal) + prep_min: int | None = None # estimated physical prep time from action detection + + +@dataclass(frozen=True) +class TimeEffortProfile: + """Aggregated time and effort profile for a full recipe.""" + active_min: int + passive_min: int + total_min: int + step_analyses: list[StepAnalysis] = field(default_factory=list) + equipment: list[str] = field(default_factory=list) + effort_label: str = "moderate" # "quick" | "moderate" | "involved" + + +# ── Core parsing helpers ────────────────────────────────────────────────── + + +def _extract_minutes(text: str) -> int | None: + """Return explicit minutes from text, or None.""" + m = _TIME_RE.search(text) + if m is None: + return None + if m.group(1) is not None: + low, high = int(m.group(1)), int(m.group(2)) + unit = m.group(3).lower() + raw: float = (low + high) / 2 + else: + low = int(m.group(4)) + unit = m.group(5).lower() + raw = float(low) + + if unit in ("hour", "hr"): + minutes: float = raw * 60 + elif unit in ("second", "sec"): + minutes = max(1.0, math.ceil(raw / 60)) + else: + minutes = raw + + return min(int(minutes), _MAX_MINUTES_PER_STEP) + + +def _classify_passive(text: str) -> bool: + return _PASSIVE_RE.search(text) is not None + + +def _passive_default(text: str) -> int | None: + """Return estimated passive minutes from per-keyword defaults.""" + for pattern, minutes in _PASSIVE_DEFAULTS: + if pattern.search(text): + return minutes + return None + + +def _prep_estimate( + text: str, + step_ing_qtys: dict[str, tuple[float, str]], +) -> int: + """Estimate active prep time from the first detected prep action + ingredient qtys. 
+ + If no prep-needing ingredient is identified in the step, uses the action's + base time at 1× (no scaling). + """ + m = _PREP_RE.search(text) + if m is None: + return 0 + + action = m.group(0).lower() + base = _PREP_ACTION_BASES.get(action, _ACTIVE_STEP_DEFAULT_MIN) + + # Find which prep-needing ingredients this step mentions + matches: list[tuple[float, str]] = [ + qty_unit + for name, qty_unit in step_ing_qtys.items() + if _ingredient_mentioned(text, name) + ] + + if not matches: + return round(base) # no ingredient context — use base unscaled + + total = sum(base * _quantity_scale(qty, unit) for qty, unit in matches) + return round(total) + + +def _effort_label(total_min: int, step_count: int) -> str: + """Effort label based on total estimated time; falls back to step count.""" + if total_min > 0: + if total_min <= 20: + return "quick" + if total_min <= 45: + return "moderate" + return "involved" + # No time signals at all — fall back to step count heuristic if step_count <= 3: return "quick" if step_count <= 7: @@ -146,52 +507,96 @@ def _effort_label(step_count: int) -> str: return "involved" -def parse_time_effort(directions: list[str]) -> TimeEffortProfile: - """Parse a list of direction strings into a TimeEffortProfile. +# ── Public API ──────────────────────────────────────────────────────────── + + +def parse_time_effort( + directions: list[str], + ingredients: list[str] | None = None, + ingredient_names: list[str] | None = None, +) -> TimeEffortProfile: + """Parse direction strings into a TimeEffortProfile. + + Args: + directions: List of step strings from the recipe corpus. + ingredients: Raw ingredient strings ("2 large onions", "1.5 lbs potatoes"). + Parallel to ingredient_names. + ingredient_names: Normalised ingredient names ("onion", "potato"). + Required alongside ingredients to enable quantity scaling. Returns a zero-value profile with empty lists when directions is empty. - Never raises — all failures silently produce sensible defaults. 
+ Never raises — all failures produce sensible defaults. """ if not directions: return TimeEffortProfile( - active_min=0, - passive_min=0, - total_min=0, - step_analyses=[], - equipment=[], - effort_label="quick", + active_min=0, passive_min=0, total_min=0, + step_analyses=[], equipment=[], effort_label="quick", ) + # Build per-step ingredient quantity maps (empty dicts if no ingredient data) + use_ingredients = ( + bool(ingredients) + and bool(ingredient_names) + and len(ingredients) == len(ingredient_names) + ) + step_ing_qtys: list[dict[str, tuple[float, str]]] + if use_ingredients: + step_ing_qtys = _build_step_ingredient_qtys( + list(ingredients), # type: ignore[arg-type] + list(ingredient_names), # type: ignore[arg-type] + directions, + ) + else: + step_ing_qtys = [{} for _ in directions] + step_analyses: list[StepAnalysis] = [] active_min = 0 passive_min = 0 has_any_passive = False - for step in directions: + for i, step in enumerate(directions): is_passive = _classify_passive(step) detected = _extract_minutes(step) + prep_estimate: int | None = None if is_passive: has_any_passive = True if detected is not None: passive_min += detected + else: + # Fall back to per-technique default + default = _passive_default(step) + if default is not None: + passive_min += default + detected = default # surface in UI as the hint time else: if detected is not None: active_min += detected + # Estimate prep time from action detection + quantity scaling + prep_est = _prep_estimate(step, step_ing_qtys[i]) + if prep_est > 0: + prep_estimate = prep_est + active_min += prep_est + elif detected is None: + # General active step with no time signal — apply a small default + active_min += round(_ACTIVE_STEP_DEFAULT_MIN) + step_analyses.append(StepAnalysis( is_passive=is_passive, detected_minutes=detected, + prep_min=prep_estimate, )) combined_text = " ".join(directions) equipment = _detect_equipment(combined_text, has_any_passive) + total = active_min + passive_min return 
TimeEffortProfile( active_min=active_min, passive_min=passive_min, - total_min=active_min + passive_min, + total_min=total, step_analyses=step_analyses, equipment=equipment, - effort_label=_effort_label(len(directions)), + effort_label=_effort_label(total, len(directions)), ) diff --git a/frontend/src/components/RecipeBrowserPanel.vue b/frontend/src/components/RecipeBrowserPanel.vue index 288310f..068b808 100644 --- a/frontend/src/components/RecipeBrowserPanel.vue +++ b/frontend/src/components/RecipeBrowserPanel.vue @@ -93,6 +93,15 @@ placeholder="Filter by title…" class="browser-search" /> +