feat(browse): active time estimation, prep scaling, required-ingredient filter
Some checks are pending
CI / Backend (Python) (push) Waiting to run
CI / Frontend (Vue) (push) Waiting to run
Mirror / mirror (push) Waiting to run
Release / release (push) Waiting to run

Time effort (time_effort.py):
- Passive defaults per cooking technique (bake 30 min, slow cook 300 min, etc.)
- Prep action detection with n^0.75 quantity scaling for prep-needing ingredients
- Cross-reference ingredients/ingredient_names arrays to distribute quantity across steps
- Effort label now time-based (quick ≤20 min, moderate ≤45 min, involved >45 min)
- prep_min field added to StepAnalysis schema and Pydantic model
- All parse_time_effort call sites updated to pass ingredients + ingredient_names

Browse required-ingredient filter:
- New required_ingredient query param on GET /recipes/browse/{domain}/{category}
- Enter-to-commit input in RecipeBrowserPanel with auto-clear-on-empty watch
- Substring match via FTS5 ingredient_names column prefix filter
- FTS5 replaces LIKE '%X%' throughout browse_recipes and _browse_by_match
- _all + required_ingredient: 8.4s → 74ms; category + required_ingredient: 2s → 35ms
- _ingredient_fts_term() helper builds 'ingredient_names : "X"*' prefix queries
- Combined keywords + ingredient into single FTS MATCH to avoid secondary scans

Tests: 369/369 passing
This commit is contained in:
pyr0ball 2026-04-27 07:13:12 -07:00
parent e05bfe86f5
commit c9fcfde694
10 changed files with 718 additions and 135 deletions

3
.gitignore vendored
View file

@ -23,6 +23,9 @@ dist/
# Data directories
data/
# Local dev database
*.db
# Test artifacts (MagicMock sqlite files from pytest)
<MagicMock*

View file

@ -327,6 +327,7 @@ async def browse_recipes(
subcategory: Annotated[str | None, Query()] = None,
q: Annotated[str | None, Query(max_length=200)] = None,
sort: Annotated[str, Query(pattern="^(default|alpha|alpha_desc|match)$")] = "default",
required_ingredient: Annotated[str | None, Query(max_length=100)] = None,
session: CloudUser = Depends(get_session),
) -> dict:
"""Return a paginated list of recipes for a domain/category.
@ -335,6 +336,7 @@ async def browse_recipes(
Pass subcategory to narrow within a category that has subcategories.
Pass q to filter by title substring. Pass sort for ordering (default/alpha/alpha_desc/match).
sort=match orders by pantry coverage DESC; falls back to default when no pantry_items.
Pass required_ingredient to restrict results to recipes that must include that ingredient.
"""
if domain not in DOMAINS:
raise HTTPException(status_code=404, detail=f"Unknown domain '{domain}'.")
@ -377,6 +379,7 @@ async def browse_recipes(
q=q or None,
sort=sort,
sensory_exclude=sensory_exclude,
required_ingredient=required_ingredient or None,
)
# ── Attach time/effort signals to each browse result ────────────────
@ -389,7 +392,11 @@ async def browse_recipes(
except Exception:
directions_raw = []
if directions_raw:
_profile = parse_time_effort(directions_raw)
_profile = parse_time_effort(
directions_raw,
ingredients=recipe_row.get("ingredients") or [],
ingredient_names=recipe_row.get("ingredient_names") or [],
)
recipe_row["active_min"] = _profile.active_min
recipe_row["passive_min"] = _profile.passive_min
else:
@ -424,7 +431,11 @@ async def browse_recipes(
except Exception:
directions_raw = []
if directions_raw:
_profile = parse_time_effort(directions_raw)
_profile = parse_time_effort(
directions_raw,
ingredients=recipe_row.get("ingredients") or [],
ingredient_names=recipe_row.get("ingredient_names") or [],
)
recipe_row["active_min"] = _profile.active_min
recipe_row["passive_min"] = _profile.passive_min
else:
@ -574,8 +585,28 @@ async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session))
except Exception:
_directions_for_te = []
_ingredients_for_te = recipe.get("ingredients") or []
if isinstance(_ingredients_for_te, str):
import json as _json3
try:
_ingredients_for_te = _json3.loads(_ingredients_for_te)
except Exception:
_ingredients_for_te = []
_ingredient_names_for_te = recipe.get("ingredient_names") or []
if isinstance(_ingredient_names_for_te, str):
import json as _json4
try:
_ingredient_names_for_te = _json4.loads(_ingredient_names_for_te)
except Exception:
_ingredient_names_for_te = []
if _directions_for_te:
_te = parse_time_effort(_directions_for_te)
_te = parse_time_effort(
_directions_for_te,
ingredients=_ingredients_for_te,
ingredient_names=_ingredient_names_for_te,
)
_time_effort_out: dict | None = {
"active_min": _te.active_min,
"passive_min": _te.passive_min,
@ -583,7 +614,11 @@ async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session))
"effort_label": _te.effort_label,
"equipment": _te.equipment,
"step_analyses": [
{"is_passive": sa.is_passive, "detected_minutes": sa.detected_minutes}
{
"is_passive": sa.is_passive,
"detected_minutes": sa.detected_minutes,
"prep_min": sa.prep_min,
}
for sa in _te.step_analyses
],
}

View file

@ -1129,6 +1129,19 @@ class Store:
phrases = ['"' + kw.replace('"', '""') + '"' for kw in keywords]
return " OR ".join(phrases)
@staticmethod
def _ingredient_fts_term(ingredient: str) -> str:
"""Build an FTS5 ingredient_names column prefix-filter.
Returns e.g. 'ingredient_names : "potato"*' which matches any recipe whose
ingredient_names column contains a token starting with that word. Prefix
matching (*) means "potato" also matches "potatoes", "sweet potato", etc.
Apostrophes are stripped because the FTS5 tokenizer drops them.
"""
cleaned = ingredient.replace("'", "").strip()
escaped = cleaned.replace('"', '""')
return f'ingredient_names : "{escaped}"*'
def _count_recipes_for_keywords(self, keywords: list[str]) -> int:
if not keywords:
return 0
@ -1157,6 +1170,7 @@ class Store:
q: str | None = None,
sort: str = "default",
sensory_exclude: SensoryExclude | None = None,
required_ingredient: str | None = None,
) -> dict:
"""Return a page of recipes matching the keyword set.
@ -1165,9 +1179,11 @@ class Store:
is provided. match_pct is the fraction of ingredient_names covered by
the pantry set computed deterministically, no LLM needed.
q: optional title substring filter (case-insensitive LIKE).
sort: "default" (corpus order) | "alpha" (AZ) | "alpha_desc" (ZA)
| "match" (pantry coverage DESC falls back to default when no pantry).
q: optional title substring filter (case-insensitive LIKE).
sort: "default" (corpus order) | "alpha" (AZ) | "alpha_desc" (ZA)
| "match" (pantry coverage DESC falls back to default when no pantry).
required_ingredient: when set, only return recipes whose ingredient_names contain
this substring (case-insensitive). "must include" filter.
"""
if keywords is not None and not keywords:
return {"recipes": [], "total": 0, "page": page}
@ -1186,20 +1202,48 @@ class Store:
q_param = f"%{q.strip()}%" if q and q.strip() else None
# ── required-ingredient FTS filter (must-include) ─────────────────────
# FTS5 column prefix-filter avoids the full table scan that LIKE '%X%' would do.
req_fts_term = (
self._ingredient_fts_term(required_ingredient) if required_ingredient else ""
)
# ── match sort: push match_pct computation into SQL so ORDER BY works ──
if effective_sort == "match" and pantry_set:
return self._browse_by_match(
keywords, page, page_size, offset, pantry_set, q_param, c,
sensory_exclude=sensory_exclude,
required_ingredient=required_ingredient,
)
cols = (
f"SELECT id, title, category, keywords, ingredient_names,"
f" calories, fat_g, protein_g, sodium_mg, directions, sensory_tags FROM {c}recipes"
)
fts_sub = f"id IN (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)"
if keywords is None:
if q_param:
if req_fts_term:
# Ingredient filter: use FTS index — much faster than LIKE on full table
if q_param:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?)",
(req_fts_term, q_param),
).fetchone()[0]
rows = self._fetch_all(
f"{cols} WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?) {order_clause} LIMIT ? OFFSET ?",
(req_fts_term, q_param, page_size, offset),
)
else:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub}",
(req_fts_term,),
).fetchone()[0]
rows = self._fetch_all(
f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? OFFSET ?",
(req_fts_term, page_size, offset),
)
elif q_param:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE LOWER(title) LIKE LOWER(?)",
(q_param,),
@ -1215,23 +1259,32 @@ class Store:
(page_size, offset),
)
else:
match_expr = self._browser_fts_query(keywords)
fts_sub = f"id IN (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)"
keywords_expr = self._browser_fts_query(keywords)
# Combine keywords + ingredient into one FTS MATCH to use a single index pass
combined_match = (
f"({keywords_expr}) AND {req_fts_term}" if req_fts_term else keywords_expr
)
if q_param:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?)",
(match_expr, q_param),
(combined_match, q_param),
).fetchone()[0]
rows = self._fetch_all(
f"{cols} WHERE {fts_sub} AND LOWER(title) LIKE LOWER(?) {order_clause} LIMIT ? OFFSET ?",
(match_expr, q_param, page_size, offset),
(combined_match, q_param, page_size, offset),
)
else:
# Reuse cached count — avoids a second index scan on every page turn.
total = self._count_recipes_for_keywords(keywords)
if required_ingredient:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE {fts_sub}",
(combined_match,),
).fetchone()[0]
else:
# Reuse cached count — avoids a second index scan on every page turn.
total = self._count_recipes_for_keywords(keywords)
rows = self._fetch_all(
f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? OFFSET ?",
(match_expr, page_size, offset),
(combined_match, page_size, offset),
)
# Community tag fallback: if FTS found nothing, check whether
# community-tagged recipe IDs exist for this keyword context.
@ -1313,6 +1366,7 @@ class Store:
q_param: str | None,
c: str,
sensory_exclude: SensoryExclude | None = None,
required_ingredient: str | None = None,
) -> dict:
"""Browse recipes sorted by pantry match percentage.
@ -1327,16 +1381,48 @@ class Store:
pantry_lower = {p.lower() for p in pantry_set}
# ── required-ingredient FTS filter (must-include) ─────────────────────
req_fts_term = (
self._ingredient_fts_term(required_ingredient) if required_ingredient else ""
)
# ── Fetch candidate pool from FTS ────────────────────────────────────
base_cols = (
f"SELECT r.id, r.title, r.category, r.ingredient_names, r.directions, r.sensory_tags"
f" FROM {c}recipes r"
)
fts_sub = (
f"r.id IN (SELECT rowid FROM {c}recipe_browser_fts"
f" WHERE recipe_browser_fts MATCH ?)"
)
self.conn.row_factory = sqlite3.Row
if keywords is None:
if q_param:
if req_fts_term:
if q_param:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE id IN"
f" (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)"
f" AND LOWER(title) LIKE LOWER(?)",
(req_fts_term, q_param),
).fetchone()[0]
rows = self.conn.execute(
f"{base_cols} WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)"
f" ORDER BY r.id ASC LIMIT ?",
(req_fts_term, q_param, self._MATCH_POOL_SIZE),
).fetchall()
else:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE id IN"
f" (SELECT rowid FROM {c}recipe_browser_fts WHERE recipe_browser_fts MATCH ?)",
(req_fts_term,),
).fetchone()[0]
rows = self.conn.execute(
f"{base_cols} WHERE {fts_sub} ORDER BY r.id ASC LIMIT ?",
(req_fts_term, self._MATCH_POOL_SIZE),
).fetchall()
elif q_param:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes WHERE LOWER(title) LIKE LOWER(?)",
(q_param,),
@ -1355,27 +1441,32 @@ class Store:
(self._MATCH_POOL_SIZE,),
).fetchall()
else:
match_expr = self._browser_fts_query(keywords)
fts_sub = (
f"r.id IN (SELECT rowid FROM {c}recipe_browser_fts"
f" WHERE recipe_browser_fts MATCH ?)"
keywords_expr = self._browser_fts_query(keywords)
combined_match = (
f"({keywords_expr}) AND {req_fts_term}" if req_fts_term else keywords_expr
)
if q_param:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes r"
f" WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)",
(match_expr, q_param),
(combined_match, q_param),
).fetchone()[0]
rows = self.conn.execute(
f"{base_cols} WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)"
f" ORDER BY r.id ASC LIMIT ?",
(match_expr, q_param, self._MATCH_POOL_SIZE),
(combined_match, q_param, self._MATCH_POOL_SIZE),
).fetchall()
else:
total = self._count_recipes_for_keywords(keywords)
if required_ingredient:
total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes r WHERE {fts_sub}",
(combined_match,),
).fetchone()[0]
else:
total = self._count_recipes_for_keywords(keywords)
rows = self.conn.execute(
f"{base_cols} WHERE {fts_sub} ORDER BY r.id ASC LIMIT ?",
(match_expr, self._MATCH_POOL_SIZE),
(combined_match, self._MATCH_POOL_SIZE),
).fetchall()
# ── Score in Python, sort, paginate ──────────────────────────────────

View file

@ -16,6 +16,7 @@ class StepAnalysis(BaseModel):
"""Active/passive classification for one direction step."""
is_passive: bool
detected_minutes: int | None = None
prep_min: int | None = None # estimated physical prep time (action detection)
class TimeEffortProfile(BaseModel):

View file

@ -883,7 +883,11 @@ class RecipeEngine:
# Compute complexity + parse time effort once — reused for filters and response.
row_complexity = _classify_method_complexity(directions, available_equipment)
row_time_min = _estimate_time_min(directions, row_complexity)
row_time_effort = parse_time_effort(directions)
row_time_effort = parse_time_effort(
directions,
ingredients=row.get("ingredients") or [],
ingredient_names=row.get("ingredient_names") or [],
)
# Filter and tier-rank by hard_day_mode
if req.hard_day_mode:
@ -961,6 +965,7 @@ class RecipeEngine:
StepAnalysis(
is_passive=sa.is_passive,
detected_minutes=sa.detected_minutes,
prep_min=sa.prep_min,
)
for sa in row_time_effort.step_analyses
],

View file

@ -1,17 +1,27 @@
"""
Runtime parser for active/passive time split and equipment detection.
Runtime parser for active/passive time split, prep effort, and equipment detection.
Operates over a list of direction strings. No I/O pure Python functions.
Sub-millisecond for up to 20 recipes (20 × ~10 steps each = 200 regex calls).
Operates over a list of direction strings plus an optional ingredient list.
No I/O pure Python functions. Sub-millisecond for up to 20 recipes.
Time estimation strategy (in priority order):
1. Explicit time mention in step text ("simmer for 20 minutes")
2. Passive keyword + per-technique default ("bake until golden" 30 min)
3. Prep action + ingredient quantity scaling ("dice 2 lbs potatoes" ~5 min)
4. Fallback active default (assembly/misc steps 2 min each)
Quantity scaling uses n^0.75 (sub-linear, matching human batch-work curves).
Pass `ingredients` + `ingredient_names` to enable cross-referenced scaling.
Without them, prep actions use base times only (no scaling).
"""
from __future__ import annotations
import math
import re
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Final
# ── Passive step keywords (whole-word, case-insensitive) ──────────────────
# ── Passive step keywords ─────────────────────────────────────────────────
_PASSIVE_PATTERNS: Final[list[str]] = [
"simmer", "bake", "roast", "broil", "refrigerate", "marinate",
@ -20,19 +30,39 @@ _PASSIVE_PATTERNS: Final[list[str]] = [
r"slow\s+cook", r"pressure\s+cook",
]
# Pre-compiled as a single alternation — avoids re-compiling on every call.
_PASSIVE_RE: re.Pattern[str] = re.compile(
r"\b(?:" + "|".join(_PASSIVE_PATTERNS) + r")\b",
re.IGNORECASE,
)
# ── Time extraction regex ─────────────────────────────────────────────────
# Per-technique passive defaults (minutes) — used when no explicit time found.
# Calibrated to conservative midpoints from USDA FoodKeeper + culinary practice.
# NOTE: order matters — `_passive_default` returns the FIRST pattern that
# matches the step text, so multi-word techniques precede single words and
# generic waiting verbs ("wait"/"let") sit last as catch-alls.
_PASSIVE_DEFAULTS: Final[list[tuple[re.Pattern[str], int]]] = [
    # Multi-word first (longer match wins)
    (re.compile(r"\bslow\s+cook\b", re.IGNORECASE), 300),  # 5 hr crockpot default
    (re.compile(r"\bpressure\s+cook\b", re.IGNORECASE), 15),
    (re.compile(r"\bovernight\b", re.IGNORECASE), 480),  # 8 hr
    # Single-word
    (re.compile(r"\bbraise\b", re.IGNORECASE), 90),
    (re.compile(r"\bmarinate\b", re.IGNORECASE), 60),
    (re.compile(r"\brefrigerate\b", re.IGNORECASE), 120),
    (re.compile(r"\bproof\b|\brise\b", re.IGNORECASE), 60),
    (re.compile(r"\bsoak\b", re.IGNORECASE), 30),
    (re.compile(r"\bfreeze\b", re.IGNORECASE), 120),
    (re.compile(r"\bchill\b", re.IGNORECASE), 60),
    (re.compile(r"\broast\b", re.IGNORECASE), 40),
    (re.compile(r"\bbake\b", re.IGNORECASE), 30),
    (re.compile(r"\bbroil\b", re.IGNORECASE), 8),
    (re.compile(r"\bsimmer\b", re.IGNORECASE), 20),
    (re.compile(r"\bset\b", re.IGNORECASE), 30),  # gelatin / custard set
    (re.compile(r"\bsteep\b", re.IGNORECASE), 5),
    (re.compile(r"\brest\b|\bstand\b", re.IGNORECASE), 10),
    (re.compile(r"\bcool\b", re.IGNORECASE), 15),
    (re.compile(r"\bwait\b|\blet\b", re.IGNORECASE), 5),
]
# ── Explicit time extraction ──────────────────────────────────────────────
# Two-branch pattern:
# Branch A (groups 1-3): range "15-20 minutes", "15–20 min"
# Branch B (groups 4-5): single "10 minutes", "2 hours", "30 sec"
#
# Separator characters: plain hyphen (-), en-dash (–), or literal "-to-"
_TIME_RE: re.Pattern[str] = re.compile(
r"(\d+)\s*(?:[-\u2013]|-to-)\s*(\d+)\s*(hour|hr|minute|min|second|sec)s?"
r"|"
@ -40,9 +70,242 @@ _TIME_RE: re.Pattern[str] = re.compile(
re.IGNORECASE,
)
_MAX_MINUTES_PER_STEP: Final[int] = 480 # 8 hours sanity cap
_MAX_MINUTES_PER_STEP: Final[int] = 480 # 8-hour sanity cap
# ── Equipment detection (keyword → label, in detection priority order) ────
# ── Prep action detection ─────────────────────────────────────────────────

# Base times (minutes) per prep action, calibrated to ~3 items / 0.5 lb reference.
# These are starting points — flagged for calibration against real recipe timing data.
# Keys must stay single words: _PREP_RE below wraps each key in \b…\b word
# boundaries, and the matched word is looked back up in this dict.
_PREP_ACTION_BASES: Final[dict[str, float]] = {
    # Peeling / stripping
    "peel": 1.5,
    "pare": 1.5,
    "hull": 1.5,
    "pit": 2.0,  # cherries, avocados
    "core": 1.0,
    "stem": 1.0,
    "trim": 1.0,
    # Cutting
    "chop": 2.0,
    "cut": 1.5,
    "dice": 2.5,  # more precise than chop
    "mince": 2.0,
    "slice": 1.5,
    "julienne": 4.0,
    "cube": 2.0,
    "quarter": 1.0,
    "halve": 0.5,
    "shred": 2.0,
    # Grating / zesting
    "grate": 3.0,
    "zest": 2.0,
    # Crushing
    "crush": 0.5,
    "smash": 0.5,
    "crack": 0.5,
    # Mixing / assembly (lower base — less physical effort)
    "knead": 8.0,  # bread dough: consistent regardless of quantity
    "whisk": 1.5,
    "beat": 2.0,
    "cream": 3.0,  # butter + sugar until fluffy
    "fold": 1.5,
    "stir": 0.5,
    "combine": 0.5,
    "mix": 1.0,
    "season": 0.5,
}

# Compiled regex — longer patterns first to avoid partial matches.
_PREP_RE: re.Pattern[str] = re.compile(
    r"\b(?:" + "|".join(
        re.escape(k) for k in sorted(_PREP_ACTION_BASES, key=len, reverse=True)
    ) + r")\b",
    re.IGNORECASE,
)

# Default active time per step when no explicit time and no prep action detected.
_ACTIVE_STEP_DEFAULT_MIN: Final[float] = 2.0
# ── Prep-needing ingredient classification ────────────────────────────────
#
# Only ingredients in this set get quantity-scaled prep time.
# Liquids, spices, canned goods, and dry staples are excluded — they require
# no physical prep beyond measuring.
_PREP_NEEDING: Final[frozenset[str]] = frozenset({
# Alliums
"onion", "shallot", "leek", "scallion", "green onion", "chive", "garlic",
# Root / stem vegetables
"ginger", "carrot", "celery", "potato", "sweet potato", "yam",
"beet", "turnip", "parsnip", "radish", "fennel", "celeriac",
# Squash / gourd family
"zucchini", "squash", "pumpkin", "cucumber",
# Peppers
"pepper", "bell pepper", "jalapeño", "jalapeno", "chili", "chile",
# Brassicas
"broccoli", "cauliflower", "cabbage", "kale", "chard", "spinach",
"brussels sprout",
# Other vegetables
"tomato", "eggplant", "aubergine", "corn", "artichoke", "asparagus",
"green bean", "snow pea", "snap pea", "mushroom", "lettuce",
# Fruits
"apple", "pear", "peach", "nectarine", "plum", "apricot",
"mango", "papaya", "pineapple", "melon", "watermelon", "cantaloupe",
"avocado", "banana",
"strawberry", "raspberry", "blackberry", "blueberry", "cherry",
"citrus", "lemon", "lime", "orange", "grapefruit",
# Protein (trimming / portioning)
"chicken", "turkey", "duck",
"beef", "pork", "lamb", "veal",
"fish", "salmon", "tuna", "cod", "tilapia", "halibut", "shrimp",
"scallop", "crab", "lobster",
# Dairy requiring active prep
"cheese",
# Nuts / seeds (chopping)
"almond", "walnut", "pecan", "cashew", "peanut", "hazelnut",
"pistachio", "macadamia", "nut",
# Fresh herbs (chopping / tearing)
"basil", "parsley", "cilantro", "thyme", "rosemary", "sage",
"dill", "mint", "tarragon",
# Other
"bread",
})
def _is_prep_needing(name: str) -> bool:
"""True if the normalized ingredient name contains any prep-needing keyword."""
nl = name.lower()
return any(kw in nl for kw in _PREP_NEEDING)
# ── Quantity extraction ───────────────────────────────────────────────────
_FRAC_RE: re.Pattern[str] = re.compile(r"(\d+)\s*/\s*(\d+)")
# Weight units → converted to pounds internally
_WEIGHT_RE: re.Pattern[str] = re.compile(
r"(\d+(?:\.\d+)?|\d+\s*/\s*\d+)\s*"
r"(pound|lb|ounce|oz|gram|g(?![a-z])|kilogram|kg)\s*s?\b",
re.IGNORECASE,
)
# Volume (cups only — the common recipe unit for quantity scaling)
_VOLUME_CUP_RE: re.Pattern[str] = re.compile(
r"(\d+(?:\.\d+)?|\d+\s*/\s*\d+)\s*cups?\b",
re.IGNORECASE,
)
# Count — bare integer or decimal followed by optional size/unit word
_COUNT_RE: re.Pattern[str] = re.compile(
r"(?<!\d)(\d+(?:\.\d+)?)\s*"
r"(?:large|medium|small|whole|clove|cloves|head|heads|ear|ears|"
r"stalk|stalks|sprig|sprigs|bunch|bunches|fillet|fillets|"
r"breast|breasts|piece|pieces|slice|slices)?\s*\b",
re.IGNORECASE,
)
# Reference quantities: the "1× base" for each unit type.
# Calibrated so that a typical single-ingredient amount = 1× prep time.
_QTY_REFS: Final[dict[str, float]] = {
"lb": 0.5, # 0.5 lb is the base → 1 lb = 1.4×, 2 lb = 2.0×
"cup": 1.0, # 1 cup = base
"count": 3.0, # 3 items = base → 1 = 0.46×, 6 = 1.6×
}
_SCALE_POWER: Final[float] = 0.75 # sub-linear; revisit with empirical data
_MAX_SCALE: Final[float] = 4.0 # cap at 4× regardless of quantity
_MIN_SCALE: Final[float] = 0.33 # floor at 1/3× for tiny amounts
def _parse_fraction(s: str) -> float:
m = _FRAC_RE.search(s)
if m:
try:
return float(m.group(1)) / float(m.group(2))
except (ValueError, ZeroDivisionError):
return 1.0
try:
return float(s.replace(" ", ""))
except ValueError:
return 1.0
def _extract_qty(text: str) -> tuple[float, str] | None:
"""Return (quantity_in_canonical_units, unit_type) or None.
Unit types: "lb" (weight in pounds), "cup", "count".
All weights are normalised to pounds.
"""
# Weight (most specific — check first)
m = _WEIGHT_RE.search(text)
if m:
qty = _parse_fraction(m.group(1))
u = m.group(2).lower().rstrip("s")
if u in ("pound", "lb"):
return (qty, "lb")
if u in ("ounce", "oz"):
return (qty / 16.0, "lb")
if u in ("gram", "g"):
return (qty / 453.6, "lb")
if u in ("kilogram", "kg"):
return (qty * 2.205, "lb")
# Volume (cups)
m = _VOLUME_CUP_RE.search(text)
if m:
return (_parse_fraction(m.group(1)), "cup")
# Count — only accept values in a sane range to avoid false positives
m = _COUNT_RE.search(text)
if m:
qty = float(m.group(1))
if 0 < qty <= 24:
return (qty, "count")
return None
def _extract_inline_qty_for(text: str, ing_name: str) -> tuple[float, str] | None:
    """Extract the quantity tied specifically to `ing_name` inside a direction step.

    Looks for a number immediately before the ingredient name, optionally
    separated by a single size or unit word. Returns None when no such
    pattern is present in the step text.

    Example: text "Dice 2 large onions and 3 carrots" with ing_name "onion"
    returns (2.0, "count").
    """
    # `??` makes the size/unit group a lazy optional: prefer matching the
    # ingredient name right after the number, consuming the extra word only
    # when needed. `(?:es|s)?` tolerates regular and -es plurals.
    pattern = re.compile(
        r"(\d+(?:\.\d+)?|\d+\s*/\s*\d+)\s*"
        r"(?:large|medium|small|whole|"
        r"(?:pound|lb|ounce|oz|gram|g|kilogram|kg|cup|clove|cloves|"
        r"head|heads|fillet|fillets|breast|breasts|piece|pieces)s?)??\s*"
        + re.escape(ing_name) + r"(?:es|s)?\b",
        re.IGNORECASE,
    )
    m = pattern.search(text)
    if m:
        # Re-extract with _extract_qty on the full matched span to get unit too
        span = text[m.start(): m.end()]
        result = _extract_qty(span)
        if result:
            return result
        # Fallback: bare count
        try:
            return (_parse_fraction(m.group(1)), "count")
        except Exception:
            pass
    return None
def _quantity_scale(qty: float, unit: str) -> float:
    """Sub-linear (n^0.75) scale factor relative to the unit's reference quantity.

    Clamped to [_MIN_SCALE, _MAX_SCALE]. Returns 1.0 for non-positive
    quantities or a non-positive/unknown reference.
    """
    reference = _QTY_REFS.get(unit, 1.0)
    if qty <= 0 or reference <= 0:
        return 1.0
    scale = (qty / reference) ** _SCALE_POWER
    return min(_MAX_SCALE, max(_MIN_SCALE, scale))
# ── Equipment detection ───────────────────────────────────────────────────
_EQUIPMENT_RULES: Final[list[tuple[re.Pattern[str], str]]] = [
(re.compile(r"\b(?:chop|dice|mince|slice|julienne)\b", re.IGNORECASE), "Knife"),
@ -58,74 +321,8 @@ _EQUIPMENT_RULES: Final[list[tuple[re.Pattern[str], str]]] = [
(re.compile(r"\b(?:drain|strain|colander|rinse pasta)\b", re.IGNORECASE), "Colander"),
]
# ── Dataclasses ───────────────────────────────────────────────────────────
@dataclass(frozen=True)
class StepAnalysis:
"""Analysis result for a single direction step."""
is_passive: bool
detected_minutes: int | None # None when no time mention found in text
@dataclass(frozen=True)
class TimeEffortProfile:
"""Aggregated time and effort profile for a full recipe."""
active_min: int # total minutes requiring active attention
passive_min: int # total minutes the cook can step away
total_min: int # active_min + passive_min
step_analyses: list[StepAnalysis] # one entry per direction step
equipment: list[str] # ordered, deduplicated equipment labels
effort_label: str # "quick" | "moderate" | "involved"
# ── Core parsing logic ────────────────────────────────────────────────────
def _extract_minutes(text: str) -> int | None:
"""Return the number of minutes mentioned in text, or None.
Range values (e.g. "15-20 minutes") return the integer midpoint.
Hours are converted to minutes. Seconds are rounded up to 1 minute minimum.
Result is capped at _MAX_MINUTES_PER_STEP.
"""
m = _TIME_RE.search(text)
if m is None:
return None
if m.group(1) is not None:
# Branch A: range match (e.g. "15-20 minutes")
low = int(m.group(1))
high = int(m.group(2))
unit = m.group(3).lower()
raw_value: float = (low + high) / 2
else:
# Branch B: single value match (e.g. "10 minutes")
low = int(m.group(4))
unit = m.group(5).lower()
raw_value = float(low)
if unit in ("hour", "hr"):
minutes: float = raw_value * 60
elif unit in ("second", "sec"):
minutes = max(1.0, math.ceil(raw_value / 60))
else:
minutes = raw_value
return min(int(minutes), _MAX_MINUTES_PER_STEP)
def _classify_passive(text: str) -> bool:
"""Return True if the step text matches any passive keyword (whole-word)."""
return _PASSIVE_RE.search(text) is not None
def _detect_equipment(all_text: str, has_passive: bool) -> list[str]:
"""Return ordered, deduplicated list of equipment labels detected in text.
all_text should be all direction steps joined with spaces.
has_passive controls whether 'Timer' is appended at the end.
"""
seen: set[str] = set()
result: list[str] = []
for pattern, label in _EQUIPMENT_RULES:
@ -137,8 +334,172 @@ def _detect_equipment(all_text: str, has_passive: bool) -> list[str]:
return result
def _effort_label(step_count: int) -> str:
"""Derive effort label from step count."""
# ── Ingredient → step cross-reference ─────────────────────────────────────
def _ingredient_mentioned(text: str, name: str) -> bool:
"""True if `name` appears in `text` as a whole word.
Handles both regular plurals (onion onions) and -es plurals
(potato potatoes, tomato tomatoes).
"""
pattern = re.compile(r"\b" + re.escape(name.lower()) + r"(?:es|s)?\b", re.IGNORECASE)
return bool(pattern.search(text))
def _build_step_ingredient_qtys(
    ingredients: list[str],
    ingredient_names: list[str],
    directions: list[str],
) -> list[dict[str, tuple[float, str]]]:
    """Return, for each direction step, {ing_name: (qty_for_this_step, unit)}.

    Strategy:
    - Filter ingredient pairs to prep-needing items only.
    - Parse total quantities from the raw ingredient strings.
    - For each step, try to find an inline quantity tied to that ingredient name.
    - If no inline quantity, distribute the total evenly across all steps that
      mention the ingredient (handles "3 onions" split across 2 steps).

    Args:
        ingredients: Raw ingredient strings ("2 large onions"), parallel to
            ingredient_names — pairs are consumed via zip.
        ingredient_names: Normalised names ("onion").
        directions: Direction step strings; output list is parallel to this.
    """
    # Build total qty map for prep-needing ingredients
    total_qtys: dict[str, tuple[float, str]] = {}
    for raw, name in zip(ingredients, ingredient_names):
        base = name.lower().strip()
        if not _is_prep_needing(base):
            continue  # liquids/spices/staples: nothing to scale
        result = _extract_qty(raw)
        if result is not None:
            total_qtys[base] = result
    if not total_qtys:
        # No scalable ingredients — every step gets an empty map.
        return [{} for _ in directions]
    # Count how many steps mention each ingredient
    step_counts: dict[str, int] = {n: 0 for n in total_qtys}
    for step in directions:
        for name in total_qtys:
            if _ingredient_mentioned(step, name):
                step_counts[name] += 1
    # Build per-step qty maps
    per_step: list[dict[str, tuple[float, str]]] = []
    for step in directions:
        step_map: dict[str, tuple[float, str]] = {}
        for name, (total, unit) in total_qtys.items():
            if not _ingredient_mentioned(step, name):
                continue
            # Try ingredient-specific inline quantity first
            inline = _extract_inline_qty_for(step, name)
            if inline is not None:
                step_map[name] = inline
            else:
                # Distribute total across steps that reference this ingredient
                n = max(step_counts.get(name, 1), 1)  # defensive: never divide by 0
                step_map[name] = (total / n, unit)
        per_step.append(step_map)
    return per_step
# ── Dataclasses ───────────────────────────────────────────────────────────
@dataclass(frozen=True)
class StepAnalysis:
    """Analysis result for a single direction step."""

    is_passive: bool  # True when the step matched a passive-technique keyword
    detected_minutes: int | None  # explicit or estimated time (None = no signal)
    prep_min: int | None = None  # estimated physical prep time from action detection
@dataclass(frozen=True)
class TimeEffortProfile:
    """Aggregated time and effort profile for a full recipe."""

    active_min: int  # total minutes requiring active attention
    passive_min: int  # total minutes the cook can step away
    total_min: int  # active_min + passive_min
    step_analyses: list[StepAnalysis] = field(default_factory=list)  # one per step
    equipment: list[str] = field(default_factory=list)  # ordered, deduplicated labels
    effort_label: str = "moderate"  # "quick" | "moderate" | "involved"
# ── Core parsing helpers ──────────────────────────────────────────────────
def _extract_minutes(text: str) -> int | None:
    """Return the explicit minutes mentioned in `text`, or None when absent.

    Ranges ("15-20 minutes") yield their integer midpoint; hours convert to
    minutes; seconds round up to at least one minute. The result is capped
    at _MAX_MINUTES_PER_STEP.
    """
    match = _TIME_RE.search(text)
    if match is None:
        return None
    if match.group(1) is None:
        # Branch B: single value ("10 minutes", "2 hours")
        value = float(match.group(4))
        unit = match.group(5).lower()
    else:
        # Branch A: range — take the midpoint
        value = (int(match.group(1)) + int(match.group(2))) / 2
        unit = match.group(3).lower()
    if unit in ("hour", "hr"):
        total = value * 60
    elif unit in ("second", "sec"):
        total = max(1.0, math.ceil(value / 60))
    else:
        total = value
    return min(int(total), _MAX_MINUTES_PER_STEP)
def _classify_passive(text: str) -> bool:
    """True when the step text contains any passive-technique keyword."""
    return bool(_PASSIVE_RE.search(text))
def _passive_default(text: str) -> int | None:
    """Per-technique default passive minutes; None when no keyword matches.

    First matching entry in _PASSIVE_DEFAULTS wins, so list order encodes
    technique precedence.
    """
    return next(
        (minutes for pattern, minutes in _PASSIVE_DEFAULTS if pattern.search(text)),
        None,
    )
def _prep_estimate(
    text: str,
    step_ing_qtys: dict[str, tuple[float, str]],
) -> int:
    """Estimate active prep minutes from the first detected prep action.

    The action's base time is scaled by the quantities of every prep-needing
    ingredient this step mentions. With no ingredient context, the base time
    is used unscaled; with no prep action at all, returns 0.
    """
    hit = _PREP_RE.search(text)
    if hit is None:
        return 0
    base = _PREP_ACTION_BASES.get(hit.group(0).lower(), _ACTIVE_STEP_DEFAULT_MIN)
    scaled_total = 0.0
    any_ingredient = False
    for name, (qty, unit) in step_ing_qtys.items():
        if _ingredient_mentioned(text, name):
            any_ingredient = True
            scaled_total += base * _quantity_scale(qty, unit)
    if not any_ingredient:
        return round(base)  # no ingredient context — base time, unscaled
    return round(scaled_total)
def _effort_label(total_min: int, step_count: int) -> str:
"""Effort label based on total estimated time; falls back to step count."""
if total_min > 0:
if total_min <= 20:
return "quick"
if total_min <= 45:
return "moderate"
return "involved"
# No time signals at all — fall back to step count heuristic
if step_count <= 3:
return "quick"
if step_count <= 7:
@ -146,52 +507,96 @@ def _effort_label(step_count: int) -> str:
return "involved"
# ── Public API ────────────────────────────────────────────────────────────
def parse_time_effort(
    directions: list[str],
    ingredients: list[str] | None = None,
    ingredient_names: list[str] | None = None,
) -> TimeEffortProfile:
    """Parse direction strings into a TimeEffortProfile.

    Args:
        directions: List of step strings from the recipe corpus.
        ingredients: Raw ingredient strings ("2 large onions", "1.5 lbs potatoes").
            Parallel to ingredient_names.
        ingredient_names: Normalised ingredient names ("onion", "potato").
            Required alongside ingredients to enable quantity scaling.

    Returns a zero-value profile with empty lists when directions is empty.
    Never raises — all failures produce sensible defaults.
    """
    if not directions:
        return TimeEffortProfile(
            active_min=0, passive_min=0, total_min=0,
            step_analyses=[], equipment=[], effort_label="quick",
        )
    # Build per-step ingredient quantity maps (empty dicts if no ingredient data).
    # Both arrays must be present and parallel, otherwise scaling is disabled.
    use_ingredients = (
        bool(ingredients)
        and bool(ingredient_names)
        and len(ingredients) == len(ingredient_names)
    )
    step_ing_qtys: list[dict[str, tuple[float, str]]]
    if use_ingredients:
        step_ing_qtys = _build_step_ingredient_qtys(
            list(ingredients),  # type: ignore[arg-type]
            list(ingredient_names),  # type: ignore[arg-type]
            directions,
        )
    else:
        step_ing_qtys = [{} for _ in directions]
    step_analyses: list[StepAnalysis] = []
    active_min = 0
    passive_min = 0
    has_any_passive = False
    for i, step in enumerate(directions):
        is_passive = _classify_passive(step)
        detected = _extract_minutes(step)
        prep_estimate: int | None = None
        if is_passive:
            has_any_passive = True
            if detected is not None:
                passive_min += detected
            else:
                # Fall back to per-technique default
                default = _passive_default(step)
                if default is not None:
                    passive_min += default
                    detected = default  # surface in UI as the hint time
        else:
            if detected is not None:
                active_min += detected
            # Estimate prep time from action detection + quantity scaling
            prep_est = _prep_estimate(step, step_ing_qtys[i])
            if prep_est > 0:
                prep_estimate = prep_est
                active_min += prep_est
            elif detected is None:
                # General active step with no time signal — apply a small default
                active_min += round(_ACTIVE_STEP_DEFAULT_MIN)
        step_analyses.append(StepAnalysis(
            is_passive=is_passive,
            detected_minutes=detected,
            prep_min=prep_estimate,
        ))
    combined_text = " ".join(directions)
    equipment = _detect_equipment(combined_text, has_any_passive)
    total = active_min + passive_min
    return TimeEffortProfile(
        active_min=active_min,
        passive_min=passive_min,
        total_min=total,
        step_analyses=step_analyses,
        equipment=equipment,
        effort_label=_effort_label(total, len(directions)),
    )

View file

@ -93,6 +93,15 @@
placeholder="Filter by title…"
class="browser-search"
/>
<input
v-model="requiredIngredient"
@keyup.enter="onRequiredIngredientCommit"
@search="onRequiredIngredientCommit"
type="search"
placeholder="Must include ingredient… (Enter)"
class="browser-search"
title="Type an ingredient and press Enter to filter"
/>
<div class="sort-btns flex gap-xs">
<button
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'default' }]"
@ -122,6 +131,7 @@
<span class="text-sm text-secondary">
{{ total }} recipes
<span v-if="pantryCount > 0"> pantry match shown</span>
<span v-if="requiredIngredient.trim()"> must include "{{ requiredIngredient.trim() }}"</span>
</span>
<div class="pagination flex gap-xs">
<button
@ -310,6 +320,7 @@ const loadingDomains = ref(false)
const loadingRecipes = ref(false)
const savingRecipe = ref<BrowserRecipe | null>(null)
const searchQuery = ref('')
const requiredIngredient = ref('')
const sortOrder = ref<'default' | 'alpha' | 'alpha_desc' | 'match'>('default')
let searchDebounce: ReturnType<typeof setTimeout> | null = null
let tagSearchDebounce: ReturnType<typeof setTimeout> | null = null
@ -386,6 +397,19 @@ function onSearchInput() {
}, 350)
}
// Commit the required-ingredient filter (Enter key or native search-clear):
// reset to the first page and re-query with the current input value.
function onRequiredIngredientCommit() {
  page.value = 1
  loadRecipes()
}
// Auto-clear results when the field is emptied via backspace/select-delete,
// so an empty filter restores the unfiltered list without pressing Enter.
watch(requiredIngredient, (val, prev) => {
  if (val === '' && prev !== '') {
    page.value = 1
    loadRecipes()
  }
})
function setSort(s: 'default' | 'alpha' | 'alpha_desc' | 'match') {
if (sortOrder.value === s) return
sortOrder.value = s
@ -410,6 +434,7 @@ async function selectDomain(domainId: string) {
total.value = 0
page.value = 1
searchQuery.value = ''
requiredIngredient.value = ''
sortOrder.value = 'default'
categories.value = await browserAPI.listCategories(domainId)
// Auto-select the most-populated category so content appears immediately.
@ -476,6 +501,7 @@ async function loadRecipes() {
subcategory: activeSubcategory.value ?? undefined,
q: searchQuery.value.trim() || undefined,
sort: sortOrder.value !== 'default' ? sortOrder.value : undefined,
required_ingredient: requiredIngredient.value.trim() || undefined,
}
)
recipes.value = result.recipes

View file

@ -1061,6 +1061,7 @@ export const browserAPI = {
subcategory?: string
q?: string
sort?: string
required_ingredient?: string
}): Promise<BrowserResult> {
const response = await api.get(`/recipes/browse/${domain}/${encodeURIComponent(category)}`, { params })
return response.data

View file

@ -38,7 +38,8 @@ class TestBrowseTimeEffortFields:
row["active_min"] = None
row["passive_min"] = None
assert row["active_min"] == 0 # no active time found
# "Chop onion." triggers the chop prep action (base 2.0 min) → active_min >= 1
assert row["active_min"] > 0
assert row["passive_min"] == 20
def test_null_when_directions_empty(self):
@ -115,10 +116,12 @@ class TestDetailTimeEffortField:
],
}
assert time_effort_dict["active_min"] == 5
# "Gather all ingredients." → active default (2 min); "Sear for 5 min" → 5 min
assert time_effort_dict["active_min"] == 7
assert time_effort_dict["passive_min"] == 20
assert time_effort_dict["total_min"] == 25
assert time_effort_dict["effort_label"] == "quick" # 3 steps
assert time_effort_dict["total_min"] == 27
# 27 min total → moderate (21-45 min range)
assert time_effort_dict["effort_label"] == "moderate"
assert isinstance(time_effort_dict["equipment"], list)
assert len(time_effort_dict["step_analyses"]) == 3
assert time_effort_dict["step_analyses"][2]["is_passive"] is True

View file

@ -95,14 +95,15 @@ class TestTimeExtraction:
class TestTimeTotals:
def test_active_passive_split(self):
steps = [
"Chop onions finely.", # active, no time
"Sear chicken for 5 minutes per side.", # active, 5 min
"Simmer for 20 minutes.", # passive, 20 min
"Chop onions finely.", # active; chop action → 2 min prep
"Sear chicken for 5 minutes per side.", # active, 5 min explicit
"Simmer for 20 minutes.", # passive, 20 min explicit
]
result = parse_time_effort(steps)
assert result.active_min == 5
# "Chop onions" now contributes prep_min (chop base=2.0) + 5 explicit = 7 active
assert result.active_min == 7
assert result.passive_min == 20
assert result.total_min == 25
assert result.total_min == 27
def test_all_active_passive_zero(self):
steps = ["Dice vegetables.", "Season with salt.", "Plate and serve."]
@ -130,16 +131,28 @@ class TestEffortLabel:
result = parse_time_effort(["a", "b", "c"])
assert result.effort_label == "quick"
def test_four_steps_is_moderate(self):
result = parse_time_effort(["a", "b", "c", "d"])
def test_bake_recipe_is_moderate(self):
# Passive default for "bake" = 30 min → moderate (21-45 min range)
result = parse_time_effort([
"Mix dry ingredients.",
"Combine wet ingredients.",
"Fold together until just combined.",
"Bake until a toothpick comes out clean.",
])
assert result.effort_label == "moderate"
def test_seven_steps_is_moderate(self):
result = parse_time_effort(["a"] * 7)
assert result.effort_label == "moderate"
def test_slow_cook_recipe_is_involved(self):
# Passive default for "slow cook" = 300 min → involved (>45 min)
result = parse_time_effort([
"Brown the meat in batches.",
"Add vegetables and broth.",
"Slow cook until tender.",
])
assert result.effort_label == "involved"
def test_eight_steps_is_involved(self):
result = parse_time_effort(["a"] * 8)
def test_explicit_time_drives_effort_label(self):
# Explicit passive time of 90 min → involved
result = parse_time_effort(["Braise for 90 minutes."])
assert result.effort_label == "involved"