feat(recipe-engine): time-effort profile, product-label tokenisation, L1 tuning
- Add TimeEffortProfile + StepAnalysis Pydantic schemas; serialised into RecipeSuggestion so the frontend receives active/passive/total minutes, effort label, and detected equipment per suggestion.
- parse_time_effort() now drives the max_total_min filter (falls back to the step-count estimate when directions contain no explicit time mentions).
- _PRODUCT_TOKEN_STOPWORDS: strips marketing/packaging words from multi-word product labels before adding individual ingredient tokens to pantry_set. "Organic Extra Firm Tofu" → adds "tofu"; improves packaged-food pantry matching.
- L1 candidate pool raised to 60 (was 20); min_match_ratio lowered to 0.35 (was 0.60) to keep enough results for plant-based / packaged-food pantries.
- household.py: tighten imports to pull HEIMDALL_URL / HEIMDALL_ADMIN_TOKEN from services.heimdall_orch (matches the refactor in cloud_session.py).
This commit is contained in:
parent
ed04b655be
commit
9c4d8b7883
9 changed files with 208 additions and 8 deletions
|
|
@ -11,7 +11,8 @@ import sqlite3
|
||||||
import requests
|
import requests
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
|
||||||
from app.cloud_session import CloudUser, CLOUD_DATA_ROOT, HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN, get_session
|
from app.cloud_session import CloudUser, CLOUD_DATA_ROOT, get_session
|
||||||
|
from app.services.heimdall_orch import HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN
|
||||||
from app.db.store import Store
|
from app.db.store import Store
|
||||||
from app.models.schemas.household import (
|
from app.models.schemas.household import (
|
||||||
HouseholdAcceptRequest,
|
HouseholdAcceptRequest,
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,27 @@ from __future__ import annotations
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class StepAnalysis(BaseModel):
    """Active/passive classification for one direction step."""

    # True when the step was classified as passive (hands-off) time rather
    # than active work; classification is done upstream by parse_time_effort.
    is_passive: bool
    # Minutes parsed from the step text; None when the step mentions no
    # explicit duration.
    detected_minutes: int | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class TimeEffortProfile(BaseModel):
    """Parsed time and effort profile for a recipe.

    Mirrors app.services.recipe.time_effort.TimeEffortProfile (dataclass).
    Serialised into RecipeSuggestion so the frontend can render the effort
    summary without a second round-trip.
    """

    # Minutes of active (hands-on) work detected across the directions.
    active_min: int = 0
    # Minutes of passive (hands-off) time detected across the directions.
    passive_min: int = 0
    # Overall recipe time in minutes; presumably active_min + passive_min —
    # TODO confirm against the service-layer dataclass.
    total_min: int = 0
    effort_label: str = "moderate"  # "quick" | "moderate" | "involved"
    # Equipment terms detected in the directions (e.g. appliance names).
    equipment: list[str] = Field(default_factory=list)
    # Per-step classification; order parallels the recipe's directions list.
    step_analyses: list[StepAnalysis] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
class SwapCandidate(BaseModel):
|
class SwapCandidate(BaseModel):
|
||||||
original_name: str
|
original_name: str
|
||||||
substitute_name: str
|
substitute_name: str
|
||||||
|
|
@ -43,6 +64,7 @@ class RecipeSuggestion(BaseModel):
|
||||||
source_url: str | None = None
|
source_url: str | None = None
|
||||||
complexity: str | None = None # 'easy' | 'moderate' | 'involved'
|
complexity: str | None = None # 'easy' | 'moderate' | 'involved'
|
||||||
estimated_time_min: int | None = None # derived from step count + method signals
|
estimated_time_min: int | None = None # derived from step count + method signals
|
||||||
|
time_effort: TimeEffortProfile | None = None # full time/effort profile from parse_time_effort
|
||||||
rerank_score: float | None = None # cross-encoder relevance score (paid+ only, None for free tier)
|
rerank_score: float | None = None # cross-encoder relevance score (paid+ only, None for free tier)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ from typing import TYPE_CHECKING
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from app.db.store import Store
|
from app.db.store import Store
|
||||||
|
|
||||||
from app.models.schemas.recipe import GroceryLink, NutritionPanel, RecipeRequest, RecipeResult, RecipeSuggestion, SwapCandidate
|
from app.models.schemas.recipe import GroceryLink, NutritionPanel, RecipeRequest, RecipeResult, RecipeSuggestion, StepAnalysis, TimeEffortProfile, SwapCandidate
|
||||||
from app.services.recipe.element_classifier import ElementClassifier
|
from app.services.recipe.element_classifier import ElementClassifier
|
||||||
from app.services.recipe.grocery_links import GroceryLinkBuilder
|
from app.services.recipe.grocery_links import GroceryLinkBuilder
|
||||||
from app.services.recipe.substitution_engine import SubstitutionEngine
|
from app.services.recipe.substitution_engine import SubstitutionEngine
|
||||||
|
|
@ -36,6 +36,38 @@ _SWAP_STOPWORDS = frozenset({
|
||||||
"to", "from", "at", "by", "as", "on",
|
"to", "from", "at", "by", "as", "on",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Marketing / prep / packaging words stripped when tokenising product-label names
|
||||||
|
# into individual ingredient tokens. Parallel to Store._FTS_TOKEN_STOPWORDS —
|
||||||
|
# both lists should agree. Kept here to avoid a circular import at runtime.
|
||||||
|
_PRODUCT_TOKEN_STOPWORDS = frozenset({
|
||||||
|
# Basic English stopwords
|
||||||
|
"a", "an", "the", "of", "in", "for", "with", "and", "or", "to",
|
||||||
|
"from", "at", "by", "as", "on", "into",
|
||||||
|
# Brand / marketing words that appear in product names
|
||||||
|
"lean", "cuisine", "healthy", "choice", "stouffer", "original",
|
||||||
|
"classic", "deluxe", "homestyle", "family", "style", "grade",
|
||||||
|
"premium", "select", "natural", "organic", "fresh", "lite",
|
||||||
|
"ready", "quick", "easy", "instant", "microwave", "frozen",
|
||||||
|
"brand", "size", "large", "small", "medium", "extra",
|
||||||
|
# Plant-based / alt-meat brand names
|
||||||
|
"daring", "gardein", "morningstar", "lightlife", "tofurky",
|
||||||
|
"quorn", "omni", "nuggs", "simulate",
|
||||||
|
# Preparation states
|
||||||
|
"cut", "diced", "sliced", "chopped", "minced", "shredded",
|
||||||
|
"cooked", "raw", "whole", "boneless", "skinless", "trimmed",
|
||||||
|
"pre", "prepared", "marinated", "seasoned", "breaded", "battered",
|
||||||
|
"grilled", "roasted", "smoked", "canned", "dried", "dehydrated",
|
||||||
|
"pieces", "piece", "strips", "strip", "chunks", "chunk",
|
||||||
|
"fillets", "fillet", "cutlets", "cutlet", "tenders", "nuggets",
|
||||||
|
# Units / packaging
|
||||||
|
"oz", "lb", "lbs", "pkg", "pack", "box", "can", "bag", "jar",
|
||||||
|
# Adjectives that aren't ingredients
|
||||||
|
"firm", "soft", "silken", "hard", "crispy", "crunchy", "smooth",
|
||||||
|
"mild", "spicy", "hot", "sweet", "savory", "unsalted", "salted",
|
||||||
|
"low", "high", "reduced", "free", "fat", "sodium", "sugar", "calorie",
|
||||||
|
"dairy", "gluten", "vegan", "plant", "based", "free",
|
||||||
|
})
|
||||||
|
|
||||||
# Maps product-label substrings to recipe-corpus canonical terms.
|
# Maps product-label substrings to recipe-corpus canonical terms.
|
||||||
# Kept in sync with Store._FTS_SYNONYMS — both must agree on canonical names.
|
# Kept in sync with Store._FTS_SYNONYMS — both must agree on canonical names.
|
||||||
# Used to expand pantry_set so single-word recipe ingredients can match
|
# Used to expand pantry_set so single-word recipe ingredients can match
|
||||||
|
|
@ -363,6 +395,13 @@ def _expand_pantry_set(
|
||||||
if pattern in lower:
|
if pattern in lower:
|
||||||
expanded.add(canonical)
|
expanded.add(canonical)
|
||||||
|
|
||||||
|
# Extract individual ingredient tokens from multi-word product names.
|
||||||
|
# "Organic Extra Firm Tofu" → adds "tofu"; "Brown Basmati Rice" → adds "rice".
|
||||||
|
# This catches plain ingredients that _PANTRY_LABEL_SYNONYMS doesn't translate.
|
||||||
|
for token in lower.split():
|
||||||
|
if len(token) >= 4 and token not in _PRODUCT_TOKEN_STOPWORDS:
|
||||||
|
expanded.add(token)
|
||||||
|
|
||||||
# Secondary state expansion — adds terms like "stale bread", "day-old rice"
|
# Secondary state expansion — adds terms like "stale bread", "day-old rice"
|
||||||
if secondary_pantry_items and item in secondary_pantry_items:
|
if secondary_pantry_items and item in secondary_pantry_items:
|
||||||
state_label = secondary_pantry_items[item]
|
state_label = secondary_pantry_items[item]
|
||||||
|
|
@ -736,9 +775,13 @@ class RecipeEngine:
|
||||||
# - match ratio: require ≥60% ingredient coverage to avoid low-signal results
|
# - match ratio: require ≥60% ingredient coverage to avoid low-signal results
|
||||||
_l1 = req.level == 1 and not req.shopping_mode
|
_l1 = req.level == 1 and not req.shopping_mode
|
||||||
nf = req.nutrition_filters
|
nf = req.nutrition_filters
|
||||||
|
# L1 uses a larger candidate pool — the ratio gate below will prune
|
||||||
|
# aggressively anyway, so we need more raw candidates to end up with
|
||||||
|
# enough results for a packaged-food / plant-based pantry.
|
||||||
|
_fts_limit = 60 if _l1 else 20
|
||||||
rows = self._store.search_recipes_by_ingredients(
|
rows = self._store.search_recipes_by_ingredients(
|
||||||
req.pantry_items,
|
req.pantry_items,
|
||||||
limit=20,
|
limit=_fts_limit,
|
||||||
category=req.category or None,
|
category=req.category or None,
|
||||||
max_calories=nf.max_calories,
|
max_calories=nf.max_calories,
|
||||||
max_sugar_g=nf.max_sugar_g,
|
max_sugar_g=nf.max_sugar_g,
|
||||||
|
|
@ -749,8 +792,11 @@ class RecipeEngine:
|
||||||
)
|
)
|
||||||
|
|
||||||
# L1 strict defaults: cap missing ingredients and require a minimum ratio.
|
# L1 strict defaults: cap missing ingredients and require a minimum ratio.
|
||||||
|
# 0.35 allows ~1/3 ingredient coverage — low enough for packaged/plant-based
|
||||||
|
# pantries that rarely match raw-ingredient corpus recipes 1:1, but still
|
||||||
|
# filters out recipes where only one common staple matched.
|
||||||
_L1_MAX_MISSING_DEFAULT = 2
|
_L1_MAX_MISSING_DEFAULT = 2
|
||||||
_L1_MIN_MATCH_RATIO = 0.6
|
_L1_MIN_MATCH_RATIO = 0.35
|
||||||
effective_max_missing = req.max_missing
|
effective_max_missing = req.max_missing
|
||||||
if _l1 and effective_max_missing is None:
|
if _l1 and effective_max_missing is None:
|
||||||
effective_max_missing = _L1_MAX_MISSING_DEFAULT
|
effective_max_missing = _L1_MAX_MISSING_DEFAULT
|
||||||
|
|
@ -834,9 +880,10 @@ class RecipeEngine:
|
||||||
except Exception:
|
except Exception:
|
||||||
directions = [directions]
|
directions = [directions]
|
||||||
|
|
||||||
# Compute complexity for every suggestion (used for badge + filter).
|
# Compute complexity + parse time effort once — reused for filters and response.
|
||||||
row_complexity = _classify_method_complexity(directions, available_equipment)
|
row_complexity = _classify_method_complexity(directions, available_equipment)
|
||||||
row_time_min = _estimate_time_min(directions, row_complexity)
|
row_time_min = _estimate_time_min(directions, row_complexity)
|
||||||
|
row_time_effort = parse_time_effort(directions)
|
||||||
|
|
||||||
# Filter and tier-rank by hard_day_mode
|
# Filter and tier-rank by hard_day_mode
|
||||||
if req.hard_day_mode:
|
if req.hard_day_mode:
|
||||||
|
|
@ -856,9 +903,16 @@ class RecipeEngine:
|
||||||
if req.max_time_min is not None and row_time_min > req.max_time_min:
|
if req.max_time_min is not None and row_time_min > req.max_time_min:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Total time filter (kiwi#52) — uses parsed time from directions
|
# Total time filter (kiwi#52).
|
||||||
if req.max_total_min is not None and not _within_time(directions, req.max_total_min):
|
# Prefer parsed time extracted from direction text (explicit "15 minutes" mentions).
|
||||||
continue
|
# When directions contain no parseable time signals, fall back to the
|
||||||
|
# step-count estimate so the filter still has teeth on the corpus majority.
|
||||||
|
if req.max_total_min is not None:
|
||||||
|
if row_time_effort.total_min > 0:
|
||||||
|
if row_time_effort.total_min > req.max_total_min:
|
||||||
|
continue
|
||||||
|
elif row_time_min > req.max_total_min:
|
||||||
|
continue
|
||||||
|
|
||||||
# Level 2: also add dietary constraint swaps from substitution_pairs
|
# Level 2: also add dietary constraint swaps from substitution_pairs
|
||||||
if req.level == 2 and req.constraints:
|
if req.level == 2 and req.constraints:
|
||||||
|
|
@ -897,6 +951,20 @@ class RecipeEngine:
|
||||||
v is not None
|
v is not None
|
||||||
for v in (nutrition.calories, nutrition.sugar_g, nutrition.carbs_g)
|
for v in (nutrition.calories, nutrition.sugar_g, nutrition.carbs_g)
|
||||||
)
|
)
|
||||||
|
te = TimeEffortProfile(
|
||||||
|
active_min=row_time_effort.active_min,
|
||||||
|
passive_min=row_time_effort.passive_min,
|
||||||
|
total_min=row_time_effort.total_min,
|
||||||
|
effort_label=row_time_effort.effort_label,
|
||||||
|
equipment=list(row_time_effort.equipment),
|
||||||
|
step_analyses=[
|
||||||
|
StepAnalysis(
|
||||||
|
is_passive=sa.is_passive,
|
||||||
|
detected_minutes=sa.detected_minutes,
|
||||||
|
)
|
||||||
|
for sa in row_time_effort.step_analyses
|
||||||
|
],
|
||||||
|
)
|
||||||
suggestions.append(RecipeSuggestion(
|
suggestions.append(RecipeSuggestion(
|
||||||
id=row["id"],
|
id=row["id"],
|
||||||
title=row["title"],
|
title=row["title"],
|
||||||
|
|
@ -905,12 +973,14 @@ class RecipeEngine:
|
||||||
swap_candidates=swap_candidates,
|
swap_candidates=swap_candidates,
|
||||||
matched_ingredients=matched,
|
matched_ingredients=matched,
|
||||||
missing_ingredients=missing,
|
missing_ingredients=missing,
|
||||||
|
directions=directions,
|
||||||
prep_notes=sorted(prep_note_set),
|
prep_notes=sorted(prep_note_set),
|
||||||
level=req.level,
|
level=req.level,
|
||||||
nutrition=nutrition if has_nutrition else None,
|
nutrition=nutrition if has_nutrition else None,
|
||||||
source_url=_build_source_url(row),
|
source_url=_build_source_url(row),
|
||||||
complexity=row_complexity,
|
complexity=row_complexity,
|
||||||
estimated_time_min=row_time_min,
|
estimated_time_min=row_time_min,
|
||||||
|
time_effort=te,
|
||||||
))
|
))
|
||||||
|
|
||||||
# Sort corpus results.
|
# Sort corpus results.
|
||||||
|
|
|
||||||
BIN
tests/fixtures/recipe_scan/PXL_20260425_210039667.jpg
vendored
Normal file
BIN
tests/fixtures/recipe_scan/PXL_20260425_210039667.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.2 MiB |
BIN
tests/fixtures/recipe_scan/PXL_20260425_210126591.jpg
vendored
Normal file
BIN
tests/fixtures/recipe_scan/PXL_20260425_210126591.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.4 MiB |
BIN
tests/fixtures/recipe_scan/PXL_20260425_210135540.jpg
vendored
Normal file
BIN
tests/fixtures/recipe_scan/PXL_20260425_210135540.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.8 MiB |
BIN
tests/fixtures/recipe_scan/PXL_20260425_210156606.jpg
vendored
Normal file
BIN
tests/fixtures/recipe_scan/PXL_20260425_210156606.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.9 MiB |
BIN
tests/fixtures/recipe_scan/PXL_20260425_210159902.jpg
vendored
Normal file
BIN
tests/fixtures/recipe_scan/PXL_20260425_210159902.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.7 MiB |
107
tests/fixtures/recipe_scan/extract_test.py
vendored
Normal file
107
tests/fixtures/recipe_scan/extract_test.py
vendored
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Prompt validation harness for recipe scanner (kiwi#9).
|
||||||
|
|
||||||
|
Runs the draft extraction prompt against fixture images using the Anthropic API
|
||||||
|
directly (bypasses llm.yaml — for prompt dev only, not production path).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python extract_test.py <image1.jpg> [image2.jpg]
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from PIL import Image, ImageOps
|
||||||
|
import anthropic
|
||||||
|
|
||||||
|
PROMPT = """
|
||||||
|
You are extracting a recipe from a photograph of a recipe card, cookbook page, or handwritten note.
|
||||||
|
|
||||||
|
If two images are provided, treat them as a single recipe across two pages (e.g. ingredients on page 1, directions on page 2).
|
||||||
|
|
||||||
|
Return a single JSON object with these fields:
|
||||||
|
- title: recipe name (string)
|
||||||
|
- subtitle: any secondary title or serving suggestion e.g. "with Broccoli & Ranch Dressing" (string or null)
|
||||||
|
- servings: serving size if shown, as a string e.g. "2", "4-6" (string or null)
|
||||||
|
- cook_time: total cook time if shown, e.g. "15 min", "1 hour" (string or null)
|
||||||
|
- source_note: any attribution text like "From Betty Crocker" or "Purple Carrot" (string or null)
|
||||||
|
- ingredients: array of ingredient objects, each with:
|
||||||
|
- name: normalized generic ingredient name, lowercase, no quantities, no brand names
|
||||||
|
(e.g. "Follow Your Heart® Vegan Ranch" → "ranch dressing")
|
||||||
|
- qty: quantity as a string, preserving fractions e.g. "1/2", "¼" (string or null)
|
||||||
|
- unit: unit of measure, null for countable items (e.g. "3 eggs" → unit: null)
|
||||||
|
- raw: the original ingredient line verbatim, exactly as it appears
|
||||||
|
- steps: ordered array of instruction strings, one distinct step per element
|
||||||
|
- notes: any tips, substitutions, storage instructions, or variations (string or null)
|
||||||
|
- confidence: "high" if text is clear and complete, "medium" if some parts are uncertain,
|
||||||
|
"low" if mostly handwritten or significantly degraded
|
||||||
|
- warnings: array of strings describing anything the user should double-check
|
||||||
|
(e.g. "Directions appear to continue on another page not shown")
|
||||||
|
|
||||||
|
Return only valid JSON. No markdown fences. No explanation outside the JSON.
|
||||||
|
If the image does not appear to be a recipe at all, return: {"error": "not_a_recipe"}
|
||||||
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
|
def load_image_b64(path: Path) -> str:
    """Load image, apply EXIF rotation, return base64-encoded JPEG.

    Args:
        path: Path to the source image (phone photos may carry EXIF rotation).

    Returns:
        Base64-encoded JPEG bytes, re-encoded at quality 90 so the EXIF
        transpose and RGB conversion are baked into the payload.
    """
    # Open the file directly — the original read the whole file into memory
    # and wrapped it in BytesIO before Image.open, which is an unnecessary
    # extra copy. The context manager also guarantees the file handle closes.
    with Image.open(path) as img:
        img = ImageOps.exif_transpose(img)  # fix phone rotation
        img = img.convert("RGB")
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=90)
    return base64.b64encode(buf.getvalue()).decode()
|
||||||
|
|
||||||
|
|
||||||
|
def extract(image_paths: list[Path]) -> dict:
    """Send the fixture image(s) plus PROMPT to the model and parse the JSON reply.

    Args:
        image_paths: One or more pages of the same recipe, in order.

    Returns:
        The parsed JSON object produced by the model.
    """
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    blocks: list[dict] = []
    for page_idx, img_path in enumerate(image_paths):
        # Label every page after the first so the model treats the images as
        # a continuation of one recipe rather than separate recipes.
        if page_idx:
            blocks.append({"type": "text", "text": f"(Page {page_idx + 1} of the same recipe:)"})
        image_block = {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": load_image_b64(img_path),
            },
        }
        blocks.append(image_block)
    blocks.append({"type": "text", "text": PROMPT})

    response = client.messages.create(
        model="claude-opus-4-6",  # best vision for prompt dev; production uses VisionRouter
        max_tokens=2048,
        messages=[{"role": "user", "content": blocks}],
    )

    payload = response.content[0].text.strip()
    # Strip markdown fences if the model adds them anyway
    if payload.startswith("```"):
        payload = payload.split("```")[1]
        if payload.startswith("json"):
            payload = payload[4:]
    return json.loads(payload)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
paths = [Path(p) for p in sys.argv[1:]]
|
||||||
|
if not paths:
|
||||||
|
print("Usage: python extract_test.py <image1.jpg> [image2.jpg]")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
for p in paths:
|
||||||
|
if not p.exists():
|
||||||
|
print(f"File not found: {p}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
print(f"Extracting from: {[p.name for p in paths]}")
|
||||||
|
print("Applying EXIF rotation + sending to claude-opus-4-6...\n")
|
||||||
|
|
||||||
|
result = extract(paths)
|
||||||
|
print(json.dumps(result, indent=2, ensure_ascii=False))
|
||||||
Loading…
Reference in a new issue