Compare commits

..

2 commits

Author SHA1 Message Date
9c4d8b7883 feat(recipe-engine): time-effort profile, product-label tokenisation, L1 tuning
Some checks failed
CI / Backend (Python) (push) Waiting to run
CI / Frontend (Vue) (push) Waiting to run
Mirror / mirror (push) Has been cancelled
Release / release (push) Has been cancelled
- Add TimeEffortProfile + StepAnalysis Pydantic schemas; serialised into
  RecipeSuggestion so the frontend receives active/passive/total minutes,
  effort label, and detected equipment per suggestion.
- parse_time_effort() now drives max_total_min filter (falls back to step-count
  estimate when directions contain no explicit time mentions).
- _PRODUCT_TOKEN_STOPWORDS: strips marketing/packaging words from multi-word
  product labels before adding individual ingredient tokens to pantry_set.
  "Organic Extra Firm Tofu" → adds "tofu"; improves packaged-food pantry match.
- L1 candidate pool raised to 60 (was 20); min_match_ratio lowered to 0.35
  (was 0.60) to keep enough results for plant-based / packaged-food pantries.
- household.py: tighten import to pull HEIMDALL_URL/ADMIN_TOKEN from
  services.heimdall_orch (matches refactor in cloud_session.py).
2026-04-25 21:44:26 -07:00
ed04b655be fix(saved-recipes): resolve FK constraint, null title, and load reliability
- Migration 039: drop saved_recipes.recipe_id FK (SQLite table rebuild).
  The FK referenced main.recipes but corpus lives in an ATTACH'd DB — caused
  500 on every POST /recipes/saved in cloud mode.
- _to_summary: row.get("title") or "" to handle corpus JOIN returning NULL
  title (e.g. placeholder recipe_id 99999).
- list_collections: return [] for Free tier instead of 403 — prevents
  Promise.all in savedStore.load() from aborting the saved-recipes fetch.
- savedStore.load(): switched to Promise.allSettled so a collections failure
  never blocks the saved list from populating.
- RecipesView: star indicator now reflects savedStore.isSaved() (server-side
  saved state) rather than localStorage bookmarks; changed to <span> since
  the star is now read-only visual feedback.
- Removed { immediate: true } from saved-tab watcher — premature bounce to
  Build Your Own before onMounted load() completes.
2026-04-25 21:44:10 -07:00
13 changed files with 257 additions and 20 deletions

View file

@ -11,7 +11,8 @@ import sqlite3
import requests
from fastapi import APIRouter, Depends, HTTPException
from app.cloud_session import CloudUser, CLOUD_DATA_ROOT, HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN, get_session
from app.cloud_session import CloudUser, CLOUD_DATA_ROOT, get_session
from app.services.heimdall_orch import HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN
from app.db.store import Store
from app.models.schemas.household import (
HouseholdAcceptRequest,

View file

@ -35,7 +35,7 @@ def _to_summary(row: dict, store: Store) -> SavedRecipeSummary:
return SavedRecipeSummary(
id=row["id"],
recipe_id=row["recipe_id"],
title=row.get("title", ""),
title=row.get("title") or "",
saved_at=row["saved_at"],
notes=row.get("notes"),
rating=row.get("rating"),
@ -104,8 +104,10 @@ async def list_saved_recipes(
async def list_collections(
session: CloudUser = Depends(get_session),
) -> list[CollectionSummary]:
# Free users can list (they'll always have zero — creating requires Paid).
# Returning 403 here breaks savedStore.load() via Promise.all for non-Paid users.
if not can_use("recipe_collections", session.tier):
raise HTTPException(status_code=403, detail="Collections require Paid tier.")
return []
rows = await asyncio.to_thread(
_in_thread, session.db, lambda s: s.get_collections()
)

View file

@ -0,0 +1,31 @@
-- Migration 039: Drop FK constraint on saved_recipes.recipe_id.
--
-- In cloud mode the recipe corpus is ATTACHed as a separate database.
-- SQLite FK constraints only resolve against the `main` schema, so
-- `REFERENCES recipes(id)` was always failing for cloud saves (the
-- main.recipes table is empty; all data lives in corpus.recipes).
-- The corpus is read-only and never modified by the app, so cascade-on-delete
-- is meaningless anyway. Remove the constraint without changing any data.
-- Table rebuild: SQLite cannot drop a constraint in place, so create a copy of
-- the table without the FK, move the rows across, and swap the names.
-- NOTE(review): PRAGMA foreign_keys is a no-op while a transaction is open —
-- confirm the migration runner executes this script outside of one.
PRAGMA foreign_keys = OFF;

CREATE TABLE saved_recipes_new (
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    recipe_id  INTEGER NOT NULL,
    saved_at   TEXT    NOT NULL DEFAULT (datetime('now')),
    notes      TEXT,
    rating     INTEGER CHECK (rating IS NULL OR (rating >= 0 AND rating <= 5)),
    style_tags TEXT    NOT NULL DEFAULT '[]',
    UNIQUE (recipe_id)
);

-- Copy by explicit column name instead of SELECT *: a positional copy silently
-- depends on the old table's physical column order (columns added later via
-- ALTER TABLE ADD COLUMN always land last) and would put values in the wrong
-- column, or fail on a count mismatch, if that order ever differed.
INSERT INTO saved_recipes_new (id, recipe_id, saved_at, notes, rating, style_tags)
SELECT id, recipe_id, saved_at, notes, rating, style_tags
FROM saved_recipes;

DROP TABLE saved_recipes;
ALTER TABLE saved_recipes_new RENAME TO saved_recipes;

-- Indexes for the rebuilt table (DROP TABLE removes any that belonged to the
-- old one).
CREATE INDEX IF NOT EXISTS idx_saved_recipes_saved_at ON saved_recipes (saved_at DESC);
CREATE INDEX IF NOT EXISTS idx_saved_recipes_rating ON saved_recipes (rating);

PRAGMA foreign_keys = ON;

View file

@ -4,6 +4,27 @@ from __future__ import annotations
from pydantic import BaseModel, Field
class StepAnalysis(BaseModel):
    """Per-step result of the active/passive classification of a recipe direction."""

    # True when the step was classified as passive, False when active.
    is_passive: bool
    # Minutes detected for this step; defaults to None.
    # NOTE(review): presumably None means the step text had no explicit time
    # mention — confirm against parse_time_effort.
    detected_minutes: int | None = None
class TimeEffortProfile(BaseModel):
    """API schema for a recipe's parsed time and effort profile.

    Pydantic counterpart of the dataclass
    app.services.recipe.time_effort.TimeEffortProfile. It is embedded in
    RecipeSuggestion so the frontend can show the effort summary from the
    suggestion payload itself, without a second round-trip.
    """

    # Minute counts; every one defaults to 0.
    active_min: int = 0
    passive_min: int = 0
    total_min: int = 0
    # One of "quick" | "moderate" | "involved" (not enforced by the type).
    effort_label: str = "moderate"
    # Equipment names detected for the recipe; a fresh empty list per instance.
    equipment: list[str] = Field(default_factory=list)
    # One StepAnalysis per analysed step; a fresh empty list per instance.
    step_analyses: list[StepAnalysis] = Field(default_factory=list)
class SwapCandidate(BaseModel):
original_name: str
substitute_name: str
@ -43,6 +64,7 @@ class RecipeSuggestion(BaseModel):
source_url: str | None = None
complexity: str | None = None # 'easy' | 'moderate' | 'involved'
estimated_time_min: int | None = None # derived from step count + method signals
time_effort: TimeEffortProfile | None = None # full time/effort profile from parse_time_effort
rerank_score: float | None = None # cross-encoder relevance score (paid+ only, None for free tier)

View file

@ -20,7 +20,7 @@ from typing import TYPE_CHECKING
if TYPE_CHECKING:
from app.db.store import Store
from app.models.schemas.recipe import GroceryLink, NutritionPanel, RecipeRequest, RecipeResult, RecipeSuggestion, SwapCandidate
from app.models.schemas.recipe import GroceryLink, NutritionPanel, RecipeRequest, RecipeResult, RecipeSuggestion, StepAnalysis, TimeEffortProfile, SwapCandidate
from app.services.recipe.element_classifier import ElementClassifier
from app.services.recipe.grocery_links import GroceryLinkBuilder
from app.services.recipe.substitution_engine import SubstitutionEngine
@ -36,6 +36,38 @@ _SWAP_STOPWORDS = frozenset({
"to", "from", "at", "by", "as", "on",
})
# Marketing / prep / packaging words stripped when tokenising product-label names
# into individual ingredient tokens. Parallel to Store._FTS_TOKEN_STOPWORDS —
# both lists should agree. Kept here to avoid a circular import at runtime.
_PRODUCT_TOKEN_STOPWORDS = frozenset({
# Basic English stopwords
"a", "an", "the", "of", "in", "for", "with", "and", "or", "to",
"from", "at", "by", "as", "on", "into",
# Brand / marketing words that appear in product names
"lean", "cuisine", "healthy", "choice", "stouffer", "original",
"classic", "deluxe", "homestyle", "family", "style", "grade",
"premium", "select", "natural", "organic", "fresh", "lite",
"ready", "quick", "easy", "instant", "microwave", "frozen",
"brand", "size", "large", "small", "medium", "extra",
# Plant-based / alt-meat brand names
"daring", "gardein", "morningstar", "lightlife", "tofurky",
"quorn", "omni", "nuggs", "simulate",
# Preparation states
"cut", "diced", "sliced", "chopped", "minced", "shredded",
"cooked", "raw", "whole", "boneless", "skinless", "trimmed",
"pre", "prepared", "marinated", "seasoned", "breaded", "battered",
"grilled", "roasted", "smoked", "canned", "dried", "dehydrated",
"pieces", "piece", "strips", "strip", "chunks", "chunk",
"fillets", "fillet", "cutlets", "cutlet", "tenders", "nuggets",
# Units / packaging
"oz", "lb", "lbs", "pkg", "pack", "box", "can", "bag", "jar",
# Adjectives that aren't ingredients
"firm", "soft", "silken", "hard", "crispy", "crunchy", "smooth",
"mild", "spicy", "hot", "sweet", "savory", "unsalted", "salted",
"low", "high", "reduced", "free", "fat", "sodium", "sugar", "calorie",
"dairy", "gluten", "vegan", "plant", "based", "free",
})
# Maps product-label substrings to recipe-corpus canonical terms.
# Kept in sync with Store._FTS_SYNONYMS — both must agree on canonical names.
# Used to expand pantry_set so single-word recipe ingredients can match
@ -363,6 +395,13 @@ def _expand_pantry_set(
if pattern in lower:
expanded.add(canonical)
# Extract individual ingredient tokens from multi-word product names.
# "Organic Extra Firm Tofu" → adds "tofu"; "Brown Basmati Rice" → adds "rice".
# This catches plain ingredients that _PANTRY_LABEL_SYNONYMS doesn't translate.
for token in lower.split():
if len(token) >= 4 and token not in _PRODUCT_TOKEN_STOPWORDS:
expanded.add(token)
# Secondary state expansion — adds terms like "stale bread", "day-old rice"
if secondary_pantry_items and item in secondary_pantry_items:
state_label = secondary_pantry_items[item]
@ -736,9 +775,13 @@ class RecipeEngine:
# - match ratio: require ≥35% ingredient coverage (_L1_MIN_MATCH_RATIO) to avoid low-signal results
_l1 = req.level == 1 and not req.shopping_mode
nf = req.nutrition_filters
# L1 uses a larger candidate pool — the ratio gate below will prune
# aggressively anyway, so we need more raw candidates to end up with
# enough results for a packaged-food / plant-based pantry.
_fts_limit = 60 if _l1 else 20
rows = self._store.search_recipes_by_ingredients(
req.pantry_items,
limit=20,
limit=_fts_limit,
category=req.category or None,
max_calories=nf.max_calories,
max_sugar_g=nf.max_sugar_g,
@ -749,8 +792,11 @@ class RecipeEngine:
)
# L1 strict defaults: cap missing ingredients and require a minimum ratio.
# 0.35 allows ~1/3 ingredient coverage — low enough for packaged/plant-based
# pantries that rarely match raw-ingredient corpus recipes 1:1, but still
# filters out recipes where only one common staple matched.
_L1_MAX_MISSING_DEFAULT = 2
_L1_MIN_MATCH_RATIO = 0.6
_L1_MIN_MATCH_RATIO = 0.35
effective_max_missing = req.max_missing
if _l1 and effective_max_missing is None:
effective_max_missing = _L1_MAX_MISSING_DEFAULT
@ -834,9 +880,10 @@ class RecipeEngine:
except Exception:
directions = [directions]
# Compute complexity for every suggestion (used for badge + filter).
# Compute complexity + parse time effort once — reused for filters and response.
row_complexity = _classify_method_complexity(directions, available_equipment)
row_time_min = _estimate_time_min(directions, row_complexity)
row_time_effort = parse_time_effort(directions)
# Filter and tier-rank by hard_day_mode
if req.hard_day_mode:
@ -856,9 +903,16 @@ class RecipeEngine:
if req.max_time_min is not None and row_time_min > req.max_time_min:
continue
# Total time filter (kiwi#52) — uses parsed time from directions
if req.max_total_min is not None and not _within_time(directions, req.max_total_min):
continue
# Total time filter (kiwi#52).
# Prefer parsed time extracted from direction text (explicit "15 minutes" mentions).
# When directions contain no parseable time signals, fall back to the
# step-count estimate so the filter still has teeth on the corpus majority.
if req.max_total_min is not None:
if row_time_effort.total_min > 0:
if row_time_effort.total_min > req.max_total_min:
continue
elif row_time_min > req.max_total_min:
continue
# Level 2: also add dietary constraint swaps from substitution_pairs
if req.level == 2 and req.constraints:
@ -897,6 +951,20 @@ class RecipeEngine:
v is not None
for v in (nutrition.calories, nutrition.sugar_g, nutrition.carbs_g)
)
te = TimeEffortProfile(
active_min=row_time_effort.active_min,
passive_min=row_time_effort.passive_min,
total_min=row_time_effort.total_min,
effort_label=row_time_effort.effort_label,
equipment=list(row_time_effort.equipment),
step_analyses=[
StepAnalysis(
is_passive=sa.is_passive,
detected_minutes=sa.detected_minutes,
)
for sa in row_time_effort.step_analyses
],
)
suggestions.append(RecipeSuggestion(
id=row["id"],
title=row["title"],
@ -905,12 +973,14 @@ class RecipeEngine:
swap_candidates=swap_candidates,
matched_ingredients=matched,
missing_ingredients=missing,
directions=directions,
prep_notes=sorted(prep_note_set),
level=req.level,
nutrition=nutrition if has_nutrition else None,
source_url=_build_source_url(row),
complexity=row_complexity,
estimated_time_min=row_time_min,
time_effort=te,
))
# Sort corpus results.

View file

@ -513,12 +513,12 @@
<span v-if="recipe.estimated_time_min" class="status-badge status-neutral">~{{ recipe.estimated_time_min }}m</span>
<span class="status-badge status-info">Level {{ recipe.level }}</span>
<span v-if="recipe.is_wildcard" class="status-badge status-info">Wildcard</span>
<button
<span
v-if="recipe.id"
:class="['btn-icon', 'btn-bookmark', { active: recipesStore.isBookmarked(recipe.id) }]"
@click="recipesStore.toggleBookmark(recipe)"
:aria-label="recipesStore.isBookmarked(recipe.id) ? 'Remove bookmark: ' + recipe.title : 'Bookmark: ' + recipe.title"
>{{ recipesStore.isBookmarked(recipe.id) ? '★' : '☆' }}</button>
:class="['btn-icon', 'btn-bookmark', { active: savedStore.isSaved(recipe.id) }]"
:aria-label="savedStore.isSaved(recipe.id) ? 'Saved: ' + recipe.title : recipe.title"
:title="savedStore.isSaved(recipe.id) ? 'Saved' : 'Not saved'"
>{{ savedStore.isSaved(recipe.id) ? '★' : '☆' }}</span>
<button
v-if="recipe.id"
class="btn-icon btn-dismiss"
@ -1212,7 +1212,9 @@ onMounted(async () => {
await savedStore.load()
})
// If Saved tab is empty after loading, bounce to Build Your Own
// If Saved tab is empty after loading, bounce to Build Your Own.
// No `immediate: true` here — an immediate fire would happen before onMounted runs load(),
// so loading=false and count=0 is the initial unloaded state, not "empty after load".
watch(
() => ({ loading: savedStore.loading, count: savedStore.saved.length }),
({ loading, count }) => {
@ -1220,7 +1222,6 @@ watch(
activeTab.value = 'build'
}
},
{ immediate: true },
)
</script>

View file

@ -27,12 +27,15 @@ export const useSavedRecipesStore = defineStore('savedRecipes', () => {
async function load() {
loading.value = true
try {
const [items, cols] = await Promise.all([
// Fetch independently — a collections 403 (Free tier) must not prevent
// saved recipes from loading. Backend now returns [] for Free, but guard
// here too in case an older API version is deployed.
const [itemsResult, colsResult] = await Promise.allSettled([
savedRecipesAPI.list({ sort_by: sortBy.value, collection_id: activeCollectionId.value ?? undefined }),
savedRecipesAPI.listCollections(),
])
saved.value = items
collections.value = cols
if (itemsResult.status === 'fulfilled') saved.value = itemsResult.value
if (colsResult.status === 'fulfilled') collections.value = colsResult.value
} finally {
loading.value = false
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 MiB

View file

@ -0,0 +1,107 @@
#!/usr/bin/env python3
"""
Prompt validation harness for recipe scanner (kiwi#9).
Runs the draft extraction prompt against fixture images using the Anthropic API
directly (bypasses llm.yaml for prompt dev only, not production path).
Usage:
python extract_test.py <image1.jpg> [image2.jpg]
"""
import base64
import io
import json
import os
import sys
from pathlib import Path
from PIL import Image, ImageOps
import anthropic
# Extraction prompt appended after the image block(s) in extract().
# The per-ingredient fields are indented under "ingredients" and the examples
# use an explicit arrow so the model can tell nested fields from top-level ones
# and example input from expected output.
PROMPT = """
You are extracting a recipe from a photograph of a recipe card, cookbook page, or handwritten note.
If two images are provided, treat them as a single recipe across two pages (e.g. ingredients on page 1, directions on page 2).
Return a single JSON object with these fields:
- title: recipe name (string)
- subtitle: any secondary title or serving suggestion e.g. "with Broccoli & Ranch Dressing" (string or null)
- servings: serving size if shown, as a string e.g. "2", "4-6" (string or null)
- cook_time: total cook time if shown, e.g. "15 min", "1 hour" (string or null)
- source_note: any attribution text like "From Betty Crocker" or "Purple Carrot" (string or null)
- ingredients: array of ingredient objects, each with:
  - name: normalized generic ingredient name, lowercase, no quantities, no brand names
    (e.g. "Follow Your Heart® Vegan Ranch" → "ranch dressing")
  - qty: quantity as a string, preserving fractions e.g. "1/2", "¼" (string or null)
  - unit: unit of measure, null for countable items (e.g. "3 eggs" → unit: null)
  - raw: the original ingredient line verbatim, exactly as it appears
- steps: ordered array of instruction strings, one distinct step per element
- notes: any tips, substitutions, storage instructions, or variations (string or null)
- confidence: "high" if text is clear and complete, "medium" if some parts are uncertain,
  "low" if mostly handwritten or significantly degraded
- warnings: array of strings describing anything the user should double-check
  (e.g. "Directions appear to continue on another page not shown")
Return only valid JSON. No markdown fences. No explanation outside the JSON.
If the image does not appear to be a recipe at all, return: {"error": "not_a_recipe"}
""".strip()
def load_image_b64(path: Path) -> str:
    """Return the image at *path* as a base64 string of a re-encoded JPEG.

    The file is read fully into memory, its EXIF orientation is applied (so
    photos taken with a rotated phone come out upright), it is converted to
    RGB, and it is re-saved as JPEG at quality 90 before encoding.
    """
    with open(path, "rb") as handle:
        payload = handle.read()

    upright = ImageOps.exif_transpose(Image.open(io.BytesIO(payload)))
    rgb = upright.convert("RGB")

    encoded = io.BytesIO()
    rgb.save(encoded, format="JPEG", quality=90)
    return base64.b64encode(encoded.getvalue()).decode()
def extract(image_paths: list[Path]) -> dict:
    """Send the images plus PROMPT to the Anthropic API and return the parsed JSON.

    Each image becomes one base64 JPEG content block; every image after the
    first is preceded by a short "(Page N of the same recipe:)" text block.
    PROMPT is appended as the final text block.

    Raises:
        KeyError: if ANTHROPIC_API_KEY is not set in the environment.
        json.JSONDecodeError: if the reply text is not valid JSON.
    """
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    blocks = []
    for page_no, image_path in enumerate(image_paths, start=1):
        if page_no > 1:
            blocks.append(
                {"type": "text", "text": f"(Page {page_no} of the same recipe:)"}
            )
        image_source = {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": load_image_b64(image_path),
        }
        blocks.append({"type": "image", "source": image_source})
    blocks.append({"type": "text", "text": PROMPT})

    reply = client.messages.create(
        model="claude-opus-4-6",  # best vision for prompt dev; production uses VisionRouter
        max_tokens=2048,
        messages=[{"role": "user", "content": blocks}],
    )

    text = reply.content[0].text.strip()
    # The prompt forbids markdown fences, but drop them (and a leading "json"
    # language tag) if the model wraps its answer anyway.
    if text.startswith("```"):
        text = text.split("```")[1].removeprefix("json")
    return json.loads(text)
if __name__ == "__main__":
    # CLI entry point: every positional argument is an image path.
    paths = [Path(arg) for arg in sys.argv[1:]]
    if not paths:
        print("Usage: python extract_test.py <image1.jpg> [image2.jpg]")
        sys.exit(1)

    # Report only the first path that does not exist, then stop.
    missing = next((candidate for candidate in paths if not candidate.exists()), None)
    if missing is not None:
        print(f"File not found: {missing}")
        sys.exit(1)

    names = [candidate.name for candidate in paths]
    print(f"Extracting from: {names}")
    print("Applying EXIF rotation + sending to claude-opus-4-6...\n")
    print(json.dumps(extract(paths), indent=2, ensure_ascii=False))