From 896b4e048ca47dce97cc195267fb863b744a8df2 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 27 Apr 2026 08:23:01 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20recipe=20scanner=20=E2=80=94=20photo=20?= =?UTF-8?q?to=20structured=20recipe=20(kiwi#9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New feature: photograph a recipe card, cookbook page, or handwritten note and have it extracted into a structured, editable recipe. Backend: - POST /recipes/scan: accept 1-4 photos, run VLM extraction, return structured JSON for review (not auto-saved) - POST /recipes/scan/save: persist a reviewed/edited recipe - GET/DELETE /recipes/user: user-created recipe CRUD - Vision backend priority: cf-orch -> local Qwen2.5-VL -> Anthropic BYOK - 503 with clear config hint when no vision backend available - Multi-photo support: facing pages (ingredients/directions) sent together - Pantry cross-reference: marks which ingredients are already on hand - migration 041: user_recipes table (title, servings, cook_time, steps, ingredients JSON, source, pantry_match_pct) - Tier gate: recipe_scan -> paid, BYOK-unlockable Frontend: - "Scan" button in the Recipes tab bar (camera icon) - RecipeScanModal: upload step (drag-drop + file picker, up to 4 photos, live previews), processing step (spinner), review/edit step (all fields inline-editable before save), pantry match badge, warning banner for low-confidence or incomplete scans Tests: 35 new tests (23 unit + 12 API), 404 total passing --- app/api/endpoints/recipe_scan.py | 256 ++++++ app/api/routes.py | 4 + app/db/migrations/041_user_recipes.sql | 23 + app/db/store.py | 53 ++ app/models/schemas/recipe_scan.py | 74 ++ app/services/recipe/recipe_scanner.py | 411 +++++++++ app/tiers.py | 4 + frontend/src/components/RecipeScanModal.vue | 844 +++++++++++++++++++ frontend/src/components/RecipesView.vue | 68 +- frontend/src/services/api.ts | 73 ++ tests/api/test_recipe_scan.py | 304 +++++++ 
tests/services/recipe/test_recipe_scanner.py | 233 +++++ 12 files changed, 2335 insertions(+), 12 deletions(-) create mode 100644 app/api/endpoints/recipe_scan.py create mode 100644 app/db/migrations/041_user_recipes.sql create mode 100644 app/models/schemas/recipe_scan.py create mode 100644 app/services/recipe/recipe_scanner.py create mode 100644 frontend/src/components/RecipeScanModal.vue create mode 100644 tests/api/test_recipe_scan.py create mode 100644 tests/services/recipe/test_recipe_scanner.py diff --git a/app/api/endpoints/recipe_scan.py b/app/api/endpoints/recipe_scan.py new file mode 100644 index 0000000..7fa1515 --- /dev/null +++ b/app/api/endpoints/recipe_scan.py @@ -0,0 +1,256 @@ +"""Recipe scanner endpoints (kiwi#9). + +POST /recipes/scan -- scan photo(s) -> structured recipe JSON (not saved) +POST /recipes/scan/save -- save a confirmed scanned recipe to user_recipes +GET /recipes/user -- list user-created recipes +GET /recipes/user/{id} -- get a single user recipe +DELETE /recipes/user/{id} -- delete a user recipe + +BSL 1.1 -- recipe_scan requires Paid tier or BYOK. 
async def _save_upload_temp(file: UploadFile) -> Path:
    """Write an upload to a uniquely named temp file under ``settings.UPLOAD_DIR``.

    The client-supplied filename is untrusted input: it may be missing, or it
    may contain path separators / ``..`` segments. Only a sanitised version of
    its final path component is used, so the destination can never resolve
    outside the upload directory.

    Args:
        file: The incoming multipart upload.

    Returns:
        Path of the file that was written. The caller is responsible for
        deleting it.
    """
    settings.ensure_dirs()

    # Keep only the last path component, then replace anything that is not a
    # letter, digit, dot, dash or underscore (this also neutralises Windows
    # style backslash separators, which ``Path.name`` keeps on POSIX).
    client_name = Path(file.filename or "upload").name
    safe_name = "".join(
        ch if ch.isalnum() or ch in "._-" else "_" for ch in client_name
    )
    # Cap the length so the uuid-prefixed name stays within filesystem limits.
    safe_name = safe_name[-100:] or "upload"

    dest = settings.UPLOAD_DIR / f"scan_{uuid.uuid4()}_{safe_name}"
    async with aiofiles.open(dest, "wb") as f:
        await f.write(await file.read())
    return dest
subtitle=row.get("subtitle"), + servings=row.get("servings"), + cook_time=row.get("cook_time"), + source_note=row.get("source_note"), + ingredients=[ + ScannedIngredientSchema(**i) if isinstance(i, dict) else i + for i in (row.get("ingredients") or []) + ], + steps=row.get("steps") or [], + notes=row.get("notes"), + tags=row.get("tags") or [], + source=row.get("source", "manual"), + pantry_match_pct=row.get("pantry_match_pct"), + created_at=row["created_at"], + ) + + +# ── Scan endpoint ────────────────────────────────────────────────────────────── + +@router.post("/scan", response_model=ScannedRecipeResponse) +async def scan_recipe( + files: Annotated[list[UploadFile], File(...)], + store: Store = Depends(get_store), + session: CloudUser = Depends(get_session), +): + """Scan one or more recipe photos and return a structured recipe for review. + + Accepts 1-4 images. Multi-page recipes (e.g. ingredients on page 1, + directions on page 2) work best when all pages are submitted together. + + The response is NOT saved automatically -- the user reviews and edits it, + then calls POST /recipes/scan/save to persist. + + Tier: Paid (or BYOK). + """ + if not can_use("recipe_scan", session.tier, session.has_byok): + raise HTTPException( + status_code=403, + detail=( + "Recipe scanning requires Paid tier or a configured vision backend (BYOK). " + "Set ANTHROPIC_API_KEY or connect to a cf-orch vision service." + ), + ) + + if not files: + raise HTTPException(status_code=422, detail="At least one image file is required.") + if len(files) > 4: + raise HTTPException(status_code=422, detail="Maximum 4 images per scan request.") + + for f in files: + ct = (f.content_type or "").lower() + if ct and ct not in _ALLOWED_MIME_TYPES: + raise HTTPException( + status_code=422, + detail=f"Unsupported file type: {ct}. 
Supported: JPEG, PNG, WebP, HEIC.", + ) + + # Save uploads to temp files + saved_paths: list[Path] = [] + try: + for f in files: + saved_paths.append(await _save_upload_temp(f)) + + # Get pantry item names for cross-reference + inventory = await asyncio.to_thread(store.list_inventory) + pantry_names = [item["product_name"] for item in inventory if item.get("product_name")] + + # Run scanner (blocks on VLM -- use to_thread) + from app.services.recipe.recipe_scanner import RecipeScanner + + def _run_scan(): + scanner = RecipeScanner() + return scanner.scan(saved_paths, pantry_names=pantry_names) + + try: + result = await asyncio.to_thread(_run_scan) + except ValueError as exc: + msg = str(exc) + if "not_a_recipe" in msg: + raise HTTPException( + status_code=422, + detail="The image does not appear to contain a recipe. " + "Please photograph a recipe card, cookbook page, or handwritten note.", + ) + raise HTTPException(status_code=422, detail=msg) + except RuntimeError as exc: + raise HTTPException( + status_code=503, + detail=str(exc), + ) + + return _result_to_response(result) + + finally: + # Clean up temp files + for p in saved_paths: + try: + p.unlink(missing_ok=True) + except Exception: + pass + + +# ── Save endpoint ────────────────────────────────────────────────────────────── + +@router.post("/scan/save", response_model=UserRecipeResponse, status_code=201) +async def save_scanned_recipe( + body: ScannedRecipeSaveRequest, + store: Store = Depends(get_store), + session: CloudUser = Depends(get_session), +): + """Save a user-reviewed (possibly edited) scanned recipe. + + The body is the ScannedRecipeResponse (or a user-edited version of it). + Returns the persisted UserRecipe with an assigned ID. + + Tier: Free (saving your own recipe doesn't require vision access). 
@router.delete("/user/{recipe_id}", status_code=204)
async def delete_user_recipe(
    recipe_id: int,
    store: Store = Depends(get_store),
    session: CloudUser = Depends(get_session),
):
    """Delete a user recipe by ID.

    Args:
        recipe_id: Primary key of the row in ``user_recipes``.
        store: Request-scoped data store.
        session: Authenticated session (resolved for its dependency side
            effects; not otherwise read here).

    Returns:
        An empty 204 No Content response.

    Raises:
        HTTPException: 404 when no recipe with that ID exists.
    """
    # Imported locally so this handler does not depend on the module's
    # top-level import list changing.
    from fastapi import Response

    deleted = await asyncio.to_thread(store.delete_user_recipe, recipe_id)
    if not deleted:
        raise HTTPException(status_code=404, detail="User recipe not found.")
    # A 204 response must not carry a body. JSONResponse(content=None) would
    # serialise the literal ``null``, so return a plain bodiless Response.
    return Response(status_code=204)
inventory, ocr, recipes, settings, staples, feedback, feedback_attach, household, saved_recipes, imitate, meal_plans, orch_usage, session, shopping from app.api.endpoints.community import router as community_router from app.api.endpoints.corrections import router as corrections_router +from app.api.endpoints.recipe_scan import router as recipe_scan_router from app.api.endpoints.recipe_tags import router as recipe_tags_router api_router = APIRouter() @@ -13,6 +14,9 @@ api_router.include_router(ocr.router, prefix="/receipts", tags= api_router.include_router(export.router, tags=["export"]) api_router.include_router(inventory.router, prefix="/inventory", tags=["inventory"]) api_router.include_router(saved_recipes.router, prefix="/recipes/saved", tags=["saved-recipes"]) +# recipe_scan_router registered BEFORE recipes.router so /recipes/scan and /recipes/user +# take priority over /recipes/{recipe_id} (which would otherwise match them as int IDs). +api_router.include_router(recipe_scan_router, prefix="/recipes", tags=["recipe-scan"]) api_router.include_router(recipes.router, prefix="/recipes", tags=["recipes"]) api_router.include_router(settings.router, prefix="/settings", tags=["settings"]) api_router.include_router(staples.router, prefix="/staples", tags=["staples"]) diff --git a/app/db/migrations/041_user_recipes.sql b/app/db/migrations/041_user_recipes.sql new file mode 100644 index 0000000..6f5d781 --- /dev/null +++ b/app/db/migrations/041_user_recipes.sql @@ -0,0 +1,23 @@ +-- Migration 041: user_recipes table for user-scanned and manually-entered recipes. +-- +-- Separate from the food.com corpus (recipes table) -- user recipes are personal, +-- not curated, and need different fields (servings as string, cook_time as string). 
+ +CREATE TABLE IF NOT EXISTS user_recipes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT NOT NULL, + subtitle TEXT, + servings TEXT, -- kept as string: "2", "4-6", "serves 8" + cook_time TEXT, -- kept as string: "25 min", "1 hour" + source_note TEXT, -- e.g. "Purple Carrot", "Betty Crocker" + ingredients TEXT NOT NULL DEFAULT '[]', -- JSON: [{name, qty, unit, raw}] + steps TEXT NOT NULL DEFAULT '[]', -- JSON: ["step 1", "step 2", ...] + notes TEXT, + tags TEXT DEFAULT '[]', -- JSON: ["vegan", "quick"] + source TEXT NOT NULL DEFAULT 'manual', -- 'scan' | 'manual' + pantry_match_pct INTEGER, -- 0-100, computed at scan time; null for manual + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_user_recipes_created ON user_recipes (created_at DESC); diff --git a/app/db/store.py b/app/db/store.py index 85291af..55a2f42 100644 --- a/app/db/store.py +++ b/app/db/store.py @@ -61,6 +61,8 @@ class Store: "style_tags", # meal plan columns "meal_types", + # user_recipes columns + "steps", "tags", # captured_products columns "allergens"): if key in d and isinstance(d[key], str): @@ -1802,3 +1804,54 @@ class Store: confidence, 1 if confirmed_by_user else 0, source, ), ) + + # ── User Recipes (kiwi#9) ────────────────────────────────────────────────── + + def create_user_recipe( + self, + title: str, + ingredients: list[dict], + steps: list[str], + subtitle: str | None = None, + servings: str | None = None, + cook_time: str | None = None, + source_note: str | None = None, + notes: str | None = None, + tags: list[str] | None = None, + source: str = "manual", + pantry_match_pct: int | None = None, + ) -> dict[str, Any]: + return self._insert_returning( + """INSERT INTO user_recipes + (title, subtitle, servings, cook_time, source_note, + ingredients, steps, notes, tags, source, pantry_match_pct) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ RETURNING *""", + ( + title, subtitle, servings, cook_time, source_note, + self._dump(ingredients), + self._dump(steps), + notes, + self._dump(tags or []), + source, + pantry_match_pct, + ), + ) + + def get_user_recipe(self, recipe_id: int) -> dict[str, Any] | None: + return self._fetch_one( + "SELECT * FROM user_recipes WHERE id = ?", + (recipe_id,), + ) + + def list_user_recipes(self) -> list[dict[str, Any]]: + return self._fetch_all( + "SELECT * FROM user_recipes ORDER BY created_at DESC", + ) + + def delete_user_recipe(self, recipe_id: int) -> bool: + cur = self.conn.execute( + "DELETE FROM user_recipes WHERE id = ?", (recipe_id,) + ) + self.conn.commit() + return cur.rowcount > 0 diff --git a/app/models/schemas/recipe_scan.py b/app/models/schemas/recipe_scan.py new file mode 100644 index 0000000..41c5e32 --- /dev/null +++ b/app/models/schemas/recipe_scan.py @@ -0,0 +1,74 @@ +"""Pydantic schemas for the recipe scanner (kiwi#9). + +Scan input → photo(s). +Scan output → ScannedRecipeResponse (for review + editing before save). +Save input → ScannedRecipeSaveRequest. +User recipe output → UserRecipeResponse (after save). +""" +from __future__ import annotations + +from pydantic import BaseModel, Field + + +# ── Ingredient in a scanned recipe ──────────────────────────────────────────── + +class ScannedIngredientSchema(BaseModel): + """One ingredient line extracted from a recipe photo.""" + name: str # normalized generic name ("ranch dressing") + qty: str | None = None # quantity as string, preserving fractions ("1/2", "¼") + unit: str | None = None # unit of measure; null for countable items + raw: str | None = None # verbatim original line from the image + in_pantry: bool = False # True if this ingredient matches something in the pantry + + +# ── Scan response (returned immediately, not persisted) ─────────────────────── + +class ScannedRecipeResponse(BaseModel): + """Structured recipe extracted from photo(s). 
Returned for user review before save.""" + title: str | None = None + subtitle: str | None = None # e.g. "with Broccoli & Ranch Dressing" + servings: str | None = None # kept as string: "2", "4-6", "serves 8" + cook_time: str | None = None # kept as string: "25 min", "1 hour" + source_note: str | None = None # e.g. "Purple Carrot", "Betty Crocker" + ingredients: list[ScannedIngredientSchema] = Field(default_factory=list) + steps: list[str] = Field(default_factory=list) + notes: str | None = None + tags: list[str] = Field(default_factory=list) + pantry_match_pct: int = 0 # 0-100: percentage of ingredients found in pantry + confidence: str = "medium" # "high" | "medium" | "low" + warnings: list[str] = Field(default_factory=list) + + +# ── Save request ────────────────────────────────────────────────────────────── + +class ScannedRecipeSaveRequest(BaseModel): + """User-reviewed (possibly edited) recipe data to persist as a user recipe.""" + title: str + subtitle: str | None = None + servings: str | None = None + cook_time: str | None = None + source_note: str | None = None + ingredients: list[ScannedIngredientSchema] + steps: list[str] + notes: str | None = None + tags: list[str] = Field(default_factory=list) + source: str = "scan" # "scan" | "manual" + + +# ── User recipe (persisted) ─────────────────────────────────────────────────── + +class UserRecipeResponse(BaseModel): + """A user-created or user-scanned recipe stored in user_recipes table.""" + id: int + title: str + subtitle: str | None = None + servings: str | None = None + cook_time: str | None = None + source_note: str | None = None + ingredients: list[ScannedIngredientSchema] + steps: list[str] + notes: str | None = None + tags: list[str] = Field(default_factory=list) + source: str + pantry_match_pct: int | None = None + created_at: str diff --git a/app/services/recipe/recipe_scanner.py b/app/services/recipe/recipe_scanner.py new file mode 100644 index 0000000..58b3d2a --- /dev/null +++ 
b/app/services/recipe/recipe_scanner.py @@ -0,0 +1,411 @@ +"""Recipe scanner service (kiwi#9). + +Extracts structured recipe data from one or more photos of recipe cards, +cookbook pages, or handwritten notes. + +Pipeline: + photo(s) -> EXIF correction -> VLM extraction -> JSON parse -> pantry cross-ref + +Vision backend priority (mirrors receipt OCR pattern): + 1. cf-orch vision service (if CF_ORCH_URL set) + 2. Local Qwen2.5-VL (if GPU available) + 3. Anthropic API (BYOK -- if ANTHROPIC_API_KEY set) + +BSL 1.1 -- requires Paid tier or BYOK. +""" +from __future__ import annotations + +import base64 +import io +import json +import logging +import os +import re +from dataclasses import dataclass +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Maximum number of photos per scan call (to limit VLM context / VRAM) +MAX_IMAGES = 4 + +# VLM prompt -- adapted from tests/fixtures/recipe_scan/extract_test.py +_EXTRACTION_PROMPT = """ +You are extracting a recipe from a photograph of a recipe card, cookbook page, or handwritten note. + +If two or more images are provided, treat them as a single recipe across multiple pages +(e.g. ingredients on page 1, directions on page 2). + +Return a single JSON object with these fields: +- title: recipe name (string) +- subtitle: any secondary title or serving suggestion e.g. "with Broccoli & Ranch Dressing" (string or null) +- servings: serving size if shown, as a string e.g. "2", "4-6" (string or null) +- cook_time: total cook time if shown, e.g. "15 min", "1 hour" (string or null) +- source_note: any attribution text like "From Betty Crocker" or "Purple Carrot" (string or null) +- ingredients: array of ingredient objects, each with: + - name: normalized generic ingredient name, lowercase, no quantities, no brand names + (e.g. "Follow Your Heart Vegan Ranch" becomes "ranch dressing") + - qty: quantity as a string, preserving fractions e.g. 
"1/2", a quarter symbol (string or null) + - unit: unit of measure, null for countable items (e.g. "3 eggs" has unit: null) + - raw: the original ingredient line verbatim, exactly as it appears +- steps: ordered array of instruction strings, one distinct step per element +- notes: any tips, substitutions, storage instructions, or variations (string or null) +- confidence: "high" if text is clear and complete, "medium" if some parts are uncertain, + "low" if mostly handwritten or significantly degraded +- warnings: array of strings describing anything the user should double-check + (e.g. "Directions appear to continue on another page not shown") + +Return only valid JSON. No markdown fences. No explanation outside the JSON. +If the image does not appear to be a recipe at all, return: {"error": "not_a_recipe"} +""".strip() + + +# ── Data types ───────────────────────────────────────────────────────────────── + +@dataclass +class ScannedIngredient: + name: str + qty: str | None = None + unit: str | None = None + raw: str | None = None + in_pantry: bool = False + + +@dataclass +class ScannedRecipeResult: + title: str | None + subtitle: str | None + servings: str | None + cook_time: str | None + source_note: str | None + ingredients: list[ScannedIngredient] + steps: list[str] + notes: str | None + tags: list[str] + pantry_match_pct: int + confidence: str + warnings: list[str] + + +# ── Image helpers ────────────────────────────────────────────────────────────── + +def _load_image_b64(path: Path) -> str: + """Load image, apply EXIF rotation, return base64-encoded JPEG bytes.""" + from PIL import Image, ImageOps + + with open(path, "rb") as f: + raw = f.read() + img = Image.open(io.BytesIO(raw)) + img = ImageOps.exif_transpose(img).convert("RGB") + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=90) + return base64.b64encode(buf.getvalue()).decode() + + +# ── Vision backend ───────────────────────────────────────────────────────────── + +def 
_call_via_anthropic(image_paths: list[Path], prompt: str) -> str: + """Send image(s) + prompt to Anthropic API. Raises RuntimeError if unavailable.""" + try: + import anthropic + except ImportError as exc: + raise RuntimeError("anthropic package not installed") from exc + + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + raise RuntimeError("ANTHROPIC_API_KEY not set") + + client = anthropic.Anthropic(api_key=api_key) + + content: list[dict] = [] + for i, path in enumerate(image_paths): + if i > 0: + content.append({"type": "text", "text": f"(Page {i + 1} of the same recipe:)"}) + content.append({ + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": _load_image_b64(path), + }, + }) + content.append({"type": "text", "text": prompt}) + + msg = client.messages.create( + # Haiku is cost-efficient for well-structured extraction prompts + model="claude-haiku-4-5-20251001", + max_tokens=2048, + messages=[{"role": "user", "content": content}], + ) + return msg.content[0].text.strip() + + +def _call_via_local_vlm(image_paths: list[Path], prompt: str) -> str: + """Send image(s) + prompt to local Qwen2.5-VL. 
Raises RuntimeError if unavailable.""" + try: + import torch + except ImportError as exc: + raise RuntimeError("torch not installed") from exc + + if not torch.cuda.is_available(): + raise RuntimeError("No CUDA device -- local VLM unavailable") + + # Lazy import so the module loads fast when GPU is absent + from transformers import Qwen2VLForConditionalGeneration, AutoProcessor + from PIL import Image, ImageOps + + model_name = "Qwen/Qwen2.5-VL-7B-Instruct" + logger.info("Loading local VLM for recipe scan: %s", model_name) + + model = Qwen2VLForConditionalGeneration.from_pretrained( + model_name, + torch_dtype=torch.float16, + device_map="auto", + low_cpu_mem_usage=True, + ) + processor = AutoProcessor.from_pretrained(model_name) + model.train(False) # inference mode + + images = [] + for path in image_paths: + with open(path, "rb") as f: + raw = f.read() + img = Image.open(io.BytesIO(raw)) + img = ImageOps.exif_transpose(img).convert("RGB") + images.append(img) + + inputs = processor(images=images, text=prompt, return_tensors="pt") + inputs = {k: v.to("cuda", torch.float16) if isinstance(v, torch.Tensor) else v + for k, v in inputs.items()} + + with torch.no_grad(): + output_ids = model.generate( + **inputs, + max_new_tokens=2048, + do_sample=False, + temperature=0.0, + ) + + output = processor.decode(output_ids[0], skip_special_tokens=True) + output = output.replace(prompt, "").strip() + + # Free VRAM + del model + torch.cuda.empty_cache() + + return output + + +def _call_vision_backend(image_paths: list[Path], prompt: str) -> str: + """Dispatch to the best available vision backend. + + Priority: cf-orch vision -> local Qwen2.5-VL -> Anthropic API. + Raises RuntimeError with a clear message when no backend is available. + """ + errors: list[str] = [] + + # 1. 
Try cf-orch vision allocation + cf_orch_url = os.environ.get("CF_ORCH_URL") + if cf_orch_url: + try: + from circuitforge_orch.client import CFOrchClient + from app.services.ocr.docuvision_client import DocuvisionClient + + client = CFOrchClient(cf_orch_url) + with client.allocate( + service="cf-vision", + model_candidates=["qwen2.5-vl-7b", "cf-docuvision"], + ttl_s=90.0, + caller="kiwi-recipe-scan", + ) as alloc: + if alloc is not None: + doc_client = DocuvisionClient(alloc.url) + # docuvision takes a single image -- use first image only for now + result = doc_client.extract_text(image_paths[0]) + if result.text: + return result.text + except Exception as exc: + logger.debug("cf-orch vision failed for recipe scan: %s", exc) + errors.append(f"cf-orch: {exc}") + + # 2. Try local Qwen2.5-VL + try: + return _call_via_local_vlm(image_paths, prompt) + except Exception as exc: + logger.debug("Local VLM unavailable for recipe scan: %s", exc) + errors.append(f"local VLM: {exc}") + + # 3. Try Anthropic API (BYOK) + try: + return _call_via_anthropic(image_paths, prompt) + except Exception as exc: + logger.debug("Anthropic API failed for recipe scan: %s", exc) + errors.append(f"Anthropic: {exc}") + + raise RuntimeError( + "No vision backend configured for recipe scanning. " + "Options: cf-orch (CF_ORCH_URL), local GPU, or ANTHROPIC_API_KEY (BYOK). " + f"Errors: {'; '.join(errors)}" + ) + + +# ── Parsing helpers ──────────────────────────────────────────────────────────── + +def _normalize_ingredient_name(name: str) -> str: + """Lowercase + strip whitespace. Preserves multi-word names as-is.""" + return name.lower().strip() + + +def _parse_scanner_json(raw_text: str) -> dict: + """Extract and return the JSON dict from VLM output. + + Handles: + - Pure JSON + - JSON wrapped in ```json ... ``` markdown fences + - JSON preceded by a line of prose ("Here is the recipe: {...}") + + Raises ValueError on not_a_recipe or unparseable output. 
+ """ + text = raw_text.strip() + + # Strip markdown fences if present + if text.startswith("```"): + parts = text.split("```") + for part in parts: + part = part.strip() + if part.startswith("json"): + part = part[4:].strip() + if part.startswith("{"): + text = part + break + + # Try direct parse first + try: + data = json.loads(text) + except json.JSONDecodeError: + # Extract first JSON object embedded in prose + match = re.search(r"\{.*\}", text, re.DOTALL) + if not match: + raise ValueError(f"Could not parse JSON from VLM output: {text[:200]!r}") + try: + data = json.loads(match.group(0)) + except json.JSONDecodeError as exc: + raise ValueError(f"Could not parse JSON from VLM output: {exc}") from exc + + if isinstance(data, dict) and data.get("error") == "not_a_recipe": + raise ValueError("not_a_recipe: image does not appear to contain a recipe") + + return data + + +# ── Pantry cross-reference ───────────────────────────────────────────────────── + +def _cross_reference_pantry( + ingredients: list[ScannedIngredient], + pantry_names: list[str], +) -> tuple[list[ScannedIngredient], int]: + """Mark ingredients found in the pantry and return updated list + match percent. + + Matching is bidirectional by token: + - "broccoli florets" matches pantry item "broccoli" (pantry token in ingredient) + - "pumpkin seeds" matches pantry "pumpkin seeds" (exact) + + Returns (updated_ingredients, pantry_match_pct). 
+ """ + if not ingredients: + return ingredients, 0 + + normalized_pantry = [_normalize_ingredient_name(p) for p in pantry_names] + updated: list[ScannedIngredient] = [] + matched = 0 + + for ingr in ingredients: + norm_ingr = _normalize_ingredient_name(ingr.name) + in_pantry = any( + (p_tok in norm_ingr or norm_ingr in p_tok) + for p in normalized_pantry + for p_tok in p.split() + if len(p_tok) >= 4 # skip short stop-words like "of", "and", "the" + ) + updated.append(ScannedIngredient( + name=ingr.name, + qty=ingr.qty, + unit=ingr.unit, + raw=ingr.raw, + in_pantry=in_pantry, + )) + if in_pantry: + matched += 1 + + pct = round(matched / len(ingredients) * 100) + return updated, pct + + +# ── Main scanner class ───────────────────────────────────────────────────────── + +class RecipeScanner: + """Stateless recipe scanner. One instance can be reused across requests.""" + + def scan( + self, + image_paths: list[Path], + pantry_names: list[str] | None = None, + ) -> ScannedRecipeResult: + """Extract a structured recipe from one or more photos. + + Args: + image_paths: 1-4 image files (phone photos, scans). + pantry_names: Flat list of product names from user's inventory. + Pass [] or None to skip pantry cross-reference. + + Returns: + ScannedRecipeResult with all fields populated. + + Raises: + ValueError: Image is not a recipe, or JSON could not be parsed. + RuntimeError: No vision backend is configured. 
+ """ + if not image_paths: + raise ValueError("At least one image is required") + if len(image_paths) > MAX_IMAGES: + raise ValueError(f"Maximum {MAX_IMAGES} images per scan (got {len(image_paths)})") + + # Call vision backend + raw_text = _call_vision_backend(image_paths, _EXTRACTION_PROMPT) + + # Parse JSON from VLM output + data = _parse_scanner_json(raw_text) + + # Build ingredient list + raw_ingredients = data.get("ingredients") or [] + ingredients: list[ScannedIngredient] = [ + ScannedIngredient( + name=str(item.get("name") or "").strip() or "unknown", + qty=str(item["qty"]) if item.get("qty") is not None else None, + unit=str(item["unit"]) if item.get("unit") is not None else None, + raw=str(item["raw"]) if item.get("raw") is not None else None, + ) + for item in raw_ingredients + if isinstance(item, dict) + ] + + # Pantry cross-reference + ingredients, pct = _cross_reference_pantry( + ingredients, + pantry_names or [], + ) + + return ScannedRecipeResult( + title=data.get("title") or None, + subtitle=data.get("subtitle") or None, + servings=str(data["servings"]) if data.get("servings") is not None else None, + cook_time=str(data["cook_time"]) if data.get("cook_time") is not None else None, + source_note=data.get("source_note") or None, + ingredients=ingredients, + steps=[str(s) for s in (data.get("steps") or []) if s], + notes=data.get("notes") or None, + tags=list(data.get("tags") or []), + pantry_match_pct=pct, + confidence=data.get("confidence") or "medium", + warnings=list(data.get("warnings") or []), + ) diff --git a/app/tiers.py b/app/tiers.py index b07ec82..9cb321c 100644 --- a/app/tiers.py +++ b/app/tiers.py @@ -15,6 +15,7 @@ KIWI_BYOK_UNLOCKABLE: frozenset[str] = frozenset({ "recipe_suggestions", "expiry_llm_matching", "receipt_ocr", + "recipe_scan", "style_classifier", "meal_plan_llm", "meal_plan_llm_timing", @@ -58,6 +59,9 @@ KIWI_FEATURES: dict[str, str] = { "community_publish": "paid", # Publish plans/outcomes to community feed 
"community_fork_adapt": "paid", # Fork with LLM pantry adaptation (BYOK-unlockable) + # Paid tier (continued) + "recipe_scan": "paid", # BYOK-unlockable: photo -> structured recipe + # Premium tier "multi_household": "premium", "background_monitoring": "premium", diff --git a/frontend/src/components/RecipeScanModal.vue b/frontend/src/components/RecipeScanModal.vue new file mode 100644 index 0000000..77f8b7a --- /dev/null +++ b/frontend/src/components/RecipeScanModal.vue @@ -0,0 +1,844 @@ + + + + + diff --git a/frontend/src/components/RecipesView.vue b/frontend/src/components/RecipesView.vue index f139746..1304fd6 100644 --- a/frontend/src/components/RecipesView.vue +++ b/frontend/src/components/RecipesView.vue @@ -1,21 +1,53 @@