kiwi/tests/fixtures/recipe_scan/extract_test.py
pyr0ball 9c4d8b7883
feat(recipe-engine): time-effort profile, product-label tokenisation, L1 tuning
- Add TimeEffortProfile + StepAnalysis Pydantic schemas; serialised into
  RecipeSuggestion so the frontend receives active/passive/total minutes,
  effort label, and detected equipment per suggestion.
- parse_time_effort() now drives max_total_min filter (falls back to step-count
  estimate when directions contain no explicit time mentions).
- _PRODUCT_TOKEN_STOPWORDS: strips marketing/packaging words from multi-word
  product labels before adding individual ingredient tokens to pantry_set.
  "Organic Extra Firm Tofu" → adds "tofu"; improves packaged-food pantry match
  (sketched below).
- L1 candidate pool raised to 60 (was 20); min_match_ratio lowered to 0.35
  (was 0.60) to keep enough results for plant-based / packaged-food pantries.
- household.py: tighten import to pull HEIMDALL_URL/ADMIN_TOKEN from
  services.heimdall_orch (matches refactor in cloud_session.py).
2026-04-25 21:44:26 -07:00
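
For context, a minimal sketch of the schema and tokenisation changes described
above. Field names, the stopword list, and the helper name are assumptions
taken from the commit message; the real recipe-engine code may differ:

    from pydantic import BaseModel

    # Hypothetical reconstruction of the fields named in the commit message.
    class TimeEffortProfile(BaseModel):
        active_min: int       # hands-on minutes
        passive_min: int      # unattended minutes (baking, marinating, ...)
        total_min: int
        effort: str           # effort label, e.g. "low" / "medium" / "high"
        equipment: list[str]  # detected equipment, e.g. ["oven", "blender"]

    # Hypothetical stopword list; the real _PRODUCT_TOKEN_STOPWORDS may differ.
    _PRODUCT_TOKEN_STOPWORDS = {"organic", "extra", "firm", "fresh", "premium",
                                "original", "classic", "family", "pack"}

    def product_label_tokens(label: str) -> set[str]:
        """Lowercase a multi-word product label and drop marketing words."""
        return {t.lower() for t in label.split()} - _PRODUCT_TOKEN_STOPWORDS

    product_label_tokens("Organic Extra Firm Tofu")  # -> {"tofu"}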

#!/usr/bin/env python3
"""
Prompt validation harness for recipe scanner (kiwi#9).
Runs the draft extraction prompt against fixture images using the Anthropic API
directly (bypasses llm.yaml — for prompt dev only, not production path).
Usage:
    python extract_test.py <image1.jpg> [image2.jpg]
"""
import base64
import io
import json
import os
import sys
from pathlib import Path

import anthropic
from PIL import Image, ImageOps

PROMPT = """
You are extracting a recipe from a photograph of a recipe card, cookbook page, or handwritten note.
If two images are provided, treat them as a single recipe across two pages (e.g. ingredients on page 1, directions on page 2).
Return a single JSON object with these fields:
- title: recipe name (string)
- subtitle: any secondary title or serving suggestion, e.g. "with Broccoli & Ranch Dressing" (string or null)
- servings: serving size if shown, as a string, e.g. "2", "4-6" (string or null)
- cook_time: total cook time if shown, e.g. "15 min", "1 hour" (string or null)
- source_note: any attribution text like "From Betty Crocker" or "Purple Carrot" (string or null)
- ingredients: array of ingredient objects, each with:
  - name: normalized generic ingredient name, lowercase, no quantities, no brand names
    (e.g. "Follow Your Heart® Vegan Ranch" → "ranch dressing")
  - qty: quantity as a string, preserving fractions, e.g. "1/2", "¼" (string or null)
  - unit: unit of measure, null for countable items (e.g. "3 eggs" → unit: null)
  - raw: the original ingredient line verbatim, exactly as it appears
- steps: ordered array of instruction strings, one distinct step per element
- notes: any tips, substitutions, storage instructions, or variations (string or null)
- confidence: "high" if text is clear and complete, "medium" if some parts are uncertain,
  "low" if mostly handwritten or significantly degraded
- warnings: array of strings describing anything the user should double-check
  (e.g. "Directions appear to continue on another page not shown")
Return only valid JSON. No markdown fences. No explanation outside the JSON.
If the image does not appear to be a recipe at all, return: {"error": "not_a_recipe"}
""".strip()


def load_image_b64(path: Path) -> str:
    """Load image, apply EXIF rotation, return base64-encoded JPEG."""
    with open(path, "rb") as f:
        img = Image.open(io.BytesIO(f.read()))
    img = ImageOps.exif_transpose(img)  # fix phone rotation
    img = img.convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=90)
    return base64.b64encode(buf.getvalue()).decode()
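
# Note: JPEG re-encoding at quality=90 should also keep typical phone photos
# under the API's per-image size limit (assumption; worth checking for very
# large scans).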


def extract(image_paths: list[Path]) -> dict:
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    content = []
    for i, path in enumerate(image_paths):
        if i > 0:
            content.append({"type": "text", "text": f"(Page {i + 1} of the same recipe:)"})
        content.append({
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": load_image_b64(path),
            },
        })
    content.append({"type": "text", "text": PROMPT})
    msg = client.messages.create(
        model="claude-opus-4-6",  # best vision for prompt dev; production uses VisionRouter
        max_tokens=2048,
        messages=[{"role": "user", "content": content}],
    )
    raw = msg.content[0].text.strip()
    # Strip markdown fences if the model adds them anyway
    if raw.startswith("```"):
        raw = raw.split("```")[1]
        if raw.startswith("json"):
            raw = raw[4:]
    return json.loads(raw)
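

# Illustrative addition, not part of the original harness: a quick shape check
# against the top-level keys the PROMPT requests, e.g. missing_keys(result)
# when eyeballing fixture output.
REQUIRED_KEYS = {"title", "servings", "ingredients", "steps", "confidence", "warnings"}


def missing_keys(result: dict) -> list[str]:
    """Return top-level keys from the prompt schema that are absent."""
    if "error" in result:  # not_a_recipe responses carry no recipe fields
        return []
    return sorted(REQUIRED_KEYS - result.keys())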


if __name__ == "__main__":
    paths = [Path(p) for p in sys.argv[1:]]
    if not paths:
        print("Usage: python extract_test.py <image1.jpg> [image2.jpg]")
        sys.exit(1)
    for p in paths:
        if not p.exists():
            print(f"File not found: {p}")
            sys.exit(1)
    print(f"Extracting from: {[p.name for p in paths]}")
    print("Applying EXIF rotation + sending to claude-opus-4-6...\n")
    result = extract(paths)
    print(json.dumps(result, indent=2, ensure_ascii=False))