feat(recipe-scan): labeling UI for Kiwi vision training pipeline (closes #65)

- POST /api/recipe-scan/import — bulk ingest from Kiwi scanner pipeline, idempotent by item id
- GET /api/recipe-scan/next — oldest-first pending item for review
- POST /api/recipe-scan/items/{id}/approve|edit|reject — label actions
- GET /api/recipe-scan/stats — counts by status and modality
- GET /api/recipe-scan/export — JSONL training pairs (messages chat format, Option B: correction prompt + extracted draft → corrected ground truth)
- GET /api/recipe-scan/image — path-traversal-safe image serving from /Library/Assets/kiwi/
- SQLite at data/recipe_scan.db with WAL mode; separate from corpus.db lifecycle
- set_db_path() testability seam; 18 tests, all passing
- RecipeScanView.vue: two-column review UI (image left, JSON diff right), keyboard shortcuts A/E/R, toast feedback, stats header, export download
- Route /data/recipe-scan and sidebar nav entry added
2026-05-17 12:22:15 -07:00 · 2026-05-17 12:22:15 -07:00 · 391ebb3cd1
commit 391ebb3cd1
parent 9bb88b168f
6 changed files with 1081 additions and 0 deletions
--- a/app/api.py
+++ b/app/api.py
@ -70,6 +70,9 @@ def finetune_cancel_compat() -> dict:
 from app.data.log_corpus import router as log_corpus_router
 app.include_router(log_corpus_router, prefix="/api/corpus")

+from app.data.recipe_scan import router as recipe_scan_router
+app.include_router(recipe_scan_router, prefix="/api/recipe-scan")
+
 from app.dashboard import router as dashboard_router
 app.include_router(dashboard_router, prefix="/api")

--- a/app/data/recipe_scan.py
+++ b/app/data/recipe_scan.py
@ -0,0 +1,313 @@
+"""Avocet — Recipe scan labeling API (avocet#65).
+
+Receives recipe scan items from the Kiwi pipeline (scanner/phone image +
+docuvision OCR extraction + ground-truth structured recipe), presents them
+for human review, and exports approved/edited pairs in the messages chat
+format for the vision fine-tune harness.
+
+DB: data/recipe_scan.db (separate from corpus.db — different lifecycle)
+No auth required — local admin tool, not a push endpoint.
+
+All endpoints registered on `router`. api.py includes this with
+prefix="/api/recipe-scan".
+"""
+from __future__ import annotations
+
+import json
+import logging
+import sqlite3
+import uuid
+from contextlib import contextmanager
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Generator, Literal
+
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel, field_validator
+
+logger = logging.getLogger(__name__)
+
+# Three levels up from app/data/recipe_scan.py, i.e. the repository root.
+_ROOT = Path(__file__).parent.parent.parent
+# Default database location; set_db_path() rebinds this name.
+_DB_PATH: Path = _ROOT / "data" / "recipe_scan.db"
+
+# NOTE(review): these two sets are not referenced elsewhere in the visible
+# module; ImportItem.modality repeats the modality values as a Literal.
+_VALID_MODALITIES = {"scanner", "phone", "handwritten"}
+_VALID_STATUSES = {"pending", "approved", "edited", "rejected"}
+
+# Applied by _init_db() via executescript(). Every statement uses
+# IF NOT EXISTS, so running it against an existing database is harmless.
+# JSON payloads (extracted, ground_truth, corrected) are stored as TEXT.
+_SCHEMA = """
+CREATE TABLE IF NOT EXISTS recipe_scan_items (
+    id              TEXT PRIMARY KEY,
+    image_path      TEXT NOT NULL,
+    modality        TEXT NOT NULL DEFAULT 'scanner',
+    source          TEXT NOT NULL DEFAULT 'purple_carrot',
+    extracted       TEXT NOT NULL,
+    ground_truth    TEXT NOT NULL,
+    status          TEXT NOT NULL DEFAULT 'pending',
+    corrected       TEXT,
+    labeled_at      TEXT,
+    rejected_reason TEXT
+);
+CREATE INDEX IF NOT EXISTS idx_rsi_status   ON recipe_scan_items(status);
+CREATE INDEX IF NOT EXISTS idx_rsi_modality ON recipe_scan_items(modality);
+"""
+
+# All endpoints in this module attach to this router.
+router = APIRouter()
+
+
+# ── Testability seam ──────────────────────────────────────────────────────────
+
+def set_db_path(path: Path) -> None:
+    """Point the module at a different SQLite file (testability seam).
+
+    Only rebinds the module-level ``_DB_PATH``. It does not create the file
+    or apply the schema; call ``_init_db()`` afterwards for a fresh location.
+    """
+    global _DB_PATH
+    _DB_PATH = path
+
+
+# ── Internal helpers ──────────────────────────────────────────────────────────
+
+@contextmanager
+def _db() -> Generator[sqlite3.Connection, None, None]:
+    """Yield a connection to ``_DB_PATH`` wrapped in one transaction.
+
+    Rows come back as ``sqlite3.Row`` so callers can index by column name.
+    The transaction is committed when the ``with`` body finishes normally and
+    rolled back (then re-raised) on any exception, including HTTPException.
+    The connection is always closed on exit.
+    """
+    # _DB_PATH is read at call time, so set_db_path() affects later calls.
+    conn = sqlite3.connect(str(_DB_PATH))
+    conn.row_factory = sqlite3.Row
+    # Requested on every new connection.
+    conn.execute("PRAGMA journal_mode=WAL")
+    try:
+        yield conn
+        conn.commit()
+    except Exception:
+        conn.rollback()
+        raise
+    finally:
+        conn.close()
+
+
+def _init_db() -> None:
+    with _db() as conn:
+        conn.executescript(_SCHEMA)
+
+
+def _now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _build_training_pair(row: sqlite3.Row) -> dict:
+    """Build a messages-format training pair from a labeled row.
+
+    user message: correction prompt + the docuvision-extracted JSON draft.
+    Trains the model to review and correct an existing extraction, which is
+    more data-efficient than producing from scratch when OCR is usually close.
+
+    assistant message: the approved ground truth (or human-corrected JSON).
+    """
+    target_str = row["corrected"] if row["corrected"] else row["ground_truth"]
+    extracted = json.loads(row["extracted"])
+    target = json.loads(target_str)
+    user_content = (
+        "Review and correct this recipe extraction. "
+        "Return valid JSON with fields: title, description, ingredients, steps, "
+        "prep_time, cook_time, servings.\n\n"
+        f"Extraction to review:\n{json.dumps(extracted, ensure_ascii=False, indent=2)}"
+    )
+    return {
+        "id": row["id"],
+        "modality": row["modality"],
+        "source": row["source"],
+        "image_path": row["image_path"],
+        "messages": [
+            {"role": "user", "content": user_content},
+            {"role": "assistant", "content": json.dumps(target, ensure_ascii=False)},
+        ],
+    }
+
+
+_init_db()
+
+
+# ── POST /import ───────────────────────────────────────────────────────────────
+
+class ImportItem(BaseModel):
+    id: str = ""
+    image_path: str
+    modality: Literal["scanner", "phone", "handwritten"] = "scanner"
+    source: str = "purple_carrot"
+    extracted: dict
+    ground_truth: dict
+
+    @field_validator("id", mode="before")
+    @classmethod
+    def default_id(cls, v: str) -> str:
+        return v or str(uuid.uuid4())
+
+
+# Request body for POST /import: the batch of scan items to store.
+class ImportRequest(BaseModel):
+    items: list[ImportItem]
+
+
+@router.post("/import")
+def import_items(body: ImportRequest) -> dict:
+    """Bulk-import scan items from the Kiwi pipeline. Idempotent by item id."""
+    stored = 0
+    with _db() as conn:
+        for item in body.items:
+            result = conn.execute(
+                "INSERT OR IGNORE INTO recipe_scan_items "
+                "(id, image_path, modality, source, extracted, ground_truth) "
+                "VALUES (?, ?, ?, ?, ?, ?)",
+                (item.id, item.image_path, item.modality, item.source,
+                 json.dumps(item.extracted), json.dumps(item.ground_truth)),
+            )
+            stored += result.rowcount
+    return {"imported": stored, "total_submitted": len(body.items)}
+
+
+# ── GET /next ─────────────────────────────────────────────────────────────────
+
+@router.get("/next")
+def get_next() -> dict:
+    """Return the next pending item for review, oldest-first."""
+    with _db() as conn:
+        row = conn.execute(
+            "SELECT * FROM recipe_scan_items WHERE status = 'pending' ORDER BY rowid LIMIT 1"
+        ).fetchone()
+    if row is None:
+        raise HTTPException(404, "No pending items in queue")
+    return {
+        **dict(row),
+        "extracted": json.loads(row["extracted"]),
+        "ground_truth": json.loads(row["ground_truth"]),
+    }
+
+
+# ── POST /items/{id}/approve ──────────────────────────────────────────────────
+
+@router.post("/items/{item_id}/approve")
+def approve_item(item_id: str) -> dict:
+    """Mark item as approved — extracted JSON is close enough to ground truth."""
+    with _db() as conn:
+        row = conn.execute("SELECT id FROM recipe_scan_items WHERE id = ?", (item_id,)).fetchone()
+        if row is None:
+            raise HTTPException(404, "Item not found")
+        conn.execute(
+            "UPDATE recipe_scan_items SET status='approved', labeled_at=? WHERE id=?",
+            (_now_iso(), item_id),
+        )
+    return {"status": "approved", "id": item_id}
+
+
+# ── POST /items/{id}/edit ─────────────────────────────────────────────────────
+
+# Request body for POST /items/{id}/edit: the human-corrected recipe JSON object.
+class EditBody(BaseModel):
+    corrected: dict
+
+
+@router.post("/items/{item_id}/edit")
+def edit_item(item_id: str, body: EditBody) -> dict:
+    """Approve with a human-corrected JSON. corrected overrides extracted in export."""
+    with _db() as conn:
+        row = conn.execute("SELECT id FROM recipe_scan_items WHERE id = ?", (item_id,)).fetchone()
+        if row is None:
+            raise HTTPException(404, "Item not found")
+        conn.execute(
+            "UPDATE recipe_scan_items SET status='edited', corrected=?, labeled_at=? WHERE id=?",
+            (json.dumps(body.corrected), _now_iso(), item_id),
+        )
+    return {"status": "edited", "id": item_id}
+
+
+# ── POST /items/{id}/reject ───────────────────────────────────────────────────
+
+# Optional request body for POST /items/{id}/reject; an empty reason is stored as NULL.
+class RejectBody(BaseModel):
+    reason: str = ""
+
+
+@router.post("/items/{item_id}/reject")
+def reject_item(item_id: str, body: RejectBody = RejectBody()) -> dict:
+    """Reject item — extraction too broken to use for training."""
+    with _db() as conn:
+        row = conn.execute("SELECT id FROM recipe_scan_items WHERE id = ?", (item_id,)).fetchone()
+        if row is None:
+            raise HTTPException(404, "Item not found")
+        conn.execute(
+            "UPDATE recipe_scan_items SET status='rejected', rejected_reason=?, labeled_at=? WHERE id=?",
+            (body.reason or None, _now_iso(), item_id),
+        )
+    return {"status": "rejected", "id": item_id}
+
+
+# ── GET /stats ────────────────────────────────────────────────────────────────
+
+@router.get("/stats")
+def get_stats() -> dict:
+    with _db() as conn:
+        total = conn.execute("SELECT COUNT(*) FROM recipe_scan_items").fetchone()[0]
+        by_status = {
+            r["status"]: r["cnt"]
+            for r in conn.execute(
+                "SELECT status, COUNT(*) AS cnt FROM recipe_scan_items GROUP BY status"
+            ).fetchall()
+        }
+        by_modality = {
+            r["modality"]: r["cnt"]
+            for r in conn.execute(
+                "SELECT modality, COUNT(*) AS cnt FROM recipe_scan_items GROUP BY modality"
+            ).fetchall()
+        }
+        export_ready = conn.execute(
+            "SELECT COUNT(*) FROM recipe_scan_items WHERE status IN ('approved', 'edited')"
+        ).fetchone()[0]
+    return {
+        "total": total,
+        "by_status": by_status,
+        "by_modality": by_modality,
+        "export_ready": export_ready,
+    }
+
+
+# ── GET /export ───────────────────────────────────────────────────────────────
+
+@router.get("/export")
+def export_pairs() -> StreamingResponse:
+    """Stream approved/edited items as JSONL training pairs (messages format)."""
+    with _db() as conn:
+        rows = conn.execute(
+            "SELECT * FROM recipe_scan_items WHERE status IN ('approved', 'edited') ORDER BY rowid"
+        ).fetchall()
+
+    def _generate():
+        for row in rows:
+            yield json.dumps(_build_training_pair(row), ensure_ascii=False) + "\n"
+
+    return StreamingResponse(
+        _generate(),
+        media_type="application/x-ndjson",
+        headers={"Content-Disposition": "attachment; filename=recipe_scan_pairs.jsonl"},
+    )
+
+
+# ── GET /image ────────────────────────────────────────────────────────────────
+
+# Only files that resolve to a location under this directory are served by GET /image.
+_IMAGE_ROOT = Path("/Library/Assets/kiwi")
+
+
+@router.get("/image")
+def serve_image(path: str) -> StreamingResponse:
+    """Serve a scan image from /Library/Assets/kiwi/.
+
+    path must resolve within /Library/Assets/kiwi/ — rejects traversal attempts.
+    """
+    try:
+        resolved = Path(path).resolve()
+        _IMAGE_ROOT.resolve()  # ensure root itself is valid
+        resolved.relative_to(_IMAGE_ROOT.resolve())
+    except (ValueError, OSError):
+        raise HTTPException(403, "Path outside allowed image directory")
+
+    if not resolved.exists():
+        raise HTTPException(404, "Image not found")
+
+    suffix = resolved.suffix.lower()
+    media_types = {".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png", ".webp": "image/webp"}
+    media_type = media_types.get(suffix, "application/octet-stream")
+
+    return StreamingResponse(
+        open(resolved, "rb"),
+        media_type=media_type,
+        headers={"Cache-Control": "public, max-age=86400"},
+    )
--- a/tests/test_recipe_scan.py
+++ b/tests/test_recipe_scan.py
@ -0,0 +1,227 @@
+"""Tests for app/data/recipe_scan.py — recipe scan labeling endpoints."""
+from __future__ import annotations
+
+import json
+import uuid
+from pathlib import Path
+
+import pytest
+from fastapi.testclient import TestClient
+
+from app.data import recipe_scan as rs
+
+
+# Sample payloads shared by the tests: an OCR-style draft and its reference recipe.
+EXTRACTED = {"title": "Shepherd's Pie", "ingredients": ["lamb", "potato"], "steps": ["brown meat", "mash potato"]}
+GROUND_TRUTH = {"title": "Shepherd's Pie", "ingredients": ["ground lamb", "mashed potato", "peas"], "steps": ["brown meat", "add veg", "mash potato", "bake"]}
+
+
+@pytest.fixture(autouse=True)
+def isolated_db(tmp_path, monkeypatch):
+    monkeypatch.setattr(rs, "_DB_PATH", tmp_path / "recipe_scan.db")
+    rs._init_db()
+
+
+@pytest.fixture()
+def client():
+    from fastapi import FastAPI
+    app = FastAPI()
+    app.include_router(rs.router, prefix="/api/recipe-scan")
+    return TestClient(app)
+
+
+def _item(**kwargs) -> dict:
+    return {
+        "id": str(uuid.uuid4()),
+        "image_path": "/Library/Assets/kiwi/scans/pc_test.jpg",
+        "modality": kwargs.get("modality", "scanner"),
+        "source": kwargs.get("source", "purple_carrot"),
+        "extracted": kwargs.get("extracted", EXTRACTED),
+        "ground_truth": kwargs.get("ground_truth", GROUND_TRUTH),
+    }
+
+
+def _import(client, items: list[dict]) -> None:
+    resp = client.post("/api/recipe-scan/import", json={"items": items})
+    assert resp.status_code == 200
+
+
+# ── Import ─────────────────────────────────────────────────────────────────────
+
+def test_import_stores_items(client):
+    _import(client, [_item()])
+    stats = client.get("/api/recipe-scan/stats").json()
+    assert stats["total"] == 1
+    assert stats["by_status"]["pending"] == 1
+
+
+def test_import_rejects_unknown_modality(client):
+    bad = _item()
+    bad["modality"] = "telepathy"
+    resp = client.post("/api/recipe-scan/import", json={"items": [bad]})
+    assert resp.status_code == 422
+
+
+def test_import_is_idempotent(client):
+    item = _item()
+    _import(client, [item])
+    _import(client, [item])  # same id — should not duplicate
+    stats = client.get("/api/recipe-scan/stats").json()
+    assert stats["total"] == 1
+
+
+def test_import_multiple_items(client):
+    _import(client, [_item(), _item(), _item()])
+    assert client.get("/api/recipe-scan/stats").json()["total"] == 3
+
+
+# ── Next ───────────────────────────────────────────────────────────────────────
+
+def test_next_returns_404_when_queue_empty(client):
+    resp = client.get("/api/recipe-scan/next")
+    assert resp.status_code == 404
+
+
+def test_next_returns_pending_item(client):
+    item = _item()
+    _import(client, [item])
+    resp = client.get("/api/recipe-scan/next")
+    assert resp.status_code == 200
+    data = resp.json()
+    assert data["id"] == item["id"]
+    assert data["status"] == "pending"
+    assert "extracted" in data
+    assert "ground_truth" in data
+
+
+def test_next_skips_non_pending(client):
+    item = _item()
+    _import(client, [item])
+    client.post(f"/api/recipe-scan/items/{item['id']}/reject")
+    resp = client.get("/api/recipe-scan/next")
+    assert resp.status_code == 404
+
+
+# ── Approve ────────────────────────────────────────────────────────────────────
+
+def test_approve_marks_item_approved(client):
+    item = _item()
+    _import(client, [item])
+    resp = client.post(f"/api/recipe-scan/items/{item['id']}/approve")
+    assert resp.status_code == 200
+    assert resp.json()["status"] == "approved"
+    stats = client.get("/api/recipe-scan/stats").json()
+    assert stats["by_status"]["approved"] == 1
+
+
+def test_approve_returns_404_for_unknown_id(client):
+    resp = client.post("/api/recipe-scan/items/no-such-id/approve")
+    assert resp.status_code == 404
+
+
+# ── Edit ───────────────────────────────────────────────────────────────────────
+
+def test_edit_stores_corrected_json(client):
+    item = _item()
+    _import(client, [item])
+    corrected = {**GROUND_TRUTH, "servings": 4}
+    resp = client.post(
+        f"/api/recipe-scan/items/{item['id']}/edit",
+        json={"corrected": corrected},
+    )
+    assert resp.status_code == 200
+    assert resp.json()["status"] == "edited"
+    stats = client.get("/api/recipe-scan/stats").json()
+    assert stats["by_status"]["edited"] == 1
+
+
+def test_edit_requires_corrected_field(client):
+    item = _item()
+    _import(client, [item])
+    resp = client.post(f"/api/recipe-scan/items/{item['id']}/edit", json={})
+    assert resp.status_code == 422
+
+
+# ── Reject ─────────────────────────────────────────────────────────────────────
+
+def test_reject_marks_item_rejected(client):
+    item = _item()
+    _import(client, [item])
+    resp = client.post(
+        f"/api/recipe-scan/items/{item['id']}/reject",
+        json={"reason": "OCR completely unreadable"},
+    )
+    assert resp.status_code == 200
+    assert resp.json()["status"] == "rejected"
+
+
+def test_reject_without_reason_is_valid(client):
+    item = _item()
+    _import(client, [item])
+    resp = client.post(f"/api/recipe-scan/items/{item['id']}/reject")
+    assert resp.status_code == 200
+
+
+# ── Export ─────────────────────────────────────────────────────────────────────
+
+def test_export_empty_when_nothing_approved(client):
+    item = _item()
+    _import(client, [item])
+    resp = client.get("/api/recipe-scan/export")
+    assert resp.status_code == 200
+    assert resp.text.strip() == ""
+
+
+def test_export_includes_approved_item(client):
+    item = _item()
+    _import(client, [item])
+    client.post(f"/api/recipe-scan/items/{item['id']}/approve")
+    resp = client.get("/api/recipe-scan/export")
+    lines = [l for l in resp.text.strip().splitlines() if l]
+    assert len(lines) == 1
+    pair = json.loads(lines[0])
+    assert pair["id"] == item["id"]
+    assert pair["modality"] == "scanner"
+    assert "messages" in pair
+    assert len(pair["messages"]) == 2
+    assert pair["messages"][0]["role"] == "user"
+    assert pair["messages"][1]["role"] == "assistant"
+
+
+def test_export_includes_edited_item_with_correction(client):
+    item = _item()
+    _import(client, [item])
+    corrected = {**GROUND_TRUTH, "servings": 4}
+    client.post(
+        f"/api/recipe-scan/items/{item['id']}/edit",
+        json={"corrected": corrected},
+    )
+    resp = client.get("/api/recipe-scan/export")
+    lines = [l for l in resp.text.strip().splitlines() if l]
+    pair = json.loads(lines[0])
+    assistant_content = json.loads(pair["messages"][1]["content"])
+    assert assistant_content["servings"] == 4
+
+
+def test_export_excludes_rejected_items(client):
+    item = _item()
+    _import(client, [item])
+    client.post(f"/api/recipe-scan/items/{item['id']}/reject")
+    resp = client.get("/api/recipe-scan/export")
+    assert resp.text.strip() == ""
+
+
+# ── Stats ──────────────────────────────────────────────────────────────────────
+
+def test_stats_counts_all_statuses(client):
+    items = [_item(), _item(), _item(), _item()]
+    _import(client, items)
+    client.post(f"/api/recipe-scan/items/{items[0]['id']}/approve")
+    client.post(f"/api/recipe-scan/items/{items[1]['id']}/edit", json={"corrected": GROUND_TRUTH})
+    client.post(f"/api/recipe-scan/items/{items[2]['id']}/reject")
+    stats = client.get("/api/recipe-scan/stats").json()
+    assert stats["total"] == 4
+    assert stats["by_status"]["pending"] == 1
+    assert stats["by_status"]["approved"] == 1
+    assert stats["by_status"]["edited"] == 1
+    assert stats["by_status"]["rejected"] == 1
+    assert stats["export_ready"] == 2  # approved + edited
--- a/web/src/components/AppSidebar.vue
+++ b/web/src/components/AppSidebar.vue
@ -220,6 +220,7 @@ const dataItems: NavItem[] = [
  { path: '/data/fetch',       icon: '📬',  label: 'Fetch'       },
  { path: '/data/corrections', icon: '✏️',  label: 'Corrections' },
  { path: '/data/imitate',     icon: '🪞',  label: 'Imitate'     },
+  { path: '/data/recipe-scan', icon: '📷',  label: 'Recipe Scan' },
 ]

 const evalItems: NavItem[] = [
--- a/web/src/router/index.ts
+++ b/web/src/router/index.ts
@ -26,6 +26,7 @@ export const routes = [
  { path: '/data/fetch',       component: FetchView,       meta: { title: 'Fetch'       } },
  { path: '/data/corrections', component: CorrectionsView, meta: { title: 'Corrections' } },
  { path: '/data/imitate',     component: ImitateView,     meta: { title: 'Imitate'     } },
+  { path: '/data/recipe-scan', component: () => import('../views/RecipeScanView.vue'), meta: { title: 'Recipe Scan' } },

  // ── Eval domain ──────────────────────────────────────────
  { path: '/eval/benchmark', component: BenchmarkView, meta: { title: 'Benchmark' } },
--- a/web/src/views/RecipeScanView.vue
+++ b/web/src/views/RecipeScanView.vue
@ -0,0 +1,536 @@
+<template>
+  <div class="rsv">
+    <!-- Header -->
+    <header class="rsv-header">
+      <h1 class="rsv-title">Recipe Scan Review</h1>
+      <div class="rsv-stats" v-if="stats">
+        <span class="stat-chip">{{ stats.by_status?.pending ?? 0 }} pending</span>
+        <span class="stat-chip stat-chip--ok">{{ stats.by_status?.approved ?? 0 }} approved</span>
+        <span class="stat-chip stat-chip--edited">{{ stats.by_status?.edited ?? 0 }} edited</span>
+        <span class="stat-chip stat-chip--bad">{{ stats.by_status?.rejected ?? 0 }} rejected</span>
+        <a
+          v-if="(stats.export_ready ?? 0) > 0"
+          :href="`${apiBase}/api/recipe-scan/export`"
+          download
+          class="btn-export"
+        >
+          ⬇ Export {{ stats.export_ready }} pairs
+        </a>
+      </div>
+    </header>
+
+    <!-- Loading -->
+    <div v-if="loading" class="rsv-state" aria-label="Loading">
+      <div class="skeleton-block" />
+    </div>
+
+    <!-- Error -->
+    <div v-else-if="apiError" class="rsv-state rsv-error" role="alert">
+      <p>{{ apiError }}</p>
+      <button class="btn-action" @click="fetchNext">Retry</button>
+    </div>
+
+    <!-- Queue empty -->
+    <div v-else-if="!item" class="rsv-state rsv-empty">
+      <p>Queue is empty — all items reviewed.</p>
+      <p class="rsv-hint">Import items from the Kiwi pipeline to continue.</p>
+    </div>
+
+    <!-- Review panel -->
+    <div v-else class="rsv-workspace">
+      <!-- Left: image -->
+      <section class="rsv-image-panel" aria-label="Scan image">
+        <div class="rsv-panel-label">
+          <span class="modality-badge">{{ item.modality }}</span>
+          <span class="source-badge">{{ item.source }}</span>
+        </div>
+        <div class="rsv-image-wrap">
+          <img
+            v-if="imageUrl"
+            :src="imageUrl"
+            :alt="`Recipe scan — ${item.source}`"
+            class="rsv-image"
+          />
+          <div v-else class="rsv-image-placeholder">
+            <span>Image not available</span>
+            <code class="rsv-path">{{ item.image_path }}</code>
+          </div>
+        </div>
+      </section>
+
+      <!-- Right: JSON comparison -->
+      <section class="rsv-json-panel" aria-label="Extraction review">
+
+        <!-- Ground truth (read-only reference) -->
+        <div class="rsv-json-block">
+          <h2 class="rsv-json-label">Ground truth <span class="label-tag">reference</span></h2>
+          <pre class="rsv-json rsv-json--ground-truth" tabindex="0" aria-label="Ground truth JSON">{{ prettyJson(item.ground_truth) }}</pre>
+        </div>
+
+        <!-- Extracted / editable -->
+        <div class="rsv-json-block">
+          <h2 class="rsv-json-label">
+            Extracted
+            <span class="label-tag label-tag--edit">edit before approving</span>
+          </h2>
+          <textarea
+            v-model="draftJson"
+            class="rsv-json rsv-json--edit"
+            spellcheck="false"
+            aria-label="Extracted JSON — edit to correct"
+            :class="{ 'rsv-json--invalid': jsonError }"
+          />
+          <p v-if="jsonError" class="rsv-json-error" role="alert">{{ jsonError }}</p>
+        </div>
+
+        <!-- Actions -->
+        <div class="rsv-actions" role="group" aria-label="Review actions">
+          <button
+            class="btn-approve"
+            :disabled="acting"
+            @click="handleApprove"
+            title="Extracted JSON is accurate — approve as-is (A)"
+          >
+            ✓ Approve
+          </button>
+          <button
+            class="btn-edit"
+            :disabled="acting || !!jsonError"
+            @click="handleEdit"
+            title="Approve the edited JSON in the text area (E)"
+          >
+            ✎ Approve edited
+          </button>
+          <button
+            class="btn-reject"
+            :disabled="acting"
+            @click="handleReject"
+            title="Extraction too broken to use — reject (R)"
+          >
+            ✕ Reject
+          </button>
+        </div>
+
+      </section>
+    </div>
+
+    <!-- Feedback toast -->
+    <Transition name="toast">
+      <div v-if="toast" class="rsv-toast" role="status" aria-live="polite">
+        {{ toast }}
+      </div>
+    </Transition>
+  </div>
+</template>
+
+<script setup lang="ts">
+import { ref, computed, watch, onMounted, onUnmounted } from 'vue'
+
+// All API calls go to the origin that served this page.
+const apiBase = window.location.origin
+
+// Fields of the /next response that this view reads.
+interface RecipeScanItem {
+  id: string
+  image_path: string
+  modality: string
+  source: string
+  extracted: Record<string, unknown>
+  ground_truth: Record<string, unknown>
+  status: string
+}
+
+// Shape of the /stats response.
+interface Stats {
+  total: number
+  by_status: Record<string, number>
+  by_modality: Record<string, number>
+  export_ready: number
+}
+
+// item: the item under review (null = queue empty). loading: a /next request
+// is in flight. acting: a label action is in flight. draftJson: editable text
+// of the extracted JSON. toast: current feedback message ('' = hidden).
+const item    = ref<RecipeScanItem | null>(null)
+const stats   = ref<Stats | null>(null)
+const loading = ref(true)
+const acting  = ref(false)
+const apiError = ref('')
+const draftJson = ref('')
+const toast   = ref('')
+// Handle of the pending auto-hide timer for the toast, if any.
+let toastTimer: ReturnType<typeof setTimeout> | null = null
+
+// Validation message for the draft text; '' when it is blank or parses as JSON.
+const jsonError = computed(() => {
+  const text = draftJson.value
+  if (text.trim() === '') return ''
+  try {
+    JSON.parse(text)
+  } catch {
+    return 'Invalid JSON — fix before approving'
+  }
+  return ''
+})
+
+// URL of the image endpoint for the current item; '' while there is no item.
+const imageUrl = computed(() => {
+  const current = item.value
+  if (current === null) return ''
+  const query = encodeURIComponent(current.image_path)
+  return `${apiBase}/api/recipe-scan/image?path=${query}`
+})
+
+// Serialise a value as JSON indented by two spaces.
+function prettyJson(obj: unknown): string {
+  const indent = 2
+  return JSON.stringify(obj, null, indent)
+}
+
+// Show a feedback message for 2.5 s, replacing any message still on screen.
+function showToast(msg: string) {
+  // Cancel the previous auto-hide so it cannot clear the new message early.
+  if (toastTimer) clearTimeout(toastTimer)
+  toast.value = msg
+  toastTimer = setTimeout(() => {
+    toast.value = ''
+  }, 2500)
+}
+
+// Load the next pending item. A 404 means the queue is empty; any other
+// failure is surfaced through apiError.
+async function fetchNext() {
+  apiError.value = ''
+  loading.value = true
+  try {
+    const response = await fetch(`${apiBase}/api/recipe-scan/next`)
+    if (response.status === 404) {
+      item.value = null
+      return
+    }
+    if (!response.ok) throw new Error(`API error ${response.status}`)
+    const next: RecipeScanItem = await response.json()
+    item.value = next
+    draftJson.value = prettyJson(next.extracted)
+  } catch (err) {
+    apiError.value = err instanceof Error ? err.message : 'Could not reach API'
+  } finally {
+    loading.value = false
+  }
+}
+
+// Refresh the header counters; failures are ignored because they are non-critical.
+async function fetchStats() {
+  try {
+    const response = await fetch(`${apiBase}/api/recipe-scan/stats`)
+    if (!response.ok) return
+    stats.value = await response.json()
+  } catch { /* non-critical */ }
+}
+
+// POST a label action (approve | edit | reject) for the current item, then
+// reload the next item and the stats. Does nothing when there is no item or
+// another action is still in flight. Failures are reported via the toast.
+async function act(endpoint: string, body?: unknown) {
+  if (!item.value || acting.value) return
+  acting.value = true
+  const hasBody = body !== undefined
+  try {
+    // Item ids are arbitrary strings supplied at import time; encode them so
+    // characters such as '?', '#' or spaces cannot alter the request URL.
+    const id = encodeURIComponent(item.value.id)
+    const r = await fetch(`${apiBase}/api/recipe-scan/items/${id}/${endpoint}`, {
+      method: 'POST',
+      headers: hasBody ? { 'Content-Type': 'application/json' } : {},
+      body: hasBody ? JSON.stringify(body) : undefined,
+    })
+    if (!r.ok) throw new Error(`API error ${r.status}`)
+  } catch (e) {
+    showToast(e instanceof Error ? e.message : 'Action failed')
+    return
+  } finally {
+    // Runs on both the success and the failure path.
+    acting.value = false
+  }
+  await Promise.all([fetchNext(), fetchStats()])
+}
+
+// Approve the current item as-is.
+async function handleApprove() {
+  // Without this guard the keyboard shortcut showed "Approved" even when the
+  // queue was empty or another action was still running.
+  if (!item.value || acting.value) return
+  showToast('Approved')
+  await act('approve')
+}
+
+// Submit the text-area content as the corrected JSON for the current item.
+async function handleEdit() {
+  // Nothing to do when the queue is empty or another action is running.
+  if (!item.value || acting.value) return
+  let corrected: unknown
+  try {
+    corrected = JSON.parse(draftJson.value)
+  } catch {
+    // Covers invalid and empty text; previously this returned silently.
+    showToast('Invalid JSON — fix before approving')
+    return
+  }
+  showToast('Saved edit')
+  await act('edit', { corrected })
+}
+
+// Reject the current item (no reason is sent).
+async function handleReject() {
+  // Without this guard the keyboard shortcut showed "Rejected" even when the
+  // queue was empty or another action was still running.
+  if (!item.value || acting.value) return
+  showToast('Rejected')
+  await act('reject')
+}
+
+// Keyboard shortcuts: A = approve, E = edit+approve, R = reject
+function handleKey(e: KeyboardEvent) {
+  // Leave browser/OS shortcuts alone (Ctrl/Cmd+R reload, Ctrl/Cmd+A select
+  // all, ...) and ignore auto-repeat from a held key, so a single press can
+  // never label more than one item.
+  if (e.ctrlKey || e.metaKey || e.altKey || e.repeat) return
+
+  // Do not hijack typing in form controls or editable regions.
+  const target = e.target as HTMLElement | null
+  const tag = target?.tagName?.toLowerCase()
+  if (tag === 'textarea' || tag === 'input' || tag === 'select' || target?.isContentEditable) return
+
+  switch (e.key.toLowerCase()) {
+    case 'a':
+      void handleApprove()
+      break
+    case 'e':
+      void handleEdit()
+      break
+    case 'r':
+      void handleReject()
+      break
+  }
+}
+
+// Reset the editable draft whenever a different item is shown.
+watch(item, (newItem) => {
+  if (newItem) draftJson.value = prettyJson(newItem.extracted)
+})
+
+// Initial load, plus the global shortcut listener.
+onMounted(() => {
+  fetchNext()
+  fetchStats()
+  window.addEventListener('keydown', handleKey)
+})
+
+// Remove the global listener and cancel a pending toast timer on teardown.
+onUnmounted(() => {
+  window.removeEventListener('keydown', handleKey)
+  if (toastTimer) clearTimeout(toastTimer)
+})
+</script>
+
+<style scoped>
+.rsv {
+  display: flex;
+  flex-direction: column;
+  height: 100%;
+  padding: var(--space-md, 1rem);
+  gap: var(--space-md, 1rem);
+  box-sizing: border-box;
+  overflow: hidden;
+}
+
+/* Header */
+.rsv-header {
+  display: flex;
+  align-items: center;
+  gap: var(--space-md, 1rem);
+  flex-wrap: wrap;
+}
+.rsv-title {
+  font-size: 1.1rem;
+  font-weight: 600;
+  margin: 0;
+  color: var(--color-text, #fff);
+}
+.rsv-stats {
+  display: flex;
+  align-items: center;
+  gap: 0.5rem;
+  flex-wrap: wrap;
+}
+.stat-chip {
+  font-size: 0.75rem;
+  padding: 2px 8px;
+  border-radius: 12px;
+  background: var(--color-surface-alt, #2a2a2a);
+  color: var(--color-text-muted, #aaa);
+}
+.stat-chip--ok      { background: #1a3a1a; color: #6fcf97; }
+.stat-chip--edited  { background: #2a2a00; color: #f2c94c; }
+.stat-chip--bad     { background: #3a1a1a; color: #eb5757; }
+.btn-export {
+  font-size: 0.8rem;
+  padding: 4px 12px;
+  border-radius: 6px;
+  background: var(--color-accent, #4a9eff);
+  color: #fff;
+  text-decoration: none;
+}
+
+/* State panels */
+.rsv-state {
+  flex: 1;
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  justify-content: center;
+  gap: 0.5rem;
+  color: var(--color-text-muted, #aaa);
+}
+.rsv-error  { color: var(--color-danger, #eb5757); }
+.rsv-empty  { font-size: 1rem; }
+.rsv-hint   { font-size: 0.85rem; opacity: 0.7; margin: 0; }
+.skeleton-block {
+  width: 100%; height: 300px;
+  border-radius: 8px;
+  background: var(--color-surface-alt, #2a2a2a);
+  animation: pulse 1.5s ease-in-out infinite;
+}
+@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.5; } }
+
+/* Workspace: two-column layout */
+.rsv-workspace {
+  flex: 1;
+  display: grid;
+  grid-template-columns: 1fr 1fr;
+  gap: var(--space-md, 1rem);
+  min-height: 0;
+  overflow: hidden;
+}
+@media (max-width: 900px) {
+  .rsv-workspace {
+    grid-template-columns: 1fr;
+    overflow-y: auto;
+  }
+}
+
+/* Image panel */
+.rsv-image-panel {
+  display: flex;
+  flex-direction: column;
+  gap: 0.5rem;
+  min-height: 0;
+}
+.rsv-panel-label {
+  display: flex;
+  gap: 0.5rem;
+}
+.modality-badge, .source-badge {
+  font-size: 0.72rem;
+  padding: 2px 8px;
+  border-radius: 10px;
+  background: var(--color-surface-alt, #2a2a2a);
+  color: var(--color-text-muted, #aaa);
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+}
+.rsv-image-wrap {
+  flex: 1;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  background: var(--color-surface-alt, #111);
+  border-radius: 8px;
+  overflow: hidden;
+  min-height: 200px;
+}
+.rsv-image {
+  max-width: 100%;
+  max-height: 100%;
+  object-fit: contain;
+}
+.rsv-image-placeholder {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: 0.5rem;
+  color: var(--color-text-muted, #666);
+  font-size: 0.85rem;
+  padding: 1rem;
+  text-align: center;
+}
+.rsv-path {
+  font-size: 0.7rem;
+  word-break: break-all;
+  opacity: 0.6;
+}
+
+/* JSON panel */
+.rsv-json-panel {
+  display: flex;
+  flex-direction: column;
+  gap: 0.5rem;
+  min-height: 0;
+  overflow-y: auto;
+}
+.rsv-json-block {
+  display: flex;
+  flex-direction: column;
+  gap: 0.25rem;
+  flex: 1;
+  min-height: 0;
+}
+.rsv-json-label {
+  font-size: 0.8rem;
+  font-weight: 600;
+  color: var(--color-text-muted, #aaa);
+  margin: 0;
+  display: flex;
+  align-items: center;
+  gap: 0.5rem;
+}
+.label-tag {
+  font-size: 0.68rem;
+  font-weight: 400;
+  padding: 1px 6px;
+  border-radius: 8px;
+  background: var(--color-surface-alt, #2a2a2a);
+  color: var(--color-text-muted, #888);
+}
+.label-tag--edit {
+  background: #2a2a00;
+  color: #f2c94c;
+}
+.rsv-json {
+  font-family: var(--font-mono, monospace);
+  font-size: 0.75rem;
+  line-height: 1.5;
+  padding: 0.75rem;
+  border-radius: 6px;
+  min-height: 120px;
+  flex: 1;
+  overflow-y: auto;
+  resize: vertical;
+  white-space: pre;
+}
+.rsv-json--ground-truth {
+  background: var(--color-surface-alt, #111);
+  color: var(--color-text, #ccc);
+  border: 1px solid var(--color-border, #333);
+}
+.rsv-json--edit {
+  background: var(--color-surface, #1a1a1a);
+  color: var(--color-text, #e0e0e0);
+  border: 1px solid var(--color-border, #444);
+  caret-color: var(--color-accent, #4a9eff);
+  outline: none;
+  transition: border-color 0.15s;
+}
+.rsv-json--edit:focus {
+  border-color: var(--color-accent, #4a9eff);
+}
+.rsv-json--invalid {
+  border-color: var(--color-danger, #eb5757) !important;
+}
+.rsv-json-error {
+  font-size: 0.75rem;
+  color: var(--color-danger, #eb5757);
+  margin: 0;
+}
+
+/* Action buttons */
+.rsv-actions {
+  display: flex;
+  gap: 0.5rem;
+  padding-top: 0.25rem;
+  flex-wrap: wrap;
+}
+.btn-approve, .btn-edit, .btn-reject {
+  flex: 1;
+  min-width: 80px;
+  padding: 0.5rem 0.75rem;
+  border: none;
+  border-radius: 6px;
+  font-size: 0.85rem;
+  font-weight: 600;
+  cursor: pointer;
+  transition: opacity 0.15s;
+}
+.btn-approve, .btn-edit, .btn-reject {
+  opacity: 1;
+}
+.btn-approve:disabled, .btn-edit:disabled, .btn-reject:disabled {
+  opacity: 0.4;
+  cursor: default;
+}
+.btn-approve { background: #1e6e1e; color: #6fcf97; }
+.btn-approve:hover:not(:disabled) { background: #256325; }
+.btn-edit    { background: #4a4a00; color: #f2c94c; }
+.btn-edit:hover:not(:disabled)    { background: #606000; }
+.btn-reject  { background: #6e1e1e; color: #eb8f8f; }
+.btn-reject:hover:not(:disabled)  { background: #7a2222; }
+
+/* Toast */
+.rsv-toast {
+  position: fixed;
+  bottom: 1.5rem;
+  left: 50%;
+  transform: translateX(-50%);
+  background: var(--color-surface, #222);
+  color: var(--color-text, #fff);
+  border: 1px solid var(--color-border, #444);
+  border-radius: 8px;
+  padding: 0.5rem 1.25rem;
+  font-size: 0.85rem;
+  box-shadow: 0 4px 20px rgba(0,0,0,0.4);
+  pointer-events: none;
+  z-index: 100;
+}
+.toast-enter-active, .toast-leave-active { transition: opacity 0.2s, transform 0.2s; }
+.toast-enter-from, .toast-leave-to { opacity: 0; transform: translateX(-50%) translateY(8px); }
+</style>