feat(recipe-scan): labeling UI for Kiwi vision training pipeline (closes #65)
- POST /api/recipe-scan/import — bulk ingest from Kiwi scanner pipeline, idempotent by item id
- GET /api/recipe-scan/next — oldest-first pending item for review
- POST /api/recipe-scan/items/{id}/approve|edit|reject — label actions
- GET /api/recipe-scan/stats — counts by status and modality
- GET /api/recipe-scan/export — JSONL training pairs (messages chat format, Option B: correction prompt + extracted draft → corrected ground truth)
- GET /api/recipe-scan/image — path-traversal-safe image serving from /Library/Assets/kiwi/
- SQLite at data/recipe_scan.db with WAL mode; separate from corpus.db lifecycle
- set_db_path() testability seam; 18 tests, all passing
- RecipeScanView.vue: two-column review UI (image left, JSON diff right), keyboard shortcuts A/E/R, toast feedback, stats header, export download
- Route /data/recipe-scan and sidebar nav entry added
This commit is contained in:
parent
9bb88b168f
commit
391ebb3cd1
6 changed files with 1081 additions and 0 deletions
|
|
@ -70,6 +70,9 @@ def finetune_cancel_compat() -> dict:
|
|||
from app.data.log_corpus import router as log_corpus_router
|
||||
app.include_router(log_corpus_router, prefix="/api/corpus")
|
||||
|
||||
from app.data.recipe_scan import router as recipe_scan_router
|
||||
app.include_router(recipe_scan_router, prefix="/api/recipe-scan")
|
||||
|
||||
from app.dashboard import router as dashboard_router
|
||||
app.include_router(dashboard_router, prefix="/api")
|
||||
|
||||
|
|
|
|||
313
app/data/recipe_scan.py
Normal file
313
app/data/recipe_scan.py
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
"""Avocet — Recipe scan labeling API (avocet#65).
|
||||
|
||||
Receives recipe scan items from the Kiwi pipeline (scanner/phone image +
|
||||
docuvision OCR extraction + ground-truth structured recipe), presents them
|
||||
for human review, and exports approved/edited pairs in the messages chat
|
||||
format for the vision fine-tune harness.
|
||||
|
||||
DB: data/recipe_scan.db (separate from corpus.db — different lifecycle)
|
||||
No auth required — local admin tool, not a push endpoint.
|
||||
|
||||
All endpoints registered on `router`. api.py includes this with
|
||||
prefix="/api/recipe-scan".
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Generator, Literal
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Three directory levels above this file (app/data/recipe_scan.py).
_ROOT = Path(__file__).parent.parent.parent
# SQLite file used by every endpoint below; rebindable through set_db_path().
_DB_PATH: Path = _ROOT / "data" / "recipe_scan.db"

# Value sets for the `modality` and `status` columns.
# NOTE(review): neither set is referenced by the code visible in this module;
# modality is validated via a Literal on ImportItem instead.
_VALID_MODALITIES = {"scanner", "phone", "handwritten"}
_VALID_STATUSES = {"pending", "approved", "edited", "rejected"}

# DDL executed by _init_db(). Every statement uses IF NOT EXISTS, so running
# it against an already-initialised database is a no-op.
# extracted / ground_truth / corrected hold JSON text (written with json.dumps).
_SCHEMA = """
CREATE TABLE IF NOT EXISTS recipe_scan_items (
    id TEXT PRIMARY KEY,
    image_path TEXT NOT NULL,
    modality TEXT NOT NULL DEFAULT 'scanner',
    source TEXT NOT NULL DEFAULT 'purple_carrot',
    extracted TEXT NOT NULL,
    ground_truth TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    corrected TEXT,
    labeled_at TEXT,
    rejected_reason TEXT
);
CREATE INDEX IF NOT EXISTS idx_rsi_status ON recipe_scan_items(status);
CREATE INDEX IF NOT EXISTS idx_rsi_modality ON recipe_scan_items(modality);
"""

# All endpoints in this module are registered on this router.
router = APIRouter()
|
||||
|
||||
|
||||
# ── Testability seam ──────────────────────────────────────────────────────────
|
||||
|
||||
def set_db_path(path: Path) -> None:
    """Point this module at a different SQLite file.

    Only rebinds the module-level ``_DB_PATH``; it does not create the file
    or the schema. Call ``_init_db()`` afterwards if the target is new.
    """
    global _DB_PATH
    _DB_PATH = path
|
||||
|
||||
|
||||
# ── Internal helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
@contextmanager
def _db() -> Generator[sqlite3.Connection, None, None]:
    """Yield a connection to ``_DB_PATH`` with commit/rollback handling.

    The connection uses ``sqlite3.Row`` rows and WAL journaling. The
    transaction is committed when the ``with`` body finishes normally and
    rolled back (then re-raised) when it raises. The connection is always
    closed, including when connection setup itself fails.
    """
    conn = sqlite3.connect(str(_DB_PATH))
    try:
        # Setup lives inside the try so a failing PRAGMA (locked file, not a
        # database, ...) cannot leak the open connection.
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _init_db() -> None:
    """Create the table and indexes if they do not exist yet.

    Also creates the database's parent directory. This function runs at
    import time, and ``sqlite3.connect`` fails when the directory is
    missing (for example on a fresh checkout with no ``data/`` folder).
    """
    Path(_DB_PATH).parent.mkdir(parents=True, exist_ok=True)
    with _db() as conn:
        conn.executescript(_SCHEMA)
|
||||
|
||||
|
||||
def _now_iso() -> str:
    """Return the current time in UTC as an ISO-8601 string (with offset)."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
||||
|
||||
|
||||
def _build_training_pair(row: sqlite3.Row) -> dict:
    """Turn one labeled row into a messages-format training example.

    The user turn is a fixed correction prompt followed by the extracted
    JSON draft (pretty-printed). The assistant turn is the target JSON:
    the human-corrected version when one is stored, otherwise the ground
    truth. Both stored JSON strings are parsed and re-serialised.
    """
    # A stored correction takes precedence over the original ground truth.
    answer_json = row["corrected"] or row["ground_truth"]
    draft = json.loads(row["extracted"])
    answer = json.loads(answer_json)

    instructions = (
        "Review and correct this recipe extraction. "
        "Return valid JSON with fields: title, description, ingredients, steps, "
        "prep_time, cook_time, servings.\n\n"
    )
    draft_text = json.dumps(draft, ensure_ascii=False, indent=2)
    prompt = f"{instructions}Extraction to review:\n{draft_text}"

    pair = {column: row[column] for column in ("id", "modality", "source", "image_path")}
    pair["messages"] = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": json.dumps(answer, ensure_ascii=False)},
    ]
    return pair
|
||||
|
||||
|
||||
_init_db()
|
||||
|
||||
|
||||
# ── POST /import ───────────────────────────────────────────────────────────────
|
||||
|
||||
class ImportItem(BaseModel):
    """One scan item submitted to POST /import.

    ``id`` is optional: when it is omitted, empty or null a fresh UUID4 is
    generated. ``extracted`` and ``ground_truth`` are arbitrary JSON
    objects; ``modality`` is restricted to the three known capture types.
    """

    # Pydantic skips field validators for fields that fall back to their
    # default unless validate_default is enabled. Without it an omitted id
    # stays "" and every id-less item collides on the same primary key,
    # so INSERT OR IGNORE silently drops all but the first.
    model_config = {"validate_default": True}

    id: str = ""
    image_path: str
    modality: Literal["scanner", "phone", "handwritten"] = "scanner"
    source: str = "purple_carrot"
    extracted: dict
    ground_truth: dict

    @field_validator("id", mode="before")
    @classmethod
    def default_id(cls, v: str) -> str:
        """Replace a missing/empty id with a random UUID4 string."""
        return v or str(uuid.uuid4())
|
||||
|
||||
|
||||
class ImportRequest(BaseModel):
    """Request body for POST /import: a batch of scan items."""

    # Items to store; each is validated as an ImportItem.
    items: list[ImportItem]
|
||||
|
||||
|
||||
@router.post("/import")
def import_items(body: ImportRequest) -> dict:
    """Store a batch of scan items, skipping ids that already exist.

    Returns how many rows were actually inserted alongside the number of
    items submitted; re-sending the same ids therefore reports 0 imported.
    """
    insert_sql = (
        "INSERT OR IGNORE INTO recipe_scan_items "
        "(id, image_path, modality, source, extracted, ground_truth) "
        "VALUES (?, ?, ?, ?, ?, ?)"
    )
    with _db() as conn:
        # rowcount is 1 for a new row and 0 when the id was already present.
        inserted = sum(
            conn.execute(
                insert_sql,
                (
                    entry.id,
                    entry.image_path,
                    entry.modality,
                    entry.source,
                    json.dumps(entry.extracted),
                    json.dumps(entry.ground_truth),
                ),
            ).rowcount
            for entry in body.items
        )
    return {"imported": inserted, "total_submitted": len(body.items)}
|
||||
|
||||
|
||||
# ── GET /next ─────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/next")
def get_next() -> dict:
    """Return the pending item with the lowest rowid, or 404 if none is left.

    The ``extracted`` and ``ground_truth`` columns are decoded from their
    stored JSON text; all other columns are returned as stored.
    """
    with _db() as conn:
        cursor = conn.execute(
            "SELECT * FROM recipe_scan_items WHERE status = 'pending' ORDER BY rowid LIMIT 1"
        )
        pending = cursor.fetchone()

    if pending is None:
        raise HTTPException(404, "No pending items in queue")

    payload = dict(pending)
    for column in ("extracted", "ground_truth"):
        payload[column] = json.loads(pending[column])
    return payload
|
||||
|
||||
|
||||
# ── POST /items/{id}/approve ──────────────────────────────────────────────────
|
||||
|
||||
@router.post("/items/{item_id}/approve")
def approve_item(item_id: str) -> dict:
    """Set the item's status to 'approved' and stamp ``labeled_at``.

    Raises a 404 when no row has the given id.
    """
    with _db() as conn:
        updated = conn.execute(
            "UPDATE recipe_scan_items SET status='approved', labeled_at=? WHERE id=?",
            (_now_iso(), item_id),
        )
        # No matching row means the id is unknown.
        if updated.rowcount == 0:
            raise HTTPException(404, "Item not found")
    return {"status": "approved", "id": item_id}
|
||||
|
||||
|
||||
# ── POST /items/{id}/edit ─────────────────────────────────────────────────────
|
||||
|
||||
class EditBody(BaseModel):
    """Request body for POST /items/{id}/edit."""

    # Human-corrected recipe JSON; required, and must be a JSON object.
    corrected: dict
|
||||
|
||||
|
||||
@router.post("/items/{item_id}/edit")
def edit_item(item_id: str, body: EditBody) -> dict:
    """Store a human-corrected JSON for the item and mark it 'edited'.

    The correction is saved as JSON text in ``corrected`` and ``labeled_at``
    is stamped. Raises a 404 when no row has the given id.
    """
    corrected_json = json.dumps(body.corrected)
    with _db() as conn:
        updated = conn.execute(
            "UPDATE recipe_scan_items SET status='edited', corrected=?, labeled_at=? WHERE id=?",
            (corrected_json, _now_iso(), item_id),
        )
        # No matching row means the id is unknown.
        if updated.rowcount == 0:
            raise HTTPException(404, "Item not found")
    return {"status": "edited", "id": item_id}
|
||||
|
||||
|
||||
# ── POST /items/{id}/reject ───────────────────────────────────────────────────
|
||||
|
||||
class RejectBody(BaseModel):
    """Optional request body for POST /items/{id}/reject."""

    # Free-text rejection reason; an empty string is stored as NULL.
    reason: str = ""
|
||||
|
||||
|
||||
@router.post("/items/{item_id}/reject")
def reject_item(item_id: str, body: RejectBody = RejectBody()) -> dict:
    """Mark the item 'rejected', recording the optional reason.

    An empty reason is stored as NULL. ``labeled_at`` is stamped. Raises a
    404 when no row has the given id.
    """
    stored_reason = body.reason or None
    with _db() as conn:
        updated = conn.execute(
            "UPDATE recipe_scan_items SET status='rejected', rejected_reason=?, labeled_at=? WHERE id=?",
            (stored_reason, _now_iso(), item_id),
        )
        # No matching row means the id is unknown.
        if updated.rowcount == 0:
            raise HTTPException(404, "Item not found")
    return {"status": "rejected", "id": item_id}
|
||||
|
||||
|
||||
# ── GET /stats ────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/stats")
def get_stats() -> dict:
    """Return item counts: overall, per status, per modality, export-ready.

    ``export_ready`` counts rows whose status is 'approved' or 'edited'.
    Statuses/modalities with no rows are absent from the grouped dicts.
    """
    with _db() as conn:
        (total,) = conn.execute("SELECT COUNT(*) FROM recipe_scan_items").fetchone()

        status_rows = conn.execute(
            "SELECT status, COUNT(*) AS cnt FROM recipe_scan_items GROUP BY status"
        ).fetchall()
        by_status = {status: count for status, count in status_rows}

        modality_rows = conn.execute(
            "SELECT modality, COUNT(*) AS cnt FROM recipe_scan_items GROUP BY modality"
        ).fetchall()
        by_modality = {modality: count for modality, count in modality_rows}

        (export_ready,) = conn.execute(
            "SELECT COUNT(*) FROM recipe_scan_items WHERE status IN ('approved', 'edited')"
        ).fetchone()

    return {
        "total": total,
        "by_status": by_status,
        "by_modality": by_modality,
        "export_ready": export_ready,
    }
|
||||
|
||||
|
||||
# ── GET /export ───────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/export")
def export_pairs() -> StreamingResponse:
    """Send every approved/edited item as one JSONL training pair per line.

    Rows are read in rowid order up front; each is converted with
    ``_build_training_pair`` lazily while the response is being written.
    The response is served as a ``recipe_scan_pairs.jsonl`` attachment.
    """
    with _db() as conn:
        labeled = conn.execute(
            "SELECT * FROM recipe_scan_items WHERE status IN ('approved', 'edited') ORDER BY rowid"
        ).fetchall()

    jsonl_lines = (
        json.dumps(_build_training_pair(record), ensure_ascii=False) + "\n"
        for record in labeled
    )
    download_headers = {"Content-Disposition": "attachment; filename=recipe_scan_pairs.jsonl"}
    return StreamingResponse(
        jsonl_lines,
        media_type="application/x-ndjson",
        headers=download_headers,
    )
|
||||
|
||||
|
||||
# ── GET /image ────────────────────────────────────────────────────────────────
|
||||
|
||||
_IMAGE_ROOT = Path("/Library/Assets/kiwi")
|
||||
|
||||
|
||||
@router.get("/image")
def serve_image(path: str) -> StreamingResponse:
    """Serve a scan image from /Library/Assets/kiwi/.

    ``path`` is resolved (symlinks and ``..`` included) and must end up
    inside the image root; anything else is answered with 403. A path that
    is missing, is not a regular file, or cannot be opened yields 404.
    The media type is derived from the file suffix, falling back to
    ``application/octet-stream``.
    """
    try:
        root = _IMAGE_ROOT.resolve()
        resolved = Path(path).resolve()
        # relative_to raises ValueError when resolved is not under root.
        resolved.relative_to(root)
    except (ValueError, OSError):
        raise HTTPException(403, "Path outside allowed image directory") from None

    # is_file() rather than exists(): a directory inside the root would pass
    # exists() and then blow up in open() as an unhandled 500.
    if not resolved.is_file():
        raise HTTPException(404, "Image not found")

    suffix = resolved.suffix.lower()
    media_types = {".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png", ".webp": "image/webp"}
    media_type = media_types.get(suffix, "application/octet-stream")

    # Open eagerly so an unreadable file becomes a clean 404 instead of an
    # error after the response headers have already been sent.
    try:
        handle = open(resolved, "rb")
    except OSError:
        raise HTTPException(404, "Image not found") from None

    chunk_size = 64 * 1024

    def _chunks():
        # Fixed-size reads (iterating a binary file splits on b"\n"), and the
        # with-block guarantees the handle is closed once streaming ends.
        with handle:
            while True:
                chunk = handle.read(chunk_size)
                if not chunk:
                    break
                yield chunk

    return StreamingResponse(
        _chunks(),
        media_type=media_type,
        headers={"Cache-Control": "public, max-age=86400"},
    )
|
||||
227
tests/test_recipe_scan.py
Normal file
227
tests/test_recipe_scan.py
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
"""Tests for app/data/recipe_scan.py — recipe scan labeling endpoints."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.data import recipe_scan as rs
|
||||
|
||||
|
||||
# Sample OCR draft used as the default `extracted` payload in these tests.
EXTRACTED = {"title": "Shepherd's Pie", "ingredients": ["lamb", "potato"], "steps": ["brown meat", "mash potato"]}
# Sample reference recipe used as the default `ground_truth` payload.
GROUND_TRUTH = {"title": "Shepherd's Pie", "ingredients": ["ground lamb", "mashed potato", "peas"], "steps": ["brown meat", "add veg", "mash potato", "bake"]}
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def isolated_db(tmp_path, monkeypatch):
    """Give every test its own empty database under pytest's tmp_path."""
    scratch_db = tmp_path / "recipe_scan.db"
    monkeypatch.setattr(rs, "_DB_PATH", scratch_db)
    rs._init_db()
|
||||
|
||||
|
||||
@pytest.fixture()
def client():
    """TestClient for a bare FastAPI app that mounts only the recipe-scan router."""
    from fastapi import FastAPI

    test_app = FastAPI()
    test_app.include_router(rs.router, prefix="/api/recipe-scan")
    return TestClient(test_app)
|
||||
|
||||
|
||||
def _item(**kwargs) -> dict:
    """Build an import payload with a fresh UUID and a fixed image path.

    Only ``modality``, ``source``, ``extracted`` and ``ground_truth`` can be
    overridden through keyword arguments.
    """
    payload = {
        "id": str(uuid.uuid4()),
        "image_path": "/Library/Assets/kiwi/scans/pc_test.jpg",
    }
    payload["modality"] = kwargs.get("modality", "scanner")
    payload["source"] = kwargs.get("source", "purple_carrot")
    payload["extracted"] = kwargs.get("extracted", EXTRACTED)
    payload["ground_truth"] = kwargs.get("ground_truth", GROUND_TRUTH)
    return payload
|
||||
|
||||
|
||||
def _import(client, items: list[dict]) -> None:
    """POST the items to /import and require a 200 response."""
    response = client.post("/api/recipe-scan/import", json={"items": items})
    assert response.status_code == 200
|
||||
|
||||
|
||||
# ── Import ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_import_stores_items(client):
    """One imported item is counted once and starts out pending."""
    _import(client, [_item()])
    summary = client.get("/api/recipe-scan/stats").json()
    pending_count = summary["by_status"]["pending"]
    assert summary["total"] == 1
    assert pending_count == 1
|
||||
|
||||
|
||||
def test_import_rejects_unknown_modality(client):
    """A modality outside the allowed set fails request validation."""
    payload = {**_item(), "modality": "telepathy"}
    response = client.post("/api/recipe-scan/import", json={"items": [payload]})
    assert response.status_code == 422
|
||||
|
||||
|
||||
def test_import_is_idempotent(client):
    """Importing the same id twice leaves a single row."""
    payload = _item()
    for _ in range(2):  # the second pass re-sends the same id
        _import(client, [payload])
    summary = client.get("/api/recipe-scan/stats").json()
    assert summary["total"] == 1
|
||||
|
||||
|
||||
def test_import_multiple_items(client):
    """A batch of three distinct items yields three rows."""
    batch = [_item() for _ in range(3)]
    _import(client, batch)
    summary = client.get("/api/recipe-scan/stats").json()
    assert summary["total"] == 3
|
||||
|
||||
|
||||
# ── Next ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_next_returns_404_when_queue_empty(client):
    """With nothing imported, /next answers 404."""
    response = client.get("/api/recipe-scan/next")
    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_next_returns_pending_item(client):
    """/next returns the imported item, pending, with both JSON payloads."""
    payload = _item()
    _import(client, [payload])
    response = client.get("/api/recipe-scan/next")
    assert response.status_code == 200
    returned = response.json()
    assert returned["id"] == payload["id"]
    assert returned["status"] == "pending"
    assert "extracted" in returned
    assert "ground_truth" in returned
|
||||
|
||||
|
||||
def test_next_skips_non_pending(client):
    """A rejected item is no longer offered by /next."""
    payload = _item()
    _import(client, [payload])
    reject_url = f"/api/recipe-scan/items/{payload['id']}/reject"
    client.post(reject_url)
    response = client.get("/api/recipe-scan/next")
    assert response.status_code == 404
|
||||
|
||||
|
||||
# ── Approve ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_approve_marks_item_approved(client):
    """Approving returns status 'approved' and is reflected in stats."""
    payload = _item()
    _import(client, [payload])
    response = client.post(f"/api/recipe-scan/items/{payload['id']}/approve")
    assert response.status_code == 200
    assert response.json()["status"] == "approved"
    summary = client.get("/api/recipe-scan/stats").json()
    assert summary["by_status"]["approved"] == 1
|
||||
|
||||
|
||||
def test_approve_returns_404_for_unknown_id(client):
    """Approving an id that was never imported answers 404."""
    response = client.post("/api/recipe-scan/items/no-such-id/approve")
    assert response.status_code == 404
|
||||
|
||||
|
||||
# ── Edit ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_edit_stores_corrected_json(client):
    """Submitting a correction returns status 'edited' and updates stats."""
    payload = _item()
    _import(client, [payload])
    fixed_recipe = dict(GROUND_TRUTH, servings=4)
    edit_url = f"/api/recipe-scan/items/{payload['id']}/edit"
    response = client.post(edit_url, json={"corrected": fixed_recipe})
    assert response.status_code == 200
    assert response.json()["status"] == "edited"
    summary = client.get("/api/recipe-scan/stats").json()
    assert summary["by_status"]["edited"] == 1
|
||||
|
||||
|
||||
def test_edit_requires_corrected_field(client):
    """An edit body without 'corrected' fails request validation."""
    payload = _item()
    _import(client, [payload])
    edit_url = f"/api/recipe-scan/items/{payload['id']}/edit"
    response = client.post(edit_url, json={})
    assert response.status_code == 422
|
||||
|
||||
|
||||
# ── Reject ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_reject_marks_item_rejected(client):
    """Rejecting with a reason returns status 'rejected'."""
    payload = _item()
    _import(client, [payload])
    reject_url = f"/api/recipe-scan/items/{payload['id']}/reject"
    response = client.post(reject_url, json={"reason": "OCR completely unreadable"})
    assert response.status_code == 200
    assert response.json()["status"] == "rejected"
|
||||
|
||||
|
||||
def test_reject_without_reason_is_valid(client):
    """Rejecting with no request body at all is accepted."""
    payload = _item()
    _import(client, [payload])
    reject_url = f"/api/recipe-scan/items/{payload['id']}/reject"
    response = client.post(reject_url)
    assert response.status_code == 200
|
||||
|
||||
|
||||
# ── Export ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_export_empty_when_nothing_approved(client):
    """A queue holding only pending items exports an empty body."""
    _import(client, [_item()])
    response = client.get("/api/recipe-scan/export")
    assert response.status_code == 200
    body_text = response.text.strip()
    assert body_text == ""
|
||||
|
||||
|
||||
def test_export_includes_approved_item(client):
    """An approved item is exported as one user/assistant message pair."""
    payload = _item()
    _import(client, [payload])
    client.post(f"/api/recipe-scan/items/{payload['id']}/approve")
    response = client.get("/api/recipe-scan/export")
    records = [line for line in response.text.strip().splitlines() if line]
    assert len(records) == 1
    pair = json.loads(records[0])
    assert pair["id"] == payload["id"]
    assert pair["modality"] == "scanner"
    assert "messages" in pair
    messages = pair["messages"]
    assert len(messages) == 2
    user_turn, assistant_turn = messages[0], messages[1]
    assert user_turn["role"] == "user"
    assert assistant_turn["role"] == "assistant"
|
||||
|
||||
|
||||
def test_export_includes_edited_item_with_correction(client):
    """For an edited item the assistant turn carries the corrected JSON."""
    payload = _item()
    _import(client, [payload])
    fixed_recipe = dict(GROUND_TRUTH, servings=4)
    edit_url = f"/api/recipe-scan/items/{payload['id']}/edit"
    client.post(edit_url, json={"corrected": fixed_recipe})
    response = client.get("/api/recipe-scan/export")
    records = [line for line in response.text.strip().splitlines() if line]
    pair = json.loads(records[0])
    assistant_turn = pair["messages"][1]
    target = json.loads(assistant_turn["content"])
    assert target["servings"] == 4
|
||||
|
||||
|
||||
def test_export_excludes_rejected_items(client):
    """Rejected items never appear in the export."""
    payload = _item()
    _import(client, [payload])
    client.post(f"/api/recipe-scan/items/{payload['id']}/reject")
    response = client.get("/api/recipe-scan/export")
    body_text = response.text.strip()
    assert body_text == ""
|
||||
|
||||
|
||||
# ── Stats ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_stats_counts_all_statuses(client):
    """One item per status is counted correctly; export_ready = approved + edited."""
    batch = [_item() for _ in range(4)]
    _import(client, batch)
    approved, edited, rejected, _untouched = batch

    def item_url(entry: dict, action: str) -> str:
        return f"/api/recipe-scan/items/{entry['id']}/{action}"

    client.post(item_url(approved, "approve"))
    client.post(item_url(edited, "edit"), json={"corrected": GROUND_TRUTH})
    client.post(item_url(rejected, "reject"))

    summary = client.get("/api/recipe-scan/stats").json()
    assert summary["total"] == 4
    counts = summary["by_status"]
    assert counts["pending"] == 1
    assert counts["approved"] == 1
    assert counts["edited"] == 1
    assert counts["rejected"] == 1
    assert summary["export_ready"] == 2  # approved + edited
|
||||
|
|
@ -220,6 +220,7 @@ const dataItems: NavItem[] = [
|
|||
{ path: '/data/fetch', icon: '📬', label: 'Fetch' },
|
||||
{ path: '/data/corrections', icon: '✏️', label: 'Corrections' },
|
||||
{ path: '/data/imitate', icon: '🪞', label: 'Imitate' },
|
||||
{ path: '/data/recipe-scan', icon: '📷', label: 'Recipe Scan' },
|
||||
]
|
||||
|
||||
const evalItems: NavItem[] = [
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ export const routes = [
|
|||
{ path: '/data/fetch', component: FetchView, meta: { title: 'Fetch' } },
|
||||
{ path: '/data/corrections', component: CorrectionsView, meta: { title: 'Corrections' } },
|
||||
{ path: '/data/imitate', component: ImitateView, meta: { title: 'Imitate' } },
|
||||
{ path: '/data/recipe-scan', component: () => import('../views/RecipeScanView.vue'), meta: { title: 'Recipe Scan' } },
|
||||
|
||||
// ── Eval domain ──────────────────────────────────────────
|
||||
{ path: '/eval/benchmark', component: BenchmarkView, meta: { title: 'Benchmark' } },
|
||||
|
|
|
|||
536
web/src/views/RecipeScanView.vue
Normal file
536
web/src/views/RecipeScanView.vue
Normal file
|
|
@ -0,0 +1,536 @@
|
|||
<template>
  <div class="rsv">
    <!-- Header: title, per-status counters, and the export link (shown only when export_ready > 0) -->
    <header class="rsv-header">
      <h1 class="rsv-title">Recipe Scan Review</h1>
      <div class="rsv-stats" v-if="stats">
        <span class="stat-chip">{{ stats.by_status?.pending ?? 0 }} pending</span>
        <span class="stat-chip stat-chip--ok">{{ stats.by_status?.approved ?? 0 }} approved</span>
        <span class="stat-chip stat-chip--edited">{{ stats.by_status?.edited ?? 0 }} edited</span>
        <span class="stat-chip stat-chip--bad">{{ stats.by_status?.rejected ?? 0 }} rejected</span>
        <a
          v-if="(stats.export_ready ?? 0) > 0"
          :href="`${apiBase}/api/recipe-scan/export`"
          download
          class="btn-export"
        >
          ⬇ Export {{ stats.export_ready }} pairs
        </a>
      </div>
    </header>

    <!-- Loading: skeleton shown while fetchNext() is in flight -->
    <div v-if="loading" class="rsv-state" aria-label="Loading">
      <div class="skeleton-block" />
    </div>

    <!-- Error: message from the last failed fetchNext(), with a retry button -->
    <div v-else-if="apiError" class="rsv-state rsv-error" role="alert">
      <p>{{ apiError }}</p>
      <button class="btn-action" @click="fetchNext">Retry</button>
    </div>

    <!-- Queue empty: no pending item was returned -->
    <div v-else-if="!item" class="rsv-state rsv-empty">
      <p>Queue is empty — all items reviewed.</p>
      <p class="rsv-hint">Import items from the Kiwi pipeline to continue.</p>
    </div>

    <!-- Review panel: image on the left, JSON and actions on the right -->
    <div v-else class="rsv-workspace">
      <!-- Left: image, loaded through the /image endpoint via imageUrl -->
      <section class="rsv-image-panel" aria-label="Scan image">
        <div class="rsv-panel-label">
          <span class="modality-badge">{{ item.modality }}</span>
          <span class="source-badge">{{ item.source }}</span>
        </div>
        <div class="rsv-image-wrap">
          <img
            v-if="imageUrl"
            :src="imageUrl"
            :alt="`Recipe scan — ${item.source}`"
            class="rsv-image"
          />
          <div v-else class="rsv-image-placeholder">
            <span>Image not available</span>
            <code class="rsv-path">{{ item.image_path }}</code>
          </div>
        </div>
      </section>

      <!-- Right: JSON comparison -->
      <section class="rsv-json-panel" aria-label="Extraction review">

        <!-- Ground truth (read-only reference) -->
        <div class="rsv-json-block">
          <h2 class="rsv-json-label">Ground truth <span class="label-tag">reference</span></h2>
          <pre class="rsv-json rsv-json--ground-truth" tabindex="0" aria-label="Ground truth JSON">{{ prettyJson(item.ground_truth) }}</pre>
        </div>

        <!-- Extracted / editable: bound to draftJson; jsonError flags unparseable text -->
        <div class="rsv-json-block">
          <h2 class="rsv-json-label">
            Extracted
            <span class="label-tag label-tag--edit">edit before approving</span>
          </h2>
          <textarea
            v-model="draftJson"
            class="rsv-json rsv-json--edit"
            spellcheck="false"
            aria-label="Extracted JSON — edit to correct"
            :class="{ 'rsv-json--invalid': jsonError }"
          />
          <p v-if="jsonError" class="rsv-json-error" role="alert">{{ jsonError }}</p>
        </div>

        <!-- Actions: all disabled while a request is in flight; "Approve edited" also while the draft is invalid -->
        <div class="rsv-actions" role="group" aria-label="Review actions">
          <button
            class="btn-approve"
            :disabled="acting"
            @click="handleApprove"
            title="Extracted JSON is accurate — approve as-is (A)"
          >
            ✓ Approve
          </button>
          <button
            class="btn-edit"
            :disabled="acting || !!jsonError"
            @click="handleEdit"
            title="Approve the edited JSON in the text area (E)"
          >
            ✎ Approve edited
          </button>
          <button
            class="btn-reject"
            :disabled="acting"
            @click="handleReject"
            title="Extraction too broken to use — reject (R)"
          >
            ✕ Reject
          </button>
        </div>

      </section>
    </div>

    <!-- Feedback toast: transient status message set by showToast() -->
    <Transition name="toast">
      <div v-if="toast" class="rsv-toast" role="status" aria-live="polite">
        {{ toast }}
      </div>
    </Transition>
  </div>
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { ref, computed, watch, onMounted, onUnmounted } from 'vue'
|
||||
|
||||
// API requests go to the same origin that served this page.
const apiBase = window.location.origin

// Fields of a queue item (GET /api/recipe-scan/next) that this view reads.
interface RecipeScanItem {
  id: string
  image_path: string
  modality: string
  source: string
  extracted: Record<string, unknown>
  ground_truth: Record<string, unknown>
  status: string
}

// Response shape of GET /api/recipe-scan/stats.
interface Stats {
  total: number
  by_status: Record<string, number>
  by_modality: Record<string, number>
  export_ready: number
}

// Item currently under review; null when the queue is empty.
const item = ref<RecipeScanItem | null>(null)
// Latest counters for the header; null until the first successful fetch.
const stats = ref<Stats | null>(null)
// True while fetchNext() is in flight.
const loading = ref(true)
// True while an approve/edit/reject request is in flight.
const acting = ref(false)
// Message of the last fetchNext() failure; empty when there is none.
const apiError = ref('')
// Text of the editable JSON textarea.
const draftJson = ref('')
// Current toast text; empty hides the toast.
const toast = ref('')
// Pending auto-hide timer for the toast, if any.
let toastTimer: ReturnType<typeof setTimeout> | null = null
|
||||
|
||||
// Validation message for the draft textarea; '' means "no error".
// An empty draft is not flagged. Besides syntax errors this also flags valid
// JSON that is not an object (array, number, string, null): the edit endpoint
// only accepts an object for `corrected` and would answer 422 otherwise.
const jsonError = computed(() => {
  if (!draftJson.value.trim()) return ''
  let parsed: unknown
  try {
    parsed = JSON.parse(draftJson.value)
  } catch {
    return 'Invalid JSON — fix before approving'
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    return 'Corrected JSON must be an object'
  }
  return ''
})
|
||||
|
||||
// URL of the /image endpoint for the current item's image_path ('' without an item).
const imageUrl = computed(() => {
  const current = item.value
  if (!current) return ''
  const pathParam = encodeURIComponent(current.image_path)
  return `${apiBase}/api/recipe-scan/image?path=${pathParam}`
})
|
||||
|
||||
// Serialise a value as JSON indented by two spaces.
function prettyJson(obj: unknown): string {
  const indent = 2
  return JSON.stringify(obj, null, indent)
}
|
||||
|
||||
// Show a toast message and hide it again after 2.5 s.
// A new message restarts the countdown of a toast that is still visible.
function showToast(msg: string) {
  if (toastTimer) clearTimeout(toastTimer)
  toast.value = msg
  toastTimer = setTimeout(() => {
    toast.value = ''
  }, 2500)
}
|
||||
|
||||
// Load the next pending item. A 404 means the queue is empty (item = null);
// any other failure is surfaced through apiError.
async function fetchNext() {
  loading.value = true
  apiError.value = ''
  try {
    const response = await fetch(`${apiBase}/api/recipe-scan/next`)
    if (response.status === 404) {
      item.value = null
      return
    }
    if (!response.ok) {
      throw new Error(`API error ${response.status}`)
    }
    item.value = await response.json()
    draftJson.value = prettyJson(item.value!.extracted)
  } catch (err) {
    apiError.value = err instanceof Error ? err.message : 'Could not reach API'
  } finally {
    // Runs on every path, including the early return above.
    loading.value = false
  }
}
|
||||
|
||||
// Refresh the header counters. Failures are ignored on purpose: the stats
// are informational and the previous values simply stay on screen.
async function fetchStats() {
  try {
    const response = await fetch(`${apiBase}/api/recipe-scan/stats`)
    if (!response.ok) return
    stats.value = await response.json()
  } catch { /* non-critical */ }
}
|
||||
|
||||
// POST a label action for the current item, then load the next item and
// refresh the stats.
//   endpoint       - 'approve' | 'edit' | 'reject'
//   body           - optional JSON payload
//   successMessage - toast shown once the server has accepted the action
// Resolves to true when the action was accepted, false when it was skipped
// (no item / another action in flight) or failed (an error toast is shown).
// `acting` stays set until the refetch has finished, so a second click or
// shortcut cannot hit the already-labeled item.
async function act(endpoint: string, body?: unknown, successMessage?: string): Promise<boolean> {
  if (!item.value || acting.value) return false
  acting.value = true
  try {
    const itemId = encodeURIComponent(item.value.id)
    const r = await fetch(`${apiBase}/api/recipe-scan/items/${itemId}/${endpoint}`, {
      method: 'POST',
      headers: body ? { 'Content-Type': 'application/json' } : {},
      body: body ? JSON.stringify(body) : undefined,
    })
    if (!r.ok) throw new Error(`API error ${r.status}`)
    // Only report success after the server confirmed it.
    if (successMessage) showToast(successMessage)
    // fetchNext/fetchStats handle their own errors and never reject.
    await Promise.all([fetchNext(), fetchStats()])
    return true
  } catch (e) {
    showToast(e instanceof Error ? e.message : 'Action failed')
    return false
  } finally {
    acting.value = false
  }
}

// Approve the current item as-is.
async function handleApprove() {
  await act('approve', undefined, 'Approved')
}

// Approve the current item with the JSON from the textarea as correction.
// Does nothing while the draft is invalid or empty.
async function handleEdit() {
  if (jsonError.value) return
  let corrected: unknown
  try {
    corrected = JSON.parse(draftJson.value)
  } catch {
    return
  }
  await act('edit', { corrected }, 'Saved edit')
}

// Reject the current item (no reason is sent).
async function handleReject() {
  await act('reject', undefined, 'Rejected')
}
|
||||
|
||||
// Keyboard shortcuts: A = approve, E = edit+approve, R = reject
|
||||
// Global keydown handler for the review shortcuts.
// Ignored when:
//  - a modifier is held, so browser chords such as Ctrl/Cmd+R (reload) or
//    Ctrl/Cmd+A (select all) never label an item by accident;
//  - the key is auto-repeating (held down);
//  - focus is in a text-entry element;
//  - there is nothing to act on (no item, still loading, action in flight).
function handleKey(e: KeyboardEvent) {
  if (e.ctrlKey || e.metaKey || e.altKey || e.repeat) return
  const target = e.target as HTMLElement | null
  const tag = target?.tagName?.toLowerCase()
  if (tag === 'textarea' || tag === 'input' || tag === 'select' || target?.isContentEditable) return
  if (!item.value || loading.value || acting.value) return
  switch (e.key.toLowerCase()) {
    case 'a':
      void handleApprove()
      break
    case 'e':
      void handleEdit()
      break
    case 'r':
      void handleReject()
      break
  }
}
|
||||
|
||||
// Whenever a new item is loaded, reset the editable draft to its extraction.
watch(item, (newItem) => {
  if (newItem) draftJson.value = prettyJson(newItem.extracted)
})

// On mount: load the first item and the stats, and start listening for shortcuts.
onMounted(() => {
  fetchNext()
  fetchStats()
  window.addEventListener('keydown', handleKey)
})

// On unmount: remove the global listener and cancel a pending toast timer.
onUnmounted(() => {
  window.removeEventListener('keydown', handleKey)
  if (toastTimer) clearTimeout(toastTimer)
})
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.rsv {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100%;
|
||||
padding: var(--space-md, 1rem);
|
||||
gap: var(--space-md, 1rem);
|
||||
box-sizing: border-box;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
/* Header */
|
||||
.rsv-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--space-md, 1rem);
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.rsv-title {
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
margin: 0;
|
||||
color: var(--color-text, #fff);
|
||||
}
|
||||
.rsv-stats {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.stat-chip {
|
||||
font-size: 0.75rem;
|
||||
padding: 2px 8px;
|
||||
border-radius: 12px;
|
||||
background: var(--color-surface-alt, #2a2a2a);
|
||||
color: var(--color-text-muted, #aaa);
|
||||
}
|
||||
.stat-chip--ok { background: #1a3a1a; color: #6fcf97; }
|
||||
.stat-chip--edited { background: #2a2a00; color: #f2c94c; }
|
||||
.stat-chip--bad { background: #3a1a1a; color: #eb5757; }
|
||||
.btn-export {
|
||||
font-size: 0.8rem;
|
||||
padding: 4px 12px;
|
||||
border-radius: 6px;
|
||||
background: var(--color-accent, #4a9eff);
|
||||
color: #fff;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* ── State panels (loading / error / empty) ───────────────────────────── */

/* Fills the remaining space and centers its content on both axes. */
.rsv-state {
  flex: 1;
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  gap: 0.5rem;
  color: var(--color-text-muted, #aaa);
}

/* Declared after .rsv-state so the danger color overrides the muted one
   when both classes are on the same element. */
.rsv-error {
  color: var(--color-danger, #eb5757);
}

.rsv-empty {
  font-size: 1rem;
}

.rsv-hint {
  margin: 0;
  font-size: 0.85rem;
  opacity: 0.7;
}

/* Loading placeholder: fixed-height block that fades via the pulse
   keyframes defined below. */
.skeleton-block {
  width: 100%;
  height: 300px;
  border-radius: 8px;
  background: var(--color-surface-alt, #2a2a2a);
  animation: pulse 1.5s ease-in-out infinite;
}

/* Full opacity at both ends, half opacity at the midpoint. */
@keyframes pulse {
  0%,
  100% {
    opacity: 1;
  }
  50% {
    opacity: 0.5;
  }
}
|
||||
|
||||
/* ── Workspace: two equal columns, collapsing to one on narrow screens ── */

.rsv-workspace {
  flex: 1;
  min-height: 0;
  overflow: hidden;

  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: var(--space-md, 1rem);
}

/* At 900px and below: single column, and the workspace itself scrolls
   vertically (overrides the y-axis of the overflow: hidden above). */
@media (max-width: 900px) {
  .rsv-workspace {
    grid-template-columns: 1fr;
    overflow-y: auto;
  }
}
|
||||
|
||||
/* ── Image panel ──────────────────────────────────────────────────────── */

.rsv-image-panel {
  min-height: 0;
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
}

/* Row holding the modality / source badges. */
.rsv-panel-label {
  display: flex;
  gap: 0.5rem;
}

/* Small uppercase pill badges. */
.modality-badge,
.source-badge {
  padding: 2px 8px;
  border-radius: 10px;
  font-size: 0.72rem;
  letter-spacing: 0.04em;
  text-transform: uppercase;
  color: var(--color-text-muted, #aaa);
  background: var(--color-surface-alt, #2a2a2a);
}

/* Frame around the image: takes the remaining panel height (at least
   200px), centers its child and clips anything that overflows. */
.rsv-image-wrap {
  flex: 1;
  min-height: 200px;
  overflow: hidden;
  border-radius: 8px;
  background: var(--color-surface-alt, #111);

  display: flex;
  justify-content: center;
  align-items: center;
}

/* Scale the image down to fit the frame without changing aspect ratio. */
.rsv-image {
  max-width: 100%;
  max-height: 100%;
  object-fit: contain;
}

/* Centered column of placeholder content shown in place of an image. */
.rsv-image-placeholder {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 0.5rem;
  padding: 1rem;
  font-size: 0.85rem;
  text-align: center;
  color: var(--color-text-muted, #666);
}

/* Long paths may break at any character so they never overflow. */
.rsv-path {
  font-size: 0.7rem;
  opacity: 0.6;
  word-break: break-all;
}
|
||||
|
||||
/* ── JSON panel ───────────────────────────────────────────────────────── */

/* Vertical stack of JSON blocks; the panel scrolls on the y-axis. */
.rsv-json-panel {
  min-height: 0;
  overflow-y: auto;
  display: flex;
  flex-direction: column;
  gap: 0.5rem;
}

/* One labelled JSON area; blocks share the panel height equally. */
.rsv-json-block {
  flex: 1;
  min-height: 0;
  display: flex;
  flex-direction: column;
  gap: 0.25rem;
}

/* Block heading: label text with an inline tag next to it. */
.rsv-json-label {
  margin: 0;
  display: flex;
  align-items: center;
  gap: 0.5rem;
  font-size: 0.8rem;
  font-weight: 600;
  color: var(--color-text-muted, #aaa);
}

/* Neutral tag. The --edit modifier below must stay after this rule so
   its colors win at equal specificity. */
.label-tag {
  padding: 1px 6px;
  border-radius: 8px;
  font-size: 0.68rem;
  font-weight: 400;
  color: var(--color-text-muted, #888);
  background: var(--color-surface-alt, #2a2a2a);
}

.label-tag--edit {
  color: #f2c94c;
  background: #2a2a00;
}

/* Shared base for both JSON areas: monospace, whitespace preserved with
   no wrapping, vertically scrollable and user-resizable. */
.rsv-json {
  flex: 1;
  min-height: 120px;
  padding: 0.75rem;
  border-radius: 6px;
  overflow-y: auto;
  resize: vertical;

  font-family: var(--font-mono, monospace);
  font-size: 0.75rem;
  line-height: 1.5;
  white-space: pre;
}

.rsv-json--ground-truth {
  color: var(--color-text, #ccc);
  background: var(--color-surface-alt, #111);
  border: 1px solid var(--color-border, #333);
}

/* Editable variant. The default focus outline is removed; the :focus
   rule below signals focus through the border color instead. */
.rsv-json--edit {
  color: var(--color-text, #e0e0e0);
  caret-color: var(--color-accent, #4a9eff);
  background: var(--color-surface, #1a1a1a);
  border: 1px solid var(--color-border, #444);
  outline: none;
  transition: border-color 0.15s;
}

.rsv-json--edit:focus {
  border-color: var(--color-accent, #4a9eff);
}

/* !important is required: the :focus rule above has higher specificity
   and would otherwise hide the invalid state while the field is focused. */
.rsv-json--invalid {
  border-color: var(--color-danger, #eb5757) !important;
}

.rsv-json-error {
  margin: 0;
  font-size: 0.75rem;
  color: var(--color-danger, #eb5757);
}
|
||||
|
||||
/* ── Action buttons (approve / edit / reject) ─────────────────────────── */

/* Button row; wraps when the panel is too narrow for all three. */
.rsv-actions {
  display: flex;
  flex-wrap: wrap;
  gap: 0.5rem;
  padding-top: 0.25rem;
}

/* Shared button base. This selector list used to be declared twice in a
   row (the second time only for `opacity: 1`); the two rules are merged
   here, which leaves the cascade unchanged. */
.btn-approve,
.btn-edit,
.btn-reject {
  flex: 1;
  min-width: 80px;
  padding: 0.5rem 0.75rem;
  border: none;
  border-radius: 6px;
  font-size: 0.85rem;
  font-weight: 600;
  cursor: pointer;
  opacity: 1;
  transition: opacity 0.15s;
}

/* Disabled buttons are dimmed and lose the pointer cursor. */
.btn-approve:disabled,
.btn-edit:disabled,
.btn-reject:disabled {
  opacity: 0.4;
  cursor: default;
}

/* Per-action colors. Hover backgrounds only apply to enabled buttons. */
.btn-approve {
  background: #1e6e1e;
  color: #6fcf97;
}

/* NOTE(review): this hover green (#256325) is dimmer than the base
   (#1e6e1e), whereas the edit and reject hovers are brighter than their
   bases — confirm whether that is intentional. */
.btn-approve:hover:not(:disabled) {
  background: #256325;
}

.btn-edit {
  background: #4a4a00;
  color: #f2c94c;
}

.btn-edit:hover:not(:disabled) {
  background: #606000;
}

.btn-reject {
  background: #6e1e1e;
  color: #eb8f8f;
}

.btn-reject:hover:not(:disabled) {
  background: #7a2222;
}
|
||||
|
||||
/* ── Toast ────────────────────────────────────────────────────────────── */

/* Fixed to the bottom of the viewport and horizontally centered
   (left: 50% plus a -50% translate). pointer-events: none keeps it from
   intercepting clicks on the content underneath. */
.rsv-toast {
  position: fixed;
  bottom: 1.5rem;
  left: 50%;
  transform: translateX(-50%);
  z-index: 100;
  pointer-events: none;

  padding: 0.5rem 1.25rem;
  border: 1px solid var(--color-border, #444);
  border-radius: 8px;
  box-shadow: 0 4px 20px rgba(0,0,0,0.4);

  font-size: 0.85rem;
  color: var(--color-text, #fff);
  background: var(--color-surface, #222);
}

/* Enter/leave animation classes — presumably driven by a Vue
   <Transition name="toast">; confirm against the template. */
.toast-enter-active,
.toast-leave-active {
  transition: opacity 0.2s, transform 0.2s;
}

/* Hidden state: transparent and shifted 8px down. translateX(-50%) is
   repeated because this transform replaces the one on .rsv-toast. */
.toast-enter-from,
.toast-leave-to {
  opacity: 0;
  transform: translateX(-50%) translateY(8px);
}
|
||||
</style>
|
||||
Loading…
Reference in a new issue