Compare commits
37 commits
| Author | SHA1 | Date | |
|---|---|---|---|
| cf807179f5 | |||
| 0c200f3148 | |||
| 21a0664961 | |||
| a9ab996bcc | |||
| 56f942b3fd | |||
| 84636bcdaf | |||
| 51a48a430b | |||
| b326d4aa6e | |||
| 7cad503b35 | |||
| 430600c1af | |||
| 21a9b85067 | |||
| c72b4415db | |||
| 2df17ec719 | |||
| 4ac24e7920 | |||
| cdbc24240a | |||
| dd39418bc8 | |||
| 02abc8e734 | |||
| 61c428baf0 | |||
| 6e954c5c6e | |||
| ef04064728 | |||
| 59b183a898 | |||
| b4624fba84 | |||
| 667daf939e | |||
| 4e50661483 | |||
| ac4eda2047 | |||
| 3f4b756fc6 | |||
| 973c76a4c8 | |||
| 92fab94ae0 | |||
| 30f5620fd5 | |||
| 0ef57618bf | |||
| 8c765b7da2 | |||
| e57f46f4b6 | |||
| 04dbdddbad | |||
| e83bb0415a | |||
| e62d69d099 | |||
| 7498995092 | |||
| 640fcefa9e |
54 changed files with 5923 additions and 499 deletions
13
.env.example
13
.env.example
|
|
@ -21,10 +21,12 @@ DATA_DIR=./data
|
||||||
# IP this machine advertises to the coordinator (must be reachable from coordinator host)
|
# IP this machine advertises to the coordinator (must be reachable from coordinator host)
|
||||||
# CF_ORCH_ADVERTISE_HOST=10.1.10.71
|
# CF_ORCH_ADVERTISE_HOST=10.1.10.71
|
||||||
|
|
||||||
# CF-core hosted coordinator (managed cloud GPU inference — Paid+ tier)
|
# GPU inference server (cf-orch coordinator for recipe scan, LLM generation, etc.)
|
||||||
# Set CF_ORCH_URL to use a hosted cf-orch coordinator instead of self-hosting.
|
# GPU_SERVER_URL: set to your local cf-orch coordinator (self-hosted rack).
|
||||||
# CF_LICENSE_KEY is read automatically by CFOrchClient for bearer auth.
|
# CF_ORCH_URL is the backward-compat alias — both are honoured.
|
||||||
# CF_ORCH_URL=https://orch.circuitforge.tech
|
# Paid+ default: when CF_LICENSE_KEY is present and neither URL is set,
|
||||||
|
# the app automatically points to https://orch.circuitforge.tech.
|
||||||
|
# GPU_SERVER_URL=http://10.1.10.71:7700
|
||||||
# CF_LICENSE_KEY=CFG-KIWI-xxxx-xxxx-xxxx
|
# CF_LICENSE_KEY=CFG-KIWI-xxxx-xxxx-xxxx
|
||||||
|
|
||||||
# LLM backend — env-var auto-config (no llm.yaml needed for bare-metal users)
|
# LLM backend — env-var auto-config (no llm.yaml needed for bare-metal users)
|
||||||
|
|
@ -57,6 +59,9 @@ CF_APP_NAME=kiwi
|
||||||
# Unset = auto-detect: true if CLOUD_MODE or circuitforge_orch is installed (paid+ local).
|
# Unset = auto-detect: true if CLOUD_MODE or circuitforge_orch is installed (paid+ local).
|
||||||
# Set false to force LocalScheduler even when cf-orch is present.
|
# Set false to force LocalScheduler even when cf-orch is present.
|
||||||
# USE_ORCH_SCHEDULER=false
|
# USE_ORCH_SCHEDULER=false
|
||||||
|
# GPU_SERVER_URL: cf-orch coordinator endpoint. Required for recipe scan (cf-docuvision)
|
||||||
|
# and LLM features on a self-hosted rack. CF_ORCH_URL is the backward-compat alias.
|
||||||
|
# GPU_SERVER_URL=http://10.1.10.71:7700
|
||||||
|
|
||||||
# Cloud mode (set in compose.cloud.yml; also set here for reference)
|
# Cloud mode (set in compose.cloud.yml; also set here for reference)
|
||||||
# CLOUD_DATA_ROOT=/devl/kiwi-cloud-data
|
# CLOUD_DATA_ROOT=/devl/kiwi-cloud-data
|
||||||
|
|
|
||||||
142
README.md
142
README.md
|
|
@ -1,80 +1,118 @@
|
||||||
# 🥝 Kiwi
|
<!-- Logo coming soon — replace docs/kiwi-logo.svg when final icon ships -->
|
||||||
|
<div align="center">
|
||||||
|
<img src="docs/kiwi-logo.svg" alt="Kiwi logo" width="96" height="96" />
|
||||||
|
|
||||||
> *Part of the CircuitForge LLC "AI for the tasks the system made hard on purpose" suite.*
|
# Kiwi
|
||||||
|
|
||||||
**Pantry tracking and leftover recipe suggestions.**
|
**Pantry tracking and recipe suggestions — with or without an LLM.**
|
||||||
|
|
||||||
Scan barcodes, photograph receipts, and get recipe ideas based on what you already have — before it expires.
|
[](#license)
|
||||||
|
[](https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi/actions)
|
||||||
|
[](https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi/releases)
|
||||||
|
|
||||||
**LLM support is optional.** Inventory tracking, barcode scanning, expiry alerts, CSV export, and receipt upload all work without any LLM configured. AI features (receipt OCR, recipe suggestions, meal planning) activate when a backend is available and are BYOK-unlockable at any tier.
|
[Documentation](https://docs.circuitforge.tech/kiwi) · [Live demo](https://menagerie.circuitforge.tech/kiwi) · [circuitforge.tech](https://circuitforge.tech)
|
||||||
|
|
||||||
**Status:** Beta · CircuitForge LLC
|
*Part of the CircuitForge LLC suite — "AI for the tasks the system made hard on purpose."*
|
||||||
|
</div>
|
||||||
**[Documentation](https://docs.circuitforge.tech/kiwi/)** · [circuitforge.tech](https://circuitforge.tech)
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## What it does
|
> **The LLM is optional.** Barcode scanning, receipt upload, expiry alerts, the full 200k+ recipe browser, and CSV export all work with zero LLM configured. Recipe suggestions and receipt OCR activate when a backend is available, and are BYOK-unlockable at any tier. You are never forced to send your data anywhere.
|
||||||
|
|
||||||
- **Inventory tracking** — add items by barcode scan, receipt upload, or manually
|
---
|
||||||
- **Expiry alerts** — know what's about to go bad
|
|
||||||
- **Recipe browser** — browse the full recipe corpus by cuisine, meal type, dietary preference, or main ingredient; pantry match percentage shown inline (Free)
|
|
||||||
- **Saved recipes** — bookmark any recipe with notes, a 0–5 star rating, and free-text style tags (Free); organize into named collections (Paid)
|
|
||||||
- **Receipt OCR** — extract line items from receipt photos automatically (Paid tier, BYOK-unlockable)
|
|
||||||
- **Recipe suggestions** — four levels from pantry-match to full LLM generation (Paid tier, BYOK-unlockable)
|
|
||||||
- **Style auto-classifier** — LLM suggests style tags (comforting, hands-off, quick, etc.) for saved recipes (Paid tier, BYOK-unlockable)
|
|
||||||
- **Leftover mode** — prioritize nearly-expired items in recipe ranking (Free, 5/day; unlimited at Paid+)
|
|
||||||
- **LLM backend config** — configure inference via `circuitforge-core` env-var system; BYOK unlocks Paid AI features at any tier
|
|
||||||
- **Feedback FAB** — in-app feedback button; status probed on load, hidden if CF feedback endpoint unreachable
|
|
||||||
|
|
||||||
## Stack
|
## What Kiwi does
|
||||||
|
|
||||||
- **Frontend:** Vue 3 SPA (Vite + TypeScript)
|
| Feature | Notes |
|
||||||
- **Backend:** FastAPI + SQLite (via `circuitforge-core`)
|
|---|---|
|
||||||
- **Auth:** CF session cookie → Directus JWT (cloud mode)
|
| **Inventory tracking** | Add items by barcode scan, receipt upload, or manually |
|
||||||
- **Licensing:** Heimdall (free tier auto-provisioned at signup)
|
| **Expiry alerts** | Know what is about to go bad before it does |
|
||||||
|
| **Recipe browser** | 200k+ recipes — filter by cuisine, meal type, dietary preference, or main ingredient; pantry match percentage shown inline |
|
||||||
|
| **Leftover mode** | Prioritizes nearly-expired items in recipe ranking (5/day free, unlimited at Paid+) |
|
||||||
|
| **Recipe suggestions** | Four levels: direct corpus match, substitution/swap, cuisine-style adapter, full LLM generation |
|
||||||
|
| **Meal planning** | Plan meals for the week; pull from saved recipes or suggestions |
|
||||||
|
| **Saved recipes** | Bookmark any recipe with notes, 0-5 star rating, and free-text style tags; organize into named collections (Paid) |
|
||||||
|
| **Receipt OCR** | Extract line items from receipt photos automatically |
|
||||||
|
| **Dietary profiles** | Vegan, gluten-free, diabetic, and other constraints respected throughout |
|
||||||
|
| **Style auto-classifier** | LLM suggests style tags (comforting, hands-off, quick, etc.) for saved recipes |
|
||||||
|
| **Community feed** | Browse and share recipes with other Kiwi users |
|
||||||
|
| **CSV export** | Full pantry export, always available, no tier gate |
|
||||||
|
|
||||||
## Running locally
|
---
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
**One-line install (self-hosted, Docker required):**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi/raw/branch/main/install.sh)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Or clone and run manually:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi.git
|
||||||
|
cd kiwi
|
||||||
cp .env.example .env
|
cp .env.example .env
|
||||||
./manage.sh build
|
./manage.sh build
|
||||||
./manage.sh start
|
./manage.sh start
|
||||||
# Web: http://localhost:8511
|
# Web: http://localhost:8511
|
||||||
# API: http://localhost:8512
|
# API: http://localhost:8512
|
||||||
```
|
```
|
||||||
|
|
||||||
## Cloud instance
|
**Live cloud instance** (free account required):
|
||||||
|
[menagerie.circuitforge.tech/kiwi](https://menagerie.circuitforge.tech/kiwi)
|
||||||
|
|
||||||
```bash
|
Full setup and configuration guide: [docs.circuitforge.tech/kiwi](https://docs.circuitforge.tech/kiwi)
|
||||||
./manage.sh cloud-build
|
|
||||||
./manage.sh cloud-start
|
---
|
||||||
# Served at menagerie.circuitforge.tech/kiwi (JWT-gated)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Tiers
|
## Tiers
|
||||||
|
|
||||||
| Feature | Free | Paid | Premium |
|
| Feature | Free | Paid | Premium |
|
||||||
|---------|------|------|---------|
|
|---|:---:|:---:|:---:|
|
||||||
| Inventory CRUD | ✓ | ✓ | ✓ |
|
| Inventory CRUD | Yes | Yes | Yes |
|
||||||
| Barcode scan | ✓ | ✓ | ✓ |
|
| Barcode scan | Yes | Yes | Yes |
|
||||||
| Receipt upload | ✓ | ✓ | ✓ |
|
| Receipt upload | Yes | Yes | Yes |
|
||||||
| Expiry alerts | ✓ | ✓ | ✓ |
|
| Expiry alerts | Yes | Yes | Yes |
|
||||||
| CSV export | ✓ | ✓ | ✓ |
|
| CSV export | Yes | Yes | Yes |
|
||||||
| Recipe browser (domain/category) | ✓ | ✓ | ✓ |
|
| Recipe browser (200k+ recipes) | Yes | Yes | Yes |
|
||||||
| Save recipes + notes + star rating | ✓ | ✓ | ✓ |
|
| Save recipes + notes + star rating | Yes | Yes | Yes |
|
||||||
| Style tags (manual, free-text) | ✓ | ✓ | ✓ |
|
| Style tags (manual, free-text) | Yes | Yes | Yes |
|
||||||
| Receipt OCR | BYOK | ✓ | ✓ |
|
| Leftover mode (5/day) | Yes | Yes | Yes |
|
||||||
| Recipe suggestions (L1–L4) | BYOK | ✓ | ✓ |
|
| Receipt OCR | BYOK | Yes | Yes |
|
||||||
| Named recipe collections | — | ✓ | ✓ |
|
| Recipe suggestions (L1–L4) | BYOK | Yes | Yes |
|
||||||
| LLM style auto-classifier | — | BYOK | ✓ |
|
| Named recipe collections | — | Yes | Yes |
|
||||||
| Meal planning | — | ✓ | ✓ |
|
| LLM style auto-classifier | — | BYOK | Yes |
|
||||||
| Multi-household | — | — | ✓ |
|
| Meal planning | — | Yes | Yes |
|
||||||
| Leftover mode (5/day) | ✓ | ✓ | ✓ |
|
| Multi-household | — | — | Yes |
|
||||||
|
|
||||||
BYOK = bring your own LLM backend (configure `~/.config/circuitforge/llm.yaml`)
|
**BYOK** = bring your own LLM backend. Configure `~/.config/circuitforge/llm.yaml` to unlock AI features at any tier without a paid subscription.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Stack
|
||||||
|
|
||||||
|
- **Frontend:** Vue 3 SPA (Vite + TypeScript), served on port 8511
|
||||||
|
- **Backend:** FastAPI + SQLite via `circuitforge-core`, API on port 8512
|
||||||
|
- **Auth:** CircuitForge session cookie (cloud mode); local mode requires no account
|
||||||
|
- **Licensing:** Heimdall — free tier auto-provisioned at signup
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Forgejo-primary
|
||||||
|
|
||||||
|
Kiwi is developed and maintained on Forgejo at [git.opensourcesolarpunk.com/Circuit-Forge/kiwi](https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi). GitHub and Codeberg are read-only mirrors. File issues and submit pull requests on Forgejo.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
Discovery/pipeline layer: MIT
|
Kiwi uses a split license:
|
||||||
AI features: BSL 1.1 (free for personal non-commercial self-hosting)
|
|
||||||
|
- **Discovery and inventory pipeline** (barcode scan, expiry tracking, pantry CRUD, CSV export, recipe browser): [MIT](LICENSE-MIT)
|
||||||
|
- **AI features** (receipt OCR, LLM recipe suggestions, style auto-classifier): [BSL 1.1](LICENSE-BSL) — free for personal non-commercial self-hosting; commercial use or SaaS re-hosting requires a paid license. Converts to MIT after 4 years.
|
||||||
|
|
||||||
|
Humans own design, architecture, code review, testing, and verification. LLMs are part of our development workflow. [Our positions on LLM use →](https://circuitforge.tech/positions)
|
||||||
|
|
||||||
|
Privacy · Safety · Accessibility — co-equal, non-negotiable across all CircuitForge products.
|
||||||
|
|
|
||||||
332
app/api/endpoints/activitypub.py
Normal file
332
app/api/endpoints/activitypub.py
Normal file
|
|
@ -0,0 +1,332 @@
|
||||||
|
# app/api/endpoints/activitypub.py
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# ActivityPub endpoints for Kiwi instances:
|
||||||
|
# GET /.well-known/webfinger — WebFinger JRD
|
||||||
|
# GET /ap/actor — Instance actor document
|
||||||
|
# POST /ap/actor/inbox — Incoming activities
|
||||||
|
# GET /ap/outbox — Outgoing activities (OrderedCollection)
|
||||||
|
# GET /ap/posts/{slug} — Individual AP Note
|
||||||
|
# GET /ap/followers — Followers collection (count only)
|
||||||
|
# GET /ap/following — Following collection (empty stub)
|
||||||
|
#
|
||||||
|
# All endpoints are no-ops / 404 when AP_ENABLED=false or actor not loaded.
|
||||||
|
# The WebFinger and well-known routes are mounted at the root app level (not
|
||||||
|
# under /api/v1) — see main.py.
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from fastapi import APIRouter, HTTPException, Request, Response
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
from app.core.config import settings
|
||||||
|
from app.services.ap.keys import get_actor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ── Two routers: one for well-known (root mount), one for /ap prefix ─────────
|
||||||
|
|
||||||
|
webfinger_router = APIRouter(tags=["activitypub"])
|
||||||
|
ap_router = APIRouter(prefix="/ap", tags=["activitypub"])
|
||||||
|
|
||||||
|
_AP_CONTENT_TYPE = "application/activity+json"
|
||||||
|
_JRD_CONTENT_TYPE = "application/jrd+json"
|
||||||
|
|
||||||
|
|
||||||
|
def _actor_required():
    """Return the instance actor, or answer 404 when none is loaded.

    Raises:
        HTTPException: 404 when ``get_actor()`` returns ``None``.
    """
    loaded = get_actor()
    if loaded is not None:
        return loaded
    raise HTTPException(status_code=404, detail="ActivityPub not enabled on this instance.")
|
||||||
|
|
||||||
|
|
||||||
|
# ── WebFinger ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@webfinger_router.get("/.well-known/webfinger")
async def webfinger(resource: str | None = None):
    """Serve the WebFinger JRD that points at this instance's actor.

    The only subject served is ``acct:kiwi@<AP_HOST>``. An omitted or empty
    ``resource`` is answered with that subject; a non-empty ``resource`` that
    differs from it is rejected.

    Raises:
        HTTPException: 404 when no actor is loaded, or when ``resource`` is
            given and does not equal the expected subject.
    """
    instance_actor = get_actor()
    if instance_actor is None:
        raise HTTPException(status_code=404, detail="ActivityPub not enabled.")

    subject = f"acct:kiwi@{settings.AP_HOST}"
    if resource and resource != subject:
        raise HTTPException(status_code=404, detail=f"Resource {resource!r} not found.")

    self_link = {
        "rel": "self",
        "type": _AP_CONTENT_TYPE,
        "href": instance_actor.actor_id,
    }
    payload = json.dumps({"subject": subject, "links": [self_link]})
    return Response(content=payload, media_type=_JRD_CONTENT_TYPE)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Actor ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@ap_router.get("/actor")
async def get_actor_doc():
    """Return the instance actor document as ``application/activity+json``.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    document = _actor_required().to_ap_dict()
    body = json.dumps(document)
    return Response(content=body, media_type=_AP_CONTENT_TYPE)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Inbox (mounted via make_inbox_router below) ───────────────────────────────
|
||||||
|
|
||||||
|
async def _on_follow(activity: dict, headers: dict) -> None:
    """Handle an incoming Follow: record the follower, then send Accept(Follow).

    Steps, in order:
      1. Read the ``actor`` field of the activity; give up if it is missing
         or not a string.
      2. Resolve the remote actor's inbox (blocking HTTP, run in a thread);
         give up if no inbox can be resolved.
      3. Upsert the follower into ``ap_followers`` with ``active = 1``.
      4. If a local actor is loaded, deliver an ``Accept`` wrapping the
         original activity to the follower's inbox.

    Args:
        activity: The parsed Follow activity.
        headers: Request headers as passed by the inbox router (unused here).
    """
    import sqlite3
    from contextlib import closing

    from app.core.config import settings as _settings

    actor_url = activity.get("actor", "")
    # The value is bound as a SQL parameter and fetched as a URL below, so
    # anything other than a non-empty string cannot be processed.
    if not isinstance(actor_url, str) or not actor_url:
        return

    inbox_url, shared_inbox = await asyncio.to_thread(_resolve_inbox, actor_url)
    if inbox_url is None:
        return

    db_path = str(_settings.DB_PATH)
    followed_at = datetime.now(timezone.utc).isoformat()

    def _store_follower() -> None:
        # closing() guarantees the connection is released even if the
        # statement or the commit raises.
        with closing(sqlite3.connect(db_path)) as conn:
            conn.execute(
                """INSERT OR REPLACE INTO ap_followers
                   (actor_id, inbox_url, shared_inbox, followed_at, active)
                   VALUES (?, ?, ?, ?, 1)""",
                (actor_url, inbox_url, shared_inbox, followed_at),
            )
            conn.commit()

    # sqlite3 is blocking; keep it off the event loop like the other I/O here.
    await asyncio.to_thread(_store_follower)

    actor = get_actor()
    if actor is None:
        return

    accept = {
        "@context": "https://www.w3.org/ns/activitystreams",
        "id": f"{actor.actor_id}/accepts/{activity.get('id', 'unknown')}",
        "type": "Accept",
        "actor": actor.actor_id,
        "object": activity,
    }
    from circuitforge_core.activitypub import deliver_activity

    # NOTE(review): the trailing 10.0 is presumably a timeout in seconds —
    # confirm against deliver_activity's signature.
    await asyncio.to_thread(deliver_activity, accept, inbox_url, actor, 10.0)
|
||||||
|
|
||||||
|
|
||||||
|
async def _on_undo(activity: dict, headers: dict) -> None:
    """Handle Undo(Follow) by marking the sender's follower row inactive.

    Only an ``Undo`` whose ``object`` is a dict with ``type == "Follow"`` is
    acted on; every other shape is ignored.

    Args:
        activity: The parsed Undo activity.
        headers: Request headers as passed by the inbox router (unused here).
    """
    undone = activity.get("object", {})
    if not isinstance(undone, dict):
        return
    if undone.get("type") != "Follow":
        return

    follower_id = activity.get("actor", "")
    if not follower_id:
        return

    import sqlite3

    db = sqlite3.connect(str(settings.DB_PATH))
    try:
        db.execute(
            "UPDATE ap_followers SET active = 0 WHERE actor_id = ?",
            (follower_id,),
        )
        db.commit()
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _dedup_activity(activity_id: str | None) -> bool:
    """Record ``activity_id`` in ``ap_received`` and report whether it was already there.

    Returns:
        ``True`` when the insert hits ``sqlite3.IntegrityError`` (treated as
        "already seen"); ``False`` when the id was newly inserted or when no
        id was supplied.
    """
    if not activity_id:
        return False

    import sqlite3

    db = sqlite3.connect(str(settings.DB_PATH))
    try:
        db.execute("INSERT INTO ap_received (activity_id) VALUES (?)", (activity_id,))
        db.commit()
    except sqlite3.IntegrityError:
        return True
    else:
        return False
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _build_inbox_router():
    """Build the inbox sub-router with de-duplicating Follow and Undo handlers.

    Each handler first records the activity id through ``_dedup_activity``
    and only forwards the activity when it has not been seen before.
    """
    from circuitforge_core.activitypub.inbox import make_inbox_router

    async def on_follow(activity: dict, headers: dict) -> None:
        already_seen = await _dedup_activity(activity.get("id"))
        if not already_seen:
            await _on_follow(activity, headers)

    async def on_undo(activity: dict, headers: dict) -> None:
        already_seen = await _dedup_activity(activity.get("id"))
        if not already_seen:
            await _on_undo(activity, headers)

    dispatch = {"Follow": on_follow, "Undo": on_undo}
    return make_inbox_router(
        handlers=dispatch,
        # NOTE(review): None is passed unconditionally here — confirm whether
        # this leaves HTTP-signature verification off for incoming activities.
        verify_key_fetcher=None,
        path="/inbox",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# The inbox lives under the actor: the sub-router's "/inbox" path is included
# with an "/actor" prefix on ap_router. Any failure while building or mounting
# it is logged and the rest of the module still loads.
try:
    _inbox_sub = _build_inbox_router()
    ap_router.include_router(_inbox_sub, prefix="/actor")
except Exception as _e:
    logger.warning("AP inbox router not available: %s", _e)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Outbox ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@ap_router.get("/outbox")
async def get_outbox(page: int | None = None, request: Request = None):
    """Return the outbox as an OrderedCollection, or one page of it.

    Without a community store an empty collection is returned. Otherwise up
    to 20 posts are listed; a truthy ``page`` selects the 1-based page and
    switches the response type to ``OrderedCollectionPage``.

    Args:
        page: Optional 1-based page number.
        request: Accepted but not used by this handler.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    actor = _actor_required()
    from app.api.endpoints.community import _get_community_store

    store = _get_community_store()
    base = f"https://{settings.AP_HOST}"

    def _as_response(document: dict) -> Response:
        return Response(content=json.dumps(document), media_type=_AP_CONTENT_TYPE)

    if store is None:
        return _as_response(
            {
                "@context": "https://www.w3.org/ns/activitystreams",
                "id": f"{actor.outbox_url}",
                "type": "OrderedCollection",
                "totalItems": 0,
                "orderedItems": [],
            }
        )

    page_size = 20
    first_row = ((page or 1) - 1) * page_size
    posts = await asyncio.to_thread(store.list_posts, limit=page_size, offset=first_row)
    notes = [_post_to_ap_note(entry, actor, base) for entry in posts]

    if page:
        collection_id = actor.outbox_url + f"?page={page}"
        collection_type = "OrderedCollectionPage"
    else:
        collection_id = actor.outbox_url + ""
        collection_type = "OrderedCollection"

    return _as_response(
        {
            "@context": "https://www.w3.org/ns/activitystreams",
            "id": collection_id,
            "type": collection_type,
            "orderedItems": notes,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ── Individual post ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@ap_router.get("/posts/{slug}")
async def get_ap_post(slug: str):
    """Return a single community post rendered as an ActivityPub Note.

    Raises:
        HTTPException: 404 when no actor is loaded, when the community store
            is unavailable, or when no post matches ``slug``.
    """
    actor = _actor_required()
    from app.api.endpoints.community import _get_community_store

    community = _get_community_store()
    if community is None:
        raise HTTPException(status_code=404, detail="Community DB not available.")

    found = await asyncio.to_thread(community.get_post_by_slug, slug)
    if found is None:
        raise HTTPException(status_code=404, detail="Post not found.")

    origin = f"https://{settings.AP_HOST}"
    body = json.dumps(_post_to_ap_note(found, actor, origin))
    return Response(content=body, media_type=_AP_CONTENT_TYPE)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Followers / Following ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@ap_router.get("/followers")
async def get_followers():
    """Return the followers collection, exposing only the active-follower count.

    The count is best-effort: if the database cannot be read, the failure is
    logged and ``totalItems`` is reported as 0 rather than failing the request.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    actor = _actor_required()
    import sqlite3
    from contextlib import closing

    count = 0
    try:
        # closing() releases the connection even when the query raises.
        with closing(sqlite3.connect(str(settings.DB_PATH))) as conn:
            row = conn.execute(
                "SELECT COUNT(*) FROM ap_followers WHERE active = 1"
            ).fetchone()
        count = row[0] if row else 0
    except Exception as exc:
        # Still best-effort, but leave a trace instead of failing silently.
        logger.warning("Could not count AP followers: %s", exc)

    collection = {
        "@context": "https://www.w3.org/ns/activitystreams",
        "id": f"{actor.actor_id}/followers",
        "type": "OrderedCollection",
        "totalItems": count,
    }
    return Response(content=json.dumps(collection), media_type=_AP_CONTENT_TYPE)
|
||||||
|
|
||||||
|
|
||||||
|
@ap_router.get("/following")
async def get_following():
    """Return the following collection, which is always empty.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    owner = _actor_required()
    empty_collection = {
        "@context": "https://www.w3.org/ns/activitystreams",
        "id": f"{owner.actor_id}/following",
        "type": "OrderedCollection",
        "totalItems": 0,
        "orderedItems": [],
    }
    payload = json.dumps(empty_collection)
    return Response(content=payload, media_type=_AP_CONTENT_TYPE)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _post_to_ap_note(post, actor, base_url: str) -> dict:
    """Render a community post as an ActivityPub Note dict.

    The note always carries a ``#Kiwi`` hashtag, plus one CamelCase hashtag
    for each of the first four dietary tags. Its ``id`` is overwritten with
    ``<base_url>/ap/posts/<slug>``.

    Args:
        post: Post object; ``title``, ``published`` and ``slug`` are read
            directly, the remaining fields via ``getattr`` with a fallback.
        actor: Local actor whose ``actor_id`` is used as the note author.
        base_url: Origin used to build the note id and the ``#Kiwi`` tag link.
    """
    from circuitforge_core.activitypub import make_note
    from app.services.community.ap_compat import _build_content

    diet_tags: list[str] = list(getattr(post, "dietary_tags", []) or [])

    hashtags = [{"type": "Hashtag", "name": "#Kiwi", "href": f"{base_url}/ap/tags/kiwi"}]
    for raw_tag in diet_tags[:4]:
        # "gluten-free" -> "GlutenFree"
        words = raw_tag.replace("-", " ").split()
        camel = "".join(word.capitalize() for word in words)
        hashtags.append({"type": "Hashtag", "name": f"#{camel}"})

    content_fields = {
        "title": post.title,
        "description": getattr(post, "description", None),
        "outcome_notes": getattr(post, "outcome_notes", None),
        "dietary_tags": diet_tags,
    }
    content = _build_content(content_fields)

    published = post.published
    if not isinstance(published, datetime):
        # Anything that is not a datetime is passed on as "no timestamp".
        published = None

    note = make_note(
        actor_id=actor.actor_id,
        content=content,
        tag=hashtags,
        published=published,
    )
    note["id"] = f"{base_url}/ap/posts/{post.slug}"
    return note
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_inbox(actor_url: str) -> tuple[str | None, str | None]:
|
||||||
|
"""Fetch an AP actor document and extract inbox + sharedInbox URLs."""
|
||||||
|
try:
|
||||||
|
import httpx
|
||||||
|
resp = httpx.get(
|
||||||
|
actor_url,
|
||||||
|
headers={"Accept": "application/activity+json"},
|
||||||
|
timeout=8.0,
|
||||||
|
follow_redirects=True,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
doc = resp.json()
|
||||||
|
inbox = doc.get("inbox")
|
||||||
|
shared = doc.get("endpoints", {}).get("sharedInbox")
|
||||||
|
return inbox, shared
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("Could not resolve actor %s: %s", actor_url, exc)
|
||||||
|
return None, None
|
||||||
|
|
@ -167,6 +167,54 @@ def _validate_publish_body(body: dict) -> None:
|
||||||
raise HTTPException(status_code=422, detail="photo_url must be an https:// URL.")
|
raise HTTPException(status_code=422, detail="photo_url must be an https:// URL.")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/check-similar")
async def check_similar(body: dict, session: CloudUser = Depends(get_session)):
    """Pre-submission dedup check: list existing posts similar to the draft.

    Reads ``title``, ``recipe_id`` and ``post_type`` from ``body``. Up to 8
    candidates are requested from the store, each is scored, candidates whose
    ``similarity_tier`` is ``"different"`` are dropped, and at most 5 results
    are returned.

    Safe to call with no community store configured — an empty list is
    returned instead of an error. A blank title also yields an empty list.
    """
    store = _get_community_store()
    if store is None:
        return {"similar_posts": []}

    title = (body.get("title") or "").strip()
    recipe_id = body.get("recipe_id")
    post_type = body.get("post_type")
    if not title:
        return {"similar_posts": []}

    max_candidates = 8
    candidates = await asyncio.to_thread(
        store.search_similar_posts, title, recipe_id, post_type, max_candidates
    )
    if not candidates:
        return {"similar_posts": []}

    from app.services.community.dedup import build_similar_post_result, fetch_recipe_ingredients

    incoming_ingredients = await asyncio.to_thread(
        fetch_recipe_ingredients, session.db, recipe_id
    )

    matches = []
    for candidate in candidates:
        scored = await asyncio.to_thread(
            build_similar_post_result,
            candidate,
            recipe_id,
            incoming_ingredients,
            session.db,
        )
        if scored["similarity_tier"] == "different":
            continue
        matches.append(scored)

    return {"similar_posts": matches[:5]}
|
||||||
|
|
||||||
|
|
||||||
@router.post("/posts", status_code=201)
|
@router.post("/posts", status_code=201)
|
||||||
async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
|
async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
|
||||||
from app.tiers import can_use
|
from app.tiers import can_use
|
||||||
|
|
@ -214,6 +262,8 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
|
||||||
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||||
slug = f"kiwi-{_post_type_prefix(post_type)}-{pseudonym.lower().replace(' ', '')}-{today}-{slug_title}"[:120]
|
slug = f"kiwi-{_post_type_prefix(post_type)}-{pseudonym.lower().replace(' ', '')}-{today}-{slug_title}"[:120]
|
||||||
|
|
||||||
|
similar_to_ref = body.get("similar_to_ref") or None
|
||||||
|
|
||||||
from circuitforge_core.community.models import CommunityPost
|
from circuitforge_core.community.models import CommunityPost
|
||||||
post = CommunityPost(
|
post = CommunityPost(
|
||||||
slug=slug,
|
slug=slug,
|
||||||
|
|
@ -241,6 +291,7 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
|
||||||
fat_pct=snapshot.fat_pct,
|
fat_pct=snapshot.fat_pct,
|
||||||
protein_pct=snapshot.protein_pct,
|
protein_pct=snapshot.protein_pct,
|
||||||
moisture_pct=snapshot.moisture_pct,
|
moisture_pct=snapshot.moisture_pct,
|
||||||
|
similar_to_ref=similar_to_ref,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -250,7 +301,41 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
|
||||||
status_code=409,
|
status_code=409,
|
||||||
detail="A post with this title already exists today. Try a different title.",
|
detail="A post with this title already exists today. Try a different title.",
|
||||||
) from exc
|
) from exc
|
||||||
return _post_to_dict(inserted)
|
|
||||||
|
post_dict = _post_to_dict(inserted)
|
||||||
|
|
||||||
|
# AP delivery + Mastodon post (Paid tier, AP_ENABLED, opted-in)
|
||||||
|
from app.core.config import settings as _settings
|
||||||
|
if _settings.AP_ENABLED and session.tier in ("paid", "premium", "ultra"):
|
||||||
|
from circuitforge_core.activitypub import make_create, make_note, PUBLIC
|
||||||
|
from app.services.ap.keys import get_actor
|
||||||
|
from app.services.ap.delivery import deliver_to_followers
|
||||||
|
_ap_actor = get_actor()
|
||||||
|
if _ap_actor is not None:
|
||||||
|
base = f"https://{_settings.AP_HOST}"
|
||||||
|
from app.api.endpoints.activitypub import _post_to_ap_note
|
||||||
|
_note = _post_to_ap_note(inserted, _ap_actor, base)
|
||||||
|
_activity = make_create(_ap_actor, _note)
|
||||||
|
asyncio.create_task(
|
||||||
|
asyncio.to_thread(
|
||||||
|
deliver_to_followers, inserted.slug, _activity, session.db
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Mastodon post if user has connected account and opted in
|
||||||
|
if body.get("post_to_mastodon"):
|
||||||
|
from app.services.ap.mastodon import build_post_content, get_token, post_status
|
||||||
|
_masto = await asyncio.to_thread(
|
||||||
|
get_token, session.db, session.user_id, _settings.AP_TOKEN_ENCRYPTION_KEY
|
||||||
|
)
|
||||||
|
if _masto:
|
||||||
|
_masto_url, _masto_token = _masto
|
||||||
|
_content = build_post_content(post_dict)
|
||||||
|
asyncio.create_task(
|
||||||
|
asyncio.to_thread(post_status, _masto_url, _masto_token, _content)
|
||||||
|
)
|
||||||
|
|
||||||
|
return post_dict
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/posts/{slug}", status_code=204)
|
@router.delete("/posts/{slug}", status_code=204)
|
||||||
|
|
@ -351,6 +436,7 @@ def _post_to_dict(post) -> dict:
|
||||||
"fat_pct": post.fat_pct,
|
"fat_pct": post.fat_pct,
|
||||||
"protein_pct": post.protein_pct,
|
"protein_pct": post.protein_pct,
|
||||||
"moisture_pct": post.moisture_pct,
|
"moisture_pct": post.moisture_pct,
|
||||||
|
"similar_to_ref": getattr(post, "similar_to_ref", None),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -478,7 +478,8 @@ async def scan_barcode_image(
|
||||||
from app.services.openfoodfacts import OpenFoodFactsService
|
from app.services.openfoodfacts import OpenFoodFactsService
|
||||||
from app.services.expiration_predictor import ExpirationPredictor
|
from app.services.expiration_predictor import ExpirationPredictor
|
||||||
|
|
||||||
barcodes = await asyncio.to_thread(BarcodeScanner().scan_image, temp_file)
|
image_bytes = temp_file.read_bytes()
|
||||||
|
barcodes = await asyncio.to_thread(BarcodeScanner().scan_from_bytes, image_bytes)
|
||||||
if not barcodes:
|
if not barcodes:
|
||||||
return BarcodeScanResponse(
|
return BarcodeScanResponse(
|
||||||
success=False, barcodes_found=0, results=[],
|
success=False, barcodes_found=0, results=[],
|
||||||
|
|
@ -500,9 +501,10 @@ async def scan_barcode_image(
|
||||||
product_info = await off.lookup_product(code)
|
product_info = await off.lookup_product(code)
|
||||||
product_source = "openfoodfacts"
|
product_source = "openfoodfacts"
|
||||||
|
|
||||||
|
db_product = None
|
||||||
inventory_item = None
|
inventory_item = None
|
||||||
if product_info and auto_add_to_inventory:
|
if product_info:
|
||||||
product, _ = await asyncio.to_thread(
|
db_product, _ = await asyncio.to_thread(
|
||||||
store.get_or_create_product,
|
store.get_or_create_product,
|
||||||
product_info.get("name", code),
|
product_info.get("name", code),
|
||||||
code,
|
code,
|
||||||
|
|
@ -512,29 +514,30 @@ async def scan_barcode_image(
|
||||||
source=product_source,
|
source=product_source,
|
||||||
source_data=product_info,
|
source_data=product_info,
|
||||||
)
|
)
|
||||||
exp = predictor.predict_expiration(
|
if auto_add_to_inventory:
|
||||||
product_info.get("category", ""),
|
exp = predictor.predict_expiration(
|
||||||
location,
|
product_info.get("category", ""),
|
||||||
product_name=product_info.get("name", code),
|
location,
|
||||||
tier=session.tier,
|
product_name=product_info.get("name", code),
|
||||||
has_byok=session.has_byok,
|
tier=session.tier,
|
||||||
)
|
has_byok=session.has_byok,
|
||||||
resolved_qty = product_info.get("pack_quantity") or quantity
|
)
|
||||||
resolved_unit = product_info.get("pack_unit") or "count"
|
resolved_qty = product_info.get("pack_quantity") or quantity
|
||||||
inventory_item = await asyncio.to_thread(
|
resolved_unit = product_info.get("pack_unit") or "count"
|
||||||
store.add_inventory_item,
|
inventory_item = await asyncio.to_thread(
|
||||||
product["id"], location,
|
store.add_inventory_item,
|
||||||
quantity=resolved_qty,
|
db_product["id"], location,
|
||||||
unit=resolved_unit,
|
quantity=resolved_qty,
|
||||||
expiration_date=str(exp) if exp else None,
|
unit=resolved_unit,
|
||||||
source="barcode_scan",
|
expiration_date=str(exp) if exp else None,
|
||||||
)
|
source="barcode_scan",
|
||||||
product_found = product_info is not None
|
)
|
||||||
|
product_found = db_product is not None
|
||||||
needs_capture = not product_found and has_visual_capture
|
needs_capture = not product_found and has_visual_capture
|
||||||
results.append({
|
results.append({
|
||||||
"barcode": code,
|
"barcode": code,
|
||||||
"barcode_type": bc.get("type", "unknown"),
|
"barcode_type": bc.get("type", "unknown"),
|
||||||
"product": ProductResponse.model_validate(product_info) if product_info else None,
|
"product": ProductResponse.model_validate(db_product) if db_product else None,
|
||||||
"inventory_item": InventoryItemResponse.model_validate(inventory_item) if inventory_item else None,
|
"inventory_item": InventoryItemResponse.model_validate(inventory_item) if inventory_item else None,
|
||||||
"added_to_inventory": inventory_item is not None,
|
"added_to_inventory": inventory_item is not None,
|
||||||
"needs_manual_entry": not product_found and not needs_capture,
|
"needs_manual_entry": not product_found and not needs_capture,
|
||||||
|
|
|
||||||
133
app/api/endpoints/mastodon_oauth.py
Normal file
133
app/api/endpoints/mastodon_oauth.py
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
# app/api/endpoints/mastodon_oauth.py
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Mastodon OAuth flow endpoints:
|
||||||
|
# POST /social/mastodon/connect — Start OAuth (dynamic app registration)
|
||||||
|
# GET /social/mastodon/callback — OAuth callback, exchange code for token
|
||||||
|
# DELETE /social/mastodon/disconnect — Revoke and remove stored token
|
||||||
|
# GET /social/mastodon/status — Check connection status
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import RedirectResponse
|
||||||
|
|
||||||
|
from app.cloud_session import CloudUser, get_session
|
||||||
|
from app.core.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/social/mastodon", tags=["mastodon"])
|
||||||
|
|
||||||
|
|
||||||
|
def _redirect_uri() -> str:
    """Build the absolute OAuth callback URL handed to Mastodon instances.

    The host comes from ``settings.AP_HOST``; when that is empty the
    development default ``localhost:8512`` is used. The scheme is always
    ``https``.
    """
    callback_host = settings.AP_HOST if settings.AP_HOST else "localhost:8512"
    return "https://" + callback_host + "/api/v1/social/mastodon/callback"
|
||||||
|
|
||||||
|
|
||||||
|
# In-memory pending state: maps state_token → {instance_url, client_id,
# client_secret, user_id, created_at}.
# Entries are single-use (the callback pops them) and expire after
# _PENDING_TTL_SECONDS, so abandoned flows cannot grow the dict without bound.
# The dict lives in this process only; a real deployment would persist this in
# a short-TTL cache or DB.
_PENDING_TTL_SECONDS: float = 600.0
_pending: dict[str, dict] = {}


def _prune_pending(now: float) -> None:
    """Remove pending OAuth states older than ``_PENDING_TTL_SECONDS``.

    ``now`` is a ``time.monotonic()`` reading taken by the caller.
    """
    expired = [
        token
        for token, entry in _pending.items()
        if now - entry["created_at"] > _PENDING_TTL_SECONDS
    ]
    for token in expired:
        _pending.pop(token, None)


@router.post("/connect")
async def connect_mastodon(body: dict, session: CloudUser = Depends(get_session)):
    """Start the Mastodon OAuth flow.

    Body: {"instance_url": "https://mastodon.social"}
    Returns: {"authorize_url": "...", "state": "..."}

    Raises:
        HTTPException 422: ``instance_url`` is missing, not a string, or not
            an ``https://`` URL.
        HTTPException 502: app registration with the instance failed.
    """
    import secrets
    import time

    from app.services.ap.mastodon import build_authorize_url, register_app

    raw_url = body.get("instance_url")
    # A non-string value (number, list, ...) has no .strip(); treat it as
    # invalid input rather than letting it surface as a 500.
    instance_url = raw_url.strip().rstrip("/") if isinstance(raw_url, str) else ""
    if not instance_url.startswith("https://"):
        raise HTTPException(status_code=422, detail="instance_url must be an https:// URL.")

    redirect_uri = _redirect_uri()
    try:
        app_creds = await asyncio.to_thread(register_app, instance_url, redirect_uri)
    except Exception as exc:
        raise HTTPException(
            status_code=502, detail=f"Could not register with Mastodon instance: {exc}"
        ) from exc

    now = time.monotonic()
    # Housekeeping on every new flow keeps the dict bounded by the number of
    # flows started within one TTL window.
    _prune_pending(now)

    state = secrets.token_urlsafe(24)
    _pending[state] = {
        "instance_url": instance_url,
        "client_id": app_creds["client_id"],
        "client_secret": app_creds["client_secret"],
        "user_id": session.user_id,
        "created_at": now,
    }

    authorize_url = build_authorize_url(
        instance_url=instance_url,
        client_id=app_creds["client_id"],
        redirect_uri=redirect_uri + f"?state={state}",
    )
    return {"authorize_url": authorize_url, "state": state}


@router.get("/callback")
async def mastodon_callback(code: str | None = None, state: str | None = None):
    """OAuth callback. Exchanges auth code for access token and stores it.

    The ``state`` token is single-use: it is removed from the pending table
    before anything else happens, and rejected when unknown or older than
    ``_PENDING_TTL_SECONDS``.

    Raises:
        HTTPException 400: ``code``/``state`` missing, or the state is unknown
            or expired.
        HTTPException 502: the token exchange with the instance failed.
    """
    import time

    if not code or not state:
        raise HTTPException(status_code=400, detail="Missing code or state parameter.")

    pending = _pending.pop(state, None)
    if pending is None or time.monotonic() - pending["created_at"] > _PENDING_TTL_SECONDS:
        raise HTTPException(status_code=400, detail="Unknown or expired OAuth state.")

    from app.services.ap.mastodon import exchange_code, store_token

    # Must be the same redirect URI string that was sent in the authorize step.
    redirect_uri = _redirect_uri() + f"?state={state}"
    try:
        access_token = await asyncio.to_thread(
            exchange_code,
            pending["instance_url"],
            pending["client_id"],
            pending["client_secret"],
            code,
            redirect_uri,
        )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Token exchange failed: {exc}") from exc

    # NOTE(review): the token is written to settings.DB_PATH — confirm this is
    # the same database that token readers open for this user.
    await asyncio.to_thread(
        store_token,
        settings.DB_PATH,
        pending["user_id"],
        pending["instance_url"],
        access_token,
        settings.AP_TOKEN_ENCRYPTION_KEY,
    )

    # Redirect to frontend settings page after successful connect
    return RedirectResponse(url="/#/settings?mastodon=connected", status_code=302)
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/disconnect", status_code=204)
async def disconnect_mastodon(session: CloudUser = Depends(get_session)):
    """Delete the calling user's stored Mastodon token.

    The blocking ``delete_token`` call runs in a worker thread; the endpoint
    responds with an empty 204.
    """
    from app.services.ap.mastodon import delete_token as _delete_token

    db_path, user_id = settings.DB_PATH, session.user_id
    await asyncio.to_thread(_delete_token, db_path, user_id)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/status")
async def mastodon_status(session: CloudUser = Depends(get_session)):
    """Report whether the calling user has a Mastodon account connected.

    Returns ``{"connected": bool, "instance_url": str | None}``. The access
    token itself is never included in the response.
    """
    from app.services.ap.mastodon import get_token

    stored = await asyncio.to_thread(
        get_token, settings.DB_PATH, session.user_id, settings.AP_TOKEN_ENCRYPTION_KEY
    )
    if stored is not None:
        # get_token yields an (instance_url, token) pair; drop the token.
        connected_url, _token = stored
        return {"connected": True, "instance_url": connected_url}
    return {"connected": False, "instance_url": None}
|
||||||
|
|
@ -11,6 +11,7 @@ BSL 1.1 -- recipe_scan requires Paid tier or BYOK.
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json as _json
|
||||||
import logging
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -18,7 +19,7 @@ from typing import Annotated
|
||||||
|
|
||||||
import aiofiles
|
import aiofiles
|
||||||
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
|
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
|
||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse, StreamingResponse
|
||||||
|
|
||||||
from app.cloud_session import CloudUser, get_session
|
from app.cloud_session import CloudUser, get_session
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
|
|
@ -168,9 +169,15 @@ async def scan_recipe(
|
||||||
)
|
)
|
||||||
raise HTTPException(status_code=422, detail=msg)
|
raise HTTPException(status_code=422, detail=msg)
|
||||||
except RuntimeError as exc:
|
except RuntimeError as exc:
|
||||||
|
msg = str(exc)
|
||||||
|
logger.warning("Recipe scanner unavailable: %s", msg)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=503,
|
status_code=503,
|
||||||
detail=str(exc),
|
detail=(
|
||||||
|
"The recipe scanner is temporarily unavailable — "
|
||||||
|
"no vision backend could be reached. "
|
||||||
|
"Try again in a few minutes, or contact support if this persists."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
return _result_to_response(result)
|
return _result_to_response(result)
|
||||||
|
|
@ -184,6 +191,114 @@ async def scan_recipe(
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# ── SSE scan endpoint ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def _scan_recipe_sse(saved_paths: list[Path], pantry_names: list[str]):
    """Async generator yielding SSE events for a recipe scan.

    The blocking scan runs in a worker thread and reports through a queue;
    this generator relays each queued event as one ``data: <json>`` SSE frame
    and stops after the first "done" or "error" event, or after 180 seconds
    without any event.

    Events:
      {"status": "allocating", "message": "..."}
      {"status": "scanning",   "message": "..."}
      {"status": "structuring","message": "..."}
      {"status": "done",       "recipe": {...}}
      {"status": "error",      "message": "..."}

    The "done" payload is the response model dumped in JSON mode, so every
    value is JSON-serialisable. Backend failures (``RuntimeError``) are logged
    and reported with the same generic message POST /scan uses, rather than
    exposing the raw exception text.
    """
    queue: asyncio.Queue = asyncio.Queue()
    loop = asyncio.get_running_loop()

    def _emit(event: dict) -> None:
        # Called from the worker thread: hand the event to the loop thread.
        loop.call_soon_threadsafe(queue.put_nowait, event)

    def _sse(event: dict) -> str:
        return f"data: {_json.dumps(event)}\n\n"

    def _run() -> None:
        def cb(status: str, message: str) -> None:
            _emit({"status": status, "message": message})

        try:
            from app.services.recipe.recipe_scanner import RecipeScanner

            result = RecipeScanner().scan(saved_paths, pantry_names=pantry_names, progress_cb=cb)
            # mode="json" converts non-primitive field values (dates, paths,
            # enums, ...) so json.dumps cannot fail on the payload.
            recipe_dict = _result_to_response(result).model_dump(mode="json")
            _emit({"status": "done", "recipe": recipe_dict})
        except ValueError as exc:
            _emit({"status": "error", "message": str(exc)})
        except RuntimeError as exc:
            logger.warning("Recipe scanner unavailable: %s", exc)
            _emit({
                "status": "error",
                "message": (
                    "The recipe scanner is temporarily unavailable — "
                    "no vision backend could be reached. "
                    "Try again in a few minutes, or contact support if this persists."
                ),
            })
        except Exception:
            logger.exception("Unexpected error in recipe scan thread")
            _emit({"status": "error", "message": "Scan failed unexpectedly."})

    scan_task = asyncio.create_task(asyncio.to_thread(_run))
    try:
        while True:
            try:
                event = await asyncio.wait_for(queue.get(), timeout=180.0)
            except asyncio.TimeoutError:
                yield _sse({"status": "error", "message": "Scan timed out after 3 minutes."})
                break
            yield _sse(event)
            if event["status"] in ("done", "error"):
                break
    finally:
        # Cancelling only stops awaiting the thread; the scan itself may still
        # be running in the worker.
        if not scan_task.done():
            scan_task.cancel()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/scan/stream")
async def scan_recipe_stream(
    files: Annotated[list[UploadFile], File(...)],
    store: Store = Depends(get_store),
    session: CloudUser = Depends(get_session),
):
    """Scan recipe photos and stream SSE progress events during model load.

    Use this endpoint instead of POST /scan when you need live feedback during
    cold-start model loading (first request after a GPU-idle period can take
    30-60 seconds for cf-docuvision to warm up).

    Tier: Paid (or BYOK) — same gate as POST /scan.

    Raises:
        HTTPException 403: the caller's tier may not use recipe scanning.
        HTTPException 422: no files, more than 4 files, or an unsupported
            content type.
    """
    if not can_use("recipe_scan", session.tier, session.has_byok):
        raise HTTPException(
            status_code=403,
            detail=(
                "Recipe scanning requires Paid tier or a configured vision backend (BYOK). "
                "Set ANTHROPIC_API_KEY or connect to a cf-orch vision service."
            ),
        )

    if not files:
        raise HTTPException(status_code=422, detail="At least one image file is required.")
    if len(files) > 4:
        raise HTTPException(status_code=422, detail="Maximum 4 images per scan request.")

    for f in files:
        ct = (f.content_type or "").lower()
        if ct and ct not in _ALLOWED_MIME_TYPES:
            raise HTTPException(
                status_code=422,
                detail=f"Unsupported file type: {ct}. Supported: JPEG, PNG, WebP, HEIC.",
            )

    saved_paths: list[Path] = []

    def _cleanup() -> None:
        # Best-effort removal of the uploaded temp files.
        for p in saved_paths:
            try:
                p.unlink(missing_ok=True)
            except OSError:
                pass

    try:
        for f in files:
            saved_paths.append(await _save_upload_temp(f))
        inventory = await asyncio.to_thread(store.list_inventory)
    except BaseException:
        # The streaming generator (which normally owns cleanup) never starts
        # on this path, so remove whatever was already written.
        _cleanup()
        raise

    pantry_names = [item["product_name"] for item in inventory if item.get("product_name")]

    async def generate():
        try:
            async for chunk in _scan_recipe_sse(saved_paths, pantry_names):
                yield chunk
        finally:
            _cleanup()

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        # Keep proxies from caching or buffering the event stream.
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
|
||||||
|
|
||||||
|
|
||||||
# ── Save endpoint ──────────────────────────────────────────────────────────────
|
# ── Save endpoint ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@router.post("/scan/save", response_model=UserRecipeResponse, status_code=201)
|
@router.post("/scan/save", response_model=UserRecipeResponse, status_code=201)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,9 @@ import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
|
import json as _json_mod
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
from fastapi.responses import StreamingResponse
|
||||||
|
|
||||||
from app.cloud_session import CloudUser, _auth_label, get_session
|
from app.cloud_session import CloudUser, _auth_label, get_session
|
||||||
|
|
||||||
|
|
@ -14,6 +16,9 @@ log = logging.getLogger(__name__)
|
||||||
from app.db.session import get_store
|
from app.db.session import get_store
|
||||||
from app.db.store import Store
|
from app.db.store import Store
|
||||||
from app.models.schemas.recipe import (
|
from app.models.schemas.recipe import (
|
||||||
|
AskRequest,
|
||||||
|
AskResponse,
|
||||||
|
AskRecipeHit,
|
||||||
AssemblyTemplateOut,
|
AssemblyTemplateOut,
|
||||||
BuildRequest,
|
BuildRequest,
|
||||||
LeftoversResponse,
|
LeftoversResponse,
|
||||||
|
|
@ -103,6 +108,39 @@ def _build_stream_prompt(db_path: Path, level: int) -> str:
|
||||||
store.close()
|
store.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def _stream_recipe_sse(db_path: Path, req: RecipeRequest):
    """Async generator that yields SSE events for a streaming recipe request.

    Phase 1 (thread): classify pantry items using a temporary Store.
    Phase 2 (async): stream tokens from LLM via LLMRecipeGenerator.stream_generate().

    Frames are ``data: <json>`` lines carrying one of ``{"chunk": token}``,
    ``{"done": true}`` or ``{"error": message}``. A failure in either phase is
    logged with its traceback and ends the stream with a single error frame.
    """
    def _prep(db_path: Path) -> tuple[list, list[str]]:
        from app.services.recipe.element_classifier import IngredientClassifier

        store = Store(db_path)
        try:
            classifier = IngredientClassifier(store)
            profiles = classifier.classify_batch(req.pantry_items)
            gaps = classifier.identify_gaps(profiles)
            return profiles, gaps
        finally:
            store.close()

    try:
        profiles, gaps = await asyncio.to_thread(_prep, db_path)
    except Exception as exc:
        # The client only sees the message; keep the traceback server-side.
        log.exception("Streaming recipe request failed while classifying pantry items")
        yield f"data: {_json_mod.dumps({'error': str(exc)})}\n\n"
        return

    from app.services.recipe.llm_recipe import LLMRecipeGenerator

    gen = LLMRecipeGenerator(None)
    try:
        async for token in gen.stream_generate(req, profiles, gaps):
            yield f"data: {_json_mod.dumps({'chunk': token})}\n\n"
        yield f"data: {_json_mod.dumps({'done': True})}\n\n"
    except Exception as exc:
        log.exception("Streaming recipe request failed during LLM generation")
        yield f"data: {_json_mod.dumps({'error': str(exc)})}\n\n"
|
||||||
|
|
||||||
|
|
||||||
async def _enqueue_recipe_job(session: CloudUser, req: RecipeRequest):
|
async def _enqueue_recipe_job(session: CloudUser, req: RecipeRequest):
|
||||||
"""Queue an async recipe_llm job and return 202 with job_id.
|
"""Queue an async recipe_llm job and return 202 with job_id.
|
||||||
|
|
||||||
|
|
@ -144,6 +182,7 @@ async def _enqueue_recipe_job(session: CloudUser, req: RecipeRequest):
|
||||||
async def suggest_recipes(
|
async def suggest_recipes(
|
||||||
req: RecipeRequest,
|
req: RecipeRequest,
|
||||||
async_mode: bool = Query(default=False, alias="async"),
|
async_mode: bool = Query(default=False, alias="async"),
|
||||||
|
stream: bool = Query(default=False),
|
||||||
session: CloudUser = Depends(get_session),
|
session: CloudUser = Depends(get_session),
|
||||||
store: Store = Depends(get_store),
|
store: Store = Depends(get_store),
|
||||||
):
|
):
|
||||||
|
|
@ -179,6 +218,13 @@ async def suggest_recipes(
|
||||||
req = req.model_copy(update={"level": 2})
|
req = req.model_copy(update={"level": 2})
|
||||||
orch_fallback = True
|
orch_fallback = True
|
||||||
|
|
||||||
|
if stream and req.level in (3, 4):
|
||||||
|
return StreamingResponse(
|
||||||
|
_stream_recipe_sse(session.db, req),
|
||||||
|
media_type="text/event-stream",
|
||||||
|
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
|
||||||
|
)
|
||||||
|
|
||||||
if req.level in (3, 4) and async_mode:
|
if req.level in (3, 4) and async_mode:
|
||||||
return await _enqueue_recipe_job(session, req)
|
return await _enqueue_recipe_job(session, req)
|
||||||
|
|
||||||
|
|
@ -554,6 +600,137 @@ async def build_recipe(
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
_ASK_STOPWORDS: frozenset[str] = frozenset({
|
||||||
|
"what", "can", "make", "with", "have", "some", "the", "and", "for",
|
||||||
|
"that", "this", "these", "those", "how", "about", "are", "there",
|
||||||
|
"give", "show", "find", "want", "need", "like", "any", "good",
|
||||||
|
"quick", "easy", "simple", "fast", "using", "use", "from", "into",
|
||||||
|
"more", "much", "just", "only", "my", "please", "could", "would",
|
||||||
|
"should", "something", "anything", "everything", "ideas", "idea",
|
||||||
|
"suggest", "meal", "food", "dish", "dishes", "today", "tonight",
|
||||||
|
"tomorrow", "now", "here", "there", "recipes", "recipe", "dinner",
|
||||||
|
"lunch", "breakfast", "snack", "under", "minutes", "hours", "time",
|
||||||
|
"left", "over", "also", "some", "make", "cook", "made", "cooked",
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
import re as _re
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_ask_keywords(question: str) -> list[str]:
|
||||||
|
"""Extract food-relevant keywords from a natural language question."""
|
||||||
|
tokens = _re.findall(r"[a-zA-Z]+", question.lower())
|
||||||
|
return [t for t in tokens if len(t) > 3 and t not in _ASK_STOPWORDS]
|
||||||
|
|
||||||
|
|
||||||
|
def _ask_in_thread(db_path: Path, question: str, pantry_items: list[str]) -> AskResponse:
    """Run the Ask search in a worker thread.

    Extracts keywords from ``question``, searches recipes by ingredient and by
    title, merges the two result lists by recipe id, and returns at most 12
    hits. When pantry items are supplied, each hit carries ``match_pct``: the
    fraction of its ingredient names that contain some pantry item as a
    substring.

    The returned response always has ``answer=None`` and ``tier="free"``; LLM
    synthesis and tier gating are handled by the caller, outside this thread.
    The Store opened here is always closed before returning.
    """
    import json as _json

    store = Store(db_path)
    try:
        keywords = _extract_ask_keywords(question)
        ingredient_hits: list[dict] = []
        if keywords:
            ingredient_hits = store.search_recipes_by_ingredients(keywords, limit=15)

        # Also search by title. The longest keyword is used as the title probe
        # (q=) on the assumption that it is the most specific word in the
        # question.
        title_hits: list[dict] = []
        title_probe = max(keywords, key=len) if keywords else None
        if title_probe:
            browse_result = store.browse_recipes(
                keywords=None,
                page=1,
                page_size=12,
                pantry_items=pantry_items or None,
                q=title_probe,
                sort="match" if pantry_items else "default",
            )
            title_hits = browse_result.get("recipes", [])

        # Merge by ID; ingredient hits come first (more semantically relevant).
        seen: set[int] = set()
        merged: list[dict] = []
        for row in ingredient_hits + title_hits:
            rid = row.get("id")
            if rid is not None and rid not in seen:
                seen.add(rid)
                merged.append(row)

        # Normalise pantry items for substring matching. Blank entries are
        # dropped: an empty string is a substring of every name and would
        # report a 100% match for every recipe.
        pantry_set = {
            p.strip().lower()
            for p in (pantry_items or [])
            if isinstance(p, str) and p.strip()
        }

        hits: list[AskRecipeHit] = []
        for row in merged[:12]:
            match_pct: float | None = None
            if pantry_set:
                raw_names = row.get("ingredient_names") or []
                if isinstance(raw_names, str):
                    # Names may arrive as a JSON-encoded string.
                    try:
                        raw_names = _json.loads(raw_names)
                    except ValueError:
                        raw_names = []
                # Anything other than a sequence of strings cannot be scored.
                names = (
                    [n.lower() for n in raw_names if isinstance(n, str)]
                    if isinstance(raw_names, (list, tuple))
                    else []
                )
                if names:
                    covered = sum(
                        1 for n in names
                        if any(p in n for p in pantry_set)
                    )
                    match_pct = round(covered / len(names), 2)
            hits.append(AskRecipeHit(
                id=row["id"],
                title=row.get("title", ""),
                category=row.get("category"),
                match_pct=match_pct,
            ))

        return AskResponse(answer=None, recipes=hits, tier="free")
    finally:
        store.close()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/ask", response_model=AskResponse)
async def ask_recipes(
    req: AskRequest,
    session: CloudUser = Depends(get_session),
) -> AskResponse:
    """Answer a natural-language recipe question.

    Every caller gets the keyword/title search result from ``_ask_in_thread``.
    Callers on the paid, premium or ultra tier, or with BYOK, additionally get
    a short LLM-written answer when at least one recipe matched. The LLM call
    is capped at 8 seconds; on timeout or any other failure the plain search
    result is returned unchanged.
    """
    result = await asyncio.to_thread(_ask_in_thread, session.db, req.question, req.pantry_items)

    # "local" (dev) tier is deliberately not in this list.
    synthesis_allowed = session.tier in ("paid", "premium", "ultra") or session.has_byok
    if not synthesis_allowed or not result.recipes:
        return result

    recipe_titles = ", ".join(hit.title for hit in result.recipes[:6])
    prompt = (
        f'You are a helpful kitchen assistant. The user asked: "{req.question}"\n\n'
        f"Matching recipes: {recipe_titles}\n\n"
        "Write a brief, friendly 1–2 sentence response suggesting which of these "
        "recipes might best fit the question. Be specific and natural."
    )
    try:
        from circuitforge_core.llm.router import LLMRouter

        llm_call = asyncio.to_thread(LLMRouter().complete, prompt)
        answer = await asyncio.wait_for(llm_call, timeout=8.0)
        result = result.model_copy(update={"answer": answer.strip() or None, "tier": "paid"})
    except Exception as exc:  # includes asyncio.TimeoutError
        log.warning("Ask LLM synthesis skipped: %s", exc)

    return result
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{recipe_id}")
|
@router.get("/{recipe_id}")
|
||||||
async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session)) -> dict:
|
async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session)) -> dict:
|
||||||
def _get(db_path: Path, rid: int) -> dict | None:
|
def _get(db_path: Path, rid: int) -> dict | None:
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ from fastapi import APIRouter
|
||||||
from app.api.endpoints import health, receipts, export, inventory, ocr, recipes, settings, staples, feedback, feedback_attach, household, saved_recipes, imitate, meal_plans, orch_usage, session, shopping
|
from app.api.endpoints import health, receipts, export, inventory, ocr, recipes, settings, staples, feedback, feedback_attach, household, saved_recipes, imitate, meal_plans, orch_usage, session, shopping
|
||||||
from app.api.endpoints.community import router as community_router
|
from app.api.endpoints.community import router as community_router
|
||||||
from app.api.endpoints.corrections import router as corrections_router
|
from app.api.endpoints.corrections import router as corrections_router
|
||||||
|
from app.api.endpoints.mastodon_oauth import router as mastodon_router
|
||||||
from app.api.endpoints.recipe_scan import router as recipe_scan_router
|
from app.api.endpoints.recipe_scan import router as recipe_scan_router
|
||||||
from app.api.endpoints.recipe_tags import router as recipe_tags_router
|
from app.api.endpoints.recipe_tags import router as recipe_tags_router
|
||||||
|
|
||||||
|
|
@ -30,3 +31,4 @@ api_router.include_router(shopping.router, prefix="/shopping", tags=
|
||||||
api_router.include_router(community_router)
|
api_router.include_router(community_router)
|
||||||
api_router.include_router(recipe_tags_router)
|
api_router.include_router(recipe_tags_router)
|
||||||
api_router.include_router(corrections_router, prefix="/corrections", tags=["corrections"])
|
api_router.include_router(corrections_router, prefix="/corrections", tags=["corrections"])
|
||||||
|
api_router.include_router(mastodon_router)
|
||||||
|
|
|
||||||
|
|
@ -65,9 +65,24 @@ class Settings:
|
||||||
# Quality
|
# Quality
|
||||||
MIN_QUALITY_SCORE: float = float(os.environ.get("MIN_QUALITY_SCORE", "50.0"))
|
MIN_QUALITY_SCORE: float = float(os.environ.get("MIN_QUALITY_SCORE", "50.0"))
|
||||||
|
|
||||||
# CF-core resource coordinator (VRAM lease management)
|
# CF-core resource coordinator (VRAM lease management — lease broker, not inference)
|
||||||
COORDINATOR_URL: str = os.environ.get("COORDINATOR_URL", "http://localhost:7700")
|
COORDINATOR_URL: str = os.environ.get("COORDINATOR_URL", "http://localhost:7700")
|
||||||
|
|
||||||
|
# GPU inference server URL
|
||||||
|
# Priority: GPU_SERVER_URL env var → CF_ORCH_URL env var (backward compat)
|
||||||
|
# → https://orch.circuitforge.tech when CF_LICENSE_KEY is present (Paid+)
|
||||||
|
# Resolved value is written back to os.environ["CF_ORCH_URL"] at startup so
|
||||||
|
# all service-layer callers that read CF_ORCH_URL directly see the right URL.
|
||||||
|
GPU_SERVER_URL: str | None = (
|
||||||
|
os.environ.get("GPU_SERVER_URL")
|
||||||
|
or os.environ.get("CF_ORCH_URL")
|
||||||
|
or (
|
||||||
|
"https://orch.circuitforge.tech"
|
||||||
|
if os.environ.get("CF_LICENSE_KEY")
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# Hosted cf-orch coordinator — bearer token for managed cloud GPU inference (Paid+)
|
# Hosted cf-orch coordinator — bearer token for managed cloud GPU inference (Paid+)
|
||||||
# CFOrchClient reads CF_LICENSE_KEY automatically; exposed here for startup validation.
|
# CFOrchClient reads CF_LICENSE_KEY automatically; exposed here for startup validation.
|
||||||
CF_LICENSE_KEY: str | None = os.environ.get("CF_LICENSE_KEY")
|
CF_LICENSE_KEY: str | None = os.environ.get("CF_LICENSE_KEY")
|
||||||
|
|
@ -76,6 +91,17 @@ class Settings:
|
||||||
# runs don't pollute session counts. Set to the Directus UUID of the test user.
|
# runs don't pollute session counts. Set to the Directus UUID of the test user.
|
||||||
E2E_TEST_USER_ID: str | None = os.environ.get("E2E_TEST_USER_ID") or None
|
E2E_TEST_USER_ID: str | None = os.environ.get("E2E_TEST_USER_ID") or None
|
||||||
|
|
||||||
|
# ActivityPub federation (optional; disabled by default)
|
||||||
|
AP_ENABLED: bool = os.environ.get("AP_ENABLED", "false").lower() in ("1", "true", "yes")
|
||||||
|
AP_HOST: str = os.environ.get("AP_HOST", "") # e.g. kiwi.circuitforge.tech
|
||||||
|
CLOUD_DATA_ROOT: Path = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/kiwi-cloud-data"))
|
||||||
|
AP_KEY_PATH: Path = Path(
|
||||||
|
os.environ.get("AP_KEY_PATH", str(CLOUD_DATA_ROOT / "ap_keys" / "instance.pem"))
|
||||||
|
)
|
||||||
|
# Fernet key for Mastodon access token encryption (base64-urlsafe, 32 bytes)
|
||||||
|
# Leave unset to skip encryption (dev only)
|
||||||
|
AP_TOKEN_ENCRYPTION_KEY: str | None = os.environ.get("AP_TOKEN_ENCRYPTION_KEY") or None
|
||||||
|
|
||||||
# Feature flags
|
# Feature flags
|
||||||
ENABLE_OCR: bool = os.environ.get("ENABLE_OCR", "false").lower() in ("1", "true", "yes")
|
ENABLE_OCR: bool = os.environ.get("ENABLE_OCR", "false").lower() in ("1", "true", "yes")
|
||||||
# Use OrchestratedScheduler (coordinator-aware, multi-GPU fan-out) instead of
|
# Use OrchestratedScheduler (coordinator-aware, multi-GPU fan-out) instead of
|
||||||
|
|
@ -97,3 +123,9 @@ class Settings:
|
||||||
|
|
||||||
|
|
||||||
settings = Settings()
|
settings = Settings()
|
||||||
|
|
||||||
|
# Normalise GPU_SERVER_URL into CF_ORCH_URL so every service-layer caller that
|
||||||
|
# reads os.environ.get("CF_ORCH_URL") sees the resolved value, including the
|
||||||
|
# Paid+ cloud default injected above.
|
||||||
|
if settings.GPU_SERVER_URL:
|
||||||
|
os.environ["CF_ORCH_URL"] = settings.GPU_SERVER_URL
|
||||||
|
|
|
||||||
47
app/db/migrations/042_activitypub.sql
Normal file
47
app/db/migrations/042_activitypub.sql
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
-- 042_activitypub.sql
-- ActivityPub federation tables: follower registry, delivery log, dedup, Mastodon tokens.

-- Follower registry: AP actors that Follow this Kiwi instance
CREATE TABLE IF NOT EXISTS ap_followers (
    id           INTEGER PRIMARY KEY,
    actor_id     TEXT NOT NULL UNIQUE,  -- AP actor URL
    inbox_url    TEXT NOT NULL,
    shared_inbox TEXT,
    followed_at  TEXT NOT NULL DEFAULT (datetime('now')),
    active       INTEGER NOT NULL DEFAULT 1
);

CREATE INDEX IF NOT EXISTS idx_ap_followers_active
    ON ap_followers (active) WHERE active = 1;

-- Outgoing delivery log: one row per (post_slug, target_inbox) pair.
-- `attempts` accumulates the delivery attempts made for that pair.
CREATE TABLE IF NOT EXISTS ap_deliveries (
    id           INTEGER PRIMARY KEY,
    post_slug    TEXT NOT NULL,
    target_inbox TEXT NOT NULL,
    status       TEXT NOT NULL DEFAULT 'pending',  -- pending | delivered | failed
    attempts     INTEGER NOT NULL DEFAULT 0,
    last_error   TEXT,
    created_at   TEXT NOT NULL DEFAULT (datetime('now')),
    delivered_at TEXT
);

-- Enforce the one-row-per-pair rule. Without a uniqueness constraint an
-- INSERT OR IGNORE on this table never ignores anything, and status updates
-- keyed on (post_slug, target_inbox) would touch several rows.
CREATE UNIQUE INDEX IF NOT EXISTS idx_ap_deliveries_post_inbox
    ON ap_deliveries (post_slug, target_inbox);

CREATE INDEX IF NOT EXISTS idx_ap_deliveries_status
    ON ap_deliveries (status) WHERE status != 'delivered';

-- Incoming activity dedup: prevents replay attacks and double-processing
CREATE TABLE IF NOT EXISTS ap_received (
    activity_id TEXT PRIMARY KEY,
    received_at TEXT NOT NULL DEFAULT (datetime('now'))
);

-- Mastodon OAuth tokens: per-user, encrypted at rest
-- Stored in the user's local kiwi.db (CLOUD_MODE: per-user DB tree)
CREATE TABLE IF NOT EXISTS mastodon_tokens (
    id               INTEGER PRIMARY KEY,
    directus_user_id TEXT NOT NULL UNIQUE,
    instance_url     TEXT NOT NULL,
    access_token     TEXT NOT NULL,  -- Fernet-encrypted when AP_TOKEN_ENCRYPTION_KEY set
    created_at       TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at       TEXT NOT NULL DEFAULT (datetime('now'))
);
|
||||||
18
app/main.py
18
app/main.py
|
|
@ -43,6 +43,11 @@ async def _browse_counts_refresh_loop(corpus_path: str) -> None:
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
logger.info("Starting Kiwi API...")
|
logger.info("Starting Kiwi API...")
|
||||||
settings.ensure_dirs()
|
settings.ensure_dirs()
|
||||||
|
|
||||||
|
# Run DB migrations at startup (ensures all tables exist before any request)
|
||||||
|
from app.db.store import Store
|
||||||
|
_s = Store(settings.DB_PATH)
|
||||||
|
_s.close()
|
||||||
register_kiwi_programs()
|
register_kiwi_programs()
|
||||||
|
|
||||||
# Start LLM background task scheduler
|
# Start LLM background task scheduler
|
||||||
|
|
@ -54,6 +59,14 @@ async def lifespan(app: FastAPI):
|
||||||
from app.api.endpoints.community import init_community_store
|
from app.api.endpoints.community import init_community_store
|
||||||
init_community_store(settings.COMMUNITY_DB_URL)
|
init_community_store(settings.COMMUNITY_DB_URL)
|
||||||
|
|
||||||
|
# Initialize ActivityPub instance actor (no-op when AP_ENABLED=false)
|
||||||
|
if settings.AP_ENABLED and settings.AP_HOST:
|
||||||
|
try:
|
||||||
|
from app.services.ap.keys import init_actor
|
||||||
|
init_actor(host=settings.AP_HOST, key_path=settings.AP_KEY_PATH)
|
||||||
|
except Exception as _ap_exc:
|
||||||
|
logger.warning("AP init failed (AP features disabled): %s", _ap_exc)
|
||||||
|
|
||||||
# Browse counts cache — warm in-memory cache from disk, refresh if stale.
|
# Browse counts cache — warm in-memory cache from disk, refresh if stale.
|
||||||
# Uses the corpus path the store will attach to at request time.
|
# Uses the corpus path the store will attach to at request time.
|
||||||
corpus_path = os.environ.get("RECIPE_DB_PATH", str(settings.DB_PATH))
|
corpus_path = os.environ.get("RECIPE_DB_PATH", str(settings.DB_PATH))
|
||||||
|
|
@ -101,6 +114,11 @@ app.add_middleware(
|
||||||
|
|
||||||
app.include_router(api_router, prefix=settings.API_PREFIX)
|
app.include_router(api_router, prefix=settings.API_PREFIX)
|
||||||
|
|
||||||
|
# AP endpoints: WebFinger at root (not under /api/v1), AP objects under /ap
|
||||||
|
from app.api.endpoints.activitypub import ap_router, webfinger_router
|
||||||
|
app.include_router(webfinger_router)
|
||||||
|
app.include_router(ap_router)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
|
|
|
||||||
0
app/mcp/__init__.py
Normal file
0
app/mcp/__init__.py
Normal file
306
app/mcp/server.py
Normal file
306
app/mcp/server.py
Normal file
|
|
@ -0,0 +1,306 @@
|
||||||
|
"""Kiwi MCP Server — read-only corpus DB access for tag/keyword audits.
|
||||||
|
|
||||||
|
Exposes four tools to Claude:
|
||||||
|
kiwi_query_corpus — run a read-only SQL query against the corpus DB
|
||||||
|
kiwi_count_fts — run an FTS5 MATCH expression and return row count
|
||||||
|
kiwi_sample_tags — return tag frequency distribution by prefix
|
||||||
|
kiwi_browse_preview — call the browse endpoint and return first-page results
|
||||||
|
|
||||||
|
Run with:
|
||||||
|
python -m app.mcp.server
|
||||||
|
(from /Library/Development/CircuitForge/kiwi with cf conda env active)
|
||||||
|
|
||||||
|
Configure in Claude Code ~/.claude/settings.json mcpServers:
|
||||||
|
"kiwi": {
|
||||||
|
"command": "/devl/miniconda3/envs/cf/bin/python",
|
||||||
|
"args": ["-m", "app.mcp.server"],
|
||||||
|
"cwd": "/Library/Development/CircuitForge/kiwi",
|
||||||
|
"env": {
|
||||||
|
"KIWI_DB_PATH": "/Library/Development/CircuitForge/kiwi/data/kiwi.db",
|
||||||
|
"KIWI_API_URL": "http://localhost:8512"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from mcp.server import Server
|
||||||
|
from mcp.server.stdio import stdio_server
|
||||||
|
from mcp.types import TextContent, Tool
|
||||||
|
|
||||||
|
# Default corpus DB location: <repo>/data/kiwi.db.
# This module lives at <repo>/app/mcp/server.py, so the repository root is
# parents[2] (parents[0] = app/mcp, parents[1] = app, parents[2] = repo root).
_DB_PATH = os.environ.get(
    "KIWI_DB_PATH",
    str(Path(__file__).resolve().parents[2] / "data" / "kiwi.db"),
)
# Base URL of the running Kiwi API, used by the browse-preview tool.
_API_URL = os.environ.get("KIWI_API_URL", "http://localhost:8512")
# HTTP timeout (seconds) for calls to the Kiwi API.
_TIMEOUT = 30.0
# Hard cap on rows returned by any corpus query tool.
_QUERY_ROW_LIMIT = 200
|
||||||
|
|
||||||
|
server = Server("kiwi")
|
||||||
|
|
||||||
|
|
||||||
|
def _open_ro(db_path: str | Path | None = None) -> sqlite3.Connection:
    """Open a SQLite database in read-only mode.

    Args:
        db_path: Database file to open. Defaults to the module-level
            ``_DB_PATH`` when omitted.

    Returns:
        A connection opened with ``mode=ro`` whose rows are ``sqlite3.Row``.

    Raises:
        sqlite3.OperationalError: If the file cannot be opened read-only.
    """
    target = Path(_DB_PATH if db_path is None else db_path).resolve()
    # as_uri() needs an absolute path (hence resolve()) and percent-encodes
    # characters such as '?', '#', '%' and spaces that would otherwise be
    # parsed as URI syntax and corrupt the filename.
    uri = f"{target.as_uri()}?mode=ro"
    conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
    conn.row_factory = sqlite3.Row
    return conn
|
||||||
|
|
||||||
|
|
||||||
|
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Advertise the four Kiwi audit tools and their JSON input schemas."""
    query_corpus = Tool(
        name="kiwi_query_corpus",
        description=(
            "Run a read-only SQL SELECT query against the Kiwi corpus DB (kiwi.db). "
            "Returns up to 200 rows as a JSON array. "
            "Key tables: recipes (id, title, ingredient_names, inferred_tags, source_url), "
            "recipes_fts (FTS5 virtual table for full-text search), "
            "ingredient_profiles (name, elements, texture_profile). "
            "Use for schema exploration, spot-checking tag coverage, and counting results. "
            "Read-only — any write statement will be rejected by SQLite."
        ),
        inputSchema={
            "type": "object",
            "required": ["sql"],
            "properties": {
                "sql": {
                    "type": "string",
                    "description": (
                        "A SELECT statement. E.g.: "
                        "SELECT title, inferred_tags FROM recipes WHERE inferred_tags LIKE '%vegan%' LIMIT 10"
                    ),
                },
            },
        },
    )

    count_fts = Tool(
        name="kiwi_count_fts",
        description=(
            "Run an FTS5 MATCH expression against the recipes_fts table and return the hit count. "
            "Useful for quickly auditing keyword coverage without a full query. "
            "Always double-quote all terms in MATCH expressions. "
            "E.g. match_expr='\"tofu\" OR \"tempeh\"' returns how many recipes include either."
        ),
        inputSchema={
            "type": "object",
            "required": ["match_expr"],
            "properties": {
                "match_expr": {
                    "type": "string",
                    "description": (
                        "FTS5 MATCH expression string (without the MATCH keyword). "
                        'E.g. \'"lentil" OR "chickpea"\' or \'"pasta" AND "vegetarian"\''
                    ),
                },
            },
        },
    )

    sample_tags = Tool(
        name="kiwi_sample_tags",
        description=(
            "Return tag frequency distribution from the corpus. "
            "Queries inferred_tags column for tags matching the given prefix pattern. "
            "Useful for auditing how well a category keyword set covers the corpus, "
            "or discovering what tags exist under a domain (cuisine:, meal:, dietary:, texture:)."
        ),
        inputSchema={
            "type": "object",
            "properties": {
                "prefix": {
                    "type": "string",
                    "default": "",
                    "description": (
                        "Tag prefix to filter by. E.g. 'cuisine:' returns all cuisine tags, "
                        "'meal:' returns all meal type tags, '' returns all tags. "
                        "Returns top 50 by frequency."
                    ),
                },
                "limit": {
                    "type": "integer",
                    "default": 50,
                    "description": "Max number of tag entries to return (default 50, max 200).",
                },
            },
        },
    )

    browse_preview = Tool(
        name="kiwi_browse_preview",
        description=(
            "Call the Kiwi browse endpoint and return first-page results. "
            "Use to verify that a domain/category returns the expected recipes "
            "after a keyword or tag change, without opening the browser. "
            "Returns recipe titles, match counts, and total result count."
        ),
        inputSchema={
            "type": "object",
            "required": ["domain", "category"],
            "properties": {
                "domain": {
                    "type": "string",
                    "description": (
                        "Browse domain slug. "
                        "Known domains: cuisine, meal_type, dietary, ingredient, occasion, texture."
                    ),
                },
                "category": {
                    "type": "string",
                    "description": "Category slug within the domain, e.g. 'italian', 'breakfast', 'vegan'.",
                },
                "subcategory": {
                    "type": "string",
                    "default": "",
                    "description": "Optional subcategory slug to narrow further.",
                },
                "page_size": {
                    "type": "integer",
                    "default": 10,
                    "description": "Results per page (default 10, max 50).",
                },
            },
        },
    )

    # Order matches the tool list in the module docstring.
    return [query_corpus, count_fts, sample_tags, browse_preview]
|
||||||
|
|
||||||
|
|
||||||
|
@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    """Route a tool invocation to its handler by tool name.

    Unknown names produce a single text item reading ``Unknown tool: <name>``.
    """
    handlers = {
        "kiwi_query_corpus": _query_corpus,
        "kiwi_count_fts": _count_fts,
        "kiwi_sample_tags": _sample_tags,
        "kiwi_browse_preview": _browse_preview,
    }
    handler = handlers.get(name)
    if handler is None:
        return [TextContent(type="text", text=f"Unknown tool: {name}")]
    return await handler(arguments)
|
||||||
|
|
||||||
|
|
||||||
|
async def _query_corpus(args: dict) -> list[TextContent]:
    """Run a caller-supplied SELECT against the read-only corpus DB.

    Args:
        args: Tool arguments; ``sql`` holds the statement text.

    Returns:
        One text item: a JSON array of at most ``_QUERY_ROW_LIMIT`` row dicts,
        or an error message if the statement is rejected or fails.
    """
    raw_sql = args.get("sql")
    # A missing or non-string value is handled like an empty statement
    # instead of raising AttributeError on .strip().
    sql = raw_sql.strip() if isinstance(raw_sql, str) else ""
    if not sql.upper().startswith("SELECT"):
        return [TextContent(type="text", text="Error: only SELECT statements are allowed.")]

    def _run() -> list[dict]:
        conn = _open_ro()
        try:
            cur = conn.execute(sql)
            return [dict(row) for row in cur.fetchmany(_QUERY_ROW_LIMIT)]
        finally:
            conn.close()

    try:
        # sqlite3 is blocking, so run it on the default executor.
        # get_running_loop() is the supported call inside a coroutine;
        # get_event_loop() is deprecated there.
        rows = await asyncio.get_running_loop().run_in_executor(None, _run)
        return [TextContent(type="text", text=json.dumps(rows, indent=2, default=str))]
    except Exception as exc:
        return [TextContent(type="text", text=f"Query error: {exc}")]
|
||||||
|
|
||||||
|
|
||||||
|
async def _count_fts(args: dict) -> list[TextContent]:
    """Count rows in ``recipes_fts`` matching an FTS5 MATCH expression.

    Args:
        args: Tool arguments; ``match_expr`` is the expression without the
            MATCH keyword. It is bound as a SQL parameter, never interpolated.

    Returns:
        One text item: JSON ``{"match_expr": ..., "count": ...}``, or an
        error message.
    """
    raw_expr = args.get("match_expr")
    # A missing or non-string value is reported as "required" instead of
    # raising AttributeError on .strip().
    match_expr = raw_expr.strip() if isinstance(raw_expr, str) else ""
    if not match_expr:
        return [TextContent(type="text", text="Error: match_expr is required.")]

    def _run() -> int:
        conn = _open_ro()
        try:
            cur = conn.execute(
                "SELECT COUNT(*) FROM recipes_fts WHERE recipes_fts MATCH ?",
                (match_expr,),
            )
            return cur.fetchone()[0]
        finally:
            conn.close()

    try:
        # get_running_loop() is the supported call inside a coroutine;
        # get_event_loop() is deprecated there.
        count = await asyncio.get_running_loop().run_in_executor(None, _run)
        return [TextContent(type="text", text=json.dumps({"match_expr": match_expr, "count": count}))]
    except Exception as exc:
        return [TextContent(type="text", text=f"FTS error: {exc}")]
|
||||||
|
|
||||||
|
|
||||||
|
async def _sample_tags(args: dict) -> list[TextContent]:
    """Return the most frequent ``inferred_tags`` values, optionally by prefix.

    Args:
        args: Tool arguments. ``prefix`` (default ``""``) is matched literally
            against the start of each tag; ``limit`` (default 50) is clamped
            to ``1.._QUERY_ROW_LIMIT``.

    Returns:
        One text item: JSON ``{"prefix": ..., "tags": [{"tag", "frequency"}]}``
        ordered by descending frequency, or an error message.
    """
    prefix = str(args.get("prefix") or "")
    try:
        # Clamp on both sides: SQLite treats a negative LIMIT as "no limit",
        # which would bypass the row cap.
        limit = max(1, min(int(args.get("limit", 50)), _QUERY_ROW_LIMIT))
    except (TypeError, ValueError):
        return [TextContent(type="text", text="Tag query error: limit must be an integer.")]

    # Escape LIKE wildcards so a prefix such as "meal_" matches only that
    # literal text rather than any single character.
    like_prefix = prefix.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    pattern = f"{like_prefix}%"

    def _run() -> list[dict]:
        conn = _open_ro()
        try:
            # Split the comma-separated inferred_tags column (", " or ",") by
            # rewriting it into a JSON array for json_each, then count tags.
            # NOTE(review): a tag containing '"' or '\' would produce invalid
            # JSON here — confirm the corpus never stores such tags.
            sql = """
                WITH tag_rows AS (
                    SELECT trim(value) AS tag
                    FROM recipes, json_each('["' || replace(replace(inferred_tags, ', ', '","'), ',', '","') || '"]')
                    WHERE inferred_tags IS NOT NULL AND inferred_tags != ''
                )
                SELECT tag, COUNT(*) AS frequency
                FROM tag_rows
                WHERE tag LIKE ? ESCAPE '\\' AND tag != ''
                GROUP BY tag
                ORDER BY frequency DESC
                LIMIT ?
            """
            cur = conn.execute(sql, (pattern, limit))
            return [{"tag": r["tag"], "frequency": r["frequency"]} for r in cur.fetchall()]
        finally:
            conn.close()

    try:
        # get_running_loop() is the supported call inside a coroutine;
        # get_event_loop() is deprecated there.
        tags = await asyncio.get_running_loop().run_in_executor(None, _run)
        return [TextContent(type="text", text=json.dumps({"prefix": prefix, "tags": tags}, indent=2))]
    except Exception as exc:
        return [TextContent(type="text", text=f"Tag query error: {exc}")]
|
||||||
|
|
||||||
|
|
||||||
|
async def _browse_preview(args: dict) -> list[TextContent]:
    """Fetch the first page of a browse category from the Kiwi API.

    Args:
        args: Tool arguments: ``domain`` and ``category`` (required),
            ``subcategory`` (optional) and ``page_size`` (default 10,
            clamped to 1..50).

    Returns:
        One text item: a JSON summary with the request parameters, the
        ``total`` reported by the API and the recipe titles on the page, or
        an error message.
    """
    from urllib.parse import quote

    domain = str(args.get("domain") or "")
    category = str(args.get("category") or "")
    subcategory = str(args.get("subcategory") or "")
    if not domain or not category:
        return [TextContent(type="text", text="Browse error: domain and category are required.")]
    try:
        page_size = max(1, min(int(args.get("page_size", 10)), 50))
    except (TypeError, ValueError):
        return [TextContent(type="text", text="Browse error: page_size must be an integer.")]

    params: dict = {"page": 1, "page_size": page_size}
    if subcategory:
        params["subcategory"] = subcategory

    # Quote each path segment (including '/') so a slug cannot add or change
    # path components of the request URL.
    url = (
        f"{_API_URL}/api/v1/recipes/browse/"
        f"{quote(domain, safe='')}/{quote(category, safe='')}"
    )

    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            resp = await client.get(url, params=params)
            resp.raise_for_status()
            # Decode inside the try so a non-JSON body is reported as a
            # browse error rather than escaping the handler.
            data = resp.json()
    except Exception as exc:
        return [TextContent(type="text", text=f"Browse error: {exc}")]

    if not isinstance(data, dict):
        return [TextContent(type="text", text="Browse error: unexpected response shape.")]

    summary = {
        "domain": domain,
        "category": category,
        "subcategory": subcategory or None,
        "total": data.get("total", 0),
        "page_size": page_size,
        "titles": [r.get("title", "") for r in data.get("recipes", [])],
    }
    return [TextContent(type="text", text=json.dumps(summary, indent=2))]
|
||||||
|
|
||||||
|
|
||||||
|
async def _main() -> None:
    """Serve the MCP server over stdio until the streams close."""
    async with stdio_server() as (reader, writer):
        init_options = server.create_initialization_options()
        await server.run(reader, writer, init_options)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(_main())
|
||||||
|
|
@ -137,7 +137,8 @@ class RecipeRequest(BaseModel):
|
||||||
pantry_match_only: bool = False # when True, only return recipes with zero missing ingredients
|
pantry_match_only: bool = False # when True, only return recipes with zero missing ingredients
|
||||||
complexity_filter: str | None = None # 'easy' | 'moderate' | 'involved' — None = any
|
complexity_filter: str | None = None # 'easy' | 'moderate' | 'involved' — None = any
|
||||||
max_time_min: int | None = None # filter by estimated cooking time ceiling
|
max_time_min: int | None = None # filter by estimated cooking time ceiling
|
||||||
max_total_min: int | None = None # filter by parsed total time from recipe directions
|
max_total_min: int | None = None # filter by parsed total time (active + passive)
|
||||||
|
max_active_min: int | None = None # filter by hands-on active time only
|
||||||
unit_system: str = "metric" # "metric" | "imperial"
|
unit_system: str = "metric" # "metric" | "imperial"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -205,3 +206,24 @@ class StreamTokenResponse(BaseModel):
|
||||||
stream_url: str
|
stream_url: str
|
||||||
token: str
|
token: str
|
||||||
expires_in_s: int
|
expires_in_s: int
|
||||||
|
|
||||||
|
|
||||||
|
class AskRequest(BaseModel):
    """Request body for POST /recipes/ask."""
    # Free-text question; validated to be 1-500 characters long.
    question: str = Field(min_length=1, max_length=500)
    # Pantry item names sent with the question; a fresh empty list by default.
    pantry_items: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class AskRecipeHit(BaseModel):
    """A single recipe result from the Ask endpoint."""
    id: int
    title: str
    # Optional match score — presumably a pantry-match percentage; confirm against the endpoint.
    match_pct: float | None = None
    # Optional category label for the recipe.
    category: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class AskResponse(BaseModel):
    """Response from POST /recipes/ask."""
    answer: str | None = None  # LLM-synthesized response (Paid tier only)
    # Recipe hits returned alongside the answer (required field).
    recipes: list[AskRecipeHit]
    # Tier name reported with the response.
    tier: str
|
||||||
|
|
|
||||||
0
app/services/ap/__init__.py
Normal file
0
app/services/ap/__init__.py
Normal file
115
app/services/ap/delivery.py
Normal file
115
app/services/ap/delivery.py
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
# app/services/ap/delivery.py
|
||||||
|
# MIT License
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from circuitforge_core.activitypub import deliver_activity
|
||||||
|
|
||||||
|
from app.services.ap.keys import get_actor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_RETRIES = 3
|
||||||
|
_BACKOFF = [1.0, 4.0, 16.0]
|
||||||
|
|
||||||
|
|
||||||
|
def deliver_to_followers(post_slug: str, activity: dict, db_path: Path) -> None:
    """Send an AP activity to every active follower's inbox.

    Does nothing when no instance actor is loaded. Active followers are read
    from ``ap_followers`` in the database at ``db_path``; a follower's shared
    inbox is preferred over its personal inbox, and each distinct inbox URL
    is delivered to once via ``_deliver_with_retry``.
    """
    if get_actor() is None:
        return

    import sqlite3

    db = sqlite3.connect(str(db_path))
    db.row_factory = sqlite3.Row
    try:
        rows = db.execute(
            "SELECT inbox_url, shared_inbox FROM ap_followers WHERE active = 1"
        ).fetchall()
    finally:
        db.close()

    # The set collapses followers that share the same (shared) inbox.
    targets: set[str] = {r["shared_inbox"] or r["inbox_url"] for r in rows}

    for target in targets:
        _deliver_with_retry(post_slug=post_slug, activity=activity, inbox_url=target, db_path=db_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _deliver_with_retry(
    post_slug: str,
    activity: dict,
    inbox_url: str,
    db_path: Path,
) -> None:
    """Deliver one activity to one inbox, retrying with backoff.

    Makes up to ``_RETRIES`` attempts, sleeping between them according to
    ``_BACKOFF`` (the last delay is reused if the list is shorter). The
    outcome and the number of attempts made are recorded in ``ap_deliveries``.
    Does nothing when no instance actor is loaded.

    Args:
        post_slug: Slug of the post being federated (delivery-log key).
        activity: The AP activity to send.
        inbox_url: Target inbox URL (delivery-log key).
        db_path: SQLite database holding ``ap_deliveries``.
    """
    actor = get_actor()
    if actor is None:
        return

    import sqlite3

    conn = sqlite3.connect(str(db_path))
    try:
        # Create the log row only if this (post, inbox) pair has none yet.
        # The explicit NOT EXISTS check does not depend on the table having
        # a uniqueness constraint, unlike INSERT OR IGNORE.
        conn.execute(
            """INSERT INTO ap_deliveries (post_slug, target_inbox, status)
               SELECT ?, ?, 'pending'
               WHERE NOT EXISTS (
                   SELECT 1 FROM ap_deliveries
                   WHERE post_slug = ? AND target_inbox = ?
               )""",
            (post_slug, inbox_url, post_slug, inbox_url),
        )
        conn.commit()
    finally:
        conn.close()

    last_error: str | None = None
    attempts_made = 0
    for attempt in range(_RETRIES):
        attempts_made = attempt + 1
        try:
            resp = deliver_activity(activity=activity, inbox_url=inbox_url, actor=actor, timeout=10.0)
        except Exception as exc:
            last_error = str(exc)[:200]
        else:
            if resp.status_code < 300:
                # Recorded outside the try: a DB error here must not be taken
                # for a delivery failure and cause the activity to be re-sent.
                _update_delivery(db_path, post_slug, inbox_url, "delivered", None, attempts=attempts_made)
                return
            last_error = f"HTTP {resp.status_code}"

        if attempt < _RETRIES - 1:
            delay = _BACKOFF[min(attempt, len(_BACKOFF) - 1)] if _BACKOFF else 0.0
            time.sleep(delay)

    _update_delivery(db_path, post_slug, inbox_url, "failed", last_error, attempts=attempts_made)
    logger.warning("AP delivery failed after %d attempts to %s: %s", attempts_made, inbox_url, last_error)


def _update_delivery(
    db_path: Path,
    post_slug: str,
    inbox_url: str,
    status: str,
    error: str | None,
    attempts: int = 1,
) -> None:
    """Record the outcome of a delivery run in ``ap_deliveries``.

    Args:
        db_path: SQLite database holding ``ap_deliveries``.
        post_slug: Post slug identifying the log row.
        inbox_url: Target inbox identifying the log row.
        status: New status; ``"delivered"`` also stamps ``delivered_at``
            (UTC, ISO 8601), any other value stores ``error`` in ``last_error``.
        error: Error text for a non-delivered outcome, or ``None``.
        attempts: Number of attempts to add to the row's counter (default 1).
    """
    import sqlite3

    now = datetime.now(timezone.utc).isoformat()
    conn = sqlite3.connect(str(db_path))
    try:
        if status == "delivered":
            conn.execute(
                """UPDATE ap_deliveries SET status=?, attempts=attempts+?, delivered_at=?
                   WHERE post_slug=? AND target_inbox=?""",
                (status, attempts, now, post_slug, inbox_url),
            )
        else:
            conn.execute(
                """UPDATE ap_deliveries SET status=?, attempts=attempts+?, last_error=?
                   WHERE post_slug=? AND target_inbox=?""",
                (status, attempts, error, post_slug, inbox_url),
            )
        conn.commit()
    finally:
        conn.close()
|
||||||
48
app/services/ap/keys.py
Normal file
48
app/services/ap/keys.py
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
# app/services/ap/keys.py
|
||||||
|
# MIT License
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from circuitforge_core.activitypub import CFActor, generate_rsa_keypair, load_actor_from_key_file
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_actor: CFActor | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_actor() -> CFActor | None:
    """Return the loaded instance actor, or None if AP is not enabled."""
    # _actor is the module-level singleton; it stays None until init_actor() assigns it.
    return _actor
|
||||||
|
|
||||||
|
|
||||||
|
def init_actor(host: str, key_path: Path) -> CFActor:
    """Load or generate the instance RSA keypair and build the CFActor singleton.

    Generates a new 2048-bit keypair if the key file does not yet exist
    (first boot), then loads the actor from the key file and stores it in the
    module-level singleton returned by ``get_actor()``.

    Args:
        host: Public host name; the actor id becomes ``https://<host>/ap/actor``.
        key_path: Location of the PEM private key file.

    Returns:
        The loaded instance actor.
    """
    global _actor

    import os

    key_path.parent.mkdir(parents=True, exist_ok=True)

    if not key_path.exists():
        logger.info("AP: no key file found at %s — generating new RSA-2048 keypair", key_path)
        private_pem, _pub = generate_rsa_keypair(bits=2048)
        try:
            # Create the file with owner-only permissions from the start, so
            # the private key is never readable by others, not even briefly.
            # O_EXCL refuses to overwrite a key another process just created.
            fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
        except FileExistsError:
            # Lost a creation race: the existing key is loaded below.
            pass
        else:
            with os.fdopen(fd, "w", encoding="utf-8") as key_file:
                key_file.write(private_pem)
            # Pin the mode exactly, in case the process umask narrowed it.
            key_path.chmod(0o600)

    base = f"https://{host}"
    actor_id = f"{base}/ap/actor"

    _actor = load_actor_from_key_file(
        actor_id=actor_id,
        username="kiwi",
        display_name="Kiwi Pantry",
        private_key_path=str(key_path),
        summary="Community pantry and recipe feed from a Kiwi instance.",
    )
    logger.info("AP: instance actor loaded — %s", actor_id)
    return _actor
|
||||||
194
app/services/ap/mastodon.py
Normal file
194
app/services/ap/mastodon.py
Normal file
|
|
@ -0,0 +1,194 @@
|
||||||
|
# app/services/ap/mastodon.py
|
||||||
|
# MIT License
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_APP_SCOPES = "write:statuses"
|
||||||
|
_APP_NAME = "Kiwi Pantry"
|
||||||
|
_APP_WEBSITE = "https://circuitforge.tech/kiwi"
|
||||||
|
|
||||||
|
|
||||||
|
def register_app(instance_url: str, redirect_uri: str) -> dict:
    """Register Kiwi as an OAuth application on a Mastodon instance.

    POSTs the app name, redirect URI, scopes and website to
    ``<instance>/api/v1/apps`` and returns the decoded JSON response.

    Raises:
        httpx.HTTPError: On transport failure or a non-success status.
    """
    form = {
        "client_name": _APP_NAME,
        "redirect_uris": redirect_uri,
        "scopes": _APP_SCOPES,
        "website": _APP_WEBSITE,
    }
    endpoint = f"{instance_url.rstrip('/')}/api/v1/apps"
    response = httpx.post(endpoint, data=form, timeout=10.0)
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def build_authorize_url(
    instance_url: str,
    client_id: str,
    redirect_uri: str,
    *,
    scopes: str | None = None,
    state: str | None = None,
) -> str:
    """Return the OAuth authorize URL to redirect the user to.

    All query values are URL-encoded, so a redirect URI containing ``?``,
    ``&`` or ``=`` (or a scope containing ``:`` or spaces) cannot corrupt
    the query string.

    Args:
        instance_url: Base URL of the Mastodon instance.
        client_id: Client id of the registered app.
        redirect_uri: Redirect URI registered for the app.
        scopes: Space-separated scopes; defaults to the module's
            ``_APP_SCOPES``.
        state: Optional opaque value sent as the OAuth ``state`` parameter.
            Omitted from the URL when ``None``.
    """
    from urllib.parse import urlencode

    query = {
        "response_type": "code",
        "client_id": client_id,
        "redirect_uri": redirect_uri,
        "scope": _APP_SCOPES if scopes is None else scopes,
    }
    if state is not None:
        query["state"] = state
    return f"{instance_url.rstrip('/')}/oauth/authorize?{urlencode(query)}"
|
||||||
|
|
||||||
|
|
||||||
|
def exchange_code(
    instance_url: str,
    client_id: str,
    client_secret: str,
    code: str,
    redirect_uri: str,
) -> str:
    """Trade an OAuth authorization code for an access token.

    POSTs an ``authorization_code`` grant to ``<instance>/oauth/token`` and
    returns the ``access_token`` field of the JSON response.

    Raises:
        httpx.HTTPError: On transport failure or a non-success status.
        KeyError: If the response carries no ``access_token``.
    """
    grant = {
        "grant_type": "authorization_code",
        "client_id": client_id,
        "client_secret": client_secret,
        "redirect_uri": redirect_uri,
        "code": code,
        "scope": _APP_SCOPES,
    }
    endpoint = f"{instance_url.rstrip('/')}/oauth/token"
    response = httpx.post(endpoint, data=grant, timeout=10.0)
    response.raise_for_status()
    payload = response.json()
    return payload["access_token"]
|
||||||
|
|
||||||
|
|
||||||
|
def post_status(instance_url: str, access_token: str, content: str) -> dict:
    """Publish a public status on the user's Mastodon account.

    Sends ``content`` to ``<instance>/api/v1/statuses`` with bearer auth and
    returns the decoded JSON response.

    Raises:
        httpx.HTTPError: On transport failure or a non-success status.
    """
    endpoint = f"{instance_url.rstrip('/')}/api/v1/statuses"
    auth_headers = {"Authorization": f"Bearer {access_token}"}
    body = {"status": content, "visibility": "public"}
    response = httpx.post(endpoint, headers=auth_headers, json=body, timeout=15.0)
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def build_post_content(post: dict) -> str:
    """Format a community post dict as Mastodon-ready plain text.

    Layout: a title line (with the recipe name appended when it differs from
    the title), an optional notes snippet of at most 200 characters, and a
    hashtag line. Hashtags are ``#Kiwi #Cooking`` plus one per entry among
    the first three ``dietary_tags``; entries that yield no usable text are
    skipped and duplicates are dropped.

    Args:
        post: Post fields; reads ``title``, ``recipe_name``,
            ``outcome_notes`` / ``description`` and ``dietary_tags``.
    """
    title = post.get("title") or "Untitled"
    recipe = post.get("recipe_name")
    notes = post.get("outcome_notes") or post.get("description")
    tags_raw = post.get("dietary_tags") or []
    if isinstance(tags_raw, str):
        # A bare string would otherwise be iterated character by character.
        tags_raw = [tags_raw]

    lines = []
    if recipe and recipe != title:
        lines.append(f"🍽 {title} — {recipe}")
    else:
        lines.append(f"🍽 {title}")

    if notes:
        snippet = notes[:200].strip()
        if len(notes) > 200:
            snippet += "…"
        lines.append(f"\n{snippet}")

    hashtags = ["#Kiwi", "#Cooking"]
    for tag in list(tags_raw)[:3]:
        # Anything other than letters, digits and '_' acts as a word break,
        # so punctuation or whitespace cannot cut the hashtag short.
        cleaned = "".join(ch if ch.isalnum() or ch == "_" else " " for ch in str(tag))
        ht = "#" + "".join(word.capitalize() for word in cleaned.split())
        if ht != "#" and ht not in hashtags:
            hashtags.append(ht)
    lines.append("\n" + " ".join(hashtags))

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def store_token(
    db_path: Path,
    directus_user_id: str,
    instance_url: str,
    access_token: str,
    encryption_key: str | None,
) -> None:
    """Insert or replace the user's Mastodon token in the kiwi.db at ``db_path``.

    The token is passed through ``_encrypt`` first. An existing row for the
    same ``directus_user_id`` has its instance URL, token and ``updated_at``
    overwritten. The instance URL is stored without trailing slashes.
    """
    stored_value = _encrypt(access_token, encryption_key)

    import sqlite3

    upsert_sql = """INSERT INTO mastodon_tokens (directus_user_id, instance_url, access_token)
                    VALUES (?, ?, ?)
                    ON CONFLICT(directus_user_id) DO UPDATE SET
                        instance_url=excluded.instance_url,
                        access_token=excluded.access_token,
                        updated_at=datetime('now')"""
    row_values = (directus_user_id, instance_url.rstrip("/"), stored_value)

    db = sqlite3.connect(str(db_path))
    try:
        db.execute(upsert_sql, row_values)
        db.commit()
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_token(
    db_path: Path,
    directus_user_id: str,
    encryption_key: str | None,
) -> tuple[str, str] | None:
    """Look up the user's stored Mastodon connection.

    Returns ``(instance_url, access_token)`` with the token passed through
    ``_decrypt``, or ``None`` when the user has no row in ``mastodon_tokens``.
    """
    import sqlite3

    lookup_sql = "SELECT instance_url, access_token FROM mastodon_tokens WHERE directus_user_id = ?"
    db = sqlite3.connect(str(db_path))
    try:
        record = db.execute(lookup_sql, (directus_user_id,)).fetchone()
    finally:
        db.close()

    if record is not None:
        stored_url, stored_token = record[0], record[1]
        return stored_url, _decrypt(stored_token, encryption_key)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def delete_token(db_path: Path, directus_user_id: str) -> None:
    """Delete the Mastodon token row for ``directus_user_id``, if any."""
    import sqlite3

    db = sqlite3.connect(str(db_path))
    try:
        db.execute(
            "DELETE FROM mastodon_tokens WHERE directus_user_id = ?",
            (directus_user_id,),
        )
        db.commit()
    finally:
        db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _encrypt(plaintext: str, key: str | None) -> str:
    """Fernet-encrypt ``plaintext`` with ``key``, best-effort.

    Returns ``plaintext`` unchanged when no key is configured (``None`` or
    empty). If encryption fails, the failure is logged with its traceback and
    the plaintext is returned, so the caller can still store the token.
    """
    if not key:
        return plaintext
    try:
        from cryptography.fernet import Fernet

        return Fernet(key.encode()).encrypt(plaintext.encode()).decode()
    except Exception:
        # Deliberate fallback, but keep the cause: without the traceback a
        # malformed key or missing dependency cannot be diagnosed.
        logger.warning("Mastodon token encryption failed — storing plaintext", exc_info=True)
        return plaintext
|
||||||
|
|
||||||
|
|
||||||
|
def _decrypt(ciphertext: str, key: str | None) -> str:
|
||||||
|
if key is None:
|
||||||
|
return ciphertext
|
||||||
|
try:
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
return Fernet(key.encode()).decrypt(ciphertext.encode()).decode()
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Mastodon token decryption failed — returning as-is")
|
||||||
|
return ciphertext
|
||||||
111
app/services/community/dedup.py
Normal file
111
app/services/community/dedup.py
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
# app/services/community/dedup.py
|
||||||
|
# MIT License
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_SIMILARITY_TIERS = {
|
||||||
|
"exact_recipe": "This exact recipe is already in the community feed.",
|
||||||
|
"very_similar": "Very similar recipes already exist (70%+ ingredient overlap).",
|
||||||
|
"somewhat_similar": "Somewhat similar recipes exist (35-70% ingredient overlap).",
|
||||||
|
"different": "No close matches found.",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_ingredient_names(raw) -> set[str]:
|
||||||
|
"""Return a normalised set of ingredient name tokens from various stored formats."""
|
||||||
|
if raw is None:
|
||||||
|
return set()
|
||||||
|
if isinstance(raw, str):
|
||||||
|
try:
|
||||||
|
raw = json.loads(raw)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return set()
|
||||||
|
names: set[str] = set()
|
||||||
|
for item in raw:
|
||||||
|
if isinstance(item, str):
|
||||||
|
names.add(item.lower().strip())
|
||||||
|
elif isinstance(item, dict):
|
||||||
|
name = item.get("name") or item.get("ingredient") or ""
|
||||||
|
if name:
|
||||||
|
names.add(name.lower().strip())
|
||||||
|
return names
|
||||||
|
|
||||||
|
|
||||||
|
def jaccard(a: set[str], b: set[str]) -> float:
    """Return the Jaccard index ``|a & b| / |a | b|`` of two sets.

    Two empty sets score ``1.0``; exactly one empty set scores ``0.0``.
    """
    if a and b:
        return len(a & b) / len(a | b)
    # At least one side is empty: identical only when both are.
    return 0.0 if (a or b) else 1.0
|
||||||
|
|
||||||
|
|
||||||
|
def similarity_tier(jaccard_score: float, exact_recipe: bool) -> str:
    """Map a Jaccard score to a tier key.

    ``exact_recipe`` wins outright. Otherwise a score of at least 0.70 is
    ``"very_similar"``, at least 0.35 is ``"somewhat_similar"``, and anything
    lower is ``"different"``.
    """
    if exact_recipe:
        return "exact_recipe"
    # Thresholds are checked from highest to lowest; the first hit wins.
    for floor, label in ((0.70, "very_similar"), (0.35, "somewhat_similar")):
        if jaccard_score >= floor:
            return label
    return "different"
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_recipe_ingredients(db_path: Path, recipe_id: int | None) -> set[str]:
    """Look up ingredient names for a recipe from the local corpus.

    Args:
        db_path: Database path handed to ``Store``.
        recipe_id: Recipe to look up; ``None`` short-circuits to an empty set.

    Returns:
        The parsed ``ingredient_names`` of the recipe row, or an empty set
        when the id is ``None``, the recipe is missing, or the lookup fails.
        Failures never propagate; they are logged at debug level with the
        traceback attached so the cause can be diagnosed.
    """
    if recipe_id is None:
        return set()
    try:
        from app.db.store import Store

        store = Store(db_path)
        try:
            row = store.get_recipe(recipe_id)
            if row is None:
                return set()
            return _parse_ingredient_names(row.get("ingredient_names"))
        finally:
            store.close()
    except Exception:
        logger.debug(
            "ingredient lookup failed for recipe_id=%s", recipe_id, exc_info=True
        )
        return set()
|
||||||
|
|
||||||
|
|
||||||
|
def build_similar_post_result(
    post,
    incoming_recipe_id: int | None,
    incoming_ingredients: set[str],
    db_path: Path,
) -> dict:
    """Describe how one existing community post compares to an incoming recipe.

    A post counts as an exact match when both recipe ids are set and equal.
    Otherwise, if the incoming recipe has ingredients, the post's ingredients
    are fetched and compared with ``jaccard``. The result carries the post's
    display fields plus ``similarity_tier``, ``jaccard_score`` (``None`` for
    exact matches) and ``tier_description``.
    """
    same_recipe = (
        incoming_recipe_id is not None
        and post.recipe_id is not None
        and post.recipe_id == incoming_recipe_id
    )

    score = 0.0
    if incoming_ingredients and not same_recipe:
        post_ingredients = fetch_recipe_ingredients(db_path, post.recipe_id)
        if post_ingredients:
            score = jaccard(incoming_ingredients, post_ingredients)

    tier = similarity_tier(score, same_recipe)

    result = {
        "slug": post.slug,
        "title": post.title,
        "recipe_name": post.recipe_name,
        "pseudonym": post.pseudonym,
    }
    # Date-like values are serialised as ISO strings; anything else via str().
    if hasattr(post.published, "isoformat"):
        result["published"] = post.published.isoformat()
    else:
        result["published"] = str(post.published)
    result["similarity_tier"] = tier
    result["jaccard_score"] = None if same_recipe else round(score, 3)
    result["tier_description"] = _SIMILARITY_TIERS.get(tier, "")
    return result
|
||||||
|
|
@ -2,17 +2,20 @@
|
||||||
# BSL 1.1 — LLM feature
|
# BSL 1.1 — LLM feature
|
||||||
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
"""Provide a router-compatible LLM client for meal plan generation tasks.
|
||||||
|
|
||||||
Cloud (CF_ORCH_URL set):
|
Cloud (CF_ORCH_URL set), tier 1 — task-based routing (preferred):
|
||||||
Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
|
Calls /api/inference/task with product=kiwi, task=meal_plan.
|
||||||
Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
|
The coordinator resolves the model from assignments.yaml.
|
||||||
with a .complete(system, user, **kwargs) interface.
|
|
||||||
|
Cloud (CF_ORCH_URL set), tier 2 — direct allocation (fallback):
|
||||||
|
Allocates cf-text directly via client.allocate(). Used when the task
|
||||||
|
is not yet registered in the coordinator (cf-orch#61 not deployed).
|
||||||
|
|
||||||
Local / self-hosted (no CF_ORCH_URL):
|
Local / self-hosted (no CF_ORCH_URL):
|
||||||
Returns an LLMRouter instance which tries ollama, vllm, or any
|
Returns an LLMRouter instance which tries ollama, vllm, or any
|
||||||
backend configured in ~/.config/circuitforge/llm.yaml.
|
backend configured in ~/.config/circuitforge/llm.yaml.
|
||||||
|
|
||||||
Both paths expose the same interface so llm_timing.py and llm_planner.py
|
All paths expose the same (router, ctx) interface so llm_planner.py
|
||||||
need no knowledge of the backend.
|
needs no knowledge of the backend.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
@ -22,8 +25,7 @@ from contextlib import nullcontext
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# cf-orch service name and VRAM budget for meal plan LLM tasks.
|
# cf-orch service name and TTL for direct-allocate fallback path.
|
||||||
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
|
|
||||||
_SERVICE_TYPE = "cf-text"
|
_SERVICE_TYPE = "cf-text"
|
||||||
_TTL_S = 120.0
|
_TTL_S = 120.0
|
||||||
_CALLER = "kiwi-meal-plan"
|
_CALLER = "kiwi-meal-plan"
|
||||||
|
|
@ -62,35 +64,79 @@ class _OrchTextRouter:
|
||||||
return resp.choices[0].message.content or ""
|
return resp.choices[0].message.content or ""
|
||||||
|
|
||||||
|
|
||||||
|
# Imported at module level so tests can patch the names in this module's namespace.
|
||||||
|
# app.services.task_inference.task_allocate — patch target for task routing tests.
|
||||||
|
try:
|
||||||
|
from app.services.task_inference import TaskNotRegistered, task_allocate
|
||||||
|
_HAS_TASK_INFERENCE = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_TASK_INFERENCE = False
|
||||||
|
|
||||||
|
# circuitforge_orch.client.CFOrchClient — patch target for direct-allocate fallback tests.
|
||||||
|
try:
|
||||||
|
from circuitforge_orch.client import CFOrchClient
|
||||||
|
except ImportError:
|
||||||
|
CFOrchClient = None # type: ignore[assignment,misc]
|
||||||
|
|
||||||
|
# circuitforge_core.llm.router.LLMRouter — patch target for local-inference tests.
|
||||||
|
try:
|
||||||
|
from circuitforge_core.llm.router import LLMRouter
|
||||||
|
except (ImportError, FileNotFoundError):
|
||||||
|
LLMRouter = None # type: ignore[assignment,misc]
|
||||||
|
|
||||||
|
|
||||||
def _enter_orch_router(ctx):
    """Enter an allocation context and wrap it in an ``_OrchTextRouter``.

    Returns ``(router, ctx)`` with ``ctx`` still open, or ``None`` when the
    context yields no allocation. In every non-success case the context is
    exited here, so a failed attempt never leaves an allocation held.
    """
    alloc = ctx.__enter__()
    try:
        if alloc is not None:
            return _OrchTextRouter(alloc.url), ctx
    except Exception:
        ctx.__exit__(None, None, None)
        raise
    ctx.__exit__(None, None, None)  # release allocation before falling through
    return None


def get_meal_plan_router():
    """Return an LLM client for meal plan tasks.

    Returns (router, ctx) where ctx is a context manager the caller holds
    open for the duration of the LLM call. Returns (None, nullcontext(None))
    if no backend is available.

    Backends are tried in order: task-based cf-orch routing, direct cf-orch
    allocation (both only when ``CF_ORCH_URL`` is set), then the local
    ``LLMRouter``.
    """
    cf_orch_url = os.environ.get("CF_ORCH_URL")

    if cf_orch_url:
        # Tier 1: task-based routing — coordinator owns model selection.
        if _HAS_TASK_INFERENCE:
            try:
                ctx = task_allocate(
                    "kiwi", "meal_plan",
                    service_hint=_SERVICE_TYPE,
                    ttl_s=_TTL_S,
                )
                entered = _enter_orch_router(ctx)
                if entered is not None:
                    return entered
                logger.debug("task allocation returned no allocation, trying direct allocate")
            except TaskNotRegistered:
                logger.debug(
                    "kiwi.meal_plan not in coordinator assignments — "
                    "falling back to direct cf-text allocation"
                )
            except Exception as exc:
                logger.debug("task allocation failed, trying direct allocate: %s", exc)

        # Tier 2: direct allocation — hardcoded service type.
        if CFOrchClient is not None:
            try:
                client = CFOrchClient(cf_orch_url)
                ctx = client.allocate(
                    service=_SERVICE_TYPE,
                    ttl_s=_TTL_S,
                    caller=_CALLER,
                )
                entered = _enter_orch_router(ctx)
                if entered is not None:
                    return entered
            except Exception as exc:
                logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)

    # Tier 3: local inference — ollama / vllm / openai-compat.
    if LLMRouter is not None:
        try:
            return LLMRouter(), nullcontext(None)
        except FileNotFoundError:
            logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
            return None, nullcontext(None)
        except Exception as exc:
            logger.debug("LLMRouter init failed: %s", exc)
            return None, nullcontext(None)
    return None, nullcontext(None)
|
||||||
|
|
|
||||||
|
|
@ -18,43 +18,51 @@ class DocuvisionResult:
|
||||||
class DocuvisionClient:
    """Thin client for the cf-docuvision service."""

    def __init__(self, base_url: str, timeout: float = 120.0) -> None:
        """Store the service base URL (trailing slash removed) and request timeout."""
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout

    @staticmethod
    def _build_payload(image_path: str | Path, hint: str) -> dict:
        """Read the image file and build the JSON body for ``/extract``."""
        image_bytes = Path(image_path).read_bytes()
        return {"image_b64": base64.b64encode(image_bytes).decode(), "hint": hint}

    @staticmethod
    def _to_result(data: dict) -> DocuvisionResult:
        """Convert a decoded ``/extract`` response into a ``DocuvisionResult``."""
        # Tolerate a response whose "metadata" is missing, null or not an object.
        metadata = data.get("metadata")
        confidence = metadata.get("confidence") if isinstance(metadata, dict) else None
        return DocuvisionResult(
            text=data.get("raw_text", ""),
            confidence=confidence,
            raw=data,
        )

    def extract_text(self, image_path: str | Path, hint: str = "text") -> DocuvisionResult:
        """Send an image to docuvision and return extracted text.

        Args:
            image_path: Path to the image file.
            hint: Docuvision extraction hint — "text" for dense prose (recipes),
                "table" for tabular data, "form" for form fields, "auto" for
                automatic detection.

        Raises:
            httpx.HTTPStatusError: when the service answers with an error status.
        """
        payload = self._build_payload(image_path, hint)
        with httpx.Client(timeout=self._timeout) as client:
            resp = client.post(f"{self._base_url}/extract", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return self._to_result(data)

    async def extract_text_async(self, image_path: str | Path, hint: str = "text") -> DocuvisionResult:
        """Async version of ``extract_text``; the image file read itself is still synchronous."""
        payload = self._build_payload(image_path, hint)
        async with httpx.AsyncClient(timeout=self._timeout) as client:
            resp = await client.post(f"{self._base_url}/extract", json=payload)
            resp.raise_for_status()
            data = resp.json()
        return self._to_result(data)
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,29 @@ def _try_docuvision(image_path: str | Path) -> str | None:
|
||||||
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
||||||
if not cf_orch_url:
|
if not cf_orch_url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Tier 1: task-based routing — coordinator owns model selection.
|
||||||
|
try:
|
||||||
|
from app.services.task_inference import task_allocate, TaskNotRegistered
|
||||||
|
from app.services.ocr.docuvision_client import DocuvisionClient
|
||||||
|
try:
|
||||||
|
with task_allocate(
|
||||||
|
"kiwi", "ocr",
|
||||||
|
service_hint="cf-docuvision",
|
||||||
|
ttl_s=60.0,
|
||||||
|
) as alloc:
|
||||||
|
doc_client = DocuvisionClient(alloc.url)
|
||||||
|
result = doc_client.extract_text(image_path)
|
||||||
|
return result.text if result.text else None
|
||||||
|
except TaskNotRegistered:
|
||||||
|
logger.debug(
|
||||||
|
"kiwi.ocr not in coordinator assignments — "
|
||||||
|
"falling back to direct cf-docuvision allocation"
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("task allocation path failed, trying direct allocate: %s", exc)
|
||||||
|
|
||||||
|
# Tier 2: direct allocation — hardcoded service type.
|
||||||
try:
|
try:
|
||||||
from circuitforge_orch.client import CFOrchClient
|
from circuitforge_orch.client import CFOrchClient
|
||||||
from app.services.ocr.docuvision_client import DocuvisionClient
|
from app.services.ocr.docuvision_client import DocuvisionClient
|
||||||
|
|
@ -49,7 +72,7 @@ def _try_docuvision(image_path: str | Path) -> str | None:
|
||||||
result = doc_client.extract_text(image_path)
|
result = doc_client.extract_text(image_path)
|
||||||
return result.text if result.text else None
|
return result.text if result.text else None
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("cf-docuvision fast-path failed, falling back: %s", exc)
|
logger.debug("cf-docuvision fast-path failed, falling back to local VLM: %s", exc)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -93,7 +93,18 @@ class ElementClassifier:
|
||||||
return self._heuristic_profile(name)
|
return self._heuristic_profile(name)
|
||||||
|
|
||||||
def classify_batch(self, names: list[str]) -> list[IngredientProfile]:
    """Classify multiple names with batched DB lookups, falling back to heuristics.

    Names are lower-cased and stripped before lookup. Distinct names are
    fetched from ``ingredient_profiles`` in chunks so that a large batch
    cannot exceed SQLite's bound-parameter limit. Any name without a stored
    row gets ``_heuristic_profile``.

    Returns:
        One profile per input name, in input order.
    """
    if not names:
        return []
    normalised = [n.lower().strip() for n in names]
    # Bind each distinct name once; dict.fromkeys keeps first-seen order.
    unique = list(dict.fromkeys(normalised))
    c = self._store._cp
    chunk_size = 500  # below SQLite's historical default of 999 host parameters

    by_name = {}
    for start in range(0, len(unique), chunk_size):
        chunk = unique[start:start + chunk_size]
        placeholders = ",".join("?" * len(chunk))
        rows = self._store._fetch_all(
            f"SELECT * FROM {c}ingredient_profiles WHERE name IN ({placeholders})",
            tuple(chunk),
        )
        for r in rows:
            by_name[r["name"]] = self._row_to_profile(r)
    return [by_name.get(n) or self._heuristic_profile(n) for n in normalised]
|
||||||
|
|
||||||
def identify_gaps(self, profiles: list[IngredientProfile]) -> list[str]:
|
def identify_gaps(self, profiles: list[IngredientProfile]) -> list[str]:
|
||||||
"""Return element names that have no coverage in the given profile list."""
|
"""Return element names that have no coverage in the given profile list."""
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,14 @@
|
||||||
"""LLM-driven recipe generator for Levels 3 and 4."""
|
"""LLM-driven recipe generator for Levels 3 and 4."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from contextlib import nullcontext
|
from contextlib import nullcontext
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING, AsyncGenerator
|
||||||
|
|
||||||
from openai import OpenAI
|
from openai import AsyncOpenAI, OpenAI
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from app.db.store import Store
|
from app.db.store import Store
|
||||||
|
|
@ -149,8 +150,8 @@ class LLMRecipeGenerator:
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
_SERVICE_TYPE = "vllm"
|
_SERVICE_TYPE = "cf-text"
|
||||||
_MODEL_CANDIDATES = ["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"]
|
_MODEL_CANDIDATES = ["granite-4.1-8b", "deepseek-r1-1.5b"]
|
||||||
_TTL_S = 300.0
|
_TTL_S = 300.0
|
||||||
_CALLER = "kiwi-recipe"
|
_CALLER = "kiwi-recipe"
|
||||||
|
|
||||||
|
|
@ -182,7 +183,12 @@ class LLMRecipeGenerator:
|
||||||
|
|
||||||
With CF_ORCH_URL set: acquires a vLLM allocation via CFOrchClient and
|
With CF_ORCH_URL set: acquires a vLLM allocation via CFOrchClient and
|
||||||
calls the OpenAI-compatible API directly against the allocated service URL.
|
calls the OpenAI-compatible API directly against the allocated service URL.
|
||||||
Allocation failure falls through to LLMRouter rather than silently returning "".
|
Falls back to LLMRouter when:
|
||||||
|
- Allocation succeeded but the service is cold (warm=False) — avoids
|
||||||
|
making the user wait for model load; LLMRouter uses Ollama which is
|
||||||
|
already running.
|
||||||
|
- Allocation succeeded but the connection to the service URL fails — the
|
||||||
|
agent may have registered the service but failed to start it.
|
||||||
Without CF_ORCH_URL: uses LLMRouter directly.
|
Without CF_ORCH_URL: uses LLMRouter directly.
|
||||||
"""
|
"""
|
||||||
ctx = self._get_llm_context()
|
ctx = self._get_llm_context()
|
||||||
|
|
@ -208,6 +214,15 @@ class LLMRecipeGenerator:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if alloc is not None:
|
if alloc is not None:
|
||||||
|
# Skip cold services — model not yet loaded means the user would
|
||||||
|
# wait 60–120 s for model load before any response. Use LLMRouter
|
||||||
|
# (Ollama) instead, which is already warm on the host.
|
||||||
|
if not alloc.warm:
|
||||||
|
logger.info(
|
||||||
|
"cf-orch vllm allocated but cold (warm=False) — releasing and falling back to LLMRouter"
|
||||||
|
)
|
||||||
|
raise RuntimeError("vllm cold")
|
||||||
|
|
||||||
base_url = alloc.url.rstrip("/") + "/v1"
|
base_url = alloc.url.rstrip("/") + "/v1"
|
||||||
client = OpenAI(base_url=base_url, api_key="any")
|
client = OpenAI(base_url=base_url, api_key="any")
|
||||||
model = alloc.model or "__auto__"
|
model = alloc.model or "__auto__"
|
||||||
|
|
@ -223,6 +238,20 @@ class LLMRecipeGenerator:
|
||||||
return LLMRouter().complete(prompt)
|
return LLMRouter().complete(prompt)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error("LLM call failed: %s", exc)
|
logger.error("LLM call failed: %s", exc)
|
||||||
|
# When cf-orch gave us an allocation but the service is unreachable
|
||||||
|
# (cold skip, connection refused, or other error), fall back to
|
||||||
|
# LLMRouter rather than silently returning empty.
|
||||||
|
# Skip "vllm" in the fallback order — that backend also routes through
|
||||||
|
# cf-orch, which would trigger a second (wasted) cold allocation.
|
||||||
|
if alloc is not None:
|
||||||
|
logger.info("Falling back to LLMRouter after vllm failure")
|
||||||
|
try:
|
||||||
|
from circuitforge_core.llm.router import LLMRouter
|
||||||
|
router = LLMRouter()
|
||||||
|
_order = [b for b in (router.config.get("fallback_order") or []) if b != "vllm"]
|
||||||
|
return router.complete(prompt, fallback_order=_order or None)
|
||||||
|
except Exception as fallback_exc:
|
||||||
|
logger.error("LLMRouter fallback also failed: %s", fallback_exc)
|
||||||
return ""
|
return ""
|
||||||
finally:
|
finally:
|
||||||
if ctx is not None:
|
if ctx is not None:
|
||||||
|
|
@ -359,3 +388,91 @@ class LLMRecipeGenerator:
|
||||||
suggestions=[suggestion],
|
suggestions=[suggestion],
|
||||||
element_gaps=gaps,
|
element_gaps=gaps,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def stream_generate(
    self,
    req: RecipeRequest,
    profiles: list,
    gaps: list[str],
) -> AsyncGenerator[str, None]:
    """Stream LLM tokens for L3/L4. Yields raw text chunks as they arrive.

    Tries cf-orch warm vllm first; falls back to Ollama via AsyncOpenAI.
    When neither is reachable, falls back to blocking _call_llm and yields
    the complete response as a single chunk so the caller always gets output.

    A missing or broken LLMRouter configuration only disables the Ollama
    phase; it no longer aborts the generator before the blocking fallback.
    """
    if req.level == 4:
        prompt = self.build_level4_prompt(req)
    else:
        prompt = self.build_level3_prompt(req, profiles, gaps)

    # Phase 1: try cf-orch warm vllm (sync allocation, wrapped in thread)
    alloc_info = await asyncio.to_thread(self._try_alloc_for_stream)
    if alloc_info is not None:
        alloc, ctx = alloc_info
        try:
            async for token in self._stream_openai_compat(
                alloc.url.rstrip("/") + "/v1", "any", alloc.model or "__auto__", prompt
            ):
                yield token
            return
        except Exception as exc:
            # NOTE(review): if the stream breaks after tokens were already
            # yielded, the phases below restart from scratch and the caller
            # sees the partial text followed by a fresh response — confirm
            # that consumers tolerate this.
            logger.debug("cf-orch stream failed, falling back to Ollama: %s", exc)
        finally:
            await asyncio.to_thread(_safe_exit, ctx)

    # Phase 2: Ollama streaming via OpenAI-compat API
    ollama = None
    try:
        from circuitforge_core.llm.router import LLMRouter

        ollama = LLMRouter().config.get("backends", {}).get("ollama")
    except Exception as exc:
        logger.debug("LLMRouter config unavailable for streaming: %s", exc)

    if ollama and ollama.get("enabled", True):
        try:
            base_url = ollama["base_url"]
            model = ollama.get("model", "llama3")
            async for token in self._stream_openai_compat(base_url, "any", model, prompt):
                yield token
            return
        except Exception as exc:
            logger.warning("Ollama streaming failed, falling back to blocking: %s", exc)

    # Phase 3: blocking fallback — yields full response at once
    result = await asyncio.to_thread(self._call_llm, prompt)
    if result:
        yield result
|
||||||
|
|
||||||
|
def _try_alloc_for_stream(self):
|
||||||
|
"""Attempt cf-orch allocation synchronously; return (alloc, ctx) or None."""
|
||||||
|
ctx = self._get_llm_context()
|
||||||
|
try:
|
||||||
|
alloc = ctx.__enter__()
|
||||||
|
if alloc is not None and alloc.warm:
|
||||||
|
return alloc, ctx
|
||||||
|
# Not warm — release and signal fallback
|
||||||
|
_safe_exit(ctx)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("cf-orch alloc for stream failed: %s", exc)
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
async def _stream_openai_compat(
    base_url: str, api_key: str, model: str, prompt: str
) -> AsyncGenerator[str, None]:
    """Stream chat-completion text deltas from an OpenAI-compatible endpoint.

    Args:
        base_url: API root of the endpoint.
        api_key: Key passed to the client.
        model: Model id; the sentinel ``"__auto__"`` selects the first model
            the endpoint lists.
        prompt: Sent as a single user message.

    Yields:
        Each non-empty ``delta.content`` chunk in arrival order.

    Raises:
        RuntimeError: when ``"__auto__"`` is requested but no models are listed.
    """
    # The context manager closes the client's HTTP resources once the stream
    # finishes, fails, or the generator is closed early.
    async with AsyncOpenAI(base_url=base_url, api_key=api_key) as client:
        if model == "__auto__":
            models = await client.models.list()
            if not models.data:
                raise RuntimeError("OpenAI-compatible endpoint lists no models")
            model = models.data[0].id
        stream = await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        async for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_exit(ctx) -> None:
|
||||||
|
try:
|
||||||
|
ctx.__exit__(None, None, None)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
|
||||||
|
|
@ -918,6 +918,14 @@ class RecipeEngine:
|
||||||
elif row_time_min > req.max_total_min:
|
elif row_time_min > req.max_total_min:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Active (hands-on) time filter — independent of total time.
|
||||||
|
# Lets users request "≤30 min hands-on, any total" to include slow braises.
|
||||||
|
# Skips recipes where active_min == 0 (no time signals parsed) to avoid
|
||||||
|
# hiding valid results when the parser couldn't extract timing.
|
||||||
|
if req.max_active_min is not None and row_time_effort.active_min > 0:
|
||||||
|
if row_time_effort.active_min > req.max_active_min:
|
||||||
|
continue
|
||||||
|
|
||||||
# Level 2: also add dietary constraint swaps from substitution_pairs
|
# Level 2: also add dietary constraint swaps from substitution_pairs
|
||||||
if req.level == 2 and req.constraints:
|
if req.level == 2 and req.constraints:
|
||||||
for ing in ingredient_names:
|
for ing in ingredient_names:
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from collections.abc import Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@ -196,34 +197,109 @@ def _call_via_local_vlm(image_paths: list[Path], prompt: str) -> str:
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
def _call_vision_backend(image_paths: list[Path], prompt: str) -> str:
|
def _build_ocr_extraction_prompt(ocr_text: str) -> str:
    """Compose the text-LLM prompt that turns OCR output into recipe JSON.

    The schema section of ``_EXTRACTION_PROMPT`` (everything from
    "Return a single JSON object" onwards, or the whole prompt if that marker
    is absent) is reused verbatim behind an OCR-specific preamble, and the
    OCR text is appended as the input.
    """
    marker = "Return a single JSON object"
    _, found, tail = _EXTRACTION_PROMPT.partition(marker)
    schema_section = marker + tail if found else _EXTRACTION_PROMPT

    preamble = (
        "You are extracting a recipe from OCR text taken from a recipe card, "
        "cookbook page, or handwritten note.\n\n"
        "The text below was obtained via optical character recognition and may "
        "contain minor scanning artifacts or formatting irregularities.\n\n"
    )
    return preamble + f"{schema_section}\n\nOCR Text:\n{ocr_text}"
|
||||||
|
|
||||||
|
|
||||||
|
def _call_via_cf_text_vlm(alloc_url: str, image_paths: list[Path], prompt: str) -> str:
    """Send images plus a prompt to the cf-text OpenAI-compatible chat endpoint.

    The message content lists each image as a base64 data URL (pages after the
    first are preceded by a "Page N" marker) and ends with the prompt text.
    Returns the stripped content of the first choice.
    """
    import httpx

    parts: list[dict] = []
    for page_no, image_path in enumerate(image_paths, start=1):
        if page_no > 1:
            parts.append({"type": "text", "text": f"(Page {page_no} of the same recipe:)"})
        encoded = _load_image_b64(image_path)
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }
        parts.append(image_part)
    parts.append({"type": "text", "text": prompt})

    endpoint = f"{alloc_url.rstrip('/')}/v1/chat/completions"
    request_body = {
        "model": "local",
        "messages": [{"role": "user", "content": parts}],
        "max_tokens": 2048,
        "temperature": 0.0,
    }
    response = httpx.post(endpoint, json=request_body, timeout=180.0)
    response.raise_for_status()
    first_choice = response.json()["choices"][0]
    return first_choice["message"]["content"].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _call_vision_backend(
|
||||||
|
image_paths: list[Path],
|
||||||
|
prompt: str,
|
||||||
|
progress_cb: "Callable[[str, str], None] | None" = None,
|
||||||
|
) -> str:
|
||||||
"""Dispatch to the best available vision backend.
|
"""Dispatch to the best available vision backend.
|
||||||
|
|
||||||
Priority: cf-orch vision -> local Qwen2.5-VL -> Anthropic API.
|
Priority: cf-orch (Qwen2-VL GGUF via cf-text) -> local Qwen2.5-VL -> Anthropic API.
|
||||||
Raises RuntimeError with a clear message when no backend is available.
|
Raises RuntimeError with a clear message when no backend is available.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
image_paths: Images to process.
|
||||||
|
prompt: Extraction prompt (used by local VLM / Anthropic paths).
|
||||||
|
progress_cb: Optional callback(status, message) for SSE progress events.
|
||||||
|
Called synchronously from the thread — caller bridges to async.
|
||||||
"""
|
"""
|
||||||
|
def _progress(status: str, message: str) -> None:
|
||||||
|
if progress_cb:
|
||||||
|
progress_cb(status, message)
|
||||||
|
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
|
|
||||||
# 1. Try cf-orch vision allocation
|
# 1. Try cf-orch task allocation → cf-docuvision (Qwen2-VL GGUF via llama.cpp).
|
||||||
|
# Two-step: docuvision OCRs the image(s), then LLMRouter structures the text into JSON.
|
||||||
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
cf_orch_url = os.environ.get("CF_ORCH_URL")
|
||||||
if cf_orch_url:
|
if cf_orch_url:
|
||||||
try:
|
try:
|
||||||
from circuitforge_orch.client import CFOrchClient
|
from app.services.task_inference import TaskNotRegistered, task_allocate
|
||||||
from app.services.ocr.docuvision_client import DocuvisionClient
|
from app.services.ocr.docuvision_client import DocuvisionClient
|
||||||
|
from circuitforge_core.llm.router import LLMRouter
|
||||||
|
|
||||||
client = CFOrchClient(cf_orch_url)
|
try:
|
||||||
with client.allocate(
|
_progress("allocating", "Starting vision service...")
|
||||||
service="cf-vision",
|
with task_allocate("kiwi", "recipe_scan", service_hint="cf-docuvision", ttl_s=120.0) as alloc:
|
||||||
model_candidates=["qwen2.5-vl-7b", "cf-docuvision"],
|
_progress("scanning", "Extracting recipe text from photo...")
|
||||||
ttl_s=90.0,
|
|
||||||
caller="kiwi-recipe-scan",
|
|
||||||
) as alloc:
|
|
||||||
if alloc is not None:
|
|
||||||
doc_client = DocuvisionClient(alloc.url)
|
doc_client = DocuvisionClient(alloc.url)
|
||||||
# docuvision takes a single image -- use first image only for now
|
ocr_parts: list[str] = []
|
||||||
result = doc_client.extract_text(image_paths[0])
|
for i, path in enumerate(image_paths):
|
||||||
if result.text:
|
result = doc_client.extract_text(path, hint="text")
|
||||||
return result.text
|
prefix = f"(Page {i + 1} of the same recipe)\n" if len(image_paths) > 1 else ""
|
||||||
|
ocr_parts.append(f"{prefix}{result.text}")
|
||||||
|
combined_ocr = "\n\n".join(ocr_parts)
|
||||||
|
|
||||||
|
if not combined_ocr.strip():
|
||||||
|
raise ValueError("Docuvision returned no text — image may not be a recipe")
|
||||||
|
|
||||||
|
_progress("structuring", "Parsing recipe structure...")
|
||||||
|
text = LLMRouter().complete(
|
||||||
|
_build_ocr_extraction_prompt(combined_ocr),
|
||||||
|
system="You are a recipe data extractor. Return ONLY valid JSON. No markdown, no explanation, no code fences.",
|
||||||
|
)
|
||||||
|
if text:
|
||||||
|
return text
|
||||||
|
|
||||||
|
except TaskNotRegistered:
|
||||||
|
logger.debug("kiwi.recipe_scan not yet registered in cf-orch assignments")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("cf-orch vision failed for recipe scan: %s", exc)
|
logger.debug("cf-orch vision failed for recipe scan: %s", exc)
|
||||||
errors.append(f"cf-orch: {exc}")
|
errors.append(f"cf-orch: {exc}")
|
||||||
|
|
@ -256,40 +332,76 @@ def _normalize_ingredient_name(name: str) -> str:
|
||||||
return name.lower().strip()
|
return name.lower().strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_json_object(text: str) -> str | None:
    """Locate the first brace-balanced ``{...}`` span in ``text``.

    Scanning starts at the first ``{`` and counts nesting depth instead of
    relying on a greedy regex, so nested objects and trailing prose are
    handled. Braces that occur inside double-quoted strings are not counted,
    and a backslash inside a string makes the following character literal.

    Args:
        text: Raw model output that may embed a JSON object.

    Returns:
        The substring from the first ``{`` through its matching ``}``, or
        None when there is no ``{`` or the object never closes.
    """
    opening = text.find("{")
    if opening < 0:
        return None

    nesting = 0
    inside_quotes = False
    skip_char = False
    for pos in range(opening, len(text)):
        char = text[pos]
        if skip_char:
            # The previous character was a backslash inside a string.
            skip_char = False
        elif inside_quotes:
            if char == "\\":
                skip_char = True
            elif char == '"':
                inside_quotes = False
        elif char == '"':
            inside_quotes = True
        elif char == "{":
            nesting += 1
        elif char == "}":
            nesting -= 1
            if nesting == 0:
                return text[opening : pos + 1]
    return None
|
||||||
|
|
||||||
|
|
||||||
def _parse_scanner_json(raw_text: str) -> dict:
|
def _parse_scanner_json(raw_text: str) -> dict:
|
||||||
"""Extract and return the JSON dict from VLM output.
|
"""Extract and return the JSON dict from VLM output.
|
||||||
|
|
||||||
Handles:
|
Handles:
|
||||||
- Pure JSON
|
- Pure JSON
|
||||||
- JSON wrapped in ```json ... ``` markdown fences
|
- JSON in ```json ... ``` markdown fences
|
||||||
- JSON preceded by a line of prose ("Here is the recipe: {...}")
|
- Qwen3-style <think>...</think> or <thinking>...</thinking> preambles
|
||||||
|
- JSON preceded or followed by prose
|
||||||
|
|
||||||
Raises ValueError on not_a_recipe or unparseable output.
|
Raises ValueError on not_a_recipe or unparseable output.
|
||||||
"""
|
"""
|
||||||
text = raw_text.strip()
|
text = raw_text.strip()
|
||||||
|
|
||||||
# Strip markdown fences if present
|
# Strip thinking-token blocks emitted by reasoning models (Qwen3, DeepSeek-R1, etc.)
|
||||||
if text.startswith("```"):
|
text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE).strip()
|
||||||
parts = text.split("```")
|
text = re.sub(r"<thinking>.*?</thinking>", "", text, flags=re.DOTALL | re.IGNORECASE).strip()
|
||||||
for part in parts:
|
|
||||||
part = part.strip()
|
|
||||||
if part.startswith("json"):
|
|
||||||
part = part[4:].strip()
|
|
||||||
if part.startswith("{"):
|
|
||||||
text = part
|
|
||||||
break
|
|
||||||
|
|
||||||
# Try direct parse first
|
# Strip markdown fences if present
|
||||||
|
if "```" in text:
|
||||||
|
# Find the content between the first ``` pair
|
||||||
|
fence_match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
|
||||||
|
if fence_match:
|
||||||
|
text = fence_match.group(1).strip()
|
||||||
|
|
||||||
|
# Try direct parse
|
||||||
try:
|
try:
|
||||||
data = json.loads(text)
|
data = json.loads(text)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# Extract first JSON object embedded in prose
|
# Fall back to brace-balanced extraction from anywhere in the output
|
||||||
match = re.search(r"\{.*\}", text, re.DOTALL)
|
candidate = _extract_json_object(text)
|
||||||
if not match:
|
if not candidate:
|
||||||
|
logger.warning("Could not parse JSON from LLM output (first 400 chars): %r", text[:400])
|
||||||
raise ValueError(f"Could not parse JSON from VLM output: {text[:200]!r}")
|
raise ValueError(f"Could not parse JSON from VLM output: {text[:200]!r}")
|
||||||
try:
|
try:
|
||||||
data = json.loads(match.group(0))
|
data = json.loads(candidate)
|
||||||
except json.JSONDecodeError as exc:
|
except json.JSONDecodeError as exc:
|
||||||
|
logger.warning("Brace-extracted JSON still invalid: %r", candidate[:400])
|
||||||
raise ValueError(f"Could not parse JSON from VLM output: {exc}") from exc
|
raise ValueError(f"Could not parse JSON from VLM output: {exc}") from exc
|
||||||
|
|
||||||
if isinstance(data, dict) and data.get("error") == "not_a_recipe":
|
if isinstance(data, dict) and data.get("error") == "not_a_recipe":
|
||||||
|
|
@ -350,6 +462,7 @@ class RecipeScanner:
|
||||||
self,
|
self,
|
||||||
image_paths: list[Path],
|
image_paths: list[Path],
|
||||||
pantry_names: list[str] | None = None,
|
pantry_names: list[str] | None = None,
|
||||||
|
progress_cb: Callable[[str, str], None] | None = None,
|
||||||
) -> ScannedRecipeResult:
|
) -> ScannedRecipeResult:
|
||||||
"""Extract a structured recipe from one or more photos.
|
"""Extract a structured recipe from one or more photos.
|
||||||
|
|
||||||
|
|
@ -371,7 +484,7 @@ class RecipeScanner:
|
||||||
raise ValueError(f"Maximum {MAX_IMAGES} images per scan (got {len(image_paths)})")
|
raise ValueError(f"Maximum {MAX_IMAGES} images per scan (got {len(image_paths)})")
|
||||||
|
|
||||||
# Call vision backend
|
# Call vision backend
|
||||||
raw_text = _call_vision_backend(image_paths, _EXTRACTION_PROMPT)
|
raw_text = _call_vision_backend(image_paths, _EXTRACTION_PROMPT, progress_cb=progress_cb)
|
||||||
|
|
||||||
# Parse JSON from VLM output
|
# Parse JSON from VLM output
|
||||||
data = _parse_scanner_json(raw_text)
|
data = _parse_scanner_json(raw_text)
|
||||||
|
|
|
||||||
124
app/services/task_inference.py
Normal file
124
app/services/task_inference.py
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
# app/services/task_inference.py
|
||||||
|
# BSL 1.1 — LLM feature
|
||||||
|
"""Task-based service allocation via the cf-orch coordinator.
|
||||||
|
|
||||||
|
Calls POST /api/inference/task instead of a hardcoded service type.
|
||||||
|
The coordinator resolves model_id and service_type from assignments.yaml.
|
||||||
|
|
||||||
|
Fallback contract (for callers):
|
||||||
|
- 404 → TaskNotRegistered (fall back to direct client.allocate())
|
||||||
|
- other error → RuntimeError
|
||||||
|
- CF_ORCH_URL unset → RuntimeError (guard with os.environ.get first)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from collections.abc import Generator
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class TaskNotRegistered(Exception):
    """Raised when the coordinator answers 404 for a product/task pair.

    A 404 indicates the task has no entry in assignments.yaml yet. Callers
    are expected to fall back to direct service allocation
    (client.allocate()).
    """
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Allocation:
    """Immutable record describing one allocation returned by the coordinator."""

    # Address of the allocated service, taken from the response's "url" field.
    url: str
    # Coordinator-issued id; used in the release (DELETE) endpoint path.
    allocation_id: str
    # Service type name; used in the release (DELETE) endpoint path.
    service: str
|
||||||
|
|
||||||
|
|
||||||
|
def _orch_url() -> str:
    """Return the ``CF_ORCH_URL`` value with trailing slashes removed.

    The result is an empty string when the variable is not set.
    """
    configured = os.environ.get("CF_ORCH_URL", "")
    return configured.rstrip("/")
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def task_allocate(
    product: str,
    task: str,
    *,
    service_hint: str,
    ttl_s: float = 120.0,
) -> Generator[Allocation, None, None]:
    """Context manager: allocate a service via task-based routing.

    Calls POST /api/inference/task, yields Allocation, releases on exit.
    Supports both `with task_allocate(...) as alloc:` and manual
    `ctx = task_allocate(...); alloc = ctx.__enter__()` patterns.

    **Sync-only**: uses the synchronous httpx API. Do not call from an
    ``async def`` handler without wrapping in ``asyncio.to_thread``. Current
    call sites (``llm_router.py``, ``vl_model.py``) are synchronous.

    Args:
        product: CF product name (e.g. "kiwi")
        task: Task identifier (e.g. "meal_plan", "ocr")
        service_hint: Service type used for the release DELETE call when
            the coordinator response carries no ``service`` field. When the
            coordinator is updated to always return it (cf-orch#63), this
            becomes unused.
        ttl_s: Allocation TTL in seconds.
            NOTE(review): this value is accepted but is not currently
            included in the request sent to the coordinator.

    Raises:
        TaskNotRegistered: Coordinator returned 404.
        RuntimeError: CF_ORCH_URL is not set, the coordinator is
            unreachable, it returned a non-404 error, or it returned a
            malformed (non-JSON / non-object / missing fields) response.
    """
    base = _orch_url()
    if not base:
        raise RuntimeError("CF_ORCH_URL is not set")

    try:
        resp = httpx.post(
            f"{base}/api/inference/task",
            json={"product": product, "task": task, "payload": {}},
            timeout=30.0,
        )
    except httpx.RequestError as exc:
        raise RuntimeError(f"cf-orch unreachable: {exc}") from exc

    if resp.status_code == 404:
        raise TaskNotRegistered(
            f"No assignment for product={product!r} task={task!r} — "
            "ensure cf-orch#61/62 are deployed and coordinator reloaded"
        )
    if not resp.is_success:
        raise RuntimeError(
            f"cf-orch /api/inference/task failed: "
            f"HTTP {resp.status_code} — {resp.text[:200]}"
        )

    try:
        data = resp.json()
        alloc = Allocation(
            url=data["url"],
            allocation_id=data["allocation_id"],
            service=data.get("service") or service_hint,
        )
    except (KeyError, TypeError, ValueError) as exc:
        # TypeError covers a JSON body that parses but is not an object
        # (list, string, null), where subscripting by "url" fails.
        raise RuntimeError(
            f"cf-orch /api/inference/task returned malformed response: {exc} — "
            f"body: {resp.text[:200]}"
        ) from exc

    try:
        yield alloc
    finally:
        # Release is best-effort: a failed DELETE is logged and must not
        # mask an exception raised inside the caller's ``with`` body.
        try:
            httpx.delete(
                f"{base}/api/services/{alloc.service}/allocations/{alloc.allocation_id}",
                timeout=10.0,
            )
        except Exception as exc:
            logger.debug("cf-orch task allocation release failed (non-fatal): %s", exc)
|
||||||
|
|
@ -18,6 +18,10 @@ server {
|
||||||
proxy_set_header X-CF-Session $http_x_cf_session;
|
proxy_set_header X-CF-Session $http_x_cf_session;
|
||||||
# Allow image uploads (barcode/receipt photos from phone cameras).
|
# Allow image uploads (barcode/receipt photos from phone cameras).
|
||||||
client_max_body_size 20m;
|
client_max_body_size 20m;
|
||||||
|
# LLM inference (recipe suggestions, expiry fallback) can take 60-120s.
|
||||||
|
# Default proxy_read_timeout is 60s which causes 504s on full recipe generation.
|
||||||
|
proxy_read_timeout 180s;
|
||||||
|
proxy_send_timeout 180s;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Direct-port LAN access (localhost:8515): when VITE_API_BASE='/kiwi', the frontend
|
# Direct-port LAN access (localhost:8515): when VITE_API_BASE='/kiwi', the frontend
|
||||||
|
|
@ -34,6 +38,8 @@ server {
|
||||||
proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
|
proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
|
||||||
proxy_set_header X-CF-Session $http_x_cf_session;
|
proxy_set_header X-CF-Session $http_x_cf_session;
|
||||||
client_max_body_size 20m;
|
client_max_body_size 20m;
|
||||||
|
proxy_read_timeout 180s;
|
||||||
|
proxy_send_timeout 180s;
|
||||||
}
|
}
|
||||||
|
|
||||||
# When accessed directly (localhost:8515) instead of via Caddy (/kiwi path-strip),
|
# When accessed directly (localhost:8515) instead of via Caddy (/kiwi path-strip),
|
||||||
|
|
|
||||||
|
|
@ -106,6 +106,39 @@
|
||||||
<span class="form-hint">How you appear on posts -- not your real name or email.</span>
|
<span class="form-hint">How you appear on posts -- not your real name or email.</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Similarity check results -->
|
||||||
|
<div
|
||||||
|
v-if="similarPosts.length > 0"
|
||||||
|
class="similar-panel"
|
||||||
|
role="region"
|
||||||
|
aria-label="Similar stories found"
|
||||||
|
>
|
||||||
|
<p class="similar-heading text-sm">
|
||||||
|
<strong>Similar stories already exist.</strong>
|
||||||
|
You can publish as-is, mark yours as a variation, or cancel.
|
||||||
|
</p>
|
||||||
|
<ul class="similar-list" aria-label="Existing similar posts">
|
||||||
|
<li
|
||||||
|
v-for="hit in similarPosts"
|
||||||
|
:key="hit.slug"
|
||||||
|
class="similar-item"
|
||||||
|
>
|
||||||
|
<span class="similar-tier-badge" :class="`tier-${hit.similarity_tier}`">
|
||||||
|
{{ tierLabel(hit.similarity_tier) }}
|
||||||
|
</span>
|
||||||
|
<span class="similar-title">{{ hit.title }}</span>
|
||||||
|
<span class="similar-by text-muted text-xs">by {{ hit.pseudonym }}</span>
|
||||||
|
<button
|
||||||
|
class="btn-link text-xs"
|
||||||
|
:class="{ 'selected-ref': selectedRef === hit.slug }"
|
||||||
|
@click="toggleRef(hit.slug)"
|
||||||
|
>
|
||||||
|
{{ selectedRef === hit.slug ? 'Unmark variation' : 'Mark as variation' }}
|
||||||
|
</button>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Submission feedback (aria-live region, always rendered) -->
|
<!-- Submission feedback (aria-live region, always rendered) -->
|
||||||
<div
|
<div
|
||||||
class="feedback-region"
|
class="feedback-region"
|
||||||
|
|
@ -119,13 +152,24 @@
|
||||||
<!-- Footer actions -->
|
<!-- Footer actions -->
|
||||||
<div class="modal-footer flex gap-sm">
|
<div class="modal-footer flex gap-sm">
|
||||||
<button
|
<button
|
||||||
|
v-if="!similarPosts.length || similarChecked"
|
||||||
class="btn btn-primary"
|
class="btn btn-primary"
|
||||||
:disabled="submitting || !title.trim()"
|
:disabled="submitting || !title.trim()"
|
||||||
:aria-busy="submitting"
|
:aria-busy="submitting"
|
||||||
@click="onSubmit"
|
@click="onSubmit"
|
||||||
>
|
>
|
||||||
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
|
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
|
||||||
{{ submitting ? 'Publishing...' : 'Publish' }}
|
{{ submitting ? 'Publishing...' : (selectedRef ? 'Publish as variation' : 'Publish') }}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
v-else
|
||||||
|
class="btn btn-primary"
|
||||||
|
:disabled="checking || !title.trim()"
|
||||||
|
:aria-busy="checking"
|
||||||
|
@click="onCheckThenSubmit"
|
||||||
|
>
|
||||||
|
<span v-if="checking" class="spinner spinner-sm" aria-hidden="true"></span>
|
||||||
|
{{ checking ? 'Checking...' : 'Publish' }}
|
||||||
</button>
|
</button>
|
||||||
<button class="btn btn-secondary" @click="$emit('close')">
|
<button class="btn btn-secondary" @click="$emit('close')">
|
||||||
Cancel
|
Cancel
|
||||||
|
|
@ -139,7 +183,7 @@
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, onMounted, onUnmounted, nextTick } from 'vue'
|
import { ref, onMounted, onUnmounted, nextTick } from 'vue'
|
||||||
import { useCommunityStore } from '../stores/community'
|
import { useCommunityStore } from '../stores/community'
|
||||||
import type { PublishPayload } from '../stores/community'
|
import type { PublishPayload, SimilarPost, SimilarityTier } from '../stores/community'
|
||||||
|
|
||||||
const props = defineProps<{
|
const props = defineProps<{
|
||||||
recipeId: number | null
|
recipeId: number | null
|
||||||
|
|
@ -162,6 +206,21 @@ const submitting = ref(false)
|
||||||
const submitError = ref<string | null>(null)
|
const submitError = ref<string | null>(null)
|
||||||
const submitSuccess = ref<string | null>(null)
|
const submitSuccess = ref<string | null>(null)
|
||||||
|
|
||||||
|
const checking = ref(false)
|
||||||
|
const similarChecked = ref(false)
|
||||||
|
const similarPosts = ref<SimilarPost[]>([])
|
||||||
|
const selectedRef = ref<string | null>(null)
|
||||||
|
|
||||||
|
function tierLabel(tier: SimilarityTier): string {
  // Badge text for a similarity tier; any other tier reads as plain "Similar".
  switch (tier) {
    case 'exact_recipe':
      return 'Same recipe'
    case 'very_similar':
      return 'Very similar'
    default:
      return 'Similar'
  }
}
|
||||||
|
|
||||||
|
function toggleRef(slug: string) {
  // Clicking the post that is already selected clears the variation reference.
  const alreadySelected = selectedRef.value === slug
  selectedRef.value = alreadySelected ? null : slug
}
|
||||||
|
|
||||||
const dialogRef = ref<HTMLElement | null>(null)
|
const dialogRef = ref<HTMLElement | null>(null)
|
||||||
const firstFocusRef = ref<HTMLButtonElement | null>(null)
|
const firstFocusRef = ref<HTMLButtonElement | null>(null)
|
||||||
let previousFocus: HTMLElement | null = null
|
let previousFocus: HTMLElement | null = null
|
||||||
|
|
@ -215,6 +274,17 @@ onUnmounted(() => {
|
||||||
previousFocus?.focus()
|
previousFocus?.focus()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Runs the similarity check for the current title and publishes straight
// away when nothing similar is found; otherwise leaves the hits in
// `similarPosts` for the user to review.
async function onCheckThenSubmit() {
  if (!title.value.trim()) return
  checking.value = true
  try {
    similarPosts.value = await store.checkSimilar(title.value.trim(), props.recipeId, postType.value)
    similarChecked.value = true
  } finally {
    // Always clear the busy flag so a failed check cannot leave the
    // button permanently disabled with its spinner showing.
    checking.value = false
  }
  if (!similarPosts.value.length) {
    await onSubmit()
  }
}
|
||||||
|
|
||||||
async function onSubmit() {
|
async function onSubmit() {
|
||||||
submitError.value = null
|
submitError.value = null
|
||||||
submitSuccess.value = null
|
submitSuccess.value = null
|
||||||
|
|
@ -228,6 +298,7 @@ async function onSubmit() {
|
||||||
if (outcomeNotes.value.trim()) payload.outcome_notes = outcomeNotes.value.trim()
|
if (outcomeNotes.value.trim()) payload.outcome_notes = outcomeNotes.value.trim()
|
||||||
if (pseudonymName.value.trim()) payload.pseudonym_name = pseudonymName.value.trim()
|
if (pseudonymName.value.trim()) payload.pseudonym_name = pseudonymName.value.trim()
|
||||||
if (props.recipeId != null) payload.recipe_id = props.recipeId
|
if (props.recipeId != null) payload.recipe_id = props.recipeId
|
||||||
|
if (selectedRef.value) payload.similar_to_ref = selectedRef.value
|
||||||
|
|
||||||
submitting.value = true
|
submitting.value = true
|
||||||
try {
|
try {
|
||||||
|
|
@ -349,6 +420,82 @@ async function onSubmit() {
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.similar-panel {
|
||||||
|
background: var(--color-surface-alt, var(--color-surface));
|
||||||
|
border: 1px solid var(--color-warning, #f59e0b);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
padding: var(--spacing-sm) var(--spacing-md);
|
||||||
|
margin-bottom: var(--spacing-md);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-heading {
|
||||||
|
margin: 0 0 var(--spacing-sm);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-list {
|
||||||
|
list-style: none;
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--spacing-xs);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: baseline;
|
||||||
|
gap: var(--spacing-xs);
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-tier-badge {
|
||||||
|
font-size: var(--font-size-xs);
|
||||||
|
font-weight: 700;
|
||||||
|
padding: 1px 6px;
|
||||||
|
border-radius: var(--radius-sm);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tier-exact_recipe {
|
||||||
|
background: var(--color-error-bg, #fee2e2);
|
||||||
|
color: var(--color-error, #dc2626);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tier-very_similar {
|
||||||
|
background: var(--color-warning-bg, #fef3c7);
|
||||||
|
color: var(--color-warning-text, #92400e);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tier-somewhat_similar {
|
||||||
|
background: var(--color-surface-alt, #f3f4f6);
|
||||||
|
color: var(--color-text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-title {
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: var(--font-size-sm);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-by {
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-link {
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
color: var(--color-primary);
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 0;
|
||||||
|
text-decoration: underline;
|
||||||
|
font-size: var(--font-size-xs);
|
||||||
|
margin-left: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-link.selected-ref {
|
||||||
|
color: var(--color-success);
|
||||||
|
font-weight: 700;
|
||||||
|
}
|
||||||
|
|
||||||
@media (max-width: 480px) {
|
@media (max-width: 480px) {
|
||||||
.modal-panel {
|
.modal-panel {
|
||||||
max-height: 95vh;
|
max-height: 95vh;
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,39 @@
|
||||||
<span class="form-hint">How you appear on posts -- not your real name or email.</span>
|
<span class="form-hint">How you appear on posts -- not your real name or email.</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Similarity check results (shown before final confirm) -->
|
||||||
|
<div
|
||||||
|
v-if="similarPosts.length > 0"
|
||||||
|
class="similar-panel"
|
||||||
|
role="region"
|
||||||
|
aria-label="Similar posts found"
|
||||||
|
>
|
||||||
|
<p class="similar-heading text-sm">
|
||||||
|
<strong>Similar plans already exist.</strong>
|
||||||
|
You can publish as-is, mark yours as a variation, or cancel.
|
||||||
|
</p>
|
||||||
|
<ul class="similar-list" aria-label="Existing similar posts">
|
||||||
|
<li
|
||||||
|
v-for="hit in similarPosts"
|
||||||
|
:key="hit.slug"
|
||||||
|
class="similar-item"
|
||||||
|
>
|
||||||
|
<span class="similar-tier-badge" :class="`tier-${hit.similarity_tier}`">
|
||||||
|
{{ tierLabel(hit.similarity_tier) }}
|
||||||
|
</span>
|
||||||
|
<span class="similar-title">{{ hit.title }}</span>
|
||||||
|
<span class="similar-by text-muted text-xs">by {{ hit.pseudonym }}</span>
|
||||||
|
<button
|
||||||
|
class="btn-link text-xs"
|
||||||
|
:class="{ 'selected-ref': selectedRef === hit.slug }"
|
||||||
|
@click="toggleRef(hit.slug)"
|
||||||
|
>
|
||||||
|
{{ selectedRef === hit.slug ? 'Unmark variation' : 'Mark as variation' }}
|
||||||
|
</button>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Submission feedback (aria-live region, always rendered) -->
|
<!-- Submission feedback (aria-live region, always rendered) -->
|
||||||
<div
|
<div
|
||||||
class="feedback-region"
|
class="feedback-region"
|
||||||
|
|
@ -91,13 +124,24 @@
|
||||||
<!-- Footer actions -->
|
<!-- Footer actions -->
|
||||||
<div class="modal-footer flex gap-sm">
|
<div class="modal-footer flex gap-sm">
|
||||||
<button
|
<button
|
||||||
|
v-if="!similarPosts.length || similarChecked"
|
||||||
class="btn btn-primary"
|
class="btn btn-primary"
|
||||||
:disabled="submitting || !title.trim()"
|
:disabled="submitting || !title.trim()"
|
||||||
:aria-busy="submitting"
|
:aria-busy="submitting"
|
||||||
@click="onSubmit"
|
@click="onSubmit"
|
||||||
>
|
>
|
||||||
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
|
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
|
||||||
{{ submitting ? 'Publishing...' : 'Publish' }}
|
{{ submitting ? 'Publishing...' : (selectedRef ? 'Publish as variation' : 'Publish') }}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
v-else
|
||||||
|
class="btn btn-primary"
|
||||||
|
:disabled="checking || !title.trim()"
|
||||||
|
:aria-busy="checking"
|
||||||
|
@click="onCheckThenSubmit"
|
||||||
|
>
|
||||||
|
<span v-if="checking" class="spinner spinner-sm" aria-hidden="true"></span>
|
||||||
|
{{ checking ? 'Checking...' : 'Publish' }}
|
||||||
</button>
|
</button>
|
||||||
<button class="btn btn-secondary" @click="$emit('close')">
|
<button class="btn btn-secondary" @click="$emit('close')">
|
||||||
Cancel
|
Cancel
|
||||||
|
|
@ -111,7 +155,7 @@
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, onMounted, onUnmounted, nextTick } from 'vue'
|
import { ref, onMounted, onUnmounted, nextTick } from 'vue'
|
||||||
import { useCommunityStore } from '../stores/community'
|
import { useCommunityStore } from '../stores/community'
|
||||||
import type { PublishPayload } from '../stores/community'
|
import type { PublishPayload, SimilarPost, SimilarityTier } from '../stores/community'
|
||||||
|
|
||||||
const props = defineProps<{
|
const props = defineProps<{
|
||||||
plan?: {
|
plan?: {
|
||||||
|
|
@ -136,6 +180,21 @@ const submitting = ref(false)
|
||||||
const submitError = ref<string | null>(null)
|
const submitError = ref<string | null>(null)
|
||||||
const submitSuccess = ref<string | null>(null)
|
const submitSuccess = ref<string | null>(null)
|
||||||
|
|
||||||
|
const checking = ref(false)
|
||||||
|
const similarChecked = ref(false)
|
||||||
|
const similarPosts = ref<SimilarPost[]>([])
|
||||||
|
const selectedRef = ref<string | null>(null)
|
||||||
|
|
||||||
|
function tierLabel(tier: SimilarityTier): string {
  // Badge text for a similarity tier; any other tier reads as plain "Similar".
  switch (tier) {
    case 'exact_recipe':
      return 'Same recipe'
    case 'very_similar':
      return 'Very similar'
    default:
      return 'Similar'
  }
}
|
||||||
|
|
||||||
|
function toggleRef(slug: string) {
  // Clicking the post that is already selected clears the variation reference.
  const alreadySelected = selectedRef.value === slug
  selectedRef.value = alreadySelected ? null : slug
}
|
||||||
|
|
||||||
const dialogRef = ref<HTMLElement | null>(null)
|
const dialogRef = ref<HTMLElement | null>(null)
|
||||||
const firstFocusRef = ref<HTMLInputElement | null>(null)
|
const firstFocusRef = ref<HTMLInputElement | null>(null)
|
||||||
let previousFocus: HTMLElement | null = null
|
let previousFocus: HTMLElement | null = null
|
||||||
|
|
@ -189,6 +248,19 @@ onUnmounted(() => {
|
||||||
previousFocus?.focus()
|
previousFocus?.focus()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Runs the similarity check for the plan (keyed on its first slot's recipe,
// if any) and publishes straight away when nothing similar is found;
// otherwise leaves the hits in `similarPosts` for the user to review.
async function onCheckThenSubmit() {
  if (!title.value.trim()) return
  checking.value = true
  try {
    const planRecipeIds = props.plan?.slots?.map((s) => s.recipe_id) ?? []
    const firstRecipeId = planRecipeIds[0] ?? null
    similarPosts.value = await store.checkSimilar(title.value.trim(), firstRecipeId, 'plan')
    similarChecked.value = true
  } finally {
    // Always clear the busy flag so a failed check cannot leave the
    // button permanently disabled with its spinner showing.
    checking.value = false
  }
  if (!similarPosts.value.length) {
    await onSubmit()
  }
}
|
||||||
|
|
||||||
async function onSubmit() {
|
async function onSubmit() {
|
||||||
submitError.value = null
|
submitError.value = null
|
||||||
submitSuccess.value = null
|
submitSuccess.value = null
|
||||||
|
|
@ -205,6 +277,7 @@ async function onSubmit() {
|
||||||
if (props.plan?.slots?.length) {
|
if (props.plan?.slots?.length) {
|
||||||
payload.slots = props.plan.slots.map(({ day, meal_type, recipe_id }) => ({ day, meal_type, recipe_id }))
|
payload.slots = props.plan.slots.map(({ day, meal_type, recipe_id }) => ({ day, meal_type, recipe_id }))
|
||||||
}
|
}
|
||||||
|
if (selectedRef.value) payload.similar_to_ref = selectedRef.value
|
||||||
|
|
||||||
submitting.value = true
|
submitting.value = true
|
||||||
try {
|
try {
|
||||||
|
|
@ -295,6 +368,82 @@ async function onSubmit() {
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.similar-panel {
|
||||||
|
background: var(--color-surface-alt, var(--color-surface));
|
||||||
|
border: 1px solid var(--color-warning, #f59e0b);
|
||||||
|
border-radius: var(--radius-md);
|
||||||
|
padding: var(--spacing-sm) var(--spacing-md);
|
||||||
|
margin-bottom: var(--spacing-md);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-heading {
|
||||||
|
margin: 0 0 var(--spacing-sm);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-list {
|
||||||
|
list-style: none;
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: var(--spacing-xs);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-item {
|
||||||
|
display: flex;
|
||||||
|
align-items: baseline;
|
||||||
|
gap: var(--spacing-xs);
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-tier-badge {
|
||||||
|
font-size: var(--font-size-xs);
|
||||||
|
font-weight: 700;
|
||||||
|
padding: 1px 6px;
|
||||||
|
border-radius: var(--radius-sm);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tier-exact_recipe {
|
||||||
|
background: var(--color-error-bg, #fee2e2);
|
||||||
|
color: var(--color-error, #dc2626);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tier-very_similar {
|
||||||
|
background: var(--color-warning-bg, #fef3c7);
|
||||||
|
color: var(--color-warning-text, #92400e);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tier-somewhat_similar {
|
||||||
|
background: var(--color-surface-alt, #f3f4f6);
|
||||||
|
color: var(--color-text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-title {
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: var(--font-size-sm);
|
||||||
|
}
|
||||||
|
|
||||||
|
.similar-by {
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-link {
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
color: var(--color-primary);
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 0;
|
||||||
|
text-decoration: underline;
|
||||||
|
font-size: var(--font-size-xs);
|
||||||
|
margin-left: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.btn-link.selected-ref {
|
||||||
|
color: var(--color-success);
|
||||||
|
font-weight: 700;
|
||||||
|
}
|
||||||
|
|
||||||
@media (max-width: 480px) {
|
@media (max-width: 480px) {
|
||||||
.modal-panel {
|
.modal-panel {
|
||||||
max-height: 95vh;
|
max-height: 95vh;
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@
|
||||||
v-for="domain in domains"
|
v-for="domain in domains"
|
||||||
:key="domain.id"
|
:key="domain.id"
|
||||||
:class="['btn', activeDomain === domain.id ? 'btn-primary' : 'btn-secondary']"
|
:class="['btn', activeDomain === domain.id ? 'btn-primary' : 'btn-secondary']"
|
||||||
|
:aria-pressed="activeDomain === domain.id"
|
||||||
@click="selectDomain(domain.id)"
|
@click="selectDomain(domain.id)"
|
||||||
>
|
>
|
||||||
{{ domain.label }}
|
{{ domain.label }}
|
||||||
|
|
@ -24,6 +25,7 @@
|
||||||
<div v-else class="category-list mb-sm flex flex-wrap gap-xs">
|
<div v-else class="category-list mb-sm flex flex-wrap gap-xs">
|
||||||
<button
|
<button
|
||||||
:class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === '_all' }]"
|
:class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === '_all' }]"
|
||||||
|
:aria-pressed="activeCategory === '_all'"
|
||||||
@click="selectCategory('_all')"
|
@click="selectCategory('_all')"
|
||||||
>
|
>
|
||||||
All
|
All
|
||||||
|
|
@ -32,6 +34,7 @@
|
||||||
v-for="cat in categories"
|
v-for="cat in categories"
|
||||||
:key="cat.category"
|
:key="cat.category"
|
||||||
:class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === cat.category }]"
|
:class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === cat.category }]"
|
||||||
|
:aria-pressed="activeCategory === cat.category"
|
||||||
@click="selectCategory(cat.category)"
|
@click="selectCategory(cat.category)"
|
||||||
>
|
>
|
||||||
{{ cat.category }}
|
{{ cat.category }}
|
||||||
|
|
@ -57,6 +60,7 @@
|
||||||
<template v-else>
|
<template v-else>
|
||||||
<button
|
<button
|
||||||
:class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === null }]"
|
:class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === null }]"
|
||||||
|
:aria-pressed="activeSubcategory === null"
|
||||||
@click="selectSubcategory(null)"
|
@click="selectSubcategory(null)"
|
||||||
>
|
>
|
||||||
All {{ activeCategory }}
|
All {{ activeCategory }}
|
||||||
|
|
@ -65,6 +69,7 @@
|
||||||
v-for="sub in subcategories"
|
v-for="sub in subcategories"
|
||||||
:key="sub.subcategory"
|
:key="sub.subcategory"
|
||||||
:class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === sub.subcategory }]"
|
:class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === sub.subcategory }]"
|
||||||
|
:aria-pressed="activeSubcategory === sub.subcategory"
|
||||||
@click="selectSubcategory(sub.subcategory)"
|
@click="selectSubcategory(sub.subcategory)"
|
||||||
>
|
>
|
||||||
{{ sub.subcategory }}
|
{{ sub.subcategory }}
|
||||||
|
|
@ -79,6 +84,25 @@
|
||||||
</template>
|
</template>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Browse breadcrumb — shows current position in domain > category > subcategory hierarchy -->
|
||||||
|
<nav v-if="activeDomain && activeCategory" class="browse-breadcrumb" aria-label="Browse location">
|
||||||
|
<button
|
||||||
|
class="crumb-btn"
|
||||||
|
@click="selectDomain(activeDomain)"
|
||||||
|
:aria-current="!activeCategory ? 'page' : undefined"
|
||||||
|
>{{ domains.find(d => d.id === activeDomain)?.label ?? activeDomain }}</button>
|
||||||
|
<span class="crumb-sep" aria-hidden="true">›</span>
|
||||||
|
<button
|
||||||
|
class="crumb-btn"
|
||||||
|
@click="selectCategory(activeCategory)"
|
||||||
|
:aria-current="!activeSubcategory ? 'page' : undefined"
|
||||||
|
>{{ activeCategory === '_all' ? 'All' : activeCategory }}</button>
|
||||||
|
<template v-if="activeSubcategory">
|
||||||
|
<span class="crumb-sep" aria-hidden="true">›</span>
|
||||||
|
<span class="crumb-current" aria-current="page">{{ activeSubcategory }}</span>
|
||||||
|
</template>
|
||||||
|
</nav>
|
||||||
|
|
||||||
<!-- Recipe grid -->
|
<!-- Recipe grid -->
|
||||||
<template v-if="activeCategory">
|
<template v-if="activeCategory">
|
||||||
<div v-if="loadingRecipes" class="text-secondary text-sm">Loading recipes…</div>
|
<div v-if="loadingRecipes" class="text-secondary text-sm">Loading recipes…</div>
|
||||||
|
|
@ -105,21 +129,25 @@
|
||||||
<div class="sort-btns flex gap-xs">
|
<div class="sort-btns flex gap-xs">
|
||||||
<button
|
<button
|
||||||
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'default' }]"
|
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'default' }]"
|
||||||
|
:aria-pressed="sortOrder === 'default'"
|
||||||
@click="setSort('default')"
|
@click="setSort('default')"
|
||||||
title="Corpus order"
|
title="Corpus order"
|
||||||
>Default</button>
|
>Default</button>
|
||||||
<button
|
<button
|
||||||
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha' }]"
|
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha' }]"
|
||||||
|
:aria-pressed="sortOrder === 'alpha'"
|
||||||
@click="setSort('alpha')"
|
@click="setSort('alpha')"
|
||||||
title="Alphabetical A→Z"
|
title="Alphabetical A→Z"
|
||||||
>A→Z</button>
|
>A→Z</button>
|
||||||
<button
|
<button
|
||||||
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha_desc' }]"
|
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha_desc' }]"
|
||||||
|
:aria-pressed="sortOrder === 'alpha_desc'"
|
||||||
@click="setSort('alpha_desc')"
|
@click="setSort('alpha_desc')"
|
||||||
title="Alphabetical Z→A"
|
title="Alphabetical Z→A"
|
||||||
>Z→A</button>
|
>Z→A</button>
|
||||||
<button
|
<button
|
||||||
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'match' }]"
|
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'match' }]"
|
||||||
|
:aria-pressed="sortOrder === 'match'"
|
||||||
:disabled="pantryCount === 0"
|
:disabled="pantryCount === 0"
|
||||||
@click="setSort('match')"
|
@click="setSort('match')"
|
||||||
:title="pantryCount > 0 ? 'Sort by pantry match %' : 'Add items to pantry to sort by match'"
|
:title="pantryCount > 0 ? 'Sort by pantry match %' : 'Add items to pantry to sort by match'"
|
||||||
|
|
@ -128,7 +156,11 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="results-header flex-between mb-sm">
|
<div class="results-header flex-between mb-sm">
|
||||||
<span class="text-sm text-secondary">
|
<span
|
||||||
|
class="text-sm text-secondary"
|
||||||
|
aria-live="polite"
|
||||||
|
aria-atomic="true"
|
||||||
|
>
|
||||||
{{ total }} recipes
|
{{ total }} recipes
|
||||||
<span v-if="pantryCount > 0"> — pantry match shown</span>
|
<span v-if="pantryCount > 0"> — pantry match shown</span>
|
||||||
<span v-if="requiredIngredient.trim()"> — must include "{{ requiredIngredient.trim() }}"</span>
|
<span v-if="requiredIngredient.trim()"> — must include "{{ requiredIngredient.trim() }}"</span>
|
||||||
|
|
@ -137,12 +169,14 @@
|
||||||
<button
|
<button
|
||||||
class="btn btn-secondary btn-xs"
|
class="btn btn-secondary btn-xs"
|
||||||
:disabled="page <= 1"
|
:disabled="page <= 1"
|
||||||
|
aria-label="Previous page"
|
||||||
@click="changePage(page - 1)"
|
@click="changePage(page - 1)"
|
||||||
>‹ Prev</button>
|
>‹ Prev</button>
|
||||||
<span class="text-sm text-secondary page-indicator">{{ page }} / {{ totalPages }}</span>
|
<span class="text-sm text-secondary page-indicator" aria-live="polite">{{ page }} / {{ totalPages }}</span>
|
||||||
<button
|
<button
|
||||||
class="btn btn-secondary btn-xs"
|
class="btn btn-secondary btn-xs"
|
||||||
:disabled="page >= totalPages"
|
:disabled="page >= totalPages"
|
||||||
|
aria-label="Next page"
|
||||||
@click="changePage(page + 1)"
|
@click="changePage(page + 1)"
|
||||||
>Next ›</button>
|
>Next ›</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -854,4 +888,40 @@ async function submitTag() {
|
||||||
font-size: 0.875rem;
|
font-size: 0.875rem;
|
||||||
margin-left: 0.5rem;
|
margin-left: 0.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ── Browse breadcrumb ───────────────────────────────────────────────────── */
|
||||||
|
.browse-breadcrumb {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 2px;
|
||||||
|
margin-bottom: var(--spacing-sm);
|
||||||
|
font-size: var(--font-size-xs, 0.78rem);
|
||||||
|
color: var(--color-text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.crumb-btn {
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
padding: 2px 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
color: var(--color-primary);
|
||||||
|
font-size: inherit;
|
||||||
|
border-radius: var(--radius-sm);
|
||||||
|
}
|
||||||
|
|
||||||
|
.crumb-btn:hover {
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
.crumb-sep {
|
||||||
|
opacity: 0.5;
|
||||||
|
padding: 0 2px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.crumb-current {
|
||||||
|
padding: 2px 4px;
|
||||||
|
color: var(--color-text);
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|
|
||||||
|
|
@ -112,8 +112,8 @@
|
||||||
<path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/>
|
<path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/>
|
||||||
<circle cx="12" cy="13" r="4"/>
|
<circle cx="12" cy="13" r="4"/>
|
||||||
</svg>
|
</svg>
|
||||||
<p class="processing-label">Extracting recipe from {{ selectedFiles.length > 1 ? selectedFiles.length + ' photos' : 'photo' }}...</p>
|
<p class="processing-label">{{ scanStatusMessage }}</p>
|
||||||
<p class="processing-sub">This can take 10-30 seconds.</p>
|
<p class="processing-sub">This can take up to a minute on first use.</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -329,13 +329,18 @@ function removeFile(index: number) {
|
||||||
|
|
||||||
// ── Scan ──────────────────────────────────────────────────────────────────────
|
// ── Scan ──────────────────────────────────────────────────────────────────────
|
||||||
const extracted = ref<ScannedRecipe | null>(null)
|
const extracted = ref<ScannedRecipe | null>(null)
|
||||||
|
const scanStatusMessage = ref('Uploading photos...')
|
||||||
|
|
||||||
async function startScan() {
|
async function startScan() {
|
||||||
if (selectedFiles.value.length === 0) return
|
if (selectedFiles.value.length === 0) return
|
||||||
uploadError.value = ''
|
uploadError.value = ''
|
||||||
|
scanStatusMessage.value = 'Uploading photos...'
|
||||||
phase.value = 'processing'
|
phase.value = 'processing'
|
||||||
try {
|
try {
|
||||||
const result = await recipeScanAPI.scan(selectedFiles.value)
|
const result = await recipeScanAPI.scanStream(
|
||||||
|
selectedFiles.value,
|
||||||
|
(_status: string, message: string) => { scanStatusMessage.value = message },
|
||||||
|
)
|
||||||
extracted.value = result
|
extracted.value = result
|
||||||
initEditState(result)
|
initEditState(result)
|
||||||
phase.value = 'review'
|
phase.value = 'review'
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -2,6 +2,7 @@
|
||||||
<div class="settings-view">
|
<div class="settings-view">
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h2 class="section-title text-xl mb-md">Settings</h2>
|
<h2 class="section-title text-xl mb-md">Settings</h2>
|
||||||
|
<p class="text-xs text-muted mb-md">Changes save automatically.</p>
|
||||||
|
|
||||||
<!-- Cooking Equipment -->
|
<!-- Cooking Equipment -->
|
||||||
<section>
|
<section>
|
||||||
|
|
@ -19,7 +20,7 @@
|
||||||
class="tag-chip status-badge status-info"
|
class="tag-chip status-badge status-info"
|
||||||
>
|
>
|
||||||
{{ item }}
|
{{ item }}
|
||||||
<button class="chip-remove" @click="removeEquipment(item)" aria-label="Remove">×</button>
|
<button class="chip-remove" @click="removeEquipment(item)" :aria-label="'Remove equipment: ' + item">×</button>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -50,18 +51,6 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Save button -->
|
|
||||||
<div class="flex-start gap-sm">
|
|
||||||
<button
|
|
||||||
class="btn btn-primary"
|
|
||||||
:disabled="settingsStore.loading"
|
|
||||||
@click="settingsStore.save()"
|
|
||||||
>
|
|
||||||
<span v-if="settingsStore.loading">Saving…</span>
|
|
||||||
<span v-else-if="settingsStore.saved">✓ Saved!</span>
|
|
||||||
<span v-else>Save Settings</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Sensory Preferences -->
|
<!-- Sensory Preferences -->
|
||||||
|
|
@ -134,17 +123,6 @@
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="flex-start gap-sm mt-sm">
|
|
||||||
<button
|
|
||||||
class="btn btn-primary btn-sm"
|
|
||||||
:disabled="settingsStore.loading"
|
|
||||||
@click="settingsStore.saveSensory()"
|
|
||||||
>
|
|
||||||
<span v-if="settingsStore.loading">Saving…</span>
|
|
||||||
<span v-else-if="settingsStore.saved">Saved!</span>
|
|
||||||
<span v-else>Save sensory preferences</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Units -->
|
<!-- Units -->
|
||||||
|
|
@ -169,17 +147,6 @@
|
||||||
Imperial (oz, cups, °F)
|
Imperial (oz, cups, °F)
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex-start gap-sm">
|
|
||||||
<button
|
|
||||||
class="btn btn-primary btn-sm"
|
|
||||||
:disabled="settingsStore.loading"
|
|
||||||
@click="settingsStore.save()"
|
|
||||||
>
|
|
||||||
<span v-if="settingsStore.loading">Saving…</span>
|
|
||||||
<span v-else-if="settingsStore.saved">✓ Saved!</span>
|
|
||||||
<span v-else>Save</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Shopping Locale -->
|
<!-- Shopping Locale -->
|
||||||
|
|
@ -220,17 +187,6 @@
|
||||||
<option value="br">Brazil (BRL R$)</option>
|
<option value="br">Brazil (BRL R$)</option>
|
||||||
</optgroup>
|
</optgroup>
|
||||||
</select>
|
</select>
|
||||||
<div class="flex-start gap-sm mt-sm">
|
|
||||||
<button
|
|
||||||
class="btn btn-primary btn-sm"
|
|
||||||
:disabled="settingsStore.loading"
|
|
||||||
@click="settingsStore.save()"
|
|
||||||
>
|
|
||||||
<span v-if="settingsStore.loading">Saving…</span>
|
|
||||||
<span v-else-if="settingsStore.saved">✓ Saved!</span>
|
|
||||||
<span v-else>Save</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Time-First Layout -->
|
<!-- Time-First Layout -->
|
||||||
|
|
@ -258,17 +214,6 @@
|
||||||
</span>
|
</span>
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex-start gap-sm mt-sm">
|
|
||||||
<button
|
|
||||||
class="btn btn-primary btn-sm"
|
|
||||||
:disabled="settingsStore.loading"
|
|
||||||
@click="settingsStore.save()"
|
|
||||||
>
|
|
||||||
<span v-if="settingsStore.loading">Saving…</span>
|
|
||||||
<span v-else-if="settingsStore.saved">✓ Saved!</span>
|
|
||||||
<span v-else>Save</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Data Sharing (cloud only) -->
|
<!-- Data Sharing (cloud only) -->
|
||||||
|
|
@ -393,6 +338,12 @@
|
||||||
</template>
|
</template>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<Transition name="autosave-fade">
|
||||||
|
<div v-if="settingsStore.saved" class="autosave-toast" role="status" aria-live="polite">
|
||||||
|
✓ Saved
|
||||||
|
</div>
|
||||||
|
</Transition>
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
|
|
@ -871,4 +822,32 @@ function getNoiseClass(_value: NoiseLevel, idx: number): string {
|
||||||
border-color: var(--color-border, #e0e0e0);
|
border-color: var(--color-border, #e0e0e0);
|
||||||
color: var(--color-text-secondary, #888);
|
color: var(--color-text-secondary, #888);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ── Autosave toast ──────────────────────────────────────────────────────── */
|
||||||
|
|
||||||
|
.autosave-toast {
|
||||||
|
position: fixed;
|
||||||
|
bottom: 1.5rem;
|
||||||
|
right: 1.5rem;
|
||||||
|
background: var(--color-surface, #fff);
|
||||||
|
border: 1px solid var(--color-border, #e0e0e0);
|
||||||
|
border-radius: var(--radius-md, 0.5rem);
|
||||||
|
padding: 0.4rem 0.9rem;
|
||||||
|
font-size: var(--font-size-sm);
|
||||||
|
color: var(--color-success, #4a8c40);
|
||||||
|
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.12);
|
||||||
|
z-index: 500;
|
||||||
|
pointer-events: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.autosave-fade-enter-active,
|
||||||
|
.autosave-fade-leave-active {
|
||||||
|
transition: opacity 0.25s ease, transform 0.25s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.autosave-fade-enter-from,
|
||||||
|
.autosave-fade-leave-to {
|
||||||
|
opacity: 0;
|
||||||
|
transform: translateY(0.5rem);
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|
@ -627,6 +627,7 @@ export interface RecipeRequest {
|
||||||
complexity_filter: string | null
|
complexity_filter: string | null
|
||||||
max_time_min: number | null
|
max_time_min: number | null
|
||||||
max_total_min: number | null
|
max_total_min: number | null
|
||||||
|
max_active_min: number | null
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Staple {
|
export interface Staple {
|
||||||
|
|
@ -670,6 +671,21 @@ export interface BuildRequest {
|
||||||
role_overrides: Record<string, string>
|
role_overrides: Record<string, string>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Ask/RAG types ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export interface AskRecipeHit {
|
||||||
|
id: number
|
||||||
|
title: string
|
||||||
|
match_pct: number | null
|
||||||
|
category: string | null
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AskResponse {
|
||||||
|
answer: string | null
|
||||||
|
recipes: AskRecipeHit[]
|
||||||
|
tier: string
|
||||||
|
}
|
||||||
|
|
||||||
// ========== Recipes API ==========
|
// ========== Recipes API ==========
|
||||||
|
|
||||||
export const recipesAPI = {
|
export const recipesAPI = {
|
||||||
|
|
@ -736,6 +752,60 @@ export const recipesAPI = {
|
||||||
})
|
})
|
||||||
return response.data
|
return response.data
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
 * Natural-language recipe search with optional LLM synthesis (Paid tier).
 *
 * POSTs the question and pantry list to `/recipes/ask` (30 s timeout) and
 * resolves with the response payload.
 */
async ask(question: string, pantryItems: string[] = []): Promise<AskResponse> {
  const payload = { question, pantry_items: pantryItems }
  const { data } = await api.post('/recipes/ask', payload, { timeout: 30000 })
  return data
},
|
||||||
|
|
||||||
|
/**
 * Stream a recipe via native SSE (Ollama fallback). Calls callbacks as tokens arrive.
 *
 * POSTs `req` to `/recipes/suggest?stream=true` and reads `data: {json}` events:
 *   - `{ chunk }` -> onChunk(chunk)
 *   - `{ done }`  -> onDone(), stream finished
 *   - `{ error }` -> onError(error), stream finished
 * Exactly one of onDone / onError is invoked per call. Transport failures
 * (request, HTTP status, mid-stream read) are reported through onError rather
 * than by rejecting the returned promise.
 */
async suggestRecipeStream(
  req: RecipeRequest,
  onChunk: (chunk: string) => void,
  onDone: () => void,
  onError: (err: string) => void,
): Promise<void> {
  const baseUrl = (api.defaults.baseURL ?? '') as string
  let response: Response
  try {
    response = await fetch(`${baseUrl}/recipes/suggest?stream=true`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(req),
    })
  } catch (err: unknown) {
    onError(err instanceof Error ? err.message : 'Network error')
    return
  }

  if (!response.ok) {
    onError(`HTTP ${response.status}`)
    return
  }

  const reader = response.body?.getReader()
  if (!reader) { onError('No response body'); return }

  // Handles one SSE event block. Returns true once a terminal event
  // (done / error) has been delivered to the caller.
  const dispatch = (part: string): boolean => {
    if (!part.startsWith('data: ')) return false
    let data: Record<string, unknown> | null
    // Only the parse is guarded: malformed events are skipped, but an
    // exception thrown by a callback is a real bug and must not be hidden.
    try { data = JSON.parse(part.slice(6)) } catch { return false }
    if (data === null || typeof data !== 'object') return false
    if (data.done) { onDone(); return true }
    if (data.error) { onError(data.error as string); return true }
    if (data.chunk) onChunk(data.chunk as string)
    return false
  }

  const decoder = new TextDecoder()
  let buffer = ''
  try {
    while (true) {
      let result: ReadableStreamReadResult<Uint8Array>
      try {
        result = await reader.read()
      } catch (err: unknown) {
        // Connection dropped mid-stream: report it like any other failure.
        onError(err instanceof Error ? err.message : 'Stream read failed')
        return
      }

      const { done, value } = result
      // On the final read, flush bytes the decoder is still holding.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true })

      // Events are separated by a blank line; accept LF and CRLF framing.
      const parts = buffer.split(/\r?\n\r?\n/)
      // While streaming, the last piece may be incomplete and is kept for the
      // next read. At end of stream everything left is processed.
      buffer = done ? '' : (parts.pop() ?? '')

      for (const part of parts) {
        if (dispatch(part)) return
      }

      if (done) { onDone(); return }
    }
  } finally {
    // Release the connection when we stop early; harmless if already closed.
    reader.cancel().catch(() => { /* stream already closed or errored */ })
  }
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// ========== Settings API ==========
|
// ========== Settings API ==========
|
||||||
|
|
@ -1256,6 +1326,56 @@ export const recipeScanAPI = {
|
||||||
}).then((r) => r.data)
|
}).then((r) => r.data)
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/** Scan recipe photos with live SSE progress events.
 *
 * Uploads `files` as multipart form data to `/recipes/scan/stream` and reads
 * `data: {json}` lines from the response:
 *   - status "done"  -> resolves with `recipe`
 *   - status "error" -> rejects with `message` (or "Scan failed")
 *   - anything else  -> onProgress(status, message)
 *     (e.g. "allocating", "scanning", "structuring")
 *
 * Rejects when the HTTP request fails or the stream ends without a "done"
 * event. A final event that is not newline-terminated is still processed.
 */
async scanStream(
  files: File[],
  onProgress: (status: string, message: string) => void,
): Promise<ScannedRecipe> {
  const form = new FormData()
  for (const file of files) form.append('files', file)

  const response = await fetch(`${API_BASE_URL}/recipes/scan/stream`, {
    method: 'POST',
    body: form,
  })

  if (!response.ok || !response.body) {
    let detail = ''
    try { detail = await response.text() } catch (_) { /* ignore */ }
    throw new Error(detail || `Scan failed (${response.status})`)
  }

  const reader = response.body.getReader()
  const decoder = new TextDecoder()
  let buffer = ''

  // Handles one line of the stream. Returns the wrapped recipe on "done",
  // throws on "error", and returns null for progress / non-data lines.
  const handleLine = (line: string): { recipe: ScannedRecipe } | null => {
    if (!line.startsWith('data: ')) return null
    let data: Record<string, unknown>
    try { data = JSON.parse(line.slice(6)) } catch { return null }

    if (data.status === 'done') return { recipe: data.recipe as ScannedRecipe }
    if (data.status === 'error') throw new Error((data.message as string) || 'Scan failed')
    onProgress(data.status as string, data.message as string)
    return null
  }

  try {
    while (true) {
      const { done, value } = await reader.read()
      // On the final read, flush bytes the decoder is still holding.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true })

      const lines = buffer.split('\n')
      // While streaming, the last piece may be a partial line and is kept for
      // the next read. At end of stream it is processed too, so a result
      // without a trailing newline is not lost.
      buffer = done ? '' : (lines.pop() ?? '')

      for (const line of lines) {
        const finished = handleLine(line)
        if (finished) return finished.recipe
      }

      if (done) break
    }
  } finally {
    // Release the connection on early return / throw; harmless if closed.
    reader.cancel().catch(() => { /* stream already closed or errored */ })
  }

  throw new Error('Stream ended without a result')
},
|
||||||
|
|
||||||
/** Save a reviewed/edited scanned recipe to user_recipes. */
|
/** Save a reviewed/edited scanned recipe to user_recipes. */
|
||||||
saveScanned(recipe: Omit<ScannedRecipe, 'pantry_match_pct' | 'confidence' | 'warnings'> & { source?: string }): Promise<UserRecipe> {
|
saveScanned(recipe: Omit<ScannedRecipe, 'pantry_match_pct' | 'confidence' | 'warnings'> & { source?: string }): Promise<UserRecipe> {
|
||||||
return api.post('/recipes/scan/save', recipe).then((r) => r.data)
|
return api.post('/recipes/scan/save', recipe).then((r) => r.data)
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,20 @@ export interface PublishPayload {
|
||||||
recipe_id?: number
|
recipe_id?: number
|
||||||
outcome_notes?: string
|
outcome_notes?: string
|
||||||
slots?: CommunityPostSlot[]
|
slots?: CommunityPostSlot[]
|
||||||
|
similar_to_ref?: string
|
||||||
|
}
|
||||||
|
|
||||||
|
export type SimilarityTier = 'exact_recipe' | 'very_similar' | 'somewhat_similar'
|
||||||
|
|
||||||
|
export interface SimilarPost {
|
||||||
|
slug: string
|
||||||
|
title: string
|
||||||
|
recipe_name: string | null
|
||||||
|
pseudonym: string
|
||||||
|
published: string
|
||||||
|
similarity_tier: SimilarityTier
|
||||||
|
jaccard_score: number | null
|
||||||
|
tier_description: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PublishResult {
|
export interface PublishResult {
|
||||||
|
|
@ -107,6 +121,25 @@ export const useCommunityStore = defineStore('community', () => {
|
||||||
return response.data
|
return response.data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Ask the backend for community posts similar to a draft.
 *
 * Sends `title`, plus `recipe_id` / `post_type` when provided, to
 * `/community/check-similar` and resolves with the `similar_posts` array.
 * Any failure resolves to an empty list instead of throwing.
 */
async function checkSimilar(
  title: string,
  recipeId?: number | null,
  postType?: string,
): Promise<SimilarPost[]> {
  // Optional fields are only included when they carry a value.
  const payload: Record<string, unknown> = {
    title,
    ...(recipeId != null ? { recipe_id: recipeId } : {}),
    ...(postType ? { post_type: postType } : {}),
  }

  try {
    const { data } = await api.post<{ similar_posts: SimilarPost[] }>(
      '/community/check-similar',
      payload,
    )
    return data.similar_posts
  } catch {
    // Best-effort lookup: a failed check is treated as "nothing similar".
    return []
  }
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
posts,
|
posts,
|
||||||
loading,
|
loading,
|
||||||
|
|
@ -115,5 +148,6 @@ export const useCommunityStore = defineStore('community', () => {
|
||||||
fetchPosts,
|
fetchPosts,
|
||||||
forkPost,
|
forkPost,
|
||||||
publishPost,
|
publishPost,
|
||||||
|
checkSimilar,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,7 @@ export const useRecipesStore = defineStore('recipes', () => {
|
||||||
const complexityFilter = ref<string | null>(null)
|
const complexityFilter = ref<string | null>(null)
|
||||||
const maxTimeMin = ref<number | null>(null)
|
const maxTimeMin = ref<number | null>(null)
|
||||||
const maxTotalMin = ref<number | null>(null)
|
const maxTotalMin = ref<number | null>(null)
|
||||||
|
const maxActiveMin = ref<number | null>(null)
|
||||||
const nutritionFilters = ref<NutritionFilters>({
|
const nutritionFilters = ref<NutritionFilters>({
|
||||||
max_calories: null,
|
max_calories: null,
|
||||||
max_sugar_g: null,
|
max_sugar_g: null,
|
||||||
|
|
@ -207,6 +208,7 @@ export const useRecipesStore = defineStore('recipes', () => {
|
||||||
complexity_filter: complexityFilter.value,
|
complexity_filter: complexityFilter.value,
|
||||||
max_time_min: maxTimeMin.value,
|
max_time_min: maxTimeMin.value,
|
||||||
max_total_min: maxTotalMin.value,
|
max_total_min: maxTotalMin.value,
|
||||||
|
max_active_min: maxActiveMin.value,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -377,6 +379,17 @@ export const useRecipesStore = defineStore('recipes', () => {
|
||||||
wildcardConfirmed.value = false
|
wildcardConfirmed.value = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build a suggestion request from the store's current filters and stream the
 * result through recipesAPI.suggestRecipeStream, forwarding the callbacks.
 */
async function streamSuggest(
  pantryItems: string[],
  secondaryPantryItems: Record<string, string>,
  onChunk: (chunk: string) => void,
  onDone: () => void,
  onError: (err: string) => void,
): Promise<void> {
  await recipesAPI.suggestRecipeStream(
    _buildRequest(pantryItems, secondaryPantryItems),
    onChunk,
    onDone,
    onError,
  )
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
result,
|
result,
|
||||||
loading,
|
loading,
|
||||||
|
|
@ -396,6 +409,7 @@ export const useRecipesStore = defineStore('recipes', () => {
|
||||||
complexityFilter,
|
complexityFilter,
|
||||||
maxTimeMin,
|
maxTimeMin,
|
||||||
maxTotalMin,
|
maxTotalMin,
|
||||||
|
maxActiveMin,
|
||||||
nutritionFilters,
|
nutritionFilters,
|
||||||
dismissedIds,
|
dismissedIds,
|
||||||
dismissedCount,
|
dismissedCount,
|
||||||
|
|
@ -413,6 +427,7 @@ export const useRecipesStore = defineStore('recipes', () => {
|
||||||
missingIngredientMode,
|
missingIngredientMode,
|
||||||
builderFilterMode,
|
builderFilterMode,
|
||||||
suggest,
|
suggest,
|
||||||
|
streamSuggest,
|
||||||
loadMore,
|
loadMore,
|
||||||
dismiss,
|
dismiss,
|
||||||
undismiss,
|
undismiss,
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,5 @@
|
||||||
/**
|
|
||||||
* Settings Store
|
|
||||||
*
|
|
||||||
* Manages user settings (cooking equipment, preferences) using Pinia.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { defineStore } from 'pinia'
|
import { defineStore } from 'pinia'
|
||||||
import { ref } from 'vue'
|
import { ref, watch, nextTick } from 'vue'
|
||||||
import { settingsAPI } from '../services/api'
|
import { settingsAPI } from '../services/api'
|
||||||
import type { UnitSystem } from '../utils/units'
|
import type { UnitSystem } from '../utils/units'
|
||||||
import type { SensoryPreferences } from '../services/api'
|
import type { SensoryPreferences } from '../services/api'
|
||||||
|
|
@ -13,8 +7,12 @@ import { DEFAULT_SENSORY_PREFERENCES } from '../services/api'
|
||||||
|
|
||||||
export type TimeFirstLayout = 'auto' | 'time_first' | 'normal'
|
export type TimeFirstLayout = 'auto' | 'time_first' | 'normal'
|
||||||
|
|
||||||
|
/**
 * Wrap `fn` so that it runs once, `ms` milliseconds after the most recent
 * call to the returned function. Each call cancels the previously scheduled run.
 */
function debounce(fn: () => void, ms: number): () => void {
  let pending: ReturnType<typeof setTimeout> | undefined
  return function debounced(): void {
    clearTimeout(pending)
    pending = setTimeout(fn, ms)
  }
}
|
||||||
|
|
||||||
export const useSettingsStore = defineStore('settings', () => {
|
export const useSettingsStore = defineStore('settings', () => {
|
||||||
// State
|
|
||||||
const cookingEquipment = ref<string[]>([])
|
const cookingEquipment = ref<string[]>([])
|
||||||
const unitSystem = ref<UnitSystem>('metric')
|
const unitSystem = ref<UnitSystem>('metric')
|
||||||
const shoppingLocale = ref<string>('us')
|
const shoppingLocale = ref<string>('us')
|
||||||
|
|
@ -23,7 +21,40 @@ export const useSettingsStore = defineStore('settings', () => {
|
||||||
const loading = ref(false)
|
const loading = ref(false)
|
||||||
const saved = ref(false)
|
const saved = ref(false)
|
||||||
|
|
||||||
// Actions
|
// Prevents autosave watchers from firing during initial load hydration.
|
||||||
|
// Set to true after nextTick() at the end of load() — by that point all
|
||||||
|
// watcher jobs queued by the hydration assignments have already flushed.
|
||||||
|
let _hydrated = false
|
||||||
|
|
||||||
|
// Handle of the timer that will hide the "saved" indicator, if one is pending.
let _flashTimer: ReturnType<typeof setTimeout> | undefined

/**
 * Show the "saved" indicator for 2 seconds.
 *
 * Restarts the countdown when called while the indicator is already showing,
 * so an earlier timer cannot hide a newer flash prematurely.
 */
function _flash() {
  saved.value = true
  clearTimeout(_flashTimer)
  _flashTimer = setTimeout(() => {
    saved.value = false
    _flashTimer = undefined
  }, 2000)
}
|
||||||
|
|
||||||
|
/**
 * Persist a single setting through settingsAPI.setSetting and flash the
 * "saved" indicator on success.
 *
 * Does nothing while `_hydrated` is false. Failures are logged to the console
 * and not rethrown.
 */
async function _saveKey(key: string, value: string): Promise<void> {
  if (_hydrated) {
    try {
      await settingsAPI.setSetting(key, value)
      _flash()
    } catch (cause: unknown) {
      console.error('Autosave failed for key:', key, cause)
    }
  }
}
|
||||||
|
|
||||||
|
// One debounced saver per setting: rapid successive edits collapse into a
// single request sent 600 ms after the last change.
const _autosave = {
  equipment: debounce(() => _saveKey('cooking_equipment', JSON.stringify(cookingEquipment.value)), 600),
  unit: debounce(() => _saveKey('unit_system', unitSystem.value), 600),
  locale: debounce(() => _saveKey('shopping_locale', shoppingLocale.value), 600),
  sensory: debounce(() => _saveKey('sensory_preferences', JSON.stringify(sensoryPreferences.value)), 600),
  layout: debounce(() => _saveKey('time_first_layout', timeFirstLayout.value), 600),
}

// The hydration guard has to be evaluated at the moment the watcher fires.
// Checking it only inside the debounced save is too late: the save runs
// 600 ms after the change, by which time load() has already set `_hydrated`,
// so values that were merely loaded would be written straight back to the
// server and flash "Saved".
function _whenHydrated(run: () => void): () => void {
  return () => { if (_hydrated) run() }
}

watch(cookingEquipment, _whenHydrated(_autosave.equipment), { deep: true })
watch(unitSystem, _whenHydrated(_autosave.unit))
watch(shoppingLocale, _whenHydrated(_autosave.locale))
watch(sensoryPreferences, _whenHydrated(_autosave.sensory), { deep: true })
watch(timeFirstLayout, _whenHydrated(_autosave.layout))
|
||||||
|
|
||||||
async function load() {
|
async function load() {
|
||||||
loading.value = true
|
loading.value = true
|
||||||
try {
|
try {
|
||||||
|
|
@ -58,8 +89,15 @@ export const useSettingsStore = defineStore('settings', () => {
|
||||||
} finally {
|
} finally {
|
||||||
loading.value = false
|
loading.value = false
|
||||||
}
|
}
|
||||||
|
// Yield past the watcher flush triggered by hydration assignments above.
|
||||||
|
// After nextTick, any pending watcher jobs from this load() have already
|
||||||
|
// run (and been ignored by _hydrated guard), so user-driven changes from
|
||||||
|
// here forward will correctly trigger autosave.
|
||||||
|
await nextTick()
|
||||||
|
_hydrated = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Kept for explicit full-save scenarios (e.g. fallback, tests).
|
||||||
async function save() {
|
async function save() {
|
||||||
loading.value = true
|
loading.value = true
|
||||||
try {
|
try {
|
||||||
|
|
@ -70,10 +108,7 @@ export const useSettingsStore = defineStore('settings', () => {
|
||||||
settingsAPI.setSetting('sensory_preferences', JSON.stringify(sensoryPreferences.value)),
|
settingsAPI.setSetting('sensory_preferences', JSON.stringify(sensoryPreferences.value)),
|
||||||
settingsAPI.setSetting('time_first_layout', timeFirstLayout.value),
|
settingsAPI.setSetting('time_first_layout', timeFirstLayout.value),
|
||||||
])
|
])
|
||||||
saved.value = true
|
_flash()
|
||||||
setTimeout(() => {
|
|
||||||
saved.value = false
|
|
||||||
}, 2000)
|
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
console.error('Failed to save settings:', err)
|
console.error('Failed to save settings:', err)
|
||||||
} finally {
|
} finally {
|
||||||
|
|
@ -81,24 +116,17 @@ export const useSettingsStore = defineStore('settings', () => {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Kept for backward compat; autosave handles sensory changes now.
|
||||||
async function saveSensory() {
|
async function saveSensory() {
|
||||||
loading.value = true
|
|
||||||
try {
|
try {
|
||||||
await settingsAPI.setSetting(
|
await settingsAPI.setSetting('sensory_preferences', JSON.stringify(sensoryPreferences.value))
|
||||||
'sensory_preferences',
|
_flash()
|
||||||
JSON.stringify(sensoryPreferences.value),
|
|
||||||
)
|
|
||||||
saved.value = true
|
|
||||||
setTimeout(() => { saved.value = false }, 2000)
|
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
console.error('Failed to save sensory preferences:', err)
|
console.error('Failed to save sensory preferences:', err)
|
||||||
} finally {
|
|
||||||
loading.value = false
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
// State
|
|
||||||
cookingEquipment,
|
cookingEquipment,
|
||||||
unitSystem,
|
unitSystem,
|
||||||
shoppingLocale,
|
shoppingLocale,
|
||||||
|
|
@ -106,8 +134,6 @@ export const useSettingsStore = defineStore('settings', () => {
|
||||||
timeFirstLayout,
|
timeFirstLayout,
|
||||||
loading,
|
loading,
|
||||||
saved,
|
saved,
|
||||||
|
|
||||||
// Actions
|
|
||||||
load,
|
load,
|
||||||
save,
|
save,
|
||||||
saveSensory,
|
saveSensory,
|
||||||
|
|
|
||||||
16
manage.sh
16
manage.sh
|
|
@ -14,8 +14,8 @@ OVERRIDE_FLAG=""
|
||||||
[[ -f "compose.override.yml" ]] && OVERRIDE_FLAG="-f compose.override.yml"
|
[[ -f "compose.override.yml" ]] && OVERRIDE_FLAG="-f compose.override.yml"
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo "Usage: $0 {start|stop|restart|status|logs|open|build|test"
|
echo "Usage: $0 {start|stop|restart|status|logs|open|build|test|update"
|
||||||
echo " |cloud-start|cloud-stop|cloud-restart|cloud-status|cloud-logs|cloud-build}"
|
echo " |cloud-start|cloud-stop|cloud-restart|cloud-status|cloud-logs|cloud-build|cloud-update}"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Dev:"
|
echo "Dev:"
|
||||||
echo " start Build (if needed) and start all services"
|
echo " start Build (if needed) and start all services"
|
||||||
|
|
@ -26,6 +26,7 @@ usage() {
|
||||||
echo " open Open web UI in browser"
|
echo " open Open web UI in browser"
|
||||||
echo " build Rebuild Docker images without cache"
|
echo " build Rebuild Docker images without cache"
|
||||||
echo " test Run pytest test suite"
|
echo " test Run pytest test suite"
|
||||||
|
echo " update git pull + rebuild + restart dev stack"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Cloud (menagerie.circuitforge.tech/kiwi):"
|
echo "Cloud (menagerie.circuitforge.tech/kiwi):"
|
||||||
echo " cloud-start Build cloud images and start kiwi-cloud project"
|
echo " cloud-start Build cloud images and start kiwi-cloud project"
|
||||||
|
|
@ -34,6 +35,7 @@ usage() {
|
||||||
echo " cloud-status Show cloud containers"
|
echo " cloud-status Show cloud containers"
|
||||||
echo " cloud-logs Follow cloud logs [api|web — defaults to all]"
|
echo " cloud-logs Follow cloud logs [api|web — defaults to all]"
|
||||||
echo " cloud-build Rebuild cloud images without cache"
|
echo " cloud-build Rebuild cloud images without cache"
|
||||||
|
echo " cloud-update git pull + rebuild + restart cloud stack"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -68,6 +70,11 @@ case "$cmd" in
|
||||||
build)
|
build)
|
||||||
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG build --no-cache
|
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG build --no-cache
|
||||||
;;
|
;;
|
||||||
|
update)
|
||||||
|
git pull
|
||||||
|
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG up -d --build
|
||||||
|
echo "Kiwi updated and restarted → http://localhost:${WEB_PORT}"
|
||||||
|
;;
|
||||||
test)
|
test)
|
||||||
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG run --rm api \
|
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG run --rm api \
|
||||||
conda run -n job-seeker pytest tests/ -v
|
conda run -n job-seeker pytest tests/ -v
|
||||||
|
|
@ -95,6 +102,11 @@ case "$cmd" in
|
||||||
cloud-build)
|
cloud-build)
|
||||||
docker compose -f "$CLOUD_COMPOSE_FILE" -p "$CLOUD_PROJECT" build --no-cache
|
docker compose -f "$CLOUD_COMPOSE_FILE" -p "$CLOUD_PROJECT" build --no-cache
|
||||||
;;
|
;;
|
||||||
|
cloud-update)
|
||||||
|
git pull
|
||||||
|
docker compose -f "$CLOUD_COMPOSE_FILE" -p "$CLOUD_PROJECT" up -d --build
|
||||||
|
echo "Kiwi cloud updated and restarted → https://menagerie.circuitforge.tech/kiwi"
|
||||||
|
;;
|
||||||
|
|
||||||
*)
|
*)
|
||||||
usage
|
usage
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "kiwi"
|
name = "kiwi"
|
||||||
version = "0.6.0"
|
version = "0.10.0"
|
||||||
description = "Pantry tracking + leftover recipe suggestions"
|
description = "Pantry tracking + leftover recipe suggestions"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|
|
||||||
218
scripts/pipeline/ingest_purplecarrot.py
Normal file
218
scripts/pipeline/ingest_purplecarrot.py
Normal file
|
|
@ -0,0 +1,218 @@
|
||||||
|
"""Ingest Purple Carrot scraped recipes into the Kiwi corpus database.
|
||||||
|
|
||||||
|
Reads recipes_purplecarrot_live.parquet (output of scrape_live.py) and
|
||||||
|
upserts into the shared recipes table, setting source='purplecarrot' and
|
||||||
|
using the recipe slug as the external_id (prefixed pc_).
|
||||||
|
|
||||||
|
Run after each weekly_harvest.sh scrape:
|
||||||
|
|
||||||
|
conda run -n cf python3 scripts/pipeline/ingest_purplecarrot.py \
|
||||||
|
[--db /Library/Assets/kiwi/kiwi.db] \
|
||||||
|
[--parquet /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet]
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# ── Helpers (inlined from build_recipe_index to avoid cross-module import) ─────
|
||||||
|
|
||||||
|
_MEASURE_PATTERN = re.compile(
|
||||||
|
r"^\d[\d\s/¼½¾⅓⅔]*\s*(cup|tbsp|tsp|oz|lb|g|kg|ml|l|clove|slice|piece|can|pkg|package|bunch|head|stalk|sprig|pinch|dash|to taste|as needed)s?\b",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
_LEAD_NUMBER = re.compile(r"^\d[\d\s/¼½¾⅓⅔]*\s*")
|
||||||
|
_TRAILING_QUALIFIER = re.compile(
|
||||||
|
r"\s*(to taste|as needed|or more|or less|optional|if desired|if needed)\s*$",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _float_or_none(val: object) -> float | None:
    """Coerce *val* to a strictly positive float.

    Returns ``None`` when *val* cannot be converted by ``float()`` or when the
    converted number is not greater than zero (this includes NaN, for which
    the comparison is false).
    """
    try:
        number = float(val)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return None
    if number > 0:
        return number
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_list(val: object) -> list:
    """Normalise a parquet cell to a plain Python list.

    A ``list`` is returned unchanged and a numpy array is converted with
    ``tolist()``. ``None``, a float NaN and every other type give ``[]``.
    """
    if isinstance(val, list):
        return val
    if val is None or (isinstance(val, float) and math.isnan(val)):
        return []
    # Parquet often deserializes list columns as numpy arrays
    try:
        import numpy as np
    except ImportError:
        return []
    return val.tolist() if isinstance(val, np.ndarray) else []
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_ingredient_names(raw_list: list[str]) -> list[str]:
    """Reduce raw ingredient lines to bare, lower-cased ingredient names.

    Each line is lower-cased and then stripped, in this order, of a leading
    quantity-plus-unit, any remaining leading number, parenthesised notes,
    everything from the first comma onwards, and a trailing qualifier such as
    "to taste". Results of one character or less are dropped.
    """
    cleaned: list[str] = []
    for line in raw_list:
        text = line.lower().strip()
        # The unit pattern is anchored on a leading digit, so it has to run
        # before the bare-number pattern removes that digit.
        for pattern in (_MEASURE_PATTERN, _LEAD_NUMBER):
            text = pattern.sub("", text)
        text = re.sub(r"\(.*?\)", "", text)
        text = re.sub(r",.*$", "", text)
        text = _TRAILING_QUALIFIER.sub("", text).strip(" -.,")
        if len(text) > 1:
            cleaned.append(text)
    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_element_coverage(profiles: list[dict]) -> dict[str, float]:
    """Return each element's occurrence count divided by ``len(profiles)``.

    Occurrences are tallied over every profile's ``"elements"`` list (a
    profile without that key contributes nothing). Ratios are rounded to
    three decimals. An empty *profiles* list yields ``{}``.
    """
    if not profiles:
        return {}
    total = len(profiles)
    tally: dict[str, int] = {}
    for profile in profiles:
        for element in profile.get("elements", []):
            tally[element] = tally.get(element, 0) + 1
    return {element: round(hits / total, 3) for element, hits in tally.items()}
|
||||||
|
|
||||||
|
# ── Config ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
DEFAULT_DB = Path("/Library/Assets/kiwi/kiwi.db")
|
||||||
|
DEFAULT_PARQUET = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet")
|
||||||
|
|
||||||
|
|
||||||
|
# ── Ingest ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _text_or_empty(val: object) -> str:
    """Return *val* as stripped text, mapping missing cells to ``""``.

    ``str()`` on a missing parquet cell produces the literal text "nan",
    "None" or "<NA>", which would otherwise pass every ``if not value`` guard.
    """
    if pd.api.types.is_scalar(val) and pd.isna(val):
        return ""
    return str(val).strip()


def ingest(db_path: Path, parquet_path: Path) -> None:
    """Upsert scraped Purple Carrot recipes from a parquet file into SQLite.

    Rows are keyed on ``external_id`` (``pc_`` + the row's ``Slug``): an
    existing recipe is updated in place, otherwise a new row is inserted with
    ``source='purplecarrot'``. Rows with a missing slug or title are skipped.
    When the parquet has a ``HasFullRecipe`` column, only rows where it is
    true are ingested. All writes are committed once, after the last row.

    Args:
        db_path: SQLite database containing the ``recipes`` and
            ``ingredient_profiles`` tables.
        parquet_path: Parquet file to read recipes from.
    """
    df = pd.read_parquet(parquet_path)

    # Filter to rows with full recipe data
    if "HasFullRecipe" in df.columns:
        df = df[df["HasFullRecipe"] == True].copy()  # noqa: E712 (element-wise)

    if df.empty:
        print("No full recipes found in parquet — nothing to ingest.")
        return

    print(f"Ingesting {len(df)} Purple Carrot recipes into {db_path} …")

    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")

        # Pre-load ingredient element profiles for coverage calculation
        profile_index: dict[str, list[str]] = {}
        for name, elements_json in conn.execute(
            "SELECT name, elements FROM ingredient_profiles"
        ):
            try:
                profile_index[name] = json.loads(elements_json)
            except (TypeError, ValueError):
                # NULL or malformed JSON: that ingredient simply has no profile.
                continue

        inserted = updated = 0

        for _, row in df.iterrows():
            slug = _text_or_empty(row.get("Slug"))
            if not slug:
                continue

            external_id = f"pc_{slug}"
            title = _text_or_empty(row.get("Name"))[:500]
            if not title:
                continue

            raw_ingredients = [
                str(i) for i in _safe_list(row.get("RecipeIngredientParts", []))
            ]
            directions = [
                str(d) for d in _safe_list(row.get("RecipeInstructions", []))
            ]

            ingredient_names = _extract_ingredient_names(raw_ingredients)
            profiles = [
                {"elements": profile_index[n]}
                for n in ingredient_names
                if n in profile_index
            ]
            coverage = _compute_element_coverage(profiles)

            # Keywords: merge scraped tags with allergen info.
            # NOTE(review): assumes Allergens is a comma-separated string — confirm
            # against the scraper's output schema.
            kw_raw = _safe_list(row.get("Keywords", []))
            allergens = _text_or_empty(row.get("Allergens"))
            if allergens:
                kw_raw = list(kw_raw) + [
                    f"allergen:{a.strip()}" for a in allergens.split(",") if a.strip()
                ]
            keywords_json = json.dumps(kw_raw)

            # Check if already present (same external_id)
            existing = conn.execute(
                "SELECT id FROM recipes WHERE external_id = ?", (external_id,)
            ).fetchone()

            # Order must match the column lists in both statements below.
            params = (
                title,
                json.dumps(raw_ingredients),
                json.dumps(ingredient_names),
                json.dumps(directions),
                "meal-kit",  # category
                keywords_json,
                _float_or_none(row.get("Calories")),
                _float_or_none(row.get("FatContent")),
                _float_or_none(row.get("ProteinContent")),
                None,  # sodium_mg — not scraped
                json.dumps(coverage),
                None,  # sugar_g — not scraped
                _float_or_none(row.get("CarbohydrateContent")),
                _float_or_none(row.get("FiberContent")),
                2.0,  # servings — PC meal kits are 2-serving by default
                0,  # nutrition_estimated — PC provides real data
            )

            if existing:
                conn.execute("""
                    UPDATE recipes
                    SET title=?, ingredients=?, ingredient_names=?, directions=?,
                        category=?, keywords=?, calories=?, fat_g=?, protein_g=?,
                        sodium_mg=?, element_coverage=?,
                        sugar_g=?, carbs_g=?, fiber_g=?, servings=?, nutrition_estimated=?
                    WHERE external_id=?
                """, params + (external_id,))
                updated += 1
            else:
                conn.execute("""
                    INSERT INTO recipes
                        (external_id, source, title, ingredients, ingredient_names,
                         directions, category, keywords, calories, fat_g, protein_g,
                         sodium_mg, element_coverage,
                         sugar_g, carbs_g, fiber_g, servings, nutrition_estimated)
                    VALUES (?, 'purplecarrot', ?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """, (external_id,) + params)
                inserted += 1

        conn.commit()
    finally:
        conn.close()

    print(f"Done — {inserted} inserted, {updated} updated")
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: parse ``--db`` / ``--parquet`` and run the ingest.

    Exits with status 1 when either input file is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--db", type=Path, default=DEFAULT_DB)
    parser.add_argument("--parquet", type=Path, default=DEFAULT_PARQUET)
    args = parser.parse_args()

    if not args.parquet.exists():
        print(f"ERROR: parquet not found at {args.parquet}")
        raise SystemExit(1)

    # sqlite3.connect() creates a missing file, so a mistyped --db would leave
    # an empty database behind before the first query fails.
    if not args.db.exists():
        print(f"ERROR: database not found at {args.db}")
        raise SystemExit(1)

    ingest(args.db, args.parquet)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
68
scripts/pipeline/log_utils.py
Normal file
68
scripts/pipeline/log_utils.py
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
"""
|
||||||
|
Pipeline logging utility.
|
||||||
|
|
||||||
|
Adds a structured JSON FileHandler to the root logger so every pipeline
|
||||||
|
script automatically writes machine-readable logs to the shared datastore
|
||||||
|
at /Library/Assets/logs/pipeline/. Avocet ingests these for Turnstone
|
||||||
|
logreading training (kiwi#141 / avocet#67).
|
||||||
|
|
||||||
|
Usage (add near the top of main() after logging.basicConfig):
|
||||||
|
|
||||||
|
from scripts.pipeline.log_utils import attach_pipeline_log
|
||||||
|
attach_pipeline_log("scrape_recipes")
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
PIPELINE_LOG_DIR = Path(
|
||||||
|
os.environ.get("PIPELINE_LOG_DIR", "/Library/Assets/logs/pipeline")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _JsonFormatter(logging.Formatter):
    """Format each log record as a single-line JSON object.

    The object carries ``ts`` (UTC ISO-8601), ``level``, ``logger`` and
    ``msg``; ``exc`` is added when the record has exception info, and
    ``extra`` holds any attributes passed via ``logger.x(..., extra={...})``.
    """

    # Attributes the logging machinery sets on every LogRecord. Anything else
    # found in record.__dict__ arrived through ``extra={...}``.
    _STANDARD_ATTRS = frozenset({
        "name", "msg", "args", "levelname", "levelno", "pathname",
        "filename", "module", "exc_info", "exc_text", "stack_info",
        "lineno", "funcName", "created", "msecs", "relativeCreated",
        "thread", "threadName", "processName", "process", "message",
        "taskName",
    })

    def format(self, record: logging.LogRecord) -> str:
        """Return *record* serialised as one JSON document."""
        payload: dict = {
            "ts": datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
        }
        if record.exc_info:
            payload["exc"] = self.formatException(record.exc_info)
        extra = {
            k: v for k, v in record.__dict__.items()
            if k not in self._STANDARD_ATTRS
        }
        if extra:
            payload["extra"] = extra
        # default=str: extras may hold values json cannot encode (Path,
        # datetime, ...). Stringify them rather than raise and lose the record.
        return json.dumps(payload, default=str)
|
||||||
|
|
||||||
|
|
||||||
|
def attach_pipeline_log(script_name: str) -> Path:
    """Add a JSON-lines file handler to the root logger.

    The log directory is created if needed, and the file is named
    ``<script_name>_<UTC timestamp>.jsonl``. One INFO record announcing the
    log path is emitted through this module's logger after the handler is
    attached.

    Returns:
        The path of the log file.
    """
    PIPELINE_LOG_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(tz=timezone.utc).strftime("%Y%m%dT%H%M%S")
    destination = PIPELINE_LOG_DIR / f"{script_name}_{stamp}.jsonl"

    json_handler = logging.FileHandler(destination, encoding="utf-8")
    json_handler.setFormatter(_JsonFormatter())
    json_handler.setLevel(logging.DEBUG)

    root_logger = logging.getLogger()
    root_logger.addHandler(json_handler)

    module_logger = logging.getLogger(__name__)
    module_logger.info(
        "Pipeline log: %s", destination, extra={"script": script_name}
    )
    return destination
|
||||||
0
scripts/pipeline/purple_carrot/__init__.py
Normal file
0
scripts/pipeline/purple_carrot/__init__.py
Normal file
120
scripts/pipeline/purple_carrot/discover_current_menu.py
Normal file
120
scripts/pipeline/purple_carrot/discover_current_menu.py
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
"""Discover Purple Carrot's current weekly menu recipe slugs.
|
||||||
|
|
||||||
|
The main /plant-based-recipes listing page always renders the current week's
|
||||||
|
menu as server-side HTML. This script pulls those slugs and writes them to a
|
||||||
|
parquet that can be passed directly to scrape_live.py via --slugs-from.
|
||||||
|
|
||||||
|
Run weekly (e.g. via cron) to accumulate new recipes as the menu rotates.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
|
||||||
|
[--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet]
|
||||||
|
|
||||||
|
Then scrape:
|
||||||
|
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
|
||||||
|
--slugs-from /Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet \
|
||||||
|
--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet \
|
||||||
|
--resume
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# ── Config ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
LISTING_URL = "https://www.purplecarrot.com/plant-based-recipes"
|
||||||
|
BASE_URL = "https://www.purplecarrot.com/recipe/{slug}"
|
||||||
|
|
||||||
|
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet")
|
||||||
|
|
||||||
|
HEADERS = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||||
|
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||||
|
),
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
|
}
|
||||||
|
|
||||||
|
RECIPE_HREF_RE = re.compile(r"/recipe/([^?#]+)")
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def discover_current_slugs() -> list[str]:
    """Fetch the listing page and return unique recipe slugs from the current menu.

    Slugs are returned in first-seen order. Any failure to obtain the page
    (network error or non-200 status) is reported on stderr and yields ``[]``,
    so the caller only has one failure signal to check.
    """
    try:
        resp = requests.get(LISTING_URL, headers=HEADERS, timeout=15)
    except requests.RequestException as exc:
        print(f"ERROR: could not fetch listing page: {exc}", file=sys.stderr)
        return []
    if resp.status_code != 200:
        print(f"ERROR: listing page returned HTTP {resp.status_code}", file=sys.stderr)
        return []

    soup = BeautifulSoup(resp.text, "html.parser")
    # dict keys give ordered de-duplication in a single structure.
    found: dict[str, None] = {}
    for a in soup.find_all("a", href=RECIPE_HREF_RE):
        m = RECIPE_HREF_RE.search(a["href"])
        if not m:
            continue
        # "/recipe/foo/" and "/recipe/foo" are the same recipe; without this
        # the trailing slash would leak into the slug and the built URL.
        slug = m.group(1).strip("/")
        if slug:
            found.setdefault(slug, None)
    return list(found)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: record this week's menu slugs in the ``--out`` parquet.

    Newly discovered slugs are appended to any existing parquet at that path.
    When a slug is already present, its earlier row is the one kept. Exits
    with status 1 if the listing page produced no slugs.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--out", type=Path, default=DEFAULT_OUT)
    args = cli.parse_args()
    out_path = args.out

    print(f"Fetching current menu from {LISTING_URL} …")
    slugs = discover_current_slugs()
    if not slugs:
        print("No slugs found — the listing page may have changed structure or blocked the request.")
        sys.exit(1)

    discovered_on = date.today().isoformat()
    frame = pd.DataFrame([
        {
            "Slug": slug,
            "SourceURL": BASE_URL.format(slug=slug),
            "Source": "purplecarrot_menu",
            "DiscoveredDate": discovered_on,
        }
        for slug in slugs
    ])

    # Merge with any existing menu parquet (accumulate weeks)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    if out_path.exists():
        previous = pd.read_parquet(out_path)
        frame = pd.concat([previous, frame], ignore_index=True)
        frame = frame.drop_duplicates(subset=["Slug"], keep="first")
    frame.to_parquet(out_path, index=False)

    print(f"Found {len(slugs)} current-menu slugs this week:")
    for slug in slugs:
        print(f" {slug}")
    print(f"\nSaved {len(frame)} total slugs (accumulated) to {out_path}")
    print("\nTo scrape full recipes:")
    print(" conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \\")
    print(f" --slugs-from {out_path} \\")
    print(" --out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet \\")
    print(" --resume")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
218
scripts/pipeline/purple_carrot/discover_slugs_categories.py
Normal file
218
scripts/pipeline/purple_carrot/discover_slugs_categories.py
Normal file
|
|
@ -0,0 +1,218 @@
|
||||||
|
"""Discover Purple Carrot recipe slugs by crawling all recipe-category listing pages.
|
||||||
|
|
||||||
|
The site serves full server-rendered HTML for category pages, paginated via
|
||||||
|
?page=N. Each page loads 18 recipe cards. This script crawls every category
|
||||||
|
across all pages and writes a deduplicated slug inventory.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_slugs_categories.py \
|
||||||
|
[--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_slugs.parquet] \
|
||||||
|
[--delay 2.0] \
|
||||||
|
[--max-pages 50] # safety cap per category (comfort-foods has ~18)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# ── Config ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
BASE = "https://www.purplecarrot.com"
|
||||||
|
|
||||||
|
# All known category slugs (from /plant-based-recipes nav)
|
||||||
|
CATEGORIES: list[str] = [
|
||||||
|
"comfort-foods",
|
||||||
|
"family-friendly",
|
||||||
|
"healthy-desserts",
|
||||||
|
"holiday-recipes",
|
||||||
|
"quick-and-easy",
|
||||||
|
"party-foods",
|
||||||
|
"seasonal-menu",
|
||||||
|
"spring-recipes",
|
||||||
|
"summer-recipes",
|
||||||
|
"fall-recipes",
|
||||||
|
"winter-recipes",
|
||||||
|
"african",
|
||||||
|
"american",
|
||||||
|
"asian",
|
||||||
|
"comfort",
|
||||||
|
"french",
|
||||||
|
"indian",
|
||||||
|
"italian",
|
||||||
|
"mediterranean",
|
||||||
|
"mexican",
|
||||||
|
"middle-eastern",
|
||||||
|
"soups",
|
||||||
|
"salads",
|
||||||
|
"bowls",
|
||||||
|
"pasta",
|
||||||
|
"sandwiches-wraps",
|
||||||
|
"tacos",
|
||||||
|
"breakfast",
|
||||||
|
"snacks-sides",
|
||||||
|
]
|
||||||
|
|
||||||
|
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_slugs.parquet")
|
||||||
|
EXISTING_PARQUET = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet")
|
||||||
|
|
||||||
|
RECIPE_LINK_SELECTOR = "a.c-recipe__title"
|
||||||
|
SLUG_RE = re.compile(r"/recipe/([^?#]+)")
|
||||||
|
|
||||||
|
HEADERS = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||||
|
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||||
|
),
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _fetch_html(url: str, session: requests.Session) -> str | None:
    """Fetch URL and return HTML string, or None on failure.

    A 404 is treated as the expected end of pagination and is not reported.
    Any other non-200 status, and any exception raised while fetching, is
    printed before ``None`` is returned.
    """
    try:
        response = session.get(url, headers=HEADERS, timeout=15)
        status = response.status_code
        if status == 200:
            return response.text
        if status != 404:
            print(f" HTTP {status} — {url}")
    except Exception as exc:
        print(f" ERROR fetching {url}: {exc}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_slugs(html: str) -> list[str]:
    """Pull recipe slugs from one listing-page HTML response.

    Slugs are returned in document order, duplicates included. Links whose
    ``href`` does not match the recipe-path pattern are ignored.
    """
    anchors = BeautifulSoup(html, "html.parser").select(RECIPE_LINK_SELECTOR)
    matches = (SLUG_RE.search(anchor.get("href", "")) for anchor in anchors)
    return [hit.group(1) for hit in matches if hit]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_category_total(html: str) -> int | None:
    """Return the first "<number> Recipe(s)" count found in *html*, if any.

    Used to read the total shown on a category page (e.g. '319 Recipes').
    """
    found = re.search(r"(\d+)\s+Recipes?\b", html)
    if found is None:
        return None
    return int(found.group(1))
|
||||||
|
|
||||||
|
|
||||||
|
def _discover_category(
    category: str,
    session: requests.Session,
    delay: float,
    max_pages: int,
    page_size: int = 18,
) -> tuple[list[str], int]:
    """Crawl all pages of a category, return (slugs, pages_fetched).

    Args:
        category: Category slug appended to ``/recipe-categories/``.
        session: HTTP session reused for every page request.
        delay: Seconds to sleep between page requests.
        max_pages: Safety cap on the number of pages requested.
        page_size: Number of recipe cards on a full listing page; a page with
            fewer links is taken to be the last one.

    Returns:
        The slugs in crawl order (duplicates across pages are kept) and the
        number of pages that actually contributed slugs.
    """
    slugs: list[str] = []
    seen: set[str] = set()
    pages_fetched = 0

    for page_num in range(1, max_pages + 1):
        if page_num == 1:
            url = f"{BASE}/recipe-categories/{category}"
        else:
            url = f"{BASE}/recipe-categories/{category}?page={page_num}"

        html = _fetch_html(url, session)
        if html is None:
            break  # 404 or error = past the end

        page_slugs = _extract_slugs(html)
        if not page_slugs:
            # Show total if we got a page but no links (category slug may be wrong)
            if page_num == 1:
                total = _get_category_total(html)
                if total is not None:
                    print(f" page 1 loaded (total={total}) but 0 recipe links — selector may need updating")
            break

        if seen.issuperset(page_slugs):
            # Nothing new on this page: the site is re-serving cards we already
            # have (e.g. an out-of-range ?page=N), so stop instead of crawling
            # on to max_pages.
            break

        pages_fetched += 1
        seen.update(page_slugs)
        slugs.extend(page_slugs)

        # Print progress
        total_hint = _get_category_total(html) if page_num == 1 else None
        total_str = f" / {total_hint}" if total_hint else ""
        print(f" page {page_num}: +{len(page_slugs)} slugs ({len(slugs)}{total_str} cumulative)")

        if len(page_slugs) < page_size:
            # Short page = last page
            break

        time.sleep(delay)

    return slugs, pages_fetched
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: crawl category pages and write the slug inventory.

    Slugs found on category pages are de-duplicated, keeping the first
    category each was seen in. Slugs present only in the existing Wayback
    parquet are appended in sorted order, so repeated runs produce the same
    row order. Nothing is written when the crawl finds no records.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=DEFAULT_OUT)
    parser.add_argument("--delay", type=float, default=2.0,
                        help="Seconds between page requests")
    parser.add_argument("--max-pages", type=int, default=50,
                        help="Safety cap on pages per category")
    parser.add_argument("--categories", nargs="*",
                        help="Crawl only these category slugs (default: all)")
    args = parser.parse_args()

    categories = args.categories or CATEGORIES

    # Seed with any slugs from the Wayback parquet
    known_slugs: set[str] = set()
    if EXISTING_PARQUET.exists():
        df_wb = pd.read_parquet(EXISTING_PARQUET)
        known_slugs = set(df_wb["Slug"].dropna().tolist())
        print(f"Seeded with {len(known_slugs)} slugs from Wayback parquet")

    all_records: list[dict[str, Any]] = []
    session = requests.Session()

    for category in categories:
        print(f"\n[{category}]")
        cat_slugs, pages = _discover_category(category, session, args.delay, args.max_pages)
        all_records.extend(
            {"Slug": slug, "Category": category, "Source": "purplecarrot_category"}
            for slug in cat_slugs
        )
        print(f" → {len(cat_slugs)} slugs across ~{pages} pages")
        time.sleep(args.delay)

    if not all_records:
        print("\nNo records found — check that categories are correct and the site is accessible")
        return

    # Deduplicate keeping first category encountered
    df_new = pd.DataFrame(all_records)
    df_new = df_new.drop_duplicates(subset=["Slug"], keep="first")

    # Also include Wayback slugs not already in the new set
    if known_slugs:
        wb_only = known_slugs - set(df_new["Slug"].tolist())
        if wb_only:
            # Sorted: set iteration order changes between runs, which would
            # shuffle the output rows for no reason.
            df_wb_extra = pd.DataFrame([
                {"Slug": s, "Category": "wayback", "Source": "purplecarrot_wayback"}
                for s in sorted(wb_only, key=str)
            ])
            df_new = pd.concat([df_new, df_wb_extra], ignore_index=True)

    args.out.parent.mkdir(parents=True, exist_ok=True)
    df_new.to_parquet(args.out, index=False)

    new_count = len(df_new)
    cat_count = len(df_new[df_new["Source"] == "purplecarrot_category"])
    print(f"\nDone — {new_count} total slugs saved to {args.out}")
    print(f" {cat_count} from category pages, {new_count - cat_count} from Wayback only")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
301
scripts/pipeline/purple_carrot/discover_wayback.py
Normal file
301
scripts/pipeline/purple_carrot/discover_wayback.py
Normal file
|
|
@ -0,0 +1,301 @@
|
||||||
|
"""
|
||||||
|
discover_wayback.py — enumerate Purple Carrot recipe slugs via the Wayback Machine.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1. CDX API → all archived /api/v2/menus/* URLs (multiple timestamps)
|
||||||
|
2. Replay → fetch each menu's menuItems, extract productPath slugs
|
||||||
|
3. CDX API → all archived /api/v1/products/* URLs (direct slug capture)
|
||||||
|
4. CDX API → /recipe-categories/* HTML pages for older slugs
|
||||||
|
5. Deduplicate and write manifest to OUT_FILE
|
||||||
|
|
||||||
|
Output (JSONL, one record per recipe):
|
||||||
|
{"slug": "...", "title": "...", "subtitle": "...", "cook_time": "...",
|
||||||
|
"tags": [...], "serving_size": 2, "image_url": "...", "description": "...",
|
||||||
|
"wayback_ts": "20260412150557", "source": "menu|product_api|category_page"}
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
conda run -n cf python -m scripts.pipeline.purple_carrot.discover_wayback
|
||||||
|
conda run -n cf python -m scripts.pipeline.purple_carrot.discover_wayback --out /Library/Assets/kiwi/pipeline/pc_slugs.jsonl
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
CDX_BASE = "https://web.archive.org/cdx/search/cdx"
|
||||||
|
WB_BASE = "https://web.archive.org/web"
|
||||||
|
PC_HOST = "www.purplecarrot.com"
|
||||||
|
|
||||||
|
# Polite delay between Wayback replay fetches (seconds)
|
||||||
|
REPLAY_DELAY = 1.0
|
||||||
|
CDX_DELAY = 0.5
|
||||||
|
|
||||||
|
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/pc_slugs.jsonl")
|
||||||
|
|
||||||
|
|
||||||
|
# ── CDX helpers ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def cdx_query(url_pattern: str, **kwargs) -> list[dict]:
    """Query the Wayback CDX index and return one dict per matching capture.

    The request asks for the ``original``, ``timestamp`` and ``statuscode``
    fields, collapsed by ``urlkey`` and filtered to HTTP 200 captures; any
    keyword argument is sent as an extra CDX parameter and overrides the
    default of the same name.  The first row of the JSON response is used as
    the header for the remaining rows.

    Up to three attempts are made; every failure is logged and followed by an
    exponentially growing sleep.  An empty list is returned when the index has
    no rows or all attempts fail.
    """
    query: dict = {
        "url": url_pattern,
        "output": "json",
        "fl": "original,timestamp,statuscode",
        "collapse": "urlkey",
        "filter": "statuscode:200",
    }
    query.update(kwargs)

    for attempt_no in range(1, 4):
        try:
            response = requests.get(CDX_BASE, params=query, timeout=30)
            response.raise_for_status()
            payload = response.json()
            # A usable answer has a header row plus at least one data row.
            if not payload or len(payload) < 2:
                return []
            field_names = payload[0]
            return [dict(zip(field_names, capture)) for capture in payload[1:]]
        except Exception as exc:
            logger.warning("CDX attempt %d failed: %s", attempt_no, exc)
            time.sleep(2 ** (attempt_no - 1))
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def wayback_get(url: str, timestamp: str) -> Any | None:
    """Fetch a Wayback replay of a URL and return parsed JSON (or None).

    Args:
        url: Original URL whose archived copy should be replayed.
        timestamp: Wayback capture timestamp placed in the replay URL.

    Returns:
        The decoded JSON body of a 200 response, or ``None`` when the capture
        is missing (404) or every attempt failed.

    Up to three attempts are made.  Any failed attempt -- a network error, an
    undecodable body, or an unexpected HTTP status such as 429/5xx -- is logged
    and followed by an exponential back-off before the next try.
    """
    replay_url = f"{WB_BASE}/{timestamp}/{url}"
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            resp = requests.get(replay_url, timeout=30)
            if resp.status_code == 200:
                return resp.json()
            if resp.status_code == 404:
                return None
            # Previously this path looped straight into the next request with
            # no delay, hammering the archive while it was rate-limiting us.
            logger.warning(
                "Wayback GET attempt %d for %s returned HTTP %s",
                attempt + 1, url, resp.status_code,
            )
        except (requests.RequestException, ValueError) as exc:
            logger.warning("Wayback GET attempt %d failed for %s: %s", attempt + 1, url, exc)
        # No point sleeping after the final attempt -- we are about to give up.
        if attempt < max_attempts - 1:
            time.sleep(2 ** attempt)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Slug extraction ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def slug_from_product_path(path: str) -> str | None:
    """Return the trailing slug of a product path: '/recipe/foo-bar-baz' → 'foo-bar-baz'.

    Surrounding slashes are ignored, and any query string or fragment is
    dropped first so it cannot end up inside the slug.  Returns ``None`` for
    an empty path or one with no final segment.
    """
    if not path:
        return None
    # Cut "?..." and "#..." before looking at the path segments.
    bare_path = path.split("?", 1)[0].split("#", 1)[0]
    return bare_path.strip("/").split("/")[-1] or None
|
||||||
|
|
||||||
|
|
||||||
|
def _menu_item_to_record(item: dict, wayback_ts: str) -> dict | None:
    """Build a manifest record from one ``menuItems`` entry.

    The slug comes from the entry's ``productPath``; entries without a usable
    slug yield ``None``.  The remaining fields are copied from the entry, and
    the record is stamped with the capture timestamp and ``source="menu"``.
    """
    slug = slug_from_product_path(item.get("productPath", ""))
    if not slug:
        return None

    record: dict = {"slug": slug}
    # Plain text fields: (record key, menu-item key), defaulting to "".
    for record_key, item_key in (
        ("title", "title"),
        ("subtitle", "subtitle"),
        ("cook_time", "cookTime"),
    ):
        record[record_key] = item.get(item_key, "")
    record["tags"] = item.get("filterTags") or []
    record["serving_size"] = item.get("servingSize")
    record["image_url"] = item.get("imageURL", "")
    record["description"] = item.get("description", "")
    record["wayback_ts"] = wayback_ts
    record["source"] = "menu"
    return record
|
||||||
|
|
||||||
|
|
||||||
|
# ── Discovery passes ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def pass_menus(seen_slugs: set[str]) -> list[dict]:
    """Collect recipe records from archived ``/api/v2/menus/<id>`` captures.

    Each distinct numeric menu id is replayed once (with ``?logged_out=true``)
    and its ``menuItems`` are converted to records.  Slugs already present in
    ``seen_slugs`` are skipped; new ones are added to that set in place.

    Returns the list of newly discovered records.
    """
    found: list[dict] = []

    captures = cdx_query(f"{PC_HOST}/api/v2/menus/*", limit="500")
    logger.info("CDX: %d archived menu URLs found", len(captures))
    time.sleep(CDX_DELAY)

    handled_menu_ids: set[str] = set()

    for capture in captures:
        capture_ts = capture["timestamp"]
        bare_url = capture["original"].split("?")[0]
        menu_id = bare_url.rstrip("/").split("/")[-1]

        # A non-numeric last segment is the listing endpoint, not a menu;
        # a repeated id has already been fetched.
        if not menu_id.isdigit() or menu_id in handled_menu_ids:
            continue
        handled_menu_ids.add(menu_id)

        logger.info("Fetching menu %s (ts=%s) ...", menu_id, capture_ts)
        payload = wayback_get(bare_url + "?logged_out=true", capture_ts)
        time.sleep(REPLAY_DELAY)

        if not payload or "menuItems" not in payload:
            continue

        for menu_item in payload["menuItems"]:
            record = _menu_item_to_record(menu_item, capture_ts)
            if not record or record["slug"] in seen_slugs:
                continue
            seen_slugs.add(record["slug"])
            found.append(record)
            logger.debug(" + %s", record["slug"])

        logger.info(" %d new slugs (total so far: %d)", len(found), len(seen_slugs))

    return found
|
||||||
|
|
||||||
|
|
||||||
|
def pass_product_api(seen_slugs: set[str]) -> list[dict]:
    """Pick up any directly archived /api/v1/products/* URLs the menu pass missed.

    The slug is the last path segment of each archived URL, taken after the
    query string and fragment are removed (captures such as
    ``.../products/foo?logged_out=true`` would otherwise produce a bogus slug
    and defeat de-duplication against the menu pass).

    Args:
        seen_slugs: Slugs already known; updated in place with new ones.

    Returns:
        Metadata-less records (``source="product_api"``) for each new slug.
    """
    records: list[dict] = []

    product_cdx = cdx_query(f"{PC_HOST}/api/v1/products/*", limit="5000")
    logger.info("CDX: %d archived product API URLs found", len(product_cdx))
    time.sleep(CDX_DELAY)

    for entry in product_cdx:
        bare_url = entry["original"].split("?", 1)[0].split("#", 1)[0]
        slug = bare_url.rstrip("/").split("/")[-1]
        if not slug or slug in seen_slugs:
            continue
        seen_slugs.add(slug)
        # Only the slug is known here; descriptive fields stay empty.
        records.append({
            "slug": slug,
            "title": "",
            "subtitle": "",
            "cook_time": "",
            "tags": [],
            "serving_size": None,
            "image_url": "",
            "description": "",
            "wayback_ts": entry["timestamp"],
            "source": "product_api",
        })

    logger.info("product_api pass: %d new slugs", len(records))
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def pass_category_pages(seen_slugs: set[str]) -> list[dict]:
    """Scan archived ``/recipe-categories/*`` HTML pages for unseen recipe slugs.

    Each distinct category URL (query string ignored) is replayed once and its
    HTML is searched with a regular expression for ``/recipe/<slug>``
    references.  Pages that fail to download or do not answer 200 are skipped.
    New slugs are added to ``seen_slugs`` in place and returned as
    metadata-less records with ``source="category_page"``.
    """
    import re

    slug_pattern = re.compile(r'["\s]/recipe/([a-z0-9][a-z0-9\-]{3,})["\s/?]')
    found: list[dict] = []

    captures = cdx_query(f"{PC_HOST}/recipe-categories/*", limit="200")
    logger.info("CDX: %d archived category pages found", len(captures))
    time.sleep(CDX_DELAY)

    visited_pages: set[str] = set()

    for capture in captures:
        page_url = capture["original"].split("?")[0]
        if page_url in visited_pages:
            continue
        visited_pages.add(page_url)

        capture_ts = capture["timestamp"]
        replay_url = f"{WB_BASE}/{capture_ts}/{page_url}"
        try:
            response = requests.get(replay_url, timeout=30)
            time.sleep(REPLAY_DELAY)
            page_ok = response.status_code == 200
        except Exception as exc:
            logger.warning("Category page fetch failed: %s", exc)
            continue
        if not page_ok:
            continue

        for slug in slug_pattern.findall(response.text):
            if slug not in seen_slugs:
                seen_slugs.add(slug)
                found.append({
                    "slug": slug,
                    "title": "",
                    "subtitle": "",
                    "cook_time": "",
                    "tags": [],
                    "serving_size": None,
                    "image_url": "",
                    "description": "",
                    "wayback_ts": capture_ts,
                    "source": "category_page",
                })

    logger.info("category_pages pass: %d new slugs", len(found))
    return found
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def discover(out_file: Path) -> None:
    """Run all discovery passes and append newly found slugs to ``out_file``.

    An existing manifest is read first so reruns are incremental: slugs that
    are already recorded are not emitted again.  Lines that are not valid JSON
    objects with a ``slug`` key (for example a line truncated by an interrupted
    run) are logged and skipped instead of aborting the whole rerun.

    Args:
        out_file: JSONL manifest path; its parent directory is created if
            needed and new records are appended to it.
    """
    seen: set[str] = set()
    existing_count = 0
    # True when the manifest's last line lacks a trailing newline; appending
    # directly would then glue the first new record onto that line.
    needs_newline = False

    # Load previously discovered slugs so reruns are incremental
    if out_file.exists():
        with open(out_file, encoding="utf-8") as f:
            for line_no, raw_line in enumerate(f, start=1):
                needs_newline = not raw_line.endswith("\n")
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    seen.add(json.loads(line)["slug"])
                except (ValueError, KeyError, TypeError) as exc:
                    logger.warning(
                        "Skipping malformed manifest line %d in %s: %s",
                        line_no, out_file, exc,
                    )
                    continue
                existing_count += 1
        logger.info("Loaded %d existing slugs from %s", len(seen), out_file)

    # Each pass mutates ``seen`` so later passes skip what earlier ones found.
    new_records: list[dict] = []
    new_records += pass_menus(seen)
    new_records += pass_product_api(seen)
    new_records += pass_category_pages(seen)

    out_file.parent.mkdir(parents=True, exist_ok=True)
    with open(out_file, "a", encoding="utf-8") as f:
        if needs_newline and new_records:
            f.write("\n")
        f.writelines(json.dumps(rec) + "\n" for rec in new_records)

    total = existing_count + len(new_records)
    logger.info(
        "Done. %d new slugs written to %s (%d total).",
        len(new_records), out_file, total,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: parse options, set up logging, run discovery."""
    cli = argparse.ArgumentParser(description="Discover Purple Carrot recipe slugs via Wayback")
    cli.add_argument(
        "--out",
        type=Path,
        default=DEFAULT_OUT,
        help=f"Output JSONL manifest (default: {DEFAULT_OUT})",
    )
    cli.add_argument("--debug", action="store_true")
    options = cli.parse_args()

    log_level = logging.INFO
    if options.debug:
        log_level = logging.DEBUG
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    # Imported here, after basicConfig, exactly as the script has always done.
    from scripts.pipeline.log_utils import attach_pipeline_log

    attach_pipeline_log("discover_wayback")

    discover(options.out)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
250
scripts/pipeline/purple_carrot/scrape_live.py
Normal file
250
scripts/pipeline/purple_carrot/scrape_live.py
Normal file
|
|
@ -0,0 +1,250 @@
|
||||||
|
"""Playwright scraper for live purplecarrot.com recipe pages.
|
||||||
|
|
||||||
|
Uses the slug inventory already in recipes_purplecarrot.parquet and fills in
|
||||||
|
the missing ingredients/instructions by hitting the live site directly.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
|
||||||
|
[--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet] \
|
||||||
|
[--delay 2.5] \
|
||||||
|
[--limit 20]
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from playwright.sync_api import sync_playwright, Page, TimeoutError as PWTimeout
|
||||||
|
|
||||||
|
# ── Config ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
BASE_URL = "https://www.purplecarrot.com/recipe/{slug}"
|
||||||
|
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet")
|
||||||
|
EXISTING_PARQUET = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet")
|
||||||
|
|
||||||
|
RENDER_WAIT_MS = 2500 # JS render settle time
|
||||||
|
NAV_TIMEOUT_MS = 20_000
|
||||||
|
|
||||||
|
|
||||||
|
# ── Page parser ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _text(page: Page, selector: str) -> str:
    """Return the stripped inner text of the first element matching ``selector``.

    An empty string is returned when nothing matches.
    """
    element = page.query_selector(selector)
    if not element:
        return ""
    return element.inner_text().strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _texts(page: Page, selector: str) -> list[str]:
    """Return the stripped inner text of every element matching ``selector``."""
    stripped: list[str] = []
    for element in page.query_selector_all(selector):
        stripped.append(element.inner_text().strip())
    return stripped
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_steps(inst_block: str) -> list[str]:
    """Group the text lines of the Instructions section into one string per step.

    A line consisting only of digits is treated as a step number and starts a
    new step; a line starting with "CULINARY NOTES" ends the instructions.
    """
    steps: list[str] = []
    current: list[str] = []
    for line in inst_block.splitlines():
        line = line.strip()
        if not line:
            continue
        if re.match(r"^\d+$", line):
            if current:
                steps.append(" ".join(current))
                current = []
        elif line.startswith("CULINARY NOTES"):
            break
        else:
            current.append(line)
    if current:
        steps.append(" ".join(current))
    return steps


def _parse_recipe(page: Page, slug: str, source_url: str) -> dict[str, Any] | None:
    """Extract structured recipe data from the rendered page.

    Everything except the title is parsed from the page's visible body text,
    using the "Ingredients" / "Instructions" headings and a "Shop" footer line
    as section markers.

    Args:
        page: Page that has already navigated to the recipe URL.
        slug: Recipe slug; it must still appear in ``page.url``.
        source_url: Value stored in the record's ``SourceURL`` field.

    Returns:
        A record dict, or ``None`` when the page is a "Page Not Found" page,
        was redirected away from the slug, or shows no Ingredients heading.
    """
    body = page.inner_text("body")

    # Abort if we've been bounced to a generic listing / 404
    if "Page Not Found" in body or slug not in page.url:
        return None

    # ── Title ──────────────────────────────────────────────────────────────────
    # The <h1> on product pages tends to be the recipe name
    title = (_text(page, "h1") or _text(page, "[class*='recipe-title']")).strip()
    if not title:
        # Fallback: last non-empty line of text before "Ingredients".  The
        # text before the heading can be whitespace only, so guard the [-1].
        idx = body.find("Ingredients\n")
        preamble = body[:idx].strip().splitlines() if idx > 0 else []
        title = preamble[-1].strip() if preamble else ""

    # ── Ingredients / Instructions via body text ───────────────────────────────
    ing_marker = "\nIngredients\n"
    inst_marker = "\nInstructions\n"
    ing_start = body.find(ing_marker)
    inst_start = body.find(inst_marker)
    footer_start = body.find("\nShop\n")  # footer sentinel

    if ing_start == -1:
        return None  # page didn't render recipe content

    raw_ingredients: list[str] = []
    raw_instructions: list[str] = []

    if inst_start != -1:
        # Ingredients are only taken when an Instructions heading bounds them.
        ing_block = body[ing_start + len(ing_marker):inst_start].strip()
        raw_ingredients = [ln.strip() for ln in ing_block.splitlines() if ln.strip()]

        end = footer_start if footer_start > inst_start else len(body)
        inst_block = body[inst_start + len(inst_marker):end].strip()
        raw_instructions = _collect_steps(inst_block)

    # ── Nutrition ──────────────────────────────────────────────────────────────
    def _extract_num(pattern: str) -> float | None:
        m = re.search(pattern, body)
        try:
            return float(m.group(1)) if m else None
        except ValueError:
            return None

    cal = _extract_num(r"(\d+)\s*CAL")
    fat = _extract_num(r"(\d+(?:\.\d+)?)g\s*FAT")
    carbs = _extract_num(r"(\d+(?:\.\d+)?)g\s*CARBS")
    prot = _extract_num(r"(\d+(?:\.\d+)?)g\s*PROTEIN")
    fiber = _extract_num(r"(\d+(?:\.\d+)?)g\s*FIBER")

    # ── Allergens / tags ───────────────────────────────────────────────────────
    allergen_m = re.search(r"Allergens?:\s*([^\n]+)", body)
    allergens = allergen_m.group(1).strip() if allergen_m else ""

    # Feature tags like HIGH-PROTEIN, QUICK, etc. appear before Ingredients
    pre_ing = body[:ing_start]
    tags = re.findall(r"\b(HIGH-PROTEIN|QUICK|SPICY|LOW[\-\s]CALORIE|VEGAN|FAMILY\s+FRIENDLY)\b", pre_ing)

    return {
        "Slug": slug,
        "Name": title,
        "SourceURL": source_url,
        "Source": "purplecarrot_live",
        "RecipeIngredientParts": raw_ingredients,
        "RecipeInstructions": raw_instructions,
        "Calories": cal,
        "FatContent": fat,
        "CarbohydrateContent": carbs,
        "ProteinContent": prot,
        "FiberContent": fiber,
        "Allergens": allergens,
        "Keywords": tags,
        "HasFullRecipe": bool(raw_ingredients and raw_instructions),
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ───────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main() -> None:
    """Scrape live recipe pages for every slug in the inventory parquet.

    Reads the slug inventory, visits each recipe page in a fresh browser
    context, parses it with ``_parse_recipe`` and writes all full and partial
    results to ``--out`` at the end of the run.

    With ``--resume`` the slugs already present in ``--out`` are skipped and
    the new rows are merged into the existing file.  ``--limit`` counts slugs
    that still need scraping, so repeated ``--resume --limit N`` runs make
    progress through the inventory instead of re-selecting the same first N.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=DEFAULT_OUT)
    parser.add_argument("--delay", type=float, default=2.5,
                        help="Seconds between requests (be polite)")
    parser.add_argument("--limit", type=int, default=0,
                        help="Stop after N slugs (0 = all)")
    parser.add_argument("--resume", action="store_true",
                        help="Skip slugs already present in --out")
    parser.add_argument("--slugs-from", type=Path, default=None,
                        help="Read slug inventory from this parquet instead of the default Wayback one")
    args = parser.parse_args()

    # Load slug inventory — either from a custom parquet or the default Wayback run
    slugs_parquet = args.slugs_from if args.slugs_from else EXISTING_PARQUET
    df_existing = pd.read_parquet(slugs_parquet)
    slugs = df_existing["Slug"].dropna().unique().tolist()
    # SourceURL may not be present in custom parquets — fall back to constructing from slug
    if "SourceURL" in df_existing.columns:
        source_urls = dict(zip(df_existing["Slug"], df_existing["SourceURL"]))
    else:
        source_urls = {s: BASE_URL.format(slug=s) for s in slugs}

    # Resume support
    df_prev = None
    done_slugs: set[str] = set()
    if args.resume and args.out.exists():
        df_prev = pd.read_parquet(args.out)
        done_slugs = set(df_prev["Slug"].dropna().tolist())
        print(f"Resuming — {len(done_slugs)} slugs already scraped")

    # Filter out finished slugs BEFORE applying --limit; the other order makes
    # a resumed, limited run pick only slugs it has already scraped.
    pending = [s for s in slugs if s not in done_slugs]
    skipped = len(slugs) - len(pending)
    if args.limit:
        pending = pending[: args.limit]

    results: list[dict[str, Any]] = []
    failed = 0

    _UA = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
    )

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)

        for i, slug in enumerate(pending):
            url = BASE_URL.format(slug=slug)
            print(f"[{i+1}/{len(pending)}] {slug} … ", end="", flush=True)

            # A null SourceURL cell would otherwise be stored as NaN.
            known_url = source_urls.get(slug)
            source_url = known_url if isinstance(known_url, str) and known_url else url

            # Use a fresh browser context per slug to avoid Cloudflare session-level
            # bot detection, which fires on the 2nd+ request in the same context.
            context = browser.new_context(
                user_agent=_UA,
                viewport={"width": 1280, "height": 900},
            )
            page = context.new_page()

            try:
                page.goto(url, timeout=NAV_TIMEOUT_MS, wait_until="domcontentloaded")
                page.wait_for_timeout(RENDER_WAIT_MS)
                recipe = _parse_recipe(page, slug, source_url)
            except PWTimeout:
                print("TIMEOUT")
                failed += 1
            except Exception as exc:
                print(f"ERROR: {exc}")
                failed += 1
            else:
                if recipe is None:
                    print("no content (404 or redirect)")
                    failed += 1
                elif recipe["HasFullRecipe"]:
                    n = len(recipe["RecipeIngredientParts"])
                    s = len(recipe["RecipeInstructions"])
                    print(f"OK ({n} ingredients, {s} steps)")
                    results.append(recipe)
                else:
                    print(f"partial (ings={len(recipe['RecipeIngredientParts'])}, steps={len(recipe['RecipeInstructions'])})")
                    results.append(recipe)
            finally:
                context.close()

            time.sleep(args.delay)

        browser.close()

    print(f"\nDone — {len(results)} scraped, {skipped} skipped, {failed} failed")

    if results:
        df_out = pd.DataFrame(results)
        args.out.parent.mkdir(parents=True, exist_ok=True)
        # On resume, keep the previously scraped rows; a slug scraped again in
        # this run replaces its older row.
        if df_prev is not None:
            df_out = pd.concat([df_prev, df_out], ignore_index=True)
            df_out = df_out.drop_duplicates(subset=["Slug"], keep="last")
        df_out.to_parquet(args.out, index=False)
        full_count = df_out["HasFullRecipe"].sum() if "HasFullRecipe" in df_out.columns else "?"
        print(f"Saved {len(df_out)} rows to {args.out} ({full_count} with full recipes)")
    else:
        print("No results — output not written")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
538
scripts/pipeline/purple_carrot/scrape_recipes.py
Normal file
538
scripts/pipeline/purple_carrot/scrape_recipes.py
Normal file
|
|
@ -0,0 +1,538 @@
|
||||||
|
"""
|
||||||
|
scrape_recipes.py — fetch full recipe data for slugs in pc_slugs.jsonl.
|
||||||
|
|
||||||
|
For each slug:
|
||||||
|
1. Try Wayback /api/v1/products/<slug> — oldest capture first (pre-HelloFresh
|
||||||
|
acquisition data is more complete).
|
||||||
|
2. If instructions are empty, try the recipe HTML page via Wayback and parse
|
||||||
|
inline JSON state or structured markup.
|
||||||
|
3. Merge with metadata already in the manifest (title, tags, cook_time, etc.)
|
||||||
|
4. Emit one row per recipe to recipes_purplecarrot.parquet in food.com columnar
|
||||||
|
format so build_recipe_index.py can import it unchanged.
|
||||||
|
|
||||||
|
Output columns (food.com schema + PC extras ignored by the indexer):
|
||||||
|
RecipeId, Name, Subtitle, RecipeIngredientParts, RecipeInstructions,
|
||||||
|
RecipeCategory, Keywords, Calories, FatContent, ProteinContent,
|
||||||
|
SodiumContent, SugarContent, CarbohydrateContent, FiberContent,
|
||||||
|
RecipeServings, Description, ImageURL, CookTime, Slug, Source
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
conda run -n cf python -m scripts.pipeline.purple_carrot.scrape_recipes
|
||||||
|
conda run -n cf python -m scripts.pipeline.purple_carrot.scrape_recipes \\
|
||||||
|
--slugs /Library/Assets/kiwi/pipeline/pc_slugs.jsonl \\
|
||||||
|
--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet \\
|
||||||
|
--resume
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
CDX_BASE = "https://web.archive.org/cdx/search/cdx"
|
||||||
|
WB_BASE = "https://web.archive.org/web"
|
||||||
|
PC_HOST = "www.purplecarrot.com"
|
||||||
|
|
||||||
|
REPLAY_DELAY = 2.0
|
||||||
|
CDX_DELAY = 3.0 # archive.org CDX rate-limits aggressively; be polite
|
||||||
|
|
||||||
|
DEFAULT_SLUGS = Path("/Library/Assets/kiwi/pipeline/pc_slugs.jsonl")
|
||||||
|
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet")
|
||||||
|
|
||||||
|
# Inline JSON state embedded by the SSR renderer — used as fallback HTML parser
|
||||||
|
_NEXT_DATA_RE = re.compile(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', re.DOTALL)
|
||||||
|
_REDUX_STATE_RE = re.compile(r'window\.__INITIAL_STATE__\s*=\s*(\{.*?\});\s*\n', re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Wayback helpers ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _cdx_get(params: dict) -> list:
    """CDX request with retry on 429/503 (archive.org rate-limits aggressively).

    Args:
        params: Query parameters sent to the CDX endpoint.

    Returns:
        The decoded JSON rows, or an empty list when the response is empty or
        all four attempts fail.

    A 429/503 answer backs off for 15 s, doubling each attempt; any other
    request or decoding failure waits 5 s times the attempt number.  No wait
    is performed after the last attempt, since nothing follows it.
    """
    max_attempts = 4
    for attempt in range(max_attempts):
        try:
            resp = requests.get(CDX_BASE, params=params, timeout=25)
            if resp.status_code in (429, 503):
                wait = 15 * (2 ** attempt)
                logger.debug("CDX %s — backing off %ds", resp.status_code, wait)
            else:
                resp.raise_for_status()
                rows = resp.json()
                return rows if rows else []
        except (requests.RequestException, ValueError) as exc:
            logger.debug("CDX attempt %d failed: %s", attempt + 1, exc)
            wait = 5 * (attempt + 1)
        # Sleeping after the final attempt (up to 120 s) only delayed the
        # empty result, so skip it.
        if attempt < max_attempts - 1:
            time.sleep(wait)
    return []
|
||||||
|
|
||||||
|
|
||||||
|
def _cdx_timestamps(slug: str) -> list[str]:
    """List capture timestamps of a product's API URL within 2019-2021.

    Only HTTP 200 captures are requested, at most 20 of them.  Timestamps are
    returned in the order the CDX index reports them, which callers treat as
    oldest first.  The list is empty when the index has no data rows.
    """
    query = {
        "url": f"{PC_HOST}/api/v1/products/{slug}",
        "output": "json",
        "fl": "timestamp,statuscode",
        "filter": "statuscode:200",
        "limit": "20",
        # Pre-HelloFresh-acquisition captures (2019-2021) are most likely
        # to have full instructions — API stripped them post-acquisition.
        "from": "20190101",
        "to": "20211231",
    }
    rows = _cdx_get(query)
    # Row 0 is the header; each following row starts with the timestamp.
    return [capture[0] for capture in rows[1:]] if len(rows) >= 2 else []
|
||||||
|
|
||||||
|
|
||||||
|
def _wayback_json(url: str, timestamp: str) -> Any | None:
    """Replay ``url`` at ``timestamp`` through the Wayback Machine and decode JSON.

    Returns the decoded body of a 200 response, or ``None`` when the capture
    is gone (404/410) or all three attempts fail.  Every failed attempt --
    network error, undecodable body, or an unexpected status such as 429/5xx
    -- is followed by an exponential back-off before the next one.
    """
    replay = f"{WB_BASE}/{timestamp}/{url}"
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            resp = requests.get(replay, timeout=30)
            if resp.status_code == 200:
                return resp.json()
            if resp.status_code in (404, 410):
                return None
            # Other statuses used to be retried instantly with no delay.
            logger.debug(
                "Wayback JSON attempt %d got HTTP %s (%s)",
                attempt + 1, resp.status_code, url,
            )
        except (requests.RequestException, ValueError) as exc:
            logger.debug("Wayback JSON attempt %d failed (%s): %s", attempt + 1, url, exc)
        if attempt < max_attempts - 1:
            time.sleep(2 ** attempt)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _wayback_html(url: str, timestamp: str) -> str | None:
    """Replay ``url`` at ``timestamp`` through the Wayback Machine and return its text.

    Returns the body text of a 200 response, or ``None`` when the capture is
    gone (404/410) or all three attempts fail.  Every failed attempt --
    network error or an unexpected status such as 429/5xx -- is followed by an
    exponential back-off before the next one.
    """
    replay = f"{WB_BASE}/{timestamp}/{url}"
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            resp = requests.get(replay, timeout=30)
            if resp.status_code == 200:
                return resp.text
            if resp.status_code in (404, 410):
                return None
            # Other statuses used to be retried instantly with no delay.
            logger.debug(
                "Wayback HTML attempt %d got HTTP %s (%s)",
                attempt + 1, resp.status_code, url,
            )
        except requests.RequestException as exc:
            logger.debug("Wayback HTML attempt %d failed (%s): %s", attempt + 1, url, exc)
        if attempt < max_attempts - 1:
            time.sleep(2 ** attempt)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Recipe extraction from API JSON ──────────────────────────────────────────
|
||||||
|
|
||||||
|
def _extract_from_api(data: dict) -> dict | None:
    """Parse a /api/v1/products/<slug> response into our recipe dict.

    Recipe details are read from the first entry of ``data["skus"]``; title,
    subtitle and slug come from the top level.

    Returns None only when ``data`` is empty or not a dict.  Otherwise a dict
    is returned even if some fields (including the title) are empty — the
    caller merges it with manifest metadata.  ``has_full_recipe`` is true when
    both steps and ingredients were found.
    """
    if not data or not isinstance(data, dict):
        return None

    # ``or ""`` also covers JSON null, which would crash on .strip().
    title = (data.get("title") or "").strip()
    subtitle = (data.get("subtitle") or "").strip()
    slug = data.get("slug", "")

    skus = data.get("skus") or []
    sku = skus[0] if skus else {}
    if not isinstance(sku, dict):
        sku = {}

    # Instructions: list of {step_number, title, description}
    raw_instructions = [s for s in (sku.get("instructions") or []) if isinstance(s, dict)]
    steps: list[str] = []
    for step in sorted(raw_instructions, key=lambda s: s.get("step_number") or 0):
        parts = [step[key] for key in ("title", "description") if step.get(key)]
        if parts:
            steps.append(". ".join(parts))

    # Ingredients: may be in ingredients_quantity or ingredients
    raw_ingr = sku.get("ingredients_quantity") or sku.get("ingredients") or []
    ingredients: list[str] = []
    for item in raw_ingr:
        if isinstance(item, dict):
            qty = item.get("quantity") or item.get("qty") or ""
            unit = item.get("unit") or ""
            # Try each known location of the name in turn.  The previous
            # one-line conditional bound looser than its ``or`` chain, so
            # item["name"] was ignored whenever "ingredient" was not a dict.
            nested = item.get("ingredient")
            name = (
                item.get("name")
                or (nested.get("name") if isinstance(nested, dict) else None)
                or item.get("ingredient_name")
                or ""
            )
            raw = item.get("raw") or item.get("display_name") or ""
            line = raw or " ".join(filter(None, [str(qty), str(unit), str(name)])).strip()
            if line:
                ingredients.append(line)
        elif isinstance(item, str) and item.strip():
            ingredients.append(item.strip())

    nutrition = sku.get("nutrition_label") or {}
    if not isinstance(nutrition, dict):
        nutrition = {}
    calories = _num(nutrition.get("calories") or sku.get("calories"))
    fat = _num(nutrition.get("total_fat") or sku.get("fat"))
    protein = _num(nutrition.get("protein") or sku.get("protein"))
    sodium = _num(nutrition.get("sodium") or sku.get("sodium"))
    sugar = _num(nutrition.get("sugar") or nutrition.get("total_sugars"))
    carbs = _num(nutrition.get("total_carbohydrate") or sku.get("carbs"))
    fiber = _num(nutrition.get("dietary_fiber") or sku.get("fiber"))

    tags = sku.get("tags") or data.get("tags") or []
    category = sku.get("meal_type") or sku.get("product_type") or ""
    servings = _num(sku.get("servings"))

    cook_time = sku.get("prep_and_cook_time") or ""
    description = sku.get("description") or ""

    images = sku.get("hero_images") or sku.get("image_versions") or []
    # hero_images can be a list OR a dict keyed by size string — normalise to list
    if isinstance(images, dict):
        images = list(images.values())
    image_url = ""
    if images and isinstance(images[0], dict):
        image_url = images[0].get("image_url") or images[0].get("url") or ""
    if not image_url and data.get("square_image"):
        sq = data["square_image"]
        # Keep image_url a string even when the "url" key is missing or null.
        image_url = (sq.get("url") or "") if isinstance(sq, dict) else ""

    return {
        "slug": slug,
        "title": title,
        "subtitle": subtitle,
        "steps": steps,
        "ingredients": ingredients,
        "category": category,
        "tags": tags,
        "calories": calories,
        "fat": fat,
        "protein": protein,
        "sodium": sodium,
        "sugar": sugar,
        "carbs": carbs,
        "fiber": fiber,
        "servings": servings,
        "cook_time": cook_time,
        "description": description,
        "image_url": image_url,
        "has_full_recipe": bool(steps and ingredients),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _num(val: Any) -> float | None:
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
v = float(str(val).replace("g", "").replace("mg", "").split()[0])
|
||||||
|
return v if v > 0 else None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Fallback: HTML inline state parsing ──────────────────────────────────────
|
||||||
|
|
||||||
|
def _extract_from_html(html: str, slug: str) -> dict | None:
    """Try to pull recipe data from inline state embedded in older SSR pages.

    Three sources are tried in order; the first one that yields a recipe wins:

    1. Next.js ``__NEXT_DATA__`` (``props.pageProps.recipe`` / ``.product``)
    2. Redux ``__INITIAL_STATE__`` (a handful of common top-level keys)
    3. JSON-LD ``<script type="application/ld+json">`` blocks of type ``Recipe``

    Args:
        html: Raw page markup.
        slug: Recipe slug, copied into the JSON-LD result.

    Returns:
        A recipe dict, or ``None`` when no source yields one. For sources 1
        and 2 the value is whatever ``_extract_from_api`` returns. For
        source 3, ``has_full_recipe`` is true only when both steps and
        ingredients were found.
    """
    # Attempt 1: Next.js __NEXT_DATA__
    m = _NEXT_DATA_RE.search(html)
    if m:
        try:
            state = json.loads(m.group(1))
            # Walk the Next.js page props tree looking for recipe data
            props = state.get("props", {}).get("pageProps", {})
            recipe = props.get("recipe") or props.get("product")
            if recipe and isinstance(recipe, dict) and recipe.get("title"):
                return _extract_from_api(recipe)
        except Exception as exc:
            # Best effort: a malformed blob just means we try the next source.
            logger.debug("[%s] __NEXT_DATA__ parse failed: %s", slug, exc)

    # Attempt 2: Redux __INITIAL_STATE__
    m = _REDUX_STATE_RE.search(html)
    if m:
        try:
            state = json.loads(m.group(1))
            # Try common Redux state shapes
            for key in ("recipe", "product", "currentRecipe", "currentProduct"):
                recipe = state.get(key)
                if recipe and isinstance(recipe, dict) and recipe.get("title"):
                    return _extract_from_api(recipe)
        except Exception as exc:
            logger.debug("[%s] __INITIAL_STATE__ parse failed: %s", slug, exc)

    # Attempt 3: JSON-LD structured data
    ld_matches = re.findall(
        r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>',
        html, re.DOTALL
    )
    for raw in ld_matches:
        try:
            ld = json.loads(raw)
            if isinstance(ld, list):
                # Skip non-dict entries instead of raising on x.get().
                ld = next(
                    (x for x in ld if isinstance(x, dict) and x.get("@type") == "Recipe"),
                    None,
                )
            if not isinstance(ld, dict) or ld.get("@type") != "Recipe":
                continue

            instructions = ld.get("recipeInstructions") or []
            if isinstance(instructions, (str, dict)):
                # A bare string/object must not be iterated element-wise.
                instructions = [instructions]
            steps = []
            for inst in instructions:
                if isinstance(inst, dict):
                    steps.append(inst.get("text", ""))
                elif isinstance(inst, str):
                    steps.append(inst)
            steps = [s for s in steps if s]

            ingredients = ld.get("recipeIngredient") or []
            if isinstance(ingredients, str):
                ingredients = [ingredients]
            ingredients = [i for i in ingredients if i]

            keywords = ld.get("keywords")
            tags = (
                [t.strip() for t in keywords.split(",") if t.strip()]
                if isinstance(keywords, str) else []
            )

            nutrition = ld.get("nutrition")
            calories = _num(nutrition.get("calories")) if isinstance(nutrition, dict) else None

            recipe_yield = ld.get("recipeYield")
            if isinstance(recipe_yield, list):
                recipe_yield = recipe_yield[0] if recipe_yield else None

            # "image" may be a URL string, a list of either, or an object
            # with a "url" field; an empty list must not abort the recipe.
            image = ld.get("image")
            if isinstance(image, list):
                image = image[0] if image else ""
            if isinstance(image, dict):
                image = image.get("url")
            image_url = image if isinstance(image, str) else ""

            return {
                "slug": slug,
                "title": ld.get("name", ""),
                "subtitle": "",
                "steps": steps,
                "ingredients": ingredients,
                "category": ld.get("recipeCategory", ""),
                "tags": tags,
                "calories": calories,
                "fat": None, "protein": None, "sodium": None,
                "sugar": None, "carbs": None, "fiber": None,
                "servings": _num(recipe_yield),
                "cook_time": str(ld.get("totalTime") or ld.get("cookTime") or ""),
                "description": ld.get("description", ""),
                "image_url": image_url,
                # Same rule as the API extractor: only "full" when both
                # steps and ingredients are present.
                "has_full_recipe": bool(steps and ingredients),
            }
        except Exception as exc:
            logger.debug("[%s] JSON-LD block skipped: %s", slug, exc)

    return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Per-slug fetch ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def fetch_recipe(slug: str, manifest_meta: dict) -> dict | None:
    """Collect the richest Wayback-archived record available for one slug.

    Archived product-API captures are tried first. If none of them carries a
    full recipe, archived HTML recipe pages are tried next. Whatever was
    extracted is then layered over the manifest metadata.

    Returns the merged record, or None when no title could be determined.
    """
    product_api_url = f"https://{PC_HOST}/api/v1/products/{slug}"
    recipe_page_url = f"https://{PC_HOST}/recipe/{slug}"

    best: dict | None = None

    # Product API captures (oldest captures are most likely to have full data)
    api_stamps = _cdx_timestamps(slug)
    time.sleep(CDX_DELAY)

    if not api_stamps:
        manifest_stamp = manifest_meta.get("wayback_ts")
        if manifest_stamp:
            api_stamps = [manifest_stamp]

    for stamp in api_stamps:
        payload = _wayback_json(product_api_url, stamp)
        time.sleep(REPLAY_DELAY)
        parsed = _extract_from_api(payload) if payload else None
        if not parsed:
            continue
        best = parsed
        if best.get("has_full_recipe"):
            logger.debug("[%s] Full recipe from API (ts=%s)", slug, stamp)
            break
        logger.debug("[%s] Partial API data (ts=%s) — trying HTML fallback", slug, stamp)

    # HTML fallback when the API produced no steps/ingredients
    if not (best and best.get("has_full_recipe")):
        cdx_rows = _cdx_get({
            "url": f"{PC_HOST}/recipe/{slug}",
            "output": "json",
            "fl": "timestamp,statuscode",
            "filter": "statuscode:200",
            "limit": "10",
        })
        # First row is skipped (it is treated as a header row).
        page_stamps = []
        if len(cdx_rows) > 1:
            page_stamps = [row[0] for row in cdx_rows[1:]]
        time.sleep(CDX_DELAY)

        for stamp in page_stamps:
            page = _wayback_html(recipe_page_url, stamp)
            time.sleep(REPLAY_DELAY)
            if not page:
                continue
            from_page = _extract_from_html(page, slug)
            if from_page and from_page.get("has_full_recipe"):
                logger.debug("[%s] Full recipe from HTML (ts=%s)", slug, stamp)
                best = from_page
                break

    # Start from manifest metadata; extracted data overrides it below.
    merged: dict = {
        "slug": slug,
        "title": manifest_meta.get("title", ""),
        "subtitle": manifest_meta.get("subtitle", ""),
        "steps": [],
        "ingredients": [],
        "category": "",
        "tags": manifest_meta.get("tags") or [],
        "calories": None,
        "fat": None,
        "protein": None,
        "sodium": None,
        "sugar": None,
        "carbs": None,
        "fiber": None,
        "servings": manifest_meta.get("serving_size"),
        "cook_time": manifest_meta.get("cook_time", ""),
        "description": manifest_meta.get("description", ""),
        "image_url": manifest_meta.get("image_url", ""),
        "source": "purple_carrot",
        "wayback_ts": manifest_meta.get("wayback_ts", ""),
        "has_full_recipe": False,
    }

    if best:
        for field, extracted in best.items():
            # An empty extracted value never clobbers a non-empty manifest value.
            if not extracted and field in merged and merged[field]:
                continue
            merged[field] = extracted

    if not merged["title"]:
        logger.warning("[%s] No title — skipping", slug)
        return None

    return merged
|
||||||
|
|
||||||
|
|
||||||
|
# ── Output formatting ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _to_dataframe_row(r: dict) -> dict:
|
||||||
|
"""Convert merged recipe dict to food.com-compatible parquet row."""
|
||||||
|
# Build plain-text input for allrecipes-style corpus compatibility
|
||||||
|
lines = [r["title"]]
|
||||||
|
if r.get("subtitle"):
|
||||||
|
lines.append(r["subtitle"])
|
||||||
|
if r.get("description"):
|
||||||
|
lines.append("")
|
||||||
|
lines.append(r["description"])
|
||||||
|
if r.get("ingredients"):
|
||||||
|
lines += ["", "Ingredients:"] + [f"- {i}" for i in r["ingredients"]]
|
||||||
|
if r.get("steps"):
|
||||||
|
lines += ["", "Directions:"] + [f"- {s}" for s in r["steps"]]
|
||||||
|
plain_text = "\n".join(lines)
|
||||||
|
|
||||||
|
source_url = f"https://www.purplecarrot.com/recipe/{r['slug']}"
|
||||||
|
|
||||||
|
return {
|
||||||
|
# food.com schema columns (used by build_recipe_index.py)
|
||||||
|
"RecipeId": f"pc_{r['slug']}",
|
||||||
|
"Name": r["title"],
|
||||||
|
"RecipeIngredientParts": r.get("ingredients") or [],
|
||||||
|
"RecipeInstructions": r.get("steps") or [],
|
||||||
|
"RecipeCategory": r.get("category", ""),
|
||||||
|
"Keywords": r.get("tags") or [],
|
||||||
|
"Calories": r.get("calories"),
|
||||||
|
"FatContent": r.get("fat"),
|
||||||
|
"ProteinContent": r.get("protein"),
|
||||||
|
"SodiumContent": r.get("sodium"),
|
||||||
|
"SugarContent": r.get("sugar"),
|
||||||
|
"CarbohydrateContent": r.get("carbs"),
|
||||||
|
"FiberContent": r.get("fiber"),
|
||||||
|
"RecipeServings": r.get("servings"),
|
||||||
|
# PC-specific extras (ignored by indexer, used by training pipeline)
|
||||||
|
"Subtitle": r.get("subtitle", ""),
|
||||||
|
"Description": r.get("description", ""),
|
||||||
|
"ImageURL": r.get("image_url", ""),
|
||||||
|
"CookTime": r.get("cook_time", ""),
|
||||||
|
"Slug": r["slug"],
|
||||||
|
"Source": "purple_carrot",
|
||||||
|
"SourceURL": source_url, # canonical attribution link shown in recipe UI
|
||||||
|
"HasFullRecipe": r.get("has_full_recipe", False),
|
||||||
|
"WaybackTs": r.get("wayback_ts", ""),
|
||||||
|
# Also emit plain-text input for allrecipes-compatible corpus search
|
||||||
|
"input": plain_text,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def scrape(slugs_file: Path, out_file: Path, resume: bool = True) -> None:
    """Fetch every slug in the manifest and write the results to parquet.

    Args:
        slugs_file: JSON Lines manifest; each record must carry a ``slug``.
        out_file: Destination parquet file (also the resume source).
        resume: When true and *out_file* exists, slugs already present in
            its ``Slug`` column are skipped and their rows are kept.

    The output is rewritten every 50 processed slugs as a checkpoint and
    once more at the end. A missing manifest is logged and ends the run.
    """
    import pandas as pd

    # Load manifest
    if not slugs_file.exists():
        logger.error("Slugs manifest not found: %s", slugs_file)
        return

    manifest: dict[str, dict] = {}
    # Decode explicitly as UTF-8: JSON text is UTF-8, and relying on the
    # platform default makes non-ASCII titles locale-dependent.
    with open(slugs_file, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                rec = json.loads(line)
                slug = rec["slug"]
                # Keep the richest metadata if slug appears from multiple sources
                if slug not in manifest or rec.get("source") == "menu":
                    manifest[slug] = rec

    logger.info("Manifest: %d unique slugs", len(manifest))

    # Load already-scraped slugs for resume
    done_slugs: set[str] = set()
    existing_rows: list[dict] = []
    if resume and out_file.exists():
        try:
            existing_df = pd.read_parquet(out_file)
            done_slugs = set(existing_df["Slug"].tolist())
            existing_rows = existing_df.to_dict("records")
            logger.info("Resume: %d already scraped", len(done_slugs))
        except Exception as exc:
            # Unreadable output is not fatal: fall back to a full scrape.
            logger.warning("Could not load existing parquet for resume: %s", exc)

    todo = [s for s in manifest if s not in done_slugs]
    logger.info("%d slugs to fetch", len(todo))

    rows = list(existing_rows)
    for i, slug in enumerate(todo, 1):
        logger.info("[%d/%d] %s", i, len(todo), slug)
        recipe = fetch_recipe(slug, manifest[slug])
        if recipe:
            rows.append(_to_dataframe_row(recipe))
            status = "full" if recipe.get("has_full_recipe") else "partial"
            logger.info("  -> %s (%s)", recipe.get("title", "?"), status)
        else:
            logger.warning("  -> skipped (no title)")

        # Write checkpoint every 50 recipes
        if i % 50 == 0:
            _write_parquet(rows, out_file)
            logger.info("Checkpoint: %d recipes written", len(rows))

    _write_parquet(rows, out_file)
    full = sum(1 for r in rows if r.get("HasFullRecipe"))
    logger.info(
        "Done. %d recipes written to %s (%d full, %d partial).",
        len(rows), out_file, full, len(rows) - full,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _write_parquet(rows: list[dict], out_file: Path) -> None:
    """Write *rows* to *out_file* as parquet, creating parent directories.

    The data is written to a sibling temporary file and then renamed over
    the target, so an interrupted checkpoint cannot leave a truncated
    parquet file behind for the next resume to trip over.
    """
    import pandas as pd

    out_file.parent.mkdir(parents=True, exist_ok=True)
    tmp_file = out_file.with_name(out_file.name + ".tmp")
    try:
        pd.DataFrame(rows).to_parquet(tmp_file, index=False)
        # Same-directory rename: replaces the old file in a single step.
        tmp_file.replace(out_file)
    except BaseException:
        # Do not leave a partial temp file behind on failure or interrupt.
        if tmp_file.exists():
            tmp_file.unlink()
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: parse options, set up logging, run the scrape."""
    cli = argparse.ArgumentParser(description="Scrape Purple Carrot recipes from Wayback")
    cli.add_argument("--slugs", type=Path, default=DEFAULT_SLUGS)
    cli.add_argument("--out", type=Path, default=DEFAULT_OUT)
    cli.add_argument(
        "--no-resume",
        dest="resume",
        action="store_false",
        help="Start fresh (ignore existing parquet)",
    )
    cli.add_argument("--debug", action="store_true")
    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    # Imported here, after basicConfig(), exactly as in the original flow.
    from scripts.pipeline.log_utils import attach_pipeline_log
    attach_pipeline_log("scrape_recipes")

    scrape(opts.slugs, opts.out, resume=opts.resume)


if __name__ == "__main__":
    main()
|
||||||
41
scripts/pipeline/purple_carrot/weekly_harvest.sh
Executable file
41
scripts/pipeline/purple_carrot/weekly_harvest.sh
Executable file
|
|
@ -0,0 +1,41 @@
|
||||||
|
#!/usr/bin/env bash
# Weekly Purple Carrot recipe harvest
# Runs every Sunday night via cron.
# Three steps: discover this week's menu, scrape full recipe data for new
# slugs, then ingest the scraped recipes into the shared corpus DB.
# Logs to /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log

# -e/-u: stop on errors and unset variables; pipefail: a failing python step
# is not masked by the trailing `tee`.
set -euo pipefail

REPO="/Library/Development/CircuitForge/kiwi"
MENU_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet"
LIVE_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet"
LOG_DIR="/Library/Assets/kiwi/pipeline/logs"
LOG="$LOG_DIR/purple_carrot_harvest.log"

mkdir -p "$LOG_DIR"

echo "=== Purple Carrot harvest $(date -u '+%Y-%m-%d %H:%M UTC') ===" >> "$LOG"

cd "$REPO"

# Step 1: discover this week's menu slugs
echo "[1/3] Discovering current menu slugs..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
    --out "$MENU_OUT" 2>&1 | tee -a "$LOG"

# Step 2: scrape full recipe data for new slugs only (--resume skips already-scraped)
echo "[2/3] Scraping live recipe pages..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
    --slugs-from "$MENU_OUT" \
    --out "$LIVE_OUT" \
    --resume \
    --delay 3.0 2>&1 | tee -a "$LOG"

# Step 3: ingest new recipes into the shared corpus DB
echo "[3/3] Ingesting into corpus DB..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/ingest_purplecarrot.py \
    --parquet "$LIVE_OUT" \
    --db /Library/Assets/kiwi/kiwi.db 2>&1 | tee -a "$LOG"

echo "=== Done $(date -u '+%Y-%m-%d %H:%M UTC') ===" >> "$LOG"
echo "" >> "$LOG"
|
||||||
127
tests/services/test_llm_router_task.py
Normal file
127
tests/services/test_llm_router_task.py
Normal file
|
|
@ -0,0 +1,127 @@
|
||||||
|
"""Tests for task-based routing added to get_meal_plan_router()."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx(url: str = "http://node:8080") -> MagicMock:
|
||||||
|
"""Mock context manager returned by task_allocate()."""
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
alloc.allocation_id = "alloc-task-1"
|
||||||
|
alloc.service = "cf-text"
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx_not_registered() -> MagicMock:
    """Build a mock context manager whose __enter__ raises TaskNotRegistered."""
    from app.services.task_inference import TaskNotRegistered

    manager = MagicMock()
    manager.__enter__.side_effect = TaskNotRegistered("not registered")
    manager.__exit__.return_value = False
    return manager
|
||||||
|
|
||||||
|
|
||||||
|
def _make_direct_alloc_ctx(url: str = "http://node:8080") -> MagicMock:
|
||||||
|
"""Mock context manager returned by CFOrchClient.allocate()."""
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_path_returns_orch_router_on_success(monkeypatch):
    """A successful task allocation yields an _OrchTextRouter bound to the allocated URL."""
    import unittest.mock as um

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    task_ctx = _make_task_ctx(url="http://node:9001")

    # Patch the name as it exists in llm_router's own namespace (module-level import).
    patched_allocate = um.patch(
        "app.services.meal_plan.llm_router.task_allocate",
        return_value=task_ctx,
    )
    with patched_allocate:
        from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
        router, _ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9001"
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_not_registered_falls_back_to_direct_allocate(monkeypatch):
    """TaskNotRegistered from task allocation falls back to a direct cf-text allocation."""
    import unittest.mock as um

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    fallback_ctx = _make_direct_alloc_ctx(url="http://node:9002")
    unregistered_ctx = _make_task_ctx_not_registered()

    # Both names are patched in llm_router's namespace, where they are looked up.
    with um.patch("app.services.meal_plan.llm_router.task_allocate",
                  return_value=unregistered_ctx):
        with um.patch("app.services.meal_plan.llm_router.CFOrchClient") as client_cls:
            client_cls.return_value.allocate.return_value = fallback_ctx
            from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
            router, _ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9002"
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_cf_orch_url_returns_llm_router(monkeypatch):
    """get_meal_plan_router() returns LLMRouter when no coordinator is configured."""
    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    # Per .env.example, GPU_SERVER_URL is also honoured and CF_LICENSE_KEY
    # can switch on a hosted default. Clear them too so this test does not
    # depend on the developer's or CI environment.
    monkeypatch.delenv("GPU_SERVER_URL", raising=False)
    monkeypatch.delenv("CF_LICENSE_KEY", raising=False)

    import unittest.mock as um
    mock_lr = MagicMock()
    with um.patch("app.services.meal_plan.llm_router.LLMRouter", return_value=mock_lr):
        from app.services.meal_plan.llm_router import get_meal_plan_router
        router, ctx = get_meal_plan_router()

    assert router is mock_lr
|
||||||
|
|
||||||
|
|
||||||
|
def test_tier1_general_exception_falls_back_to_direct_allocate(monkeypatch):
    """A RuntimeError from task allocation also falls back to direct allocation."""
    import unittest.mock as um

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    fallback_ctx = _make_direct_alloc_ctx(url="http://node:9003")

    # Context manager that blows up on entry, as if the coordinator were down.
    broken_ctx = MagicMock()
    broken_ctx.__enter__.side_effect = RuntimeError("coordinator down")
    broken_ctx.__exit__.return_value = False

    with um.patch("app.services.meal_plan.llm_router.task_allocate",
                  return_value=broken_ctx):
        with um.patch("app.services.meal_plan.llm_router.CFOrchClient") as client_cls:
            client_cls.return_value.allocate.return_value = fallback_ctx
            from app.services.meal_plan.llm_router import get_meal_plan_router, _OrchTextRouter
            router, _ctx = get_meal_plan_router()

    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9003"
|
||||||
|
|
||||||
|
|
||||||
|
def test_tier2_none_alloc_releases_ctx_and_falls_through(monkeypatch):
    """When the direct allocation yields None, its ctx is released and LLMRouter is used."""
    import unittest.mock as um

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")

    # Direct-allocation context manager whose __enter__ yields None.
    empty_ctx = MagicMock()
    empty_ctx.__enter__.return_value = None
    empty_ctx.__exit__.return_value = False

    local_router = MagicMock()
    unregistered_ctx = _make_task_ctx_not_registered()

    with um.patch("app.services.meal_plan.llm_router.task_allocate",
                  return_value=unregistered_ctx):
        with um.patch("app.services.meal_plan.llm_router.CFOrchClient") as client_cls:
            with um.patch("app.services.meal_plan.llm_router.LLMRouter",
                          return_value=local_router):
                client_cls.return_value.allocate.return_value = empty_ctx
                from app.services.meal_plan.llm_router import get_meal_plan_router
                router, _ctx = get_meal_plan_router()

    assert router is local_router
    empty_ctx.__exit__.assert_called_once_with(None, None, None)
|
||||||
164
tests/services/test_task_inference.py
Normal file
164
tests/services/test_task_inference.py
Normal file
|
|
@ -0,0 +1,164 @@
|
||||||
|
"""Tests for app/services/task_inference.py"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _ok_resp(url: str = "http://node:8080", allocation_id: str = "alloc-123") -> MagicMock:
|
||||||
|
m = MagicMock()
|
||||||
|
m.status_code = 200
|
||||||
|
m.is_success = True
|
||||||
|
m.json.return_value = {
|
||||||
|
"url": url,
|
||||||
|
"allocation_id": allocation_id,
|
||||||
|
"gpu_id": 0,
|
||||||
|
"started": True,
|
||||||
|
"warm": False,
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
def _err_resp(status_code: int, text: str = "error") -> MagicMock:
|
||||||
|
m = MagicMock()
|
||||||
|
m.status_code = status_code
|
||||||
|
m.is_success = False
|
||||||
|
m.text = text
|
||||||
|
return m
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_yields_allocation_on_200(monkeypatch):
    """On a 200 response, task_allocate() yields an allocation and releases it afterwards."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    post_target = "app.services.task_inference.httpx.post"
    delete_target = "app.services.task_inference.httpx.delete"

    with patch(post_target, return_value=_ok_resp()) as post_mock:
        with patch(delete_target) as delete_mock:
            from app.services.task_inference import task_allocate
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text") as alloc:
                assert alloc.url == "http://node:8080"
                assert alloc.allocation_id == "alloc-123"
                assert alloc.service == "cf-text"
            # First positional argument of the POST is the coordinator endpoint.
            assert post_mock.call_args[0][0] == "http://coord:7700/api/inference/task"
            delete_mock.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_uses_service_from_response_when_present(monkeypatch):
    """A ``service`` field in the coordinator response takes precedence over service_hint."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    response = _ok_resp()
    response.json.return_value["service"] = "cf-vision"

    with patch("app.services.task_inference.httpx.post", return_value=response):
        with patch("app.services.task_inference.httpx.delete"):
            from app.services.task_inference import task_allocate
            with task_allocate("kiwi", "ocr", service_hint="cf-docuvision") as alloc:
                assert alloc.service == "cf-vision"
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_404_raises_task_not_registered(monkeypatch):
    """A coordinator 404 surfaces as TaskNotRegistered."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    not_found = _err_resp(404)

    with patch("app.services.task_inference.httpx.post", return_value=not_found):
        from app.services.task_inference import task_allocate, TaskNotRegistered
        with pytest.raises(TaskNotRegistered):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_503_raises_runtime_error(monkeypatch):
    """A non-404 coordinator error surfaces as RuntimeError naming the HTTP status."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    unavailable = _err_resp(503, "no GPU")

    with patch("app.services.task_inference.httpx.post", return_value=unavailable):
        from app.services.task_inference import task_allocate
        with pytest.raises(RuntimeError, match="HTTP 503"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_release_called_on_clean_exit(monkeypatch):
    """Leaving the context normally issues a DELETE naming the service and allocation id."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    response = _ok_resp(allocation_id="xyz")

    with patch("app.services.task_inference.httpx.post", return_value=response):
        with patch("app.services.task_inference.httpx.delete") as delete_mock:
            from app.services.task_inference import task_allocate
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
            released_url = delete_mock.call_args[0][0]
            for fragment in ("cf-text", "xyz"):
                assert fragment in released_url
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_release_called_when_inner_block_raises(monkeypatch):
    """The allocation is still DELETEd when the body of the with-block raises."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    response = _ok_resp(allocation_id="abc")

    with patch("app.services.task_inference.httpx.post", return_value=response):
        with patch("app.services.task_inference.httpx.delete") as delete_mock:
            from app.services.task_inference import task_allocate
            with pytest.raises(ValueError):
                with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                    raise ValueError("inner error")
            delete_mock.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_release_failure_is_swallowed(monkeypatch):
    """A failing DELETE on release must not propagate out of task_allocate()."""
    import httpx as _httpx

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    release_error = _httpx.RequestError("gone", request=MagicMock())

    with patch("app.services.task_inference.httpx.post", return_value=_ok_resp()):
        with patch("app.services.task_inference.httpx.delete", side_effect=release_error):
            from app.services.task_inference import task_allocate
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text") as alloc:
                assert alloc.url == "http://node:8080"
            # reaching this point means no exception was raised on exit
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_no_orch_url_raises_runtime_error(monkeypatch):
    """task_allocate() raises RuntimeError when CF_ORCH_URL is not set."""
    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    # Per .env.example, GPU_SERVER_URL is also honoured and CF_LICENSE_KEY
    # can switch on a hosted default. Clear them too so the outcome does not
    # depend on the ambient environment.
    monkeypatch.delenv("GPU_SERVER_URL", raising=False)
    monkeypatch.delenv("CF_LICENSE_KEY", raising=False)
    from app.services.task_inference import task_allocate
    with pytest.raises(RuntimeError, match="CF_ORCH_URL"):
        with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
            pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_network_error_raises_runtime_error(monkeypatch):
    """An httpx.RequestError from the POST is re-raised as RuntimeError ("unreachable")."""
    import httpx as _httpx

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    network_error = _httpx.RequestError("timeout", request=MagicMock())

    with patch("app.services.task_inference.httpx.post", side_effect=network_error):
        from app.services.task_inference import task_allocate
        with pytest.raises(RuntimeError, match="unreachable"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_malformed_json_raises_runtime_error(monkeypatch):
    """A 200 response whose body is not JSON surfaces as RuntimeError ("malformed")."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")

    # Successful status, but .json() fails as it would on an HTML proxy page.
    html_resp = MagicMock(
        status_code=200,
        is_success=True,
        text="<html>proxy error</html>",
    )
    html_resp.json.side_effect = ValueError("not json")

    with patch("app.services.task_inference.httpx.post", return_value=html_resp):
        from app.services.task_inference import task_allocate
        with pytest.raises(RuntimeError, match="malformed"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
|
||||||
|
|
||||||
|
|
||||||
|
def test_task_allocate_missing_url_field_raises_runtime_error(monkeypatch):
    """A 200 response lacking the ``url`` field surfaces as RuntimeError ("malformed")."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")

    incomplete = MagicMock(
        status_code=200,
        is_success=True,
        text='{"allocation_id": "x"}',
    )
    incomplete.json.return_value = {"allocation_id": "x"}  # missing "url"

    with patch("app.services.task_inference.httpx.post", return_value=incomplete):
        from app.services.task_inference import task_allocate
        with pytest.raises(RuntimeError, match="malformed"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
|
||||||
88
tests/services/test_vl_model_task.py
Normal file
88
tests/services/test_vl_model_task.py
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
"""Tests for task-based routing added to _try_docuvision()."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_doc_result(text: str = "RECEIPT TEXT") -> MagicMock:
|
||||||
|
r = MagicMock()
|
||||||
|
r.text = text
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_ctx(url: str = "http://node:9010") -> MagicMock:
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
alloc.allocation_id = "alloc-vis-1"
|
||||||
|
alloc.service = "cf-docuvision"
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def _make_task_not_registered() -> MagicMock:
    """Build a mock context manager whose __enter__ raises TaskNotRegistered."""
    from app.services.task_inference import TaskNotRegistered

    manager = MagicMock()
    manager.__enter__.side_effect = TaskNotRegistered("not registered")
    manager.__exit__.return_value = False
    return manager
|
||||||
|
|
||||||
|
|
||||||
|
def _make_direct_alloc(url: str = "http://node:9011") -> MagicMock:
|
||||||
|
alloc = MagicMock()
|
||||||
|
alloc.url = url
|
||||||
|
ctx = MagicMock()
|
||||||
|
ctx.__enter__ = MagicMock(return_value=alloc)
|
||||||
|
ctx.__exit__ = MagicMock(return_value=False)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def test_try_docuvision_task_path_returns_text(monkeypatch, tmp_path):
    """_try_docuvision() uses task allocation and returns extracted text on success."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    fake_image = tmp_path / "receipt.jpg"
    fake_image.write_bytes(b"fake")

    with patch("app.services.task_inference.task_allocate",
               return_value=_make_task_ctx(url="http://node:9010")), \
         patch("app.services.ocr.docuvision_client.DocuvisionClient") as MockDoc:
        MockDoc.return_value.extract_text.return_value = _mock_doc_result("STORE $12.34")
        # Imported while the patches are active, as in the original test.
        from app.services.ocr.vl_model import _try_docuvision
        result = _try_docuvision(str(fake_image))

    assert result == "STORE $12.34"
    # The client must be built with the URL from the task allocation.
    MockDoc.assert_called_once_with("http://node:9010")
|
||||||
|
|
||||||
|
|
||||||
|
def test_try_docuvision_falls_back_to_direct_on_task_not_registered(monkeypatch, tmp_path):
    """_try_docuvision() falls back to direct cf-docuvision allocation on TaskNotRegistered."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    fake_image = tmp_path / "receipt.jpg"
    fake_image.write_bytes(b"fake")

    with patch("app.services.task_inference.task_allocate",
               return_value=_make_task_not_registered()), \
         patch("circuitforge_orch.client.CFOrchClient") as MockClient, \
         patch("app.services.ocr.docuvision_client.DocuvisionClient") as MockDoc:
        MockClient.return_value.allocate.return_value = _make_direct_alloc("http://node:9011")
        MockDoc.return_value.extract_text.return_value = _mock_doc_result("FALLBACK TEXT")
        # Imported while the patches are active, as in the original test.
        from app.services.ocr.vl_model import _try_docuvision
        result = _try_docuvision(str(fake_image))

    assert result == "FALLBACK TEXT"
    # The client must be built with the URL from the direct allocation,
    # not a task-allocation URL.
    MockDoc.assert_called_once_with("http://node:9011")
|
||||||
|
|
||||||
|
|
||||||
|
def test_try_docuvision_returns_none_without_cf_orch_url(monkeypatch, tmp_path):
    """_try_docuvision() returns None immediately when CF_ORCH_URL is not set."""
    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    # NOTE(review): the project's .env.example describes GPU_SERVER_URL as the
    # primary coordinator setting (CF_ORCH_URL being its alias) and says
    # CF_LICENSE_KEY enables a hosted default. They are cleared here so a
    # developer's environment cannot configure a coordinator behind the
    # test's back. Confirm whether _try_docuvision() actually reads them.
    monkeypatch.delenv("GPU_SERVER_URL", raising=False)
    monkeypatch.delenv("CF_LICENSE_KEY", raising=False)
    fake_image = tmp_path / "receipt.jpg"
    fake_image.write_bytes(b"fake")

    from app.services.ocr.vl_model import _try_docuvision
    result = _try_docuvision(str(fake_image))

    assert result is None
|
||||||
|
|
@ -17,12 +17,17 @@ from app.services.ocr.docuvision_client import DocuvisionClient, DocuvisionResul
|
||||||
|
|
||||||
|
|
||||||
def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
|
def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
|
||||||
"""extract_text() POSTs a base64-encoded image and returns parsed text."""
|
"""extract_text() POSTs image_b64 and returns parsed raw_text."""
|
||||||
image_file = tmp_path / "test.jpg"
|
image_file = tmp_path / "test.jpg"
|
||||||
image_file.write_bytes(b"fake-image-bytes")
|
image_file.write_bytes(b"fake-image-bytes")
|
||||||
|
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_response.json.return_value = {"text": "Cheerios", "confidence": 0.95}
|
mock_response.json.return_value = {
|
||||||
|
"raw_text": "Cheerios",
|
||||||
|
"elements": [],
|
||||||
|
"tables": [],
|
||||||
|
"metadata": {"hint": "text", "confidence": 0.95},
|
||||||
|
}
|
||||||
mock_response.raise_for_status.return_value = None
|
mock_response.raise_for_status.return_value = None
|
||||||
|
|
||||||
with patch("httpx.Client") as mock_client_cls:
|
with patch("httpx.Client") as mock_client_cls:
|
||||||
|
|
@ -41,7 +46,8 @@ def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
|
||||||
assert call_kwargs[0][0] == "http://docuvision:8080/extract"
|
assert call_kwargs[0][0] == "http://docuvision:8080/extract"
|
||||||
posted_json = call_kwargs[1]["json"]
|
posted_json = call_kwargs[1]["json"]
|
||||||
expected_b64 = base64.b64encode(b"fake-image-bytes").decode()
|
expected_b64 = base64.b64encode(b"fake-image-bytes").decode()
|
||||||
assert posted_json["image"] == expected_b64
|
assert posted_json["image_b64"] == expected_b64
|
||||||
|
assert posted_json["hint"] == "text"
|
||||||
|
|
||||||
|
|
||||||
def test_extract_text_raises_on_http_error(tmp_path: Path) -> None:
|
def test_extract_text_raises_on_http_error(tmp_path: Path) -> None:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue