Compare commits

..

No commits in common. "main" and "v0.9.1" have entirely different histories.
main ... v0.9.1

54 changed files with 496 additions and 5920 deletions

View file

@ -21,12 +21,10 @@ DATA_DIR=./data
# IP this machine advertises to the coordinator (must be reachable from coordinator host) # IP this machine advertises to the coordinator (must be reachable from coordinator host)
# CF_ORCH_ADVERTISE_HOST=10.1.10.71 # CF_ORCH_ADVERTISE_HOST=10.1.10.71
# GPU inference server (cf-orch coordinator for recipe scan, LLM generation, etc.) # CF-core hosted coordinator (managed cloud GPU inference — Paid+ tier)
# GPU_SERVER_URL: set to your local cf-orch coordinator (self-hosted rack). # Set CF_ORCH_URL to use a hosted cf-orch coordinator instead of self-hosting.
# CF_ORCH_URL is the backward-compat alias — both are honoured. # CF_LICENSE_KEY is read automatically by CFOrchClient for bearer auth.
# Paid+ default: when CF_LICENSE_KEY is present and neither URL is set, # CF_ORCH_URL=https://orch.circuitforge.tech
# the app automatically points to https://orch.circuitforge.tech.
# GPU_SERVER_URL=http://10.1.10.71:7700
# CF_LICENSE_KEY=CFG-KIWI-xxxx-xxxx-xxxx # CF_LICENSE_KEY=CFG-KIWI-xxxx-xxxx-xxxx
# LLM backend — env-var auto-config (no llm.yaml needed for bare-metal users) # LLM backend — env-var auto-config (no llm.yaml needed for bare-metal users)
@ -59,9 +57,6 @@ CF_APP_NAME=kiwi
# Unset = auto-detect: true if CLOUD_MODE or circuitforge_orch is installed (paid+ local). # Unset = auto-detect: true if CLOUD_MODE or circuitforge_orch is installed (paid+ local).
# Set false to force LocalScheduler even when cf-orch is present. # Set false to force LocalScheduler even when cf-orch is present.
# USE_ORCH_SCHEDULER=false # USE_ORCH_SCHEDULER=false
# GPU_SERVER_URL: cf-orch coordinator endpoint. Required for recipe scan (cf-docuvision)
# and LLM features on a self-hosted rack. CF_ORCH_URL is the backward-compat alias.
# GPU_SERVER_URL=http://10.1.10.71:7700
# Cloud mode (set in compose.cloud.yml; also set here for reference) # Cloud mode (set in compose.cloud.yml; also set here for reference)
# CLOUD_DATA_ROOT=/devl/kiwi-cloud-data # CLOUD_DATA_ROOT=/devl/kiwi-cloud-data

142
README.md
View file

@ -1,118 +1,80 @@
<!-- Logo coming soon — replace docs/kiwi-logo.svg when final icon ships --> # 🥝 Kiwi
<div align="center">
<img src="docs/kiwi-logo.svg" alt="Kiwi logo" width="96" height="96" />
# Kiwi > *Part of the CircuitForge LLC "AI for the tasks the system made hard on purpose" suite.*
**Pantry tracking and recipe suggestions — with or without an LLM.** **Pantry tracking and leftover recipe suggestions.**
[![License: MIT/BSL](https://img.shields.io/badge/license-MIT%20%2F%20BSL%201.1-blue)](#license) Scan barcodes, photograph receipts, and get recipe ideas based on what you already have — before it expires.
[![CI](https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi/badges/workflows/ci.yml/badge.svg)](https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi/actions)
[![Version](https://img.shields.io/badge/version-0.6.0-green)](https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi/releases)
[Documentation](https://docs.circuitforge.tech/kiwi) · [Live demo](https://menagerie.circuitforge.tech/kiwi) · [circuitforge.tech](https://circuitforge.tech) **LLM support is optional.** Inventory tracking, barcode scanning, expiry alerts, CSV export, and receipt upload all work without any LLM configured. AI features (receipt OCR, recipe suggestions, meal planning) activate when a backend is available and are BYOK-unlockable at any tier.
*Part of the CircuitForge LLC suite — "AI for the tasks the system made hard on purpose."* **Status:** Beta · CircuitForge LLC
</div>
**[Documentation](https://docs.circuitforge.tech/kiwi/)** · [circuitforge.tech](https://circuitforge.tech)
--- ---
> **The LLM is optional.** Barcode scanning, receipt upload, expiry alerts, the full 200k+ recipe browser, and CSV export all work with zero LLM configured. Recipe suggestions and receipt OCR activate when a backend is available, and are BYOK-unlockable at any tier. You are never forced to send your data anywhere. ## What it does
--- - **Inventory tracking** — add items by barcode scan, receipt upload, or manually
- **Expiry alerts** — know what's about to go bad
- **Recipe browser** — browse the full recipe corpus by cuisine, meal type, dietary preference, or main ingredient; pantry match percentage shown inline (Free)
- **Saved recipes** — bookmark any recipe with notes, a 0–5 star rating, and free-text style tags (Free); organize into named collections (Paid)
- **Receipt OCR** — extract line items from receipt photos automatically (Paid tier, BYOK-unlockable)
- **Recipe suggestions** — four levels from pantry-match to full LLM generation (Paid tier, BYOK-unlockable)
- **Style auto-classifier** — LLM suggests style tags (comforting, hands-off, quick, etc.) for saved recipes (Paid tier, BYOK-unlockable)
- **Leftover mode** — prioritize nearly-expired items in recipe ranking (Free, 5/day; unlimited at Paid+)
- **LLM backend config** — configure inference via `circuitforge-core` env-var system; BYOK unlocks Paid AI features at any tier
- **Feedback FAB** — in-app feedback button; status probed on load, hidden if CF feedback endpoint unreachable
## What Kiwi does ## Stack
| Feature | Notes | - **Frontend:** Vue 3 SPA (Vite + TypeScript)
|---|---| - **Backend:** FastAPI + SQLite (via `circuitforge-core`)
| **Inventory tracking** | Add items by barcode scan, receipt upload, or manually | - **Auth:** CF session cookie → Directus JWT (cloud mode)
| **Expiry alerts** | Know what is about to go bad before it does | - **Licensing:** Heimdall (free tier auto-provisioned at signup)
| **Recipe browser** | 200k+ recipes — filter by cuisine, meal type, dietary preference, or main ingredient; pantry match percentage shown inline |
| **Leftover mode** | Prioritizes nearly-expired items in recipe ranking (5/day free, unlimited at Paid+) |
| **Recipe suggestions** | Four levels: direct corpus match, substitution/swap, cuisine-style adapter, full LLM generation |
| **Meal planning** | Plan meals for the week; pull from saved recipes or suggestions |
| **Saved recipes** | Bookmark any recipe with notes, 0-5 star rating, and free-text style tags; organize into named collections (Paid) |
| **Receipt OCR** | Extract line items from receipt photos automatically |
| **Dietary profiles** | Vegan, gluten-free, diabetic, and other constraints respected throughout |
| **Style auto-classifier** | LLM suggests style tags (comforting, hands-off, quick, etc.) for saved recipes |
| **Community feed** | Browse and share recipes with other Kiwi users |
| **CSV export** | Full pantry export, always available, no tier gate |
--- ## Running locally
## Quick start
**One-line install (self-hosted, Docker required):**
```bash ```bash
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi/raw/branch/main/install.sh)
```
**Or clone and run manually:**
```bash
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi.git
cd kiwi
cp .env.example .env cp .env.example .env
./manage.sh build ./manage.sh build
./manage.sh start ./manage.sh start
# Web: http://localhost:8511 # Web: http://localhost:8511
# API: http://localhost:8512 # API: http://localhost:8512
``` ```
**Live cloud instance** (free account required): ## Cloud instance
[menagerie.circuitforge.tech/kiwi](https://menagerie.circuitforge.tech/kiwi)
Full setup and configuration guide: [docs.circuitforge.tech/kiwi](https://docs.circuitforge.tech/kiwi) ```bash
./manage.sh cloud-build
--- ./manage.sh cloud-start
# Served at menagerie.circuitforge.tech/kiwi (JWT-gated)
```
## Tiers ## Tiers
| Feature | Free | Paid | Premium | | Feature | Free | Paid | Premium |
|---|:---:|:---:|:---:| |---------|------|------|---------|
| Inventory CRUD | Yes | Yes | Yes | | Inventory CRUD | ✓ | ✓ | ✓ |
| Barcode scan | Yes | Yes | Yes | | Barcode scan | ✓ | ✓ | ✓ |
| Receipt upload | Yes | Yes | Yes | | Receipt upload | ✓ | ✓ | ✓ |
| Expiry alerts | Yes | Yes | Yes | | Expiry alerts | ✓ | ✓ | ✓ |
| CSV export | Yes | Yes | Yes | | CSV export | ✓ | ✓ | ✓ |
| Recipe browser (200k+ recipes) | Yes | Yes | Yes | | Recipe browser (domain/category) | ✓ | ✓ | ✓ |
| Save recipes + notes + star rating | Yes | Yes | Yes | | Save recipes + notes + star rating | ✓ | ✓ | ✓ |
| Style tags (manual, free-text) | Yes | Yes | Yes | | Style tags (manual, free-text) | ✓ | ✓ | ✓ |
| Leftover mode (5/day) | Yes | Yes | Yes | | Receipt OCR | BYOK | ✓ | ✓ |
| Receipt OCR | BYOK | Yes | Yes | | Recipe suggestions (L1–L4) | BYOK | ✓ | ✓ |
| Recipe suggestions (L1–L4) | BYOK | Yes | Yes | | Named recipe collections | — | ✓ | ✓ |
| Named recipe collections | — | Yes | Yes | | LLM style auto-classifier | — | BYOK | ✓ |
| LLM style auto-classifier | — | BYOK | Yes | | Meal planning | — | ✓ | ✓ |
| Meal planning | — | Yes | Yes | | Multi-household | — | — | ✓ |
| Multi-household | — | — | Yes | | Leftover mode (5/day) | ✓ | ✓ | ✓ |
**BYOK** = bring your own LLM backend. Configure `~/.config/circuitforge/llm.yaml` to unlock AI features at any tier without a paid subscription. BYOK = bring your own LLM backend (configure `~/.config/circuitforge/llm.yaml`)
---
## Stack
- **Frontend:** Vue 3 SPA (Vite + TypeScript), served on port 8511
- **Backend:** FastAPI + SQLite via `circuitforge-core`, API on port 8512
- **Auth:** CircuitForge session cookie (cloud mode); local mode requires no account
- **Licensing:** Heimdall — free tier auto-provisioned at signup
---
## Forgejo-primary
Kiwi is developed and maintained on Forgejo at [git.opensourcesolarpunk.com/Circuit-Forge/kiwi](https://git.opensourcesolarpunk.com/Circuit-Forge/kiwi). GitHub and Codeberg are read-only mirrors. File issues and submit pull requests on Forgejo.
---
## License ## License
Kiwi uses a split license: Discovery/pipeline layer: MIT
AI features: BSL 1.1 (free for personal non-commercial self-hosting)
- **Discovery and inventory pipeline** (barcode scan, expiry tracking, pantry CRUD, CSV export, recipe browser): [MIT](LICENSE-MIT)
- **AI features** (receipt OCR, LLM recipe suggestions, style auto-classifier): [BSL 1.1](LICENSE-BSL) — free for personal non-commercial self-hosting; commercial use or SaaS re-hosting requires a paid license. Converts to MIT after 4 years.
Humans own design, architecture, code review, testing, and verification. LLMs are part of our development workflow. [Our positions on LLM use →](https://circuitforge.tech/positions)
Privacy · Safety · Accessibility — co-equal, non-negotiable across all CircuitForge products.

View file

@ -1,332 +0,0 @@
# app/api/endpoints/activitypub.py
# MIT License
#
# ActivityPub endpoints for Kiwi instances:
# GET /.well-known/webfinger — WebFinger JRD
# GET /ap/actor — Instance actor document
# POST /ap/actor/inbox — Incoming activities
# GET /ap/outbox — Outgoing activities (OrderedCollection)
# GET /ap/posts/{slug} — Individual AP Note
# GET /ap/followers — Followers collection (count only)
# GET /ap/following — Following collection (empty stub)
#
# All endpoints are no-ops / 404 when AP_ENABLED=false or actor not loaded.
# The WebFinger and well-known routes are mounted at the root app level (not
# under /api/v1) — see main.py.
from __future__ import annotations
import asyncio
import json
import logging
from datetime import datetime, timezone
from fastapi import APIRouter, HTTPException, Request, Response
from fastapi.responses import JSONResponse
from app.core.config import settings
from app.services.ap.keys import get_actor
logger = logging.getLogger(__name__)
# ── Two routers: one for well-known (root mount), one for /ap prefix ─────────
# webfinger_router carries no prefix so /.well-known/webfinger resolves at the
# application root; ap_router groups every other endpoint under /ap.
webfinger_router = APIRouter(tags=["activitypub"])
ap_router = APIRouter(prefix="/ap", tags=["activitypub"])
# Media types used for the responses built in this module: ActivityStreams
# documents and the WebFinger JRD respectively.
_AP_CONTENT_TYPE = "application/activity+json"
_JRD_CONTENT_TYPE = "application/jrd+json"
def _actor_required():
    """Return the loaded instance actor, or abort the request with a 404.

    Raises:
        HTTPException: 404 when ``get_actor()`` returns ``None``.
    """
    loaded = get_actor()
    if loaded is not None:
        return loaded
    raise HTTPException(status_code=404, detail="ActivityPub not enabled on this instance.")
# ── WebFinger ─────────────────────────────────────────────────────────────────
@webfinger_router.get("/.well-known/webfinger")
async def webfinger(resource: str | None = None):
    """Serve the WebFinger JRD that points at this instance's actor.

    Args:
        resource: The ``resource`` query parameter. When supplied it must equal
            ``acct:kiwi@<AP_HOST>``; when omitted or empty the JRD is returned
            without a match check.

    Raises:
        HTTPException: 404 when no actor is loaded, or when ``resource`` is
            given and does not match the expected account URI.
    """
    instance_actor = get_actor()
    if instance_actor is None:
        raise HTTPException(status_code=404, detail="ActivityPub not enabled.")

    subject = f"acct:kiwi@{settings.AP_HOST}"
    # Only a non-empty, mismatching resource is rejected.
    if resource and resource != subject:
        raise HTTPException(status_code=404, detail=f"Resource {resource!r} not found.")

    self_link = {
        "rel": "self",
        "type": _AP_CONTENT_TYPE,
        "href": instance_actor.actor_id,
    }
    body = json.dumps({"subject": subject, "links": [self_link]})
    return Response(content=body, media_type=_JRD_CONTENT_TYPE)
# ── Actor ─────────────────────────────────────────────────────────────────────
@ap_router.get("/actor")
async def get_actor_doc():
    """Return the instance actor document as ``application/activity+json``.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    payload = json.dumps(_actor_required().to_ap_dict())
    return Response(content=payload, media_type=_AP_CONTENT_TYPE)
# ── Inbox (mounted via make_inbox_router below) ───────────────────────────────
def _store_follower(db_path, actor_url: str, inbox_url: str, shared_inbox: str | None) -> None:
    """Upsert an active row for ``actor_url`` into ``ap_followers``.

    Blocking sqlite I/O: intended to be run through ``asyncio.to_thread``.
    The connection is opened and closed inside this call so it never crosses
    threads.
    """
    import sqlite3

    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            """INSERT OR REPLACE INTO ap_followers
            (actor_id, inbox_url, shared_inbox, followed_at, active)
            VALUES (?, ?, ?, ?, 1)""",
            (actor_url, inbox_url, shared_inbox, datetime.now(timezone.utc).isoformat()),
        )
        conn.commit()
    finally:
        conn.close()


async def _on_follow(activity: dict, headers: dict) -> None:
    """Accept Follow: add to ap_followers, send Accept(Follow) back.

    Args:
        activity: The incoming Follow activity; its ``actor`` field names the
            follower and its ``id`` is echoed into the Accept's id.
        headers: Request headers supplied by the inbox router (unused here).

    The handler returns silently when the activity has no actor, when the
    follower's inbox cannot be resolved, or when no instance actor is loaded
    (in the last case the follower row has already been stored).
    """
    actor_url = activity.get("actor", "")
    if not actor_url:
        return

    inbox_url, shared_inbox = await asyncio.to_thread(_resolve_inbox, actor_url)
    if inbox_url is None:
        return

    # The sqlite write is blocking, so keep it off the event loop like the
    # other blocking calls in this handler.
    await asyncio.to_thread(
        _store_follower, settings.DB_PATH, actor_url, inbox_url, shared_inbox
    )

    actor = get_actor()
    if actor is None:
        return

    accept = {
        "@context": "https://www.w3.org/ns/activitystreams",
        "id": f"{actor.actor_id}/accepts/{activity.get('id', 'unknown')}",
        "type": "Accept",
        "actor": actor.actor_id,
        "object": activity,
    }
    from circuitforge_core.activitypub import deliver_activity

    await asyncio.to_thread(deliver_activity, accept, inbox_url, actor, 10.0)
async def _on_undo(activity: dict, headers: dict) -> None:
    """Handle Undo(Follow) by marking the sender's follower row inactive.

    Any Undo whose ``object`` is not a dict of type ``Follow``, or that has no
    ``actor``, is ignored.
    """
    undone = activity.get("object", {})
    if not isinstance(undone, dict) or undone.get("type") != "Follow":
        return

    follower_id = activity.get("actor", "")
    if not follower_id:
        return

    import sqlite3

    db = sqlite3.connect(str(settings.DB_PATH))
    try:
        db.execute(
            "UPDATE ap_followers SET active = 0 WHERE actor_id = ?", (follower_id,)
        )
        db.commit()
    finally:
        db.close()
async def _dedup_activity(activity_id: str | None) -> bool:
    """Record ``activity_id`` in ``ap_received`` and report whether it was already there.

    Returns:
        ``True`` when the insert hits an integrity error (the id was seen
        before); ``False`` when the id is newly recorded or is empty/``None``.
    """
    if not activity_id:
        return False

    import sqlite3

    already_seen = False
    db = sqlite3.connect(str(settings.DB_PATH))
    try:
        db.execute(
            "INSERT INTO ap_received (activity_id) VALUES (?)", (activity_id,)
        )
        db.commit()
    except sqlite3.IntegrityError:
        # The insert was rejected, so this activity id is a repeat.
        already_seen = True
    finally:
        db.close()
    return already_seen
def _build_inbox_router():
    """Build the inbox sub-router with de-duplicating Follow and Undo handlers.

    Each handler first records the activity id through ``_dedup_activity`` and
    only forwards to the real handler when the id has not been seen before.
    """
    from circuitforge_core.activitypub.inbox import make_inbox_router

    async def on_follow(activity: dict, headers: dict) -> None:
        already_seen = await _dedup_activity(activity.get("id"))
        if not already_seen:
            await _on_follow(activity, headers)

    async def on_undo(activity: dict, headers: dict) -> None:
        already_seen = await _dedup_activity(activity.get("id"))
        if not already_seen:
            await _on_undo(activity, headers)

    dispatch = {"Follow": on_follow, "Undo": on_undo}
    return make_inbox_router(
        handlers=dispatch,
        # Signature verification enabled in prod when actor is loaded.
        # NOTE(review): None is what is passed here — confirm where/how
        # HTTP-signature verification is actually switched on.
        verify_key_fetcher=None,
        path="/inbox",
    )
# Mount inbox at /ap/actor/inbox (AP spec: inbox is a sub-resource of the actor)
# Building or mounting the inbox is best-effort: any failure is logged and the
# rest of the /ap routes stay available.
try:
    _inbox_sub = _build_inbox_router()
    ap_router.include_router(_inbox_sub, prefix="/actor")
except Exception as _err:
    logger.warning("AP inbox router not available: %s", _err)
# ── Outbox ────────────────────────────────────────────────────────────────────
@ap_router.get("/outbox")
async def get_outbox(page: int | None = None, request: Request = None):
    """Return the instance outbox as an ActivityStreams collection.

    Args:
        page: 1-based page number. When omitted (or 0) the first 20 posts are
            returned as an ``OrderedCollection``; otherwise the response is an
            ``OrderedCollectionPage`` for that page.
        request: Accepted for the route signature; not read by this handler.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    actor = _actor_required()
    from app.api.endpoints.community import _get_community_store

    community = _get_community_store()
    base = f"https://{settings.AP_HOST}"

    # Without a community store there is nothing to list: answer with an
    # explicit empty collection.
    if community is None:
        empty = {
            "@context": "https://www.w3.org/ns/activitystreams",
            "id": f"{actor.outbox_url}",
            "type": "OrderedCollection",
            "totalItems": 0,
            "orderedItems": [],
        }
        return Response(content=json.dumps(empty), media_type=_AP_CONTENT_TYPE)

    page_size = 20
    page_number = page or 1
    skip = (page_number - 1) * page_size
    posts = await asyncio.to_thread(community.list_posts, limit=page_size, offset=skip)
    notes = [_post_to_ap_note(entry, actor, base) for entry in posts]

    suffix = f"?page={page}" if page else ""
    kind = "OrderedCollectionPage" if page else "OrderedCollection"
    listing = {
        "@context": "https://www.w3.org/ns/activitystreams",
        "id": actor.outbox_url + suffix,
        "type": kind,
        "orderedItems": notes,
    }
    return Response(content=json.dumps(listing), media_type=_AP_CONTENT_TYPE)
# ── Individual post ───────────────────────────────────────────────────────────
@ap_router.get("/posts/{slug}")
async def get_ap_post(slug: str):
    """Return a single community post rendered as an ActivityPub Note.

    Raises:
        HTTPException: 404 when no actor is loaded, when the community store
            is unavailable, or when no post has the given slug.
    """
    actor = _actor_required()
    from app.api.endpoints.community import _get_community_store

    community = _get_community_store()
    if community is None:
        raise HTTPException(status_code=404, detail="Community DB not available.")

    found = await asyncio.to_thread(community.get_post_by_slug, slug)
    if found is None:
        raise HTTPException(status_code=404, detail="Post not found.")

    rendered = _post_to_ap_note(found, actor, f"https://{settings.AP_HOST}")
    return Response(content=json.dumps(rendered), media_type=_AP_CONTENT_TYPE)
# ── Followers / Following ─────────────────────────────────────────────────────
@ap_router.get("/followers")
async def get_followers():
    """Return the followers collection, exposing only the active-follower count.

    The count is best-effort: if the database cannot be read the failure is
    logged and ``totalItems`` is reported as 0 rather than failing the request.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    actor = _actor_required()
    import sqlite3

    count = 0
    try:
        conn = sqlite3.connect(str(settings.DB_PATH))
        try:
            row = conn.execute(
                "SELECT COUNT(*) FROM ap_followers WHERE active = 1"
            ).fetchone()
        finally:
            # Close even when the query raises, so the connection is not leaked.
            conn.close()
        count = row[0] if row else 0
    except sqlite3.Error as exc:
        logger.warning("Could not count ActivityPub followers: %s", exc)

    collection = {
        "@context": "https://www.w3.org/ns/activitystreams",
        "id": f"{actor.actor_id}/followers",
        "type": "OrderedCollection",
        "totalItems": count,
    }
    return Response(content=json.dumps(collection), media_type=_AP_CONTENT_TYPE)
@ap_router.get("/following")
async def get_following():
    """Return the following collection, which is always empty.

    Raises:
        HTTPException: 404 (via ``_actor_required``) when no actor is loaded.
    """
    following_id = f"{_actor_required().actor_id}/following"
    stub = {
        "@context": "https://www.w3.org/ns/activitystreams",
        "id": following_id,
        "type": "OrderedCollection",
        "totalItems": 0,
        "orderedItems": [],
    }
    return Response(content=json.dumps(stub), media_type=_AP_CONTENT_TYPE)
# ── Helpers ───────────────────────────────────────────────────────────────────
def _post_to_ap_note(post, actor, base_url: str) -> dict:
    """Render a community post as an ActivityPub Note dict.

    Args:
        post: Post object; ``title``, ``published`` and ``slug`` are read
            directly, while ``dietary_tags``, ``description`` and
            ``outcome_notes`` are optional attributes.
        actor: Instance actor whose ``actor_id`` is attributed on the note.
        base_url: Scheme-and-host prefix used for the note id and the #Kiwi
            hashtag link.

    Returns:
        The note built by ``make_note`` with its ``id`` set to
        ``<base_url>/ap/posts/<slug>``.
    """
    from circuitforge_core.activitypub import make_note
    from app.services.community.ap_compat import _build_content

    dietary: list[str] = list(getattr(post, "dietary_tags", []) or [])

    # The #Kiwi tag always comes first, followed by at most four dietary tags
    # turned into CamelCase hashtags ("gluten-free" -> "#GlutenFree").
    tags = [{"type": "Hashtag", "name": "#Kiwi", "href": f"{base_url}/ap/tags/kiwi"}]
    tags.extend(
        {
            "type": "Hashtag",
            "name": "#" + "".join(part.capitalize() for part in raw.replace("-", " ").split()),
        }
        for raw in dietary[:4]
    )

    summary = {
        "title": post.title,
        "description": getattr(post, "description", None),
        "outcome_notes": getattr(post, "outcome_notes", None),
        "dietary_tags": dietary,
    }
    body = _build_content(summary)

    when = post.published
    if not isinstance(when, datetime):
        # Anything that is not a datetime instance is passed on as None.
        when = None

    rendered = make_note(
        actor_id=actor.actor_id,
        content=body,
        tag=tags,
        published=when,
    )
    rendered["id"] = f"{base_url}/ap/posts/{post.slug}"
    return rendered
def _resolve_inbox(actor_url: str) -> tuple[str | None, str | None]:
    """Fetch an AP actor document and extract inbox + sharedInbox URLs.

    Args:
        actor_url: URL of the remote actor document.

    Returns:
        ``(inbox, shared_inbox)``. Either element is ``None`` when the
        corresponding field is absent or is not a string. ``(None, None)`` is
        returned when the document cannot be fetched, is not valid JSON, or is
        not a JSON object.
    """
    try:
        import httpx

        resp = httpx.get(
            actor_url,
            headers={"Accept": "application/activity+json"},
            timeout=8.0,
            follow_redirects=True,
        )
        resp.raise_for_status()
        doc = resp.json()
    except Exception as exc:
        logger.debug("Could not resolve actor %s: %s", actor_url, exc)
        return None, None

    if not isinstance(doc, dict):
        logger.debug("Could not resolve actor %s: %s", actor_url, "response is not a JSON object")
        return None, None

    inbox = doc.get("inbox")
    # "endpoints" may be missing, null, or not an object; none of those should
    # discard an otherwise valid inbox.
    endpoints = doc.get("endpoints")
    shared = endpoints.get("sharedInbox") if isinstance(endpoints, dict) else None

    # Only string URLs are usable downstream (they are stored and POSTed to).
    if not isinstance(inbox, str):
        inbox = None
    if not isinstance(shared, str):
        shared = None
    return inbox, shared

View file

@ -167,54 +167,6 @@ def _validate_publish_body(body: dict) -> None:
raise HTTPException(status_code=422, detail="photo_url must be an https:// URL.") raise HTTPException(status_code=422, detail="photo_url must be an https:// URL.")
@router.post("/check-similar")
async def check_similar(body: dict, session: CloudUser = Depends(get_session)):
    """Pre-submission dedup check: list existing posts similar to the draft.

    Safe to call with no community store configured — returns an empty list
    rather than a 503.

    Args:
        body: Request payload; ``title`` is required for any match, while
            ``recipe_id`` and ``post_type`` are passed through to the search.
        session: Current user session; its ``db`` is used for ingredient lookups.

    Returns:
        ``{"similar_posts": [...]}`` with at most five results, excluding any
        whose ``similarity_tier`` is ``"different"``.
    """
    community = _get_community_store()
    if community is None:
        return {"similar_posts": []}

    title = (body.get("title") or "").strip()
    recipe_id = body.get("recipe_id")
    post_type = body.get("post_type")
    if not title:
        return {"similar_posts": []}

    # Ask the store for up to 8 candidates; they are filtered and trimmed below.
    candidates = await asyncio.to_thread(
        community.search_similar_posts, title, recipe_id, post_type, 8
    )
    if not candidates:
        return {"similar_posts": []}

    from app.services.community.dedup import build_similar_post_result, fetch_recipe_ingredients

    incoming_ingredients = await asyncio.to_thread(
        fetch_recipe_ingredients, session.db, recipe_id
    )

    kept = []
    for candidate in candidates:
        verdict = await asyncio.to_thread(
            build_similar_post_result,
            candidate,
            recipe_id,
            incoming_ingredients,
            session.db,
        )
        if verdict["similarity_tier"] == "different":
            continue
        kept.append(verdict)

    return {"similar_posts": kept[:5]}
@router.post("/posts", status_code=201) @router.post("/posts", status_code=201)
async def publish_post(body: dict, session: CloudUser = Depends(get_session)): async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
from app.tiers import can_use from app.tiers import can_use
@ -262,8 +214,6 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
today = datetime.now(timezone.utc).strftime("%Y-%m-%d") today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
slug = f"kiwi-{_post_type_prefix(post_type)}-{pseudonym.lower().replace(' ', '')}-{today}-{slug_title}"[:120] slug = f"kiwi-{_post_type_prefix(post_type)}-{pseudonym.lower().replace(' ', '')}-{today}-{slug_title}"[:120]
similar_to_ref = body.get("similar_to_ref") or None
from circuitforge_core.community.models import CommunityPost from circuitforge_core.community.models import CommunityPost
post = CommunityPost( post = CommunityPost(
slug=slug, slug=slug,
@ -291,7 +241,6 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
fat_pct=snapshot.fat_pct, fat_pct=snapshot.fat_pct,
protein_pct=snapshot.protein_pct, protein_pct=snapshot.protein_pct,
moisture_pct=snapshot.moisture_pct, moisture_pct=snapshot.moisture_pct,
similar_to_ref=similar_to_ref,
) )
try: try:
@ -301,41 +250,7 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
status_code=409, status_code=409,
detail="A post with this title already exists today. Try a different title.", detail="A post with this title already exists today. Try a different title.",
) from exc ) from exc
return _post_to_dict(inserted)
post_dict = _post_to_dict(inserted)
# AP delivery + Mastodon post (Paid tier, AP_ENABLED, opted-in)
from app.core.config import settings as _settings
if _settings.AP_ENABLED and session.tier in ("paid", "premium", "ultra"):
from circuitforge_core.activitypub import make_create, make_note, PUBLIC
from app.services.ap.keys import get_actor
from app.services.ap.delivery import deliver_to_followers
_ap_actor = get_actor()
if _ap_actor is not None:
base = f"https://{_settings.AP_HOST}"
from app.api.endpoints.activitypub import _post_to_ap_note
_note = _post_to_ap_note(inserted, _ap_actor, base)
_activity = make_create(_ap_actor, _note)
asyncio.create_task(
asyncio.to_thread(
deliver_to_followers, inserted.slug, _activity, session.db
)
)
# Mastodon post if user has connected account and opted in
if body.get("post_to_mastodon"):
from app.services.ap.mastodon import build_post_content, get_token, post_status
_masto = await asyncio.to_thread(
get_token, session.db, session.user_id, _settings.AP_TOKEN_ENCRYPTION_KEY
)
if _masto:
_masto_url, _masto_token = _masto
_content = build_post_content(post_dict)
asyncio.create_task(
asyncio.to_thread(post_status, _masto_url, _masto_token, _content)
)
return post_dict
@router.delete("/posts/{slug}", status_code=204) @router.delete("/posts/{slug}", status_code=204)
@ -436,7 +351,6 @@ def _post_to_dict(post) -> dict:
"fat_pct": post.fat_pct, "fat_pct": post.fat_pct,
"protein_pct": post.protein_pct, "protein_pct": post.protein_pct,
"moisture_pct": post.moisture_pct, "moisture_pct": post.moisture_pct,
"similar_to_ref": getattr(post, "similar_to_ref", None),
} }

View file

@ -478,8 +478,7 @@ async def scan_barcode_image(
from app.services.openfoodfacts import OpenFoodFactsService from app.services.openfoodfacts import OpenFoodFactsService
from app.services.expiration_predictor import ExpirationPredictor from app.services.expiration_predictor import ExpirationPredictor
image_bytes = temp_file.read_bytes() barcodes = await asyncio.to_thread(BarcodeScanner().scan_image, temp_file)
barcodes = await asyncio.to_thread(BarcodeScanner().scan_from_bytes, image_bytes)
if not barcodes: if not barcodes:
return BarcodeScanResponse( return BarcodeScanResponse(
success=False, barcodes_found=0, results=[], success=False, barcodes_found=0, results=[],
@ -501,10 +500,9 @@ async def scan_barcode_image(
product_info = await off.lookup_product(code) product_info = await off.lookup_product(code)
product_source = "openfoodfacts" product_source = "openfoodfacts"
db_product = None
inventory_item = None inventory_item = None
if product_info: if product_info and auto_add_to_inventory:
db_product, _ = await asyncio.to_thread( product, _ = await asyncio.to_thread(
store.get_or_create_product, store.get_or_create_product,
product_info.get("name", code), product_info.get("name", code),
code, code,
@ -514,30 +512,29 @@ async def scan_barcode_image(
source=product_source, source=product_source,
source_data=product_info, source_data=product_info,
) )
if auto_add_to_inventory: exp = predictor.predict_expiration(
exp = predictor.predict_expiration( product_info.get("category", ""),
product_info.get("category", ""), location,
location, product_name=product_info.get("name", code),
product_name=product_info.get("name", code), tier=session.tier,
tier=session.tier, has_byok=session.has_byok,
has_byok=session.has_byok, )
) resolved_qty = product_info.get("pack_quantity") or quantity
resolved_qty = product_info.get("pack_quantity") or quantity resolved_unit = product_info.get("pack_unit") or "count"
resolved_unit = product_info.get("pack_unit") or "count" inventory_item = await asyncio.to_thread(
inventory_item = await asyncio.to_thread( store.add_inventory_item,
store.add_inventory_item, product["id"], location,
db_product["id"], location, quantity=resolved_qty,
quantity=resolved_qty, unit=resolved_unit,
unit=resolved_unit, expiration_date=str(exp) if exp else None,
expiration_date=str(exp) if exp else None, source="barcode_scan",
source="barcode_scan", )
) product_found = product_info is not None
product_found = db_product is not None
needs_capture = not product_found and has_visual_capture needs_capture = not product_found and has_visual_capture
results.append({ results.append({
"barcode": code, "barcode": code,
"barcode_type": bc.get("type", "unknown"), "barcode_type": bc.get("type", "unknown"),
"product": ProductResponse.model_validate(db_product) if db_product else None, "product": ProductResponse.model_validate(product_info) if product_info else None,
"inventory_item": InventoryItemResponse.model_validate(inventory_item) if inventory_item else None, "inventory_item": InventoryItemResponse.model_validate(inventory_item) if inventory_item else None,
"added_to_inventory": inventory_item is not None, "added_to_inventory": inventory_item is not None,
"needs_manual_entry": not product_found and not needs_capture, "needs_manual_entry": not product_found and not needs_capture,

View file

@ -1,133 +0,0 @@
# app/api/endpoints/mastodon_oauth.py
# MIT License
#
# Mastodon OAuth flow endpoints:
# POST /social/mastodon/connect — Start OAuth (dynamic app registration)
# GET /social/mastodon/callback — OAuth callback, exchange code for token
# DELETE /social/mastodon/disconnect — Revoke and remove stored token
# GET /social/mastodon/status — Check connection status
from __future__ import annotations
import asyncio
import logging
from urllib.parse import urlencode
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import RedirectResponse
from app.cloud_session import CloudUser, get_session
from app.core.config import settings
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/social/mastodon", tags=["mastodon"])
def _redirect_uri() -> str:
    """Build the HTTPS OAuth callback URL, falling back to ``localhost:8512`` when AP_HOST is unset."""
    callback_host = settings.AP_HOST or "localhost:8512"
    return "https://" + f"{callback_host}/api/v1/social/mastodon/callback"
# In-memory pending state: maps state_token → {instance_url, client_id, client_secret, user_id}
# A real deployment would persist this in a short-TTL cache or DB.
# NOTE(review): within this module an entry is removed only when the callback
# pops its state, so flows that are never completed stay in the dict — confirm
# whether an expiry is needed.
_pending: dict[str, dict] = {}
@router.post("/connect")
async def connect_mastodon(body: dict, session: CloudUser = Depends(get_session)):
    """Begin the Mastodon OAuth flow for the current user.

    Body: {"instance_url": "https://mastodon.social"}
    Returns: {"authorize_url": "...", "state": "..."}

    Raises:
        HTTPException: 422 when ``instance_url`` is not an https:// URL;
            502 when app registration with the instance fails.
    """
    import secrets
    from app.services.ap.mastodon import build_authorize_url, register_app

    raw_url = body.get("instance_url") or ""
    instance_url = raw_url.strip().rstrip("/")
    if not instance_url.startswith("https://"):
        raise HTTPException(status_code=422, detail="instance_url must be an https:// URL.")

    callback = _redirect_uri()
    try:
        creds = await asyncio.to_thread(register_app, instance_url, callback)
    except Exception as exc:
        raise HTTPException(
            status_code=502, detail=f"Could not register with Mastodon instance: {exc}"
        ) from exc

    # Remember the app credentials under a random state token so the callback
    # can finish the exchange for the right user.
    state = secrets.token_urlsafe(24)
    client_id = creds["client_id"]
    _pending[state] = {
        "instance_url": instance_url,
        "client_id": client_id,
        "client_secret": creds["client_secret"],
        "user_id": session.user_id,
    }

    authorize_url = build_authorize_url(
        instance_url=instance_url,
        client_id=client_id,
        redirect_uri=callback + f"?state={state}",
    )
    return {"authorize_url": authorize_url, "state": state}
@router.get("/callback")
async def mastodon_callback(code: str | None = None, state: str | None = None):
    """Finish the OAuth flow: trade the auth code for a token and store it.

    Raises:
        HTTPException: 400 when ``code`` or ``state`` is missing, or when the
            state is not a pending one; 502 when the token exchange fails.
    """
    if not (code and state):
        raise HTTPException(status_code=400, detail="Missing code or state parameter.")

    # A state token is single-use: it is removed as soon as it is looked up.
    flow = _pending.pop(state, None)
    if flow is None:
        raise HTTPException(status_code=400, detail="Unknown or expired OAuth state.")

    from app.services.ap.mastodon import exchange_code, store_token

    callback = _redirect_uri() + f"?state={state}"
    try:
        token = await asyncio.to_thread(
            exchange_code,
            flow["instance_url"],
            flow["client_id"],
            flow["client_secret"],
            code,
            callback,
        )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Token exchange failed: {exc}") from exc

    await asyncio.to_thread(
        store_token,
        settings.DB_PATH,
        flow["user_id"],
        flow["instance_url"],
        token,
        settings.AP_TOKEN_ENCRYPTION_KEY,
    )

    # Send the browser back to the frontend settings page.
    return RedirectResponse(url="/#/settings?mastodon=connected", status_code=302)
@router.delete("/disconnect", status_code=204)
async def disconnect_mastodon(session: CloudUser = Depends(get_session)):
    """Delete the current user's stored Mastodon token."""
    from app.services.ap.mastodon import delete_token

    owner_id = session.user_id
    await asyncio.to_thread(delete_token, settings.DB_PATH, owner_id)
@router.get("/status")
async def mastodon_status(session: CloudUser = Depends(get_session)):
    """Report whether the user has a Mastodon account connected.

    Returns:
        ``{"connected": bool, "instance_url": str | None}``; the token itself
        is never included.
    """
    from app.services.ap.mastodon import get_token

    stored = await asyncio.to_thread(
        get_token,
        settings.DB_PATH,
        session.user_id,
        settings.AP_TOKEN_ENCRYPTION_KEY,
    )
    if stored is not None:
        # The stored pair is (instance_url, token); only the URL is exposed.
        instance_url, _ = stored
        return {"connected": True, "instance_url": instance_url}
    return {"connected": False, "instance_url": None}

View file

@ -11,7 +11,6 @@ BSL 1.1 -- recipe_scan requires Paid tier or BYOK.
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import json as _json
import logging import logging
import uuid import uuid
from pathlib import Path from pathlib import Path
@ -19,7 +18,7 @@ from typing import Annotated
import aiofiles import aiofiles
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse, StreamingResponse from fastapi.responses import JSONResponse
from app.cloud_session import CloudUser, get_session from app.cloud_session import CloudUser, get_session
from app.core.config import settings from app.core.config import settings
@ -169,15 +168,9 @@ async def scan_recipe(
) )
raise HTTPException(status_code=422, detail=msg) raise HTTPException(status_code=422, detail=msg)
except RuntimeError as exc: except RuntimeError as exc:
msg = str(exc)
logger.warning("Recipe scanner unavailable: %s", msg)
raise HTTPException( raise HTTPException(
status_code=503, status_code=503,
detail=( detail=str(exc),
"The recipe scanner is temporarily unavailable — "
"no vision backend could be reached. "
"Try again in a few minutes, or contact support if this persists."
),
) )
return _result_to_response(result) return _result_to_response(result)
@ -191,114 +184,6 @@ async def scan_recipe(
pass pass
# ── SSE scan endpoint ─────────────────────────────────────────────────────────
async def _scan_recipe_sse(saved_paths: list[Path], pantry_names: list[str]):
    """Async generator yielding SSE events for a recipe scan.

    Runs ``RecipeScanner.scan`` in a worker thread and relays its progress
    callbacks to the client, then emits a final "done" event containing the
    full recipe payload (same shape as the ScannedRecipeResponse from
    POST /scan).

    Events:
        {"status": "allocating", "message": "..."}
        {"status": "scanning",   "message": "..."}
        {"status": "structuring","message": "..."}
        {"status": "done",       "recipe": {...}}
        {"status": "error",      "message": "..."}

    Args:
        saved_paths: Image files to scan.
        pantry_names: Pantry product names passed through to the scanner.

    Yields:
        ``data: <json>\\n\\n`` strings; the stream ends after the first
        "done" or "error" event, or after 180 s without any event.
    """
    # Maximum silence between two events before the stream gives up.
    idle_timeout_s = 180.0
    # Same user-facing wording as the 503 returned by POST /scan, so backend
    # details (hosts, URLs) never reach the client through either endpoint.
    unavailable_msg = (
        "The recipe scanner is temporarily unavailable — "
        "no vision backend could be reached. "
        "Try again in a few minutes, or contact support if this persists."
    )

    queue: asyncio.Queue = asyncio.Queue()
    loop = asyncio.get_running_loop()

    def _emit(event: dict) -> None:
        # Called from the worker thread: asyncio.Queue is not thread-safe, so
        # hand the put over to the event loop.
        loop.call_soon_threadsafe(queue.put_nowait, event)

    def _run() -> None:
        def cb(status: str, message: str) -> None:
            _emit({"status": status, "message": message})

        try:
            from app.services.recipe.recipe_scanner import RecipeScanner

            result = RecipeScanner().scan(saved_paths, pantry_names=pantry_names, progress_cb=cb)
            recipe_dict = _result_to_response(result).model_dump()
            _emit({"status": "done", "recipe": recipe_dict})
        except ValueError as exc:
            # Input-level problem: the message is meant for the user.
            _emit({"status": "error", "message": str(exc)})
        except RuntimeError as exc:
            # Backend unavailable: log the detail, send the generic message.
            logger.warning("Recipe scanner unavailable: %s", exc)
            _emit({"status": "error", "message": unavailable_msg})
        except Exception:
            logger.exception("Unexpected error in recipe scan thread")
            _emit({"status": "error", "message": "Scan failed unexpectedly."})

    scan_task = asyncio.create_task(asyncio.to_thread(_run))
    try:
        while True:
            try:
                event = await asyncio.wait_for(queue.get(), timeout=idle_timeout_s)
            except asyncio.TimeoutError:
                yield f"data: {_json.dumps({'status': 'error', 'message': 'Scan timed out after 3 minutes.'})}\n\n"
                break
            yield f"data: {_json.dumps(event)}\n\n"
            if event["status"] in ("done", "error"):
                break
    finally:
        # NOTE: cancelling only stops awaiting the thread; a scan already
        # running in the worker thread continues until it returns.
        if not scan_task.done():
            scan_task.cancel()
@router.post("/scan/stream")
async def scan_recipe_stream(
    files: Annotated[list[UploadFile], File(...)],
    store: Store = Depends(get_store),
    session: CloudUser = Depends(get_session),
):
    """Scan recipe photos and stream SSE progress events during model load.

    Use this endpoint instead of POST /scan when you need live feedback during
    cold-start model loading (first request after a GPU-idle period can take
    30-60 seconds for cf-docuvision to warm up).

    Tier: Paid (or BYOK) — same gate as POST /scan.

    Raises:
        HTTPException: 403 when the tier gate fails; 422 when no file, more
            than 4 files, or an unsupported content type is sent.
    """
    if not can_use("recipe_scan", session.tier, session.has_byok):
        raise HTTPException(
            status_code=403,
            detail=(
                "Recipe scanning requires Paid tier or a configured vision backend (BYOK). "
                "Set ANTHROPIC_API_KEY or connect to a cf-orch vision service."
            ),
        )
    if not files:
        raise HTTPException(status_code=422, detail="At least one image file is required.")
    if len(files) > 4:
        raise HTTPException(status_code=422, detail="Maximum 4 images per scan request.")
    for f in files:
        ct = (f.content_type or "").lower()
        # An empty content type is let through; only an explicit mismatch fails.
        if ct and ct not in _ALLOWED_MIME_TYPES:
            raise HTTPException(
                status_code=422,
                detail=f"Unsupported file type: {ct}. Supported: JPEG, PNG, WebP, HEIC.",
            )

    saved_paths: list[Path] = []

    def _cleanup() -> None:
        # Best-effort removal of the temp uploads; never masks the real error.
        for p in saved_paths:
            try:
                p.unlink(missing_ok=True)
            except Exception:
                pass

    try:
        for f in files:
            saved_paths.append(await _save_upload_temp(f))
        inventory = await asyncio.to_thread(store.list_inventory)
        pantry_names = [item["product_name"] for item in inventory if item.get("product_name")]
    except BaseException:
        # Streaming has not started, so the generator's own cleanup would
        # never run: remove what was saved so far (also on cancellation).
        _cleanup()
        raise

    async def generate():
        try:
            async for chunk in _scan_recipe_sse(saved_paths, pantry_names):
                yield chunk
        finally:
            _cleanup()

    return StreamingResponse(generate(), media_type="text/event-stream")
# ── Save endpoint ────────────────────────────────────────────────────────────── # ── Save endpoint ──────────────────────────────────────────────────────────────
@router.post("/scan/save", response_model=UserRecipeResponse, status_code=201) @router.post("/scan/save", response_model=UserRecipeResponse, status_code=201)

View file

@ -6,9 +6,7 @@ import logging
from pathlib import Path from pathlib import Path
from typing import Annotated from typing import Annotated
import json as _json_mod
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import StreamingResponse
from app.cloud_session import CloudUser, _auth_label, get_session from app.cloud_session import CloudUser, _auth_label, get_session
@ -16,9 +14,6 @@ log = logging.getLogger(__name__)
from app.db.session import get_store from app.db.session import get_store
from app.db.store import Store from app.db.store import Store
from app.models.schemas.recipe import ( from app.models.schemas.recipe import (
AskRequest,
AskResponse,
AskRecipeHit,
AssemblyTemplateOut, AssemblyTemplateOut,
BuildRequest, BuildRequest,
LeftoversResponse, LeftoversResponse,
@ -108,39 +103,6 @@ def _build_stream_prompt(db_path: Path, level: int) -> str:
store.close() store.close()
async def _stream_recipe_sse(db_path: Path, req: RecipeRequest):
    """Yield SSE frames for a streaming recipe request.

    Step 1 runs in a worker thread: pantry items are classified against a
    short-lived Store opened on ``db_path``.
    Step 2 is async: tokens from ``LLMRecipeGenerator.stream_generate()`` are
    forwarded as ``{"chunk": ...}`` frames, followed by ``{"done": true}``.
    A failure in either step ends the stream with one ``{"error": ...}`` frame.
    """

    def _frame(payload: dict) -> str:
        return f"data: {_json_mod.dumps(payload)}\n\n"

    def _classify(path: Path) -> tuple[list, list[str]]:
        from app.services.recipe.element_classifier import IngredientClassifier

        temp_store = Store(path)
        try:
            classifier = IngredientClassifier(temp_store)
            classified = classifier.classify_batch(req.pantry_items)
            return classified, classifier.identify_gaps(classified)
        finally:
            temp_store.close()

    try:
        profiles, gaps = await asyncio.to_thread(_classify, db_path)
    except Exception as exc:
        yield _frame({"error": str(exc)})
        return

    from app.services.recipe.llm_recipe import LLMRecipeGenerator

    generator = LLMRecipeGenerator(None)
    try:
        async for token in generator.stream_generate(req, profiles, gaps):
            yield _frame({"chunk": token})
        yield _frame({"done": True})
    except Exception as exc:
        yield _frame({"error": str(exc)})
async def _enqueue_recipe_job(session: CloudUser, req: RecipeRequest): async def _enqueue_recipe_job(session: CloudUser, req: RecipeRequest):
"""Queue an async recipe_llm job and return 202 with job_id. """Queue an async recipe_llm job and return 202 with job_id.
@ -182,7 +144,6 @@ async def _enqueue_recipe_job(session: CloudUser, req: RecipeRequest):
async def suggest_recipes( async def suggest_recipes(
req: RecipeRequest, req: RecipeRequest,
async_mode: bool = Query(default=False, alias="async"), async_mode: bool = Query(default=False, alias="async"),
stream: bool = Query(default=False),
session: CloudUser = Depends(get_session), session: CloudUser = Depends(get_session),
store: Store = Depends(get_store), store: Store = Depends(get_store),
): ):
@ -218,13 +179,6 @@ async def suggest_recipes(
req = req.model_copy(update={"level": 2}) req = req.model_copy(update={"level": 2})
orch_fallback = True orch_fallback = True
if stream and req.level in (3, 4):
return StreamingResponse(
_stream_recipe_sse(session.db, req),
media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
)
if req.level in (3, 4) and async_mode: if req.level in (3, 4) and async_mode:
return await _enqueue_recipe_job(session, req) return await _enqueue_recipe_job(session, req)
@ -600,137 +554,6 @@ async def build_recipe(
return result return result
# Question words and generic cooking/meal vocabulary that carry no search
# signal. Includes short words because keywords may be as short as 3 letters.
_ASK_STOPWORDS: frozenset[str] = frozenset({
    "what", "can", "make", "with", "have", "some", "the", "and", "for",
    "that", "this", "these", "those", "how", "about", "are", "there",
    "give", "show", "find", "want", "need", "like", "any", "good",
    "quick", "easy", "simple", "fast", "using", "use", "from", "into",
    "more", "much", "just", "only", "my", "please", "could", "would",
    "should", "something", "anything", "everything", "ideas", "idea",
    "suggest", "meal", "food", "dish", "dishes", "today", "tonight",
    "tomorrow", "now", "here", "recipes", "recipe", "dinner",
    "lunch", "breakfast", "snack", "under", "minutes", "hours", "time",
    "left", "over", "also", "cook", "made", "cooked",
    # Common 3-letter function words, needed now that 3-letter tokens count.
    "you", "get", "got", "has", "had", "let", "out", "all", "but", "not",
    "too", "who", "why", "was", "its", "our", "one", "two", "new", "see",
    "put", "way", "eat", "yes", "off",
})

import re as _re

# Compiled once: runs of ASCII letters are the candidate tokens.
_ASK_TOKEN_RE = _re.compile(r"[a-zA-Z]+")
# Three letters is the floor so that "egg", "ham", "pea", "rye" survive.
_ASK_MIN_KEYWORD_LEN = 3


def _extract_ask_keywords(question: str) -> list[str]:
    """Extract food-relevant keywords from a natural language question.

    The question is lower-cased and split into alphabetic tokens; tokens
    shorter than three letters or listed in ``_ASK_STOPWORDS`` are dropped.

    Args:
        question: Free-text user question.

    Returns:
        Unique keywords in order of first appearance (possibly empty).
    """
    tokens = _ASK_TOKEN_RE.findall(question.lower())
    kept = (
        t for t in tokens
        if len(t) >= _ASK_MIN_KEYWORD_LEN and t not in _ASK_STOPWORDS
    )
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(kept))
def _ask_in_thread(db_path: Path, question: str, pantry_items: list[str]) -> AskResponse:
    """Search step of the Ask feature; meant to run in a worker thread.

    Extracts keywords from ``question``, searches recipes by ingredient and
    by title, merges the two result sets, and returns at most 12 hits. The
    response always has ``answer=None`` and ``tier="free"``; any LLM synthesis
    is layered on by the caller, outside this thread.
    """
    import json as _json

    store = Store(db_path)
    try:
        keywords = _extract_ask_keywords(question)

        by_ingredient: list[dict] = []
        if keywords:
            by_ingredient = store.search_recipes_by_ingredients(keywords, limit=15)

        # Title search: browse_recipes q= is used as a title probe, and the
        # longest keyword is taken as the term most likely to appear in a title.
        by_title: list[dict] = []
        if keywords and (probe := max(keywords, key=len)):
            page = store.browse_recipes(
                keywords=None,
                page=1,
                page_size=12,
                pantry_items=pantry_items or None,
                q=probe,
                sort="match" if pantry_items else "default",
            )
            by_title = page.get("recipes", [])

        # De-duplicate on recipe id; ingredient hits are listed first, so they
        # win over a title hit for the same recipe.
        unique: dict[int, dict] = {}
        for row in by_ingredient + by_title:
            rid = row.get("id")
            if rid is not None:
                unique.setdefault(rid, row)
        merged = list(unique.values())

        pantry_set = {p.lower() for p in pantry_items} if pantry_items else set()

        def _coverage(row: dict) -> float | None:
            """Share of the recipe's ingredients matched by a pantry item."""
            if not pantry_set:
                return None
            names = row.get("ingredient_names") or []
            if isinstance(names, str):
                # The column may arrive as a JSON-encoded string.
                try:
                    names = _json.loads(names)
                except Exception:
                    names = []
            if not names:
                return None
            matched = sum(
                1 for n in names
                if any(p in n.lower() for p in pantry_set)
            )
            return round(matched / len(names), 2)

        hits: list[AskRecipeHit] = [
            AskRecipeHit(
                id=row["id"],
                title=row.get("title", ""),
                category=row.get("category"),
                match_pct=_coverage(row),
            )
            for row in merged[:12]
        ]
        return AskResponse(answer=None, recipes=hits, tier="free")
    finally:
        store.close()
@router.post("/ask", response_model=AskResponse)
async def ask_recipes(
    req: AskRequest,
    session: CloudUser = Depends(get_session),
) -> AskResponse:
    """Natural-language recipe search with optional LLM synthesis.

    Free tier: keyword extraction from question -> FTS ingredient + title search.
    Paid tier / BYOK: same search, then LLM synthesizes a short conversational answer.

    Returns:
        The search result; ``answer`` is filled and ``tier`` set to "paid"
        only when synthesis ran and succeeded.
    """
    result = await asyncio.to_thread(_ask_in_thread, session.db, req.question, req.pantry_items)

    # LLM synthesis: only for paid/premium/ultra tiers, not "local" dev tier.
    # Wrapped in wait_for so an unresponsive model degrades gracefully to recipe list only.
    paid_tier = session.tier in ("paid", "premium", "ultra")
    if (paid_tier or session.has_byok) and result.recipes:
        recipe_titles = ", ".join(r.title for r in result.recipes[:6])
        prompt = (
            f'You are a helpful kitchen assistant. The user asked: "{req.question}"\n\n'
            f"Matching recipes: {recipe_titles}\n\n"
            # Was "12 sentence", which contradicted "brief".
            "Write a brief, friendly 1-2 sentence response suggesting which of these "
            "recipes might best fit the question. Be specific and natural."
        )
        try:
            from circuitforge_core.llm.router import LLMRouter

            answer = await asyncio.wait_for(
                asyncio.to_thread(LLMRouter().complete, prompt),
                timeout=8.0,
            )
            result = result.model_copy(update={"answer": answer.strip() or None, "tier": "paid"})
        except Exception as exc:
            # Covers the wait_for timeout too (TimeoutError is an Exception).
            # Synthesis is optional, so fall back to the plain recipe list.
            log.warning("Ask LLM synthesis skipped: %s", exc)

    return result
@router.get("/{recipe_id}") @router.get("/{recipe_id}")
async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session)) -> dict: async def get_recipe(recipe_id: int, session: CloudUser = Depends(get_session)) -> dict:
def _get(db_path: Path, rid: int) -> dict | None: def _get(db_path: Path, rid: int) -> dict | None:

View file

@ -2,7 +2,6 @@ from fastapi import APIRouter
from app.api.endpoints import health, receipts, export, inventory, ocr, recipes, settings, staples, feedback, feedback_attach, household, saved_recipes, imitate, meal_plans, orch_usage, session, shopping from app.api.endpoints import health, receipts, export, inventory, ocr, recipes, settings, staples, feedback, feedback_attach, household, saved_recipes, imitate, meal_plans, orch_usage, session, shopping
from app.api.endpoints.community import router as community_router from app.api.endpoints.community import router as community_router
from app.api.endpoints.corrections import router as corrections_router from app.api.endpoints.corrections import router as corrections_router
from app.api.endpoints.mastodon_oauth import router as mastodon_router
from app.api.endpoints.recipe_scan import router as recipe_scan_router from app.api.endpoints.recipe_scan import router as recipe_scan_router
from app.api.endpoints.recipe_tags import router as recipe_tags_router from app.api.endpoints.recipe_tags import router as recipe_tags_router
@ -31,4 +30,3 @@ api_router.include_router(shopping.router, prefix="/shopping", tags=
api_router.include_router(community_router) api_router.include_router(community_router)
api_router.include_router(recipe_tags_router) api_router.include_router(recipe_tags_router)
api_router.include_router(corrections_router, prefix="/corrections", tags=["corrections"]) api_router.include_router(corrections_router, prefix="/corrections", tags=["corrections"])
api_router.include_router(mastodon_router)

View file

@ -65,24 +65,9 @@ class Settings:
# Quality # Quality
MIN_QUALITY_SCORE: float = float(os.environ.get("MIN_QUALITY_SCORE", "50.0")) MIN_QUALITY_SCORE: float = float(os.environ.get("MIN_QUALITY_SCORE", "50.0"))
# CF-core resource coordinator (VRAM lease management — lease broker, not inference) # CF-core resource coordinator (VRAM lease management)
COORDINATOR_URL: str = os.environ.get("COORDINATOR_URL", "http://localhost:7700") COORDINATOR_URL: str = os.environ.get("COORDINATOR_URL", "http://localhost:7700")
# GPU inference server URL
# Priority: GPU_SERVER_URL env var → CF_ORCH_URL env var (backward compat)
# → https://orch.circuitforge.tech when CF_LICENSE_KEY is present (Paid+)
# Resolved value is written back to os.environ["CF_ORCH_URL"] at startup so
# all service-layer callers that read CF_ORCH_URL directly see the right URL.
GPU_SERVER_URL: str | None = (
os.environ.get("GPU_SERVER_URL")
or os.environ.get("CF_ORCH_URL")
or (
"https://orch.circuitforge.tech"
if os.environ.get("CF_LICENSE_KEY")
else None
)
)
# Hosted cf-orch coordinator — bearer token for managed cloud GPU inference (Paid+) # Hosted cf-orch coordinator — bearer token for managed cloud GPU inference (Paid+)
# CFOrchClient reads CF_LICENSE_KEY automatically; exposed here for startup validation. # CFOrchClient reads CF_LICENSE_KEY automatically; exposed here for startup validation.
CF_LICENSE_KEY: str | None = os.environ.get("CF_LICENSE_KEY") CF_LICENSE_KEY: str | None = os.environ.get("CF_LICENSE_KEY")
@ -91,17 +76,6 @@ class Settings:
# runs don't pollute session counts. Set to the Directus UUID of the test user. # runs don't pollute session counts. Set to the Directus UUID of the test user.
E2E_TEST_USER_ID: str | None = os.environ.get("E2E_TEST_USER_ID") or None E2E_TEST_USER_ID: str | None = os.environ.get("E2E_TEST_USER_ID") or None
# ActivityPub federation (optional; disabled by default)
AP_ENABLED: bool = os.environ.get("AP_ENABLED", "false").lower() in ("1", "true", "yes")
AP_HOST: str = os.environ.get("AP_HOST", "") # e.g. kiwi.circuitforge.tech
CLOUD_DATA_ROOT: Path = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/kiwi-cloud-data"))
AP_KEY_PATH: Path = Path(
os.environ.get("AP_KEY_PATH", str(CLOUD_DATA_ROOT / "ap_keys" / "instance.pem"))
)
# Fernet key for Mastodon access token encryption (base64-urlsafe, 32 bytes)
# Leave unset to skip encryption (dev only)
AP_TOKEN_ENCRYPTION_KEY: str | None = os.environ.get("AP_TOKEN_ENCRYPTION_KEY") or None
# Feature flags # Feature flags
ENABLE_OCR: bool = os.environ.get("ENABLE_OCR", "false").lower() in ("1", "true", "yes") ENABLE_OCR: bool = os.environ.get("ENABLE_OCR", "false").lower() in ("1", "true", "yes")
# Use OrchestratedScheduler (coordinator-aware, multi-GPU fan-out) instead of # Use OrchestratedScheduler (coordinator-aware, multi-GPU fan-out) instead of
@ -123,9 +97,3 @@ class Settings:
settings = Settings() settings = Settings()
# Normalise GPU_SERVER_URL into CF_ORCH_URL so every service-layer caller that
# reads os.environ.get("CF_ORCH_URL") sees the resolved value, including the
# Paid+ cloud default injected above.
if settings.GPU_SERVER_URL:
os.environ["CF_ORCH_URL"] = settings.GPU_SERVER_URL

View file

@ -1,47 +0,0 @@
-- 042_activitypub.sql
-- ActivityPub federation tables: follower registry, delivery log, dedup, Mastodon tokens.
-- Follower registry: AP actors that Follow this Kiwi instance.
-- One row per actor (actor_id is UNIQUE); `active` is a 0/1 flag that
-- defaults to 1.
CREATE TABLE IF NOT EXISTS ap_followers (
    id INTEGER PRIMARY KEY,
    actor_id TEXT NOT NULL UNIQUE, -- AP actor URL
    inbox_url TEXT NOT NULL,
    shared_inbox TEXT, -- optional; delivery code prefers it over inbox_url when set
    followed_at TEXT NOT NULL DEFAULT (datetime('now')),
    active INTEGER NOT NULL DEFAULT 1
);
-- Partial index: only rows with active = 1 are indexed.
CREATE INDEX IF NOT EXISTS idx_ap_followers_active
    ON ap_followers (active) WHERE active = 1;
-- Outgoing delivery log: exactly one row per (post_slug, target_inbox) pair.
-- Retries are counted in `attempts` rather than adding rows.
-- The UNIQUE constraint is what lets the delivery code's
-- `INSERT OR IGNORE INTO ap_deliveries (post_slug, target_inbox, status)`
-- skip an existing pair; without it every call inserted a duplicate row.
CREATE TABLE IF NOT EXISTS ap_deliveries (
    id INTEGER PRIMARY KEY,
    post_slug TEXT NOT NULL,
    target_inbox TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending', -- pending | delivered | failed
    attempts INTEGER NOT NULL DEFAULT 0,
    last_error TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    delivered_at TEXT,
    UNIQUE (post_slug, target_inbox)
);
-- Partial index over rows not yet delivered (pending or failed).
CREATE INDEX IF NOT EXISTS idx_ap_deliveries_status
    ON ap_deliveries (status) WHERE status != 'delivered';
-- Incoming activity dedup: prevents replay attacks and double-processing.
-- activity_id is the primary key, so a second insert of the same id conflicts.
CREATE TABLE IF NOT EXISTS ap_received (
    activity_id TEXT PRIMARY KEY,
    received_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Mastodon OAuth tokens: per-user, encrypted at rest
-- Stored in the user's local kiwi.db (CLOUD_MODE: per-user DB tree)
-- At most one token per user (directus_user_id is UNIQUE).
-- NOTE(review): updated_at only has an insert-time default and no trigger is
-- defined in this migration, so writers presumably set it on update; confirm.
CREATE TABLE IF NOT EXISTS mastodon_tokens (
    id INTEGER PRIMARY KEY,
    directus_user_id TEXT NOT NULL UNIQUE,
    instance_url TEXT NOT NULL,
    access_token TEXT NOT NULL, -- Fernet-encrypted when AP_TOKEN_ENCRYPTION_KEY set
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);

View file

@ -43,11 +43,6 @@ async def _browse_counts_refresh_loop(corpus_path: str) -> None:
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
logger.info("Starting Kiwi API...") logger.info("Starting Kiwi API...")
settings.ensure_dirs() settings.ensure_dirs()
# Run DB migrations at startup (ensures all tables exist before any request)
from app.db.store import Store
_s = Store(settings.DB_PATH)
_s.close()
register_kiwi_programs() register_kiwi_programs()
# Start LLM background task scheduler # Start LLM background task scheduler
@ -59,14 +54,6 @@ async def lifespan(app: FastAPI):
from app.api.endpoints.community import init_community_store from app.api.endpoints.community import init_community_store
init_community_store(settings.COMMUNITY_DB_URL) init_community_store(settings.COMMUNITY_DB_URL)
# Initialize ActivityPub instance actor (no-op when AP_ENABLED=false)
if settings.AP_ENABLED and settings.AP_HOST:
try:
from app.services.ap.keys import init_actor
init_actor(host=settings.AP_HOST, key_path=settings.AP_KEY_PATH)
except Exception as _ap_exc:
logger.warning("AP init failed (AP features disabled): %s", _ap_exc)
# Browse counts cache — warm in-memory cache from disk, refresh if stale. # Browse counts cache — warm in-memory cache from disk, refresh if stale.
# Uses the corpus path the store will attach to at request time. # Uses the corpus path the store will attach to at request time.
corpus_path = os.environ.get("RECIPE_DB_PATH", str(settings.DB_PATH)) corpus_path = os.environ.get("RECIPE_DB_PATH", str(settings.DB_PATH))
@ -114,11 +101,6 @@ app.add_middleware(
app.include_router(api_router, prefix=settings.API_PREFIX) app.include_router(api_router, prefix=settings.API_PREFIX)
# AP endpoints: WebFinger at root (not under /api/v1), AP objects under /ap
from app.api.endpoints.activitypub import ap_router, webfinger_router
app.include_router(webfinger_router)
app.include_router(ap_router)
@app.get("/") @app.get("/")
async def root(): async def root():

View file

View file

@ -1,306 +0,0 @@
"""Kiwi MCP Server — read-only corpus DB access for tag/keyword audits.
Exposes four tools to Claude:
kiwi_query_corpus run a read-only SQL query against the corpus DB
kiwi_count_fts run an FTS5 MATCH expression and return row count
kiwi_sample_tags return tag frequency distribution by prefix
kiwi_browse_preview call the browse endpoint and return first-page results
Run with:
python -m app.mcp.server
(from /Library/Development/CircuitForge/kiwi with cf conda env active)
Configure in Claude Code ~/.claude/settings.json mcpServers:
"kiwi": {
"command": "/devl/miniconda3/envs/cf/bin/python",
"args": ["-m", "app.mcp.server"],
"cwd": "/Library/Development/CircuitForge/kiwi",
"env": {
"KIWI_DB_PATH": "/Library/Development/CircuitForge/kiwi/data/kiwi.db",
"KIWI_API_URL": "http://localhost:8512"
}
}
"""
from __future__ import annotations
import asyncio
import json
import os
import sqlite3
from pathlib import Path
import httpx
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import TextContent, Tool
# Corpus DB location. KIWI_DB_PATH wins; the fallback is <repo root>/data/kiwi.db.
# This module is app/mcp/server.py, so the repo root is three directories up
# (mcp -> app -> repo root). The previous `parents[3]` pointed one level above
# the repo. Chained `.parent` is used because it never raises on shallow paths.
_DB_PATH = os.environ.get(
    "KIWI_DB_PATH",
    str(Path(__file__).parent.parent.parent / "data" / "kiwi.db"),
)
# Base URL of the running Kiwi API, used by the browse preview tool.
_API_URL = os.environ.get("KIWI_API_URL", "http://localhost:8512")
# HTTP timeout (seconds) for calls to the Kiwi API.
_TIMEOUT = 30.0
# Hard cap on rows returned by any corpus query tool.
_QUERY_ROW_LIMIT = 200
server = Server("kiwi")
def _open_ro() -> sqlite3.Connection:
    """Open the corpus DB in read-only mode.

    Returns:
        A connection with ``sqlite3.Row`` rows. Write statements fail with
        ``sqlite3.OperationalError`` because the URI carries ``mode=ro``.
    """
    # as_uri() needs an absolute path (hence resolve()) and percent-encodes
    # characters such as ' ', '#', '?' that would otherwise corrupt the URI.
    uri = f"{Path(_DB_PATH).resolve().as_uri()}?mode=ro"
    # check_same_thread=False: callers run queries in executor threads.
    conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
    conn.row_factory = sqlite3.Row
    return conn
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Advertise the four Kiwi tools and their JSON input schemas.

    The tool names listed here are the ones ``call_tool`` dispatches on.
    """
    return [
        # Free-form read-only SQL against the corpus DB.
        Tool(
            name="kiwi_query_corpus",
            description=(
                "Run a read-only SQL SELECT query against the Kiwi corpus DB (kiwi.db). "
                "Returns up to 200 rows as a JSON array. "
                "Key tables: recipes (id, title, ingredient_names, inferred_tags, source_url), "
                "recipes_fts (FTS5 virtual table for full-text search), "
                "ingredient_profiles (name, elements, texture_profile). "
                "Use for schema exploration, spot-checking tag coverage, and counting results. "
                "Read-only — any write statement will be rejected by SQLite."
            ),
            inputSchema={
                "type": "object",
                "required": ["sql"],
                "properties": {
                    "sql": {
                        "type": "string",
                        "description": (
                            "A SELECT statement. E.g.: "
                            "SELECT title, inferred_tags FROM recipes WHERE inferred_tags LIKE '%vegan%' LIMIT 10"
                        ),
                    },
                },
            },
        ),
        # Hit count for an FTS5 MATCH expression.
        Tool(
            name="kiwi_count_fts",
            description=(
                "Run an FTS5 MATCH expression against the recipes_fts table and return the hit count. "
                "Useful for quickly auditing keyword coverage without a full query. "
                "Always double-quote all terms in MATCH expressions. "
                "E.g. match_expr='\"tofu\" OR \"tempeh\"' returns how many recipes include either."
            ),
            inputSchema={
                "type": "object",
                "required": ["match_expr"],
                "properties": {
                    "match_expr": {
                        "type": "string",
                        "description": (
                            "FTS5 MATCH expression string (without the MATCH keyword). "
                            'E.g. \'"lentil" OR "chickpea"\' or \'"pasta" AND "vegetarian"\''
                        ),
                    },
                },
            },
        ),
        # Tag frequency table, optionally filtered by prefix. No required fields.
        Tool(
            name="kiwi_sample_tags",
            description=(
                "Return tag frequency distribution from the corpus. "
                "Queries inferred_tags column for tags matching the given prefix pattern. "
                "Useful for auditing how well a category keyword set covers the corpus, "
                "or discovering what tags exist under a domain (cuisine:, meal:, dietary:, texture:)."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "prefix": {
                        "type": "string",
                        "default": "",
                        "description": (
                            "Tag prefix to filter by. E.g. 'cuisine:' returns all cuisine tags, "
                            "'meal:' returns all meal type tags, '' returns all tags. "
                            "Returns top 50 by frequency."
                        ),
                    },
                    "limit": {
                        "type": "integer",
                        "default": 50,
                        "description": "Max number of tag entries to return (default 50, max 200).",
                    },
                },
            },
        ),
        # First page of the HTTP browse endpoint for a domain/category.
        Tool(
            name="kiwi_browse_preview",
            description=(
                "Call the Kiwi browse endpoint and return first-page results. "
                "Use to verify that a domain/category returns the expected recipes "
                "after a keyword or tag change, without opening the browser. "
                "Returns recipe titles, match counts, and total result count."
            ),
            inputSchema={
                "type": "object",
                "required": ["domain", "category"],
                "properties": {
                    "domain": {
                        "type": "string",
                        "description": (
                            "Browse domain slug. "
                            "Known domains: cuisine, meal_type, dietary, ingredient, occasion, texture."
                        ),
                    },
                    "category": {
                        "type": "string",
                        "description": "Category slug within the domain, e.g. 'italian', 'breakfast', 'vegan'.",
                    },
                    "subcategory": {
                        "type": "string",
                        "default": "",
                        "description": "Optional subcategory slug to narrow further.",
                    },
                    "page_size": {
                        "type": "integer",
                        "default": 10,
                        "description": "Results per page (default 10, max 50).",
                    },
                },
            },
        ),
    ]
@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    """Route a tool invocation to its handler by tool name.

    An unrecognised name yields a single "Unknown tool" text item rather than
    raising.
    """
    handlers = {
        "kiwi_query_corpus": _query_corpus,
        "kiwi_count_fts": _count_fts,
        "kiwi_sample_tags": _sample_tags,
        "kiwi_browse_preview": _browse_preview,
    }
    handler = handlers.get(name)
    if handler is None:
        return [TextContent(type="text", text=f"Unknown tool: {name}")]
    return await handler(arguments)
async def _query_corpus(args: dict) -> list[TextContent]:
    """Run one read-only query against the corpus DB and return rows as JSON.

    Args:
        args: Tool arguments; ``sql`` must be a string holding a SELECT
            statement (a leading ``WITH`` common table expression is accepted).

    Returns:
        One text item: a JSON array of at most ``_QUERY_ROW_LIMIT`` row
        objects, or an error message. Errors are reported as text, not raised.
    """
    sql = args.get("sql")
    if not isinstance(sql, str):
        # Previously a non-string value crashed on .strip().
        return [TextContent(type="text", text="Error: only SELECT statements are allowed.")]
    sql = sql.strip()
    # Prefix check is a courtesy filter only; the connection itself is opened
    # read-only, so writes are rejected by SQLite regardless.
    if not sql.upper().startswith(("SELECT", "WITH")):
        return [TextContent(type="text", text="Error: only SELECT statements are allowed.")]

    def _run() -> list[dict]:
        conn = _open_ro()
        try:
            cur = conn.execute(sql)
            # fetchmany caps the result set without rewriting the user's SQL.
            rows = cur.fetchmany(_QUERY_ROW_LIMIT)
            return [dict(r) for r in rows]
        finally:
            conn.close()

    try:
        # SQLite calls block, so run them in the default executor.
        rows = await asyncio.get_running_loop().run_in_executor(None, _run)
        return [TextContent(type="text", text=json.dumps(rows, indent=2, default=str))]
    except Exception as exc:
        return [TextContent(type="text", text=f"Query error: {exc}")]
async def _count_fts(args: dict) -> list[TextContent]:
    """Count rows of ``recipes_fts`` matching an FTS5 MATCH expression.

    The expression is bound as a query parameter. The reply is one text item:
    ``{"match_expr": ..., "count": ...}`` as JSON, or an error message.
    """
    expr = args.get("match_expr", "").strip()
    if not expr:
        return [TextContent(type="text", text="Error: match_expr is required.")]

    def _count() -> int:
        db = _open_ro()
        try:
            row = db.execute(
                "SELECT COUNT(*) FROM recipes_fts WHERE recipes_fts MATCH ?",
                (expr,),
            ).fetchone()
            return row[0]
        finally:
            db.close()

    try:
        total = await asyncio.get_event_loop().run_in_executor(None, _count)
        payload = json.dumps({"match_expr": expr, "count": total})
        return [TextContent(type="text", text=payload)]
    except Exception as exc:
        return [TextContent(type="text", text=f"FTS error: {exc}")]
async def _sample_tags(args: dict) -> list[TextContent]:
    """Return the most frequent ``inferred_tags`` values, optionally by prefix.

    Args:
        args: Tool arguments. ``prefix`` (default "") filters tags with
            ``LIKE prefix%``; ``limit`` (default 50) is clamped to
            1..``_QUERY_ROW_LIMIT``.

    Returns:
        One text item: ``{"prefix": ..., "tags": [{"tag", "frequency"}, ...]}``
        as JSON, or an error message. Errors are reported as text, not raised.
    """
    prefix = args.get("prefix") or ""
    try:
        requested = int(args.get("limit", 50))
    except (TypeError, ValueError):
        return [TextContent(type="text", text="Error: limit must be an integer.")]
    # Lower bound matters: SQLite treats a negative LIMIT as "no limit",
    # which would bypass the row cap.
    limit = max(1, min(requested, _QUERY_ROW_LIMIT))

    def _run() -> list[dict]:
        conn = _open_ro()
        try:
            # inferred_tags is treated as a comma-separated string: commas
            # (with or without a following space) are rewritten into a JSON
            # array so json_each can emit one row per tag.
            # NOTE(review): a tag containing '"' or '\' would produce invalid
            # JSON here and surface as a "Tag query error"; confirm the
            # column never holds such values.
            sql = """
                WITH tag_rows AS (
                SELECT trim(value) AS tag
                FROM recipes, json_each('["' || replace(replace(inferred_tags, ', ', '","'), ',', '","') || '"]')
                WHERE inferred_tags IS NOT NULL AND inferred_tags != ''
                )
                SELECT tag, COUNT(*) AS frequency
                FROM tag_rows
                WHERE tag LIKE ? AND tag != ''
                GROUP BY tag
                ORDER BY frequency DESC
                LIMIT ?
                """
            pattern = f"{prefix}%" if prefix else "%"
            cur = conn.execute(sql, (pattern, limit))
            return [{"tag": r["tag"], "frequency": r["frequency"]} for r in cur.fetchall()]
        finally:
            conn.close()

    try:
        tags = await asyncio.get_running_loop().run_in_executor(None, _run)
        return [TextContent(type="text", text=json.dumps({"prefix": prefix, "tags": tags}, indent=2))]
    except Exception as exc:
        return [TextContent(type="text", text=f"Tag query error: {exc}")]
async def _browse_preview(args: dict) -> list[TextContent]:
    """Fetch the first page of the Kiwi browse endpoint and summarise it.

    Args:
        args: Tool arguments. ``domain`` and ``category`` are required;
            ``subcategory`` is optional; ``page_size`` (default 10) is
            clamped to 1..50.

    Returns:
        One text item: a JSON summary (domain, category, subcategory, total,
        page_size, titles), or an error message. Errors are reported as text,
        not raised.
    """
    from urllib.parse import quote

    domain = str(args.get("domain") or "").strip()
    category = str(args.get("category") or "").strip()
    if not domain or not category:
        return [TextContent(type="text", text="Browse error: domain and category are required.")]
    subcategory = args.get("subcategory") or ""
    try:
        page_size = int(args.get("page_size", 10))
    except (TypeError, ValueError):
        return [TextContent(type="text", text="Browse error: page_size must be an integer.")]
    page_size = max(1, min(page_size, 50))

    params: dict = {"page": 1, "page_size": page_size}
    if subcategory:
        params["subcategory"] = subcategory

    # Escape the slugs (including '/') so a value such as "../x" cannot
    # redirect the request to a different API path.
    url = (
        f"{_API_URL}/api/v1/recipes/browse/"
        f"{quote(domain, safe='')}/{quote(category, safe='')}"
    )

    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            resp = await client.get(url, params=params)
            resp.raise_for_status()
        # Parsed inside the try: a non-JSON body is reported, not raised.
        data = resp.json()
    except Exception as exc:
        return [TextContent(type="text", text=f"Browse error: {exc}")]

    if not isinstance(data, dict):
        return [TextContent(type="text", text="Browse error: unexpected response shape.")]

    summary = {
        "domain": domain,
        "category": category,
        "subcategory": subcategory or None,
        "total": data.get("total", 0),
        "page_size": page_size,
        "titles": [r.get("title", "") for r in data.get("recipes", [])],
    }
    return [TextContent(type="text", text=json.dumps(summary, indent=2))]
async def _main() -> None:
    """Serve the MCP server over stdio until the streams close."""
    async with stdio_server() as streams:
        reader, writer = streams
        init_options = server.create_initialization_options()
        await server.run(reader, writer, init_options)
if __name__ == "__main__":
asyncio.run(_main())

View file

@ -137,8 +137,7 @@ class RecipeRequest(BaseModel):
pantry_match_only: bool = False # when True, only return recipes with zero missing ingredients pantry_match_only: bool = False # when True, only return recipes with zero missing ingredients
complexity_filter: str | None = None # 'easy' | 'moderate' | 'involved' — None = any complexity_filter: str | None = None # 'easy' | 'moderate' | 'involved' — None = any
max_time_min: int | None = None # filter by estimated cooking time ceiling max_time_min: int | None = None # filter by estimated cooking time ceiling
max_total_min: int | None = None # filter by parsed total time (active + passive) max_total_min: int | None = None # filter by parsed total time from recipe directions
max_active_min: int | None = None # filter by hands-on active time only
unit_system: str = "metric" # "metric" | "imperial" unit_system: str = "metric" # "metric" | "imperial"
@ -206,24 +205,3 @@ class StreamTokenResponse(BaseModel):
stream_url: str stream_url: str
token: str token: str
expires_in_s: int expires_in_s: int
class AskRequest(BaseModel):
    """Request body for POST /recipes/ask."""
    # Free-text question; must be 1-500 characters.
    question: str = Field(min_length=1, max_length=500)
    # Optional pantry item names; defaults to an empty list.
    pantry_items: list[str] = Field(default_factory=list)
class AskRecipeHit(BaseModel):
    """One recipe entry in the result list returned by the Ask endpoint."""

    id: int
    title: str
    # Optional fields; both default to None when not supplied.
    match_pct: float | None = None
    category: str | None = None
class AskResponse(BaseModel):
    """Body returned by POST /recipes/ask."""

    # LLM-synthesized response (Paid tier only); None otherwise.
    answer: str | None = None
    recipes: list[AskRecipeHit]
    tier: str

View file

@ -1,115 +0,0 @@
# app/services/ap/delivery.py
# MIT License
from __future__ import annotations
import logging
import time
from datetime import datetime, timezone
from pathlib import Path
from circuitforge_core.activitypub import deliver_activity
from app.services.ap.keys import get_actor
logger = logging.getLogger(__name__)
_RETRIES = 3
_BACKOFF = [1.0, 4.0, 16.0]
def deliver_to_followers(post_slug: str, activity: dict, db_path: Path) -> None:
    """Fan an ActivityPub activity out to every active follower inbox.

    Called as a background task. Active followers are read from the
    ``ap_followers`` table of the SQLite database at ``db_path``; each distinct
    inbox is then passed to ``_deliver_with_retry``, which handles retries and
    delivery bookkeeping.

    Args:
        post_slug: Slug of the post the activity belongs to.
        activity: The activity document to send.
        db_path: Path to the local SQLite database.

    Returns:
        None. Does nothing when no instance actor is loaded.
    """
    if get_actor() is None:
        return

    import sqlite3

    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    try:
        followers = conn.execute(
            "SELECT inbox_url, shared_inbox FROM ap_followers WHERE active = 1"
        ).fetchall()
    finally:
        conn.close()

    # Prefer the shared inbox so followers that share one collapse into a
    # single delivery. Rows with neither inbox recorded are skipped instead of
    # being handed to the delivery code as None / "".
    inboxes: set[str] = set()
    for row in followers:
        inbox = row["shared_inbox"] or row["inbox_url"]
        if inbox:
            inboxes.add(inbox)

    for inbox_url in inboxes:
        _deliver_with_retry(
            post_slug=post_slug,
            activity=activity,
            inbox_url=inbox_url,
            db_path=db_path,
        )
def _deliver_with_retry(
    post_slug: str,
    activity: dict,
    inbox_url: str,
    db_path: Path,
) -> None:
    """Send one activity to one inbox, retrying on failure.

    A ``pending`` row is inserted into ``ap_deliveries`` first (ignored if it
    already exists). Delivery is then attempted once per entry of
    ``_BACKOFF[:_RETRIES]``, sleeping for that entry's delay between attempts.
    Any response with a status code below 300 counts as success. The final
    outcome is written through ``_update_delivery``; a failure is also logged
    as a warning. Returns immediately when no instance actor is loaded.
    """
    actor = get_actor()
    if actor is None:
        return

    import sqlite3

    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            "INSERT OR IGNORE INTO ap_deliveries (post_slug, target_inbox, status) VALUES (?,?,?)",
            (post_slug, inbox_url, "pending"),
        )
        conn.commit()
    finally:
        conn.close()

    failure: str | None = None
    delays = _BACKOFF[:_RETRIES]
    for attempt in range(len(delays)):
        try:
            response = deliver_activity(
                activity=activity,
                inbox_url=inbox_url,
                actor=actor,
                timeout=10.0,
            )
            if response.status_code < 300:
                _update_delivery(db_path, post_slug, inbox_url, "delivered", None)
                return
            failure = f"HTTP {response.status_code}"
        except Exception as exc:
            # Keep the stored error short.
            failure = str(exc)[:200]
        # No point sleeping after the last attempt.
        if attempt < _RETRIES - 1:
            time.sleep(delays[attempt])

    _update_delivery(db_path, post_slug, inbox_url, "failed", failure)
    logger.warning(
        "AP delivery failed after %d attempts to %s: %s", _RETRIES, inbox_url, failure
    )
def _update_delivery(
db_path: Path,
post_slug: str,
inbox_url: str,
status: str,
error: str | None,
) -> None:
import sqlite3
now = datetime.now(timezone.utc).isoformat()
conn = sqlite3.connect(str(db_path))
try:
if status == "delivered":
conn.execute(
"""UPDATE ap_deliveries SET status=?, attempts=attempts+1, delivered_at=?
WHERE post_slug=? AND target_inbox=?""",
(status, now, post_slug, inbox_url),
)
else:
conn.execute(
"""UPDATE ap_deliveries SET status=?, attempts=attempts+1, last_error=?
WHERE post_slug=? AND target_inbox=?""",
(status, error, post_slug, inbox_url),
)
conn.commit()
finally:
conn.close()

View file

@ -1,48 +0,0 @@
# app/services/ap/keys.py
# MIT License
from __future__ import annotations
import logging
from pathlib import Path
from circuitforge_core.activitypub import CFActor, generate_rsa_keypair, load_actor_from_key_file
logger = logging.getLogger(__name__)
_actor: CFActor | None = None
def get_actor() -> CFActor | None:
    """Expose the module-level actor singleton.

    Returns:
        The actor stored by ``init_actor``, or ``None`` when none has been
        loaded (ActivityPub not enabled).
    """
    return _actor
def init_actor(host: str, key_path: Path) -> CFActor:
    """Load or generate the instance RSA keypair and build the CFActor singleton.

    Called once at startup when AP_ENABLED=true. When ``key_path`` does not
    exist yet (first boot) a new 2048-bit keypair is generated and the private
    key is written there. The actor id is ``https://{host}/ap/actor``.

    Args:
        host: Public host name of this instance (no scheme).
        key_path: Location of the PEM private key file; parent directories are
            created as needed.

    Returns:
        The loaded actor, which is also stored in the module-level singleton.
    """
    global _actor
    import os

    key_path.parent.mkdir(parents=True, exist_ok=True)
    if not key_path.exists():
        logger.info("AP: no key file found at %s — generating new RSA-2048 keypair", key_path)
        private_pem, _pub = generate_rsa_keypair(bits=2048)
        try:
            # Create the file with owner-only permissions from the start so the
            # private key is never readable under a permissive umask, and use
            # O_EXCL so a key written concurrently is not overwritten.
            fd = os.open(key_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
        except FileExistsError:
            logger.info("AP: key file %s appeared during generation; using the existing key", key_path)
        else:
            with os.fdopen(fd, "w", encoding="utf-8") as fh:
                fh.write(private_pem)
            key_path.chmod(0o600)

    base = f"https://{host}"
    actor_id = f"{base}/ap/actor"
    _actor = load_actor_from_key_file(
        actor_id=actor_id,
        username="kiwi",
        display_name="Kiwi Pantry",
        private_key_path=str(key_path),
        summary="Community pantry and recipe feed from a Kiwi instance.",
    )
    logger.info("AP: instance actor loaded — %s", actor_id)
    return _actor

View file

@ -1,194 +0,0 @@
# app/services/ap/mastodon.py
# MIT License
from __future__ import annotations
import logging
from pathlib import Path
import httpx
logger = logging.getLogger(__name__)
_APP_SCOPES = "write:statuses"
_APP_NAME = "Kiwi Pantry"
_APP_WEBSITE = "https://circuitforge.tech/kiwi"
def register_app(instance_url: str, redirect_uri: str) -> dict:
    """Register Kiwi as an OAuth application on a Mastodon instance.

    POSTs the app name, redirect URI, scopes and website to
    ``{instance_url}/api/v1/apps`` with a 10 second timeout.

    Returns:
        The decoded JSON response (app credentials such as client_id and
        client_secret).

    Raises:
        httpx.HTTPError: On transport failure or a non-success status.
    """
    endpoint = f"{instance_url.rstrip('/')}/api/v1/apps"
    form = {
        "client_name": _APP_NAME,
        "redirect_uris": redirect_uri,
        "scopes": _APP_SCOPES,
        "website": _APP_WEBSITE,
    }
    response = httpx.post(endpoint, data=form, timeout=10.0)
    response.raise_for_status()
    return response.json()
def build_authorize_url(instance_url: str, client_id: str, redirect_uri: str) -> str:
    """Return the OAuth authorize URL to redirect the user to.

    Args:
        instance_url: Base URL of the Mastodon instance; a trailing slash is
            ignored.
        client_id: Client id obtained when the app was registered.
        redirect_uri: Redirect URI registered for the app.

    Returns:
        ``{instance_url}/oauth/authorize`` with ``response_type=code`` and the
        client id, redirect URI and requested scopes as query parameters.
    """
    from urllib.parse import urlencode

    # Percent-encode every value: redirect URIs contain "://", "/" and possibly
    # "?" or "&", and scopes contain ":"; raw interpolation would corrupt the
    # query string.
    query = urlencode(
        {
            "response_type": "code",
            "client_id": client_id,
            "redirect_uri": redirect_uri,
            "scope": _APP_SCOPES,
        }
    )
    return f"{instance_url.rstrip('/')}/oauth/authorize?{query}"
def exchange_code(
    instance_url: str,
    client_id: str,
    client_secret: str,
    code: str,
    redirect_uri: str,
) -> str:
    """Trade an OAuth authorization code for an access token.

    POSTs an ``authorization_code`` grant to ``{instance_url}/oauth/token``
    with a 10 second timeout and returns the ``access_token`` field of the
    JSON response. Raises via ``raise_for_status`` on a non-success status.
    """
    endpoint = f"{instance_url.rstrip('/')}/oauth/token"
    form = {
        "grant_type": "authorization_code",
        "client_id": client_id,
        "client_secret": client_secret,
        "redirect_uri": redirect_uri,
        "code": code,
        "scope": _APP_SCOPES,
    }
    response = httpx.post(endpoint, data=form, timeout=10.0)
    response.raise_for_status()
    payload = response.json()
    return payload["access_token"]
def post_status(instance_url: str, access_token: str, content: str) -> dict:
    """Publish ``content`` as a public status on the user's Mastodon account.

    Sends a bearer-authenticated JSON POST to ``{instance_url}/api/v1/statuses``
    with a 15 second timeout and returns the decoded response. Raises via
    ``raise_for_status`` on a non-success status.
    """
    endpoint = f"{instance_url.rstrip('/')}/api/v1/statuses"
    auth_headers = {"Authorization": f"Bearer {access_token}"}
    body = {"status": content, "visibility": "public"}
    response = httpx.post(endpoint, headers=auth_headers, json=body, timeout=15.0)
    response.raise_for_status()
    return response.json()
def build_post_content(post: dict) -> str:
    """Format a community post dict as Mastodon-ready plain text.

    Layout: a headline (title, plus the recipe name when it differs), an
    optional notes paragraph cut to 200 characters, and a hashtag line.

    Args:
        post: Post dict. Reads ``title``, ``recipe_name``, ``outcome_notes``
            (falling back to ``description``) and ``dietary_tags``.

    Returns:
        The status text, sections separated by blank lines.
    """
    title = post.get("title") or "Untitled"
    recipe = post.get("recipe_name")
    notes = post.get("outcome_notes") or post.get("description")
    tags_raw: list[str] = post.get("dietary_tags") or []

    headline = f"🍽 {title}"
    if recipe and recipe != title:
        # Separate the two names; they were previously run together.
        headline += f" — {recipe}"
    lines = [headline]

    if notes:
        snippet = notes[:200].strip()
        if len(notes) > 200:
            # Mark the cut so readers can tell the notes were truncated.
            snippet += "…"
        lines.append(f"\n{snippet}")

    hashtags = ["#Kiwi", "#Cooking"]
    for tag in tags_raw[:3]:
        # "gluten-free" -> "GlutenFree"
        camel = "".join(word.capitalize() for word in tag.replace("-", " ").split())
        if camel:  # a blank tag would otherwise yield a bare "#"
            hashtags.append(f"#{camel}")
    lines.append("\n" + " ".join(hashtags))
    return "\n".join(lines)
def store_token(
    db_path: Path,
    directus_user_id: str,
    instance_url: str,
    access_token: str,
    encryption_key: str | None,
) -> None:
    """Save (or replace) a user's Mastodon access token in the local database.

    The token is passed through ``_encrypt`` with ``encryption_key`` before
    being stored. The row in ``mastodon_tokens`` is upserted on
    ``directus_user_id``; the instance URL is stored without a trailing slash.
    """
    import sqlite3

    stored_token = _encrypt(access_token, encryption_key)
    params = (directus_user_id, instance_url.rstrip("/"), stored_token)

    conn = sqlite3.connect(str(db_path))
    try:
        # Statement text is reproduced verbatim from the original.
        conn.execute(
            """INSERT INTO mastodon_tokens (directus_user_id, instance_url, access_token)
VALUES (?, ?, ?)
ON CONFLICT(directus_user_id) DO UPDATE SET
instance_url=excluded.instance_url,
access_token=excluded.access_token,
updated_at=datetime('now')""",
            params,
        )
        conn.commit()
    finally:
        conn.close()
def get_token(
    db_path: Path,
    directus_user_id: str,
    encryption_key: str | None,
) -> tuple[str, str] | None:
    """Look up the stored Mastodon connection for a user.

    Returns:
        ``(instance_url, access_token)`` with the token passed through
        ``_decrypt``, or ``None`` when the user has no row in
        ``mastodon_tokens``.
    """
    import sqlite3

    conn = sqlite3.connect(str(db_path))
    try:
        cursor = conn.execute(
            "SELECT instance_url, access_token FROM mastodon_tokens WHERE directus_user_id = ?",
            (directus_user_id,),
        )
        record = cursor.fetchone()
    finally:
        conn.close()

    if record is None:
        return None
    instance_url, stored_token = record[0], record[1]
    return instance_url, _decrypt(stored_token, encryption_key)
def delete_token(db_path: Path, directus_user_id: str) -> None:
    """Delete the ``mastodon_tokens`` row belonging to ``directus_user_id``."""
    import sqlite3

    conn = sqlite3.connect(str(db_path))
    try:
        params = (directus_user_id,)
        conn.execute("DELETE FROM mastodon_tokens WHERE directus_user_id = ?", params)
        conn.commit()
    finally:
        conn.close()
def _encrypt(plaintext: str, key: str | None) -> str:
if key is None:
return plaintext
try:
from cryptography.fernet import Fernet
return Fernet(key.encode()).encrypt(plaintext.encode()).decode()
except Exception:
logger.warning("Mastodon token encryption failed — storing plaintext")
return plaintext
def _decrypt(ciphertext: str, key: str | None) -> str:
if key is None:
return ciphertext
try:
from cryptography.fernet import Fernet
return Fernet(key.encode()).decrypt(ciphertext.encode()).decode()
except Exception:
logger.warning("Mastodon token decryption failed — returning as-is")
return ciphertext

View file

@ -1,111 +0,0 @@
# app/services/community/dedup.py
# MIT License
from __future__ import annotations
import json
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
_SIMILARITY_TIERS = {
"exact_recipe": "This exact recipe is already in the community feed.",
"very_similar": "Very similar recipes already exist (70%+ ingredient overlap).",
"somewhat_similar": "Somewhat similar recipes exist (35-70% ingredient overlap).",
"different": "No close matches found.",
}
def _parse_ingredient_names(raw) -> set[str]:
"""Return a normalised set of ingredient name tokens from various stored formats."""
if raw is None:
return set()
if isinstance(raw, str):
try:
raw = json.loads(raw)
except (ValueError, TypeError):
return set()
names: set[str] = set()
for item in raw:
if isinstance(item, str):
names.add(item.lower().strip())
elif isinstance(item, dict):
name = item.get("name") or item.get("ingredient") or ""
if name:
names.add(name.lower().strip())
return names
def jaccard(a: set[str], b: set[str]) -> float:
    """Compute the Jaccard index ``|a ∩ b| / |a ∪ b|`` of two sets.

    Two empty sets score 1.0; exactly one empty set scores 0.0.
    """
    if not (a or b):
        return 1.0
    if not (a and b):
        return 0.0
    shared = a & b
    combined = a | b
    return len(shared) / len(combined)
def similarity_tier(jaccard_score: float, exact_recipe: bool) -> str:
    """Map a Jaccard score to a similarity tier key.

    ``exact_recipe`` wins outright. Otherwise the score is bucketed:
    >= 0.70 is ``very_similar``, >= 0.35 is ``somewhat_similar``, anything
    lower is ``different``.
    """
    if exact_recipe:
        return "exact_recipe"
    # Thresholds are checked from highest to lowest.
    thresholds = ((0.70, "very_similar"), (0.35, "somewhat_similar"))
    for floor, label in thresholds:
        if jaccard_score >= floor:
            return label
    return "different"
def fetch_recipe_ingredients(db_path: Path, recipe_id: int | None) -> set[str]:
    """Fetch the normalised ingredient names of a recipe from the local store.

    Returns an empty set when ``recipe_id`` is None, when the recipe is not
    found, or when the lookup fails for any reason (the failure is logged at
    debug level and never raised).
    """
    if recipe_id is None:
        return set()

    try:
        from app.db.store import Store

        store = Store(db_path)
        try:
            recipe = store.get_recipe(recipe_id)
            if recipe is not None:
                return _parse_ingredient_names(recipe.get("ingredient_names"))
            return set()
        finally:
            store.close()
    except Exception:
        logger.debug("ingredient lookup failed for recipe_id=%s", recipe_id)
        return set()
def build_similar_post_result(
    post,
    incoming_recipe_id: int | None,
    incoming_ingredients: set[str],
    db_path: Path,
) -> dict:
    """Describe how similar an existing community post is to an incoming one.

    The post is an exact match when both recipe ids are set and equal.
    Otherwise, if the incoming post has ingredients, the existing post's
    ingredients are looked up and compared with ``jaccard``.

    Returns:
        A dict with the post's slug, title, recipe name, pseudonym and
        published timestamp, plus ``similarity_tier``, ``tier_description``
        and ``jaccard_score`` (rounded to 3 places, or None for an exact
        match).
    """
    existing_recipe_id = post.recipe_id
    is_exact = (
        incoming_recipe_id is not None
        and existing_recipe_id is not None
        and existing_recipe_id == incoming_recipe_id
    )

    score = 0.0
    if incoming_ingredients and not is_exact:
        existing = fetch_recipe_ingredients(db_path, post.recipe_id)
        if existing:
            score = jaccard(incoming_ingredients, existing)

    tier = similarity_tier(score, is_exact)

    # Timestamps may arrive as datetime-like objects or plain values.
    if hasattr(post.published, "isoformat"):
        published = post.published.isoformat()
    else:
        published = str(post.published)

    result = {
        "slug": post.slug,
        "title": post.title,
        "recipe_name": post.recipe_name,
        "pseudonym": post.pseudonym,
        "published": published,
        "similarity_tier": tier,
        "jaccard_score": None if is_exact else round(score, 3),
        "tier_description": _SIMILARITY_TIERS.get(tier, ""),
    }
    return result

View file

@ -2,20 +2,17 @@
# BSL 1.1 — LLM feature # BSL 1.1 — LLM feature
"""Provide a router-compatible LLM client for meal plan generation tasks. """Provide a router-compatible LLM client for meal plan generation tasks.
Cloud (CF_ORCH_URL set), tier 1 task-based routing (preferred): Cloud (CF_ORCH_URL set):
Calls /api/inference/task with product=kiwi, task=meal_plan. Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
The coordinator resolves the model from assignments.yaml. Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
with a .complete(system, user, **kwargs) interface.
Cloud (CF_ORCH_URL set), tier 2 direct allocation (fallback):
Allocates cf-text directly via client.allocate(). Used when the task
is not yet registered in the coordinator (cf-orch#61 not deployed).
Local / self-hosted (no CF_ORCH_URL): Local / self-hosted (no CF_ORCH_URL):
Returns an LLMRouter instance which tries ollama, vllm, or any Returns an LLMRouter instance which tries ollama, vllm, or any
backend configured in ~/.config/circuitforge/llm.yaml. backend configured in ~/.config/circuitforge/llm.yaml.
All paths expose the same (router, ctx) interface so llm_planner.py Both paths expose the same interface so llm_timing.py and llm_planner.py
needs no knowledge of the backend. need no knowledge of the backend.
""" """
from __future__ import annotations from __future__ import annotations
@ -25,7 +22,8 @@ from contextlib import nullcontext
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# cf-orch service name and TTL for direct-allocate fallback path. # cf-orch service name and VRAM budget for meal plan LLM tasks.
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
_SERVICE_TYPE = "cf-text" _SERVICE_TYPE = "cf-text"
_TTL_S = 120.0 _TTL_S = 120.0
_CALLER = "kiwi-meal-plan" _CALLER = "kiwi-meal-plan"
@ -64,79 +62,35 @@ class _OrchTextRouter:
return resp.choices[0].message.content or "" return resp.choices[0].message.content or ""
# Imported at module level so tests can patch the names in this module's namespace.
# app.services.task_inference.task_allocate — patch target for task routing tests.
try:
from app.services.task_inference import TaskNotRegistered, task_allocate
_HAS_TASK_INFERENCE = True
except ImportError:
_HAS_TASK_INFERENCE = False
# circuitforge_orch.client.CFOrchClient — patch target for direct-allocate fallback tests.
try:
from circuitforge_orch.client import CFOrchClient
except ImportError:
CFOrchClient = None # type: ignore[assignment,misc]
# circuitforge_core.llm.router.LLMRouter — patch target for local-inference tests.
try:
from circuitforge_core.llm.router import LLMRouter
except (ImportError, FileNotFoundError):
LLMRouter = None # type: ignore[assignment,misc]
def get_meal_plan_router(): def get_meal_plan_router():
"""Return an LLM client for meal plan tasks. """Return an LLM client for meal plan tasks.
Returns (router, ctx) where ctx is a context manager the caller holds Tries cf-orch cf-text allocation first (cloud); falls back to LLMRouter
open for the duration of the LLM call. Returns (None, nullcontext(None)) (local ollama/vllm). Returns None if no backend is available.
if no backend is available.
""" """
cf_orch_url = os.environ.get("CF_ORCH_URL") cf_orch_url = os.environ.get("CF_ORCH_URL")
if cf_orch_url: if cf_orch_url:
# Tier 1: task-based routing — coordinator owns model selection.
if _HAS_TASK_INFERENCE:
try:
ctx = task_allocate(
"kiwi", "meal_plan",
service_hint=_SERVICE_TYPE,
ttl_s=_TTL_S,
)
alloc = ctx.__enter__()
return _OrchTextRouter(alloc.url), ctx
except TaskNotRegistered:
logger.debug(
"kiwi.meal_plan not in coordinator assignments — "
"falling back to direct cf-text allocation"
)
except Exception as exc:
logger.debug("task allocation failed, trying direct allocate: %s", exc)
# Tier 2: direct allocation — hardcoded service type.
if CFOrchClient is not None:
try:
client = CFOrchClient(cf_orch_url)
ctx = client.allocate(
service=_SERVICE_TYPE,
ttl_s=_TTL_S,
caller=_CALLER,
)
alloc = ctx.__enter__()
if alloc is not None:
return _OrchTextRouter(alloc.url), ctx
ctx.__exit__(None, None, None) # release allocation before falling through
except Exception as exc:
logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
# Tier 3: local inference — ollama / vllm / openai-compat.
if LLMRouter is not None:
try: try:
return LLMRouter(), nullcontext(None) from circuitforge_orch.client import CFOrchClient
except FileNotFoundError: client = CFOrchClient(cf_orch_url)
logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled") ctx = client.allocate(
return None, nullcontext(None) service=_SERVICE_TYPE,
ttl_s=_TTL_S,
caller=_CALLER,
)
alloc = ctx.__enter__()
if alloc is not None:
return _OrchTextRouter(alloc.url), ctx
except Exception as exc: except Exception as exc:
logger.debug("LLMRouter init failed: %s", exc) logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)
return None, nullcontext(None)
return None, nullcontext(None) # Local fallback: LLMRouter (ollama / vllm / openai-compat)
try:
from circuitforge_core.llm.router import LLMRouter
return LLMRouter(), nullcontext(None)
except FileNotFoundError:
logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
return None, nullcontext(None)
except Exception as exc:
logger.debug("LLMRouter init failed: %s", exc)
return None, nullcontext(None)

View file

@ -18,51 +18,43 @@ class DocuvisionResult:
class DocuvisionClient: class DocuvisionClient:
"""Thin client for the cf-docuvision service.""" """Thin client for the cf-docuvision service."""
def __init__(self, base_url: str, timeout: float = 120.0) -> None: def __init__(self, base_url: str) -> None:
self._base_url = base_url.rstrip("/") self._base_url = base_url.rstrip("/")
self._timeout = timeout
def extract_text(self, image_path: str | Path, hint: str = "text") -> DocuvisionResult: def extract_text(self, image_path: str | Path) -> DocuvisionResult:
"""Send an image to docuvision and return extracted text. """Send an image to docuvision and return extracted text."""
Args:
image_path: Path to the image file.
hint: Docuvision extraction hint "text" for dense prose (recipes),
"table" for tabular data, "form" for form fields, "auto" for
automatic detection.
"""
image_bytes = Path(image_path).read_bytes() image_bytes = Path(image_path).read_bytes()
b64 = base64.b64encode(image_bytes).decode() b64 = base64.b64encode(image_bytes).decode()
with httpx.Client(timeout=self._timeout) as client: with httpx.Client(timeout=30.0) as client:
resp = client.post( resp = client.post(
f"{self._base_url}/extract", f"{self._base_url}/extract",
json={"image_b64": b64, "hint": hint}, json={"image": b64},
) )
resp.raise_for_status() resp.raise_for_status()
data = resp.json() data = resp.json()
return DocuvisionResult( return DocuvisionResult(
text=data.get("raw_text", ""), text=data.get("text", ""),
confidence=data.get("metadata", {}).get("confidence"), confidence=data.get("confidence"),
raw=data, raw=data,
) )
async def extract_text_async(self, image_path: str | Path, hint: str = "text") -> DocuvisionResult: async def extract_text_async(self, image_path: str | Path) -> DocuvisionResult:
"""Async version.""" """Async version."""
image_bytes = Path(image_path).read_bytes() image_bytes = Path(image_path).read_bytes()
b64 = base64.b64encode(image_bytes).decode() b64 = base64.b64encode(image_bytes).decode()
async with httpx.AsyncClient(timeout=self._timeout) as client: async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post( resp = await client.post(
f"{self._base_url}/extract", f"{self._base_url}/extract",
json={"image_b64": b64, "hint": hint}, json={"image": b64},
) )
resp.raise_for_status() resp.raise_for_status()
data = resp.json() data = resp.json()
return DocuvisionResult( return DocuvisionResult(
text=data.get("raw_text", ""), text=data.get("text", ""),
confidence=data.get("metadata", {}).get("confidence"), confidence=data.get("confidence"),
raw=data, raw=data,
) )

View file

@ -32,29 +32,6 @@ def _try_docuvision(image_path: str | Path) -> str | None:
cf_orch_url = os.environ.get("CF_ORCH_URL") cf_orch_url = os.environ.get("CF_ORCH_URL")
if not cf_orch_url: if not cf_orch_url:
return None return None
# Tier 1: task-based routing — coordinator owns model selection.
try:
from app.services.task_inference import task_allocate, TaskNotRegistered
from app.services.ocr.docuvision_client import DocuvisionClient
try:
with task_allocate(
"kiwi", "ocr",
service_hint="cf-docuvision",
ttl_s=60.0,
) as alloc:
doc_client = DocuvisionClient(alloc.url)
result = doc_client.extract_text(image_path)
return result.text if result.text else None
except TaskNotRegistered:
logger.debug(
"kiwi.ocr not in coordinator assignments — "
"falling back to direct cf-docuvision allocation"
)
except Exception as exc:
logger.debug("task allocation path failed, trying direct allocate: %s", exc)
# Tier 2: direct allocation — hardcoded service type.
try: try:
from circuitforge_orch.client import CFOrchClient from circuitforge_orch.client import CFOrchClient
from app.services.ocr.docuvision_client import DocuvisionClient from app.services.ocr.docuvision_client import DocuvisionClient
@ -72,7 +49,7 @@ def _try_docuvision(image_path: str | Path) -> str | None:
result = doc_client.extract_text(image_path) result = doc_client.extract_text(image_path)
return result.text if result.text else None return result.text if result.text else None
except Exception as exc: except Exception as exc:
logger.debug("cf-docuvision fast-path failed, falling back to local VLM: %s", exc) logger.debug("cf-docuvision fast-path failed, falling back: %s", exc)
return None return None

View file

@ -93,18 +93,7 @@ class ElementClassifier:
return self._heuristic_profile(name) return self._heuristic_profile(name)
def classify_batch(self, names: list[str]) -> list[IngredientProfile]: def classify_batch(self, names: list[str]) -> list[IngredientProfile]:
"""Classify multiple names in one DB round-trip, falling back to heuristics.""" return [self.classify(n) for n in names]
if not names:
return []
normalised = [n.lower().strip() for n in names]
c = self._store._cp
placeholders = ",".join("?" * len(normalised))
rows = self._store._fetch_all(
f"SELECT * FROM {c}ingredient_profiles WHERE name IN ({placeholders})",
tuple(normalised),
)
by_name = {r["name"]: self._row_to_profile(r) for r in rows}
return [by_name.get(n) or self._heuristic_profile(n) for n in normalised]
def identify_gaps(self, profiles: list[IngredientProfile]) -> list[str]: def identify_gaps(self, profiles: list[IngredientProfile]) -> list[str]:
"""Return element names that have no coverage in the given profile list.""" """Return element names that have no coverage in the given profile list."""

View file

@ -1,14 +1,13 @@
"""LLM-driven recipe generator for Levels 3 and 4.""" """LLM-driven recipe generator for Levels 3 and 4."""
from __future__ import annotations from __future__ import annotations
import asyncio
import logging import logging
import os import os
import re import re
from contextlib import nullcontext from contextlib import nullcontext
from typing import TYPE_CHECKING, AsyncGenerator from typing import TYPE_CHECKING
from openai import AsyncOpenAI, OpenAI from openai import OpenAI
if TYPE_CHECKING: if TYPE_CHECKING:
from app.db.store import Store from app.db.store import Store
@ -150,8 +149,8 @@ class LLMRecipeGenerator:
return "\n".join(lines) return "\n".join(lines)
_SERVICE_TYPE = "cf-text" _SERVICE_TYPE = "vllm"
_MODEL_CANDIDATES = ["granite-4.1-8b", "deepseek-r1-1.5b"] _MODEL_CANDIDATES = ["Qwen2.5-3B-Instruct", "Phi-4-mini-instruct"]
_TTL_S = 300.0 _TTL_S = 300.0
_CALLER = "kiwi-recipe" _CALLER = "kiwi-recipe"
@ -183,12 +182,7 @@ class LLMRecipeGenerator:
With CF_ORCH_URL set: acquires a vLLM allocation via CFOrchClient and With CF_ORCH_URL set: acquires a vLLM allocation via CFOrchClient and
calls the OpenAI-compatible API directly against the allocated service URL. calls the OpenAI-compatible API directly against the allocated service URL.
Falls back to LLMRouter when: Allocation failure falls through to LLMRouter rather than silently returning "".
- Allocation succeeded but the service is cold (warm=False) avoids
making the user wait for model load; LLMRouter uses Ollama which is
already running.
- Allocation succeeded but the connection to the service URL fails the
agent may have registered the service but failed to start it.
Without CF_ORCH_URL: uses LLMRouter directly. Without CF_ORCH_URL: uses LLMRouter directly.
""" """
ctx = self._get_llm_context() ctx = self._get_llm_context()
@ -214,15 +208,6 @@ class LLMRecipeGenerator:
try: try:
if alloc is not None: if alloc is not None:
# Skip cold services — model not yet loaded means the user would
# wait 60120 s for model load before any response. Use LLMRouter
# (Ollama) instead, which is already warm on the host.
if not alloc.warm:
logger.info(
"cf-orch vllm allocated but cold (warm=False) — releasing and falling back to LLMRouter"
)
raise RuntimeError("vllm cold")
base_url = alloc.url.rstrip("/") + "/v1" base_url = alloc.url.rstrip("/") + "/v1"
client = OpenAI(base_url=base_url, api_key="any") client = OpenAI(base_url=base_url, api_key="any")
model = alloc.model or "__auto__" model = alloc.model or "__auto__"
@ -238,20 +223,6 @@ class LLMRecipeGenerator:
return LLMRouter().complete(prompt) return LLMRouter().complete(prompt)
except Exception as exc: except Exception as exc:
logger.error("LLM call failed: %s", exc) logger.error("LLM call failed: %s", exc)
# When cf-orch gave us an allocation but the service is unreachable
# (cold skip, connection refused, or other error), fall back to
# LLMRouter rather than silently returning empty.
# Skip "vllm" in the fallback order — that backend also routes through
# cf-orch, which would trigger a second (wasted) cold allocation.
if alloc is not None:
logger.info("Falling back to LLMRouter after vllm failure")
try:
from circuitforge_core.llm.router import LLMRouter
router = LLMRouter()
_order = [b for b in (router.config.get("fallback_order") or []) if b != "vllm"]
return router.complete(prompt, fallback_order=_order or None)
except Exception as fallback_exc:
logger.error("LLMRouter fallback also failed: %s", fallback_exc)
return "" return ""
finally: finally:
if ctx is not None: if ctx is not None:
@ -388,91 +359,3 @@ class LLMRecipeGenerator:
suggestions=[suggestion], suggestions=[suggestion],
element_gaps=gaps, element_gaps=gaps,
) )
async def stream_generate(
    self,
    req: RecipeRequest,
    profiles: list,
    gaps: list[str],
) -> AsyncGenerator[str, None]:
    """Stream LLM tokens for L3/L4. Yields raw text chunks as they arrive.

    Backends are tried in order:
      1. A warm cf-orch allocation (obtained in a worker thread).
      2. The ``ollama`` backend from the LLMRouter config, via the
         OpenAI-compatible streaming API.
      3. Blocking ``_call_llm``, whose full response is yielded as one chunk.

    A backend that fails before producing any output falls through to the
    next one. A backend that fails after some tokens were already yielded is
    not replaced: restarting on another backend would append a second,
    complete response to the partial one the consumer already received.

    Args:
        req: The recipe request; ``req.level == 4`` selects the level-4
            prompt, anything else the level-3 prompt.
        profiles: Ingredient profiles for the level-3 prompt.
        gaps: Element gaps for the level-3 prompt.

    Raises:
        Exception: Re-raises the backend error when streaming breaks after
            partial output has been emitted.
    """
    if req.level == 4:
        prompt = self.build_level4_prompt(req)
    else:
        prompt = self.build_level3_prompt(req, profiles, gaps)

    # Phase 1: try cf-orch warm vllm (sync allocation, wrapped in thread)
    alloc_info = await asyncio.to_thread(self._try_alloc_for_stream)
    if alloc_info is not None:
        alloc, ctx = alloc_info
        emitted = False
        try:
            async for token in self._stream_openai_compat(
                alloc.url.rstrip("/") + "/v1", "any", alloc.model or "__auto__", prompt
            ):
                emitted = True
                yield token
            return
        except Exception as exc:
            if emitted:
                logger.warning("cf-orch stream failed after partial output: %s", exc)
                raise
            logger.debug("cf-orch stream failed, falling back to Ollama: %s", exc)
        finally:
            # Always release the allocation, off the event loop.
            await asyncio.to_thread(_safe_exit, ctx)

    # Phase 2: Ollama streaming via OpenAI-compat API
    from circuitforge_core.llm.router import LLMRouter

    router = LLMRouter()
    ollama = router.config.get("backends", {}).get("ollama")
    if ollama and ollama.get("enabled", True):
        base_url = ollama["base_url"]
        model = ollama.get("model", "llama3")
        emitted = False
        try:
            async for token in self._stream_openai_compat(base_url, "any", model, prompt):
                emitted = True
                yield token
            return
        except Exception as exc:
            if emitted:
                logger.warning("Ollama streaming failed after partial output: %s", exc)
                raise
            logger.warning("Ollama streaming failed, falling back to blocking: %s", exc)

    # Phase 3: blocking fallback — yields full response at once
    result = await asyncio.to_thread(self._call_llm, prompt)
    if result:
        yield result
def _try_alloc_for_stream(self):
"""Attempt cf-orch allocation synchronously; return (alloc, ctx) or None."""
ctx = self._get_llm_context()
try:
alloc = ctx.__enter__()
if alloc is not None and alloc.warm:
return alloc, ctx
# Not warm — release and signal fallback
_safe_exit(ctx)
except Exception as exc:
logger.debug("cf-orch alloc for stream failed: %s", exc)
return None
@staticmethod
async def _stream_openai_compat(
    base_url: str, api_key: str, model: str, prompt: str
) -> AsyncGenerator[str, None]:
    """Stream a single-turn chat completion from an OpenAI-compatible server.

    When ``model`` is ``"__auto__"`` the first model listed by the server is
    used. Yields each non-empty ``delta.content`` fragment as it arrives.
    """
    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
    if model == "__auto__":
        available = await client.models.list()
        model = available.data[0].id

    messages = [{"role": "user", "content": prompt}]
    response = await client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    )
    async for chunk in response:
        # Some chunks carry no choices or an empty delta; skip those.
        if not chunk.choices:
            continue
        fragment = chunk.choices[0].delta.content
        if fragment:
            yield fragment
def _safe_exit(ctx) -> None:
try:
ctx.__exit__(None, None, None)
except Exception:
pass

View file

@ -918,14 +918,6 @@ class RecipeEngine:
elif row_time_min > req.max_total_min: elif row_time_min > req.max_total_min:
continue continue
# Active (hands-on) time filter — independent of total time.
# Lets users request "≤30 min hands-on, any total" to include slow braises.
# Skips recipes where active_min == 0 (no time signals parsed) to avoid
# hiding valid results when the parser couldn't extract timing.
if req.max_active_min is not None and row_time_effort.active_min > 0:
if row_time_effort.active_min > req.max_active_min:
continue
# Level 2: also add dietary constraint swaps from substitution_pairs # Level 2: also add dietary constraint swaps from substitution_pairs
if req.level == 2 and req.constraints: if req.level == 2 and req.constraints:
for ing in ingredient_names: for ing in ingredient_names:

View file

@ -21,7 +21,6 @@ import json
import logging import logging
import os import os
import re import re
from collections.abc import Callable
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@ -197,109 +196,34 @@ def _call_via_local_vlm(image_paths: list[Path], prompt: str) -> str:
return output return output
def _build_ocr_extraction_prompt(ocr_text: str) -> str: def _call_vision_backend(image_paths: list[Path], prompt: str) -> str:
"""Build a text-LLM prompt for structuring OCR output into recipe JSON.
Swaps the image-centric preamble of _EXTRACTION_PROMPT for an OCR-centric
one, then appends the combined OCR text as input. The JSON schema section
is shared verbatim to keep the two paths in sync.
"""
schema_idx = _EXTRACTION_PROMPT.find("Return a single JSON object")
schema_part = _EXTRACTION_PROMPT[schema_idx:] if schema_idx != -1 else _EXTRACTION_PROMPT
return (
"You are extracting a recipe from OCR text taken from a recipe card, "
"cookbook page, or handwritten note.\n\n"
"The text below was obtained via optical character recognition and may "
"contain minor scanning artifacts or formatting irregularities.\n\n"
f"{schema_part}\n\nOCR Text:\n{ocr_text}"
)
def _call_via_cf_text_vlm(alloc_url: str, image_paths: list[Path], prompt: str) -> str:
    """Send images plus a prompt to a cf-text OpenAI-compatible chat endpoint.

    Each image is embedded as a base64 ``data:`` URL; every page after the
    first is preceded by a short text marker, and the prompt goes last. The
    request is a single user message POSTed to
    ``{alloc_url}/v1/chat/completions`` with a 180 second timeout.

    Returns:
        The stripped text content of the first choice in the response.
    """
    import httpx

    parts: list[dict] = []
    for index, image_path in enumerate(image_paths):
        if index > 0:
            marker = f"(Page {index + 1} of the same recipe:)"
            parts.append({"type": "text", "text": marker})
        encoded = _load_image_b64(image_path)
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }
        parts.append(image_part)
    parts.append({"type": "text", "text": prompt})

    payload = {
        "model": "local",
        "messages": [{"role": "user", "content": parts}],
        "max_tokens": 2048,
        "temperature": 0.0,
    }
    endpoint = f"{alloc_url.rstrip('/')}/v1/chat/completions"
    response = httpx.post(endpoint, json=payload, timeout=180.0)
    response.raise_for_status()
    reply = response.json()
    return reply["choices"][0]["message"]["content"].strip()
def _call_vision_backend(
image_paths: list[Path],
prompt: str,
progress_cb: "Callable[[str, str], None] | None" = None,
) -> str:
"""Dispatch to the best available vision backend. """Dispatch to the best available vision backend.
Priority: cf-orch (Qwen2-VL GGUF via cf-text) -> local Qwen2.5-VL -> Anthropic API. Priority: cf-orch vision -> local Qwen2.5-VL -> Anthropic API.
Raises RuntimeError with a clear message when no backend is available. Raises RuntimeError with a clear message when no backend is available.
Args:
image_paths: Images to process.
prompt: Extraction prompt (used by local VLM / Anthropic paths).
progress_cb: Optional callback(status, message) for SSE progress events.
Called synchronously from the thread caller bridges to async.
""" """
def _progress(status: str, message: str) -> None:
if progress_cb:
progress_cb(status, message)
errors: list[str] = [] errors: list[str] = []
# 1. Try cf-orch task allocation → cf-docuvision (Qwen2-VL GGUF via llama.cpp). # 1. Try cf-orch vision allocation
# Two-step: docuvision OCRs the image(s), then LLMRouter structures the text into JSON.
cf_orch_url = os.environ.get("CF_ORCH_URL") cf_orch_url = os.environ.get("CF_ORCH_URL")
if cf_orch_url: if cf_orch_url:
try: try:
from app.services.task_inference import TaskNotRegistered, task_allocate from circuitforge_orch.client import CFOrchClient
from app.services.ocr.docuvision_client import DocuvisionClient from app.services.ocr.docuvision_client import DocuvisionClient
from circuitforge_core.llm.router import LLMRouter
try: client = CFOrchClient(cf_orch_url)
_progress("allocating", "Starting vision service...") with client.allocate(
with task_allocate("kiwi", "recipe_scan", service_hint="cf-docuvision", ttl_s=120.0) as alloc: service="cf-vision",
_progress("scanning", "Extracting recipe text from photo...") model_candidates=["qwen2.5-vl-7b", "cf-docuvision"],
ttl_s=90.0,
caller="kiwi-recipe-scan",
) as alloc:
if alloc is not None:
doc_client = DocuvisionClient(alloc.url) doc_client = DocuvisionClient(alloc.url)
ocr_parts: list[str] = [] # docuvision takes a single image -- use first image only for now
for i, path in enumerate(image_paths): result = doc_client.extract_text(image_paths[0])
result = doc_client.extract_text(path, hint="text") if result.text:
prefix = f"(Page {i + 1} of the same recipe)\n" if len(image_paths) > 1 else "" return result.text
ocr_parts.append(f"{prefix}{result.text}")
combined_ocr = "\n\n".join(ocr_parts)
if not combined_ocr.strip():
raise ValueError("Docuvision returned no text — image may not be a recipe")
_progress("structuring", "Parsing recipe structure...")
text = LLMRouter().complete(
_build_ocr_extraction_prompt(combined_ocr),
system="You are a recipe data extractor. Return ONLY valid JSON. No markdown, no explanation, no code fences.",
)
if text:
return text
except TaskNotRegistered:
logger.debug("kiwi.recipe_scan not yet registered in cf-orch assignments")
except Exception as exc: except Exception as exc:
logger.debug("cf-orch vision failed for recipe scan: %s", exc) logger.debug("cf-orch vision failed for recipe scan: %s", exc)
errors.append(f"cf-orch: {exc}") errors.append(f"cf-orch: {exc}")
@ -332,76 +256,40 @@ def _normalize_ingredient_name(name: str) -> str:
return name.lower().strip() return name.lower().strip()
def _extract_json_object(text: str) -> str | None:
"""Return the first balanced JSON object from text, or None if not found.
Uses brace-counting rather than a greedy regex so trailing prose and
nested objects are handled correctly.
"""
start = text.find("{")
if start == -1:
return None
depth = 0
in_string = False
escape_next = False
for i, ch in enumerate(text[start:], start):
if escape_next:
escape_next = False
continue
if ch == "\\" and in_string:
escape_next = True
continue
if ch == '"':
in_string = not in_string
continue
if in_string:
continue
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
return text[start : i + 1]
return None
def _parse_scanner_json(raw_text: str) -> dict: def _parse_scanner_json(raw_text: str) -> dict:
"""Extract and return the JSON dict from VLM output. """Extract and return the JSON dict from VLM output.
Handles: Handles:
- Pure JSON - Pure JSON
- JSON in ```json ... ``` markdown fences - JSON wrapped in ```json ... ``` markdown fences
- Qwen3-style <think>...</think> or <thinking>...</thinking> preambles - JSON preceded by a line of prose ("Here is the recipe: {...}")
- JSON preceded or followed by prose
Raises ValueError on not_a_recipe or unparseable output. Raises ValueError on not_a_recipe or unparseable output.
""" """
text = raw_text.strip() text = raw_text.strip()
# Strip thinking-token blocks emitted by reasoning models (Qwen3, DeepSeek-R1, etc.)
text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL | re.IGNORECASE).strip()
text = re.sub(r"<thinking>.*?</thinking>", "", text, flags=re.DOTALL | re.IGNORECASE).strip()
# Strip markdown fences if present # Strip markdown fences if present
if "```" in text: if text.startswith("```"):
# Find the content between the first ``` pair parts = text.split("```")
fence_match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL) for part in parts:
if fence_match: part = part.strip()
text = fence_match.group(1).strip() if part.startswith("json"):
part = part[4:].strip()
if part.startswith("{"):
text = part
break
# Try direct parse # Try direct parse first
try: try:
data = json.loads(text) data = json.loads(text)
except json.JSONDecodeError: except json.JSONDecodeError:
# Fall back to brace-balanced extraction from anywhere in the output # Extract first JSON object embedded in prose
candidate = _extract_json_object(text) match = re.search(r"\{.*\}", text, re.DOTALL)
if not candidate: if not match:
logger.warning("Could not parse JSON from LLM output (first 400 chars): %r", text[:400])
raise ValueError(f"Could not parse JSON from VLM output: {text[:200]!r}") raise ValueError(f"Could not parse JSON from VLM output: {text[:200]!r}")
try: try:
data = json.loads(candidate) data = json.loads(match.group(0))
except json.JSONDecodeError as exc: except json.JSONDecodeError as exc:
logger.warning("Brace-extracted JSON still invalid: %r", candidate[:400])
raise ValueError(f"Could not parse JSON from VLM output: {exc}") from exc raise ValueError(f"Could not parse JSON from VLM output: {exc}") from exc
if isinstance(data, dict) and data.get("error") == "not_a_recipe": if isinstance(data, dict) and data.get("error") == "not_a_recipe":
@ -462,7 +350,6 @@ class RecipeScanner:
self, self,
image_paths: list[Path], image_paths: list[Path],
pantry_names: list[str] | None = None, pantry_names: list[str] | None = None,
progress_cb: Callable[[str, str], None] | None = None,
) -> ScannedRecipeResult: ) -> ScannedRecipeResult:
"""Extract a structured recipe from one or more photos. """Extract a structured recipe from one or more photos.
@ -484,7 +371,7 @@ class RecipeScanner:
raise ValueError(f"Maximum {MAX_IMAGES} images per scan (got {len(image_paths)})") raise ValueError(f"Maximum {MAX_IMAGES} images per scan (got {len(image_paths)})")
# Call vision backend # Call vision backend
raw_text = _call_vision_backend(image_paths, _EXTRACTION_PROMPT, progress_cb=progress_cb) raw_text = _call_vision_backend(image_paths, _EXTRACTION_PROMPT)
# Parse JSON from VLM output # Parse JSON from VLM output
data = _parse_scanner_json(raw_text) data = _parse_scanner_json(raw_text)

View file

@ -1,124 +0,0 @@
# app/services/task_inference.py
# BSL 1.1 — LLM feature
"""Task-based service allocation via the cf-orch coordinator.
Calls POST /api/inference/task instead of a hardcoded service type.
The coordinator resolves model_id and service_type from assignments.yaml.
Fallback contract (for callers):
- 404 -> TaskNotRegistered (fall back to direct client.allocate())
- other error -> RuntimeError
- CF_ORCH_URL unset -> RuntimeError (guard with os.environ.get first)
"""
from __future__ import annotations
import logging
import os
from collections.abc import Generator
from contextlib import contextmanager
from dataclasses import dataclass
import httpx
logger = logging.getLogger(__name__)
class TaskNotRegistered(Exception):
    """Raised when the coordinator answers 404 for a product/task pair.

    A 404 indicates the task has no entry in assignments.yaml yet. Callers
    are expected to catch this and fall back to direct service allocation
    (``client.allocate()``).
    """
@dataclass(frozen=True)
class Allocation:
    """Immutable record describing one allocation granted by the coordinator."""

    # Endpoint of the allocated service (the response's "url" field).
    url: str
    # Identifier used in the release DELETE path when the allocation ends.
    allocation_id: str
    # Service type: the response's "service" field, else the caller's hint.
    service: str
def _orch_url() -> str:
    """Return ``CF_ORCH_URL`` without trailing slashes, or ``""`` when unset."""
    configured = os.environ.get("CF_ORCH_URL", "")
    return configured.rstrip("/")
@contextmanager
def task_allocate(
    product: str,
    task: str,
    *,
    service_hint: str,
    ttl_s: float = 120.0,
) -> Generator[Allocation, None, None]:
    """Context manager: allocate a service via task-based routing.

    Calls POST /api/inference/task, yields an ``Allocation``, and releases it
    with a DELETE on exit (whether or not the body raised).
    Supports both ``with task_allocate(...) as alloc:`` and manual
    ``ctx = task_allocate(...); alloc = ctx.__enter__()`` patterns.

    **Sync-only**: uses the synchronous httpx API. Do not call from an
    ``async def`` handler without wrapping in ``asyncio.to_thread``.

    Args:
        product: CF product name (e.g. "kiwi").
        task: Task identifier (e.g. "meal_plan", "ocr").
        service_hint: Service type for the release DELETE call. Used only
            when the coordinator response has no truthy ``service`` field.
        ttl_s: Allocation TTL in seconds.
            NOTE(review): this value is currently not included in the
            request body -- confirm whether the coordinator expects it.

    Raises:
        TaskNotRegistered: Coordinator returned 404.
        RuntimeError: CF_ORCH_URL is not set, the coordinator is
            unreachable, it returned a non-404 error status, or it returned
            a malformed response (non-JSON, not a JSON object, or missing
            required fields).
    """
    base = _orch_url()
    if not base:
        raise RuntimeError("CF_ORCH_URL is not set")

    try:
        resp = httpx.post(
            f"{base}/api/inference/task",
            json={"product": product, "task": task, "payload": {}},
            timeout=30.0,
        )
    except httpx.RequestError as exc:
        raise RuntimeError(f"cf-orch unreachable: {exc}") from exc

    if resp.status_code == 404:
        # Adjacent literals are concatenated verbatim, so the separator must
        # be written explicitly to keep the message readable.
        raise TaskNotRegistered(
            f"No assignment for product={product!r} task={task!r}; "
            "ensure cf-orch#61/62 are deployed and coordinator reloaded"
        )
    if not resp.is_success:
        raise RuntimeError(
            "cf-orch /api/inference/task failed: "
            f"HTTP {resp.status_code}: {resp.text[:200]}"
        )

    try:
        data = resp.json()
        alloc = Allocation(
            url=data["url"],
            allocation_id=data["allocation_id"],
            service=data.get("service") or service_hint,
        )
    except (KeyError, TypeError, AttributeError, ValueError) as exc:
        # TypeError/AttributeError cover a valid-JSON body that is not an
        # object (list, string, null); callers are promised RuntimeError.
        raise RuntimeError(
            f"cf-orch /api/inference/task returned malformed response: {exc}; "
            f"body: {resp.text[:200]}"
        ) from exc

    try:
        yield alloc
    finally:
        # Release is best-effort: a failure here must never mask an error
        # raised inside the ``with`` body.
        try:
            release = httpx.delete(
                f"{base}/api/services/{alloc.service}/allocations/{alloc.allocation_id}",
                timeout=10.0,
            )
            if not release.is_success:
                logger.debug(
                    "cf-orch task allocation release returned HTTP %s (non-fatal)",
                    release.status_code,
                )
        except Exception as exc:
            logger.debug("cf-orch task allocation release failed (non-fatal): %s", exc)

View file

@ -18,10 +18,6 @@ server {
proxy_set_header X-CF-Session $http_x_cf_session; proxy_set_header X-CF-Session $http_x_cf_session;
# Allow image uploads (barcode/receipt photos from phone cameras). # Allow image uploads (barcode/receipt photos from phone cameras).
client_max_body_size 20m; client_max_body_size 20m;
# LLM inference (recipe suggestions, expiry fallback) can take 60-120s.
# Default proxy_read_timeout is 60s which causes 504s on full recipe generation.
proxy_read_timeout 180s;
proxy_send_timeout 180s;
} }
# Direct-port LAN access (localhost:8515): when VITE_API_BASE='/kiwi', the frontend # Direct-port LAN access (localhost:8515): when VITE_API_BASE='/kiwi', the frontend
@ -38,8 +34,6 @@ server {
proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto; proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
proxy_set_header X-CF-Session $http_x_cf_session; proxy_set_header X-CF-Session $http_x_cf_session;
client_max_body_size 20m; client_max_body_size 20m;
proxy_read_timeout 180s;
proxy_send_timeout 180s;
} }
# When accessed directly (localhost:8515) instead of via Caddy (/kiwi path-strip), # When accessed directly (localhost:8515) instead of via Caddy (/kiwi path-strip),

View file

@ -106,39 +106,6 @@
<span class="form-hint">How you appear on posts -- not your real name or email.</span> <span class="form-hint">How you appear on posts -- not your real name or email.</span>
</div> </div>
<!-- Similarity check results -->
<div
v-if="similarPosts.length > 0"
class="similar-panel"
role="region"
aria-label="Similar stories found"
>
<p class="similar-heading text-sm">
<strong>Similar stories already exist.</strong>
You can publish as-is, mark yours as a variation, or cancel.
</p>
<ul class="similar-list" aria-label="Existing similar posts">
<li
v-for="hit in similarPosts"
:key="hit.slug"
class="similar-item"
>
<span class="similar-tier-badge" :class="`tier-${hit.similarity_tier}`">
{{ tierLabel(hit.similarity_tier) }}
</span>
<span class="similar-title">{{ hit.title }}</span>
<span class="similar-by text-muted text-xs">by {{ hit.pseudonym }}</span>
<button
class="btn-link text-xs"
:class="{ 'selected-ref': selectedRef === hit.slug }"
@click="toggleRef(hit.slug)"
>
{{ selectedRef === hit.slug ? 'Unmark variation' : 'Mark as variation' }}
</button>
</li>
</ul>
</div>
<!-- Submission feedback (aria-live region, always rendered) --> <!-- Submission feedback (aria-live region, always rendered) -->
<div <div
class="feedback-region" class="feedback-region"
@ -152,24 +119,13 @@
<!-- Footer actions --> <!-- Footer actions -->
<div class="modal-footer flex gap-sm"> <div class="modal-footer flex gap-sm">
<button <button
v-if="!similarPosts.length || similarChecked"
class="btn btn-primary" class="btn btn-primary"
:disabled="submitting || !title.trim()" :disabled="submitting || !title.trim()"
:aria-busy="submitting" :aria-busy="submitting"
@click="onSubmit" @click="onSubmit"
> >
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span> <span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
{{ submitting ? 'Publishing...' : (selectedRef ? 'Publish as variation' : 'Publish') }} {{ submitting ? 'Publishing...' : 'Publish' }}
</button>
<button
v-else
class="btn btn-primary"
:disabled="checking || !title.trim()"
:aria-busy="checking"
@click="onCheckThenSubmit"
>
<span v-if="checking" class="spinner spinner-sm" aria-hidden="true"></span>
{{ checking ? 'Checking...' : 'Publish' }}
</button> </button>
<button class="btn btn-secondary" @click="$emit('close')"> <button class="btn btn-secondary" @click="$emit('close')">
Cancel Cancel
@ -183,7 +139,7 @@
<script setup lang="ts"> <script setup lang="ts">
import { ref, onMounted, onUnmounted, nextTick } from 'vue' import { ref, onMounted, onUnmounted, nextTick } from 'vue'
import { useCommunityStore } from '../stores/community' import { useCommunityStore } from '../stores/community'
import type { PublishPayload, SimilarPost, SimilarityTier } from '../stores/community' import type { PublishPayload } from '../stores/community'
const props = defineProps<{ const props = defineProps<{
recipeId: number | null recipeId: number | null
@ -206,21 +162,6 @@ const submitting = ref(false)
const submitError = ref<string | null>(null) const submitError = ref<string | null>(null)
const submitSuccess = ref<string | null>(null) const submitSuccess = ref<string | null>(null)
const checking = ref(false)
const similarChecked = ref(false)
const similarPosts = ref<SimilarPost[]>([])
const selectedRef = ref<string | null>(null)
// Map a similarity tier to the short badge text shown next to each hit.
function tierLabel(tier: SimilarityTier): string {
  switch (tier) {
    case 'exact_recipe':
      return 'Same recipe'
    case 'very_similar':
      return 'Very similar'
    default:
      // Any other tier gets the generic label.
      return 'Similar'
  }
}
// Select `slug` as the post this one is a variation of; selecting the
// already-chosen post again clears the selection.
function toggleRef(slug: string) {
  if (selectedRef.value === slug) {
    selectedRef.value = null
  } else {
    selectedRef.value = slug
  }
}
const dialogRef = ref<HTMLElement | null>(null) const dialogRef = ref<HTMLElement | null>(null)
const firstFocusRef = ref<HTMLButtonElement | null>(null) const firstFocusRef = ref<HTMLButtonElement | null>(null)
let previousFocus: HTMLElement | null = null let previousFocus: HTMLElement | null = null
@ -274,17 +215,6 @@ onUnmounted(() => {
previousFocus?.focus() previousFocus?.focus()
}) })
// Run the similarity check first; publish straight away only when nothing
// similar is found. Otherwise the hits are shown and the user confirms.
async function onCheckThenSubmit() {
  if (!title.value.trim()) return
  checking.value = true
  try {
    similarPosts.value = await store.checkSimilar(title.value.trim(), props.recipeId, postType.value)
    similarChecked.value = true
  } finally {
    // Always clear the busy flag, even when the check rejects; otherwise
    // the Publish button stays disabled with a spinner.
    checking.value = false
  }
  if (!similarPosts.value.length) {
    await onSubmit()
  }
}
async function onSubmit() { async function onSubmit() {
submitError.value = null submitError.value = null
submitSuccess.value = null submitSuccess.value = null
@ -298,7 +228,6 @@ async function onSubmit() {
if (outcomeNotes.value.trim()) payload.outcome_notes = outcomeNotes.value.trim() if (outcomeNotes.value.trim()) payload.outcome_notes = outcomeNotes.value.trim()
if (pseudonymName.value.trim()) payload.pseudonym_name = pseudonymName.value.trim() if (pseudonymName.value.trim()) payload.pseudonym_name = pseudonymName.value.trim()
if (props.recipeId != null) payload.recipe_id = props.recipeId if (props.recipeId != null) payload.recipe_id = props.recipeId
if (selectedRef.value) payload.similar_to_ref = selectedRef.value
submitting.value = true submitting.value = true
try { try {
@ -420,82 +349,6 @@ async function onSubmit() {
flex-wrap: wrap; flex-wrap: wrap;
} }
.similar-panel {
background: var(--color-surface-alt, var(--color-surface));
border: 1px solid var(--color-warning, #f59e0b);
border-radius: var(--radius-md);
padding: var(--spacing-sm) var(--spacing-md);
margin-bottom: var(--spacing-md);
}
.similar-heading {
margin: 0 0 var(--spacing-sm);
}
.similar-list {
list-style: none;
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
.similar-item {
display: flex;
align-items: baseline;
gap: var(--spacing-xs);
flex-wrap: wrap;
}
.similar-tier-badge {
font-size: var(--font-size-xs);
font-weight: 700;
padding: 1px 6px;
border-radius: var(--radius-sm);
flex-shrink: 0;
}
.tier-exact_recipe {
background: var(--color-error-bg, #fee2e2);
color: var(--color-error, #dc2626);
}
.tier-very_similar {
background: var(--color-warning-bg, #fef3c7);
color: var(--color-warning-text, #92400e);
}
.tier-somewhat_similar {
background: var(--color-surface-alt, #f3f4f6);
color: var(--color-text-secondary);
}
.similar-title {
font-weight: 600;
font-size: var(--font-size-sm);
}
.similar-by {
flex-shrink: 0;
}
.btn-link {
background: none;
border: none;
color: var(--color-primary);
cursor: pointer;
padding: 0;
text-decoration: underline;
font-size: var(--font-size-xs);
margin-left: auto;
}
.btn-link.selected-ref {
color: var(--color-success);
font-weight: 700;
}
@media (max-width: 480px) { @media (max-width: 480px) {
.modal-panel { .modal-panel {
max-height: 95vh; max-height: 95vh;

View file

@ -78,39 +78,6 @@
<span class="form-hint">How you appear on posts -- not your real name or email.</span> <span class="form-hint">How you appear on posts -- not your real name or email.</span>
</div> </div>
<!-- Similarity check results (shown before final confirm) -->
<div
v-if="similarPosts.length > 0"
class="similar-panel"
role="region"
aria-label="Similar posts found"
>
<p class="similar-heading text-sm">
<strong>Similar plans already exist.</strong>
You can publish as-is, mark yours as a variation, or cancel.
</p>
<ul class="similar-list" aria-label="Existing similar posts">
<li
v-for="hit in similarPosts"
:key="hit.slug"
class="similar-item"
>
<span class="similar-tier-badge" :class="`tier-${hit.similarity_tier}`">
{{ tierLabel(hit.similarity_tier) }}
</span>
<span class="similar-title">{{ hit.title }}</span>
<span class="similar-by text-muted text-xs">by {{ hit.pseudonym }}</span>
<button
class="btn-link text-xs"
:class="{ 'selected-ref': selectedRef === hit.slug }"
@click="toggleRef(hit.slug)"
>
{{ selectedRef === hit.slug ? 'Unmark variation' : 'Mark as variation' }}
</button>
</li>
</ul>
</div>
<!-- Submission feedback (aria-live region, always rendered) --> <!-- Submission feedback (aria-live region, always rendered) -->
<div <div
class="feedback-region" class="feedback-region"
@ -124,24 +91,13 @@
<!-- Footer actions --> <!-- Footer actions -->
<div class="modal-footer flex gap-sm"> <div class="modal-footer flex gap-sm">
<button <button
v-if="!similarPosts.length || similarChecked"
class="btn btn-primary" class="btn btn-primary"
:disabled="submitting || !title.trim()" :disabled="submitting || !title.trim()"
:aria-busy="submitting" :aria-busy="submitting"
@click="onSubmit" @click="onSubmit"
> >
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span> <span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
{{ submitting ? 'Publishing...' : (selectedRef ? 'Publish as variation' : 'Publish') }} {{ submitting ? 'Publishing...' : 'Publish' }}
</button>
<button
v-else
class="btn btn-primary"
:disabled="checking || !title.trim()"
:aria-busy="checking"
@click="onCheckThenSubmit"
>
<span v-if="checking" class="spinner spinner-sm" aria-hidden="true"></span>
{{ checking ? 'Checking...' : 'Publish' }}
</button> </button>
<button class="btn btn-secondary" @click="$emit('close')"> <button class="btn btn-secondary" @click="$emit('close')">
Cancel Cancel
@ -155,7 +111,7 @@
<script setup lang="ts"> <script setup lang="ts">
import { ref, onMounted, onUnmounted, nextTick } from 'vue' import { ref, onMounted, onUnmounted, nextTick } from 'vue'
import { useCommunityStore } from '../stores/community' import { useCommunityStore } from '../stores/community'
import type { PublishPayload, SimilarPost, SimilarityTier } from '../stores/community' import type { PublishPayload } from '../stores/community'
const props = defineProps<{ const props = defineProps<{
plan?: { plan?: {
@ -180,21 +136,6 @@ const submitting = ref(false)
const submitError = ref<string | null>(null) const submitError = ref<string | null>(null)
const submitSuccess = ref<string | null>(null) const submitSuccess = ref<string | null>(null)
const checking = ref(false)
const similarChecked = ref(false)
const similarPosts = ref<SimilarPost[]>([])
const selectedRef = ref<string | null>(null)
// Map a similarity tier to the short badge text shown next to each hit.
function tierLabel(tier: SimilarityTier): string {
  switch (tier) {
    case 'exact_recipe':
      return 'Same recipe'
    case 'very_similar':
      return 'Very similar'
    default:
      // Any other tier gets the generic label.
      return 'Similar'
  }
}
// Select `slug` as the post this one is a variation of; selecting the
// already-chosen post again clears the selection.
function toggleRef(slug: string) {
  if (selectedRef.value === slug) {
    selectedRef.value = null
  } else {
    selectedRef.value = slug
  }
}
const dialogRef = ref<HTMLElement | null>(null) const dialogRef = ref<HTMLElement | null>(null)
const firstFocusRef = ref<HTMLInputElement | null>(null) const firstFocusRef = ref<HTMLInputElement | null>(null)
let previousFocus: HTMLElement | null = null let previousFocus: HTMLElement | null = null
@ -248,19 +189,6 @@ onUnmounted(() => {
previousFocus?.focus() previousFocus?.focus()
}) })
// Run the similarity check first (keyed on the plan's first recipe, if any);
// publish straight away only when nothing similar is found.
async function onCheckThenSubmit() {
  if (!title.value.trim()) return
  checking.value = true
  try {
    const planRecipeIds = props.plan?.slots?.map((s) => s.recipe_id) ?? []
    const firstRecipeId = planRecipeIds[0] ?? null
    similarPosts.value = await store.checkSimilar(title.value.trim(), firstRecipeId, 'plan')
    similarChecked.value = true
  } finally {
    // Always clear the busy flag, even when the check rejects; otherwise
    // the Publish button stays disabled with a spinner.
    checking.value = false
  }
  if (!similarPosts.value.length) {
    await onSubmit()
  }
}
async function onSubmit() { async function onSubmit() {
submitError.value = null submitError.value = null
submitSuccess.value = null submitSuccess.value = null
@ -277,7 +205,6 @@ async function onSubmit() {
if (props.plan?.slots?.length) { if (props.plan?.slots?.length) {
payload.slots = props.plan.slots.map(({ day, meal_type, recipe_id }) => ({ day, meal_type, recipe_id })) payload.slots = props.plan.slots.map(({ day, meal_type, recipe_id }) => ({ day, meal_type, recipe_id }))
} }
if (selectedRef.value) payload.similar_to_ref = selectedRef.value
submitting.value = true submitting.value = true
try { try {
@ -368,82 +295,6 @@ async function onSubmit() {
flex-wrap: wrap; flex-wrap: wrap;
} }
.similar-panel {
background: var(--color-surface-alt, var(--color-surface));
border: 1px solid var(--color-warning, #f59e0b);
border-radius: var(--radius-md);
padding: var(--spacing-sm) var(--spacing-md);
margin-bottom: var(--spacing-md);
}
.similar-heading {
margin: 0 0 var(--spacing-sm);
}
.similar-list {
list-style: none;
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
.similar-item {
display: flex;
align-items: baseline;
gap: var(--spacing-xs);
flex-wrap: wrap;
}
.similar-tier-badge {
font-size: var(--font-size-xs);
font-weight: 700;
padding: 1px 6px;
border-radius: var(--radius-sm);
flex-shrink: 0;
}
.tier-exact_recipe {
background: var(--color-error-bg, #fee2e2);
color: var(--color-error, #dc2626);
}
.tier-very_similar {
background: var(--color-warning-bg, #fef3c7);
color: var(--color-warning-text, #92400e);
}
.tier-somewhat_similar {
background: var(--color-surface-alt, #f3f4f6);
color: var(--color-text-secondary);
}
.similar-title {
font-weight: 600;
font-size: var(--font-size-sm);
}
.similar-by {
flex-shrink: 0;
}
.btn-link {
background: none;
border: none;
color: var(--color-primary);
cursor: pointer;
padding: 0;
text-decoration: underline;
font-size: var(--font-size-xs);
margin-left: auto;
}
.btn-link.selected-ref {
color: var(--color-success);
font-weight: 700;
}
@media (max-width: 480px) { @media (max-width: 480px) {
.modal-panel { .modal-panel {
max-height: 95vh; max-height: 95vh;

View file

@ -6,7 +6,6 @@
v-for="domain in domains" v-for="domain in domains"
:key="domain.id" :key="domain.id"
:class="['btn', activeDomain === domain.id ? 'btn-primary' : 'btn-secondary']" :class="['btn', activeDomain === domain.id ? 'btn-primary' : 'btn-secondary']"
:aria-pressed="activeDomain === domain.id"
@click="selectDomain(domain.id)" @click="selectDomain(domain.id)"
> >
{{ domain.label }} {{ domain.label }}
@ -25,7 +24,6 @@
<div v-else class="category-list mb-sm flex flex-wrap gap-xs"> <div v-else class="category-list mb-sm flex flex-wrap gap-xs">
<button <button
:class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === '_all' }]" :class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === '_all' }]"
:aria-pressed="activeCategory === '_all'"
@click="selectCategory('_all')" @click="selectCategory('_all')"
> >
All All
@ -34,7 +32,6 @@
v-for="cat in categories" v-for="cat in categories"
:key="cat.category" :key="cat.category"
:class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === cat.category }]" :class="['btn', 'btn-secondary', 'cat-btn', { active: activeCategory === cat.category }]"
:aria-pressed="activeCategory === cat.category"
@click="selectCategory(cat.category)" @click="selectCategory(cat.category)"
> >
{{ cat.category }} {{ cat.category }}
@ -60,7 +57,6 @@
<template v-else> <template v-else>
<button <button
:class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === null }]" :class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === null }]"
:aria-pressed="activeSubcategory === null"
@click="selectSubcategory(null)" @click="selectSubcategory(null)"
> >
All {{ activeCategory }} All {{ activeCategory }}
@ -69,7 +65,6 @@
v-for="sub in subcategories" v-for="sub in subcategories"
:key="sub.subcategory" :key="sub.subcategory"
:class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === sub.subcategory }]" :class="['btn', 'btn-secondary', 'subcat-btn', { active: activeSubcategory === sub.subcategory }]"
:aria-pressed="activeSubcategory === sub.subcategory"
@click="selectSubcategory(sub.subcategory)" @click="selectSubcategory(sub.subcategory)"
> >
{{ sub.subcategory }} {{ sub.subcategory }}
@ -84,25 +79,6 @@
</template> </template>
</div> </div>
<!-- Browse breadcrumb shows current position in domain > category > subcategory hierarchy -->
<nav v-if="activeDomain && activeCategory" class="browse-breadcrumb" aria-label="Browse location">
<button
class="crumb-btn"
@click="selectDomain(activeDomain)"
:aria-current="!activeCategory ? 'page' : undefined"
>{{ domains.find(d => d.id === activeDomain)?.label ?? activeDomain }}</button>
<span class="crumb-sep" aria-hidden="true"></span>
<button
class="crumb-btn"
@click="selectCategory(activeCategory)"
:aria-current="!activeSubcategory ? 'page' : undefined"
>{{ activeCategory === '_all' ? 'All' : activeCategory }}</button>
<template v-if="activeSubcategory">
<span class="crumb-sep" aria-hidden="true"></span>
<span class="crumb-current" aria-current="page">{{ activeSubcategory }}</span>
</template>
</nav>
<!-- Recipe grid --> <!-- Recipe grid -->
<template v-if="activeCategory"> <template v-if="activeCategory">
<div v-if="loadingRecipes" class="text-secondary text-sm">Loading recipes</div> <div v-if="loadingRecipes" class="text-secondary text-sm">Loading recipes</div>
@ -129,25 +105,21 @@
<div class="sort-btns flex gap-xs"> <div class="sort-btns flex gap-xs">
<button <button
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'default' }]" :class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'default' }]"
:aria-pressed="sortOrder === 'default'"
@click="setSort('default')" @click="setSort('default')"
title="Corpus order" title="Corpus order"
>Default</button> >Default</button>
<button <button
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha' }]" :class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha' }]"
:aria-pressed="sortOrder === 'alpha'"
@click="setSort('alpha')" @click="setSort('alpha')"
title="Alphabetical A→Z" title="Alphabetical A→Z"
>AZ</button> >AZ</button>
<button <button
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha_desc' }]" :class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'alpha_desc' }]"
:aria-pressed="sortOrder === 'alpha_desc'"
@click="setSort('alpha_desc')" @click="setSort('alpha_desc')"
title="Alphabetical Z→A" title="Alphabetical Z→A"
>ZA</button> >ZA</button>
<button <button
:class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'match' }]" :class="['btn', 'btn-secondary', 'sort-btn', { active: sortOrder === 'match' }]"
:aria-pressed="sortOrder === 'match'"
:disabled="pantryCount === 0" :disabled="pantryCount === 0"
@click="setSort('match')" @click="setSort('match')"
:title="pantryCount > 0 ? 'Sort by pantry match %' : 'Add items to pantry to sort by match'" :title="pantryCount > 0 ? 'Sort by pantry match %' : 'Add items to pantry to sort by match'"
@ -156,11 +128,7 @@
</div> </div>
<div class="results-header flex-between mb-sm"> <div class="results-header flex-between mb-sm">
<span <span class="text-sm text-secondary">
class="text-sm text-secondary"
aria-live="polite"
aria-atomic="true"
>
{{ total }} recipes {{ total }} recipes
<span v-if="pantryCount > 0"> pantry match shown</span> <span v-if="pantryCount > 0"> pantry match shown</span>
<span v-if="requiredIngredient.trim()"> must include "{{ requiredIngredient.trim() }}"</span> <span v-if="requiredIngredient.trim()"> must include "{{ requiredIngredient.trim() }}"</span>
@ -169,14 +137,12 @@
<button <button
class="btn btn-secondary btn-xs" class="btn btn-secondary btn-xs"
:disabled="page <= 1" :disabled="page <= 1"
aria-label="Previous page"
@click="changePage(page - 1)" @click="changePage(page - 1)"
> Prev</button> > Prev</button>
<span class="text-sm text-secondary page-indicator" aria-live="polite">{{ page }} / {{ totalPages }}</span> <span class="text-sm text-secondary page-indicator">{{ page }} / {{ totalPages }}</span>
<button <button
class="btn btn-secondary btn-xs" class="btn btn-secondary btn-xs"
:disabled="page >= totalPages" :disabled="page >= totalPages"
aria-label="Next page"
@click="changePage(page + 1)" @click="changePage(page + 1)"
>Next </button> >Next </button>
</div> </div>
@ -888,40 +854,4 @@ async function submitTag() {
font-size: 0.875rem; font-size: 0.875rem;
margin-left: 0.5rem; margin-left: 0.5rem;
} }
/* ── Browse breadcrumb ───────────────────────────────────────────────────── */
.browse-breadcrumb {
display: flex;
align-items: center;
flex-wrap: wrap;
gap: 2px;
margin-bottom: var(--spacing-sm);
font-size: var(--font-size-xs, 0.78rem);
color: var(--color-text-secondary);
}
.crumb-btn {
background: none;
border: none;
padding: 2px 4px;
cursor: pointer;
color: var(--color-primary);
font-size: inherit;
border-radius: var(--radius-sm);
}
.crumb-btn:hover {
text-decoration: underline;
}
.crumb-sep {
opacity: 0.5;
padding: 0 2px;
}
.crumb-current {
padding: 2px 4px;
color: var(--color-text);
font-weight: 500;
}
</style> </style>

View file

@ -112,8 +112,8 @@
<path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/> <path d="M23 19a2 2 0 0 1-2 2H3a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h4l2-3h6l2 3h4a2 2 0 0 1 2 2z"/>
<circle cx="12" cy="13" r="4"/> <circle cx="12" cy="13" r="4"/>
</svg> </svg>
<p class="processing-label">{{ scanStatusMessage }}</p> <p class="processing-label">Extracting recipe from {{ selectedFiles.length > 1 ? selectedFiles.length + ' photos' : 'photo' }}...</p>
<p class="processing-sub">This can take up to a minute on first use.</p> <p class="processing-sub">This can take 10-30 seconds.</p>
</div> </div>
</div> </div>
@ -329,18 +329,13 @@ function removeFile(index: number) {
// Scan // Scan
const extracted = ref<ScannedRecipe | null>(null) const extracted = ref<ScannedRecipe | null>(null)
const scanStatusMessage = ref('Uploading photos...')
async function startScan() { async function startScan() {
if (selectedFiles.value.length === 0) return if (selectedFiles.value.length === 0) return
uploadError.value = '' uploadError.value = ''
scanStatusMessage.value = 'Uploading photos...'
phase.value = 'processing' phase.value = 'processing'
try { try {
const result = await recipeScanAPI.scanStream( const result = await recipeScanAPI.scan(selectedFiles.value)
selectedFiles.value,
(_status: string, message: string) => { scanStatusMessage.value = message },
)
extracted.value = result extracted.value = result
initEditState(result) initEditState(result)
phase.value = 'review' phase.value = 'review'

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,6 @@
<div class="settings-view"> <div class="settings-view">
<div class="card"> <div class="card">
<h2 class="section-title text-xl mb-md">Settings</h2> <h2 class="section-title text-xl mb-md">Settings</h2>
<p class="text-xs text-muted mb-md">Changes save automatically.</p>
<!-- Cooking Equipment --> <!-- Cooking Equipment -->
<section> <section>
@ -20,7 +19,7 @@
class="tag-chip status-badge status-info" class="tag-chip status-badge status-info"
> >
{{ item }} {{ item }}
<button class="chip-remove" @click="removeEquipment(item)" :aria-label="'Remove equipment: ' + item">×</button> <button class="chip-remove" @click="removeEquipment(item)" aria-label="Remove">×</button>
</span> </span>
</div> </div>
@ -51,6 +50,18 @@
</div> </div>
</div> </div>
<!-- Save button -->
<div class="flex-start gap-sm">
<button
class="btn btn-primary"
:disabled="settingsStore.loading"
@click="settingsStore.save()"
>
<span v-if="settingsStore.loading">Saving</span>
<span v-else-if="settingsStore.saved"> Saved!</span>
<span v-else>Save Settings</span>
</button>
</div>
</section> </section>
<!-- Sensory Preferences --> <!-- Sensory Preferences -->
@ -123,6 +134,17 @@
</p> </p>
</div> </div>
<div class="flex-start gap-sm mt-sm">
<button
class="btn btn-primary btn-sm"
:disabled="settingsStore.loading"
@click="settingsStore.saveSensory()"
>
<span v-if="settingsStore.loading">Saving</span>
<span v-else-if="settingsStore.saved">Saved!</span>
<span v-else>Save sensory preferences</span>
</button>
</div>
</section> </section>
<!-- Units --> <!-- Units -->
@ -147,6 +169,17 @@
Imperial (oz, cups, °F) Imperial (oz, cups, °F)
</button> </button>
</div> </div>
<div class="flex-start gap-sm">
<button
class="btn btn-primary btn-sm"
:disabled="settingsStore.loading"
@click="settingsStore.save()"
>
<span v-if="settingsStore.loading">Saving</span>
<span v-else-if="settingsStore.saved"> Saved!</span>
<span v-else>Save</span>
</button>
</div>
</section> </section>
<!-- Shopping Locale --> <!-- Shopping Locale -->
@ -187,6 +220,17 @@
<option value="br">Brazil (BRL R$)</option> <option value="br">Brazil (BRL R$)</option>
</optgroup> </optgroup>
</select> </select>
<div class="flex-start gap-sm mt-sm">
<button
class="btn btn-primary btn-sm"
:disabled="settingsStore.loading"
@click="settingsStore.save()"
>
<span v-if="settingsStore.loading">Saving</span>
<span v-else-if="settingsStore.saved"> Saved!</span>
<span v-else>Save</span>
</button>
</div>
</section> </section>
<!-- Time-First Layout --> <!-- Time-First Layout -->
@ -214,6 +258,17 @@
</span> </span>
</label> </label>
</div> </div>
<div class="flex-start gap-sm mt-sm">
<button
class="btn btn-primary btn-sm"
:disabled="settingsStore.loading"
@click="settingsStore.save()"
>
<span v-if="settingsStore.loading">Saving</span>
<span v-else-if="settingsStore.saved"> Saved!</span>
<span v-else>Save</span>
</button>
</div>
</section> </section>
<!-- Data Sharing (cloud only) --> <!-- Data Sharing (cloud only) -->
@ -338,12 +393,6 @@
</template> </template>
</div> </div>
</div> </div>
<Transition name="autosave-fade">
<div v-if="settingsStore.saved" class="autosave-toast" role="status" aria-live="polite">
Saved
</div>
</Transition>
</template> </template>
<script setup lang="ts"> <script setup lang="ts">
@ -822,32 +871,4 @@ function getNoiseClass(_value: NoiseLevel, idx: number): string {
border-color: var(--color-border, #e0e0e0); border-color: var(--color-border, #e0e0e0);
color: var(--color-text-secondary, #888); color: var(--color-text-secondary, #888);
} }
/* ── Autosave toast ──────────────────────────────────────────────────────── */
.autosave-toast {
position: fixed;
bottom: 1.5rem;
right: 1.5rem;
background: var(--color-surface, #fff);
border: 1px solid var(--color-border, #e0e0e0);
border-radius: var(--radius-md, 0.5rem);
padding: 0.4rem 0.9rem;
font-size: var(--font-size-sm);
color: var(--color-success, #4a8c40);
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.12);
z-index: 500;
pointer-events: none;
}
.autosave-fade-enter-active,
.autosave-fade-leave-active {
transition: opacity 0.25s ease, transform 0.25s ease;
}
.autosave-fade-enter-from,
.autosave-fade-leave-to {
opacity: 0;
transform: translateY(0.5rem);
}
</style> </style>

View file

@ -627,7 +627,6 @@ export interface RecipeRequest {
complexity_filter: string | null complexity_filter: string | null
max_time_min: number | null max_time_min: number | null
max_total_min: number | null max_total_min: number | null
max_active_min: number | null
} }
export interface Staple { export interface Staple {
@ -671,21 +670,6 @@ export interface BuildRequest {
role_overrides: Record<string, string> role_overrides: Record<string, string>
} }
// ── Ask/RAG types ──────────────────────────────────────────────────────────
/** One recipe entry in the `recipes` array of an `AskResponse`. */
export interface AskRecipeHit {
  id: number
  title: string
  // Nullable. NOTE(review): presumably a pantry match percentage — confirm scale and when it is null against the backend.
  match_pct: number | null
  // Nullable; no category value is guaranteed for a hit.
  category: string | null
}
/** Payload resolved by `recipesAPI.ask()`. */
export interface AskResponse {
  // Nullable. NOTE(review): presumably the LLM-synthesised answer, null when no synthesis ran — confirm against the backend.
  answer: string | null
  // Recipes matched for the question.
  recipes: AskRecipeHit[]
  // NOTE(review): looks like the caller's license tier as reported by the server — confirm allowed values.
  tier: string
}
// ========== Recipes API ========== // ========== Recipes API ==========
export const recipesAPI = { export const recipesAPI = {
@ -752,60 +736,6 @@ export const recipesAPI = {
}) })
return response.data return response.data
}, },
/**
 * Natural-language recipe search with optional LLM synthesis (Paid tier).
 *
 * POSTs the question and the caller's pantry items to `/recipes/ask` with a
 * 30 s timeout and resolves with the response body.
 */
async ask(question: string, pantryItems: string[] = []): Promise<AskResponse> {
  const payload = { question, pantry_items: pantryItems }
  const { data } = await api.post('/recipes/ask', payload, { timeout: 30000 })
  return data
},
/**
 * Stream a recipe via native SSE (Ollama fallback). Calls callbacks as tokens arrive.
 *
 * POSTs `req` to `/recipes/suggest?stream=true` and parses the body as
 * `\n\n`-separated events of the form `data: <json>`:
 *   - `{ chunk }` → `onChunk(chunk)`
 *   - `{ done }`  → `onDone()` and stop
 *   - `{ error }` → `onError(error)` and stop
 * Events that are not valid JSON are skipped. If the body ends without a
 * `done` event, `onDone()` is still called.
 *
 * Every transport failure — before the response or in the middle of the
 * stream — is reported through `onError`; the returned promise does not
 * reject for network errors.
 */
async suggestRecipeStream(
  req: RecipeRequest,
  onChunk: (chunk: string) => void,
  onDone: () => void,
  onError: (err: string) => void,
): Promise<void> {
  const baseUrl = (api.defaults.baseURL ?? '') as string
  const describe = (err: unknown): string =>
    err instanceof Error ? err.message : 'Network error'

  let response: Response
  try {
    response = await fetch(`${baseUrl}/recipes/suggest?stream=true`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(req),
    })
  } catch (err: unknown) {
    onError(describe(err))
    return
  }

  if (!response.ok) {
    onError(`HTTP ${response.status}`)
    return
  }

  const reader = response.body?.getReader()
  if (!reader) { onError('No response body'); return }

  const decoder = new TextDecoder()
  let buffer = ''

  try {
    while (true) {
      let result: ReadableStreamReadResult<Uint8Array>
      try {
        result = await reader.read()
      } catch (err: unknown) {
        // Connection dropped mid-stream: report it like any other network error.
        onError(describe(err))
        return
      }
      if (result.done) { onDone(); return }

      buffer += decoder.decode(result.value, { stream: true })
      const parts = buffer.split('\n\n')
      // The last piece may be an incomplete event; keep it for the next read.
      buffer = parts.pop() ?? ''

      for (const part of parts) {
        if (!part.startsWith('data: ')) continue
        let data: { done?: unknown; error?: string; chunk?: string }
        // Only the parse is guarded, so a throwing callback is not mistaken
        // for a malformed event.
        try { data = JSON.parse(part.slice(6)) } catch { continue }
        if (data.done) { onDone(); return }
        if (data.error) { onError(data.error); return }
        if (data.chunk) onChunk(data.chunk)
      }
    }
  } finally {
    // Stop the download when we exit early; harmless if the stream already ended.
    void reader.cancel().catch(() => { /* stream already closed */ })
  }
},
} }
// ========== Settings API ========== // ========== Settings API ==========
@ -1326,56 +1256,6 @@ export const recipeScanAPI = {
}).then((r) => r.data) }).then((r) => r.data)
}, },
/** Scan recipe photos with live SSE progress events.
 *
 * Calls onProgress(status, message) for each intermediate event
 * ("allocating", "scanning", "structuring"), then resolves with the final
 * ScannedRecipe on success. Rejects on error or timeout.
 *
 * The response body is read line by line; only lines starting with `data: `
 * that contain valid JSON are considered. A `status: "done"` event resolves
 * with its `recipe`; a `status: "error"` event rejects with its `message`.
 * A final event that is not newline-terminated is still processed, and the
 * stream reader is cancelled on every exit path.
 */
async scanStream(
  files: File[],
  onProgress: (status: string, message: string) => void,
): Promise<ScannedRecipe> {
  const form = new FormData()
  files.forEach((f) => form.append('files', f))

  const response = await fetch(`${API_BASE_URL}/recipes/scan/stream`, {
    method: 'POST',
    body: form,
  })

  if (!response.ok || !response.body) {
    let detail = ''
    try { detail = await response.text() } catch (_) { /* ignore */ }
    throw new Error(detail || `Scan failed (${response.status})`)
  }

  // Handle one line of the stream. Returns a wrapper holding the recipe for a
  // "done" event, null for anything that does not finish the scan.
  const handleLine = (line: string): { recipe: ScannedRecipe } | null => {
    if (!line.startsWith('data: ')) return null
    let data: Record<string, unknown>
    try { data = JSON.parse(line.slice(6)) } catch { return null }
    if (data.status === 'done') return { recipe: data.recipe as ScannedRecipe }
    if (data.status === 'error') throw new Error((data.message as string) || 'Scan failed')
    onProgress(data.status as string, data.message as string)
    return null
  }

  const reader = response.body.getReader()
  const decoder = new TextDecoder()
  let buffer = ''

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      buffer += decoder.decode(value, { stream: true })
      const lines = buffer.split('\n')
      // The last piece may be a partial line; keep it for the next read.
      buffer = lines.pop() ?? ''
      for (const line of lines) {
        const finished = handleLine(line)
        if (finished) return finished.recipe
      }
    }

    // Flush the decoder and process a final line that had no trailing newline.
    const finished = handleLine(buffer + decoder.decode())
    if (finished) return finished.recipe
  } finally {
    // Stop the download on early return/throw; harmless once the stream has ended.
    void reader.cancel().catch(() => { /* stream already closed */ })
  }

  throw new Error('Stream ended without a result')
},
/** Save a reviewed/edited scanned recipe to user_recipes. */ /** Save a reviewed/edited scanned recipe to user_recipes. */
saveScanned(recipe: Omit<ScannedRecipe, 'pantry_match_pct' | 'confidence' | 'warnings'> & { source?: string }): Promise<UserRecipe> { saveScanned(recipe: Omit<ScannedRecipe, 'pantry_match_pct' | 'confidence' | 'warnings'> & { source?: string }): Promise<UserRecipe> {
return api.post('/recipes/scan/save', recipe).then((r) => r.data) return api.post('/recipes/scan/save', recipe).then((r) => r.data)

View file

@ -64,20 +64,6 @@ export interface PublishPayload {
recipe_id?: number recipe_id?: number
outcome_notes?: string outcome_notes?: string
slots?: CommunityPostSlot[] slots?: CommunityPostSlot[]
similar_to_ref?: string
}
export type SimilarityTier = 'exact_recipe' | 'very_similar' | 'somewhat_similar'
export interface SimilarPost {
slug: string
title: string
recipe_name: string | null
pseudonym: string
published: string
similarity_tier: SimilarityTier
jaccard_score: number | null
tier_description: string
} }
export interface PublishResult { export interface PublishResult {
@ -121,25 +107,6 @@ export const useCommunityStore = defineStore('community', () => {
return response.data return response.data
} }
/**
 * Ask the server for community posts similar to a draft.
 *
 * `recipe_id` is sent only when `recipeId` is neither null nor undefined, and
 * `post_type` only when `postType` is non-empty. Any request failure is
 * swallowed and reported as "no similar posts" (empty array).
 */
async function checkSimilar(
  title: string,
  recipeId?: number | null,
  postType?: string,
): Promise<SimilarPost[]> {
  const body: Record<string, unknown> = {
    title,
    ...(recipeId != null ? { recipe_id: recipeId } : {}),
    ...(postType ? { post_type: postType } : {}),
  }
  try {
    const { data } = await api.post<{ similar_posts: SimilarPost[] }>(
      '/community/check-similar',
      body,
    )
    return data.similar_posts
  } catch {
    return []
  }
}
return { return {
posts, posts,
loading, loading,
@ -148,6 +115,5 @@ export const useCommunityStore = defineStore('community', () => {
fetchPosts, fetchPosts,
forkPost, forkPost,
publishPost, publishPost,
checkSimilar,
} }
}) })

View file

@ -152,7 +152,6 @@ export const useRecipesStore = defineStore('recipes', () => {
const complexityFilter = ref<string | null>(null) const complexityFilter = ref<string | null>(null)
const maxTimeMin = ref<number | null>(null) const maxTimeMin = ref<number | null>(null)
const maxTotalMin = ref<number | null>(null) const maxTotalMin = ref<number | null>(null)
const maxActiveMin = ref<number | null>(null)
const nutritionFilters = ref<NutritionFilters>({ const nutritionFilters = ref<NutritionFilters>({
max_calories: null, max_calories: null,
max_sugar_g: null, max_sugar_g: null,
@ -208,7 +207,6 @@ export const useRecipesStore = defineStore('recipes', () => {
complexity_filter: complexityFilter.value, complexity_filter: complexityFilter.value,
max_time_min: maxTimeMin.value, max_time_min: maxTimeMin.value,
max_total_min: maxTotalMin.value, max_total_min: maxTotalMin.value,
max_active_min: maxActiveMin.value,
} }
} }
@ -379,17 +377,6 @@ export const useRecipesStore = defineStore('recipes', () => {
wildcardConfirmed.value = false wildcardConfirmed.value = false
} }
/**
 * Build a recipe request from the current store filters plus the given pantry
 * items and stream the suggestion, forwarding the three callbacks unchanged
 * to `recipesAPI.suggestRecipeStream`.
 */
async function streamSuggest(
  pantryItems: string[],
  secondaryPantryItems: Record<string, string>,
  onChunk: (chunk: string) => void,
  onDone: () => void,
  onError: (err: string) => void,
): Promise<void> {
  await recipesAPI.suggestRecipeStream(
    _buildRequest(pantryItems, secondaryPantryItems),
    onChunk,
    onDone,
    onError,
  )
}
return { return {
result, result,
loading, loading,
@ -409,7 +396,6 @@ export const useRecipesStore = defineStore('recipes', () => {
complexityFilter, complexityFilter,
maxTimeMin, maxTimeMin,
maxTotalMin, maxTotalMin,
maxActiveMin,
nutritionFilters, nutritionFilters,
dismissedIds, dismissedIds,
dismissedCount, dismissedCount,
@ -427,7 +413,6 @@ export const useRecipesStore = defineStore('recipes', () => {
missingIngredientMode, missingIngredientMode,
builderFilterMode, builderFilterMode,
suggest, suggest,
streamSuggest,
loadMore, loadMore,
dismiss, dismiss,
undismiss, undismiss,

View file

@ -1,5 +1,11 @@
/**
* Settings Store
*
* Manages user settings (cooking equipment, preferences) using Pinia.
*/
import { defineStore } from 'pinia' import { defineStore } from 'pinia'
import { ref, watch, nextTick } from 'vue' import { ref } from 'vue'
import { settingsAPI } from '../services/api' import { settingsAPI } from '../services/api'
import type { UnitSystem } from '../utils/units' import type { UnitSystem } from '../utils/units'
import type { SensoryPreferences } from '../services/api' import type { SensoryPreferences } from '../services/api'
@ -7,12 +13,8 @@ import { DEFAULT_SENSORY_PREFERENCES } from '../services/api'
export type TimeFirstLayout = 'auto' | 'time_first' | 'normal' export type TimeFirstLayout = 'auto' | 'time_first' | 'normal'
/**
 * Wrap `fn` so that it runs once, `ms` milliseconds after the most recent
 * call to the returned function; each call cancels the previously scheduled run.
 */
function debounce(fn: () => void, ms: number): () => void {
  let pending: ReturnType<typeof setTimeout>
  return function trigger(): void {
    clearTimeout(pending)
    pending = setTimeout(fn, ms)
  }
}
export const useSettingsStore = defineStore('settings', () => { export const useSettingsStore = defineStore('settings', () => {
// State
const cookingEquipment = ref<string[]>([]) const cookingEquipment = ref<string[]>([])
const unitSystem = ref<UnitSystem>('metric') const unitSystem = ref<UnitSystem>('metric')
const shoppingLocale = ref<string>('us') const shoppingLocale = ref<string>('us')
@ -21,40 +23,7 @@ export const useSettingsStore = defineStore('settings', () => {
const loading = ref(false) const loading = ref(false)
const saved = ref(false) const saved = ref(false)
// Prevents autosave watchers from firing during initial load hydration. // Actions
// Set to true after nextTick() at the end of load() — by that point all
// watcher jobs queued by the hydration assignments have already flushed.
let _hydrated = false
// Handle of the timer that will clear `saved`; undefined when none is pending.
let _flashTimer: ReturnType<typeof setTimeout> | undefined

/**
 * Raise the `saved` flag for 2 seconds.
 *
 * A flash that arrives while a previous one is still showing restarts the
 * 2-second window, so the earlier timer cannot clear the flag prematurely.
 */
function _flash() {
  saved.value = true
  if (_flashTimer !== undefined) clearTimeout(_flashTimer)
  _flashTimer = setTimeout(() => {
    saved.value = false
    _flashTimer = undefined
  }, 2000)
}
/**
 * Persist a single setting and flash the "saved" indicator.
 *
 * Does nothing until the store has been hydrated. Failures are logged to the
 * console and otherwise ignored — the promise never rejects.
 */
async function _saveKey(key: string, value: string): Promise<void> {
  if (_hydrated) {
    try {
      await settingsAPI.setSetting(key, value)
      _flash()
    } catch (err: unknown) {
      console.error('Autosave failed for key:', key, err)
    }
  }
}
const _autosave = {
equipment: debounce(() => _saveKey('cooking_equipment', JSON.stringify(cookingEquipment.value)), 600),
unit: debounce(() => _saveKey('unit_system', unitSystem.value), 600),
locale: debounce(() => _saveKey('shopping_locale', shoppingLocale.value), 600),
sensory: debounce(() => _saveKey('sensory_preferences', JSON.stringify(sensoryPreferences.value)), 600),
layout: debounce(() => _saveKey('time_first_layout', timeFirstLayout.value), 600),
}
watch(cookingEquipment, _autosave.equipment, { deep: true })
watch(unitSystem, _autosave.unit)
watch(shoppingLocale, _autosave.locale)
watch(sensoryPreferences, _autosave.sensory, { deep: true })
watch(timeFirstLayout, _autosave.layout)
async function load() { async function load() {
loading.value = true loading.value = true
try { try {
@ -89,15 +58,8 @@ export const useSettingsStore = defineStore('settings', () => {
} finally { } finally {
loading.value = false loading.value = false
} }
// Yield past the watcher flush triggered by hydration assignments above.
// After nextTick, any pending watcher jobs from this load() have already
// run (and been ignored by _hydrated guard), so user-driven changes from
// here forward will correctly trigger autosave.
await nextTick()
_hydrated = true
} }
// Kept for explicit full-save scenarios (e.g. fallback, tests).
async function save() { async function save() {
loading.value = true loading.value = true
try { try {
@ -108,7 +70,10 @@ export const useSettingsStore = defineStore('settings', () => {
settingsAPI.setSetting('sensory_preferences', JSON.stringify(sensoryPreferences.value)), settingsAPI.setSetting('sensory_preferences', JSON.stringify(sensoryPreferences.value)),
settingsAPI.setSetting('time_first_layout', timeFirstLayout.value), settingsAPI.setSetting('time_first_layout', timeFirstLayout.value),
]) ])
_flash() saved.value = true
setTimeout(() => {
saved.value = false
}, 2000)
} catch (err: unknown) { } catch (err: unknown) {
console.error('Failed to save settings:', err) console.error('Failed to save settings:', err)
} finally { } finally {
@ -116,17 +81,24 @@ export const useSettingsStore = defineStore('settings', () => {
} }
} }
// Kept for backward compat; autosave handles sensory changes now.
async function saveSensory() { async function saveSensory() {
loading.value = true
try { try {
await settingsAPI.setSetting('sensory_preferences', JSON.stringify(sensoryPreferences.value)) await settingsAPI.setSetting(
_flash() 'sensory_preferences',
JSON.stringify(sensoryPreferences.value),
)
saved.value = true
setTimeout(() => { saved.value = false }, 2000)
} catch (err: unknown) { } catch (err: unknown) {
console.error('Failed to save sensory preferences:', err) console.error('Failed to save sensory preferences:', err)
} finally {
loading.value = false
} }
} }
return { return {
// State
cookingEquipment, cookingEquipment,
unitSystem, unitSystem,
shoppingLocale, shoppingLocale,
@ -134,6 +106,8 @@ export const useSettingsStore = defineStore('settings', () => {
timeFirstLayout, timeFirstLayout,
loading, loading,
saved, saved,
// Actions
load, load,
save, save,
saveSensory, saveSensory,

View file

@ -14,8 +14,8 @@ OVERRIDE_FLAG=""
[[ -f "compose.override.yml" ]] && OVERRIDE_FLAG="-f compose.override.yml" [[ -f "compose.override.yml" ]] && OVERRIDE_FLAG="-f compose.override.yml"
usage() { usage() {
echo "Usage: $0 {start|stop|restart|status|logs|open|build|test|update" echo "Usage: $0 {start|stop|restart|status|logs|open|build|test"
echo " |cloud-start|cloud-stop|cloud-restart|cloud-status|cloud-logs|cloud-build|cloud-update}" echo " |cloud-start|cloud-stop|cloud-restart|cloud-status|cloud-logs|cloud-build}"
echo "" echo ""
echo "Dev:" echo "Dev:"
echo " start Build (if needed) and start all services" echo " start Build (if needed) and start all services"
@ -26,7 +26,6 @@ usage() {
echo " open Open web UI in browser" echo " open Open web UI in browser"
echo " build Rebuild Docker images without cache" echo " build Rebuild Docker images without cache"
echo " test Run pytest test suite" echo " test Run pytest test suite"
echo " update git pull + rebuild + restart dev stack"
echo "" echo ""
echo "Cloud (menagerie.circuitforge.tech/kiwi):" echo "Cloud (menagerie.circuitforge.tech/kiwi):"
echo " cloud-start Build cloud images and start kiwi-cloud project" echo " cloud-start Build cloud images and start kiwi-cloud project"
@ -35,7 +34,6 @@ usage() {
echo " cloud-status Show cloud containers" echo " cloud-status Show cloud containers"
echo " cloud-logs Follow cloud logs [api|web — defaults to all]" echo " cloud-logs Follow cloud logs [api|web — defaults to all]"
echo " cloud-build Rebuild cloud images without cache" echo " cloud-build Rebuild cloud images without cache"
echo " cloud-update git pull + rebuild + restart cloud stack"
exit 1 exit 1
} }
@ -70,11 +68,6 @@ case "$cmd" in
build) build)
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG build --no-cache docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG build --no-cache
;; ;;
update)
git pull
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG up -d --build
echo "Kiwi updated and restarted → http://localhost:${WEB_PORT}"
;;
test) test)
docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG run --rm api \ docker compose -f "$COMPOSE_FILE" $OVERRIDE_FLAG run --rm api \
conda run -n job-seeker pytest tests/ -v conda run -n job-seeker pytest tests/ -v
@ -102,11 +95,6 @@ case "$cmd" in
cloud-build) cloud-build)
docker compose -f "$CLOUD_COMPOSE_FILE" -p "$CLOUD_PROJECT" build --no-cache docker compose -f "$CLOUD_COMPOSE_FILE" -p "$CLOUD_PROJECT" build --no-cache
;; ;;
cloud-update)
git pull
docker compose -f "$CLOUD_COMPOSE_FILE" -p "$CLOUD_PROJECT" up -d --build
echo "Kiwi cloud updated and restarted → https://menagerie.circuitforge.tech/kiwi"
;;
*) *)
usage usage

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "kiwi" name = "kiwi"
version = "0.10.0" version = "0.6.0"
description = "Pantry tracking + leftover recipe suggestions" description = "Pantry tracking + leftover recipe suggestions"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"

View file

@ -1,218 +0,0 @@
"""Ingest Purple Carrot scraped recipes into the Kiwi corpus database.
Reads recipes_purplecarrot_live.parquet (output of scrape_live.py) and
upserts into the shared recipes table, setting source='purplecarrot' and
using the recipe slug as the external_id (prefixed pc_).
Run after each weekly_harvest.sh scrape:
conda run -n cf python3 scripts/pipeline/ingest_purplecarrot.py \
[--db /Library/Assets/kiwi/kiwi.db] \
[--parquet /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet]
"""
from __future__ import annotations
import argparse
import json
import sqlite3
from pathlib import Path
import math
import re
import pandas as pd
# ── Helpers (inlined from build_recipe_index to avoid cross-module import) ─────
_MEASURE_PATTERN = re.compile(
r"^\d[\d\s/¼½¾⅓⅔]*\s*(cup|tbsp|tsp|oz|lb|g|kg|ml|l|clove|slice|piece|can|pkg|package|bunch|head|stalk|sprig|pinch|dash|to taste|as needed)s?\b",
re.IGNORECASE,
)
_LEAD_NUMBER = re.compile(r"^\d[\d\s/¼½¾⅓⅔]*\s*")
_TRAILING_QUALIFIER = re.compile(
r"\s*(to taste|as needed|or more|or less|optional|if desired|if needed)\s*$",
re.IGNORECASE,
)
def _float_or_none(val: object) -> float | None:
try:
v = float(val) # type: ignore[arg-type]
return v if v > 0 else None
except (TypeError, ValueError):
return None
def _safe_list(val: object) -> list:
if val is None:
return []
if isinstance(val, float) and math.isnan(val):
return []
if isinstance(val, list):
return val
# Parquet often deserializes list columns as numpy arrays
try:
import numpy as np
if isinstance(val, np.ndarray):
return val.tolist()
except ImportError:
pass
return []
def _extract_ingredient_names(raw_list: list[str]) -> list[str]:
    """Reduce raw ingredient lines to bare, lowercased ingredient names.

    Each line is lowercased and stripped, then in order: a leading
    quantity-plus-unit is removed, any remaining leading number is removed,
    parenthesised text is dropped, everything from the first comma onward is
    dropped, a trailing qualifier ("to taste", "optional", ...) is removed, and
    surrounding spaces/dashes/dots/commas are trimmed. Results of one
    character or less are discarded.
    """
    cleaned: list[str] = []
    for line in raw_list:
        text = line.lower().strip()
        text = _LEAD_NUMBER.sub("", _MEASURE_PATTERN.sub("", text))
        text = re.sub(r",.*$", "", re.sub(r"\(.*?\)", "", text))
        text = _TRAILING_QUALIFIER.sub("", text).strip(" -.,")
        if len(text) > 1:
            cleaned.append(text)
    return cleaned
def _compute_element_coverage(profiles: list[dict]) -> dict[str, float]:
counts: dict[str, int] = {}
for p in profiles:
for elem in p.get("elements", []):
counts[elem] = counts.get(elem, 0) + 1
if not profiles:
return {}
return {e: round(c / len(profiles), 3) for e, c in counts.items()}
# ── Config ─────────────────────────────────────────────────────────────────────
DEFAULT_DB = Path("/Library/Assets/kiwi/kiwi.db")
DEFAULT_PARQUET = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet")
# ── Ingest ─────────────────────────────────────────────────────────────────────
def _text_or_empty(val: object) -> str:
    """Return *val* as stripped text, mapping missing cells (None/NaN) to ``""``."""
    if val is None or (isinstance(val, float) and math.isnan(val)):
        return ""
    return str(val).strip()


def ingest(db_path: Path, parquet_path: Path) -> None:
    """Upsert scraped Purple Carrot recipes from a parquet file into ``recipes``.

    Rows are keyed by ``external_id = "pc_<Slug>"``: a row that already exists
    with that id is updated in place, otherwise a new row is inserted with
    ``source='purplecarrot'``. Rows whose slug or title is missing are skipped.
    All writes are committed once at the end, and the connection is closed
    even if an error occurs (uncommitted work is then discarded).

    Args:
        db_path: SQLite database holding the ``recipes`` and
            ``ingredient_profiles`` tables.
        parquet_path: Parquet file of scraped recipes to read.
    """
    df = pd.read_parquet(parquet_path)

    # Filter to rows with full recipe data
    if "HasFullRecipe" in df.columns:
        df = df[df["HasFullRecipe"].eq(True)].copy()

    if df.empty:
        print("No full recipes found in parquet — nothing to ingest.")
        return

    print(f"Ingesting {len(df)} Purple Carrot recipes into {db_path}")

    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")

        # Pre-load ingredient element profiles for coverage calculation
        profile_index: dict[str, list[str]] = {}
        for name, elements_json in conn.execute(
            "SELECT name, elements FROM ingredient_profiles"
        ):
            try:
                profile_index[name] = json.loads(elements_json)
            except (TypeError, ValueError):
                # NULL or malformed JSON: this ingredient simply has no profile.
                continue

        inserted = updated = 0

        for _, row in df.iterrows():
            # Missing cells arrive as None/NaN; str() would turn them into the
            # literal text "None"/"nan" and create bogus rows, so normalise first.
            slug = _text_or_empty(row.get("Slug"))
            if not slug:
                continue
            external_id = f"pc_{slug}"

            title = _text_or_empty(row.get("Name"))[:500]
            if not title:
                continue

            raw_ingredients = [str(i) for i in _safe_list(row.get("RecipeIngredientParts", []))]
            directions = [str(d) for d in _safe_list(row.get("RecipeInstructions", []))]

            ingredient_names = _extract_ingredient_names(raw_ingredients)
            profiles = [
                {"elements": profile_index[n]}
                for n in ingredient_names
                if n in profile_index
            ]
            coverage = _compute_element_coverage(profiles)

            # Keywords: merge scraped tags with allergen info
            kw_raw = _safe_list(row.get("Keywords", []))
            allergens = _text_or_empty(row.get("Allergens"))
            if allergens:
                kw_raw = list(kw_raw) + [
                    f"allergen:{a.strip()}" for a in allergens.split(",") if a.strip()
                ]
            keywords_json = json.dumps(kw_raw)

            # Check if already present (same external_id)
            existing = conn.execute(
                "SELECT id FROM recipes WHERE external_id = ?", (external_id,)
            ).fetchone()

            params = (
                title,
                json.dumps(raw_ingredients),
                json.dumps(ingredient_names),
                json.dumps(directions),
                "meal-kit",  # category
                keywords_json,
                _float_or_none(row.get("Calories")),
                _float_or_none(row.get("FatContent")),
                _float_or_none(row.get("ProteinContent")),
                None,  # sodium_mg — not scraped
                json.dumps(coverage),
                None,  # sugar_g — not scraped
                _float_or_none(row.get("CarbohydrateContent")),
                _float_or_none(row.get("FiberContent")),
                2.0,  # servings — PC meal kits are 2-serving by default
                0,  # nutrition_estimated — PC provides real data
            )

            if existing:
                conn.execute("""
                    UPDATE recipes
                    SET title=?, ingredients=?, ingredient_names=?, directions=?,
                    category=?, keywords=?, calories=?, fat_g=?, protein_g=?,
                    sodium_mg=?, element_coverage=?,
                    sugar_g=?, carbs_g=?, fiber_g=?, servings=?, nutrition_estimated=?
                    WHERE external_id=?
                """, params + (external_id,))
                updated += 1
            else:
                conn.execute("""
                    INSERT INTO recipes
                    (external_id, source, title, ingredients, ingredient_names,
                    directions, category, keywords, calories, fat_g, protein_g,
                    sodium_mg, element_coverage,
                    sugar_g, carbs_g, fiber_g, servings, nutrition_estimated)
                    VALUES (?, 'purplecarrot', ?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """, (external_id,) + params)
                inserted += 1

        conn.commit()
    finally:
        conn.close()

    print(f"Done — {inserted} inserted, {updated} updated")
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """Parse ``--db`` / ``--parquet`` and run the ingest.

    Prints an error and exits with status 1 when the parquet file does not
    exist.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--db", type=Path, default=DEFAULT_DB)
    cli.add_argument("--parquet", type=Path, default=DEFAULT_PARQUET)
    opts = cli.parse_args()

    if opts.parquet.exists():
        ingest(opts.db, opts.parquet)
        return

    print(f"ERROR: parquet not found at {opts.parquet}")
    raise SystemExit(1)
if __name__ == "__main__":
main()

View file

@ -1,68 +0,0 @@
"""
Pipeline logging utility.
Adds a structured JSON FileHandler to the root logger so every pipeline
script automatically writes machine-readable logs to the shared datastore
at /Library/Assets/logs/pipeline/. Avocet ingests these for Turnstone
logreading training (kiwi#141 / avocet#67).
Usage (add near the top of main() after logging.basicConfig):
from scripts.pipeline.log_utils import attach_pipeline_log
attach_pipeline_log("scrape_recipes")
"""
from __future__ import annotations
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
PIPELINE_LOG_DIR = Path(
os.environ.get("PIPELINE_LOG_DIR", "/Library/Assets/logs/pipeline")
)
class _JsonFormatter(logging.Formatter):
def format(self, record: logging.LogRecord) -> str:
payload: dict = {
"ts": datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat(),
"level": record.levelname,
"logger": record.name,
"msg": record.getMessage(),
}
if record.exc_info:
payload["exc"] = self.formatException(record.exc_info)
# Any extra kwargs passed via logger.info("...", extra={...})
standard = {
"name", "msg", "args", "levelname", "levelno", "pathname",
"filename", "module", "exc_info", "exc_text", "stack_info",
"lineno", "funcName", "created", "msecs", "relativeCreated",
"thread", "threadName", "processName", "process", "message",
"taskName",
}
extra = {k: v for k, v in record.__dict__.items() if k not in standard}
if extra:
payload["extra"] = extra
return json.dumps(payload)
def attach_pipeline_log(script_name: str) -> Path:
    """Add a DEBUG-level JSON-lines file handler to the root logger.

    The log directory is created if needed and the file is named
    ``<script_name>_<UTC timestamp>.jsonl``. A first "Pipeline log" record is
    emitted through this module's logger once the handler is attached.

    Returns:
        The path of the log file.
    """
    PIPELINE_LOG_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(tz=timezone.utc).strftime("%Y%m%dT%H%M%S")
    log_path = PIPELINE_LOG_DIR / f"{script_name}_{stamp}.jsonl"

    file_handler = logging.FileHandler(log_path, encoding="utf-8")
    file_handler.setFormatter(_JsonFormatter())
    file_handler.setLevel(logging.DEBUG)

    root_logger = logging.getLogger()
    root_logger.addHandler(file_handler)

    module_logger = logging.getLogger(__name__)
    module_logger.info("Pipeline log: %s", log_path, extra={"script": script_name})
    return log_path

View file

@ -1,120 +0,0 @@
"""Discover Purple Carrot's current weekly menu recipe slugs.
The main /plant-based-recipes listing page always renders the current week's
menu as server-side HTML. This script pulls those slugs and writes them to a
parquet that can be passed directly to scrape_live.py via --slugs-from.
Run weekly (e.g. via cron) to accumulate new recipes as the menu rotates.
Usage:
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
[--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet]
Then scrape:
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
--slugs-from /Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet \
--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet \
--resume
"""
from __future__ import annotations
import re
import sys
from datetime import date
from pathlib import Path
import pandas as pd
import requests
from bs4 import BeautifulSoup
# ── Config ─────────────────────────────────────────────────────────────────────
LISTING_URL = "https://www.purplecarrot.com/plant-based-recipes"
BASE_URL = "https://www.purplecarrot.com/recipe/{slug}"
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet")
HEADERS = {
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
),
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
}
RECIPE_HREF_RE = re.compile(r"/recipe/([^?#]+)")
# ── Main ───────────────────────────────────────────────────────────────────────
def discover_current_slugs() -> list[str]:
    """Fetch the listing page and return unique recipe slugs from the current menu.

    Slugs are returned in first-seen order.  An empty list is returned when the
    page cannot be fetched (network error or non-200 status); the caller treats
    an empty list as failure.
    """
    try:
        resp = requests.get(LISTING_URL, headers=HEADERS, timeout=15)
    except requests.RequestException as exc:
        # Connection errors / timeouts are reported the same way as a bad status.
        print(f"ERROR: could not fetch listing page: {exc}", file=sys.stderr)
        return []
    if resp.status_code != 200:
        print(f"ERROR: listing page returned HTTP {resp.status_code}", file=sys.stderr)
        return []

    soup = BeautifulSoup(resp.text, "html.parser")
    # dict preserves insertion order, giving ordered de-duplication in one pass.
    ordered: dict[str, None] = {}
    for anchor in soup.find_all("a", href=RECIPE_HREF_RE):
        match = RECIPE_HREF_RE.search(anchor["href"])
        if not match:
            continue
        # "/recipe/foo/" and "/recipe/foo" are the same recipe; normalise so the
        # slug formats cleanly into BASE_URL and is not recorded twice.
        slug = match.group(1).strip("/")
        if slug:
            ordered.setdefault(slug, None)
    return list(ordered)
def main() -> None:
    """CLI entry point: discover this week's menu slugs and accumulate them.

    New slugs are merged into the parquet at ``--out``; rows already present
    keep their original values (first occurrence wins).
    Exits with status 1 when no slugs could be discovered.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--out", type=Path, default=DEFAULT_OUT)
    args = cli.parse_args()

    print(f"Fetching current menu from {LISTING_URL}")
    slugs = discover_current_slugs()
    if not slugs:
        print("No slugs found — the listing page may have changed structure or blocked the request.")
        sys.exit(1)

    discovered_on = date.today().isoformat()
    rows = []
    for slug in slugs:
        rows.append(
            {
                "Slug": slug,
                "SourceURL": BASE_URL.format(slug=slug),
                "Source": "purplecarrot_menu",
                "DiscoveredDate": discovered_on,
            }
        )
    menu_df = pd.DataFrame(rows)

    # Accumulate across weeks: earlier rows come first so they survive dedup.
    args.out.parent.mkdir(parents=True, exist_ok=True)
    if args.out.exists():
        earlier = pd.read_parquet(args.out)
        merged = pd.concat([earlier, menu_df], ignore_index=True)
        menu_df = merged.drop_duplicates(subset=["Slug"], keep="first")
    menu_df.to_parquet(args.out, index=False)

    print(f"Found {len(slugs)} current-menu slugs this week:")
    for slug in slugs:
        print(f" {slug}")
    print(f"\nSaved {len(menu_df)} total slugs (accumulated) to {args.out}")
    print("\nTo scrape full recipes:")
    print(" conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \\")
    print(f" --slugs-from {args.out} \\")
    print(" --out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet \\")
    print(" --resume")
if __name__ == "__main__":
main()

View file

@ -1,218 +0,0 @@
"""Discover Purple Carrot recipe slugs by crawling all recipe-category listing pages.
The site serves full server-rendered HTML for category pages, paginated via
?page=N. Each page loads 18 recipe cards. This script crawls every category
across all pages and writes a deduplicated slug inventory.
Usage:
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_slugs_categories.py \
[--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_slugs.parquet] \
[--delay 2.0] \
[--max-pages 50] # safety cap per category (comfort-foods has ~18)
"""
from __future__ import annotations
import argparse
import re
import time
from pathlib import Path
from typing import Any
import pandas as pd
import requests
from bs4 import BeautifulSoup
# ── Config ─────────────────────────────────────────────────────────────────────
BASE = "https://www.purplecarrot.com"
# All known category slugs (from /plant-based-recipes nav)
CATEGORIES: list[str] = [
"comfort-foods",
"family-friendly",
"healthy-desserts",
"holiday-recipes",
"quick-and-easy",
"party-foods",
"seasonal-menu",
"spring-recipes",
"summer-recipes",
"fall-recipes",
"winter-recipes",
"african",
"american",
"asian",
"comfort",
"french",
"indian",
"italian",
"mediterranean",
"mexican",
"middle-eastern",
"soups",
"salads",
"bowls",
"pasta",
"sandwiches-wraps",
"tacos",
"breakfast",
"snacks-sides",
]
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_slugs.parquet")
EXISTING_PARQUET = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet")
RECIPE_LINK_SELECTOR = "a.c-recipe__title"
SLUG_RE = re.compile(r"/recipe/([^?#]+)")
HEADERS = {
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
),
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
}
# ── Helpers ────────────────────────────────────────────────────────────────────
def _fetch_html(url: str, session: requests.Session) -> str | None:
    """Fetch *url* through *session* and return the HTML text.

    Returns None on any failure: a 404 (treated as the expected end of
    pagination, so it is not logged), any other non-200 status, or a
    transport-level error raised by requests.
    """
    try:
        resp = session.get(url, headers=HEADERS, timeout=15)
    except requests.RequestException as exc:
        # Only network/HTTP-layer failures are swallowed; programming errors
        # should surface instead of silently ending the crawl.
        print(f" ERROR fetching {url}: {exc}")
        return None

    if resp.status_code == 200:
        return resp.text
    if resp.status_code != 404:
        # Keep status and URL visibly separated in the log line.
        print(f" HTTP {resp.status_code} for {url}")
    return None
def _extract_slugs(html: str) -> list[str]:
    """Return the recipe slugs linked from one listing page, in document order.

    Only anchors matching ``RECIPE_LINK_SELECTOR`` are considered; duplicates
    are kept.
    """
    soup = BeautifulSoup(html, "html.parser")
    hits = (
        SLUG_RE.search(anchor.get("href", ""))
        for anchor in soup.select(RECIPE_LINK_SELECTOR)
    )
    return [hit.group(1) for hit in hits if hit]
def _get_category_total(html: str) -> int | None:
    """Parse the recipe count shown on a category page (e.g. '319 Recipes').

    Accepts counts written with thousands separators ('1,234 Recipes'), which
    would otherwise be read as only their last digit group.  Returns None when
    no count is present.
    """
    found = re.search(r"(\d{1,3}(?:,\d{3})+|\d+)\s+Recipes?\b", html)
    if not found:
        return None
    return int(found.group(1).replace(",", ""))
def _discover_category(
    category: str,
    session: requests.Session,
    delay: float,
    max_pages: int,
) -> tuple[list[str], int]:
    """Crawl all pages of a category and return ``(slugs, pages_fetched)``.

    Crawling stops at the first page that fails to load, yields no recipe
    links, or is shorter than a full page, and never goes past *max_pages*.
    ``pages_fetched`` counts the pages that actually yielded slugs (it is
    tracked directly rather than estimated from the slug total).
    *delay* is the pause in seconds between page requests.
    """
    page_size = 18  # recipe cards per full listing page
    slugs: list[str] = []
    pages_fetched = 0

    for page_num in range(1, max_pages + 1):
        url = f"{BASE}/recipe-categories/{category}"
        if page_num > 1:
            url = f"{url}?page={page_num}"

        html = _fetch_html(url, session)
        if html is None:
            break  # 404 or error = past the end

        page_slugs = _extract_slugs(html)
        if not page_slugs:
            # A first page that advertises a total but has no links suggests the
            # selector no longer matches (or the category slug is wrong).
            if page_num == 1:
                total = _get_category_total(html)
                if total is not None:
                    print(f" page 1 loaded (total={total}) but 0 recipe links — selector may need updating")
            break

        slugs.extend(page_slugs)
        pages_fetched += 1

        # Progress line; the advertised total is only looked up on page 1.
        total_hint = _get_category_total(html) if page_num == 1 else None
        total_str = f" / {total_hint}" if total_hint else ""
        print(f" page {page_num}: +{len(page_slugs)} slugs ({len(slugs)}{total_str} cumulative)")

        if len(page_slugs) < page_size:
            break  # short page = last page
        time.sleep(delay)

    return slugs, pages_fetched
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """CLI entry point: crawl category listings and write a slug inventory.

    Slugs found on category pages are de-duplicated (first category wins) and
    then topped up with any slugs that exist only in the Wayback parquet.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--out", type=Path, default=DEFAULT_OUT)
    cli.add_argument("--delay", type=float, default=2.0,
                     help="Seconds between page requests")
    cli.add_argument("--max-pages", type=int, default=50,
                     help="Safety cap on pages per category")
    cli.add_argument("--categories", nargs="*",
                     help="Crawl only these category slugs (default: all)")
    args = cli.parse_args()

    selected = args.categories or CATEGORIES

    # Seed with any slugs from the Wayback parquet
    wayback_slugs: set[str] = set()
    if EXISTING_PARQUET.exists():
        wayback_df = pd.read_parquet(EXISTING_PARQUET)
        wayback_slugs = set(wayback_df["Slug"].dropna().tolist())
        print(f"Seeded with {len(wayback_slugs)} slugs from Wayback parquet")

    rows: list[dict[str, Any]] = []
    session = requests.Session()
    for category in selected:
        print(f"\n[{category}]")
        found, pages = _discover_category(category, session, args.delay, args.max_pages)
        rows.extend(
            {"Slug": slug, "Category": category, "Source": "purplecarrot_category"}
            for slug in found
        )
        print(f"{len(found)} slugs across ~{pages} pages")
        time.sleep(args.delay)

    if not rows:
        print("\nNo records found — check that categories are correct and the site is accessible")
        return

    # Deduplicate keeping first category encountered
    inventory = pd.DataFrame(rows).drop_duplicates(subset=["Slug"], keep="first")

    # Also include Wayback slugs not already in the new set
    if wayback_slugs:
        missing = wayback_slugs - set(inventory["Slug"].tolist())
        if missing:
            extra = pd.DataFrame([
                {"Slug": slug, "Category": "wayback", "Source": "purplecarrot_wayback"}
                for slug in missing
            ])
            inventory = pd.concat([inventory, extra], ignore_index=True)

    args.out.parent.mkdir(parents=True, exist_ok=True)
    inventory.to_parquet(args.out, index=False)

    total_rows = len(inventory)
    from_categories = len(inventory[inventory["Source"] == "purplecarrot_category"])
    print(f"\nDone — {total_rows} total slugs saved to {args.out}")
    print(f" {from_categories} from category pages, {total_rows - from_categories} from Wayback only")
if __name__ == "__main__":
main()

View file

@ -1,301 +0,0 @@
"""
discover_wayback.py: enumerate Purple Carrot recipe slugs via the Wayback Machine.
Strategy:
1. CDX API: list all archived /api/v2/menus/* URLs (multiple timestamps)
2. Replay fetch: load each menu's menuItems and extract productPath slugs
3. CDX API: list all archived /api/v1/products/* URLs (direct slug capture)
4. CDX API: scan archived /recipe-categories/* HTML pages for older slugs
5. Deduplicate and write manifest to OUT_FILE
Output (JSONL, one record per recipe):
{"slug": "...", "title": "...", "subtitle": "...", "cook_time": "...",
"tags": [...], "serving_size": 2, "image_url": "...",
"wayback_ts": "20260412150557", "source": "menu|product_api|category_page"}
Usage:
conda run -n cf python -m scripts.pipeline.purple_carrot.discover_wayback
conda run -n cf python -m scripts.pipeline.purple_carrot.discover_wayback --out /Library/Assets/kiwi/pipeline/pc_slugs.jsonl
"""
from __future__ import annotations
import argparse
import json
import logging
import time
from pathlib import Path
from typing import Any
from urllib.parse import urlencode
import requests
logger = logging.getLogger(__name__)
CDX_BASE = "https://web.archive.org/cdx/search/cdx"
WB_BASE = "https://web.archive.org/web"
PC_HOST = "www.purplecarrot.com"
# Polite delay between Wayback replay fetches (seconds)
REPLAY_DELAY = 1.0
CDX_DELAY = 0.5
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/pc_slugs.jsonl")
# ── CDX helpers ───────────────────────────────────────────────────────────────
def cdx_query(url_pattern: str, **kwargs) -> list[dict]:
    """Run a Wayback CDX search and return one dict per result row.

    Extra keyword arguments are sent as additional CDX query parameters and
    override the defaults.  The request is tried up to three times with an
    exponentially growing pause; an empty list is returned when every attempt
    fails or the response carries no data rows.
    """
    query = {
        "url": url_pattern,
        "output": "json",
        "fl": "original,timestamp,statuscode",
        "collapse": "urlkey",
        "filter": "statuscode:200",
    }
    query.update(kwargs)

    for attempt in range(3):
        try:
            resp = requests.get(CDX_BASE, params=query, timeout=30)
            resp.raise_for_status()
            rows = resp.json()
            if not rows or len(rows) < 2:
                return []
            # The first row holds the column names for the rows that follow.
            columns, *data_rows = rows
            return [dict(zip(columns, data_row)) for data_row in data_rows]
        except Exception as exc:
            logger.warning("CDX attempt %d failed: %s", attempt + 1, exc)
            time.sleep(2 ** attempt)
    return []
def wayback_get(url: str, timestamp: str) -> Any | None:
    """Fetch the Wayback replay of *url* at *timestamp* and return its parsed JSON.

    Up to three attempts are made with an exponentially growing pause between
    them.  None is returned for a 404 or when all attempts fail.
    """
    replay_url = f"{WB_BASE}/{timestamp}/{url}"
    for attempt in range(3):
        try:
            resp = requests.get(replay_url, timeout=30)
            status = resp.status_code
            if status == 200:
                return resp.json()
            if status == 404:
                return None
        except Exception as exc:
            logger.warning("Wayback GET attempt %d failed for %s: %s", attempt + 1, url, exc)
        # Reached after an exception or an unexpected status: back off, retry.
        time.sleep(2 ** attempt)
    return None
# ── Slug extraction ───────────────────────────────────────────────────────────
def slug_from_product_path(path: str) -> str | None:
    """Return the recipe slug from a product path.

    '/recipe/foo-bar-baz' -> 'foo-bar-baz'.  Any query string or fragment is
    discarded first so it cannot end up inside the slug.  Returns None when
    *path* is empty or has no final path segment.
    """
    if not path:
        return None
    without_query = path.split("?", 1)[0].split("#", 1)[0]
    return without_query.strip("/").rsplit("/", 1)[-1] or None
def _menu_item_to_record(item: dict, wayback_ts: str) -> dict | None:
    """Convert one archived menu item into a manifest record.

    Returns None when the item's ``productPath`` does not yield a slug.
    """
    slug = slug_from_product_path(item.get("productPath", ""))
    if not slug:
        return None

    record = {"slug": slug}
    # Plain string fields: manifest key -> key used by the menu API.
    for out_key, api_key in (
        ("title", "title"),
        ("subtitle", "subtitle"),
        ("cook_time", "cookTime"),
    ):
        record[out_key] = item.get(api_key, "")
    record["tags"] = item.get("filterTags") or []
    record["serving_size"] = item.get("servingSize")
    record["image_url"] = item.get("imageURL", "")
    record["description"] = item.get("description", "")
    record["wayback_ts"] = wayback_ts
    record["source"] = "menu"
    return record
# ── Discovery passes ──────────────────────────────────────────────────────────
def pass_menus(seen_slugs: set[str]) -> list[dict]:
    """Collect slugs from every archived /api/v2/menus/<id> capture.

    *seen_slugs* is updated in place so later passes skip what was found here.
    Returns the records for slugs that were new to this pass.
    """
    # Find all distinct archived menu URLs
    menu_cdx = cdx_query(f"{PC_HOST}/api/v2/menus/*", limit="500")
    logger.info("CDX: %d archived menu URLs found", len(menu_cdx))
    time.sleep(CDX_DELAY)

    records: list[dict] = []
    handled_ids: set[str] = set()
    for entry in menu_cdx:
        bare_url = entry["original"].split("?")[0]
        ts = entry["timestamp"]
        menu_id = bare_url.rstrip("/").split("/")[-1]
        # Only numeric ids are individual menus; anything else is the listing
        # endpoint.  Each menu id is replayed once.
        if not menu_id.isdigit() or menu_id in handled_ids:
            continue
        handled_ids.add(menu_id)

        logger.info("Fetching menu %s (ts=%s) ...", menu_id, ts)
        data = wayback_get(bare_url + "?logged_out=true", ts)
        time.sleep(REPLAY_DELAY)
        if not data or "menuItems" not in data:
            continue

        for item in data["menuItems"]:
            rec = _menu_item_to_record(item, ts)
            if not rec or rec["slug"] in seen_slugs:
                continue
            seen_slugs.add(rec["slug"])
            records.append(rec)
            logger.debug(" + %s", rec["slug"])
        logger.info(" %d new slugs (total so far: %d)", len(records), len(seen_slugs))
    return records
def pass_product_api(seen_slugs: set[str]) -> list[dict]:
    """Pick up directly archived /api/v1/products/* URLs the menu pass missed.

    The slug is the last path segment of the archived URL, with any query
    string or fragment removed first (matching how the menu pass cleans URLs).
    *seen_slugs* is updated in place.  Returns metadata-free records for the
    slugs that were new to this pass.
    """
    records: list[dict] = []
    product_cdx = cdx_query(f"{PC_HOST}/api/v1/products/*", limit="5000")
    logger.info("CDX: %d archived product API URLs found", len(product_cdx))
    time.sleep(CDX_DELAY)

    for entry in product_cdx:
        path = entry["original"].split("?", 1)[0].split("#", 1)[0]
        slug = path.rstrip("/").split("/")[-1]
        # "products" is the collection endpoint itself, not a recipe.
        if not slug or slug == "products" or slug in seen_slugs:
            continue
        seen_slugs.add(slug)
        records.append({
            "slug": slug,
            "title": "",
            "subtitle": "",
            "cook_time": "",
            "tags": [],
            "serving_size": None,
            "image_url": "",
            "description": "",
            "wayback_ts": entry["timestamp"],
            "source": "product_api",
        })

    logger.info("product_api pass: %d new slugs", len(records))
    return records
def pass_category_pages(seen_slugs: set[str]) -> list[dict]:
    """Scan archived recipe-categories HTML pages for slugs not seen yet.

    Each distinct category URL (query string ignored) is replayed once and its
    HTML is searched with a regex for ``/recipe/<slug>`` references.
    *seen_slugs* is updated in place.  Returns metadata-free records for the
    slugs that were new to this pass.
    """
    import re

    slug_pattern = re.compile(r'["\s]/recipe/([a-z0-9][a-z0-9\-]{3,})["\s/?]')

    cat_cdx = cdx_query(f"{PC_HOST}/recipe-categories/*", limit="200")
    logger.info("CDX: %d archived category pages found", len(cat_cdx))
    time.sleep(CDX_DELAY)

    records: list[dict] = []
    visited: set[str] = set()
    for entry in cat_cdx:
        page_url = entry["original"].split("?")[0]
        if page_url in visited:
            continue
        visited.add(page_url)

        replay_url = f"{WB_BASE}/{entry['timestamp']}/{page_url}"
        try:
            resp = requests.get(replay_url, timeout=30)
            time.sleep(REPLAY_DELAY)
        except Exception as exc:
            logger.warning("Category page fetch failed: %s", exc)
            continue
        if resp.status_code != 200:
            continue

        for slug in slug_pattern.findall(resp.text):
            if slug in seen_slugs:
                continue
            seen_slugs.add(slug)
            records.append(
                dict(
                    slug=slug,
                    title="",
                    subtitle="",
                    cook_time="",
                    tags=[],
                    serving_size=None,
                    image_url="",
                    description="",
                    wayback_ts=entry["timestamp"],
                    source="category_page",
                )
            )

    logger.info("category_pages pass: %d new slugs", len(records))
    return records
# ── Main ──────────────────────────────────────────────────────────────────────
def discover(out_file: Path) -> None:
    """Run all discovery passes and append newly found slugs to *out_file*.

    Slugs already present in the JSONL manifest are loaded first so reruns are
    incremental.  Manifest lines that cannot be parsed (for example a line cut
    short by an interrupted earlier run) are skipped with a warning rather than
    aborting the run.
    """
    seen: set[str] = set()
    existing_count = 0
    ends_with_newline = True

    # Load previously discovered slugs so reruns are incremental
    if out_file.exists():
        # The manifest is always read and written as UTF-8, independent of the
        # platform's default encoding.
        with open(out_file, encoding="utf-8") as f:
            for line_no, raw_line in enumerate(f, start=1):
                ends_with_newline = raw_line.endswith("\n")
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    slug = json.loads(line)["slug"]
                except (json.JSONDecodeError, KeyError, TypeError) as exc:
                    logger.warning(
                        "Skipping malformed manifest line %d in %s: %s",
                        line_no, out_file, exc,
                    )
                    continue
                seen.add(slug)
                existing_count += 1
        logger.info("Loaded %d existing slugs from %s", len(seen), out_file)

    # Each pass adds what it finds to `seen`, so later passes skip those slugs.
    new_records: list[dict] = []
    new_records += pass_menus(seen)
    new_records += pass_product_api(seen)
    new_records += pass_category_pages(seen)

    out_file.parent.mkdir(parents=True, exist_ok=True)
    with open(out_file, "a", encoding="utf-8") as f:
        if new_records and not ends_with_newline:
            # Do not glue the first new record onto an unterminated last line.
            f.write("\n")
        f.writelines(json.dumps(rec) + "\n" for rec in new_records)

    total = existing_count + len(new_records)
    logger.info(
        "Done. %d new slugs written to %s (%d total).",
        len(new_records), out_file, total,
    )
def main() -> None:
    """CLI entry point: configure logging, then run the Wayback discovery."""
    cli = argparse.ArgumentParser(description="Discover Purple Carrot recipe slugs via Wayback")
    cli.add_argument(
        "--out",
        type=Path,
        default=DEFAULT_OUT,
        help=f"Output JSONL manifest (default: {DEFAULT_OUT})",
    )
    cli.add_argument("--debug", action="store_true")
    opts = cli.parse_args()

    if opts.debug:
        console_level = logging.DEBUG
    else:
        console_level = logging.INFO
    logging.basicConfig(
        level=console_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    # Imported here, as in the rest of this script, rather than at module top.
    from scripts.pipeline.log_utils import attach_pipeline_log

    attach_pipeline_log("discover_wayback")
    discover(opts.out)
if __name__ == "__main__":
main()

View file

@ -1,250 +0,0 @@
"""Playwright scraper for live purplecarrot.com recipe pages.
Uses the slug inventory already in recipes_purplecarrot.parquet and fills in
the missing ingredients/instructions by hitting the live site directly.
Usage:
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
[--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet] \
[--delay 2.5] \
[--limit 20]
"""
from __future__ import annotations
import argparse
import json
import re
import time
from pathlib import Path
from typing import Any
import pandas as pd
from playwright.sync_api import sync_playwright, Page, TimeoutError as PWTimeout
# ── Config ─────────────────────────────────────────────────────────────────────
BASE_URL = "https://www.purplecarrot.com/recipe/{slug}"
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet")
EXISTING_PARQUET = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet")
RENDER_WAIT_MS = 2500 # JS render settle time
NAV_TIMEOUT_MS = 20_000
# ── Page parser ────────────────────────────────────────────────────────────────
def _text(page: Page, selector: str) -> str:
    """Return the stripped inner text of the first match for *selector*, or ''."""
    node = page.query_selector(selector)
    if not node:
        return ""
    return node.inner_text().strip()
def _texts(page: Page, selector: str) -> list[str]:
    """Return the stripped inner text of every element matching *selector*."""
    collected: list[str] = []
    for node in page.query_selector_all(selector):
        collected.append(node.inner_text().strip())
    return collected
def _parse_recipe(page: Page, slug: str, source_url: str) -> dict[str, Any] | None:
    """Extract structured recipe data from the rendered page.

    Everything is parsed from the visible text of ``<body>``: the block between
    the "Ingredients" and "Instructions" headings becomes the ingredient list,
    and the text after "Instructions" is split into steps at lines that hold
    only a step number.

    Returns None when the page is a "Page Not Found" page, when the final URL
    no longer contains *slug* (redirect), or when no "Ingredients" heading was
    rendered.  Otherwise returns a row dict; ``HasFullRecipe`` is True only
    when both ingredients and instructions were found.
    """
    ing_heading = "\nIngredients\n"
    inst_heading = "\nInstructions\n"

    body = page.inner_text("body")
    # Abort if we've been bounced to a generic listing / 404
    if "Page Not Found" in body or slug not in page.url:
        return None

    ing_start = body.find(ing_heading)
    inst_start = body.find(inst_heading)
    footer_start = body.find("\nShop\n")  # footer sentinel
    if ing_start == -1:
        return None  # page didn't render recipe content

    # ── Title ──────────────────────────────────────────────────────────────
    title = (_text(page, "h1") or _text(page, "[class*='recipe-title']")).strip()
    if not title:
        # Fallback: last non-empty line before "Ingredients".  Guarded so a
        # body with nothing but whitespace before the heading yields "" instead
        # of raising IndexError.
        idx = body.find("Ingredients\n")
        preceding = body[:idx].strip().splitlines() if idx > 0 else []
        title = preceding[-1].strip() if preceding else ""

    # ── Ingredients ────────────────────────────────────────────────────────
    raw_ingredients: list[str] = []
    if inst_start > ing_start:
        ing_block = body[ing_start + len(ing_heading):inst_start]
        raw_ingredients = [ln.strip() for ln in ing_block.splitlines() if ln.strip()]

    # ── Instructions ───────────────────────────────────────────────────────
    raw_instructions: list[str] = []
    if inst_start != -1:
        end = footer_start if footer_start > inst_start else len(body)
        inst_block = body[inst_start + len(inst_heading):end].strip()
        current: list[str] = []
        for line in inst_block.splitlines():
            line = line.strip()
            if not line:
                continue
            if re.match(r"^\d+$", line):
                # A bare number opens a new step; flush the previous one.
                if current:
                    raw_instructions.append(" ".join(current))
                    current = []
            elif line.startswith("CULINARY NOTES"):
                break  # trailing notes are not part of the steps
            else:
                current.append(line)
        if current:
            raw_instructions.append(" ".join(current))

    # ── Nutrition ──────────────────────────────────────────────────────────
    def _extract_num(pattern: str) -> float | None:
        m = re.search(pattern, body)
        try:
            return float(m.group(1)) if m else None
        except ValueError:
            return None

    cal = _extract_num(r"(\d+)\s*CAL")
    fat = _extract_num(r"(\d+(?:\.\d+)?)g\s*FAT")
    carbs = _extract_num(r"(\d+(?:\.\d+)?)g\s*CARBS")
    prot = _extract_num(r"(\d+(?:\.\d+)?)g\s*PROTEIN")
    fiber = _extract_num(r"(\d+(?:\.\d+)?)g\s*FIBER")

    # ── Allergens / tags ───────────────────────────────────────────────────
    allergen_m = re.search(r"Allergens?:\s*([^\n]+)", body)
    allergens = allergen_m.group(1).strip() if allergen_m else ""
    # Feature tags like HIGH-PROTEIN, QUICK, etc. appear before Ingredients
    tags = re.findall(
        r"\b(HIGH-PROTEIN|QUICK|SPICY|LOW[\-\s]CALORIE|VEGAN|FAMILY\s+FRIENDLY)\b",
        body[:ing_start],
    )

    return {
        "Slug": slug,
        "Name": title,
        "SourceURL": source_url,
        "Source": "purplecarrot_live",
        "RecipeIngredientParts": raw_ingredients,
        "RecipeInstructions": raw_instructions,
        "Calories": cal,
        "FatContent": fat,
        "CarbohydrateContent": carbs,
        "ProteinContent": prot,
        "FiberContent": fiber,
        "Allergens": allergens,
        "Keywords": tags,
        "HasFullRecipe": bool(raw_ingredients and raw_instructions),
    }
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """CLI entry point: scrape live recipe pages for every slug in the inventory.

    With ``--resume``, slugs already present in ``--out`` are skipped and the
    new rows are merged into the existing file.  ``--limit`` caps the number of
    slugs actually scraped in this run; it is applied after the resume filter,
    so repeated ``--resume --limit N`` runs keep making progress.  Rows scraped
    so far are still saved when the run is interrupted with Ctrl-C.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=Path, default=DEFAULT_OUT)
    parser.add_argument("--delay", type=float, default=2.5,
                        help="Seconds between requests (be polite)")
    parser.add_argument("--limit", type=int, default=0,
                        help="Stop after N slugs (0 = all)")
    parser.add_argument("--resume", action="store_true",
                        help="Skip slugs already present in --out")
    parser.add_argument("--slugs-from", type=Path, default=None,
                        help="Read slug inventory from this parquet instead of the default Wayback one")
    args = parser.parse_args()

    # Load slug inventory — either from a custom parquet or the default Wayback run
    slugs_parquet = args.slugs_from if args.slugs_from else EXISTING_PARQUET
    df_existing = pd.read_parquet(slugs_parquet)
    slugs = df_existing["Slug"].dropna().unique().tolist()

    # SourceURL may be absent (custom parquets) or empty for some rows; those
    # slugs fall back to the URL constructed from the slug.
    source_urls: dict[str, str] = {}
    if "SourceURL" in df_existing.columns:
        source_urls = {
            s: u
            for s, u in zip(df_existing["Slug"], df_existing["SourceURL"])
            if isinstance(u, str) and u
        }

    # Resume support
    df_prev = None
    done_slugs: set[str] = set()
    if args.resume and args.out.exists():
        df_prev = pd.read_parquet(args.out)
        done_slugs = set(df_prev["Slug"].dropna().tolist())
        print(f"Resuming — {len(done_slugs)} slugs already scraped")

    # Drop finished slugs BEFORE applying --limit, otherwise the limit window
    # would be spent on slugs that are skipped anyway.
    pending = [s for s in slugs if s not in done_slugs]
    skipped = len(slugs) - len(pending)
    if args.limit:
        pending = pending[: args.limit]

    results: list[dict[str, Any]] = []
    failed = 0
    _UA = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
    )

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            for i, slug in enumerate(pending, start=1):
                url = BASE_URL.format(slug=slug)
                # Progress prefix; the outcome is printed on the same line.
                print(f"[{i}/{len(pending)}] {slug}", end=" ... ", flush=True)
                # Use a fresh browser context per slug to avoid Cloudflare
                # session-level bot detection, which fires on the 2nd+ request
                # in the same context.
                context = browser.new_context(
                    user_agent=_UA,
                    viewport={"width": 1280, "height": 900},
                )
                page = context.new_page()
                try:
                    page.goto(url, timeout=NAV_TIMEOUT_MS, wait_until="domcontentloaded")
                    page.wait_for_timeout(RENDER_WAIT_MS)
                    recipe = _parse_recipe(page, slug, source_urls.get(slug, url))
                except PWTimeout:
                    print("TIMEOUT")
                    failed += 1
                except Exception as exc:
                    print(f"ERROR: {exc}")
                    failed += 1
                else:
                    if recipe is None:
                        print("no content (404 or redirect)")
                        failed += 1
                    else:
                        n = len(recipe["RecipeIngredientParts"])
                        s = len(recipe["RecipeInstructions"])
                        if recipe["HasFullRecipe"]:
                            print(f"OK ({n} ingredients, {s} steps)")
                        else:
                            print(f"partial (ings={n}, steps={s})")
                        results.append(recipe)
                finally:
                    context.close()
                time.sleep(args.delay)
        except KeyboardInterrupt:
            # Keep what has been scraped so far instead of discarding the run.
            print("\nInterrupted — saving partial results")
        finally:
            browser.close()

    print(f"\nDone — {len(results)} scraped, {skipped} skipped, {failed} failed")
    if not results:
        print("No results — output not written")
        return

    df_out = pd.DataFrame(results)
    args.out.parent.mkdir(parents=True, exist_ok=True)
    if df_prev is not None:
        # On resume, append to the earlier output; the newest row wins per slug.
        df_out = pd.concat([df_prev, df_out], ignore_index=True)
        df_out = df_out.drop_duplicates(subset=["Slug"], keep="last")
    df_out.to_parquet(args.out, index=False)
    full_count = df_out["HasFullRecipe"].sum() if "HasFullRecipe" in df_out.columns else "?"
    print(f"Saved {len(df_out)} rows to {args.out} ({full_count} with full recipes)")
if __name__ == "__main__":
main()

View file

@ -1,538 +0,0 @@
"""
scrape_recipes.py: fetch full recipe data for slugs in pc_slugs.jsonl.
For each slug:
1. Try Wayback /api/v1/products/<slug>, oldest capture first (pre-HelloFresh
acquisition data is more complete).
2. If instructions are empty, try the recipe HTML page via Wayback and parse
inline JSON state or structured markup.
3. Merge with metadata already in the manifest (title, tags, cook_time, etc.)
4. Emit one row per recipe to recipes_purplecarrot.parquet in food.com columnar
format so build_recipe_index.py can import it unchanged.
Output columns (food.com schema + PC extras ignored by the indexer):
RecipeId, Name, Subtitle, RecipeIngredientParts, RecipeInstructions,
RecipeCategory, Keywords, Calories, FatContent, ProteinContent,
SodiumContent, SugarContent, CarbohydrateContent, FiberContent,
RecipeServings, Description, ImageURL, CookTime, Slug, Source
Usage:
conda run -n cf python -m scripts.pipeline.purple_carrot.scrape_recipes
conda run -n cf python -m scripts.pipeline.purple_carrot.scrape_recipes \\
--slugs /Library/Assets/kiwi/pipeline/pc_slugs.jsonl \\
--out /Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet \\
--resume
"""
from __future__ import annotations
import argparse
import json
import logging
import re
import time
from pathlib import Path
from typing import Any
import requests
logger = logging.getLogger(__name__)
CDX_BASE = "https://web.archive.org/cdx/search/cdx"
WB_BASE = "https://web.archive.org/web"
PC_HOST = "www.purplecarrot.com"
REPLAY_DELAY = 2.0
CDX_DELAY = 3.0 # archive.org CDX rate-limits aggressively; be polite
DEFAULT_SLUGS = Path("/Library/Assets/kiwi/pipeline/pc_slugs.jsonl")
DEFAULT_OUT = Path("/Library/Assets/kiwi/pipeline/recipes_purplecarrot.parquet")
# Inline JSON state embedded by the SSR renderer — used as fallback HTML parser
_NEXT_DATA_RE = re.compile(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', re.DOTALL)
_REDUX_STATE_RE = re.compile(r'window\.__INITIAL_STATE__\s*=\s*(\{.*?\});\s*\n', re.DOTALL)
# ── Wayback helpers ───────────────────────────────────────────────────────────
def _cdx_get(params: dict) -> list:
    """Issue one CDX request, retrying up to four times.

    A 429 or 503 response triggers an exponentially growing wait (15s, 30s,
    ...) before the next attempt; any other failure waits 5s times the attempt
    number.  Returns the decoded JSON rows, or an empty list when the response
    is empty or every attempt failed.
    """
    throttled = (429, 503)
    for attempt in range(4):
        try:
            resp = requests.get(CDX_BASE, params=params, timeout=25)
            if resp.status_code in throttled:
                backoff = 15 * (2 ** attempt)
                logger.debug("CDX %s — backing off %ds", resp.status_code, backoff)
                time.sleep(backoff)
                continue
            resp.raise_for_status()
            rows = resp.json()
            if rows:
                return rows
            return []
        except Exception as exc:
            logger.debug("CDX attempt %d failed: %s", attempt + 1, exc)
            time.sleep(5 * (attempt + 1))
    return []
def _cdx_timestamps(slug: str) -> list[str]:
    """Return capture timestamps for a product slug within the 2019-2021 window.

    Timestamps are returned in the order the CDX API lists them (expected to be
    oldest first).  At most 20 successful (HTTP 200) captures are requested.
    """
    query = dict(
        url=f"{PC_HOST}/api/v1/products/{slug}",
        output="json",
        fl="timestamp,statuscode",
        filter="statuscode:200",
        limit="20",
    )
    # Pre-HelloFresh-acquisition captures (2019-2021) are most likely
    # to have full instructions — API stripped them post-acquisition.
    query["from"] = "20190101"
    query["to"] = "20211231"

    rows = _cdx_get(query)
    if len(rows) < 2:
        return []
    # Row 0 is the column header; the timestamp is the first requested field.
    _header, *captures = rows
    return [capture[0] for capture in captures]
def _wayback_json(url: str, timestamp: str) -> Any | None:
    """Replay *url* at *timestamp* from the Wayback Machine and parse it as JSON.

    Makes up to three attempts with an exponentially growing pause.  Returns
    None for a 404/410 or when all attempts fail.
    """
    replay = f"{WB_BASE}/{timestamp}/{url}"
    gone = (404, 410)
    for attempt in range(3):
        try:
            resp = requests.get(replay, timeout=30)
            code = resp.status_code
            if code == 200:
                return resp.json()
            if code in gone:
                return None
        except Exception as exc:
            logger.debug("Wayback JSON attempt %d failed (%s): %s", attempt + 1, url, exc)
        # Reached after an exception or an unexpected status: back off, retry.
        time.sleep(2 ** attempt)
    return None
def _wayback_html(url: str, timestamp: str) -> str | None:
    """Replay *url* at *timestamp* from the Wayback Machine and return its text.

    Makes up to three attempts with an exponentially growing pause.  Returns
    None for a 404/410 or when all attempts fail.
    """
    replay = f"{WB_BASE}/{timestamp}/{url}"
    gone = (404, 410)
    for attempt in range(3):
        try:
            resp = requests.get(replay, timeout=30)
            code = resp.status_code
            if code == 200:
                return resp.text
            if code in gone:
                return None
        except Exception as exc:
            logger.debug("Wayback HTML attempt %d failed (%s): %s", attempt + 1, url, exc)
        # Reached after an exception or an unexpected status: back off, retry.
        time.sleep(2 ** attempt)
    return None
# ── Recipe extraction from API JSON ──────────────────────────────────────────
def _ingredient_line(item: Any) -> str:
    """Render one raw ingredient entry (dict or str) as a display line, or ''."""
    if isinstance(item, str):
        return item.strip()
    if not isinstance(item, dict):
        return ""
    raw = item.get("raw") or item.get("display_name") or ""
    if raw:
        return raw
    qty = item.get("quantity") or item.get("qty") or ""
    unit = item.get("unit") or ""
    nested = item.get("ingredient")
    # Name lookup order: the item's own name, then a nested ingredient object,
    # then the flat "ingredient_name" field.  Written as an explicit chain so
    # the item's own name is honoured whatever shape "ingredient" has.
    name = (
        item.get("name")
        or (nested.get("name") if isinstance(nested, dict) else None)
        or item.get("ingredient_name")
        or ""
    )
    return " ".join(filter(None, [str(qty), str(unit), str(name)])).strip()


def _extract_from_api(data: dict) -> dict | None:
    """Parse a /api/v1/products/<slug> response into our recipe dict.

    Returns None when *data* is empty or not a dict.  Otherwise returns a dict
    that may be only partially populated; the caller merges it with manifest
    metadata.  Only the first SKU is read.  ``has_full_recipe`` is True when
    both steps and ingredients were found.
    """
    if not data or not isinstance(data, dict):
        return None

    # `or ""` guards against keys that are present but null.
    title = (data.get("title") or "").strip()
    subtitle = (data.get("subtitle") or "").strip()
    slug = data.get("slug", "")

    skus = data.get("skus") or []
    first = skus[0] if isinstance(skus, list) and skus else {}
    sku = first if isinstance(first, dict) else {}

    # Instructions: list of {step_number, title, description}
    raw_instructions = [s for s in (sku.get("instructions") or []) if isinstance(s, dict)]
    steps: list[str] = []
    for step in sorted(raw_instructions, key=lambda s: s.get("step_number") or 0):
        parts = [step[key] for key in ("title", "description") if step.get(key)]
        if parts:
            steps.append(". ".join(parts))

    # Ingredients: may be in ingredients_quantity or ingredients
    raw_ingr = sku.get("ingredients_quantity") or sku.get("ingredients") or []
    ingredients = [line for line in map(_ingredient_line, raw_ingr) if line]

    nutrition = sku.get("nutrition_label")
    if not isinstance(nutrition, dict):
        nutrition = {}
    calories = _num(nutrition.get("calories") or sku.get("calories"))
    fat = _num(nutrition.get("total_fat") or sku.get("fat"))
    protein = _num(nutrition.get("protein") or sku.get("protein"))
    sodium = _num(nutrition.get("sodium") or sku.get("sodium"))
    sugar = _num(nutrition.get("sugar") or nutrition.get("total_sugars"))
    carbs = _num(nutrition.get("total_carbohydrate") or sku.get("carbs"))
    fiber = _num(nutrition.get("dietary_fiber") or sku.get("fiber"))

    tags = sku.get("tags") or data.get("tags") or []
    category = sku.get("meal_type") or sku.get("product_type") or ""
    servings = _num(sku.get("servings"))
    cook_time = sku.get("prep_and_cook_time") or ""
    description = sku.get("description") or ""

    images = sku.get("hero_images") or sku.get("image_versions") or []
    # hero_images can be a list OR a dict keyed by size string — normalise to list
    if isinstance(images, dict):
        images = list(images.values())
    image_url = ""
    if images and isinstance(images[0], dict):
        image_url = images[0].get("image_url") or images[0].get("url") or ""
    if not image_url and data.get("square_image"):
        sq = data["square_image"]
        image_url = (sq.get("url") or "") if isinstance(sq, dict) else ""

    return {
        "slug": slug,
        "title": title,
        "subtitle": subtitle,
        "steps": steps,
        "ingredients": ingredients,
        "category": category,
        "tags": tags,
        "calories": calories,
        "fat": fat,
        "protein": protein,
        "sodium": sodium,
        "sugar": sugar,
        "carbs": carbs,
        "fiber": fiber,
        "servings": servings,
        "cook_time": cook_time,
        "description": description,
        "image_url": image_url,
        "has_full_recipe": bool(steps and ingredients),
    }


def _num(val: Any) -> float | None:
    """Return the leading number of *val* as a positive float, else None.

    Accepts plain numbers and strings with a trailing unit ('12g', '120mg',
    '3.5 g').  Zero, negative, missing and unparseable values all yield None.
    """
    if val is None:
        return None
    # Take the leading numeric token instead of deleting unit letters: deleting
    # "g" first turned "120mg" into "120m", which then failed to parse.
    found = re.match(r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)", str(val))
    if not found:
        return None
    v = float(found.group(1))
    return v if v > 0 else None
# ── Fallback: HTML inline state parsing ──────────────────────────────────────
def _recipe_from_json_ld(ld: dict, slug: str) -> dict:
    """Map a schema.org ``Recipe`` JSON-LD object onto the scraper's recipe dict."""
    # recipeInstructions may be a single string or step object rather than a
    # list; wrap scalars so we never iterate over characters or dict keys.
    instructions = ld.get("recipeInstructions") or []
    if isinstance(instructions, (str, dict)):
        instructions = [instructions]
    steps = []
    for inst in instructions:
        if isinstance(inst, dict):
            steps.append(inst.get("text", ""))
        elif isinstance(inst, str):
            steps.append(inst)
    steps = [s for s in steps if s]

    ingredients = ld.get("recipeIngredient") or []
    if isinstance(ingredients, str):
        ingredients = [ingredients]
    ingredients = [i for i in ingredients if i]

    # "image" may be a URL string, a list of either, or an object with "url".
    image = ld.get("image")
    if isinstance(image, list):
        image = image[0] if image else ""
    if isinstance(image, dict):
        image = image.get("url")
    image_url = image if isinstance(image, str) else ""

    keywords = ld.get("keywords")
    return {
        "slug": slug,
        "title": ld.get("name", ""),
        "subtitle": "",
        "steps": steps,
        "ingredients": ingredients,
        "category": ld.get("recipeCategory", ""),
        "tags": keywords.split(",") if isinstance(keywords, str) else [],
        "calories": _num((ld.get("nutrition") or {}).get("calories")),
        "fat": None, "protein": None, "sodium": None,
        "sugar": None, "carbs": None, "fiber": None,
        "servings": _num(ld.get("recipeYield")),
        "cook_time": str(ld.get("totalTime") or ld.get("cookTime") or ""),
        "description": ld.get("description", ""),
        "image_url": image_url,
        # Same rule as the API extractor: "full" means steps AND ingredients.
        "has_full_recipe": bool(steps and ingredients),
    }


def _extract_from_html(html: str, slug: str) -> dict | None:
    """Try to pull recipe data from state embedded in an archived HTML page.

    Three sources are tried in order and the first usable one wins:

    1. Next.js ``__NEXT_DATA__`` (``props.pageProps.recipe`` / ``product``),
    2. a Redux initial-state blob (several common top-level keys),
    3. ``application/ld+json`` blocks whose ``@type`` is ``Recipe``.

    Sources 1 and 2 are handed to ``_extract_from_api``; source 3 is mapped
    directly. Parsing is best-effort: a malformed blob is logged at debug
    level and the next source is tried.

    Args:
        html: Raw page markup.
        slug: Recipe slug, used as the ``slug`` of a JSON-LD derived record.

    Returns:
        A recipe dict, or ``None`` when no source yields one.
    """
    # Attempt 1: Next.js __NEXT_DATA__
    m = _NEXT_DATA_RE.search(html)
    if m:
        try:
            state = json.loads(m.group(1))
            # Walk the Next.js page props tree looking for recipe data
            props = state.get("props", {}).get("pageProps", {})
            recipe = props.get("recipe") or props.get("product")
            if recipe and isinstance(recipe, dict) and recipe.get("title"):
                return _extract_from_api(recipe)
        except Exception:
            logger.debug("[%s] __NEXT_DATA__ not usable", slug, exc_info=True)

    # Attempt 2: Redux __INITIAL_STATE__
    m = _REDUX_STATE_RE.search(html)
    if m:
        try:
            state = json.loads(m.group(1))
            # Try common Redux state shapes
            for key in ("recipe", "product", "currentRecipe", "currentProduct"):
                recipe = state.get(key)
                if recipe and isinstance(recipe, dict) and recipe.get("title"):
                    return _extract_from_api(recipe)
        except Exception:
            logger.debug("[%s] Redux state not usable", slug, exc_info=True)

    # Attempt 3: JSON-LD structured data
    ld_matches = re.findall(
        r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>',
        html, re.DOTALL
    )
    for raw in ld_matches:
        try:
            ld = json.loads(raw)
            if isinstance(ld, list):
                # Ignore non-dict entries instead of letting one of them
                # abort the whole block.
                ld = next(
                    (x for x in ld if isinstance(x, dict) and x.get("@type") == "Recipe"),
                    None,
                )
            if not isinstance(ld, dict) or ld.get("@type") != "Recipe":
                continue
            return _recipe_from_json_ld(ld, slug)
        except Exception:
            logger.debug("[%s] JSON-LD block not usable", slug, exc_info=True)
    return None
# ── Per-slug fetch ─────────────────────────────────────────────────────────────
def fetch_recipe(slug: str, manifest_meta: dict) -> dict | None:
    """Assemble the most complete record available for ``slug`` from Wayback.

    The archived product API is tried first, one capture timestamp at a time.
    If that never yields a record flagged ``has_full_recipe``, archived HTML
    recipe pages are tried. Whatever was extracted is layered over the
    manifest metadata, which fills any gaps.

    Returns:
        The merged record, or ``None`` when it ends up without a title.
    """
    product_api_url = f"https://{PC_HOST}/api/v1/products/{slug}"
    recipe_page_url = f"https://{PC_HOST}/recipe/{slug}"

    # Product API pass. Fall back to the manifest's own capture timestamp
    # when the CDX lookup returns nothing.
    api_timestamps = _cdx_timestamps(slug)
    time.sleep(CDX_DELAY)
    if not api_timestamps and manifest_meta.get("wayback_ts"):
        api_timestamps = [manifest_meta["wayback_ts"]]

    extracted: dict | None = None
    for capture_ts in api_timestamps:
        payload = _wayback_json(product_api_url, capture_ts)
        time.sleep(REPLAY_DELAY)
        if payload:
            parsed = _extract_from_api(payload)
            if parsed:
                extracted = parsed
                if extracted.get("has_full_recipe"):
                    logger.debug("[%s] Full recipe from API (ts=%s)", slug, capture_ts)
                    break
                logger.debug("[%s] Partial API data (ts=%s) — trying HTML fallback", slug, capture_ts)

    # HTML pass, only when the API gave nothing or only a partial record.
    if not (extracted and extracted.get("has_full_recipe")):
        cdx_rows = _cdx_get({
            "url": f"{PC_HOST}/recipe/{slug}",
            "output": "json",
            "fl": "timestamp,statuscode",
            "filter": "statuscode:200",
            "limit": "10",
        })
        page_timestamps = []
        if len(cdx_rows) > 1:
            # Row 0 is skipped — presumably the CDX header row (TODO confirm).
            page_timestamps = [row[0] for row in cdx_rows[1:]]
        time.sleep(CDX_DELAY)
        for capture_ts in page_timestamps:
            page = _wayback_html(recipe_page_url, capture_ts)
            time.sleep(REPLAY_DELAY)
            if not page:
                continue
            from_page = _extract_from_html(page, slug)
            if from_page and from_page.get("has_full_recipe"):
                logger.debug("[%s] Full recipe from HTML (ts=%s)", slug, capture_ts)
                extracted = from_page
                break

    # Baseline record built from the manifest alone.
    merged: dict = {
        "slug": slug,
        "title": manifest_meta.get("title", ""),
        "subtitle": manifest_meta.get("subtitle", ""),
        "steps": [],
        "ingredients": [],
        "category": "",
        "tags": manifest_meta.get("tags") or [],
        "calories": None,
        "fat": None,
        "protein": None,
        "sodium": None,
        "sugar": None,
        "carbs": None,
        "fiber": None,
        "servings": manifest_meta.get("serving_size"),
        "cook_time": manifest_meta.get("cook_time", ""),
        "description": manifest_meta.get("description", ""),
        "image_url": manifest_meta.get("image_url", ""),
        "source": "purple_carrot",
        "wayback_ts": manifest_meta.get("wayback_ts", ""),
        "has_full_recipe": False,
    }

    if extracted:
        for key, val in extracted.items():
            # The manifest value survives only when the extracted field is
            # empty and the manifest actually has something for that key.
            keep_manifest = not val and key in merged and merged[key]
            if not keep_manifest:
                merged[key] = val

    if merged["title"]:
        return merged
    logger.warning("[%s] No title — skipping", slug)
    return None
# ── Output formatting ─────────────────────────────────────────────────────────
def _to_dataframe_row(r: dict) -> dict:
"""Convert merged recipe dict to food.com-compatible parquet row."""
# Build plain-text input for allrecipes-style corpus compatibility
lines = [r["title"]]
if r.get("subtitle"):
lines.append(r["subtitle"])
if r.get("description"):
lines.append("")
lines.append(r["description"])
if r.get("ingredients"):
lines += ["", "Ingredients:"] + [f"- {i}" for i in r["ingredients"]]
if r.get("steps"):
lines += ["", "Directions:"] + [f"- {s}" for s in r["steps"]]
plain_text = "\n".join(lines)
source_url = f"https://www.purplecarrot.com/recipe/{r['slug']}"
return {
# food.com schema columns (used by build_recipe_index.py)
"RecipeId": f"pc_{r['slug']}",
"Name": r["title"],
"RecipeIngredientParts": r.get("ingredients") or [],
"RecipeInstructions": r.get("steps") or [],
"RecipeCategory": r.get("category", ""),
"Keywords": r.get("tags") or [],
"Calories": r.get("calories"),
"FatContent": r.get("fat"),
"ProteinContent": r.get("protein"),
"SodiumContent": r.get("sodium"),
"SugarContent": r.get("sugar"),
"CarbohydrateContent": r.get("carbs"),
"FiberContent": r.get("fiber"),
"RecipeServings": r.get("servings"),
# PC-specific extras (ignored by indexer, used by training pipeline)
"Subtitle": r.get("subtitle", ""),
"Description": r.get("description", ""),
"ImageURL": r.get("image_url", ""),
"CookTime": r.get("cook_time", ""),
"Slug": r["slug"],
"Source": "purple_carrot",
"SourceURL": source_url, # canonical attribution link shown in recipe UI
"HasFullRecipe": r.get("has_full_recipe", False),
"WaybackTs": r.get("wayback_ts", ""),
# Also emit plain-text input for allrecipes-compatible corpus search
"input": plain_text,
}
# ── Main ──────────────────────────────────────────────────────────────────────
def scrape(slugs_file: Path, out_file: Path, resume: bool = True) -> None:
    """Fetch every recipe in the slug manifest and write them to parquet.

    Args:
        slugs_file: JSON-lines manifest; each non-blank line is an object
            with at least a ``"slug"`` key.
        out_file: Destination parquet file, rewritten at every checkpoint.
        resume: When true and ``out_file`` exists, rows already in it are
            kept and their slugs are not fetched again.

    Logs an error and returns without writing if the manifest is missing.
    """
    import pandas as pd

    # Load manifest
    if not slugs_file.exists():
        logger.error("Slugs manifest not found: %s", slugs_file)
        return

    manifest: dict[str, dict] = {}
    # Read as UTF-8 explicitly: JSON is UTF-8 by specification, and the
    # platform default encoding would mangle non-ASCII text on some hosts.
    with open(slugs_file, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                rec = json.loads(line)
                slug = rec["slug"]
                # First record for a slug wins, unless a later one comes
                # from the "menu" source, which replaces it.
                if slug not in manifest or rec.get("source") == "menu":
                    manifest[slug] = rec
    logger.info("Manifest: %d unique slugs", len(manifest))

    # Load already-scraped slugs for resume
    done_slugs: set[str] = set()
    existing_rows: list[dict] = []
    if resume and out_file.exists():
        try:
            existing_df = pd.read_parquet(out_file)
            done_slugs = set(existing_df["Slug"].tolist())
            existing_rows = existing_df.to_dict("records")
            logger.info("Resume: %d already scraped", len(done_slugs))
        except Exception as exc:
            # An unreadable file is not fatal: start over from an empty set.
            logger.warning("Could not load existing parquet for resume: %s", exc)

    todo = [s for s in manifest if s not in done_slugs]
    logger.info("%d slugs to fetch", len(todo))

    rows = list(existing_rows)
    for i, slug in enumerate(todo, 1):
        logger.info("[%d/%d] %s", i, len(todo), slug)
        recipe = fetch_recipe(slug, manifest[slug])
        if recipe:
            rows.append(_to_dataframe_row(recipe))
            status = "full" if recipe.get("has_full_recipe") else "partial"
            logger.info(" -> %s (%s)", recipe.get("title", "?"), status)
        else:
            logger.warning(" -> skipped (no title)")
        # Write checkpoint every 50 recipes
        if i % 50 == 0:
            _write_parquet(rows, out_file)
            logger.info("Checkpoint: %d recipes written", len(rows))

    _write_parquet(rows, out_file)
    full = sum(1 for r in rows if r.get("HasFullRecipe"))
    logger.info(
        "Done. %d recipes written to %s (%d full, %d partial).",
        len(rows), out_file, full, len(rows) - full,
    )
def _write_parquet(rows: list[dict], out_file: Path) -> None:
    """Write ``rows`` to ``out_file`` as parquet, creating parent directories.

    The data goes to a sibling ``*.tmp`` file first and is then moved over
    ``out_file``. This function is also used for periodic checkpoints, and
    writing in place would leave a truncated file if the process died
    mid-write — the same file a later resume run reads back.
    """
    import pandas as pd

    out_file.parent.mkdir(parents=True, exist_ok=True)
    tmp_file = out_file.with_name(out_file.name + ".tmp")
    pd.DataFrame(rows).to_parquet(tmp_file, index=False)
    # Path.replace overwrites the destination in a single rename step.
    tmp_file.replace(out_file)
def main() -> None:
    """Command-line entry point: parse options, set up logging, run the scrape."""
    cli = argparse.ArgumentParser(description="Scrape Purple Carrot recipes from Wayback")
    cli.add_argument("--slugs", type=Path, default=DEFAULT_SLUGS)
    cli.add_argument("--out", type=Path, default=DEFAULT_OUT)
    cli.add_argument(
        "--no-resume",
        dest="resume",
        action="store_false",
        help="Start fresh (ignore existing parquet)",
    )
    cli.add_argument("--debug", action="store_true")
    opts = cli.parse_args()

    log_level = logging.INFO
    if opts.debug:
        log_level = logging.DEBUG
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    # Imported here, after basicConfig, as in the original layout.
    from scripts.pipeline.log_utils import attach_pipeline_log

    attach_pipeline_log("scrape_recipes")
    scrape(opts.slugs, opts.out, resume=opts.resume)


if __name__ == "__main__":
    main()

View file

@ -1,41 +0,0 @@
#!/usr/bin/env bash
# Weekly Purple Carrot recipe harvest
# Runs every Sunday night via cron.
# Discovers this week's menu, scrapes full recipe data for it, and ingests
# the result into the shared corpus DB.
# Logs to /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
# Abort on the first failing command, unset variable, or failed pipeline stage.
set -euo pipefail
REPO="/Library/Development/CircuitForge/kiwi"
MENU_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet"
LIVE_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet"
LOG_DIR="/Library/Assets/kiwi/pipeline/logs"
LOG="$LOG_DIR/purple_carrot_harvest.log"
mkdir -p "$LOG_DIR"
echo "=== Purple Carrot harvest $(date -u '+%Y-%m-%d %H:%M UTC') ===" >> "$LOG"
cd "$REPO"
# Step 1: discover this week's menu slugs
echo "[1/3] Discovering current menu slugs..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
--out "$MENU_OUT" 2>&1 | tee -a "$LOG"
# Step 2: scrape full recipe data for new slugs only (--resume skips already-scraped)
echo "[2/3] Scraping live recipe pages..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
--slugs-from "$MENU_OUT" \
--out "$LIVE_OUT" \
--resume \
--delay 3.0 2>&1 | tee -a "$LOG"
# Step 3: ingest new recipes into the shared corpus DB
echo "[3/3] Ingesting into corpus DB..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/ingest_purplecarrot.py \
--parquet "$LIVE_OUT" \
--db /Library/Assets/kiwi/kiwi.db 2>&1 | tee -a "$LOG"
echo "=== Done $(date -u '+%Y-%m-%d %H:%M UTC') ===" >> "$LOG"
echo "" >> "$LOG"

View file

@ -1,127 +0,0 @@
"""Tests for task-based routing added to get_meal_plan_router()."""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
def _make_task_ctx(url: str = "http://node:8080") -> MagicMock:
"""Mock context manager returned by task_allocate()."""
alloc = MagicMock()
alloc.url = url
alloc.allocation_id = "alloc-task-1"
alloc.service = "cf-text"
ctx = MagicMock()
ctx.__enter__ = MagicMock(return_value=alloc)
ctx.__exit__ = MagicMock(return_value=False)
return ctx
def _make_task_ctx_not_registered() -> MagicMock:
    """Build a mock context manager whose ``__enter__`` raises TaskNotRegistered."""
    from app.services.task_inference import TaskNotRegistered

    manager = MagicMock()
    manager.__enter__.side_effect = TaskNotRegistered("not registered")
    manager.__exit__.return_value = False
    return manager
def _make_direct_alloc_ctx(url: str = "http://node:8080") -> MagicMock:
"""Mock context manager returned by CFOrchClient.allocate()."""
alloc = MagicMock()
alloc.url = url
ctx = MagicMock()
ctx.__enter__ = MagicMock(return_value=alloc)
ctx.__exit__ = MagicMock(return_value=False)
return ctx
def test_task_path_returns_orch_router_on_success(monkeypatch):
    """A successful task allocation yields an _OrchTextRouter aimed at the allocated node."""
    from unittest import mock

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    task_ctx = _make_task_ctx(url="http://node:9001")
    # Patch the name where llm_router resolves it: its own module namespace.
    with mock.patch("app.services.meal_plan.llm_router.task_allocate", return_value=task_ctx):
        from app.services.meal_plan.llm_router import _OrchTextRouter, get_meal_plan_router

        router, _ctx = get_meal_plan_router()
    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9001"
def test_task_not_registered_falls_back_to_direct_allocate(monkeypatch):
    """TaskNotRegistered from the task path leads to a direct cf-text allocation."""
    from unittest import mock

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    fallback_ctx = _make_direct_alloc_ctx(url="http://node:9002")
    unregistered_ctx = _make_task_ctx_not_registered()
    # task_allocate is patched in llm_router's namespace so entering it raises.
    with mock.patch("app.services.meal_plan.llm_router.task_allocate", return_value=unregistered_ctx):
        with mock.patch("app.services.meal_plan.llm_router.CFOrchClient") as client_cls:
            client_cls.return_value.allocate.return_value = fallback_ctx
            from app.services.meal_plan.llm_router import _OrchTextRouter, get_meal_plan_router

            router, _ctx = get_meal_plan_router()
    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9002"
def test_no_cf_orch_url_returns_llm_router(monkeypatch):
    """With CF_ORCH_URL unset, get_meal_plan_router() hands back the LLMRouter instance."""
    from unittest import mock

    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    plain_router = MagicMock()
    with mock.patch("app.services.meal_plan.llm_router.LLMRouter", return_value=plain_router):
        from app.services.meal_plan.llm_router import get_meal_plan_router

        router, _ctx = get_meal_plan_router()
    assert router is plain_router
def test_tier1_general_exception_falls_back_to_direct_allocate(monkeypatch):
    """A RuntimeError from the task path also leads to a direct allocation."""
    from unittest import mock

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    fallback_ctx = _make_direct_alloc_ctx(url="http://node:9003")
    broken_ctx = MagicMock()
    broken_ctx.__enter__.side_effect = RuntimeError("coordinator down")
    broken_ctx.__exit__.return_value = False
    with mock.patch("app.services.meal_plan.llm_router.task_allocate", return_value=broken_ctx):
        with mock.patch("app.services.meal_plan.llm_router.CFOrchClient") as client_cls:
            client_cls.return_value.allocate.return_value = fallback_ctx
            from app.services.meal_plan.llm_router import _OrchTextRouter, get_meal_plan_router

            router, _ctx = get_meal_plan_router()
    assert isinstance(router, _OrchTextRouter)
    assert router._base_url == "http://node:9003"
def test_tier2_none_alloc_releases_ctx_and_falls_through(monkeypatch):
    """When the direct allocation yields None, its context is exited and LLMRouter is used."""
    from unittest import mock

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    empty_ctx = MagicMock()
    empty_ctx.__enter__.return_value = None
    empty_ctx.__exit__.return_value = False
    plain_router = MagicMock()
    unregistered_ctx = _make_task_ctx_not_registered()
    with mock.patch("app.services.meal_plan.llm_router.task_allocate", return_value=unregistered_ctx):
        with mock.patch("app.services.meal_plan.llm_router.CFOrchClient") as client_cls:
            with mock.patch("app.services.meal_plan.llm_router.LLMRouter", return_value=plain_router):
                client_cls.return_value.allocate.return_value = empty_ctx
                from app.services.meal_plan.llm_router import get_meal_plan_router

                router, _ctx = get_meal_plan_router()
    assert router is plain_router
    empty_ctx.__exit__.assert_called_once_with(None, None, None)

View file

@ -1,164 +0,0 @@
"""Tests for app/services/task_inference.py"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
def _ok_resp(url: str = "http://node:8080", allocation_id: str = "alloc-123") -> MagicMock:
m = MagicMock()
m.status_code = 200
m.is_success = True
m.json.return_value = {
"url": url,
"allocation_id": allocation_id,
"gpu_id": 0,
"started": True,
"warm": False,
}
return m
def _err_resp(status_code: int, text: str = "error") -> MagicMock:
m = MagicMock()
m.status_code = status_code
m.is_success = False
m.text = text
return m
def test_task_allocate_yields_allocation_on_200(monkeypatch):
    """A 200 reply gives an allocation with url, allocation_id and service; it is released afterwards."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    with patch("app.services.task_inference.httpx.post", return_value=_ok_resp()) as post_mock:
        with patch("app.services.task_inference.httpx.delete") as delete_mock:
            from app.services.task_inference import task_allocate

            with task_allocate("kiwi", "meal_plan", service_hint="cf-text") as alloc:
                assert alloc.url == "http://node:8080"
                assert alloc.allocation_id == "alloc-123"
                assert alloc.service == "cf-text"
    # First positional argument of the POST is the coordinator endpoint.
    posted_to = post_mock.call_args[0][0]
    assert posted_to == "http://coord:7700/api/inference/task"
    delete_mock.assert_called_once()
def test_task_allocate_uses_service_from_response_when_present(monkeypatch):
    """A ``service`` field in the coordinator reply takes precedence over service_hint."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    reply = _ok_resp()
    reply.json.return_value["service"] = "cf-vision"
    with patch("app.services.task_inference.httpx.post", return_value=reply):
        with patch("app.services.task_inference.httpx.delete"):
            from app.services.task_inference import task_allocate

            with task_allocate("kiwi", "ocr", service_hint="cf-docuvision") as alloc:
                assert alloc.service == "cf-vision"
def test_task_allocate_404_raises_task_not_registered(monkeypatch):
    """A 404 from the coordinator surfaces as TaskNotRegistered."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    not_found = _err_resp(404)
    with patch("app.services.task_inference.httpx.post", return_value=not_found):
        from app.services.task_inference import TaskNotRegistered, task_allocate

        with pytest.raises(TaskNotRegistered):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
def test_task_allocate_503_raises_runtime_error(monkeypatch):
    """A 503 from the coordinator surfaces as a RuntimeError mentioning the status code."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    unavailable = _err_resp(503, "no GPU")
    with patch("app.services.task_inference.httpx.post", return_value=unavailable):
        from app.services.task_inference import task_allocate

        with pytest.raises(RuntimeError, match="HTTP 503"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
def test_task_allocate_release_called_on_clean_exit(monkeypatch):
    """Leaving the context normally issues a DELETE naming the service and allocation id."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    granted = _ok_resp(allocation_id="xyz")
    with patch("app.services.task_inference.httpx.post", return_value=granted):
        with patch("app.services.task_inference.httpx.delete") as delete_mock:
            from app.services.task_inference import task_allocate

            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
    released_url = delete_mock.call_args[0][0]
    assert "cf-text" in released_url
    assert "xyz" in released_url
def test_task_allocate_release_called_when_inner_block_raises(monkeypatch):
    """The DELETE is still issued when the ``with`` body raises."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    granted = _ok_resp(allocation_id="abc")
    with patch("app.services.task_inference.httpx.post", return_value=granted):
        with patch("app.services.task_inference.httpx.delete") as delete_mock:
            from app.services.task_inference import task_allocate

            with pytest.raises(ValueError):
                with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                    raise ValueError("inner error")
    delete_mock.assert_called_once()
def test_task_allocate_release_failure_is_swallowed(monkeypatch):
    """A failing DELETE on release must not escape the context manager."""
    import httpx as _httpx

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    release_error = _httpx.RequestError("gone", request=MagicMock())
    with patch("app.services.task_inference.httpx.post", return_value=_ok_resp()):
        with patch("app.services.task_inference.httpx.delete", side_effect=release_error):
            from app.services.task_inference import task_allocate

            with task_allocate("kiwi", "meal_plan", service_hint="cf-text") as alloc:
                assert alloc.url == "http://node:8080"
            # Reaching this point means the release error was not propagated.
def test_task_allocate_no_orch_url_raises_runtime_error(monkeypatch):
    """Without CF_ORCH_URL in the environment, task_allocate() raises RuntimeError."""
    from app.services.task_inference import task_allocate

    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    with pytest.raises(RuntimeError, match="CF_ORCH_URL"):
        with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
            pass
def test_task_allocate_network_error_raises_runtime_error(monkeypatch):
    """An httpx.RequestError during the POST is re-raised as RuntimeError."""
    import httpx as _httpx

    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    network_error = _httpx.RequestError("timeout", request=MagicMock())
    with patch("app.services.task_inference.httpx.post", side_effect=network_error):
        from app.services.task_inference import task_allocate

        with pytest.raises(RuntimeError, match="unreachable"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
def test_task_allocate_malformed_json_raises_runtime_error(monkeypatch):
    """A 200 reply whose body is not JSON surfaces as a 'malformed' RuntimeError."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    html_reply = MagicMock(status_code=200, is_success=True, text="<html>proxy error</html>")
    html_reply.json.side_effect = ValueError("not json")
    with patch("app.services.task_inference.httpx.post", return_value=html_reply):
        from app.services.task_inference import task_allocate

        with pytest.raises(RuntimeError, match="malformed"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass
def test_task_allocate_missing_url_field_raises_runtime_error(monkeypatch):
    """A 200 reply lacking the ``url`` field surfaces as a 'malformed' RuntimeError."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    incomplete = MagicMock(status_code=200, is_success=True, text='{"allocation_id": "x"}')
    # The parsed body deliberately has no "url" key.
    incomplete.json.return_value = {"allocation_id": "x"}
    with patch("app.services.task_inference.httpx.post", return_value=incomplete):
        from app.services.task_inference import task_allocate

        with pytest.raises(RuntimeError, match="malformed"):
            with task_allocate("kiwi", "meal_plan", service_hint="cf-text"):
                pass

View file

@ -1,88 +0,0 @@
"""Tests for task-based routing added to _try_docuvision()."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
def _mock_doc_result(text: str = "RECEIPT TEXT") -> MagicMock:
r = MagicMock()
r.text = text
return r
def _make_task_ctx(url: str = "http://node:9010") -> MagicMock:
alloc = MagicMock()
alloc.url = url
alloc.allocation_id = "alloc-vis-1"
alloc.service = "cf-docuvision"
ctx = MagicMock()
ctx.__enter__ = MagicMock(return_value=alloc)
ctx.__exit__ = MagicMock(return_value=False)
return ctx
def _make_task_not_registered() -> MagicMock:
    """Build a mock context manager whose ``__enter__`` raises TaskNotRegistered."""
    from app.services.task_inference import TaskNotRegistered

    manager = MagicMock()
    manager.__enter__.side_effect = TaskNotRegistered("not registered")
    manager.__exit__.return_value = False
    return manager
def _make_direct_alloc(url: str = "http://node:9011") -> MagicMock:
alloc = MagicMock()
alloc.url = url
ctx = MagicMock()
ctx.__enter__ = MagicMock(return_value=alloc)
ctx.__exit__ = MagicMock(return_value=False)
return ctx
def test_try_docuvision_task_path_returns_text(monkeypatch, tmp_path):
    """With a working task allocation, _try_docuvision() returns the extracted text."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    receipt = tmp_path / "receipt.jpg"
    receipt.write_bytes(b"fake")
    task_ctx = _make_task_ctx(url="http://node:9010")
    with patch("app.services.task_inference.task_allocate", return_value=task_ctx):
        with patch("app.services.ocr.docuvision_client.DocuvisionClient") as doc_cls:
            doc_cls.return_value.extract_text.return_value = _mock_doc_result("STORE $12.34")
            from app.services.ocr.vl_model import _try_docuvision

            extracted = _try_docuvision(str(receipt))
    assert extracted == "STORE $12.34"
    # The client must have been pointed at the allocated node.
    doc_cls.assert_called_once_with("http://node:9010")
def test_try_docuvision_falls_back_to_direct_on_task_not_registered(monkeypatch, tmp_path):
    """TaskNotRegistered makes _try_docuvision() use a direct cf-docuvision allocation."""
    monkeypatch.setenv("CF_ORCH_URL", "http://coord:7700")
    receipt = tmp_path / "receipt.jpg"
    receipt.write_bytes(b"fake")
    unregistered_ctx = _make_task_not_registered()
    with patch("app.services.task_inference.task_allocate", return_value=unregistered_ctx):
        with patch("circuitforge_orch.client.CFOrchClient") as client_cls:
            with patch("app.services.ocr.docuvision_client.DocuvisionClient") as doc_cls:
                client_cls.return_value.allocate.return_value = _make_direct_alloc("http://node:9011")
                doc_cls.return_value.extract_text.return_value = _mock_doc_result("FALLBACK TEXT")
                from app.services.ocr.vl_model import _try_docuvision

                extracted = _try_docuvision(str(receipt))
    assert extracted == "FALLBACK TEXT"
    doc_cls.assert_called_once_with("http://node:9011")
def test_try_docuvision_returns_none_without_cf_orch_url(monkeypatch, tmp_path):
    """With CF_ORCH_URL unset, _try_docuvision() returns None."""
    from app.services.ocr.vl_model import _try_docuvision

    monkeypatch.delenv("CF_ORCH_URL", raising=False)
    receipt = tmp_path / "receipt.jpg"
    receipt.write_bytes(b"fake")
    assert _try_docuvision(str(receipt)) is None

View file

@ -17,17 +17,12 @@ from app.services.ocr.docuvision_client import DocuvisionClient, DocuvisionResul
def test_extract_text_sends_base64_image(tmp_path: Path) -> None: def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
"""extract_text() POSTs image_b64 and returns parsed raw_text.""" """extract_text() POSTs a base64-encoded image and returns parsed text."""
image_file = tmp_path / "test.jpg" image_file = tmp_path / "test.jpg"
image_file.write_bytes(b"fake-image-bytes") image_file.write_bytes(b"fake-image-bytes")
mock_response = MagicMock() mock_response = MagicMock()
mock_response.json.return_value = { mock_response.json.return_value = {"text": "Cheerios", "confidence": 0.95}
"raw_text": "Cheerios",
"elements": [],
"tables": [],
"metadata": {"hint": "text", "confidence": 0.95},
}
mock_response.raise_for_status.return_value = None mock_response.raise_for_status.return_value = None
with patch("httpx.Client") as mock_client_cls: with patch("httpx.Client") as mock_client_cls:
@ -46,8 +41,7 @@ def test_extract_text_sends_base64_image(tmp_path: Path) -> None:
assert call_kwargs[0][0] == "http://docuvision:8080/extract" assert call_kwargs[0][0] == "http://docuvision:8080/extract"
posted_json = call_kwargs[1]["json"] posted_json = call_kwargs[1]["json"]
expected_b64 = base64.b64encode(b"fake-image-bytes").decode() expected_b64 = base64.b64encode(b"fake-image-bytes").decode()
assert posted_json["image_b64"] == expected_b64 assert posted_json["image"] == expected_b64
assert posted_json["hint"] == "text"
def test_extract_text_raises_on_http_error(tmp_path: Path) -> None: def test_extract_text_raises_on_http_error(tmp_path: Path) -> None: