From 21a066496169aa725b20537c2b74d0db67156f57 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Thu, 21 May 2026 16:22:26 -0700
Subject: [PATCH] feat(pipeline): weekly Purple Carrot harvest script + cron

Add weekly_harvest.sh wrapper that:
- Runs discover_current_menu.py to fetch this week's 23 active menu slugs
- Runs scrape_live.py with --resume to scrape only new slugs
- Appends timestamped output to /Library/Assets/kiwi/pipeline/logs/
- Writes a FAILED banner with the exit status to the log when a step aborts

Cron entry added to system crontab: 0 23 * * 0 (every Sunday 23:00)
Logs: /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
---
 .../pipeline/purple_carrot/weekly_harvest.sh  | 63 +++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100755 scripts/pipeline/purple_carrot/weekly_harvest.sh

diff --git a/scripts/pipeline/purple_carrot/weekly_harvest.sh b/scripts/pipeline/purple_carrot/weekly_harvest.sh
new file mode 100755
index 0000000..74bf6c8
--- /dev/null
+++ b/scripts/pipeline/purple_carrot/weekly_harvest.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Weekly Purple Carrot recipe harvest
+# Runs every Sunday night via cron.
+# Discovers this week's menu and scrapes full recipe data.
+# Logs to /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
+#
+# Exits non-zero as soon as any step fails (set -euo pipefail); the exit
+# status is recorded in the closing banner written to the log.
+
+set -euo pipefail
+
+readonly REPO="/Library/Development/CircuitForge/kiwi"
+readonly MENU_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet"
+readonly LIVE_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet"
+readonly LOG_DIR="/Library/Assets/kiwi/pipeline/logs"
+readonly LOG="$LOG_DIR/purple_carrot_harvest.log"
+
+# Print the UTC timestamp used in the log banners.
+timestamp() {
+    date -u '+%Y-%m-%d %H:%M UTC'
+}
+
+# Report a fatal error on stderr and in the log, then exit with status 1.
+die() {
+    echo "ERROR: $*" | tee -a "$LOG" >&2
+    exit 1
+}
+
+# EXIT trap: write the closing banner on every exit path, so a run aborted
+# by set -e shows up in the log as FAILED instead of just stopping.
+finish() {
+    local status=$?
+    if (( status == 0 )); then
+        echo "=== Done $(timestamp) ===" >> "$LOG"
+    else
+        echo "=== FAILED (exit $status) $(timestamp) ===" >> "$LOG"
+    fi
+    echo "" >> "$LOG"
+}
+
+mkdir -p "$LOG_DIR"
+
+echo "=== Purple Carrot harvest $(timestamp) ===" >> "$LOG"
+trap finish EXIT
+
+# cron usually provides only a minimal PATH; check for conda up front so the
+# log gets a clear message rather than a bare "command not found".
+command -v conda >/dev/null 2>&1 || die "conda not found on PATH"
+
+cd "$REPO" || die "cannot cd to $REPO"
+
+# Step 1: discover this week's menu slugs
+echo "[1/2] Discovering current menu slugs..." | tee -a "$LOG"
+conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
+    --out "$MENU_OUT" 2>&1 | tee -a "$LOG"
+
+# Step 2: scrape full recipe data for new slugs only (--resume skips already-scraped)
+echo "[2/2] Scraping live recipe pages..." | tee -a "$LOG"
+conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
+    --slugs-from "$MENU_OUT" \
+    --out "$LIVE_OUT" \
+    --resume \
+    --delay 3.0 2>&1 | tee -a "$LOG"