Add weekly_harvest.sh wrapper that:
- Runs discover_current_menu.py to fetch this week's 23 active menu slugs
- Runs scrape_live.py with --resume to scrape only new slugs
- Appends timestamped output to /Library/Assets/kiwi/pipeline/logs/

Cron entry added to system crontab: 0 23 * * 0 (every Sunday 23:00)
Logs: /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
35 lines
1.2 KiB
Bash
Executable file
35 lines
1.2 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Weekly Purple Carrot recipe harvest.
#
# Runs every Sunday night via cron, in two steps:
#   1. discover_current_menu.py writes this week's menu slugs to $MENU_OUT.
#   2. scrape_live.py reads the slugs from $MENU_OUT and scrapes full recipe
#      data into $LIVE_OUT; --resume skips slugs that were already scraped.
#
# Logs to /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log

set -euo pipefail

readonly REPO="/Library/Development/CircuitForge/kiwi"
readonly MENU_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet"
readonly LIVE_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet"
readonly LOG_DIR="/Library/Assets/kiwi/pipeline/logs"
readonly LOG="$LOG_DIR/purple_carrot_harvest.log"

mkdir -p "$LOG_DIR"

# With `set -e` / `pipefail`, any failing step aborts the script before the
# closing "Done" banner is written. Record the failure in the log so a broken
# run is distinguishable from one that is still in progress.
on_exit() {
  local status=$?
  if ((status != 0)); then
    # Best effort: a logging problem must not mask the original exit status.
    {
      echo "=== FAILED (exit $status) $(date -u '+%Y-%m-%d %H:%M UTC') ==="
      echo ""
    } >> "$LOG" || true
  fi
}
trap on_exit EXIT

echo "=== Purple Carrot harvest $(date -u '+%Y-%m-%d %H:%M UTC') ===" >> "$LOG"

# The script paths passed to python3 below are relative to the repo root.
cd "$REPO"

# NOTE(review): `conda run` captures the child's stdout/stderr by default, so
# the output may only reach the log once each step finishes. Consider
# `--no-capture-output` if the log needs to be followed live — confirm the
# installed conda version supports it.

# Step 1: discover this week's menu slugs
echo "[1/2] Discovering current menu slugs..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
  --out "$MENU_OUT" 2>&1 \
  | tee -a "$LOG"

# Step 2: scrape full recipe data for new slugs only (--resume skips already-scraped)
echo "[2/2] Scraping live recipe pages..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
  --slugs-from "$MENU_OUT" \
  --out "$LIVE_OUT" \
  --resume \
  --delay 3.0 2>&1 \
  | tee -a "$LOG"

echo "=== Done $(date -u '+%Y-%m-%d %H:%M UTC') ===" >> "$LOG"
echo "" >> "$LOG"