feat(pipeline): weekly Purple Carrot harvest script + cron
Add weekly_harvest.sh wrapper that:
- Runs discover_current_menu.py to fetch this week's 23 active menu slugs
- Runs scrape_live.py with --resume to scrape only new slugs
- Appends timestamped output to /Library/Assets/kiwi/pipeline/logs/

Cron entry added to system crontab: 0 23 * * 0 (every Sunday at 23:00).
Logs: /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
This commit is contained in:
parent
a9ab996bcc
commit
21a0664961
1 changed file with 35 additions and 0 deletions
35
scripts/pipeline/purple_carrot/weekly_harvest.sh
Executable file
35
scripts/pipeline/purple_carrot/weekly_harvest.sh
Executable file
|
|
@ -0,0 +1,35 @@
|
|||
#!/usr/bin/env bash
#
# Weekly Purple Carrot recipe harvest.
#
# Runs every Sunday night via cron.
#   Step 1: discover this week's menu slugs (written to $MENU_OUT).
#   Step 2: scrape full recipe data for those slugs (written to $LIVE_OUT).
#
# Logs to /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
#
# Each run is bracketed in the log by a "=== Purple Carrot harvest ... ==="
# header and a footer: "=== Done ... ===" on success, or
# "=== FAILED (exit N) ... ===" when a step aborts the script.

# Abort on the first failing command, unset variable, or failing pipeline
# stage (pipefail makes a failing `conda run` fail its `| tee` pipeline).
set -euo pipefail

readonly REPO="/Library/Development/CircuitForge/kiwi"
readonly MENU_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet"
readonly LIVE_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet"
readonly LOG_DIR="/Library/Assets/kiwi/pipeline/logs"
readonly LOG="$LOG_DIR/purple_carrot_harvest.log"

# Print the current UTC time in the format used by the log markers.
timestamp() {
  date -u '+%Y-%m-%d %H:%M UTC'
}

# EXIT handler: always close the run's log section, recording whether the
# script finished or was aborted by set -e, then exit with the same status.
log_outcome() {
  local status=$?
  # Best effort from here on: a failed log write must not mask the real status.
  set +e
  if (( status == 0 )); then
    echo "=== Done $(timestamp) ===" >> "$LOG"
  else
    echo "=== FAILED (exit $status) $(timestamp) ===" >> "$LOG"
  fi
  echo "" >> "$LOG"
  exit "$status"
}

mkdir -p "$LOG_DIR"

echo "=== Purple Carrot harvest $(timestamp) ===" >> "$LOG"

# Installed only after the header is written, so every footer has a header.
trap log_outcome EXIT

cd "$REPO"

# NOTE(review): `conda run` may hold back the child's output until it exits;
# if live log streaming is wanted, check whether the installed conda supports
# `--no-capture-output` - confirm before changing.

# Step 1: discover this week's menu slugs
echo "[1/2] Discovering current menu slugs..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
  --out "$MENU_OUT" 2>&1 | tee -a "$LOG"

# Step 2: scrape full recipe data for new slugs only (--resume skips already-scraped)
echo "[2/2] Scraping live recipe pages..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
  --slugs-from "$MENU_OUT" \
  --out "$LIVE_OUT" \
  --resume \
  --delay 3.0 2>&1 | tee -a "$LOG"

# The success footer ("=== Done ... ===" plus a blank line) is written by
# log_outcome when the script exits normally.
|
||||
Loading…
Reference in a new issue