feat(pipeline): weekly Purple Carrot harvest script + cron
Add weekly_harvest.sh wrapper that:
- Runs discover_current_menu.py to fetch this week's 23 active menu slugs
- Runs scrape_live.py with --resume to scrape only new slugs
- Appends timestamped output to /Library/Assets/kiwi/pipeline/logs/

Cron entry added to system crontab: 0 23 * * 0 (every Sunday 23:00)
Logs: /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
This commit is contained in:
parent
a9ab996bcc
commit
21a0664961
1 changed file with 35 additions and 0 deletions
35
scripts/pipeline/purple_carrot/weekly_harvest.sh
Executable file
35
scripts/pipeline/purple_carrot/weekly_harvest.sh
Executable file
|
|
@ -0,0 +1,35 @@
|
||||||
|
#!/usr/bin/env bash
#
# Weekly Purple Carrot recipe harvest
#
# Runs every Sunday night via cron.
# Discovers this week's menu and scrapes full recipe data.
# Logs to /Library/Assets/kiwi/pipeline/logs/purple_carrot_harvest.log
#
# Steps (both run inside the "cf" conda environment, from $REPO):
#   1. discover_current_menu.py is invoked with --out "$MENU_OUT".
#   2. scrape_live.py is invoked with --slugs-from "$MENU_OUT",
#      --out "$LIVE_OUT", --resume and --delay 3.0.
#
# Exit status: 0 on success. On failure the script stops at the first failing
# step (set -e / pipefail) and a "=== FAILED (exit N) ... ===" marker is
# appended to the log so an aborted run is distinguishable from a running one.

set -euo pipefail

readonly REPO="/Library/Development/CircuitForge/kiwi"
readonly MENU_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_menu.parquet"
readonly LIVE_OUT="/Library/Assets/kiwi/pipeline/recipes_purplecarrot_live.parquet"
readonly LOG_DIR="/Library/Assets/kiwi/pipeline/logs"
readonly LOG="$LOG_DIR/purple_carrot_harvest.log"

# Prints the current UTC time in the format used by the log markers.
timestamp() {
  date -u '+%Y-%m-%d %H:%M UTC'
}

# EXIT handler: records a failure marker in the log when the script ends with
# a non-zero status. Successful runs already write their own "Done" marker.
# Arguments: $1 - exit status the script is terminating with.
on_exit() {
  local status=$1
  if (( status != 0 )); then
    # Best effort: a failed log write must not mask the original exit status.
    {
      echo "=== FAILED (exit $status) $(timestamp) ==="
      echo ""
    } >> "$LOG" || true
  fi
}

mkdir -p "$LOG_DIR"

# Installed only after the log directory exists, so the handler can write.
trap 'on_exit "$?"' EXIT

echo "=== Purple Carrot harvest $(timestamp) ===" >> "$LOG"

# Without this check a failed cd would only reach cron's stderr, not the log.
if ! cd "$REPO"; then
  echo "ERROR: cannot cd to $REPO" | tee -a "$LOG" >&2
  exit 1
fi

# cron usually runs jobs with a reduced PATH; fail with a clear, logged
# message if conda cannot be resolved instead of a bare "command not found".
if ! command -v conda > /dev/null 2>&1; then
  echo "ERROR: conda not found on PATH ($PATH)" | tee -a "$LOG" >&2
  exit 127
fi

# Step 1: discover this week's menu slugs
echo "[1/2] Discovering current menu slugs..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/discover_current_menu.py \
  --out "$MENU_OUT" 2>&1 | tee -a "$LOG"

# Step 2: scrape full recipe data for new slugs only (--resume skips already-scraped)
echo "[2/2] Scraping live recipe pages..." | tee -a "$LOG"
conda run -n cf python3 scripts/pipeline/purple_carrot/scrape_live.py \
  --slugs-from "$MENU_OUT" \
  --out "$LIVE_OUT" \
  --resume \
  --delay 3.0 2>&1 | tee -a "$LOG"

echo "=== Done $(timestamp) ===" >> "$LOG"
echo "" >> "$LOG"
|
||||||
Loading…
Reference in a new issue