chore(corpus): preserve watermark files across updates; document corpus env vars
update.sh now backs up data/corpus_watermark.txt and data/incident_watermark.txt before git pull and restores them after, mirroring the existing watch.yaml pattern. Without this, an update would reset watermarks to zero and re-push all corpus entries from the beginning on the next export run. .env.example adds a corpus export section documenting the three env vars needed to opt a node into the Avocet training pipeline. Closes: #6
This commit is contained in:
parent
313b25e0d0
commit
5f7296ad6d
2 changed files with 39 additions and 3 deletions
|
|
@ -23,6 +23,15 @@
|
|||
# Remote endpoint to push diagnostic bundles for escalation.
|
||||
# TURNSTONE_BUNDLE_ENDPOINT=https://example.com/api/bundles
|
||||
|
||||
# --- Log corpus export to Avocet (optional) ---
|
||||
# Push ERROR/CRITICAL entries and labeled incidents to the Avocet corpus endpoint
|
||||
# for log-reading fine-tune training. Requires a consent token issued by CircuitForge (CF).
|
||||
# Contact alan@circuitforge.tech to register your node and receive a token.
|
||||
# Watermarks are stored at data/corpus_watermark.txt and data/incident_watermark.txt.
|
||||
# AVOCET_CORPUS_ENDPOINT=https://avocet.circuitforge.tech/api/corpus/log-batch
|
||||
# AVOCET_CONSENT_TOKEN=your-uuid-token-here
|
||||
# TURNSTONE_SOURCE_HOST=my-server-name # defaults to system hostname if unset
|
||||
|
||||
# --- Periodic batch glean ---
|
||||
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
|
||||
# TURNSTONE_GLEAN_INTERVAL=900
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
#
|
||||
# Local files preserved across updates:
|
||||
# patterns/watch.yaml — site-specific watch source config
|
||||
# data/corpus_watermark.txt — corpus export watermark (last exported rowid)
|
||||
# data/incident_watermark.txt — incident export watermark (last exported timestamp)
|
||||
# data/ — database and live journal files (bind-mounted, untouched)
|
||||
|
||||
set -euo pipefail
|
||||
|
|
@ -21,7 +23,9 @@ echo "==> Turnstone update: branch=$BRANCH"
|
|||
|
||||
# ── Preserve site-local config ────────────────────────────────────────────────
|
||||
# watch.yaml is tracked in git as a template but overridden per-host.
|
||||
# Back it up before the pull and restore it after.
|
||||
# Corpus watermarks track the last exported entry/incident — must survive updates
|
||||
# or the next export run will re-push everything from the beginning.
|
||||
# Back them up before the pull and restore after.
|
||||
WATCH_YAML="$REPO_DIR/patterns/watch.yaml"
|
||||
WATCH_BACKUP=""
|
||||
if [ -f "$WATCH_YAML" ]; then
|
||||
|
|
@ -29,6 +33,19 @@ if [ -f "$WATCH_YAML" ]; then
|
|||
cp "$WATCH_YAML" "$WATCH_BACKUP"
|
||||
fi
|
||||
|
||||
CORPUS_WM="$REPO_DIR/data/corpus_watermark.txt"
INCIDENT_WM="$REPO_DIR/data/incident_watermark.txt"

# Copy one watermark file to a fresh temp file and print the temp path.
#
# Arguments: $1 - path of the watermark file to back up
#            $2 - temp-file name prefix (created as /tmp/<prefix>.XXXXXX)
# Outputs:   the backup path on stdout; nothing if $1 is not a regular file
# Returns:   0 on success or when there is nothing to back up, 1 on failure
_backup_watermark() {
  local src=$1 prefix=$2 backup
  [ -f "$src" ] || return 0
  # Explicit status checks: errexit is not reliably active inside the
  # command substitution this helper is called from.
  backup=$(mktemp "/tmp/${prefix}.XXXXXX") || return 1
  # mktemp creates the file with owner-only permissions. -p carries the
  # watermark's own mode and timestamps (and ownership where permitted) onto
  # the backup, so a watermark that has to be recreated from it on restore
  # does not silently end up owner-only.
  if ! cp -p -- "$src" "$backup"; then
    rm -f -- "$backup"   # do not strand a useless temp file
    return 1
  fi
  printf '%s\n' "$backup"
}

# Each variable stays empty when its watermark file does not exist; the
# restore step uses that to decide whether there is anything to put back.
CORPUS_WM_BACKUP=$(_backup_watermark "$CORPUS_WM" corpus-wm)
INCIDENT_WM_BACKUP=$(_backup_watermark "$INCIDENT_WM" incident-wm)
|
||||
|
||||
# ── Pull ──────────────────────────────────────────────────────────────────────
|
||||
git fetch --all --tags --quiet
|
||||
|
||||
|
|
@ -50,6 +67,16 @@ if [ -n "$WATCH_BACKUP" ]; then
|
|||
rm -f "$WATCH_BACKUP"
|
||||
echo "==> Restored patterns/watch.yaml"
|
||||
fi
|
||||
# Put one backed-up watermark file back in place and delete the backup.
#
# Arguments: $1 - backup path (empty string means nothing was backed up)
#            $2 - destination path of the watermark file
#            $3 - human-readable label used in log messages
# Outputs:   "==> Restored <label>" on stdout; an error naming the backup
#            path on stderr if the copy fails
# Returns:   0 on success or no-op, 1 if the copy failed (backup is kept)
_restore_watermark() {
  local backup=$1 dest=$2 label=$3
  [ -n "$backup" ] || return 0
  if ! cp -- "$backup" "$dest"; then
    # Keep the backup and say where it is: the temp name is random, so
    # without this message the operator cannot recover the watermark by hand.
    echo "ERROR: failed to restore $label; backup kept at $backup" >&2
    return 1
  fi
  rm -f -- "$backup"
  echo "==> Restored $label"
}

# Attempt both restores before giving up, so a failure on the first
# watermark does not also cost us the second one.
WM_RESTORE_FAILED=0
_restore_watermark "$CORPUS_WM_BACKUP" "$CORPUS_WM" "data/corpus_watermark.txt" || WM_RESTORE_FAILED=1
_restore_watermark "$INCIDENT_WM_BACKUP" "$INCIDENT_WM" "data/incident_watermark.txt" || WM_RESTORE_FAILED=1
if [ "$WM_RESTORE_FAILED" -ne 0 ]; then
  exit 1
fi
|
||||
|
||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||
echo "==> Building $IMAGE ..."
|
||||
|
|
|
|||
Loading…
Reference in a new issue