chore(corpus): preserve watermark files across updates; document corpus env vars
update.sh now backs up data/corpus_watermark.txt and data/incident_watermark.txt before git pull and restores them after, mirroring the existing watch.yaml pattern. Without this, an update would reset the watermarks to zero and re-push all corpus entries from the beginning on the next export run.

.env.example adds a corpus export section documenting the three env vars needed to opt a node into the Avocet training pipeline.

Closes: #6
This commit is contained in:
parent
e2a78d45ef
commit
674e945004
2 changed files with 39 additions and 3 deletions
|
|
@ -23,6 +23,15 @@
|
||||||
# Remote endpoint to push diagnostic bundles for escalation.
|
# Remote endpoint to push diagnostic bundles for escalation.
|
||||||
# TURNSTONE_BUNDLE_ENDPOINT=https://example.com/api/bundles
|
# TURNSTONE_BUNDLE_ENDPOINT=https://example.com/api/bundles
|
||||||
|
|
||||||
|
# --- Log corpus export to Avocet (optional) ---
|
||||||
|
# Push ERROR/CRITICAL entries and labeled incidents to the Avocet corpus endpoint
|
||||||
|
# for log-reading fine-tune training. Requires a consent token issued by CircuitForge (CF).
|
||||||
|
# Contact alan@circuitforge.tech to register your node and receive a token.
|
||||||
|
# Watermarks are stored at data/corpus_watermark.txt and data/incident_watermark.txt.
|
||||||
|
# AVOCET_CORPUS_ENDPOINT=https://avocet.circuitforge.tech/api/corpus/log-batch
|
||||||
|
# AVOCET_CONSENT_TOKEN=your-uuid-token-here
|
||||||
|
# Defaults to the system hostname if unset.
# TURNSTONE_SOURCE_HOST=my-server-name
|
||||||
|
|
||||||
# --- Periodic batch glean ---
|
# --- Periodic batch glean ---
|
||||||
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
|
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
|
||||||
# TURNSTONE_GLEAN_INTERVAL=900
|
# TURNSTONE_GLEAN_INTERVAL=900
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,10 @@
|
||||||
# sudo bash /opt/turnstone/scripts/update.sh feat/live-watch # test a branch
|
# sudo bash /opt/turnstone/scripts/update.sh feat/live-watch # test a branch
|
||||||
#
|
#
|
||||||
# Local files preserved across updates:
|
# Local files preserved across updates:
|
||||||
# patterns/watch.yaml — site-specific watch source config
|
# patterns/watch.yaml — site-specific watch source config
|
||||||
# data/ — database and live journal files (bind-mounted, untouched)
|
# data/corpus_watermark.txt — corpus export watermark (last exported rowid)
|
||||||
|
# data/incident_watermark.txt — incident export watermark (last exported timestamp)
|
||||||
|
# data/ — database and live journal files (bind-mounted, untouched)
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -21,7 +23,9 @@ echo "==> Turnstone update: branch=$BRANCH"
|
||||||
|
|
||||||
# ── Preserve site-local config ────────────────────────────────────────────────
|
# ── Preserve site-local config ────────────────────────────────────────────────
|
||||||
# watch.yaml is tracked in git as a template but overridden per-host.
|
# watch.yaml is tracked in git as a template but overridden per-host.
|
||||||
# Back it up before the pull and restore it after.
|
# Corpus watermarks track the last exported entry/incident — must survive updates
|
||||||
|
# or the next export run will re-push everything from the beginning.
|
||||||
|
# Back them up before the pull and restore after.
|
||||||
WATCH_YAML="$REPO_DIR/patterns/watch.yaml"
|
WATCH_YAML="$REPO_DIR/patterns/watch.yaml"
|
||||||
WATCH_BACKUP=""
|
WATCH_BACKUP=""
|
||||||
if [ -f "$WATCH_YAML" ]; then
|
if [ -f "$WATCH_YAML" ]; then
|
||||||
|
|
@ -29,6 +33,19 @@ if [ -f "$WATCH_YAML" ]; then
|
||||||
cp "$WATCH_YAML" "$WATCH_BACKUP"
|
cp "$WATCH_YAML" "$WATCH_BACKUP"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
CORPUS_WM="$REPO_DIR/data/corpus_watermark.txt"
|
||||||
|
INCIDENT_WM="$REPO_DIR/data/incident_watermark.txt"
|
||||||
|
CORPUS_WM_BACKUP=""
|
||||||
|
INCIDENT_WM_BACKUP=""
|
||||||
|
if [ -f "$CORPUS_WM" ]; then
|
||||||
|
CORPUS_WM_BACKUP=$(mktemp /tmp/corpus-wm.XXXXXX)
|
||||||
|
cp "$CORPUS_WM" "$CORPUS_WM_BACKUP"
|
||||||
|
fi
|
||||||
|
if [ -f "$INCIDENT_WM" ]; then
|
||||||
|
INCIDENT_WM_BACKUP=$(mktemp /tmp/incident-wm.XXXXXX)
|
||||||
|
cp "$INCIDENT_WM" "$INCIDENT_WM_BACKUP"
|
||||||
|
fi
|
||||||
|
|
||||||
# ── Pull ──────────────────────────────────────────────────────────────────────
|
# ── Pull ──────────────────────────────────────────────────────────────────────
|
||||||
git fetch --all --tags --quiet
|
git fetch --all --tags --quiet
|
||||||
|
|
||||||
|
|
@ -50,6 +67,16 @@ if [ -n "$WATCH_BACKUP" ]; then
|
||||||
rm -f "$WATCH_BACKUP"
|
rm -f "$WATCH_BACKUP"
|
||||||
echo "==> Restored patterns/watch.yaml"
|
echo "==> Restored patterns/watch.yaml"
|
||||||
fi
|
fi
|
||||||
|
if [ -n "$CORPUS_WM_BACKUP" ]; then
|
||||||
|
cp "$CORPUS_WM_BACKUP" "$CORPUS_WM"
|
||||||
|
rm -f "$CORPUS_WM_BACKUP"
|
||||||
|
echo "==> Restored data/corpus_watermark.txt"
|
||||||
|
fi
|
||||||
|
if [ -n "$INCIDENT_WM_BACKUP" ]; then
|
||||||
|
cp "$INCIDENT_WM_BACKUP" "$INCIDENT_WM"
|
||||||
|
rm -f "$INCIDENT_WM_BACKUP"
|
||||||
|
echo "==> Restored data/incident_watermark.txt"
|
||||||
|
fi
|
||||||
|
|
||||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||||
echo "==> Building $IMAGE ..."
|
echo "==> Building $IMAGE ..."
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue