From 07e151b01fbc941ccd02c167969190c3786b6e10 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 13 May 2026 04:55:25 -0700 Subject: [PATCH] refactor: use live watcher + systemd timer instead of cron for cluster ingest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local Heimdall sources (journal, Docker containers, network syslog) are now tailed continuously by the built-in watcher via watch.yaml — no periodic collection needed for those. SSH collection of remote node journals is now handled by a systemd timer (turnstone-cluster-collect.service/.timer) instead of cron. collect_cluster_logs.sh simplified to only SSH-collect remote nodes and trigger ingest directly. docker-cluster.sh updated to mount: - /var/run/docker.sock (so watcher can run docker logs -f) - /run/systemd/journal (so watcher can run journalctl -f) - /devl/turnstone-cluster/patterns/ (cluster-specific watch.yaml) --- scripts/collect_cluster_logs.sh | 81 +++----------------- scripts/docker-cluster.sh | 91 ++++++++--------------- scripts/turnstone-cluster-collect.service | 10 +++ scripts/turnstone-cluster-collect.timer | 12 +++ 4 files changed, 63 insertions(+), 131 deletions(-) create mode 100644 scripts/turnstone-cluster-collect.service create mode 100644 scripts/turnstone-cluster-collect.timer diff --git a/scripts/collect_cluster_logs.sh b/scripts/collect_cluster_logs.sh index 0e505d4..d3f3edd 100644 --- a/scripts/collect_cluster_logs.sh +++ b/scripts/collect_cluster_logs.sh @@ -1,19 +1,13 @@ #!/usr/bin/env bash -# Collect recent journal logs from all CircuitForge cluster nodes and network -# devices into /opt/turnstone/data/ for Turnstone to ingest. +# Collect recent journal logs from remote CircuitForge cluster nodes +# into /devl/turnstone-cluster/data/ for Turnstone to ingest. # -# Run this before each ingest cycle (see cron below). -# Each remote node is collected via SSH; network devices via syslog-receiver. 
+# Local Heimdall sources (journal, Docker containers, network syslog) are +# handled by the Turnstone live watcher (watch.yaml) — no collection needed. # -# Prerequisites: -# - SSH key auth to each node (test: ssh hostname) -# - syslog-receiver.sh running separately (or rsyslog) for network devices -# -# Cron (combined with ingest, every 15 min): -# */15 * * * * bash /opt/turnstone/scripts/collect_cluster_logs.sh && \ -# docker exec turnstone-cluster python scripts/ingest_corpus.py \ -# --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \ -# >> /var/log/turnstone-cluster-ingest.log 2>&1 +# Triggered by systemd timer: turnstone-cluster-collect.timer (every 15 min). +# Install: sudo cp /Library/Development/CircuitForge/turnstone/scripts/turnstone-cluster-collect.* /etc/systemd/system/ +# sudo systemctl daemon-reload && sudo systemctl enable --now turnstone-cluster-collect.timer # # Manual run: # bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh @@ -26,25 +20,7 @@ SSH_OPTS="-o ConnectTimeout=5 -o BatchMode=yes -o StrictHostKeyChecking=no" mkdir -p "${DATA_DIR}" -# ── Local Heimdall journal ──────────────────────────────────────────────────── -echo "heimdall: collecting local journal..." -journalctl \ - --output=json \ - --priority=0..5 \ - --since "${WINDOW}" \ - --no-pager \ - > "${DATA_DIR}/heimdall-journal.jsonl" -echo "heimdall: $(wc -l < "${DATA_DIR}/heimdall-journal.jsonl") entries" - -# Local kernel ring buffer -if dmesg -T &>/dev/null; then - dmesg -T > "${DATA_DIR}/heimdall-dmesg.txt" -else - dmesg > "${DATA_DIR}/heimdall-dmesg.txt" -fi - # ── Remote cluster nodes ────────────────────────────────────────────────────── -# Each entry: " " declare -A NODES=( [navi]="${DATA_DIR}/navi-journal.jsonl" [sif]="${DATA_DIR}/sif-journal.jsonl" @@ -66,44 +42,9 @@ for node in "${!NODES[@]}"; do fi done -# ── Docker container logs from Heimdall ────────────────────────────────────── -# Collect logs from key Docker services running on Heimdall. 
-# Add or remove container names as needed. -DOCKER_CONTAINERS=( - cf-orch-coordinator - cf-web - cf-directus - caddy-proxy -) - -for cname in "${DOCKER_CONTAINERS[@]}"; do - outfile="${DATA_DIR}/docker-${cname}.jsonl" - if docker inspect "${cname}" &>/dev/null 2>&1; then - # Docker log output: raw lines with timestamps; use json-file driver format - docker logs --since 20m "${cname}" 2>&1 | \ - python3 -c " -import sys, json, time -src = '${cname}' -for line in sys.stdin: - line = line.rstrip() - if not line: continue - print(json.dumps({'MESSAGE': line, 'SYSLOG_IDENTIFIER': src, '_TRANSPORT': 'docker', 'PRIORITY': '6'})) -" > "${outfile}" 2>/dev/null || : > "${outfile}" - echo "docker/${cname}: $(wc -l < "${outfile}") entries" - else - : > "${outfile}" - fi -done - -# ── Network syslog (written by syslog-receiver.service) ────────────────────── -# If the syslog receiver is running, it appends to this file. -# We don't rotate it here — ingest deduplicates by entry hash. -SYSLOG_FILE="${DATA_DIR}/network-syslog.txt" -if [ ! -f "${SYSLOG_FILE}" ]; then - : > "${SYSLOG_FILE}" - echo "network-syslog: created (empty — configure devices to send to port 5140)" -else - echo "network-syslog: $(wc -l < "${SYSLOG_FILE}") lines" -fi +# Trigger ingest of remote node journals into the running container. +docker exec turnstone-cluster python scripts/ingest_corpus.py \ + --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \ + >> /var/log/turnstone-cluster-ingest.log 2>&1 echo "collect_cluster_logs: done" diff --git a/scripts/docker-cluster.sh b/scripts/docker-cluster.sh index 0459cb6..993f572 100644 --- a/scripts/docker-cluster.sh +++ b/scripts/docker-cluster.sh @@ -1,60 +1,49 @@ #!/usr/bin/env bash # docker-cluster.sh — Turnstone cluster monitoring instance on Heimdall. 
# -# Ingests logs from the full CircuitForge cluster: -# - Heimdall (local journal + dmesg) -# - Navi, Sif, Cass, Strahl (SSH-collected journals) -# - Docker services: cf-orch-coordinator, cf-web, cf-directus, caddy-proxy -# - Network syslog (router, switches, UniFi APs — UDP 5140) +# Local sources (Heimdall journal, Docker containers, network syslog) are +# tailed live by the built-in watcher (watch.yaml) — no periodic collection needed. # -# Logs are pre-collected to /devl/turnstone-cluster/data/ by collect_cluster_logs.sh -# before each ingest run. This script only manages the container lifecycle. +# Remote node journals (navi, sif, cass, strahl) are collected by a +# systemd timer every 15 minutes and ingested via ingest_corpus.py. +# Install the timer: +# sudo cp scripts/turnstone-cluster-collect.{service,timer} /etc/systemd/system/ +# sudo systemctl daemon-reload && sudo systemctl enable --now turnstone-cluster-collect.timer # # ── Prerequisites ──────────────────────────────────────────────────────────── -# 1. SSH key access to navi, sif, cass, strahl (test: ssh hostname) -# 2. Build the image first: -# cd /Library/Development/CircuitForge/turnstone -# docker build -t circuitforge/turnstone:latest . +# SSH key access to navi, sif, cass, strahl (test: ssh hostname) # -# 3. 
Run this script: -# bash /Library/Development/CircuitForge/turnstone/scripts/docker-cluster.sh -# -# ── Ingest cron (every 15 min — add to root's crontab: sudo crontab -e) ───── -# */15 * * * * bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh \ -# && docker exec turnstone-cluster python scripts/ingest_corpus.py \ -# --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \ -# >> /var/log/turnstone-cluster-ingest.log 2>&1 +# ── Run ─────────────────────────────────────────────────────────────────────── +# bash /Library/Development/CircuitForge/turnstone/scripts/docker-cluster.sh # # ── Caddy reverse proxy (add to /devl/caddy-proxy/Caddyfile) ───────────────── # turnstone.heimdall.lan { -# reverse_proxy 127.0.0.1:8535 +# reverse_proxy 127.0.0.1:8534 # } # Then: docker restart caddy-proxy # # ── Ports ──────────────────────────────────────────────────────────────────── -# Turnstone UI → http://heimdall:8535/turnstone/ +# Turnstone UI → http://heimdall:8534/turnstone/ # set -euo pipefail REPO_DIR=/Library/Development/CircuitForge/turnstone DATA_DIR=/devl/turnstone-cluster/data -PATTERNS_DIR="${REPO_DIR}/patterns" +PATTERNS_DIR=/devl/turnstone-cluster/patterns PORT=8534 TZ=America/Los_Angeles # LLM: route to local cf-orch coordinator (same host, host network). -# Coordinator runs at 127.0.0.1 inside --net=host, so localhost works directly. -# Override LLM_URL to point at a different backend if needed. LLM_URL="${TURNSTONE_LLM_URL:-http://127.0.0.1:7701}" LLM_MODEL="${TURNSTONE_LLM_MODEL:-llama3.1:8b}" LLM_API_KEY="${TURNSTONE_LLM_API_KEY:-}" -mkdir -p "${DATA_DIR}" +mkdir -p "${DATA_DIR}" "${PATTERNS_DIR}" + +# Keep default.yaml in cluster patterns dir up to date with the repo copy. +cp "${REPO_DIR}/patterns/default.yaml" "${PATTERNS_DIR}/default.yaml" # ── Seed LLM preferences (only if not already configured) ──────────────────── -# preferences.json lives in the data dir and persists across container restarts. 
-# If it doesn't exist yet, write defaults pointing at the local cf-orch coordinator -# so the first ingest gets real summarization without manual UI config. PREFS_FILE="${DATA_DIR}/preferences.json" if [ ! -f "${PREFS_FILE}" ]; then python3 -c " @@ -71,6 +60,10 @@ else echo "Preferences already exist at ${PREFS_FILE} — skipping seed" fi +# Touch network-syslog.txt so the file watcher has something to tail +# before the syslog receiver writes to it. +touch "${DATA_DIR}/network-syslog.txt" + # ── Build image ─────────────────────────────────────────────────────────────── echo "Building Turnstone image..." docker build -t circuitforge/turnstone:latest "${REPO_DIR}" @@ -84,6 +77,8 @@ docker run -d \ --net=host \ -v "${DATA_DIR}:/data" \ -v "${PATTERNS_DIR}:/patterns:ro" \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /run/systemd/journal:/run/systemd/journal:ro \ -e TURNSTONE_DB=/data/turnstone.db \ -e TURNSTONE_SOURCE_HOST="heimdall-cluster" \ -e TURNSTONE_BUNDLE_ENDPOINT="${TURNSTONE_BUNDLE_ENDPOINT:-}" \ @@ -99,39 +94,13 @@ docker run -d \ echo "" echo "Turnstone cluster is starting up." 
echo " UI: http://heimdall:${PORT}/turnstone/" +echo " Live watching: Heimdall journal + Docker containers + network syslog" +echo " Remote nodes: install the systemd timer for periodic SSH collection" echo "" - -# ── systemd service (optional) ──────────────────────────────────────────────── -# To create a systemd unit that auto-starts on boot: -# sudo tee /etc/systemd/system/turnstone-cluster.service > /dev/null << 'EOF' -# [Unit] -# Description=Turnstone cluster log monitor -# After=docker.service -# Requires=docker.service -# -# [Service] -# Type=oneshot -# RemainAfterExit=yes -# ExecStart=/usr/bin/docker start turnstone-cluster -# ExecStop=/usr/bin/docker stop turnstone-cluster -# -# [Install] -# WantedBy=multi-user.target -# EOF -# sudo systemctl daemon-reload -# sudo systemctl enable --now turnstone-cluster - -echo "Check container health with:" +echo " sudo cp ${REPO_DIR}/scripts/turnstone-cluster-collect.{service,timer} /etc/systemd/system/" +echo " sudo systemctl daemon-reload && sudo systemctl enable --now turnstone-cluster-collect.timer" +echo "" +echo "Check container:" echo " docker ps --filter name=turnstone-cluster" echo " docker logs turnstone-cluster" -echo "" -echo "Ingest now:" -echo " bash ${REPO_DIR}/scripts/collect_cluster_logs.sh && \\" -echo " docker exec turnstone-cluster python scripts/ingest_corpus.py \\" -echo " --sources /patterns/sources-cluster.yaml --db /data/turnstone.db" -echo "" -echo "To set up the 15-minute cron, add to root's crontab (sudo crontab -e):" -echo " */15 * * * * bash ${REPO_DIR}/scripts/collect_cluster_logs.sh && \\" -echo " docker exec turnstone-cluster python scripts/ingest_corpus.py \\" -echo " --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \\" -echo " >> /var/log/turnstone-cluster-ingest.log 2>&1" +echo " curl http://localhost:${PORT}/turnstone/api/watch/status" diff --git a/scripts/turnstone-cluster-collect.service b/scripts/turnstone-cluster-collect.service new file mode 100644 index 
0000000..74ac355 --- /dev/null +++ b/scripts/turnstone-cluster-collect.service @@ -0,0 +1,10 @@ +[Unit] +Description=Turnstone — collect remote cluster node journals +After=network.target + +[Service] +Type=oneshot +ExecStart=/bin/bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh +StandardOutput=journal +StandardError=journal +SyslogIdentifier=turnstone-collect diff --git a/scripts/turnstone-cluster-collect.timer b/scripts/turnstone-cluster-collect.timer new file mode 100644 index 0000000..a6fc5c2 --- /dev/null +++ b/scripts/turnstone-cluster-collect.timer @@ -0,0 +1,12 @@ +[Unit] +Description=Turnstone — remote cluster journal collection (every 15 min) +Requires=turnstone-cluster-collect.service + +[Timer] +OnBootSec=2min +OnUnitActiveSec=15min +AccuracySec=30s +Persistent=true + +[Install] +WantedBy=timers.target