#!/usr/bin/env bash
# Collect recent journal logs from all CircuitForge cluster nodes and network
# devices into ${DATA_DIR} for Turnstone to ingest.
#
# Run this before each ingest cycle (see cron below).
# Each remote node is collected via SSH; network devices via syslog-receiver.
#
# Prerequisites:
#   - SSH key auth to each node (test: ssh hostname)
#   - syslog-receiver.sh running separately (or rsyslog) for network devices
#
# Environment (all optional):
#   DATA_DIR        output directory (default: /devl/turnstone-cluster/data)
#   WINDOW_MINUTES  how far back to collect, in whole minutes (default: 20)
#
# NOTE(review): earlier documentation named /opt/turnstone/data/ as the output
# directory, but the script's default has been /devl/turnstone-cluster/data.
# Confirm which path the ingest container actually mounts.
#
# Cron (combined with ingest, every 15 min):
#   */15 * * * * bash /opt/turnstone/scripts/collect_cluster_logs.sh && \
#     docker exec turnstone-cluster python scripts/ingest_corpus.py \
#       --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \
#       >> /var/log/turnstone-cluster-ingest.log 2>&1
#
# Manual run:
#   bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh

set -euo pipefail

readonly DATA_DIR="${DATA_DIR:-/devl/turnstone-cluster/data}"

# One source of truth for the collection window; both journalctl (--since
# "N minutes ago") and docker logs (--since Nm) are derived from it.
readonly WINDOW_MINUTES="${WINDOW_MINUTES:-20}"
if [[ ! "${WINDOW_MINUTES}" =~ ^[1-9][0-9]*$ ]]; then
  # The value is embedded in a remote shell command below, so reject anything
  # that is not a plain positive integer.
  echo "collect_cluster_logs: WINDOW_MINUTES must be a positive integer (got '${WINDOW_MINUTES}')" >&2
  exit 2
fi
readonly WINDOW="${WINDOW_MINUTES} minutes ago"

# Kept as an array so each option reaches ssh as its own word.
#   -n         : never read from this script's stdin
#   BatchMode  : fail instead of prompting for a password/passphrase
# NOTE(review): StrictHostKeyChecking=no disables host key verification and
# allows man-in-the-middle attacks; consider "accept-new" plus a managed
# known_hosts file. Left unchanged here to preserve existing behavior.
readonly -a SSH_OPTS=(
  -n
  -o ConnectTimeout=5
  -o BatchMode=yes
  -o StrictHostKeyChecking=no
)

#######################################
# Wrap each non-empty line from stdin in a JSON object that uses journald
# field names, one object per output line.
# Arguments: $1 - container name, stored as SYSLOG_IDENTIFIER
# Outputs:   JSON lines on stdout
# Returns:   python3's exit status
#######################################
docker_lines_to_jsonl() {
  # The name is passed via argv instead of being spliced into the Python
  # source, and stdin is decoded with errors="replace" so a single invalid
  # byte in a container's output cannot abort the conversion.
  python3 -c '
import io
import json
import sys

src = sys.argv[1]
stream = io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8", errors="replace")
for line in stream:
    line = line.rstrip()
    if not line:
        continue
    print(json.dumps({"MESSAGE": line, "SYSLOG_IDENTIFIER": src, "_TRANSPORT": "docker", "PRIORITY": "6"}))
' "$1"
}

mkdir -p "${DATA_DIR}"

# ── Local Heimdall journal ────────────────────────────────────────────────────
# Not guarded: under `set -e` a journalctl failure aborts the whole run.
echo "heimdall: collecting local journal..."
journalctl \
  --output=json \
  --priority=0..5 \
  --since "${WINDOW}" \
  --no-pager \
  > "${DATA_DIR}/heimdall-journal.jsonl"
echo "heimdall: $(wc -l < "${DATA_DIR}/heimdall-journal.jsonl") entries"

# Local kernel ring buffer. Try human-readable timestamps first, then plain
# dmesg; if neither works (e.g. restricted dmesg), leave an empty file instead
# of aborting the whole collection run.
dmesg_out="${DATA_DIR}/heimdall-dmesg.txt"
if ! dmesg -T > "${dmesg_out}" 2>/dev/null; then
  if ! dmesg > "${dmesg_out}" 2>/dev/null; then
    echo "heimdall: dmesg unavailable, skipping"
    : > "${dmesg_out}"
  fi
fi

# ── Remote cluster nodes ──────────────────────────────────────────────────────
# Each entry: [ssh hostname]="output file"
declare -A NODES=(
  [navi]="${DATA_DIR}/navi-journal.jsonl"
  [sif]="${DATA_DIR}/sif-journal.jsonl"
  [cass]="${DATA_DIR}/cass-journal.jsonl"
  [strahl]="${DATA_DIR}/strahl-journal.jsonl"
)

# Runs on the remote host. The trailing `|| true` keeps a remote journalctl
# failure from being reported as an ssh failure (the output is simply empty).
readonly REMOTE_CMD="journalctl --output=json --priority=0..5 --since '${WINDOW}' --no-pager 2>/dev/null || true"

for node in "${!NODES[@]}"; do
  outfile="${NODES[$node]}"
  echo "${node}: collecting journal..."
  # Cheap reachability probe first so an unreachable node is reported as such.
  if ssh "${SSH_OPTS[@]}" "${node}" true 2>/dev/null; then
    if ! ssh "${SSH_OPTS[@]}" "${node}" "${REMOTE_CMD}" > "${outfile}" 2>/dev/null; then
      echo "${node}: ssh failed, skipping"
      : > "${outfile}"
    fi
    echo "${node}: $(wc -l < "${outfile}") entries"
  else
    echo "${node}: unreachable, skipping"
    : > "${outfile}"
  fi
done

# ── Docker container logs from Heimdall ──────────────────────────────────────
# Collect logs from key Docker services running on Heimdall.
# Add or remove container names as needed.
DOCKER_CONTAINERS=(
  cf-orch-coordinator
  cf-web
  cf-directus
  caddy-proxy
)

for cname in "${DOCKER_CONTAINERS[@]}"; do
  outfile="${DATA_DIR}/docker-${cname}.jsonl"
  if docker inspect "${cname}" &>/dev/null; then
    # stdout and stderr of the container are merged, then each line is wrapped
    # as JSON. With pipefail, a failure of either stage empties the file.
    if ! docker logs --since "${WINDOW_MINUTES}m" "${cname}" 2>&1 \
      | docker_lines_to_jsonl "${cname}" > "${outfile}" 2>/dev/null; then
      : > "${outfile}"
    fi
    echo "docker/${cname}: $(wc -l < "${outfile}") entries"
  else
    # Container does not exist on this host: leave an empty file.
    : > "${outfile}"
  fi
done

# ── Network syslog (written by syslog-receiver.service) ──────────────────────
# If the syslog receiver is running, it appends to this file.
# We don't rotate it here — ingest deduplicates by entry hash.
SYSLOG_FILE="${DATA_DIR}/network-syslog.txt"
if [[ ! -f "${SYSLOG_FILE}" ]]; then
  : > "${SYSLOG_FILE}"
  echo "network-syslog: created (empty — configure devices to send to port 5140)"
else
  echo "network-syslog: $(wc -l < "${SYSLOG_FILE}") lines"
fi

echo "collect_cluster_logs: done"