diff --git a/patterns/sources-cluster.yaml b/patterns/sources-cluster.yaml
new file mode 100644
index 0000000..81badd7
--- /dev/null
+++ b/patterns/sources-cluster.yaml
@@ -0,0 +1,55 @@
+# Turnstone log sources for the Heimdall cluster ingest.
+# Included: Heimdall (local), Navi, Sif, Cass, Strahl (collected over SSH),
+# Docker services on Heimdall, and syslog from network devices.
+#
+# scripts/collect_cluster_logs.sh refreshes these files before each ingest run.
+# Every path is container-side (/data/ = bind-mount of /devl/turnstone-cluster/data/).
+#
+# Cron (collect + ingest, every 15 min):
+#   */15 * * * * bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh && \
+#     docker exec turnstone-cluster python scripts/ingest_corpus.py \
+#     --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \
+#     >> /var/log/turnstone-cluster-ingest.log 2>&1
+
+sources:
+  # ── Heimdall (local) ─────────────────────────────────────────────────────────
+  - id: heimdall-journal
+    path: /data/heimdall-journal.jsonl
+
+  - id: heimdall-dmesg
+    path: /data/heimdall-dmesg.txt
+
+  # ── Remote cluster nodes (journals fetched over SSH) ─────────────────────────
+  - id: navi-journal
+    path: /data/navi-journal.jsonl
+
+  - id: sif-journal
+    path: /data/sif-journal.jsonl
+
+  - id: cass-journal
+    path: /data/cass-journal.jsonl
+
+  - id: strahl-journal
+    path: /data/strahl-journal.jsonl
+
+  # ── Docker services on Heimdall ──────────────────────────────────────────────
+  - id: docker-cf-orch-coordinator
+    path: /data/docker-cf-orch-coordinator.jsonl
+
+  - id: docker-cf-web
+    path: /data/docker-cf-web.jsonl
+
+  - id: docker-cf-directus
+    path: /data/docker-cf-directus.jsonl
+
+  - id: docker-caddy-proxy
+    path: /data/docker-caddy-proxy.jsonl
+
+  # ── Network syslog (router, switches, UniFi APs) ─────────────────────────────
+  # Written by syslog-receiver.service (UDP 5140 → /devl/turnstone-cluster/data/network-syslog.txt).
+  # Point each device's remote syslog at Heimdall:5140.
+  # UniFi: Settings → System → Remote Logging → Syslog Host = <heimdall-ip>:5140
+  # Ubiquiti EdgeRouter: set system syslog host <heimdall-ip> facility all level debug
+  # Managed switches: varies by vendor — target UDP 5140
+  - id: network-syslog
+    path: /data/network-syslog.txt
diff --git a/scripts/collect_cluster_logs.sh b/scripts/collect_cluster_logs.sh
new file mode 100644
index 0000000..0e505d4
--- /dev/null
+++ b/scripts/collect_cluster_logs.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+# Collect recent journal logs from all CircuitForge cluster nodes and network
+# devices into /devl/turnstone-cluster/data/ (DATA_DIR below) for Turnstone to
+# ingest.
+#
+# Run this before each ingest cycle (see cron below).
+# Each remote node is collected via SSH; network devices via syslog-receiver.
+#
+# Prerequisites:
+#   - SSH key auth to each node (test: ssh hostname)
+#   - syslog-receiver.sh running separately (or rsyslog) for network devices
+#
+# Cron (combined with ingest, every 15 min):
+#   */15 * * * * bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh && \
+#     docker exec turnstone-cluster python scripts/ingest_corpus.py \
+#     --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \
+#     >> /var/log/turnstone-cluster-ingest.log 2>&1
+#
+# Manual run:
+#   bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh
+
+set -euo pipefail
+
+# Host-side directory every collected log file is written to.
+DATA_DIR=/devl/turnstone-cluster/data
+# Look-back window handed to `journalctl --since`. It is wider than the
+# 15-minute cron interval, so consecutive runs overlap.
+WINDOW="20 minutes ago"
+# Whitespace-separated option string consumed by the ssh calls below; keep
+# individual options free of embedded spaces.
+# NOTE(review): StrictHostKeyChecking=no accepts changed host keys without
+# complaint; consider accept-new if every node's OpenSSH supports it.
+SSH_OPTS="-o ConnectTimeout=5 -o BatchMode=yes -o StrictHostKeyChecking=no"
+
+mkdir -p "${DATA_DIR}"
+
+# ── Local Heimdall journal ────────────────────────────────────────────────────
+echo "heimdall: collecting local journal..."
+# Priority 0..5 keeps emerg through notice; output is one JSON object per line.
+journalctl \
+  --output=json \
+  --priority=0..5 \
+  --since "${WINDOW}" \
+  --no-pager \
+  > "${DATA_DIR}/heimdall-journal.jsonl"
+echo "heimdall: $(wc -l < "${DATA_DIR}/heimdall-journal.jsonl") entries"
+
+# Local kernel ring buffer.
+# Try human-readable timestamps (-T) first and fall back to plain dmesg. If
+# neither form can be read (for example when access to the ring buffer is
+# restricted for this user), leave an empty file rather than aborting the whole
+# collection run under `set -e`.
+dmesg_file="${DATA_DIR}/heimdall-dmesg.txt"
+if ! dmesg -T > "${dmesg_file}" 2>/dev/null \
+  && ! dmesg > "${dmesg_file}" 2>/dev/null; then
+  echo "heimdall: dmesg unavailable, skipping"
+  : > "${dmesg_file}"
+fi
+
+# ── Remote cluster nodes ──────────────────────────────────────────────────────
+# Each entry: [hostname]="output file"
+declare -A NODES=(
+  [navi]="${DATA_DIR}/navi-journal.jsonl"
+  [sif]="${DATA_DIR}/sif-journal.jsonl"
+  [cass]="${DATA_DIR}/cass-journal.jsonl"
+  [strahl]="${DATA_DIR}/strahl-journal.jsonl"
+)
+
+# SSH_OPTS is a whitespace-separated string; split it once into an array so the
+# options reach ssh as separate words without unquoted expansion (no globbing).
+read -r -a ssh_opts <<< "${SSH_OPTS}"
+
+for node in "${!NODES[@]}"; do
+  outfile="${NODES[$node]}"
+  echo "${node}: collecting journal..."
+  # Probe reachability first; -n keeps ssh from reading this script's stdin.
+  if ssh -n "${ssh_opts[@]}" "${node}" true 2>/dev/null; then
+    # Best effort: the remote `|| true` ignores journalctl errors, and a failed
+    # transfer leaves an empty file so the ingest still finds every source.
+    ssh -n "${ssh_opts[@]}" "${node}" \
+      "journalctl --output=json --priority=0..5 --since '${WINDOW}' --no-pager 2>/dev/null || true" \
+      > "${outfile}" 2>/dev/null || { echo "${node}: ssh failed, skipping"; : > "${outfile}"; }
+    echo "${node}: $(wc -l < "${outfile}") entries"
+  else
+    echo "${node}: unreachable, skipping"
+    : > "${outfile}"
+  fi
+done
+
+# ── Docker container logs from Heimdall ──────────────────────────────────────
+# Collect logs from key Docker services running on Heimdall.
+# Add or remove container names as needed.
+DOCKER_CONTAINERS=(
+  cf-orch-coordinator
+  cf-web
+  cf-directus
+  caddy-proxy
+)
+
+for cname in "${DOCKER_CONTAINERS[@]}"; do
+  outfile="${DATA_DIR}/docker-${cname}.jsonl"
+  if docker inspect "${cname}" &>/dev/null; then
+    # Wrap every non-empty log line (stdout and stderr merged) in a JSON object
+    # that uses journald-style field names. `--since 20m` mirrors the 20-minute
+    # WINDOW used for the journals. The container name is passed as an argument
+    # instead of being pasted into the Python source, and undecodable bytes are
+    # replaced so one bad byte cannot discard the whole batch.
+    docker logs --since 20m "${cname}" 2>&1 \
+      | python3 -c '
+import json
+import sys
+
+src = sys.argv[1]
+sys.stdin.reconfigure(encoding="utf-8", errors="replace")
+for line in sys.stdin:
+    line = line.rstrip()
+    if not line:
+        continue
+    print(json.dumps({"MESSAGE": line, "SYSLOG_IDENTIFIER": src, "_TRANSPORT": "docker", "PRIORITY": "6"}))
+' "${cname}" > "${outfile}" 2>/dev/null || : > "${outfile}"
+    echo "docker/${cname}: $(wc -l < "${outfile}") entries"
+  else
+    # Missing container: still leave an (empty) file for the ingest to read.
+    echo "docker/${cname}: container not found, skipping"
+    : > "${outfile}"
+  fi
+done
+
+# ── Network syslog (written by syslog-receiver.service) ──────────────────────
+# If the syslog receiver is running, it appends to this file.
+# We don't rotate it here — ingest deduplicates by entry hash.
+SYSLOG_FILE="${DATA_DIR}/network-syslog.txt"
+if [[ ! -f "${SYSLOG_FILE}" ]]; then
+  : > "${SYSLOG_FILE}"
+  echo "network-syslog: created (empty — configure devices to send to port 5140)"
+else
+  echo "network-syslog: $(wc -l < "${SYSLOG_FILE}") lines"
+fi
+
+echo "collect_cluster_logs: done"
diff --git a/scripts/docker-cluster.sh b/scripts/docker-cluster.sh
new file mode 100644
index 0000000..0459cb6
--- /dev/null
+++ b/scripts/docker-cluster.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+# docker-cluster.sh — Turnstone cluster monitoring instance on Heimdall.
+#
+# Ingests logs from the full CircuitForge cluster:
+#   - Heimdall (local journal + dmesg)
+#   - Navi, Sif, Cass, Strahl (SSH-collected journals)
+#   - Docker services: cf-orch-coordinator, cf-web, cf-directus, caddy-proxy
+#   - Network syslog (router, switches, UniFi APs — UDP 5140)
+#
+# Logs are pre-collected to /devl/turnstone-cluster/data/ by collect_cluster_logs.sh
+# before each ingest run. This script only manages the container lifecycle.
+#
+# ── Prerequisites ────────────────────────────────────────────────────────────
+# 1. SSH key access to navi, sif, cass, strahl (test: ssh hostname)
+# 2. Build the image first:
+#      cd /Library/Development/CircuitForge/turnstone
+#      docker build -t circuitforge/turnstone:latest .
+#
+# 3. Run this script:
+#      bash /Library/Development/CircuitForge/turnstone/scripts/docker-cluster.sh
+#
+# ── Ingest cron (every 15 min — add to root's crontab: sudo crontab -e) ─────
+#   */15 * * * * bash /Library/Development/CircuitForge/turnstone/scripts/collect_cluster_logs.sh \
+#     && docker exec turnstone-cluster python scripts/ingest_corpus.py \
+#     --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \
+#     >> /var/log/turnstone-cluster-ingest.log 2>&1
+#
+# ── Caddy reverse proxy (add to /devl/caddy-proxy/Caddyfile) ─────────────────
+#   turnstone.heimdall.lan {
+#     reverse_proxy 127.0.0.1:<PORT>
+#   }
+#   Then: docker restart caddy-proxy
+#
+# ── Ports ────────────────────────────────────────────────────────────────────
+#   Turnstone UI → http://heimdall:<PORT>/turnstone/   (<PORT> = PORT below)
+#
+set -euo pipefail
+
+REPO_DIR=/Library/Development/CircuitForge/turnstone
+DATA_DIR=/devl/turnstone-cluster/data
+PATTERNS_DIR="${REPO_DIR}/patterns"
+# Port used by the container health check and by the UI URL this script prints.
+# NOTE(review): this header previously documented 8535 for both the Caddy
+# upstream and the UI URL while PORT is 8534 — confirm which port the image
+# actually listens on and keep the proxy config in sync with it.
+PORT=8534
+TZ=America/Los_Angeles
+
+# LLM: route to local cf-orch coordinator (same host, host network).
+# Coordinator runs at 127.0.0.1 inside --net=host, so localhost works directly.
+# Set TURNSTONE_LLM_URL / TURNSTONE_LLM_MODEL / TURNSTONE_LLM_API_KEY in the
+# environment to point at a different backend.
+LLM_URL="${TURNSTONE_LLM_URL:-http://127.0.0.1:7701}"
+LLM_MODEL="${TURNSTONE_LLM_MODEL:-llama3.1:8b}"
+LLM_API_KEY="${TURNSTONE_LLM_API_KEY:-}"
+
+mkdir -p "${DATA_DIR}"
+
+# ── Seed LLM preferences (only if not already configured) ────────────────────
+# preferences.json lives in the data dir and persists across container restarts.
+# If it doesn't exist yet, write defaults pointing at the local cf-orch coordinator
+# so the first ingest gets real summarization without manual UI config.
+PREFS_FILE="${DATA_DIR}/preferences.json"
+if [[ ! -f "${PREFS_FILE}" ]]; then
+  # The values travel through the environment rather than being pasted into the
+  # Python source, so quotes or backslashes in a URL or API key cannot break or
+  # alter the generated program. The JSON is written to a temp file and renamed
+  # only on success: a failed run must not leave an empty preferences.json
+  # behind, because its mere existence suppresses seeding on every later run.
+  prefs_tmp="${PREFS_FILE}.tmp.$$"
+  if ! LLM_URL="${LLM_URL}" LLM_MODEL="${LLM_MODEL}" LLM_API_KEY="${LLM_API_KEY}" \
+    python3 -c '
+import json
+import os
+
+prefs = {
+    "llm_url": os.environ["LLM_URL"],
+    "llm_model": os.environ["LLM_MODEL"],
+    "llm_api_key": os.environ["LLM_API_KEY"],
+}
+print(json.dumps(prefs))
+' > "${prefs_tmp}"; then
+    rm -f -- "${prefs_tmp}"
+    echo "Failed to generate ${PREFS_FILE}" >&2
+    exit 1
+  fi
+  mv -- "${prefs_tmp}" "${PREFS_FILE}"
+  echo "Seeded ${PREFS_FILE} (llm_url=${LLM_URL}, model=${LLM_MODEL})"
+else
+  echo "Preferences already exist at ${PREFS_FILE} — skipping seed"
+fi
+
+# ── Build image ───────────────────────────────────────────────────────────────
+echo "Building Turnstone image..."
+docker build -t circuitforge/turnstone:latest "${REPO_DIR}"
+
+# ── Deploy container ──────────────────────────────────────────────────────────
+# Remove any previous instance first; a missing container is not an error.
+docker rm -f turnstone-cluster 2>/dev/null || true
+
+# Host networking, the data dir mounted read-write at /data, and the patterns
+# dir mounted read-only at /patterns. The health check curls the service's
+# /turnstone/health endpoint on PORT from inside the container.
+docker run -d \
+  --name=turnstone-cluster \
+  --restart=unless-stopped \
+  --net=host \
+  -v "${DATA_DIR}:/data" \
+  -v "${PATTERNS_DIR}:/patterns:ro" \
+  -e TURNSTONE_DB=/data/turnstone.db \
+  -e TURNSTONE_SOURCE_HOST="heimdall-cluster" \
+  -e TURNSTONE_BUNDLE_ENDPOINT="${TURNSTONE_BUNDLE_ENDPOINT:-}" \
+  -e PYTHONUNBUFFERED=1 \
+  -e TZ="${TZ}" \
+  --health-cmd="curl -f http://localhost:${PORT}/turnstone/health || exit 1" \
+  --health-interval=30s \
+  --health-timeout=10s \
+  --health-start-period=20s \
+  --health-retries=3 \
+  circuitforge/turnstone:latest
+
+echo ""
+echo "Turnstone cluster is starting up."
+# Print where the UI is reachable, followed by a blank separator line.
+cat <<EOF
+ UI: http://heimdall:${PORT}/turnstone/
+
+EOF
+
+# ── systemd service (optional) ────────────────────────────────────────────────
+# Optional unit that starts the existing container at boot:
+#   sudo tee /etc/systemd/system/turnstone-cluster.service > /dev/null << 'EOF'
+#   [Unit]
+#   Description=Turnstone cluster log monitor
+#   After=docker.service
+#   Requires=docker.service
+#
+#   [Service]
+#   Type=oneshot
+#   RemainAfterExit=yes
+#   ExecStart=/usr/bin/docker start turnstone-cluster
+#   ExecStop=/usr/bin/docker stop turnstone-cluster
+#
+#   [Install]
+#   WantedBy=multi-user.target
+#   EOF
+#   sudo systemctl daemon-reload
+#   sudo systemctl enable --now turnstone-cluster
+
+# Follow-up commands for the operator: health check, one-off ingest, and the
+# cron entry. In an unquoted here-document "\\" prints a single backslash, so
+# the displayed commands keep their line-continuation markers.
+cat <<EOF
+Check container health with:
+ docker ps --filter name=turnstone-cluster
+ docker logs turnstone-cluster
+
+Ingest now:
+ bash ${REPO_DIR}/scripts/collect_cluster_logs.sh && \\
+ docker exec turnstone-cluster python scripts/ingest_corpus.py \\
+ --sources /patterns/sources-cluster.yaml --db /data/turnstone.db
+
+To set up the 15-minute cron, add to root's crontab (sudo crontab -e):
+ */15 * * * * bash ${REPO_DIR}/scripts/collect_cluster_logs.sh && \\
+ docker exec turnstone-cluster python scripts/ingest_corpus.py \\
+ --sources /patterns/sources-cluster.yaml --db /data/turnstone.db \\
+ >> /var/log/turnstone-cluster-ingest.log 2>&1
+EOF