#!/usr/bin/env bash
# Export recent system messages to files the Turnstone container can glean.
#
# Exports:
#   journal-export.jsonl  — journald (if journalctl is available)
#   dmesg-export.txt      — kernel ring buffer (always)
#
# Output files land in /opt/turnstone/data/ which is bind-mounted at /data/
# inside the container.
#
# Each export is first written to a temporary file next to its destination
# and then renamed into place, so a reader never observes a half-written
# file and a failed export leaves the previous one untouched.
#
# Usage (standalone):
#   sudo bash /opt/turnstone/scripts/export_journal.sh
#
# Cron (combined with glean):
#   */15 * * * * bash /opt/turnstone/scripts/export_journal.sh && \
#     podman exec turnstone python scripts/ingest_corpus.py \
#       --sources /patterns/sources.yaml --db /data/turnstone.db \
#       >> /var/log/turnstone-glean.log 2>&1

set -euo pipefail

DATA_DIR=/opt/turnstone/data

journal_out="${DATA_DIR}/journal-export.jsonl"
dmesg_out="${DATA_DIR}/dmesg-export.txt"

# Temp files live in the same directory as their targets so the final `mv`
# is a same-filesystem rename rather than a copy. They are created by plain
# redirection, so their mode follows the umask just like a direct write.
journal_tmp="${journal_out}.tmp.$$"
dmesg_tmp="${dmesg_out}.tmp.$$"

# Remove leftover temp files on every exit path (including set -e aborts).
cleanup() {
  rm -f -- "${journal_tmp}" "${dmesg_tmp}"
}
trap cleanup EXIT

# Fail with a clear message instead of a bare redirection error.
if [[ ! -d "${DATA_DIR}" ]]; then
  echo "error: data directory ${DATA_DIR} does not exist" >&2
  exit 1
fi

# ── journald ─────────────────────────────────────────────────────────────────
# 20-minute window (slightly wider than the 15-min cron interval) ensures no
# gaps between runs. Ingest deduplicates via entry_id hash so overlap is safe.
if command -v journalctl &>/dev/null; then
  journalctl \
    --output=json \
    --priority=0..5 \
    --since "20 minutes ago" \
    --no-pager \
    > "${journal_tmp}"
  mv -f -- "${journal_tmp}" "${journal_out}"
  echo "journald: $(wc -l < "${journal_out}") entries"
else
  # No journald — write an empty file so the sources.yaml entry doesn't warn
  : > "${journal_tmp}"
  mv -f -- "${journal_tmp}" "${journal_out}"
  echo "journald: not available (skipped)"
fi

# ── dmesg ─────────────────────────────────────────────────────────────────────
# Use -T for human-readable timestamps when available (util-linux >= 2.21).
# Fall back to plain dmesg if -T is not supported. The -T attempt writes
# straight to the temp file, so the ring buffer is read only once in the
# common case; stderr is silenced only for that first attempt so the
# fallback's errors stay visible.
if ! dmesg -T > "${dmesg_tmp}" 2>/dev/null; then
  dmesg > "${dmesg_tmp}"
fi
mv -f -- "${dmesg_tmp}" "${dmesg_out}"
echo "dmesg: $(wc -l < "${dmesg_out}") lines"