- Add syslog.py — RFC 3164 parser for /var/log/syslog, /var/log/messages, auth.log, kern.log; ident prepended to message text for searchability
- Add dmesg_log.py — handles both relative [secs.usecs] and human-readable [Dow Mon DD HH:MM:SS YYYY] formats; relative timestamps preserved as raw
- Wire both into pipeline.py auto-detection (before plaintext fallback)
- Update export_journal.sh: checks for journalctl availability, falls back gracefully on non-systemd systems; adds dmesg -T export (falls back to plain dmesg on older kernels)
- Add syslog entries (commented) + dmesg source to sources.yaml
- 30 tests covering both parsers (detection + parse correctness)
49 lines
2 KiB
Bash
49 lines
2 KiB
Bash
#!/usr/bin/env bash
# Export recent system messages to files the Turnstone container can ingest.
#
# Exports:
#   journal-export.jsonl — journald (if journalctl is available)
#   dmesg-export.txt     — kernel ring buffer (always)
#
# Output files land in /opt/turnstone/data/ which is bind-mounted at /data/
# inside the container. Set TURNSTONE_DATA_DIR in the environment to write
# the exports to a different directory (the default is unchanged).
#
# Usage (standalone):
#   sudo bash /opt/turnstone/scripts/export_journal.sh
#
# Cron (combined with ingest):
#   */15 * * * * bash /opt/turnstone/scripts/export_journal.sh && \
#     podman exec turnstone python scripts/ingest_corpus.py \
#       --sources /patterns/sources.yaml --db /data/turnstone.db \
#       >> /var/log/turnstone-ingest.log 2>&1

# Abort on any unhandled command failure, unset variable, or failed pipeline
# stage, so the `&&` in the cron line above skips the ingest step.
set -euo pipefail

# Directory that receives the export files. Defaults to the host side of the
# container's /data/ bind mount; override with TURNSTONE_DATA_DIR.
DATA_DIR="${TURNSTONE_DATA_DIR:-/opt/turnstone/data}"
readonly DATA_DIR

# ── journald ─────────────────────────────────────────────────────────────────
# 20-minute window (slightly wider than the 15-min cron interval) ensures no
# gaps between runs. Ingest deduplicates via entry_id hash so overlap is safe.
#
# The export is written to a temporary file next to the target and renamed
# into place only after journalctl succeeds. A failed run therefore leaves the
# previous journal-export.jsonl intact instead of a truncated one.
journal_out="${DATA_DIR}/journal-export.jsonl"
if command -v journalctl &>/dev/null; then
  # Same directory as the target so the final mv is a rename, not a copy.
  journal_tmp="${journal_out}.tmp.$$"

  # Capture the status by hand: under `set -e` a bare failure would abort
  # before the temporary file could be cleaned up.
  journal_rc=0
  journalctl \
    --output=json \
    --priority=0..5 \
    --since "20 minutes ago" \
    --no-pager \
    > "${journal_tmp}" || journal_rc=$?

  if (( journal_rc != 0 )); then
    rm -f -- "${journal_tmp}"
    echo "journald: export failed (exit ${journal_rc}); previous export left untouched" >&2
    exit "${journal_rc}"
  fi

  mv -f -- "${journal_tmp}" "${journal_out}"
  echo "journald: $(wc -l < "${journal_out}") entries"
else
  # No journald — write an empty file so the sources.yaml entry doesn't warn
  : > "${journal_out}"
  echo "journald: not available (skipped)"
fi

# ── dmesg ─────────────────────────────────────────────────────────────────────
# Use -T for human-readable timestamps when available (util-linux >= 2.21).
# Fall back to plain dmesg if -T is not supported.
#
# The -T attempt writes straight to a temporary file instead of probing with a
# throw-away run, so the ring buffer is read once in the common case. The
# temporary file is renamed into place only on success, so a failed run leaves
# the previous dmesg-export.txt intact instead of a truncated one.
dmesg_out="${DATA_DIR}/dmesg-export.txt"
# Same directory as the target so the final mv is a rename, not a copy.
dmesg_tmp="${dmesg_out}.tmp.$$"

dmesg_rc=0
# stderr is silenced only for the -T attempt (an unsupported-option error is
# expected there); the fallback run reports its own errors normally.
if ! dmesg -T > "${dmesg_tmp}" 2>/dev/null; then
  # The redirect truncates the temp file, discarding any partial -T output.
  dmesg > "${dmesg_tmp}" || dmesg_rc=$?
fi

if (( dmesg_rc != 0 )); then
  rm -f -- "${dmesg_tmp}"
  echo "dmesg: export failed (exit ${dmesg_rc}); previous export left untouched" >&2
  exit "${dmesg_rc}"
fi

mv -f -- "${dmesg_tmp}" "${dmesg_out}"
echo "dmesg: $(wc -l < "${dmesg_out}") lines"