Renames the app/ingest/ package to app/glean/ and updates all references across Python modules, shell scripts, Vue components, tests, and documentation. Intentionally preserved: - SQLite column name ingest_time (avoids schema migration) - RetrievedEntry.ingest_time field (maps to the column above) - Any public-facing JSON keys that reference ingest_time Changes by category: - app/ingest/ → app/glean/ (full package move, all parsers) - app/tasks/ingest_scheduler.py → app/tasks/glean_scheduler.py - scripts/ingest_corpus.py → scripts/glean_corpus.py - tests/test_ingest_*.py → tests/test_glean_*.py - Docstrings, log messages, comments: ingest → glean - Env var: TURNSTONE_INGEST_INTERVAL → TURNSTONE_GLEAN_INTERVAL - Shell scripts: glean.log, glean_corpus.py references - README.md: multi-source ingest → multi-source glean - .env.example: updated env var name - patterns/: new diagnostic patterns from 2026-05-20 SSH incident (service_crash_loop, pkg_daemon_restart, ssh_forward_conflict) - SourcesView.vue: pipeline label updated - All test import paths updated to app.glean.* 285 tests passing.
49 lines
2 KiB
Bash
49 lines
2 KiB
Bash
#!/usr/bin/env bash
|
|
# Export recent system messages to files the Turnstone container can glean.
#
# Exports:
#   journal-export.jsonl — journald (if journalctl is available)
#   dmesg-export.txt — kernel ring buffer (always)
#
# Output files land in /opt/turnstone/data/ which is bind-mounted at /data/
# inside the container.
#
# Usage (standalone):
#   sudo bash /opt/turnstone/scripts/export_journal.sh
#
# Cron (combined with glean):
#   */15 * * * * bash /opt/turnstone/scripts/export_journal.sh && \
#     podman exec turnstone python scripts/glean_corpus.py \
#       --sources /patterns/sources.yaml --db /data/turnstone.db \
#       >> /var/log/turnstone-glean.log 2>&1

# Strict mode: abort on any unhandled command failure (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Host directory that receives the export files. It is a fixed path, so mark
# it readonly to guard against accidental reassignment later in the script.
readonly DATA_DIR=/opt/turnstone/data

# ── journald ─────────────────────────────────────────────────────────────────
# Each run exports the last 20 minutes of journal entries at priorities 0..5.
# The window is slightly wider than the 15-minute cron interval, so consecutive
# runs overlap rather than leave gaps; glean deduplicates via entry_id hash, so
# re-exported entries are safe.
journal_out="${DATA_DIR}/journal-export.jsonl"

if ! command -v journalctl >/dev/null 2>&1; then
  # No journald on this host — still write an empty file so the sources.yaml
  # entry doesn't warn.
  : > "${journal_out}"
  echo "journald: not available (skipped)"
else
  journal_args=(
    --output=json
    --priority=0..5
    --since "20 minutes ago"
    --no-pager
  )
  journalctl "${journal_args[@]}" > "${journal_out}"
  echo "journald: $(wc -l < "${journal_out}") entries"
fi

# ── dmesg ─────────────────────────────────────────────────────────────────────
# Prefer human-readable timestamps (-T, util-linux >= 2.21). A silent probe run
# decides whether -T is usable; if the probe fails, export plain dmesg output.
dmesg_out="${DATA_DIR}/dmesg-export.txt"

if ! dmesg -T >/dev/null 2>&1; then
  dmesg > "${dmesg_out}"
else
  dmesg -T > "${dmesg_out}"
fi

echo "dmesg: $(wc -l < "${dmesg_out}") lines"