Renames the app/ingest/ package to app/glean/ and updates all references
across Python modules, shell scripts, Vue components, tests, and documentation.

Intentionally preserved:
- SQLite column name ingest_time (avoids schema migration)
- RetrievedEntry.ingest_time field (maps to the column above)
- Any public-facing JSON keys that reference ingest_time

Changes by category:
- app/ingest/ → app/glean/ (full package move, all parsers)
- app/tasks/ingest_scheduler.py → app/tasks/glean_scheduler.py
- scripts/ingest_corpus.py → scripts/glean_corpus.py
- tests/test_ingest_*.py → tests/test_glean_*.py
- Docstrings, log messages, comments: ingest → glean
- Env var: TURNSTONE_INGEST_INTERVAL → TURNSTONE_GLEAN_INTERVAL
- Shell scripts: glean.log, glean_corpus.py references
- README.md: multi-source ingest → multi-source glean
- .env.example: updated env var name
- patterns/: new diagnostic patterns from 2026-05-20 SSH incident (service_crash_loop, pkg_daemon_restart, ssh_forward_conflict)
- SourcesView.vue: pipeline label updated
- All test import paths updated to app.glean.*

285 tests passing.
90 lines
3.2 KiB
Python
90 lines
3.2 KiB
Python
"""Tests for the dmesg log gleaner."""
|
|
from __future__ import annotations
|
|
|
|
from app.glean.dmesg_log import is_dmesg_log, parse
|
|
|
|
# Five log lines whose bracketed prefix is a numeric (relative) timestamp.
RELATIVE_SAMPLE = (
    "[ 0.000000] Linux version 6.8.0-65-generic\n"
    "[ 0.012345] Command line: BOOT_IMAGE=/vmlinuz-6.8.0-65-generic\n"
    "[12345.678901] usb 1-1: USB disconnect, device number 2\n"
    "[12400.000000] EXT4-fs error (device sda1): ext4_find_entry: reading directory\n"
    "[12401.000000] Out of memory: Kill process 1234 (firefox) score 900\n"
)

# Four log lines whose bracketed prefix is a human-readable date and time.
HUMAN_SAMPLE = (
    "[Mon May 11 14:23:01 2026] Linux version 6.8.0-65-generic\n"
    "[Mon May 11 14:23:02 2026] usb 1-1: USB disconnect, device number 2\n"
    "[Mon May 11 14:24:00 2026] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)\n"
    "[Mon May 11 14:25:00 2026] Out of memory: Kill process 5678 (python3) score 800\n"
)

# One line of each timestamp flavour within the same input.
MIXED_SAMPLE = (
    "[ 0.000000] boot message\n"
    "[Mon May 11 14:23:01 2026] human ts message\n"
)
|
|
|
|
|
|
class TestDetector:
    """Checks which single-line inputs ``is_dmesg_log`` accepts and rejects."""

    def test_detects_relative(self):
        """A bracketed numeric timestamp prefix is accepted."""
        line = "[ 0.000000] Linux version 6.8.0"
        assert is_dmesg_log(line)

    def test_detects_relative_large_offset(self):
        """A numeric timestamp with a multi-digit integer part is accepted."""
        line = "[12345.678901] usb disconnect"
        assert is_dmesg_log(line)

    def test_detects_human_timestamp(self):
        """A bracketed weekday/month/day/time/year prefix is accepted."""
        line = "[Mon May 11 14:23:01 2026] message"
        assert is_dmesg_log(line)

    def test_rejects_syslog(self):
        """A line starting with an unbracketed date and host is rejected."""
        line = "May 11 14:23:01 hostname sshd[1234]: message"
        assert not is_dmesg_log(line)

    def test_rejects_servarr(self):
        """A pipe-delimited line starting with an ISO-style date is rejected."""
        line = "2026-05-11 02:31:51.5|Info|Component|Message"
        assert not is_dmesg_log(line)

    def test_rejects_plaintext(self):
        """Free text without any timestamp prefix is rejected."""
        line = "just a plain log line"
        assert not is_dmesg_log(line)
|
|
|
|
|
|
class TestRelativeParser:
    """Exercises ``parse`` on input carrying relative (numeric) timestamps."""

    def _parse(self, text: str) -> list:
        """Feed *text* to ``parse`` line by line and collect the yielded entries."""
        line_iter = iter(text.splitlines(keepends=True))
        return list(parse(line_iter, "dmesg_test", []))

    def test_entry_count(self):
        """Every line of the sample produces exactly one entry."""
        parsed = self._parse(RELATIVE_SAMPLE)
        assert len(parsed) == 5

    def test_relative_ts_raw_preserved(self):
        """The first entry exposes its bracketed timestamp as ``timestamp_raw``."""
        parsed = self._parse(RELATIVE_SAMPLE)
        first = parsed[0]
        assert first.timestamp_raw == "[0.000000]"

    def test_relative_ts_iso_empty(self):
        """``timestamp_iso`` stays empty for a relative timestamp."""
        parsed = self._parse(RELATIVE_SAMPLE)
        first = parsed[0]
        # A relative timestamp cannot be converted to an absolute time.
        assert first.timestamp_iso == ""

    def test_message_text(self):
        """The message body ends up in the entry's ``text``."""
        parsed = self._parse(RELATIVE_SAMPLE)
        first = parsed[0]
        assert "Linux version" in first.text

    def test_source_id_propagated(self):
        """Each entry carries the source id that was handed to ``parse``."""
        for entry in self._parse(RELATIVE_SAMPLE):
            assert entry.source_id == "dmesg_test"

    def test_sequence_is_monotonic(self):
        """Sequence numbers are in ascending order and never repeat."""
        seqs = [entry.sequence for entry in self._parse(RELATIVE_SAMPLE)]
        assert seqs == sorted(seqs)
        assert len(set(seqs)) == len(seqs)
|
|
|
|
|
|
class TestHumanParser:
    """Exercises ``parse`` on input carrying human-readable timestamps."""

    def _parse(self, text: str) -> list:
        """Feed *text* to ``parse`` line by line and collect the yielded entries."""
        line_iter = iter(text.splitlines(keepends=True))
        return list(parse(line_iter, "dmesg_human", []))

    def test_entry_count(self):
        """Every line of the sample produces exactly one entry."""
        parsed = self._parse(HUMAN_SAMPLE)
        assert len(parsed) == 4

    def test_timestamp_parsed(self):
        """The first entry's timestamp is rendered as an ISO string with a +00:00 offset."""
        parsed = self._parse(HUMAN_SAMPLE)
        first = parsed[0]
        assert first.timestamp_iso == "2026-05-11T14:23:01+00:00"

    def test_timestamp_raw(self):
        """The original date text is still present in ``timestamp_raw``."""
        parsed = self._parse(HUMAN_SAMPLE)
        first = parsed[0]
        assert "Mon May 11" in first.timestamp_raw

    def test_message_text(self):
        """The message body ends up in the entry's ``text``."""
        parsed = self._parse(HUMAN_SAMPLE)
        first = parsed[0]
        assert "Linux version" in first.text
|