turnstone/app/services/incidents.py
pyr0ball aa80f307fe refactor: rename ingest → glean throughout codebase
Renames the app/ingest/ package to app/glean/ and updates all
references across Python modules, shell scripts, Vue components,
tests, and documentation.

Intentionally preserved:
- SQLite column name ingest_time (avoids schema migration)
- RetrievedEntry.ingest_time field (maps to the column above)
- Any public-facing JSON keys that reference ingest_time

Changes by category:
- app/ingest/ → app/glean/ (full package move, all parsers)
- app/tasks/ingest_scheduler.py → app/tasks/glean_scheduler.py
- scripts/ingest_corpus.py → scripts/glean_corpus.py
- tests/test_ingest_*.py → tests/test_glean_*.py
- Docstrings, log messages, comments: ingest → glean
- Env var: TURNSTONE_INGEST_INTERVAL → TURNSTONE_GLEAN_INTERVAL
- Shell scripts: glean.log, glean_corpus.py references
- README.md: multi-source ingest → multi-source glean
- .env.example: updated env var name
- patterns/: new diagnostic patterns from 2026-05-20 SSH incident
  (service_crash_loop, pkg_daemon_restart, ssh_forward_conflict)
- SourcesView.vue: pipeline label updated
- All test import paths updated to app.glean.*

285 tests passing.
2026-05-20 23:02:55 -07:00

223 lines
7.4 KiB
Python

"""CRUD operations for user-tagged incidents and received log bundles."""
from __future__ import annotations
import json
import sqlite3
import uuid
from pathlib import Path
from app.glean.base import now_iso
from app.services.models import Incident, ReceivedBundle
from app.services.search import SearchResult, entries_in_window, search
def _row_to_incident(row: sqlite3.Row) -> Incident:
    """Convert a row selected from the ``incidents`` table into an ``Incident``.

    Every field is read by column name. ``issue_type`` is the only optional
    column: when the row does not expose it, an empty string is used instead.
    """
    # Tolerate rows that do not carry an issue_type column.
    if "issue_type" in row.keys():
        issue_type = row["issue_type"]
    else:
        issue_type = ""

    return Incident(
        id=row["id"],
        label=row["label"],
        issue_type=issue_type,
        started_at=row["started_at"],
        ended_at=row["ended_at"],
        notes=row["notes"],
        created_at=row["created_at"],
        severity=row["severity"],
    )
def _row_to_bundle(row: sqlite3.Row) -> ReceivedBundle:
    """Convert a row selected from ``received_bundles`` into a ``ReceivedBundle``.

    Each constructor keyword is filled from the row column of the same name.
    """
    # Column names double as the ReceivedBundle keyword names.
    columns = (
        "id",
        "source_host",
        "issue_type",
        "label",
        "severity",
        "started_at",
        "bundled_at",
        "entry_count",
        "bundle_json",
    )
    return ReceivedBundle(**{name: row[name] for name in columns})
def create_incident(
    db_path: Path,
    label: str,
    issue_type: str = "",
    started_at: str | None = None,
    ended_at: str | None = None,
    notes: str = "",
    severity: str = "medium",
) -> Incident:
    """Create a new incident, insert it into the ``incidents`` table, and return it.

    The incident receives a freshly generated UUID4 string as its ``id`` and
    the current ``now_iso()`` value as ``created_at``.

    Args:
        db_path: Path to the SQLite database file.
        label: Label stored on the incident.
        issue_type: Issue type stored on the incident (defaults to ``""``).
        started_at: Optional start of the incident window.
        ended_at: Optional end of the incident window.
        notes: Free-form notes (defaults to ``""``).
        severity: Severity stored on the incident (defaults to ``"medium"``).

    Returns:
        The ``Incident`` that was written to the database.

    Raises:
        sqlite3.Error: Propagated unchanged if a statement fails; the
            connection is still closed in that case.
    """
    incident = Incident(
        id=str(uuid.uuid4()),
        label=label,
        issue_type=issue_type,
        started_at=started_at,
        ended_at=ended_at,
        notes=notes,
        created_at=now_iso(),
        severity=severity,
    )
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute(
            "INSERT INTO incidents (id, label, issue_type, started_at, ended_at, notes, created_at, severity) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (incident.id, incident.label, incident.issue_type, incident.started_at,
             incident.ended_at, incident.notes, incident.created_at, incident.severity),
        )
        conn.commit()
    finally:
        # Close on every path so a failing statement does not leak the handle.
        conn.close()
    return incident
def list_incidents(db_path: Path) -> list[Incident]:
    """Return every stored incident, newest ``created_at`` first.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        A list of ``Incident`` objects; empty when the table has no rows.

    Raises:
        sqlite3.Error: Propagated unchanged if the query fails; the
            connection is still closed in that case.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # Row objects allow access by column name in _row_to_incident.
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM incidents ORDER BY created_at DESC"
        ).fetchall()
    finally:
        # Close on every path so a failing query does not leak the handle.
        conn.close()
    return [_row_to_incident(r) for r in rows]
def get_incident(db_path: Path, incident_id: str) -> Incident | None:
    """Look up a single incident by its id.

    Args:
        db_path: Path to the SQLite database file.
        incident_id: Value matched against the ``id`` column.

    Returns:
        The matching ``Incident``, or ``None`` when no row has that id.

    Raises:
        sqlite3.Error: Propagated unchanged if the query fails; the
            connection is still closed in that case.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # Row objects allow access by column name in _row_to_incident.
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM incidents WHERE id = ?", (incident_id,)
        ).fetchone()
    finally:
        # Close on every path so a failing query does not leak the handle.
        conn.close()
    return _row_to_incident(row) if row else None
def delete_incident(db_path: Path, incident_id: str) -> bool:
    """Delete the incident with the given id.

    Args:
        db_path: Path to the SQLite database file.
        incident_id: Value matched against the ``id`` column.

    Returns:
        ``True`` if at least one row was deleted, ``False`` otherwise.

    Raises:
        sqlite3.Error: Propagated unchanged if a statement fails; the
            connection is still closed in that case.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        cur = conn.execute("DELETE FROM incidents WHERE id = ?", (incident_id,))
        conn.commit()
        # rowcount reports how many rows the DELETE removed.
        deleted = cur.rowcount > 0
    finally:
        # Close on every path so a failing statement does not leak the handle.
        conn.close()
    return deleted
def get_incident_entries(
    db_path: Path,
    incident: Incident,
    limit: int = 100,
) -> list[SearchResult]:
    """Return log entries associated with an incident's time window.

    Entries are collected in stages and de-duplicated by ``entry_id``:

    1. Three searches for the incident label inside the window (any
       severity, ERROR only, CRITICAL only), each capped at ``limit // 2``.
    2. If fewer than ``limit`` entries were found, ERROR entries from the
       window (capped at ``limit // 2``).
    3. If still fewer than ``limit``, any entries from the window, capped at
       the remaining shortfall.

    The merged entries are sorted by ``(timestamp_iso, sequence)`` and cut
    to at most ``limit`` items.
    """
    per_query = limit // 2
    window = {
        "since": incident.started_at,
        "until": incident.ended_at,
        "limit": per_query,
    }

    by_label = search(db_path, query=incident.label, include_repeats=False, **window)
    by_error = search(
        db_path, query=incident.label, severity="ERROR", include_repeats=False, **window
    )
    by_critical = search(
        db_path, query=incident.label, severity="CRITICAL", include_repeats=False, **window
    )

    # Insertion-ordered map: the first occurrence of each entry_id wins.
    unique: dict[str, SearchResult] = {}

    def _absorb(candidates) -> None:
        for candidate in candidates:
            unique.setdefault(candidate.entry_id, candidate)

    # Label matches first, then CRITICAL, then ERROR.
    _absorb(by_label)
    _absorb(by_critical)
    _absorb(by_error)

    if len(unique) < limit:
        _absorb(
            entries_in_window(
                db_path, incident.started_at, incident.ended_at,
                severity="ERROR", limit=per_query,
            )
        )

    if len(unique) < limit:
        _absorb(
            entries_in_window(
                db_path, incident.started_at, incident.ended_at,
                limit=limit - len(unique),
            )
        )

    # Entries with no timestamp get the placeholder key "\xff" for ordering.
    ordered = sorted(
        unique.values(),
        key=lambda item: (item.timestamp_iso or "\xff", item.sequence),
    )
    return ordered[:limit]
def build_bundle(
    db_path: Path,
    incident: Incident,
    source_host: str,
    limit: int = 200,
) -> dict:
    """Assemble a labeled bundle: incident metadata + related log entries.

    The related entries come from ``get_incident_entries`` with the given
    ``limit``. The returned dict carries ``bundle_version`` 1, the
    ``source_host``, a ``bundled_at`` value from ``now_iso()``, the incident
    fields, and one plain dict per log entry.
    """
    related = get_incident_entries(db_path, incident, limit=limit)

    bundle: dict = {
        "bundle_version": 1,
        "source_host": source_host,
        "bundled_at": now_iso(),
    }

    bundle["incident"] = {
        "id": incident.id,
        "label": incident.label,
        "issue_type": incident.issue_type,
        "started_at": incident.started_at,
        "ended_at": incident.ended_at,
        "severity": incident.severity,
        "notes": incident.notes,
    }

    serialized = []
    for entry in related:
        serialized.append(
            {
                "entry_id": entry.entry_id,
                "source_id": entry.source_id,
                "timestamp_iso": entry.timestamp_iso,
                "severity": entry.severity,
                "text": entry.text,
                # Copy into a plain list so the bundle holds its own sequence.
                "matched_patterns": list(entry.matched_patterns),
            }
        )
    bundle["log_entries"] = serialized

    return bundle
def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
    """Store an incoming bundle from a remote Turnstone instance.

    Summary columns are taken from the bundle (with defaults for missing
    keys) and the whole bundle is kept as JSON text in ``bundle_json``.

    Args:
        db_path: Path to the SQLite database file.
        bundle: Bundle dict; the keys read here are ``source_host``,
            ``bundled_at``, ``incident`` and ``log_entries``.

    Returns:
        The ``ReceivedBundle`` record that was inserted, carrying a freshly
        generated UUID4 string as its ``id``.

    Raises:
        sqlite3.Error: Propagated unchanged if a statement fails; the
            connection is still closed in that case.
    """
    # The bundle comes from another host: treat an explicit null the same as
    # a missing key rather than failing on None.get / len(None).
    inc = bundle.get("incident") or {}
    log_entries = bundle.get("log_entries") or []
    record = ReceivedBundle(
        id=str(uuid.uuid4()),
        source_host=bundle.get("source_host", "unknown"),
        issue_type=inc.get("issue_type", ""),
        label=inc.get("label", ""),
        severity=inc.get("severity", "medium"),
        started_at=inc.get("started_at"),
        bundled_at=bundle.get("bundled_at", now_iso()),
        entry_count=len(log_entries),
        bundle_json=json.dumps(bundle),
    )
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute(
            "INSERT INTO received_bundles "
            "(id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (record.id, record.source_host, record.issue_type, record.label,
             record.severity, record.started_at, record.bundled_at, record.entry_count, record.bundle_json),
        )
        conn.commit()
    finally:
        # Close on every path so a failing statement does not leak the handle.
        conn.close()
    return record
def list_bundles(db_path: Path) -> list[ReceivedBundle]:
    """Return every received bundle, newest ``bundled_at`` first.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        A list of ``ReceivedBundle`` objects; empty when the table has no rows.

    Raises:
        sqlite3.Error: Propagated unchanged if the query fails; the
            connection is still closed in that case.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # Row objects allow access by column name in _row_to_bundle.
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json "
            "FROM received_bundles ORDER BY bundled_at DESC"
        ).fetchall()
    finally:
        # Close on every path so a failing query does not leak the handle.
        conn.close()
    return [_row_to_bundle(r) for r in rows]
def get_bundle(db_path: Path, bundle_id: str) -> ReceivedBundle | None:
    """Look up a single received bundle by its id.

    Args:
        db_path: Path to the SQLite database file.
        bundle_id: Value matched against the ``id`` column.

    Returns:
        The matching ``ReceivedBundle``, or ``None`` when no row has that id.

    Raises:
        sqlite3.Error: Propagated unchanged if the query fails; the
            connection is still closed in that case.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # Row objects allow access by column name in _row_to_bundle.
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM received_bundles WHERE id = ?", (bundle_id,)
        ).fetchone()
    finally:
        # Close on every path so a failing query does not leak the handle.
        conn.close()
    return _row_to_bundle(row) if row else None