turnstone/app/services/incidents.py
pyr0ball f0fbe245f0 feat: bundle PII sanitization, onboarding wizard, NL source addition (#51, #52, #53)
Bundle export (#51):
- _redact_text() with 5 compiled regex patterns (IPv4, email, user=, host=, password=)
- build_bundle(sanitize=False) — per-entry redaction at export time
- sent_bundles table tracks every outgoing export (GET and POST /send)
- GET /api/sent-bundles exposes history; SentBundle model added
- BundlesView: Received/Sent tabs, sanitized badge, 5-entry preview, re-download
- IncidentsView: Sanitize PII checkbox next to Send Bundle

Onboarding wizard (#52):
- app/services/discover.py: journald/Docker/file detection (best-effort, safe in containers)
- GET /api/setup/status, /discover, POST /api/setup/write (additive, appends to existing)
- SetupWizard.vue: 3-step Detect → Select → Confirm
  - Step 1 shows grouped summary (journald/file/docker counts)
  - Step 2: collapsible groups with All/None section toggles
    - journald + file: pre-selected; docker: collapsed, none pre-selected
  - Step 3: YAML preview before write
- SourcesView: shows wizard on first run; Add Source button reuses it

NL source addition (#53):
- app/services/nl_source.py: keyword shortcut (13 well-known apps) + LLM fallback
- POST /api/setup/interpret: keyword → LLM → null (graceful fallback)
- NL field in wizard step 2; manual form shown when interpretation fails
- Added sources appear in grouped list immediately
2026-05-29 14:14:28 -07:00

285 lines
9.7 KiB
Python

"""CRUD operations for user-tagged incidents and received log bundles."""
from __future__ import annotations
import json
import re
import sqlite3
import uuid
from pathlib import Path
from app.glean.base import now_iso
from app.services.models import Incident, ReceivedBundle, SentBundle
from app.services.search import SearchResult, entries_in_window, search
# A redactable value: a single- or double-quoted string (which may contain
# spaces but not newlines) plus any non-space characters glued to it, or
# otherwise a plain run of non-whitespace. The quoted alternatives come first
# so that `password="my secret"` is consumed whole instead of leaking
# everything after the first space.
_REDACT_VALUE = r"""(?:"[^"\n]*"\S*|'[^'\n]*'\S*|\S+)"""

# Ordered (pattern, replacement) pairs applied sequentially by _redact_text().
# The key=value rules accept either "=" or ":" as the separator and always
# emit "=" in the replacement.
_REDACT_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Dotted-quad IPv4 addresses.
    (re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"), "[IP]"),
    # Email addresses.
    (re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}"), "[EMAIL]"),
    # user= / username= / uid= ; the key is kept as written via \1.
    (re.compile(r"(?i)\b(user(?:name)?|uid)\s*[=:]\s*" + _REDACT_VALUE), r"\1=[USER]"),
    # host= ; the key is rewritten to lowercase "host".
    (re.compile(r"(?i)\bhost\s*[=:]\s*" + _REDACT_VALUE), "host=[HOST]"),
    # password= ; the key is rewritten to lowercase "password".
    (re.compile(r"(?i)\bpassword\s*[=:]\s*" + _REDACT_VALUE), "password=[REDACTED]"),
]


def _redact_text(text: str) -> str:
    """Return *text* with every match of ``_REDACT_PATTERNS`` replaced.

    Patterns are applied in list order, each one operating on the output of
    the previous one, so an IP or email inside a ``user=``/``host=`` value is
    first replaced by its placeholder and then swallowed by the key=value rule.

    Args:
        text: Raw log text to scrub.

    Returns:
        The scrubbed text; unchanged if nothing matches.
    """
    for pattern, replacement in _REDACT_PATTERNS:
        text = pattern.sub(replacement, text)
    return text
def _row_to_incident(row: sqlite3.Row) -> Incident:
    """Build an ``Incident`` from one row of the ``incidents`` table.

    Args:
        row: A ``sqlite3.Row`` exposing the incident columns by name.

    Returns:
        The ``Incident`` populated from the row's columns.
    """
    required = ("id", "label", "started_at", "ended_at", "notes", "created_at", "severity")
    fields = {column: row[column] for column in required}
    # issue_type is the only column allowed to be absent; it falls back to "".
    # NOTE(review): presumably tolerates schemas that predate the column — confirm.
    fields["issue_type"] = row["issue_type"] if "issue_type" in row.keys() else ""
    return Incident(**fields)
def _row_to_bundle(row: sqlite3.Row) -> ReceivedBundle:
    """Build a ``ReceivedBundle`` from one row of the ``received_bundles`` table.

    Args:
        row: A ``sqlite3.Row`` exposing every bundle column by name.

    Returns:
        The ``ReceivedBundle`` populated column-for-column from the row.
    """
    columns = (
        "id",
        "source_host",
        "issue_type",
        "label",
        "severity",
        "started_at",
        "bundled_at",
        "entry_count",
        "bundle_json",
    )
    return ReceivedBundle(**{column: row[column] for column in columns})
def create_incident(
    db_path: Path,
    label: str,
    issue_type: str = "",
    started_at: str | None = None,
    ended_at: str | None = None,
    notes: str = "",
    severity: str = "medium",
) -> Incident:
    """Create an incident, insert it into the ``incidents`` table, and return it.

    The incident gets a fresh UUID4 ``id`` and a ``created_at`` stamp from
    ``now_iso()``.

    Args:
        db_path: Path to the SQLite database file.
        label: Incident label.
        issue_type: Optional issue type; defaults to ``""``.
        started_at: Optional start of the incident window.
        ended_at: Optional end of the incident window.
        notes: Free-form notes; defaults to ``""``.
        severity: Severity label; defaults to ``"medium"``.

    Returns:
        The newly created ``Incident``.

    Raises:
        sqlite3.Error: If the insert fails; the connection is still closed.
    """
    incident = Incident(
        id=str(uuid.uuid4()),
        label=label,
        issue_type=issue_type,
        started_at=started_at,
        ended_at=ended_at,
        notes=notes,
        created_at=now_iso(),
        severity=severity,
    )
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute(
            "INSERT INTO incidents (id, label, issue_type, started_at, ended_at, notes, created_at, severity) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (incident.id, incident.label, incident.issue_type, incident.started_at,
             incident.ended_at, incident.notes, incident.created_at, incident.severity),
        )
        conn.commit()
    finally:
        # Close even when the insert or commit raises, so the handle is not leaked.
        conn.close()
    return incident
def list_incidents(db_path: Path) -> list[Incident]:
    """Return every incident, ordered by ``created_at`` descending.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        A list of ``Incident`` objects; empty when the table has no rows.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM incidents ORDER BY created_at DESC"
        ).fetchall()
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()
    return [_row_to_incident(r) for r in rows]
def get_incident(db_path: Path, incident_id: str) -> Incident | None:
    """Look up a single incident by id.

    Args:
        db_path: Path to the SQLite database file.
        incident_id: The ``id`` value to match.

    Returns:
        The matching ``Incident``, or ``None`` when no row has that id.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM incidents WHERE id = ?", (incident_id,)
        ).fetchone()
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()
    return _row_to_incident(row) if row else None
def delete_incident(db_path: Path, incident_id: str) -> bool:
    """Delete the incident with the given id.

    Args:
        db_path: Path to the SQLite database file.
        incident_id: The ``id`` of the incident to remove.

    Returns:
        ``True`` if at least one row was deleted, ``False`` otherwise.

    Raises:
        sqlite3.Error: If the delete fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        cur = conn.execute("DELETE FROM incidents WHERE id = ?", (incident_id,))
        conn.commit()
        # Read the count while the connection is still open.
        deleted = cur.rowcount > 0
    finally:
        # Close even when the delete or commit raises, so the handle is not leaked.
        conn.close()
    return deleted
def get_incident_entries(
    db_path: Path,
    incident: Incident,
    limit: int = 100,
) -> list[SearchResult]:
    """Collect log entries related to an incident's time window.

    Three label searches (any severity, ERROR, CRITICAL) are run with a
    per-query cap of ``limit // 2`` and merged without duplicates. While the
    merged list is still shorter than ``limit`` it is topped up from
    ``entries_in_window``: first ERROR entries, then entries of any severity.

    Args:
        db_path: Path to the SQLite database file.
        incident: Incident whose label and start/end bounds drive the queries.
        limit: Maximum number of entries returned.

    Returns:
        At most ``limit`` entries sorted by ``(timestamp_iso, sequence)``,
        with entries lacking a timestamp placed last.
    """
    per_query = limit // 2
    window: dict = {
        "since": incident.started_at,
        "until": incident.ended_at,
        "limit": per_query,
    }
    by_label = search(db_path, query=incident.label, include_repeats=False, **window)
    errors = search(db_path, query=incident.label, severity="ERROR", include_repeats=False, **window)
    criticals = search(db_path, query=incident.label, severity="CRITICAL", include_repeats=False, **window)

    known_ids: set[str] = set()
    merged: list[SearchResult] = []

    def absorb(candidates) -> None:
        # Append each candidate whose entry_id has not been collected yet.
        for candidate in candidates:
            if candidate.entry_id in known_ids:
                continue
            known_ids.add(candidate.entry_id)
            merged.append(candidate)

    # Merge order: label hits first, then CRITICAL, then ERROR.
    absorb(by_label + criticals + errors)

    if len(merged) < limit:
        absorb(
            entries_in_window(
                db_path, incident.started_at, incident.ended_at, severity="ERROR", limit=per_query
            )
        )

    if len(merged) < limit:
        absorb(
            entries_in_window(
                db_path, incident.started_at, incident.ended_at, limit=limit - len(merged)
            )
        )

    # "\xff" stands in for a missing timestamp so those entries sort last.
    merged.sort(key=lambda item: (item.timestamp_iso or "\xff", item.sequence))
    return merged[:limit]
def build_bundle(
    db_path: Path,
    incident: Incident,
    source_host: str,
    limit: int = 200,
    sanitize: bool = False,
) -> dict:
    """Assemble a labeled bundle: incident metadata plus related log entries.

    Args:
        db_path: Path to the SQLite database file.
        incident: Incident to export.
        source_host: Value stored under the bundle's ``source_host`` key.
        limit: Maximum number of log entries to include.
        sanitize: When true, each entry's ``text`` is passed through
            ``_redact_text``. Only entry text is redacted; the incident
            metadata and ``source_host`` are emitted as given.

    Returns:
        A dict with bundle metadata, an ``incident`` sub-dict, and a
        ``log_entries`` list.
    """
    related = get_incident_entries(db_path, incident, limit=limit)

    incident_fields = ("id", "label", "issue_type", "started_at", "ended_at", "severity", "notes")
    bundle: dict = {
        "bundle_version": 1,
        "source_host": source_host,
        "bundled_at": now_iso(),
        "sanitized": sanitize,
        "incident": {name: getattr(incident, name) for name in incident_fields},
    }

    serialized: list[dict] = []
    for entry in related:
        body = entry.text
        if sanitize:
            body = _redact_text(body)
        serialized.append(
            {
                "entry_id": entry.entry_id,
                "source_id": entry.source_id,
                "timestamp_iso": entry.timestamp_iso,
                "severity": entry.severity,
                "text": body,
                "matched_patterns": list(entry.matched_patterns),
            }
        )

    bundle["log_entries"] = serialized
    return bundle
def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized: bool) -> SentBundle:
    """Log an outgoing bundle export to the ``sent_bundles`` table.

    Args:
        db_path: Path to the SQLite database file.
        incident_id: Id of the incident the bundle was built from.
        bundle: The exported bundle; stored as JSON, and its ``log_entries``
            list (if present) determines ``entry_count``.
        sanitized: Whether the bundle was redacted; stored as 0/1.

    Returns:
        The ``SentBundle`` record that was inserted.

    Raises:
        sqlite3.Error: If the insert fails; the connection is still closed.
    """
    record = SentBundle(
        id=str(uuid.uuid4()),
        incident_id=incident_id,
        exported_at=now_iso(),
        sanitized=sanitized,
        entry_count=len(bundle.get("log_entries", [])),
        bundle_json=json.dumps(bundle),
    )
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute(
            "INSERT INTO sent_bundles (id, incident_id, exported_at, sanitized, entry_count, bundle_json) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (record.id, record.incident_id, record.exported_at, int(record.sanitized),
             record.entry_count, record.bundle_json),
        )
        conn.commit()
    finally:
        # Close even when the insert or commit raises, so the handle is not leaked.
        conn.close()
    return record
def list_sent_bundles(db_path: Path) -> list[SentBundle]:
    """Return every recorded outgoing bundle, ordered by ``exported_at`` descending.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        A list of ``SentBundle`` records; empty when nothing has been exported.
        The stored integer ``sanitized`` flag is converted back to ``bool``.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT id, incident_id, exported_at, sanitized, entry_count, bundle_json "
            "FROM sent_bundles ORDER BY exported_at DESC"
        ).fetchall()
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()
    return [
        SentBundle(
            id=r["id"],
            incident_id=r["incident_id"],
            exported_at=r["exported_at"],
            sanitized=bool(r["sanitized"]),
            entry_count=r["entry_count"],
            bundle_json=r["bundle_json"],
        )
        for r in rows
    ]
def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
    """Store an incoming bundle from a remote Turnstone instance.

    Missing fields fall back to defaults (``"unknown"`` host, empty issue type
    and label, ``"medium"`` severity, current time for ``bundled_at``). The
    full bundle is stored as JSON.

    Args:
        db_path: Path to the SQLite database file.
        bundle: The decoded bundle payload.

    Returns:
        The ``ReceivedBundle`` record that was inserted.

    Raises:
        sqlite3.Error: If the insert fails; the connection is still closed.
    """
    # The payload comes from another host: treat a null or non-object
    # "incident" and a null "log_entries" the same as an absent key rather
    # than failing with AttributeError/TypeError.
    inc = bundle.get("incident")
    if not isinstance(inc, dict):
        inc = {}
    log_entries = bundle.get("log_entries") or []

    record = ReceivedBundle(
        id=str(uuid.uuid4()),
        source_host=bundle.get("source_host", "unknown"),
        issue_type=inc.get("issue_type", ""),
        label=inc.get("label", ""),
        severity=inc.get("severity", "medium"),
        started_at=inc.get("started_at"),
        bundled_at=bundle.get("bundled_at", now_iso()),
        entry_count=len(log_entries),
        bundle_json=json.dumps(bundle),
    )
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute(
            "INSERT INTO received_bundles "
            "(id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (record.id, record.source_host, record.issue_type, record.label,
             record.severity, record.started_at, record.bundled_at, record.entry_count, record.bundle_json),
        )
        conn.commit()
    finally:
        # Close even when the insert or commit raises, so the handle is not leaked.
        conn.close()
    return record
def list_bundles(db_path: Path) -> list[ReceivedBundle]:
    """Return every received bundle, ordered by ``bundled_at`` descending.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        A list of ``ReceivedBundle`` records; empty when none are stored.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json "
            "FROM received_bundles ORDER BY bundled_at DESC"
        ).fetchall()
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()
    return [_row_to_bundle(r) for r in rows]
def get_bundle(db_path: Path, bundle_id: str) -> ReceivedBundle | None:
    """Look up a single received bundle by id.

    Args:
        db_path: Path to the SQLite database file.
        bundle_id: The ``id`` value to match.

    Returns:
        The matching ``ReceivedBundle``, or ``None`` when no row has that id.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM received_bundles WHERE id = ?", (bundle_id,)
        ).fetchone()
    finally:
        # Close even when the query raises, so the handle is not leaked.
        conn.close()
    return _row_to_bundle(row) if row else None