turnstone/app/services/incidents.py
pyr0ball e543ab70f7 feat: dual-backend SQLite/Postgres + multi-tenant source namespacing
- Add app/db/ abstraction layer: Backend enum, DbConn wrapper,
  dialect helper (q() for ? vs %s paramstyle), get_conn(), tenant_id()
- Auto-detect backend from DATABASE_URL; SQLite remains default when
  unset — no config change for local deployments
- Add tenant_id column to all three logical DBs (main, context, incidents);
  idempotent ALTER TABLE migration runs before schema scripts on existing DBs
- All INSERTs inject tenant_id; SELECTs use (tenant_id = ? OR tenant_id = '')
  for backward compat with pre-namespacing rows
- Add docker-compose.yml with named volume turnstone_pgdata (survives rebuilds)
  and optional external Postgres support via DATABASE_URL override
- Add scripts/migrate_sqlite_to_postgres.py — one-shot idempotent migration
  for existing SQLite data; ON CONFLICT DO NOTHING for safe re-runs
- Fix SSH glean path in pipeline.py to use ensure_schema + get_conn
  (was still using raw sqlite3.connect + old _SCHEMA without tenant_id)
- Fix FTS5 JOIN ambiguity: qualify repeat_count as f.repeat_count in search
- Update all tests to use ensure_*_schema fixtures; add row_factory where needed
- 394/394 tests passing

Closes: #42
Closes: #50
2026-06-08 08:37:54 -07:00

279 lines
9.7 KiB
Python

"""CRUD operations for user-tagged incidents and received log bundles."""
from __future__ import annotations
import json
import re
import uuid
from pathlib import Path
from app.db import get_conn, resolve_tenant_id
from app.glean.base import now_iso
from app.services.models import Incident, ReceivedBundle, SentBundle
from app.services.search import SearchResult, entries_in_window, search
# Ordered (compiled regex, replacement) pairs used to scrub log text.
# Order matters to callers that apply them in sequence: IPv4-looking
# addresses and e-mail addresses are replaced first, then key/value style
# user, host and password fields. The user rule keeps the matched key name
# via the \1 backreference; the host and password rules normalise the key to
# lower case and the separator to "=".
_REDACT_PATTERNS: list[tuple[re.Pattern, str]] = [
    (re.compile(expression), placeholder)
    for expression, placeholder in (
        (r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b", "[IP]"),
        (r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}", "[EMAIL]"),
        (r"(?i)\b(user(?:name)?|uid)\s*[=:]\s*\S+", r"\1=[USER]"),
        (r"(?i)\bhost\s*[=:]\s*\S+", "host=[HOST]"),
        (r"(?i)\bpassword\s*[=:]\s*\S+", "password=[REDACTED]"),
    )
]
def _redact_text(text: str) -> str:
    """Return *text* with every ``_REDACT_PATTERNS`` rule applied in order.

    Each rule's substitution runs on the output of the previous one, so later
    rules see the placeholders inserted by earlier rules.
    """
    scrubbed = text
    for regex, placeholder in _REDACT_PATTERNS:
        scrubbed = regex.sub(placeholder, scrubbed)
    return scrubbed
def _row_to_incident(row) -> Incident:
    """Build an ``Incident`` from a row of the ``incidents`` table.

    ``row`` must support lookup by column name and expose ``keys()``. When
    the row has no ``issue_type`` column the field is set to an empty string;
    all other fields are copied from the identically named columns.
    """
    # Columns that are always copied straight across.
    required_columns = (
        "id",
        "label",
        "started_at",
        "ended_at",
        "notes",
        "created_at",
        "severity",
    )
    fields = {column: row[column] for column in required_columns}
    if "issue_type" in row.keys():
        fields["issue_type"] = row["issue_type"]
    else:
        fields["issue_type"] = ""
    return Incident(**fields)
def _row_to_bundle(row) -> ReceivedBundle:
    """Build a ``ReceivedBundle`` from a row of the ``received_bundles`` table.

    ``row`` must support lookup by column name; each field is copied from the
    column with the same name.
    """
    bundle_columns = (
        "id",
        "source_host",
        "issue_type",
        "label",
        "severity",
        "started_at",
        "bundled_at",
        "entry_count",
        "bundle_json",
    )
    return ReceivedBundle(**{column: row[column] for column in bundle_columns})
def create_incident(
    db_path: Path,
    label: str,
    issue_type: str = "",
    started_at: str | None = None,
    ended_at: str | None = None,
    notes: str = "",
    severity: str = "medium",
) -> Incident:
    """Create an incident, insert it into ``incidents`` and return it.

    The incident receives a fresh UUID4 string as its id and ``now_iso()`` as
    its ``created_at``. The row is stored under the tenant id reported by
    ``resolve_tenant_id()``.
    """
    tenant = resolve_tenant_id()
    created = Incident(
        id=str(uuid.uuid4()),
        label=label,
        issue_type=issue_type,
        started_at=started_at,
        ended_at=ended_at,
        notes=notes,
        created_at=now_iso(),
        severity=severity,
    )
    insert_sql = (
        "INSERT INTO incidents (id, tenant_id, label, issue_type, started_at, ended_at, notes, created_at, severity) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    # Parameter order mirrors the column list in insert_sql.
    params = (
        created.id,
        tenant,
        created.label,
        created.issue_type,
        created.started_at,
        created.ended_at,
        created.notes,
        created.created_at,
        created.severity,
    )
    with get_conn(db_path) as conn:
        conn.execute(insert_sql, params)
        conn.commit()
    return created
def list_incidents(db_path: Path) -> list[Incident]:
    """Return the incidents visible to the current tenant, newest first.

    Rows whose ``tenant_id`` is the empty string are returned alongside the
    current tenant's rows. Ordering is by ``created_at`` descending.
    """
    tenant = resolve_tenant_id()
    query = "SELECT * FROM incidents WHERE (tenant_id = ? OR tenant_id = '') ORDER BY created_at DESC"
    with get_conn(db_path) as conn:
        cursor = conn.execute(query, (tenant,))
        fetched = cursor.fetchall()
    return list(map(_row_to_incident, fetched))
def get_incident(db_path: Path, incident_id: str) -> Incident | None:
    """Fetch one incident by id, or ``None`` when no visible row matches.

    A row is visible when its ``tenant_id`` equals the current tenant id or
    is the empty string.
    """
    tenant = resolve_tenant_id()
    query = "SELECT * FROM incidents WHERE id = ? AND (tenant_id = ? OR tenant_id = '')"
    with get_conn(db_path) as conn:
        match = conn.execute(query, (incident_id, tenant)).fetchone()
    if not match:
        return None
    return _row_to_incident(match)
def delete_incident(db_path: Path, incident_id: str) -> bool:
    """Delete the incident with *incident_id* and report whether a row went away.

    Only rows whose ``tenant_id`` equals the current tenant id or is the
    empty string are eligible. Returns ``True`` when the cursor reports at
    least one affected row.
    """
    tenant = resolve_tenant_id()
    statement = "DELETE FROM incidents WHERE id = ? AND (tenant_id = ? OR tenant_id = '')"
    with get_conn(db_path) as conn:
        cursor = conn.execute(statement, (incident_id, tenant))
        conn.commit()
        removed = cursor.rowcount > 0
    return removed
def get_incident_entries(
    db_path: Path,
    incident: Incident,
    limit: int = 100,
) -> list[SearchResult]:
    """Return up to *limit* log entries related to an incident's time window.

    Three searches on the incident label are run inside the
    ``started_at``/``ended_at`` window, each capped at ``limit // 2``: one
    without a severity filter, one for ``ERROR`` and one for ``CRITICAL``.
    Their results are merged without duplicates (keyed on ``entry_id``) in
    the order label hits, critical hits, error hits. While fewer than *limit*
    entries have been collected, the result is topped up first with ``ERROR``
    entries from the window and then with any entries from the window. The
    merged list is sorted by timestamp and sequence and cut to *limit*.
    """
    per_query = limit // 2
    window = {"since": incident.started_at, "until": incident.ended_at, "limit": per_query}

    label_hits = search(db_path, query=incident.label, include_repeats=False, **window)
    error_hits = search(db_path, query=incident.label, severity="ERROR", include_repeats=False, **window)
    critical_hits = search(db_path, query=incident.label, severity="CRITICAL", include_repeats=False, **window)

    seen_ids: set[str] = set()
    merged: list[SearchResult] = []

    def absorb(candidates) -> None:
        """Append candidates whose entry_id has not been collected yet."""
        for candidate in candidates:
            if candidate.entry_id in seen_ids:
                continue
            seen_ids.add(candidate.entry_id)
            merged.append(candidate)

    absorb(label_hits + critical_hits + error_hits)

    if len(merged) < limit:
        absorb(
            entries_in_window(
                db_path, incident.started_at, incident.ended_at, severity="ERROR", limit=per_query
            )
        )

    if len(merged) < limit:
        remaining = limit - len(merged)
        absorb(entries_in_window(db_path, incident.started_at, incident.ended_at, limit=remaining))

    # Entries with a falsy timestamp are keyed on "\xff" instead, which
    # compares greater than any ASCII timestamp string.
    ordered = sorted(merged, key=lambda item: (item.timestamp_iso or "\xff", item.sequence))
    return ordered[:limit]
def build_bundle(
    db_path: Path,
    incident: Incident,
    source_host: str,
    limit: int = 200,
    sanitize: bool = False,
) -> dict:
    """Assemble a labeled bundle: incident metadata plus related log entries.

    Entries come from ``get_incident_entries`` with the given *limit*. When
    *sanitize* is true, each entry's ``text`` is passed through
    ``_redact_text``; the incident metadata and *source_host* are copied
    as-is. The returned dict carries ``bundle_version`` 1, the
    ``now_iso()`` timestamp at which it was built, and the *sanitize* flag.
    """
    related = get_incident_entries(db_path, incident, limit=limit)
    built_at = now_iso()

    incident_meta = {
        "id": incident.id,
        "label": incident.label,
        "issue_type": incident.issue_type,
        "started_at": incident.started_at,
        "ended_at": incident.ended_at,
        "severity": incident.severity,
        "notes": incident.notes,
    }

    serialized_entries = []
    for entry in related:
        if sanitize:
            body = _redact_text(entry.text)
        else:
            body = entry.text
        serialized_entries.append(
            {
                "entry_id": entry.entry_id,
                "source_id": entry.source_id,
                "timestamp_iso": entry.timestamp_iso,
                "severity": entry.severity,
                "text": body,
                "matched_patterns": list(entry.matched_patterns),
            }
        )

    # Key order is kept stable because the bundle is serialised with json.dumps.
    return {
        "bundle_version": 1,
        "source_host": source_host,
        "bundled_at": built_at,
        "sanitized": sanitize,
        "incident": incident_meta,
        "log_entries": serialized_entries,
    }
def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized: bool) -> SentBundle:
    """Log an outgoing bundle export to the ``sent_bundles`` table.

    The stored record gets a fresh UUID4 id, ``now_iso()`` as its export
    time, the length of ``bundle["log_entries"]`` (0 when the key is absent)
    as its entry count, and the JSON-serialised bundle. ``sanitized`` is
    written to the database as an integer.
    """
    tenant = resolve_tenant_id()
    exported_entries = bundle.get("log_entries", [])
    sent = SentBundle(
        id=str(uuid.uuid4()),
        incident_id=incident_id,
        exported_at=now_iso(),
        sanitized=sanitized,
        entry_count=len(exported_entries),
        bundle_json=json.dumps(bundle),
    )
    insert_sql = (
        "INSERT INTO sent_bundles (id, tenant_id, incident_id, exported_at, sanitized, entry_count, bundle_json) "
        "VALUES (?, ?, ?, ?, ?, ?, ?)"
    )
    # Parameter order mirrors the column list in insert_sql.
    params = (
        sent.id,
        tenant,
        sent.incident_id,
        sent.exported_at,
        int(sent.sanitized),
        sent.entry_count,
        sent.bundle_json,
    )
    with get_conn(db_path) as conn:
        conn.execute(insert_sql, params)
        conn.commit()
    return sent
def list_sent_bundles(db_path: Path) -> list[SentBundle]:
    """Return the sent-bundle records visible to the current tenant.

    Rows whose ``tenant_id`` is the empty string are included. Results are
    ordered by ``exported_at`` descending, and the stored ``sanitized`` value
    is converted back to ``bool``.
    """
    tenant = resolve_tenant_id()
    query = (
        "SELECT id, incident_id, exported_at, sanitized, entry_count, bundle_json "
        "FROM sent_bundles WHERE (tenant_id = ? OR tenant_id = '') ORDER BY exported_at DESC"
    )
    with get_conn(db_path) as conn:
        fetched = conn.execute(query, (tenant,)).fetchall()

    history: list[SentBundle] = []
    for record in fetched:
        history.append(
            SentBundle(
                id=record["id"],
                incident_id=record["incident_id"],
                exported_at=record["exported_at"],
                sanitized=bool(record["sanitized"]),
                entry_count=record["entry_count"],
                bundle_json=record["bundle_json"],
            )
        )
    return history
def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
    """Store an incoming bundle from a remote Turnstone instance.

    The payload is produced outside this process, so a key may be absent or
    present with an explicit ``None`` (JSON ``null``). Both cases fall back
    to the same defaults: ``source_host`` "unknown", ``issue_type`` and
    ``label`` "", ``severity`` "medium", ``bundled_at`` ``now_iso()``.
    An ``incident`` value that is not a dict is treated as empty, and a
    ``log_entries`` value that is not a list/tuple counts as zero entries.
    The original payload is stored unmodified as JSON.

    Args:
        db_path: Database location handed to ``get_conn``.
        bundle: Decoded bundle payload.

    Returns:
        The ``ReceivedBundle`` record that was inserted.
    """

    def _field(mapping: dict, key: str, default):
        """Return ``mapping[key]``, treating a missing key and ``None`` alike."""
        value = mapping.get(key)
        return default if value is None else value

    tid = resolve_tenant_id()

    # dict.get(key, default) only covers absent keys; guard null/odd shapes
    # explicitly so a malformed remote payload cannot raise here.
    inc = bundle.get("incident")
    if not isinstance(inc, dict):
        inc = {}
    log_entries = bundle.get("log_entries")
    entry_count = len(log_entries) if isinstance(log_entries, (list, tuple)) else 0

    # Only stamp the current time when the sender did not supply one.
    bundled_at = bundle.get("bundled_at")
    if bundled_at is None:
        bundled_at = now_iso()

    record = ReceivedBundle(
        id=str(uuid.uuid4()),
        source_host=_field(bundle, "source_host", "unknown"),
        issue_type=_field(inc, "issue_type", ""),
        label=_field(inc, "label", ""),
        severity=_field(inc, "severity", "medium"),
        started_at=inc.get("started_at"),
        bundled_at=bundled_at,
        entry_count=entry_count,
        bundle_json=json.dumps(bundle),
    )
    with get_conn(db_path) as conn:
        conn.execute(
            "INSERT INTO received_bundles "
            "(id, tenant_id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (record.id, tid, record.source_host, record.issue_type, record.label,
             record.severity, record.started_at, record.bundled_at, record.entry_count, record.bundle_json),
        )
        conn.commit()
    return record
def list_bundles(db_path: Path) -> list[ReceivedBundle]:
    """Return the received bundles visible to the current tenant.

    Rows whose ``tenant_id`` is the empty string are included. Results are
    ordered by ``bundled_at`` descending.
    """
    tenant = resolve_tenant_id()
    query = (
        "SELECT id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json "
        "FROM received_bundles WHERE (tenant_id = ? OR tenant_id = '') ORDER BY bundled_at DESC"
    )
    with get_conn(db_path) as conn:
        fetched = conn.execute(query, (tenant,)).fetchall()
    return list(map(_row_to_bundle, fetched))
def get_bundle(db_path: Path, bundle_id: str) -> ReceivedBundle | None:
    """Fetch one received bundle by id, or ``None`` when no visible row matches.

    A row is visible when its ``tenant_id`` equals the current tenant id or
    is the empty string.
    """
    tenant = resolve_tenant_id()
    query = "SELECT * FROM received_bundles WHERE id = ? AND (tenant_id = ? OR tenant_id = '')"
    with get_conn(db_path) as conn:
        match = conn.execute(query, (bundle_id, tenant)).fetchone()
    if not match:
        return None
    return _row_to_bundle(match)