- Add app/db/ abstraction layer: Backend enum, DbConn wrapper, dialect helper (q() for ? vs %s paramstyle), get_conn(), tenant_id()
- Auto-detect backend from DATABASE_URL; SQLite remains the default when unset — no config change for local deployments
- Add tenant_id column to all three logical DBs (main, context, incidents); idempotent ALTER TABLE migration runs before schema scripts on existing DBs
- All INSERTs inject tenant_id; SELECTs use (tenant_id = ? OR tenant_id = '') for backward compatibility with pre-namespacing rows
- Add docker-compose.yml with named volume turnstone_pgdata (survives rebuilds) and optional external Postgres support via DATABASE_URL override
- Add scripts/migrate_sqlite_to_postgres.py — one-shot idempotent migration for existing SQLite data; ON CONFLICT DO NOTHING for safe re-runs
- Fix SSH glean path in pipeline.py to use ensure_schema + get_conn (was still using raw sqlite3.connect + old _SCHEMA without tenant_id)
- Fix FTS5 JOIN ambiguity: qualify repeat_count as f.repeat_count in search
- Update all tests to use ensure_*_schema fixtures; add row_factory where needed
- 394/394 tests passing

Closes: #42
Closes: #50
279 lines
9.7 KiB
Python
279 lines
9.7 KiB
Python
"""CRUD operations for user-tagged incidents and received log bundles."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
from app.db import get_conn, resolve_tenant_id
|
|
from app.glean.base import now_iso
|
|
from app.services.models import Incident, ReceivedBundle, SentBundle
|
|
from app.services.search import SearchResult, entries_in_window, search
|
|
|
|
# Ordered (compiled pattern, replacement) pairs used to scrub log text.
# Each raw pattern is compiled once at import time; the order below is the
# order in which the substitutions are applied.
_REDACT_PATTERNS: list[tuple[re.Pattern, str]] = [
    (re.compile(raw_pattern), replacement)
    for raw_pattern, replacement in (
        # Dotted-quad IPv4-looking tokens.
        (r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b", "[IP]"),
        # E-mail addresses.
        (r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}", "[EMAIL]"),
        # user=/username=/uid= pairs; the matched key is kept via group 1.
        (r"(?i)\b(user(?:name)?|uid)\s*[=:]\s*\S+", r"\1=[USER]"),
        # host= pairs (key is normalised to lowercase "host").
        (r"(?i)\bhost\s*[=:]\s*\S+", "host=[HOST]"),
        # password= pairs (key is normalised to lowercase "password").
        (r"(?i)\bpassword\s*[=:]\s*\S+", "password=[REDACTED]"),
    )
]
|
|
|
|
|
|
def _redact_text(text: str) -> str:
    """Return *text* after applying every ``_REDACT_PATTERNS`` substitution in list order."""
    scrubbed = text
    for regex, substitute in _REDACT_PATTERNS:
        # Each pass works on the output of the previous one.
        scrubbed = regex.sub(substitute, scrubbed)
    return scrubbed
|
|
|
|
|
|
def _row_to_incident(row) -> Incident:
    """Build an ``Incident`` from a mapping-like database row.

    ``row`` must support ``row[column]`` and ``row.keys()``. If the row has no
    ``issue_type`` column the field defaults to the empty string.
    """
    always_present = (
        "id",
        "label",
        "started_at",
        "ended_at",
        "notes",
        "created_at",
        "severity",
    )
    values = {column: row[column] for column in always_present}
    # NOTE(review): presumably guards rows from a schema that predates the
    # issue_type column — confirm against the migration scripts.
    if "issue_type" in row.keys():
        values["issue_type"] = row["issue_type"]
    else:
        values["issue_type"] = ""
    return Incident(**values)
|
|
|
|
|
|
def _row_to_bundle(row) -> ReceivedBundle:
    """Build a ``ReceivedBundle`` from a mapping-like ``received_bundles`` row.

    Every listed column is read with ``row[column]`` and passed to the model
    under the same name.
    """
    columns = (
        "id",
        "source_host",
        "issue_type",
        "label",
        "severity",
        "started_at",
        "bundled_at",
        "entry_count",
        "bundle_json",
    )
    return ReceivedBundle(**{column: row[column] for column in columns})
|
|
|
|
|
|
def create_incident(
    db_path: Path,
    label: str,
    issue_type: str = "",
    started_at: str | None = None,
    ended_at: str | None = None,
    notes: str = "",
    severity: str = "medium",
) -> Incident:
    """Create an incident, persist it, and return the new record.

    The id is a fresh UUID4 string and ``created_at`` comes from ``now_iso()``.
    The row is written with the tenant id returned by ``resolve_tenant_id()``.
    """
    tenant = resolve_tenant_id()
    created = Incident(
        id=str(uuid.uuid4()),
        label=label,
        issue_type=issue_type,
        started_at=started_at,
        ended_at=ended_at,
        notes=notes,
        created_at=now_iso(),
        severity=severity,
    )
    insert_sql = (
        "INSERT INTO incidents (id, tenant_id, label, issue_type, started_at, ended_at, notes, created_at, severity) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    # Parameter order must mirror the column list in insert_sql.
    params = (
        created.id,
        tenant,
        created.label,
        created.issue_type,
        created.started_at,
        created.ended_at,
        created.notes,
        created.created_at,
        created.severity,
    )
    with get_conn(db_path) as conn:
        conn.execute(insert_sql, params)
        conn.commit()
    return created
|
|
|
|
|
|
def list_incidents(db_path: Path) -> list[Incident]:
    """Return incidents visible to the current tenant, newest ``created_at`` first.

    Rows with an empty ``tenant_id`` (pre-namespacing rows) are included
    alongside the tenant's own rows.
    """
    select_sql = "SELECT * FROM incidents WHERE (tenant_id = ? OR tenant_id = '') ORDER BY created_at DESC"
    tenant = resolve_tenant_id()
    with get_conn(db_path) as conn:
        cursor = conn.execute(select_sql, (tenant,))
        fetched = cursor.fetchall()
        incidents = list(map(_row_to_incident, fetched))
    return incidents
|
|
|
|
|
|
def get_incident(db_path: Path, incident_id: str) -> Incident | None:
    """Fetch one incident by id, or ``None`` when no visible row matches.

    A row is visible when its ``tenant_id`` equals the current tenant or is
    the empty string.
    """
    select_sql = "SELECT * FROM incidents WHERE id = ? AND (tenant_id = ? OR tenant_id = '')"
    tenant = resolve_tenant_id()
    with get_conn(db_path) as conn:
        found = conn.execute(select_sql, (incident_id, tenant)).fetchone()
    if not found:
        return None
    return _row_to_incident(found)
|
|
|
|
|
|
def delete_incident(db_path: Path, incident_id: str) -> bool:
    """Delete an incident by id and report whether any row was removed.

    Only rows whose ``tenant_id`` equals the current tenant or is the empty
    string are eligible. Returns ``True`` when the cursor's ``rowcount`` is
    greater than zero.
    """
    delete_sql = "DELETE FROM incidents WHERE id = ? AND (tenant_id = ? OR tenant_id = '')"
    tenant = resolve_tenant_id()
    with get_conn(db_path) as conn:
        cursor = conn.execute(delete_sql, (incident_id, tenant))
        conn.commit()
        # Read rowcount while the connection is still open.
        removed = cursor.rowcount > 0
    return removed
|
|
|
|
|
|
def get_incident_entries(
    db_path: Path,
    incident: Incident,
    limit: int = 100,
) -> list[SearchResult]:
    """Return log entries associated with an incident's time window.

    Collection happens in stages, de-duplicating on ``entry_id``:

    1. Three searches on the incident label within the window (any severity,
       ``ERROR``, ``CRITICAL``), each capped at ``limit // 2``; results are
       merged in the order keyword, critical, error.
    2. If still below ``limit``: ``ERROR`` entries from the window, capped at
       ``limit // 2``.
    3. If still below ``limit``: any entries from the window, capped at the
       remaining count.

    The merged list is sorted by ``(timestamp_iso, sequence)`` with entries
    lacking a timestamp placed last, then truncated to ``limit``.
    """
    window_start = incident.started_at
    window_end = incident.ended_at
    per_query = limit // 2

    def label_search(**extra):
        # All three label searches share the window, cap and repeat filter.
        return search(
            db_path,
            query=incident.label,
            include_repeats=False,
            since=window_start,
            until=window_end,
            limit=per_query,
            **extra,
        )

    keyword_hits = label_search()
    error_hits = label_search(severity="ERROR")
    critical_hits = label_search(severity="CRITICAL")

    seen_ids: set[str] = set()
    merged: list[SearchResult] = []

    def absorb(candidates) -> None:
        # Append entries not collected yet, preserving first-seen order.
        for candidate in candidates:
            if candidate.entry_id in seen_ids:
                continue
            seen_ids.add(candidate.entry_id)
            merged.append(candidate)

    absorb(keyword_hits + critical_hits + error_hits)

    if len(merged) < limit:
        absorb(entries_in_window(db_path, window_start, window_end, severity="ERROR", limit=per_query))

    if len(merged) < limit:
        # NOTE(review): only the remaining count is requested, so if some of
        # the returned entries are duplicates the result can stay below limit.
        absorb(entries_in_window(db_path, window_start, window_end, limit=limit - len(merged)))

    def chronological(item):
        # "\xff" sorts after any ISO timestamp, pushing undated entries last.
        return (item.timestamp_iso or "\xff", item.sequence)

    merged.sort(key=chronological)
    return merged[:limit]
|
|
|
|
|
|
def build_bundle(
    db_path: Path,
    incident: Incident,
    source_host: str,
    limit: int = 200,
    sanitize: bool = False,
) -> dict:
    """Assemble a labeled bundle: incident metadata + related log entries.

    Entries come from ``get_incident_entries`` (at most ``limit``). When
    ``sanitize`` is truthy each entry's text is passed through
    ``_redact_text``; the flag itself is recorded under ``"sanitized"``.
    """
    related = get_incident_entries(db_path, incident, limit=limit)
    stamped_at = now_iso()

    # NOTE(review): only log-entry text is redacted; the incident label and
    # notes are copied verbatim even when sanitize is set — confirm intended.
    incident_meta = {
        "id": incident.id,
        "label": incident.label,
        "issue_type": incident.issue_type,
        "started_at": incident.started_at,
        "ended_at": incident.ended_at,
        "severity": incident.severity,
        "notes": incident.notes,
    }

    serialized = []
    for item in related:
        serialized.append(
            {
                "entry_id": item.entry_id,
                "source_id": item.source_id,
                "timestamp_iso": item.timestamp_iso,
                "severity": item.severity,
                "text": _redact_text(item.text) if sanitize else item.text,
                "matched_patterns": list(item.matched_patterns),
            }
        )

    return {
        "bundle_version": 1,
        "source_host": source_host,
        "bundled_at": stamped_at,
        "sanitized": sanitize,
        "incident": incident_meta,
        "log_entries": serialized,
    }
|
|
|
|
|
|
def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized: bool) -> SentBundle:
    """Log an outgoing bundle export to the sent_bundles table.

    The stored record gets a fresh UUID4 id, an ``exported_at`` from
    ``now_iso()``, the length of ``bundle["log_entries"]`` (0 if the key is
    missing) and the JSON-serialised bundle. ``sanitized`` is written to the
    database as an integer.
    """
    tenant = resolve_tenant_id()
    sent = SentBundle(
        id=str(uuid.uuid4()),
        incident_id=incident_id,
        exported_at=now_iso(),
        sanitized=sanitized,
        entry_count=len(bundle.get("log_entries", [])),
        bundle_json=json.dumps(bundle),
    )
    insert_sql = (
        "INSERT INTO sent_bundles (id, tenant_id, incident_id, exported_at, sanitized, entry_count, bundle_json) "
        "VALUES (?, ?, ?, ?, ?, ?, ?)"
    )
    # Parameter order must mirror the column list in insert_sql.
    params = (
        sent.id,
        tenant,
        sent.incident_id,
        sent.exported_at,
        int(sent.sanitized),
        sent.entry_count,
        sent.bundle_json,
    )
    with get_conn(db_path) as conn:
        conn.execute(insert_sql, params)
        conn.commit()
    return sent
|
|
|
|
|
|
def list_sent_bundles(db_path: Path) -> list[SentBundle]:
    """Return sent-bundle records visible to the current tenant, newest export first.

    Rows with an empty ``tenant_id`` are included alongside the tenant's own.
    The stored ``sanitized`` value is converted back to ``bool``.
    """
    select_sql = (
        "SELECT id, incident_id, exported_at, sanitized, entry_count, bundle_json "
        "FROM sent_bundles WHERE (tenant_id = ? OR tenant_id = '') ORDER BY exported_at DESC"
    )
    tenant = resolve_tenant_id()
    with get_conn(db_path) as conn:
        fetched = conn.execute(select_sql, (tenant,)).fetchall()

    exports: list[SentBundle] = []
    for record in fetched:
        exports.append(
            SentBundle(
                id=record["id"],
                incident_id=record["incident_id"],
                exported_at=record["exported_at"],
                sanitized=bool(record["sanitized"]),
                entry_count=record["entry_count"],
                bundle_json=record["bundle_json"],
            )
        )
    return exports
|
|
|
|
|
|
def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
    """Store an incoming bundle from a remote Turnstone instance.

    The payload originates on another host, so it is read defensively:

    * a key whose value is ``None`` (JSON ``null``) is treated exactly like a
      missing key and receives the same default;
    * an ``"incident"`` value that is not a dict is treated as empty;
    * a ``"log_entries"`` value that is not a list/tuple counts as zero
      entries.

    The complete payload is still persisted verbatim in ``bundle_json``.

    Args:
        db_path: Database location handed to ``get_conn``.
        bundle: Decoded bundle payload.

    Returns:
        The ``ReceivedBundle`` record that was inserted.
    """

    def _field(mapping: dict, key: str, default):
        # dict.get(key, default) only applies the default for *missing* keys;
        # an explicit null must fall back to the default as well.
        value = mapping.get(key)
        return default if value is None else value

    tid = resolve_tenant_id()

    inc = bundle.get("incident")
    if not isinstance(inc, dict):
        inc = {}

    log_entries = bundle.get("log_entries")
    entry_count = len(log_entries) if isinstance(log_entries, (list, tuple)) else 0

    record = ReceivedBundle(
        id=str(uuid.uuid4()),
        source_host=_field(bundle, "source_host", "unknown"),
        issue_type=_field(inc, "issue_type", ""),
        label=_field(inc, "label", ""),
        severity=_field(inc, "severity", "medium"),
        # started_at has no default in the payload contract; None is kept.
        started_at=inc.get("started_at"),
        bundled_at=_field(bundle, "bundled_at", now_iso()),
        entry_count=entry_count,
        bundle_json=json.dumps(bundle),
    )
    with get_conn(db_path) as conn:
        conn.execute(
            "INSERT INTO received_bundles "
            "(id, tenant_id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (
                record.id,
                tid,
                record.source_host,
                record.issue_type,
                record.label,
                record.severity,
                record.started_at,
                record.bundled_at,
                record.entry_count,
                record.bundle_json,
            ),
        )
        conn.commit()
    return record
|
|
|
|
|
|
def list_bundles(db_path: Path) -> list[ReceivedBundle]:
    """Return received bundles visible to the current tenant, newest ``bundled_at`` first.

    Rows with an empty ``tenant_id`` are included alongside the tenant's own.
    """
    select_sql = (
        "SELECT id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json "
        "FROM received_bundles WHERE (tenant_id = ? OR tenant_id = '') ORDER BY bundled_at DESC"
    )
    tenant = resolve_tenant_id()
    with get_conn(db_path) as conn:
        cursor = conn.execute(select_sql, (tenant,))
        fetched = cursor.fetchall()
        bundles = list(map(_row_to_bundle, fetched))
    return bundles
|
|
|
|
|
|
def get_bundle(db_path: Path, bundle_id: str) -> ReceivedBundle | None:
    """Fetch one received bundle by id, or ``None`` when no visible row matches.

    A row is visible when its ``tenant_id`` equals the current tenant or is
    the empty string.
    """
    select_sql = "SELECT * FROM received_bundles WHERE id = ? AND (tenant_id = ? OR tenant_id = '')"
    tenant = resolve_tenant_id()
    with get_conn(db_path) as conn:
        found = conn.execute(select_sql, (bundle_id, tenant)).fetchone()
    if not found:
        return None
    return _row_to_bundle(found)
|