turnstone/app/services/discover.py
pyr0ball f0fbe245f0 feat: bundle PII sanitization, onboarding wizard, NL source addition (#51, #52, #53)
Bundle export (#51):
- _redact_text() with 5 compiled regex patterns (IPv4, email, user=, host=, password=)
- build_bundle(sanitize=False) — per-entry redaction at export time
- sent_bundles table tracks every outgoing export (GET and POST /send)
- GET /api/sent-bundles exposes history; SentBundle model added
- BundlesView: Received/Sent tabs, sanitized badge, 5-entry preview, re-download
- IncidentsView: Sanitize PII checkbox next to Send Bundle

Onboarding wizard (#52):
- app/services/discover.py: journald/Docker/file detection (best-effort, safe in containers)
- GET /api/setup/status, /discover, POST /api/setup/write (additive, appends to existing)
- SetupWizard.vue: 3-step Detect → Select → Confirm
  - Step 1 shows grouped summary (journald/file/docker counts)
  - Step 2: collapsible groups with All/None section toggles
    - journald + file: pre-selected; docker: collapsed, none pre-selected
  - Step 3: YAML preview before write
- SourcesView: shows wizard on first run; Add Source button reuses it

NL source addition (#53):
- app/services/nl_source.py: keyword shortcut (13 well-known apps) + LLM fallback
- POST /api/setup/interpret: keyword → LLM → null (graceful fallback)
- NL field in wizard step 2; manual form shown when interpretation fails
- Added sources appear in grouped list immediately
2026-05-29 14:14:28 -07:00

173 lines
6.5 KiB
Python

"""Environment auto-discovery for the onboarding wizard.

All checks are best-effort: the journald, Docker, and file detectors each
return an empty list when their probe fails, so the wizard degrades gracefully
in containers, VMs, and minimal environments.
"""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)

# Well-known log files to probe, one (id, path, description) tuple per entry.
# Two entries may share an id (e.g. "syslog"); list the preferred path first.
_KNOWN_PATHS: list[tuple[str, str, str]] = [
    # System logs
    ("syslog", "/var/log/syslog", "System syslog (Debian/Ubuntu)"),
    ("syslog", "/var/log/messages", "System messages (RHEL/Rocky)"),
    ("auth", "/var/log/auth.log", "Auth log"),
    ("kern", "/var/log/kern.log", "Kernel log"),
    # Web servers
    ("nginx-access", "/var/log/nginx/access.log", "Nginx access log"),
    ("nginx-error", "/var/log/nginx/error.log", "Nginx error log"),
    ("apache", "/var/log/apache2/access.log", "Apache access log"),
    ("apache-error", "/var/log/apache2/error.log", "Apache error log"),
    ("caddy", "/var/log/caddy/access.log", "Caddy access log"),
    # Container runtime and host security
    ("docker-daemon", "/var/log/docker.log", "Docker daemon log"),
    ("fail2ban", "/var/log/fail2ban.log", "Fail2ban log"),
    ("ufw", "/var/log/ufw.log", "UFW firewall log"),
]
def _run(cmd: list[str], timeout: float = 5.0) -> str | None:
    """Run *cmd* and return its stdout, or ``None`` on any failure.

    Args:
        cmd: Argument vector passed to ``subprocess.run`` (no shell involved).
        timeout: Seconds to wait before the attempt is abandoned.

    Returns:
        The captured stdout as text when the command exits with status 0;
        ``None`` when it exits non-zero, times out, cannot be started, or
        fails in any other way.
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except Exception as exc:
        # Discovery is best-effort, so no failure may propagate to the caller;
        # leave a debug trace so a missing candidate can still be diagnosed.
        logger.debug("Discovery command %r failed: %s", cmd, exc)
        return None
    if result.returncode != 0:
        return None
    return result.stdout
def discover_journald() -> list[dict[str, Any]]:
    """Return a journald source candidate if ``journalctl`` is available.

    Returns:
        A one-element list describing the system journal, or ``[]`` when
        ``journalctl`` cannot be found. The candidate's id and label embed the
        output of the ``hostname`` command, falling back to ``"localhost"``.
    """
    if not shutil.which("journalctl"):
        return []
    # Strip before applying the fallback so that blank or whitespace-only
    # output also becomes "localhost" instead of an empty host name.
    hostname = (_run(["hostname"]) or "").strip() or "localhost"
    return [{
        "type": "journald",
        "id": f"journal:{hostname}",
        "label": f"System journal ({hostname})",
        "description": "All systemd journal output from this host",
        "available": True,
    }]
def _container_name(line: str) -> str | None:
    """Extract a container name from one JSON line of ``ps`` output.

    Returns ``None`` for blank lines, malformed JSON, and JSON values that are
    not objects, so a single bad line never aborts discovery.
    """
    line = line.strip()
    if not line:
        return None
    try:
        obj = json.loads(line)
    except json.JSONDecodeError:
        return None
    if not isinstance(obj, dict):
        return None
    name = obj.get("Names") or obj.get("Name") or obj.get("ID") or "unknown"
    # podman returns a list for Names; use its first entry.
    if isinstance(name, list):
        name = name[0]
    # str() guards against non-string values; a leading "/" is dropped.
    return str(name).lstrip("/") or "unknown"


def discover_docker() -> list[dict[str, Any]]:
    """Return container candidates from the first runtime that reports any.

    ``docker`` is tried before ``podman``. A runtime is skipped when its
    binary is not found, when its ``ps`` command fails, or when it lists no
    containers.

    Returns:
        One candidate dict per listed container, or ``[]`` when neither
        runtime yields any.
    """
    for runtime in ("docker", "podman"):
        if not shutil.which(runtime):
            continue
        out = _run([runtime, "ps", "--format", "{{json .}}"])
        if out is None:
            continue
        names = [
            name
            for name in map(_container_name, out.splitlines())
            if name is not None
        ]
        containers = [
            {
                "type": "docker",
                "id": f"{runtime}:{name}",
                "label": f"{runtime.capitalize()}: {name}",
                "description": f"Container log stream for {name}",
                "container": name,
                "runtime": runtime,
                "available": True,
            }
            for name in names
        ]
        if containers:
            return containers
    return []
def discover_files() -> list[dict[str, Any]]:
    """Return one file candidate per well-known log id whose path exists."""
    candidates: list[dict[str, Any]] = []
    claimed_ids: set[str] = set()
    for source_id, path, description in _KNOWN_PATHS:
        # Several paths can share an id (syslog vs. messages); only the first
        # one that exists is reported.
        if source_id in claimed_ids or not os.path.exists(path):
            continue
        claimed_ids.add(source_id)
        candidate: dict[str, Any] = {
            "type": "file",
            "id": source_id,
            "label": description,
            "path": path,
            "description": f"Read from {path}",
            "available": True,
        }
        candidates.append(candidate)
    return candidates
def discover_all() -> dict[str, Any]:
    """Run every detector and return the candidates plus per-type flags.

    The result holds the combined ``candidates`` list (journald first, then
    Docker, then files) and three booleans telling whether at least one
    candidate of each type was found.
    """
    candidates: list[dict[str, Any]] = [
        *discover_journald(),
        *discover_docker(),
        *discover_files(),
    ]

    def _has(kind: str) -> bool:
        return any(entry["type"] == kind for entry in candidates)

    return {
        "candidates": candidates,
        "has_journald": _has("journald"),
        "has_docker": _has("docker"),
        "has_files": _has("file"),
    }
# Plain words that YAML loaders may read as booleans or null instead of text.
_YAML_RESERVED_WORDS = frozenset(
    {"true", "false", "yes", "no", "on", "off", "null", "y", "n"}
)
# Punctuation allowed inside an unquoted scalar, in addition to ASCII alphanumerics.
_YAML_PLAIN_PUNCT = frozenset("_./:@-")


def _yaml_scalar(value: Any) -> str:
    """Render *value* as a YAML scalar, double-quoting it unless plainly safe.

    A value stays unquoted only when it starts with an ASCII letter, ``_`` or
    ``/``, consists solely of ASCII alphanumerics and ``_./:@-``, does not end
    with ``:``, and is not a reserved word. Everything else is emitted in JSON
    string syntax, which YAML accepts as a double-quoted scalar, so spaces,
    ``#``, newlines and numeric-looking text cannot change the document
    structure or the value's type.
    """
    text = str(value)
    first = text[:1]
    starts_safely = (first.isascii() and first.isalpha()) or first in ("_", "/")
    body_is_safe = all(
        (ch.isascii() and ch.isalnum()) or ch in _YAML_PLAIN_PUNCT for ch in text
    )
    if (
        starts_safely
        and body_is_safe
        and not text.endswith(":")
        and text.lower() not in _YAML_RESERVED_WORDS
    ):
        return text
    # ensure_ascii=False keeps non-ASCII text readable in the generated file.
    return json.dumps(text, ensure_ascii=False)


def build_sources_yaml(selected: list[dict[str, Any]]) -> str:
    """Generate sources.yaml content from a list of selected candidates.

    Each item should have ``type``, ``id`` and the type-specific fields:
    ``unit`` (optional) for journald, ``runtime`` and ``container`` for
    docker, and ``path`` for anything else, which is written as a file source
    without a ``type`` key.

    Args:
        selected: Candidate dicts chosen in the wizard.

    Returns:
        The YAML document as text, ending with a newline. Entries are nested
        under ``sources:`` with two-space indentation. Values are written
        unquoted when that is safe and double-quoted otherwise.
    """
    lines = [
        "# Turnstone log sources — generated by the setup wizard.",
        "# Edit this file to add, remove, or modify sources.",
        "sources:",
    ]
    for src in selected:
        src_type = src.get("type", "file")
        src_id = str(src.get("id") or "unknown")
        lines.append(f"  - id: {_yaml_scalar(src_id)}")
        if src_type == "journald":
            lines.append("    type: journald")
            unit = src.get("unit")
            if unit:
                lines.append(f"    unit: {_yaml_scalar(unit)}")
        elif src_type == "docker":
            runtime = src.get("runtime") or "docker"
            # Fall back to the id suffix when no container name was supplied.
            container = src.get("container") or src_id.rsplit(":", 1)[-1]
            lines.append("    type: docker")
            lines.append(f"    runtime: {_yaml_scalar(runtime)}")
            lines.append(f"    container: {_yaml_scalar(container)}")
        else:
            path = src.get("path", "")
            lines.append(f"    path: {_yaml_scalar(path)}")
    return "\n".join(lines) + "\n"
def validate_source(src: dict[str, Any]) -> str | None:
    """Check a source definition and describe the first problem found.

    A source needs a non-empty ``id``. A ``file`` source (the default when
    ``type`` is absent) also needs ``path``, and a ``docker`` source needs
    ``container``. Other types have no further requirements.

    Returns:
        An error message, or ``None`` when the definition is acceptable.
    """
    source_id = src.get("id")
    if not source_id:
        return "Source is missing 'id'"

    kind = src.get("type", "file")
    if kind == "file":
        if not src.get("path"):
            return f"File source '{source_id}' is missing 'path'"
    elif kind == "docker":
        if not src.get("container"):
            return f"Docker source '{source_id}' is missing 'container'"
    return None