feat: bundle PII sanitization, onboarding wizard, NL source addition (#51, #52, #53)

Bundle export (#51): - _redact_text() with 5 compiled regex patterns (IPv4, email, user=, host=, password=) - build_bundle(sanitize=False) — per-entry redaction at export time - sent_bundles table tracks every outgoing export (GET and POST /send) - GET /api/sent-bundles exposes history; SentBundle model added - BundlesView: Received/Sent tabs, sanitized badge, 5-entry preview, re-download - IncidentsView: Sanitize PII checkbox next to Send Bundle Onboarding wizard (#52): - app/services/discover.py: journald/Docker/file detection (best-effort, safe in containers) - GET /api/setup/status, /discover, POST /api/setup/write (additive, appends to existing) - SetupWizard.vue: 3-step Detect → Select → Confirm - Step 1 shows grouped summary (journald/file/docker counts) - Step 2: collapsible groups with All/None section toggles - journald + file: pre-selected; docker: collapsed, none pre-selected - Step 3: YAML preview before write - SourcesView: shows wizard on first run; Add Source button reuses it NL source addition (#53): - app/services/nl_source.py: keyword shortcut (13 well-known apps) + LLM fallback - POST /api/setup/interpret: keyword → LLM → null (graceful fallback) - NL field in wizard step 2; manual form shown when interpretation fails - Added sources appear in grouped list immediately
2026-05-29 14:14:28 -07:00 · 2026-05-29 14:14:28 -07:00 · f0fbe245f0
commit f0fbe245f0
parent ae922ef6c6
11 changed files with 1381 additions and 90 deletions
--- a/app/glean/pipeline.py
+++ b/app/glean/pipeline.py
@ -72,6 +72,17 @@ CREATE TABLE IF NOT EXISTS received_bundles (
 CREATE INDEX IF NOT EXISTS idx_bundles_bundled ON received_bundles(bundled_at);
 CREATE INDEX IF NOT EXISTS idx_bundles_type    ON received_bundles(issue_type);

+CREATE TABLE IF NOT EXISTS sent_bundles (
+    id           TEXT PRIMARY KEY,
+    incident_id  TEXT NOT NULL,
+    exported_at  TEXT NOT NULL,
+    sanitized    INTEGER NOT NULL DEFAULT 0,
+    entry_count  INTEGER NOT NULL DEFAULT 0,
+    bundle_json  TEXT NOT NULL
+);
+CREATE INDEX IF NOT EXISTS idx_sent_bundles_incident ON sent_bundles(incident_id);
+CREATE INDEX IF NOT EXISTS idx_sent_bundles_time     ON sent_bundles(exported_at);
+
 -- context tables moved to ensure_context_schema() / CONTEXT_DB_PATH
 -- kept here as no-ops so legacy single-file deployments still work
 CREATE TABLE IF NOT EXISTS context_facts (
--- a/app/rest.py
+++ b/app/rest.py
@ -10,7 +10,9 @@ import asyncio
 import dataclasses
 import hmac
 import json
+import logging
 import os
+import time

 # Offline mode: must be set before any HuggingFace library is imported.
 # Both flags must agree — HF hub and transformers each check independently.
@ -48,6 +50,8 @@ from app.services.blocklist import (
    update_candidate_status,
 )
 from app.services.pihole import PiholeClient
+from app.services.discover import discover_all, build_sources_yaml, validate_source
+from app.services.nl_source import interpret as _nl_interpret
 from app.services.incidents import (
    build_bundle,
    create_incident,
@ -57,6 +61,8 @@ from app.services.incidents import (
    get_incident_entries,
    list_bundles,
    list_incidents,
+    list_sent_bundles,
+    record_sent_bundle,
    store_bundle,
 )
 from app.services.search import (
@ -123,6 +129,10 @@ _compiled_patterns: list = []
@asynccontextmanager
 async def _lifespan(app: FastAPI):
    global _compiled_patterns
+    # Route turnstone.audit through uvicorn's own handler so it appears in api.log.
+    _audit_log.setLevel(logging.INFO)
+    for h in logging.getLogger("uvicorn.error").handlers:
+        _audit_log.addHandler(h)
    ensure_schema(DB_PATH)
    ensure_context_schema(CONTEXT_DB_PATH)
    _compiled_patterns = load_compiled_patterns(PATTERN_FILE)
@ -172,6 +182,27 @@ app.add_middleware(
    allow_headers=["*"],
 )

+_audit_log = logging.getLogger("turnstone.audit")
+
+
+@app.middleware("http")
+async def _audit_middleware(request: Request, call_next):
+    """Emit one audit line per API request.
+
+    For paths under ``/turnstone/api`` the line carries the HTTP method, path,
+    query string, response status code and elapsed milliseconds.  Request and
+    response bodies are never logged; other paths pass through unlogged.
+    """
+    started = time.monotonic()
+    response = await call_next(request)
+    url = request.url
+    if not url.path.startswith("/turnstone/api"):
+        return response
+
+    elapsed_ms = int((time.monotonic() - started) * 1000)
+    query_suffix = f"?{url.query}" if url.query else ""
+    _audit_log.info(
+        "%s %s%s %d %dms",
+        request.method,
+        url.path,
+        query_suffix,
+        response.status_code,
+        elapsed_ms,
+    )
+    return response
+

 _PREFS_DEFAULTS: dict = {
    "entry_point_style": "topbar",
@ -643,6 +674,96 @@ class BatchGleanRequest(BaseModel):
    entries: list[BatchEntry]


+# ── Setup / Onboarding wizard ──────────────────────────────────────────────
+
+# Request body for POST /api/setup/write: the source definitions selected in
+# the wizard.  The endpoint runs validate_source() on each dict before writing.
+class SetupWriteBody(BaseModel):
+    sources: list[dict]
+
+
+# Request body for POST /api/setup/interpret: the operator's free-text
+# description of a log source.
+class NLInterpretBody(BaseModel):
+    description: str
+
+
+@router.get("/api/setup/status")
+def setup_status() -> dict:
+    """Report whether sources.yaml exists under PATTERN_DIR (wizard completion gate)."""
+    return {"configured": (PATTERN_DIR / "sources.yaml").exists()}
+
+
+@router.get("/api/setup/discover")
+def setup_discover() -> dict:
+    """Auto-detect available log sources on this host.
+
+    Thin wrapper over discover_all(): the response carries a ``candidates``
+    list plus ``has_journald`` / ``has_docker`` / ``has_files`` flags.
+    """
+    return discover_all()
+
+
+@router.post("/api/setup/write")
+def setup_write(body: SetupWriteBody, background_tasks: BackgroundTasks) -> dict:
+    """Validate the selected sources and merge them into sources.yaml.
+
+    Every submitted source is checked with validate_source(); if any fail the
+    request is rejected with a 422 whose detail joins all error strings, and
+    nothing is written.  Sources whose ``id`` is already in the file — or was
+    already seen earlier in the same request — are skipped, so re-running the
+    wizard is additive.
+
+    The file is regenerated through build_sources_yaml(), i.e. the merge is
+    "read, append, rewrite" rather than a textual append.
+
+    Returns ``{"written": n, "skipped": m}``, plus a ``message`` when the file
+    exists and every submitted source was already configured.
+
+    Raises:
+        HTTPException: 422 on validation errors; 409 when the existing
+            sources.yaml does not hold a YAML mapping.
+    """
+    errors = [err for err in map(validate_source, body.sources) if err]
+    if errors:
+        raise HTTPException(status_code=422, detail="; ".join(errors))
+
+    sources_file = PATTERN_DIR / "sources.yaml"
+    file_exists = sources_file.exists()
+    existing: list = []
+    if file_exists:
+        import yaml as _yaml
+        with open(sources_file, encoding="utf-8") as f:
+            current = _yaml.safe_load(f) or {}
+        if not isinstance(current, dict):
+            raise HTTPException(
+                status_code=409,
+                detail="Existing sources.yaml is not a YAML mapping; fix or remove it first",
+            )
+        # A bare `sources:` key (what build_sources_yaml([]) writes) loads as
+        # None rather than [], so `.get("sources", [])` is not enough here.
+        existing = current.get("sources") or []
+
+    # Skip ids already on disk and ids repeated within this request.
+    seen_ids = {s.get("id") for s in existing}
+    new_sources = []
+    for src in body.sources:
+        src_id = src.get("id")
+        if src_id in seen_ids:
+            continue
+        seen_ids.add(src_id)
+        new_sources.append(src)
+
+    if file_exists and not new_sources:
+        return {"written": 0, "skipped": len(body.sources), "message": "All sources already configured"}
+
+    content = build_sources_yaml(existing + new_sources)
+
+    PATTERN_DIR.mkdir(parents=True, exist_ok=True)
+    # Explicit encoding: the generated header contains a non-ASCII dash.
+    sources_file.write_text(content, encoding="utf-8")
+
+    # Trigger a background glean of new sources
+    if GLEAN_INTERVAL > 0:
+        background_tasks.add_task(
+            _glean_file,
+            sources_file, DB_PATH, PATTERN_FILE, 1,
+        )
+
+    return {"written": len(new_sources), "skipped": len(body.sources) - len(new_sources)}
+
+
+@router.post("/api/setup/interpret")
+def setup_interpret(body: NLInterpretBody) -> dict:
+    """Turn a plain-English source description into a source definition.
+
+    Delegates to the NL interpreter with the LLM settings stored in prefs
+    (blank values are passed as None).  When interpretation yields nothing, or
+    the result fails validate_source(), the response has ``source: null`` and
+    ``fallback: true`` so the UI can show the manual form; a validation
+    failure also carries ``validation_error``.
+    """
+    prefs = _load_prefs()
+    llm_settings = {
+        "llm_url": prefs.get("llm_url") or None,
+        "llm_model": prefs.get("llm_model") or None,
+        "api_key": prefs.get("llm_api_key") or None,
+    }
+    candidate = _nl_interpret(description=body.description, **llm_settings)
+    if candidate is None:
+        return {"source": None, "fallback": True}
+
+    problem = validate_source(candidate)
+    if problem:
+        return {"source": None, "fallback": True, "validation_error": problem}
+    return {"source": candidate, "fallback": False}
+
+
@router.post("/api/glean/batch")
 def glean_batch(payload: BatchGleanRequest, background_tasks: BackgroundTasks) -> dict:
    """Accept pre-parsed log entries from a remote Turnstone instance (submission protocol).
@ -839,21 +960,30 @@ def delete_incident_endpoint(incident_id: str) -> dict:


@router.get("/api/incidents/{incident_id}/bundle")
-def get_incident_bundle(incident_id: str) -> dict:
+def get_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
    incident = get_incident(DB_PATH, incident_id)
    if not incident:
        raise HTTPException(status_code=404, detail="Incident not found")
-    return build_bundle(DB_PATH, incident, source_host=SOURCE_HOST)
+    bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
+    record_sent_bundle(DB_PATH, incident_id, bundle, sanitized=sanitize)
+    return bundle
+
+
+@router.get("/api/sent-bundles")
+def list_sent_bundles_endpoint() -> dict:
+    """Return every recorded bundle export as ``{"bundles": [...]}``."""
+    return {"bundles": [dataclasses.asdict(record) for record in list_sent_bundles(DB_PATH)]}


@router.post("/api/incidents/{incident_id}/send")
-def send_incident_bundle(incident_id: str) -> dict:
+def send_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
    if not BUNDLE_ENDPOINT:
        raise HTTPException(status_code=503, detail="TURNSTONE_BUNDLE_ENDPOINT not configured")
    incident = get_incident(DB_PATH, incident_id)
    if not incident:
        raise HTTPException(status_code=404, detail="Incident not found")
-    bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST)
+    bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
+    record_sent_bundle(DB_PATH, incident_id, bundle, sanitized=sanitize)
    payload = json.dumps(bundle).encode()
    req = urllib.request.Request(
        BUNDLE_ENDPOINT,
--- a/app/services/discover.py
+++ b/app/services/discover.py
@ -0,0 +1,173 @@
+"""Environment auto-discovery for the onboarding wizard.
+
+All checks are best-effort — every function returns an empty list on failure
+so the wizard degrades gracefully in containers, VMs, and minimal environments.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shutil
+import subprocess
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Common log file candidates: (id, path, description).
+# An id may appear more than once (e.g. "syslog" for both the Debian and the
+# RHEL location); discover_files() keeps only the first entry whose path
+# exists, so order within a repeated id expresses preference.
+_KNOWN_PATHS: list[tuple[str, str, str]] = [
+    ("syslog",       "/var/log/syslog",              "System syslog (Debian/Ubuntu)"),
+    ("syslog",       "/var/log/messages",             "System messages (RHEL/Rocky)"),
+    ("auth",         "/var/log/auth.log",             "Auth log"),
+    ("kern",         "/var/log/kern.log",             "Kernel log"),
+    ("nginx-access", "/var/log/nginx/access.log",     "Nginx access log"),
+    ("nginx-error",  "/var/log/nginx/error.log",      "Nginx error log"),
+    ("apache",       "/var/log/apache2/access.log",   "Apache access log"),
+    ("apache-error", "/var/log/apache2/error.log",    "Apache error log"),
+    ("caddy",        "/var/log/caddy/access.log",     "Caddy access log"),
+    ("docker-daemon","/var/log/docker.log",           "Docker daemon log"),
+    ("fail2ban",     "/var/log/fail2ban.log",         "Fail2ban log"),
+    ("ufw",          "/var/log/ufw.log",              "UFW firewall log"),
+]
+
+
+def _run(cmd: list[str], timeout: float = 5.0) -> str | None:
+    """Execute *cmd* and hand back its stdout.
+
+    Returns None when the command cannot be run, times out, raises for any
+    other reason, or exits with a non-zero status.
+    """
+    try:
+        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
+    except Exception:
+        return None
+    if completed.returncode != 0:
+        return None
+    return completed.stdout
+
+
+def discover_journald() -> list[dict[str, Any]]:
+    """Return a single journald source candidate if journalctl is on PATH.
+
+    The candidate id and label embed the output of ``hostname``; when that
+    command fails or prints nothing usable, "localhost" is used instead.
+    Returns an empty list when journalctl is not found.
+    """
+    if not shutil.which("journalctl"):
+        return []
+    # Strip before applying the fallback: whitespace-only output would
+    # otherwise survive the `or` and produce an id of "journal:".
+    hostname = (_run(["hostname"]) or "").strip() or "localhost"
+    return [{
+        "type": "journald",
+        "id": f"journal:{hostname}",
+        "label": f"System journal ({hostname})",
+        "description": "All systemd journal output from this host",
+        "available": True,
+    }]
+
+
+def discover_docker() -> list[dict[str, Any]]:
+    """Return container candidates from the first runtime that lists any.
+
+    Tries ``docker`` then ``podman``: each is skipped when it is not on PATH
+    or when ``<runtime> ps`` fails.  Output is read as one JSON object per
+    line; lines that are not valid JSON, are not objects, or carry no usable
+    string name are ignored so a single odd line cannot abort discovery.
+    Returns an empty list when no runtime yields a container.
+    """
+    for runtime in ("docker", "podman"):
+        if not shutil.which(runtime):
+            continue
+        out = _run([runtime, "ps", "--format", "{{json .}}"])
+        if out is None:
+            continue
+        containers = []
+        for line in out.splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                obj = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            # Valid JSON that is not an object (e.g. a list) has no .get().
+            if not isinstance(obj, dict):
+                continue
+            name = obj.get("Names") or obj.get("Name") or obj.get("ID", "unknown")
+            # podman returns a list for Names
+            if isinstance(name, list):
+                name = name[0] if name else "unknown"
+            if not isinstance(name, str):
+                continue
+            name = name.lstrip("/")
+            containers.append({
+                "type": "docker",
+                "id": f"{runtime}:{name}",
+                "label": f"{runtime.capitalize()} — {name}",
+                "description": f"Container log stream for {name}",
+                "container": name,
+                "runtime": runtime,
+                "available": True,
+            })
+        if containers:
+            return containers
+    return []
+
+
+def discover_files() -> list[dict[str, Any]]:
+    """Build file-source candidates from the well-known paths that exist.
+
+    When several table rows share an id (syslog vs. messages), only the first
+    row whose path exists is reported.
+    """
+    by_id: dict[str, dict[str, Any]] = {}
+    for source_id, path, description in _KNOWN_PATHS:
+        # An id already claimed by an earlier existing path wins.
+        if source_id in by_id or not os.path.exists(path):
+            continue
+        by_id[source_id] = {
+            "type": "file",
+            "id": source_id,
+            "label": description,
+            "path": path,
+            "description": f"Read from {path}",
+            "available": True,
+        }
+    return list(by_id.values())
+
+
+def discover_all() -> dict[str, Any]:
+    """Collect journald, container and file candidates into one response.
+
+    The result holds the combined ``candidates`` list (journald first, then
+    containers, then files) and one ``has_*`` flag per source type.
+    """
+    candidates: list[dict[str, Any]] = [
+        *discover_journald(),
+        *discover_docker(),
+        *discover_files(),
+    ]
+    present_types = {candidate["type"] for candidate in candidates}
+    return {
+        "candidates": candidates,
+        "has_journald": "journald" in present_types,
+        "has_docker": "docker" in present_types,
+        "has_files": "file" in present_types,
+    }
+
+
+# Words a YAML 1.1 loader resolves to bool/null when they are left unquoted.
+_YAML_RESERVED_WORDS = frozenset({"true", "false", "yes", "no", "on", "off", "null", "y", "n"})
+
+
+def _yaml_scalar(value: Any) -> str:
+    """Render *value* as a YAML scalar, double-quoting unless it is plainly safe."""
+    text = value if isinstance(value, str) else str(value)
+    plain_ok = (
+        bool(text)
+        and text.isascii()
+        and (text[0].isalpha() or text[0] == "/")
+        and text[-1] not in ": "
+        and ": " not in text
+        and all(ch.isalnum() or ch in "_-./: " for ch in text)
+        and text.lower() not in _YAML_RESERVED_WORDS
+    )
+    # A JSON string literal is also a valid YAML double-quoted scalar, so
+    # json.dumps supplies the escaping for quotes, backslashes and newlines.
+    return text if plain_ok else json.dumps(text, ensure_ascii=False)
+
+
+def build_sources_yaml(selected: list[dict[str, Any]]) -> str:
+    """Generate sources.yaml content from a list of selected candidates.
+
+    Each item should carry ``type`` and ``id`` plus the type-specific fields:
+    ``unit`` (optional) for journald, ``runtime``/``container`` for docker,
+    ``path`` for everything else.  Only those fields are emitted; any other
+    keys on an item are dropped.  A missing ``type`` is treated as a file
+    source and a missing ``id`` becomes "unknown".
+
+    Values that are not plainly safe as unquoted YAML (newlines, ``": "``,
+    ``#``, quotes, bool/number look-alikes, empty strings) are written as
+    double-quoted scalars so they cannot alter the document structure.
+    """
+    lines = [
+        "# Turnstone log sources — generated by the setup wizard.",
+        "# Edit this file to add, remove, or modify sources.",
+        "sources:",
+    ]
+    for src in selected:
+        src_type = src.get("type", "file")
+        src_id = src.get("id", "unknown")
+        lines.append(f"  - id: {_yaml_scalar(src_id)}")
+        if src_type == "journald":
+            lines.append("    type: journald")
+            unit = src.get("unit")
+            if unit:
+                lines.append(f"    unit: {_yaml_scalar(unit)}")
+        elif src_type == "docker":
+            runtime = src.get("runtime", "docker")
+            # Fall back to the part of the id after the last ":" only when
+            # no container key was supplied.
+            if "container" in src:
+                container = src["container"]
+            else:
+                container = str(src_id).split(":")[-1]
+            lines.append("    type: docker")
+            lines.append(f"    runtime: {_yaml_scalar(runtime)}")
+            lines.append(f"    container: {_yaml_scalar(container)}")
+        else:
+            lines.append(f"    path: {_yaml_scalar(src.get('path', ''))}")
+    return "\n".join(lines) + "\n"
+
+
+def validate_source(src: dict[str, Any]) -> str | None:
+    """Return an error string if the source definition is invalid, else None.
+
+    Checks, in order: a non-empty ``id``; a ``type`` that is one of file,
+    journald or docker (a missing or empty type counts as file); ``path`` for
+    file sources; ``container`` for docker sources.  Journald sources need
+    nothing beyond the id.
+    """
+    source_id = src.get("id")
+    if not source_id:
+        return "Source is missing 'id'"
+    src_type = src.get("type") or "file"
+    # Reject unknown types here: without this they passed validation with no
+    # path and were then written out as a file source with an empty path.
+    if src_type not in ("file", "journald", "docker"):
+        return f"Source '{source_id}' has unsupported type {src_type!r}"
+    if src_type == "file" and not src.get("path"):
+        return f"File source '{source_id}' is missing 'path'"
+    if src_type == "docker" and not src.get("container"):
+        return f"Docker source '{source_id}' is missing 'container'"
+    return None
--- a/app/services/incidents.py
+++ b/app/services/incidents.py
@ -2,14 +2,29 @@
 from __future__ import annotations

 import json
+import re
 import sqlite3
 import uuid
 from pathlib import Path

 from app.glean.base import now_iso
-from app.services.models import Incident, ReceivedBundle
+from app.services.models import Incident, ReceivedBundle, SentBundle
 from app.services.search import SearchResult, entries_in_window, search

+# A redactable value: a whole quoted string (so embedded spaces are covered)
+# or, failing that, a run of non-whitespace characters.
+_REDACT_VALUE = r"""(?:"[^"]*"|'[^']*'|\S+)"""
+
+# Applied in order.  IP and e-mail come first, so a key=value pattern may see
+# an already-substituted placeholder as its value and replace it again.
+_REDACT_PATTERNS: list[tuple[re.Pattern, str]] = [
+    (re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"), "[IP]"),
+    (re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}"), "[EMAIL]"),
+    (re.compile(r"(?i)\b(user(?:name)?|uid)\s*[=:]\s*" + _REDACT_VALUE), r"\1=[USER]"),
+    (re.compile(r"(?i)\bhost\s*[=:]\s*" + _REDACT_VALUE), "host=[HOST]"),
+    (re.compile(r"(?i)\bpassword\s*[=:]\s*" + _REDACT_VALUE), "password=[REDACTED]"),
+]
+
+
+def _redact_text(text: str) -> str:
+    """Return *text* with IPv4 addresses, e-mail addresses and the values of
+    user/username/uid, host and password ``key=value`` / ``key: value`` pairs
+    replaced by placeholders.
+
+    Quoted values are consumed up to the closing quote, so a value containing
+    spaces is redacted as a whole.
+    """
+    for pattern, replacement in _REDACT_PATTERNS:
+        text = pattern.sub(replacement, text)
+    return text
+

 def _row_to_incident(row: sqlite3.Row) -> Incident:
    return Incident(
@ -142,6 +157,7 @@ def build_bundle(
    incident: Incident,
    source_host: str,
    limit: int = 200,
+    sanitize: bool = False,
 ) -> dict:
    """Assemble a labeled bundle: incident metadata + related log entries."""
    entries = get_incident_entries(db_path, incident, limit=limit)
@ -149,6 +165,7 @@ def build_bundle(
        "bundle_version": 1,
        "source_host": source_host,
        "bundled_at": now_iso(),
+        "sanitized": sanitize,
        "incident": {
            "id": incident.id,
            "label": incident.label,
@ -164,7 +181,7 @@ def build_bundle(
                "source_id": e.source_id,
                "timestamp_iso": e.timestamp_iso,
                "severity": e.severity,
-                "text": e.text,
+                "text": _redact_text(e.text) if sanitize else e.text,
                "matched_patterns": list(e.matched_patterns),
            }
            for e in entries
@ -172,6 +189,51 @@ def build_bundle(
    }


+def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized: bool) -> SentBundle:
+    """Log an outgoing bundle export to the sent_bundles table.
+
+    Stores a fresh UUID, the incident id, the current timestamp, the
+    sanitized flag, the number of items under the bundle's ``log_entries``
+    key, and the whole bundle serialized as JSON.  Returns the stored record.
+    """
+    record = SentBundle(
+        id=str(uuid.uuid4()),
+        incident_id=incident_id,
+        exported_at=now_iso(),
+        sanitized=sanitized,
+        entry_count=len(bundle.get("log_entries", [])),
+        bundle_json=json.dumps(bundle),
+    )
+    conn = sqlite3.connect(str(db_path), timeout=30.0)
+    # try/finally so the connection is released even when the insert or the
+    # commit raises (locked database, missing table, ...).
+    try:
+        conn.execute("PRAGMA journal_mode=WAL")
+        conn.execute(
+            "INSERT INTO sent_bundles (id, incident_id, exported_at, sanitized, entry_count, bundle_json) "
+            "VALUES (?, ?, ?, ?, ?, ?)",
+            (record.id, record.incident_id, record.exported_at, int(record.sanitized),
+             record.entry_count, record.bundle_json),
+        )
+        conn.commit()
+    finally:
+        conn.close()
+    return record
+
+
+def list_sent_bundles(db_path: Path) -> list[SentBundle]:
+    """Return every row of sent_bundles, newest export first.
+
+    Each record includes the full ``bundle_json`` payload.
+    """
+    conn = sqlite3.connect(str(db_path), timeout=30.0)
+    # try/finally so the connection is released even when the query raises.
+    try:
+        conn.execute("PRAGMA journal_mode=WAL")
+        conn.row_factory = sqlite3.Row
+        rows = conn.execute(
+            "SELECT id, incident_id, exported_at, sanitized, entry_count, bundle_json "
+            "FROM sent_bundles ORDER BY exported_at DESC"
+        ).fetchall()
+    finally:
+        conn.close()
+    return [
+        SentBundle(
+            id=r["id"],
+            incident_id=r["incident_id"],
+            exported_at=r["exported_at"],
+            sanitized=bool(r["sanitized"]),
+            entry_count=r["entry_count"],
+            bundle_json=r["bundle_json"],
+        )
+        for r in rows
+    ]
+
+
 def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
    """Store an incoming bundle from a remote Turnstone instance."""
    inc = bundle.get("incident", {})
--- a/app/services/models.py
+++ b/app/services/models.py
@ -60,3 +60,15 @@ class ReceivedBundle:
    bundled_at: str
    entry_count: int
    bundle_json: str           # full bundle serialized as JSON string
+
+
+@dataclass(frozen=True)
+class SentBundle:
+    """A record of a bundle exported or sent from this instance."""
+
+    # Row id; record_sent_bundle() generates it as a UUID4 string.
+    id: str
+    # Id of the incident the bundle was built from.
+    incident_id: str
+    # Timestamp string taken when the export was recorded.
+    exported_at: str
+    # Whether the bundle was built with sanitize=True.
+    sanitized: bool
+    # Number of items under the bundle's "log_entries" key.
+    entry_count: int
+    # Full bundle serialized as a JSON string.
+    bundle_json: str
--- a/app/services/nl_source.py
+++ b/app/services/nl_source.py
@ -0,0 +1,134 @@
+"""Natural-language log source interpretation (LLM path for #53).
+
+BSL-gated feature: the structured form fallback is MIT; the LLM interpretation
+requires the LLM service to be configured. The caller always validates the
+output against the source schema before writing anything.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import re
+from typing import Any
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# System prompt for the LLM path: describes the source schema the model must
+# answer with.  The parenthesised notes are hints for the model, not JSON.
+_SYSTEM_PROMPT = """\
+You are a Turnstone log-source configuration assistant.
+The operator will describe a log source in plain English.
+Respond ONLY with a JSON object matching this schema — no prose, no markdown:
+
+{
+  "id":        "short-kebab-case identifier",
+  "type":      "file" | "journald" | "docker",
+  "path":      "/absolute/path  (file type only)",
+  "container": "container-name  (docker type only)",
+  "runtime":   "docker" | "podman"  (docker type only, default docker),
+  "unit":      "service.service  (journald type only, omit for all-journal)",
+  "label":     "Human-readable name for the UI"
+}
+
+Rules:
+- For well-known apps (nginx, apache, caddy, sonarr, radarr, qbittorrent, plex, jellyfin),
+  use the conventional default log path.
+- If the operator mentions a Docker/Podman container, use type=docker.
+- If the operator mentions journald or a systemd service, use type=journald.
+- If uncertain, use type=file with the most likely path.
+- The "id" must be lowercase, hyphens only (no spaces, slashes, dots).
+- Never include trailing commas or comments in your JSON.
+"""
+
+# Well-known sources for common apps, keyed by the word that triggers them.
+# interpret() consults this table first, before any LLM call is made.
+_KNOWN_APPS: dict[str, dict[str, Any]] = {
+    "nginx":        {"id": "nginx-access",  "type": "file", "path": "/var/log/nginx/access.log"},
+    "apache":       {"id": "apache",        "type": "file", "path": "/var/log/apache2/access.log"},
+    "caddy":        {"id": "caddy",         "type": "file", "path": "/var/log/caddy/access.log"},
+    "sonarr":       {"id": "sonarr",        "type": "file", "path": "/var/log/sonarr/sonarr.0.txt"},
+    "radarr":       {"id": "radarr",        "type": "file", "path": "/var/log/radarr/radarr.0.txt"},
+    "qbittorrent":  {"id": "qbittorrent",   "type": "file", "path": "/var/log/qbittorrent/qbittorrent.log"},
+    "plex":         {"id": "plex",          "type": "file", "path": "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs/Plex Media Server.log"},
+    "jellyfin":     {"id": "jellyfin",      "type": "file", "path": "/var/log/jellyfin/jellyfin.log"},
+    "syslog":       {"id": "syslog",        "type": "file", "path": "/var/log/syslog"},
+    "auth":         {"id": "auth",          "type": "file", "path": "/var/log/auth.log"},
+    "fail2ban":     {"id": "fail2ban",      "type": "file", "path": "/var/log/fail2ban.log"},
+    "docker":       {"id": "docker-daemon", "type": "file", "path": "/var/log/docker.log"},
+    "journal":      {"id": "journal",       "type": "journald"},
+    "journald":     {"id": "journal",       "type": "journald"},
+    "systemd":      {"id": "journal",       "type": "journald"},
+}
+
+
+def _keyword_match(description: str) -> dict[str, Any] | None:
+    """Return a copy of the first known-app template whose keyword occurs as a
+    whole word in *description*, or None.
+
+    Matching is case-insensitive and on whole alphanumeric words, so
+    "authelia" or "complex" do not trigger the "auth" or "plex" entries.
+    Table order decides when several keywords are present.  A ``label`` of
+    "<Keyword> log" is added when the template has none.
+    """
+    words = set(re.findall(r"[a-z0-9]+", description.lower()))
+    for keyword, template in _KNOWN_APPS.items():
+        if keyword in words:
+            result = dict(template)
+            result.setdefault("label", keyword.capitalize() + " log")
+            return result
+    return None
+
+
+def _extract_json(text: str) -> dict[str, Any] | None:
+    """Return the first JSON object that can be decoded from *text*, or None.
+
+    Each ``{`` is tried in turn as the start of an object, so surrounding
+    prose or code fences are ignored, and braces inside string values or
+    nested objects do not break extraction.
+    """
+    decoder = json.JSONDecoder()
+    start = text.find("{")
+    while start != -1:
+        try:
+            candidate, _end = decoder.raw_decode(text, start)
+        except json.JSONDecodeError:
+            candidate = None
+        if isinstance(candidate, dict):
+            return candidate
+        start = text.find("{", start + 1)
+    return None
+
+
+def interpret(
+    description: str,
+    llm_url: str | None,
+    llm_model: str | None,
+    api_key: str | None = None,
+    timeout: float = 30.0,
+) -> dict[str, Any] | None:
+    """Turn a natural-language description into a source dict.
+
+    Tries the keyword table first; only when that misses, and both an LLM URL
+    and model are supplied, is a chat-completions request sent.  Any failure
+    on the LLM path (transport error, bad status, unexpected payload, no JSON
+    in the reply) is logged as a warning and reported as None.
+
+    The result is NOT validated here: callers must run it through
+    discover.validate_source() before writing anything to disk.
+    """
+    # Step 1: deterministic shortcut for well-known apps.
+    shortcut = _keyword_match(description)
+    if shortcut:
+        logger.debug("NL source: keyword match for %r", description)
+        return shortcut
+
+    # Step 2: LLM, only when fully configured.
+    if not (llm_url and llm_model):
+        logger.debug("NL source: no LLM configured, returning None")
+        return None
+
+    request_headers = {"Content-Type": "application/json"}
+    if api_key:
+        request_headers["Authorization"] = f"Bearer {api_key}"
+    payload = {
+        "model": llm_model,
+        "messages": [
+            {"role": "system", "content": _SYSTEM_PROMPT},
+            {"role": "user", "content": description},
+        ],
+        "stream": False,
+        "max_tokens": 256,
+    }
+
+    try:
+        resp = httpx.post(
+            f"{llm_url.rstrip('/')}/v1/chat/completions",
+            json=payload,
+            headers=request_headers,
+            timeout=timeout,
+        )
+        resp.raise_for_status()
+        reply = resp.json()["choices"][0]["message"]["content"]
+        parsed = _extract_json(reply)
+        if not parsed:
+            logger.warning("NL source: could not extract JSON from LLM response")
+            return None
+        # Fall back to the (truncated) description when the model gave no label.
+        parsed.setdefault("label", description[:60])
+        return parsed
+    except Exception as exc:
+        logger.warning("NL source: LLM call failed (%s): %s", type(exc).__name__, exc)
+
+    return None
--- a/docs/compliance/checklist.md
+++ b/docs/compliance/checklist.md
@ -0,0 +1,154 @@
+# Turnstone Compliance Checklist
+
+**Last reviewed:** 2026-05-28
+**Applies to:** All deployments handling log data in compliance-sensitive environments.
+
+Symbols: ✅ satisfied by code, ⚙️ operator action required, ⚠️ known limitation, 🔲 not implemented.
+
+---
+
+## Data Isolation
+
+### Source-level query isolation
+✅ **`source_filter` enforced on all log-returning endpoints.**
+Every endpoint that returns log entries accepts a `source` parameter. Both the FTS5 keyword search path and the time-window scan path apply `source_id LIKE ?` before returning results. No cross-source data leakage is possible through the API.
+
+Relevant code: `app/services/search.py` — `search()` and `entries_in_window()`.
+
+### FTS5 cross-source leakage
+✅ **FTS5 index includes `source_id` as an UNINDEXED column; all queries filter on it.**
+The virtual table schema stores `source_id` alongside each entry. Query functions always join back to the base table or filter the FTS result set by `source_id`. There is no full-corpus FTS path that ignores source.
+
+### SQLite file permissions
+⚙️ **Operator responsibility — not enforced by Turnstone.**
+Turnstone does not set file permissions on the database. Recommended posture for multi-user hosts:
+
+```bash
+# Restrict DB to the Turnstone process user only
+chmod 600 /devl/turnstone-cluster/data/turnstone.db
+chmod 600 /devl/turnstone-cluster/data/turnstone-context.db
+chown turnstone:turnstone /devl/turnstone-cluster/data/
+```
+
+Run Turnstone as a dedicated non-root user via systemd `User=turnstone`.
+
+---
+
+## Audit Logging
+
+### API query logging
+✅ **Implemented as FastAPI middleware (`turnstone.audit` logger).**
+Every request to `/turnstone/api/*` is logged at INFO level with:
+- Timestamp (from the logging handler)
+- HTTP method
+- Path + query string
+- Response status code
+- Request duration (ms)
+
+Body content is never logged. Example output:
+```
+2026-05-28 14:23:01 INFO turnstone.audit  GET /turnstone/api/diagnose/stream?source=heimdall-journal 200 1843ms
+```
+
+To capture audit logs to a separate file, configure the `turnstone.audit` logger in your logging config:
+```python
+# In your uvicorn startup or log config YAML:
+logging.getLogger("turnstone.audit").addHandler(
+    logging.FileHandler("/var/log/turnstone/audit.log")
+)
+```
+
+### Glean operation logging
+✅ **Glean scheduler logs source ID, entry count, and duration at INFO level.**
+Relevant logger: `app.tasks.glean_scheduler` — logs start, per-source stats, and errors.
+Log example:
+```
+INFO app.tasks.glean_scheduler  Batch glean complete in 12.4s — {'heimdall-journal': 847, 'plex': 12}
+```
+
+### Error logging
+✅ **Errors logged with source context but without PII in message fields.**
+Exception handlers in `rest.py` log at ERROR level with the endpoint path and error type. Raw log entry text is not included in error messages. Stack traces go to the `uvicorn.error` logger.
+
+---
+
+## LLM / PII Egress
+
+### Multi-agent pipeline (recommended path, `TURNSTONE_MULTI_AGENT_DIAGNOSE=true`)
+✅ **Raw log message text is NOT sent to the LLM.**
+Stage 5 (synthesizer) sends only:
+- The operator's query string
+- Timeline statistics (cluster counts, burst counts, gap counts — no entry text)
+- Hypothesis titles from Stage 3 (derived labels, not raw messages)
+- Runbook context from the operator's own uploaded documents
+
+No raw `MESSAGE` field content reaches the LLM in this path. Review: `app/services/diagnose/synthesizer.py`.
+
+### Legacy single-call path (`TURNSTONE_MULTI_AGENT_DIAGNOSE` unset or `false`)
+⚠️ **Raw log message text (truncated to 200 chars) IS sent to the LLM.**
+The legacy `summarize()` function in `app/services/llm.py` builds a prompt that includes up to 25 log entries with their `text` field (truncated). If log entries contain hostnames, usernames, IP addresses, or other PII, those values are included in the LLM call.
+
+**Operator action for PII-sensitive deployments:** Enable `TURNSTONE_MULTI_AGENT_DIAGNOSE=true` to use the pipeline path, which does not expose raw log text.
+
+### Avocet harvester (corpus export)
+✅ **Only pattern-tagged entries are exported; export can be disabled.**
+The harvester (`harvester/harvester.py`) only POSTs entries that matched at least one named pattern. It does not export the full corpus. Disable by leaving `TURNSTONE_SUBMIT_ENDPOINT` unset (the default).
+
+### External telemetry
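+✅ **None.** Turnstone makes no calls to Sentry, Segment, Amplitude, or any analytics service. The only outbound network calls are:
+- Your configured `GPU_SERVER_URL` (LLM inference, operator-controlled)
+- HuggingFace Hub (model downloads — disable with `TURNSTONE_OFFLINE_MODE=1`)
+- SSH connections to configured remote log sources (operator-defined)
+- `TURNSTONE_SUBMIT_ENDPOINT` (Avocet corpus push — only when set; see "Avocet harvester" above)
+- `TURNSTONE_BUNDLE_ENDPOINT` (incident bundle send — only when set, and only when an operator sends a bundle)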
+
+---
+
+## Configuration Hardening
+
+For compliance deployments, set these in `.env`:
+
+```bash
+# Block HuggingFace network access (model weights pre-downloaded)
+TURNSTONE_OFFLINE_MODE=1
+
+# Require bearer token for all API calls
+TURNSTONE_API_KEY=<strong-random-token>
+
+# Use multi-agent pipeline (no raw log text to LLM)
+TURNSTONE_MULTI_AGENT_DIAGNOSE=true
+
+# Disable Avocet corpus push if not needed
+# (leave TURNSTONE_SUBMIT_ENDPOINT unset)
+```
+
+---
+
+## Outstanding Items
+
+🔲 **Per-user access control** — all authenticated clients share the same API key. There is no per-user identity, role separation, or per-source ACL. Track as a future enhancement.
+
+🔲 **Audit log retention policy** — Turnstone writes audit events to the logging system but does not manage log rotation or retention. Operator must configure log rotation (logrotate, systemd journal limits, etc.).
+
+🔲 **Encrypted DB at rest** — SQLite does not support transparent encryption. For encryption at rest, use full-disk encryption (LUKS) or an encrypted filesystem on the host.
+
+🔲 **TLS between client and Turnstone** — Turnstone binds to HTTP by default. For production, place Caddy or nginx in front and terminate TLS there. Do not expose port 8534 directly over untrusted networks.
+
+---
+
+## Data Subject Rights (GDPR / CCPA)
+
+### Right to erasure — anonymized records
+
+⚠️ **Anonymized log data cannot be selectively deleted on a per-subject basis.**
+
+When PII sanitization is applied to a bundle export (redacting IP addresses, usernames, hostnames), the resulting data is no longer linked to a specific data subject. As a consequence, Turnstone cannot identify which stored log entries relate to that subject and cannot fulfill a targeted deletion request for records that have already been anonymized.
+
+**Operators must clearly disclose this limitation to data subjects before export:**
+
+> "Anonymized log data exported or submitted from this system cannot be individually identified or selectively deleted. If data was exported in anonymized form, Turnstone cannot distinguish your records from others in the exported set. The right to erasure does not apply to data that is no longer personally identifiable."
+
+This is consistent with GDPR Recital 26, which excludes anonymized data from the regulation's scope. However, the original (pre-anonymization) records in Turnstone's local SQLite database *can* be deleted by source ID via the Sources view (Delete all entries for source) or directly via the database.
+
+**Recommended operator practice:**
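+- Maintain a log of which bundles were exported, when, and to whom. The `sent_bundles` table (exposed via `GET /api/sent-bundles`) records each export with its timestamp and sanitized flag, and the audit log (`turnstone.audit`) records the export request itself. Neither records the recipient, so track that separately.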
+- Provide data subjects with the bundle export timestamp and source scope so they can verify what was shared.
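+- For full erasure of pre-anonymization records: use `DELETE /api/sources/{source_id}` to purge all entries for a given source from the local DB. Every exported bundle is also stored in full in the `sent_bundles` table (`bundle_json`), including unsanitized exports; confirm those copies are removed as well when handling an erasure request.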
--- a/web/src/components/SetupWizard.vue
+++ b/web/src/components/SetupWizard.vue
@ -0,0 +1,421 @@
+<template>
+  <div class="rounded border border-accent bg-surface-raised p-6 sm:p-8 max-w-2xl mx-auto">
+
+    <!-- Step indicator -->
+    <div class="flex items-center gap-2 mb-6">
+      <span v-for="(label, i) in stepLabels" :key="i" class="flex items-center gap-2">
+        <span
+          class="w-6 h-6 rounded-full flex items-center justify-center text-xs font-semibold border"
+          :class="i + 1 === step
+            ? 'bg-accent text-bg border-accent'
+            : i + 1 < step
+              ? 'bg-accent/20 text-accent border-accent/40'
+              : 'bg-surface text-text-dim border-surface-border'"
+        >{{ i + 1 }}</span>
+        <span class="text-xs hidden sm:inline" :class="i + 1 === step ? 'text-text-primary' : 'text-text-dim'">{{ label }}</span>
+        <span v-if="i < stepLabels.length - 1" class="text-text-dim text-xs">›</span>
+      </span>
+    </div>
+
+    <!-- ── Step 1: Detect ── -->
+    <div v-if="step === 1">
+      <h2 class="text-text-primary text-base font-semibold mb-1">Detecting log sources…</h2>
+      <p class="text-text-dim text-sm mb-5">Turnstone is scanning for available log sources on this host.</p>
+
+      <div v-if="discovering" class="flex items-center gap-2 text-text-dim text-sm py-4">
+        <svg class="animate-spin w-4 h-4 text-accent" viewBox="0 0 24 24" fill="none">
+          <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"/>
+          <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v8z"/>
+        </svg>
+        Scanning…
+      </div>
+
+      <div v-else-if="discoverError" class="text-sev-error text-sm py-4">
+        {{ discoverError }}
+        <button @click="runDiscover" class="ml-2 underline text-accent text-xs">Retry</button>
+      </div>
+
+      <div v-else>
+        <div v-if="candidates.length === 0" class="text-text-dim text-sm py-3 mb-4">
+          No sources auto-detected. You can add sources manually in the next step.
+        </div>
+        <div v-else class="space-y-1 text-sm mb-4">
+          <div v-for="g in groups" :key="g.type" class="flex items-center gap-2 text-text-muted">
+            <span class="font-mono text-xs text-text-dim px-1.5 py-0.5 rounded border border-surface-border">{{ g.type }}</span>
+            <span><strong class="text-text-primary">{{ g.items.length }}</strong> {{ g.label }}</span>
+          </div>
+        </div>
+
+        <div class="flex justify-between items-center mt-6">
+          <a @click.prevent="$emit('skip')" href="#" class="text-text-dim text-xs hover:text-text-muted">
+            Skip — I'll edit sources.yaml manually
+          </a>
+          <button @click="step = 2" class="btn-primary text-sm">Continue →</button>
+        </div>
+      </div>
+    </div>
+
+    <!-- ── Step 2: Select ── -->
+    <div v-if="step === 2">
+      <h2 class="text-text-primary text-base font-semibold mb-1">Select log sources</h2>
+      <p class="text-text-dim text-sm mb-4">Choose which sources to monitor. You can add more later.</p>
+
+      <!-- Grouped source list -->
+      <div class="space-y-3 mb-4">
+        <div v-for="g in groups" :key="g.type" class="rounded border border-surface-border overflow-hidden">
+
+          <!-- Group header -->
+          <div class="flex items-center gap-3 px-3 py-2 bg-surface border-b border-surface-border">
+            <button @click="toggleGroupOpen(g.type)" class="flex items-center gap-2 flex-1 min-w-0 text-left">
+              <span class="text-text-dim text-xs">{{ groupOpen[g.type] ? '▾' : '▸' }}</span>
+              <span class="text-text-primary text-sm font-medium">{{ g.label }}</span>
+              <span class="text-text-dim text-xs">({{ g.items.length }})</span>
+              <span v-if="groupSelectedCount(g.type) > 0" class="text-accent text-xs ml-1">
+                {{ groupSelectedCount(g.type) }} selected
+              </span>
+            </button>
+            <div class="flex items-center gap-2 shrink-0">
+              <button
+                @click="selectGroup(g.type)"
+                class="text-xs px-2 py-0.5 rounded border border-surface-border text-text-dim hover:text-accent hover:border-accent transition-colors"
+              >All</button>
+              <button
+                @click="deselectGroup(g.type)"
+                class="text-xs px-2 py-0.5 rounded border border-surface-border text-text-dim hover:text-sev-error hover:border-sev-error transition-colors"
+              >None</button>
+            </div>
+          </div>
+
+          <!-- Group items -->
+          <div v-if="groupOpen[g.type]" class="divide-y divide-surface-border max-h-64 overflow-y-auto">
+            <label
+              v-for="c in g.items"
+              :key="c.id"
+              class="flex items-start gap-3 px-3 py-2.5 cursor-pointer transition-colors"
+              :class="isSelected(c) ? 'bg-accent/5' : 'hover:bg-surface'"
+            >
+              <input
+                type="checkbox"
+                :checked="isSelected(c)"
+                @change="toggleCandidate(c)"
+                class="mt-0.5 accent-accent shrink-0"
+              />
+              <div class="min-w-0 flex-1">
+                <div class="text-text-primary text-sm">{{ c.label }}</div>
+                <div v-if="c.path" class="font-mono text-xs text-text-dim mt-0.5 truncate">{{ c.path }}</div>
+                <div v-else-if="c.container" class="font-mono text-xs text-text-dim mt-0.5">{{ c.container }}</div>
+              </div>
+            </label>
+          </div>
+        </div>
+      </div>
+
+      <!-- NL / manual add -->
+      <div class="border border-surface-border rounded p-4 mb-4">
+        <p class="text-text-muted text-xs font-medium mb-2">Add a source by description</p>
+        <div class="flex gap-2">
+          <input
+            v-model="nlDescription"
+            type="text"
+            placeholder="e.g. nginx access log, qbittorrent, sonarr"
+            class="flex-1 bg-surface border border-surface-border rounded px-3 py-1.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent"
+            @keydown.enter="interpretNL"
+          />
+          <button
+            @click="interpretNL"
+            :disabled="!nlDescription.trim() || interpreting"
+            class="btn-secondary text-xs px-3 disabled:opacity-40 disabled:cursor-not-allowed"
+          >{{ interpreting ? '…' : 'Add' }}</button>
+        </div>
+        <div v-if="nlError" class="text-sev-error text-xs mt-2">{{ nlError }}</div>
+        <div v-if="showManualForm" class="mt-3 space-y-2">
+          <p class="text-text-dim text-xs">Couldn't interpret that — fill in manually:</p>
+          <div class="flex gap-2">
+            <input v-model="manualId"   placeholder="id (e.g. nginx)" class="flex-1 input-sm" />
+            <input v-model="manualPath" placeholder="/path/to/log.txt" class="flex-1 input-sm" />
+          </div>
+          <button @click="addManual" class="btn-secondary text-xs mt-1">Add manually</button>
+        </div>
+      </div>
+
+      <div class="flex justify-between items-center">
+        <button @click="step = 1" class="text-text-dim text-xs hover:text-text-muted">← Back</button>
+        <div class="flex items-center gap-3">
+          <span class="text-text-dim text-xs">
+            {{ selected.length }} source{{ selected.length === 1 ? '' : 's' }} selected
+          </span>
+          <button
+            @click="step = 3"
+            :disabled="selected.length === 0"
+            class="btn-primary text-sm disabled:opacity-40 disabled:cursor-not-allowed"
+          >Review →</button>
+        </div>
+      </div>
+    </div>
+
+    <!-- ── Step 3: Confirm ── -->
+    <div v-if="step === 3">
+      <h2 class="text-text-primary text-base font-semibold mb-1">Confirm and write</h2>
+      <p class="text-text-dim text-sm mb-4">Review the <code class="bg-surface px-1 rounded">sources.yaml</code> that will be written.</p>
+
+      <pre class="bg-surface border border-surface-border rounded p-3 text-xs font-mono text-text-primary overflow-x-auto max-h-64 mb-5 whitespace-pre">{{ previewYaml }}</pre>
+
+      <div v-if="writeError" class="text-sev-error text-sm mb-4">{{ writeError }}</div>
+      <div v-if="writeSuccess" class="text-green-400 text-sm mb-4">{{ writeSuccess }}</div>
+
+      <div class="flex justify-between items-center">
+        <button @click="step = 2" class="text-text-dim text-xs hover:text-text-muted">← Back</button>
+        <button
+          @click="writeAndFinish"
+          :disabled="writing"
+          class="btn-primary text-sm disabled:opacity-40 disabled:cursor-not-allowed"
+        >{{ writing ? 'Writing…' : 'Write sources.yaml' }}</button>
+      </div>
+    </div>
+
+  </div>
+</template>
+
+<script setup lang="ts">
+import { ref, computed, reactive, onMounted } from 'vue'
+
+// Deployment base path with any single trailing slash removed; every API URL
+// in this component is built as `${BASE}/api/...`.
+const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
+
+// `done` is emitted shortly after a successful write; `skip` is emitted from
+// the "Skip" link in step 1.
+const emit = defineEmits<{ done: []; skip: [] }>()
+
+/**
+ * One log source offered by the wizard: returned by discovery, produced by the
+ * interpret endpoint, or entered through the manual form.
+ */
+interface Candidate {
+  // Grouping key. 'journald' and 'docker' get dedicated YAML in the preview;
+  // any other value is rendered as an id/path pair.
+  type: string
+  // Identity used for selection and de-duplication (all comparisons are by id).
+  id: string
+  // Row title shown in step 2.
+  label: string
+  description: string
+  // Shown under the label when present; emitted as `path:` in the preview for
+  // sources that are neither journald nor docker.
+  path?: string
+  // Shown under the label when there is no path; emitted as `container:` for docker.
+  container?: string
+  // Docker only; the preview falls back to 'docker' when absent.
+  runtime?: string
+  // Journald only; emitted as `unit:` when set.
+  unit?: string
+  // Defaulted to true for interpreted sources and set to true for manual ones.
+  // NOTE(review): meaning for discovered sources is defined by the backend — confirm.
+  available: boolean
+}
+
+/** Candidates that share a `type`, plus the heading shown for that section. */
+interface Group {
+  // The shared Candidate.type value; also the key into groupOpen.
+  type: string
+  // Display label from GROUP_META, or the raw type when it has no entry there.
+  label: string
+  items: Candidate[]
+}
+
+// Per-type presentation defaults:
+//   label       - section heading
+//   order       - ascending sort position (types without an entry sort as 99)
+//   defaultOpen - whether the section starts expanded after discovery
+//   preselect   - whether discovered candidates of this type start selected
+const GROUP_META: Record<string, { label: string; order: number; defaultOpen: boolean; preselect: boolean }> = {
+  journald: { label: 'System journal',      order: 0, defaultOpen: true,  preselect: true  },
+  file:     { label: 'Log files',           order: 1, defaultOpen: true,  preselect: true  },
+  docker:   { label: 'Docker containers',   order: 2, defaultOpen: false, preselect: false },
+}
+
+// Labels for the step indicator; index i corresponds to step i + 1.
+const stepLabels = ['Detect', 'Select', 'Confirm']
+// Current wizard step (1-based).
+const step        = ref(1)
+// True while the discovery request is in flight.
+const discovering = ref(false)
+// Message shown with a Retry button when discovery fails.
+const discoverError = ref<string | null>(null)
+// Every source the wizard knows about (discovered, interpreted, or manual).
+const candidates  = ref<Candidate[]>([])
+// The subset the user has chosen; this is what gets previewed and written.
+const selected    = ref<Candidate[]>([])
+
+// Track which groups are expanded
+const groupOpen = reactive<Record<string, boolean>>({})
+
+// Candidates bucketed by type and ordered by GROUP_META.order; types without
+// an entry there sort last (rank 99) and use the raw type as their label.
+const groups = computed<Group[]>(() => {
+  const byType: Record<string, Candidate[]> = {}
+  candidates.value.forEach(cand => {
+    const bucket = byType[cand.type] ?? (byType[cand.type] = [])
+    bucket.push(cand)
+  })
+
+  const rank = (type: string): number => GROUP_META[type]?.order ?? 99
+
+  const result: Group[] = []
+  for (const [type, items] of Object.entries(byType)) {
+    result.push({ type, label: GROUP_META[type]?.label ?? type, items })
+  }
+  return result.sort((left, right) => rank(left.type) - rank(right.type))
+})
+
+/** Number of currently selected candidates in the group of the given type (0 if no such group). */
+function groupSelectedCount(type: string): number {
+  const members = groups.value.find(g => g.type === type)?.items
+  if (members === undefined) return 0
+  let count = 0
+  for (const member of members) {
+    if (isSelected(member)) count += 1
+  }
+  return count
+}
+
+/** Flip the expanded/collapsed state of a group; a type with no recorded state becomes open. */
+function toggleGroupOpen(type: string) {
+  const currentlyOpen = groupOpen[type]
+  groupOpen[type] = currentlyOpen ? false : true
+}
+
+/** Select every candidate of the given type that is not selected yet, and expand the group. */
+function selectGroup(type: string) {
+  const target = groups.value.find(g => g.type === type)
+  if (target === undefined) return
+
+  const alreadySelected = new Set<string>()
+  for (const s of selected.value) alreadySelected.add(s.id)
+
+  const missing = target.items.filter(c => !alreadySelected.has(c.id))
+  selected.value = selected.value.concat(missing)
+  groupOpen[type] = true
+}
+
+/** Remove every candidate of the given type from the selection. */
+function deselectGroup(type: string) {
+  const target = groups.value.find(g => g.type === type)
+  if (target === undefined) return
+
+  const idsToDrop = new Set<string>()
+  target.items.forEach(c => idsToDrop.add(c.id))
+
+  selected.value = selected.value.filter(s => !idsToDrop.has(s.id))
+}
+
+// NL / manual add
+// Free-text description typed by the user; cleared once a source is added.
+const nlDescription  = ref('')
+// True while the interpret request is in flight.
+const interpreting   = ref(false)
+// Message shown under the description field when interpretation fails.
+const nlError        = ref<string | null>(null)
+// Reveals the id/path fallback form after a failed interpretation.
+const showManualForm = ref(false)
+const manualId       = ref('')
+const manualPath     = ref('')
+
+// Write
+// True while the write request is in flight (disables the write button).
+const writing      = ref(false)
+const writeError   = ref<string | null>(null)
+const writeSuccess = ref<string | null>(null)
+
+// Client-side rendering of the current selection as YAML text for step 3.
+// journald and docker sources get a `type:` line; every other type is
+// rendered as an id/path pair.
+const previewYaml = computed(() => {
+  if (selected.value.length === 0) return '# No sources selected'
+
+  const entries = selected.value.map(src => {
+    const out = [`  - id: ${src.id}`]
+    switch (src.type) {
+      case 'journald':
+        out.push(`    type: journald`)
+        if (src.unit) out.push(`    unit: ${src.unit}`)
+        break
+      case 'docker':
+        out.push(
+          `    type: docker`,
+          `    runtime: ${src.runtime ?? 'docker'}`,
+          // Fall back to the last ':'-separated segment of the id.
+          `    container: ${src.container ?? src.id.split(':').pop()}`,
+        )
+        break
+      default:
+        out.push(`    path: ${src.path}`)
+    }
+    return out.join('\n')
+  })
+
+  return ['sources:', ...entries].join('\n')
+})
+
+/** True when a candidate with the same id is in the current selection. */
+function isSelected(c: Candidate): boolean {
+  return selected.value.findIndex(chosen => chosen.id === c.id) !== -1
+}
+
+/** Add the candidate to the selection, or remove it if it is already selected (matched by id). */
+function toggleCandidate(c: Candidate) {
+  selected.value = isSelected(c)
+    ? selected.value.filter(chosen => chosen.id !== c.id)
+    : selected.value.concat([c])
+}
+
+/**
+ * Fetch discovered sources from `/api/setup/discover`, then reset the
+ * expanded state of each group and the selection to their defaults.
+ *
+ * Failures are reported through `discoverError`; nothing is thrown.
+ */
+async function runDiscover() {
+  discoverError.value = null
+  discovering.value   = true
+  try {
+    const res = await fetch(`${BASE}/api/setup/discover`)
+    if (!res.ok) throw new Error(`HTTP ${res.status}`)
+    const payload = await res.json()
+    candidates.value = payload.candidates ?? []
+
+    // Known types start open or collapsed as GROUP_META says ...
+    Object.keys(GROUP_META).forEach(type => {
+      groupOpen[type] = GROUP_META[type].defaultOpen
+    })
+    // ... and any type without an entry there starts collapsed.
+    candidates.value
+      .filter(c => !(c.type in groupOpen))
+      .forEach(c => { groupOpen[c.type] = false })
+
+    // Only types flagged `preselect` start selected.
+    selected.value = candidates.value.filter(c => {
+      const meta = GROUP_META[c.type]
+      return meta ? meta.preselect : false
+    })
+  } catch (e: any) {
+    discoverError.value = e.message ?? 'Discovery failed'
+  } finally {
+    discovering.value = false
+  }
+}
+
+/**
+ * Send the free-text description to `/api/setup/interpret` and, when the
+ * response carries a `source`, add it to the candidate list and select it.
+ *
+ * When no source comes back, or the request fails, the manual id/path form is
+ * revealed and `nlError` explains why. Nothing is thrown to the caller.
+ */
+async function interpretNL() {
+  // The Enter key handler is not covered by the button's `disabled` state, so
+  // guard against a second request while one is already in flight.
+  if (!nlDescription.value.trim() || interpreting.value) return
+  interpreting.value   = true
+  nlError.value        = null
+  showManualForm.value = false
+  try {
+    const res = await fetch(`${BASE}/api/setup/interpret`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ description: nlDescription.value }),
+    })
+    const data = await res.json()
+    if (data.source) {
+      const candidate: Candidate = { available: true, ...data.source }
+      // Add to candidates so it appears in the correct group
+      if (!candidates.value.some(c => c.id === candidate.id)) {
+        candidates.value = [...candidates.value, candidate]
+        // Expand the group even if it already existed but was collapsed
+        // (e.g. docker), so the new row is visible right away — the manual
+        // form does the same for 'file'.
+        groupOpen[candidate.type] = true
+      }
+      if (!isSelected(candidate)) selected.value = [...selected.value, candidate]
+      nlDescription.value = ''
+    } else {
+      showManualForm.value = true
+      nlError.value = data.validation_error
+        ? `Validation: ${data.validation_error}`
+        : 'Could not interpret — fill in manually below.'
+    }
+  } catch {
+    // Network failure or a non-JSON body: fall back to the manual form.
+    showManualForm.value = true
+    nlError.value = 'Interpretation failed — fill in manually below.'
+  } finally {
+    interpreting.value = false
+  }
+}
+
+/**
+ * Turn the manual id/path fields into a 'file' candidate, select it, and reset
+ * the manual and description inputs. Does nothing unless both fields are
+ * non-blank after trimming.
+ */
+function addManual() {
+  const id   = manualId.value.trim()
+  const path = manualPath.value.trim()
+  if (id === '' || path === '') return
+
+  const candidate: Candidate = {
+    type: 'file',
+    id,
+    path,
+    label: id,
+    description: `Read from ${path}`,
+    available: true,
+  }
+
+  const alreadyListed = candidates.value.some(c => c.id === candidate.id)
+  if (!alreadyListed) {
+    candidates.value = candidates.value.concat([candidate])
+    groupOpen['file'] = true
+  }
+  if (!isSelected(candidate)) {
+    selected.value = selected.value.concat([candidate])
+  }
+
+  // Reset the form and clear any earlier interpretation error.
+  manualId.value       = ''
+  manualPath.value     = ''
+  nlDescription.value  = ''
+  nlError.value        = null
+  showManualForm.value = false
+}
+
+/**
+ * POST the selected sources to `/api/setup/write`, show the result, and emit
+ * `done` 1.2 s after a successful write so the success message is readable.
+ *
+ * Failures are reported through `writeError`; nothing is thrown to the caller.
+ */
+async function writeAndFinish() {
+  // Ignore re-entry while a write, or the post-success delay, is in progress.
+  if (writing.value) return
+  writing.value      = true
+  writeError.value   = null
+  writeSuccess.value = null
+  let succeeded = false
+  try {
+    const res = await fetch(`${BASE}/api/setup/write`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ sources: selected.value }),
+    })
+    if (!res.ok) {
+      // Prefer the server's `detail`; fall back to the HTTP status text when
+      // the error body is not JSON.
+      const err = await res.json().catch(() => ({ detail: res.statusText }))
+      writeError.value = err.detail ?? 'Write failed'
+      return
+    }
+    const data = await res.json()
+    writeSuccess.value = `Wrote ${data.written} source${data.written === 1 ? '' : 's'} to sources.yaml.`
+    succeeded = true
+    setTimeout(() => {
+      writing.value = false
+      emit('done')
+    }, 1200)
+  } catch (e: any) {
+    writeError.value = e.message ?? 'Network error'
+  } finally {
+    // After a successful write the button stays disabled until `done` fires:
+    // the write endpoint appends, so a second click during the delay would
+    // submit the same sources again.
+    if (!succeeded) writing.value = false
+  }
+}
+
+onMounted(runDiscover)
+</script>
+
+<style scoped>
+.input-sm {
+  @apply bg-surface border border-surface-border rounded px-2 py-1 text-xs text-text-primary focus:outline-none focus:border-accent;
+}
+</style>
--- a/web/src/views/BundlesView.vue
+++ b/web/src/views/BundlesView.vue
@ -3,10 +3,29 @@

    <!-- Header -->
    <div class="mb-6">
-      <h1 class="text-text-primary text-xl font-semibold mb-1">Received Bundles</h1>
-      <p class="text-text-dim text-sm">Labeled incident bundles sent from remote Turnstone instances. Use these to build detection signatures.</p>
+      <h1 class="text-text-primary text-xl font-semibold mb-1">Bundles</h1>
+      <p class="text-text-dim text-sm">Incident bundles sent to and received from remote Turnstone instances.</p>
    </div>

+    <!-- Tabs -->
+    <div class="flex gap-1 mb-5 border-b border-surface-border">
+      <button
+        v-for="tab in tabs"
+        :key="tab.key"
+        @click="activeTab = tab.key"
+        class="px-4 py-2 text-sm font-medium border-b-2 -mb-px transition-colors"
+        :class="activeTab === tab.key
+          ? 'border-accent text-accent'
+          : 'border-transparent text-text-dim hover:text-text-muted'"
+      >
+        {{ tab.label }}
+        <span v-if="tab.key === 'received' && bundles.length" class="ml-1.5 text-xs bg-surface px-1.5 py-0.5 rounded text-text-dim border border-surface-border">{{ bundles.length }}</span>
+        <span v-if="tab.key === 'sent' && sentBundles.length" class="ml-1.5 text-xs bg-surface px-1.5 py-0.5 rounded text-text-dim border border-surface-border">{{ sentBundles.length }}</span>
+      </button>
+    </div>
+
+    <!-- ── RECEIVED TAB ── -->
+    <div v-if="activeTab === 'received'">
      <div v-if="loading" class="text-text-dim py-8 text-center text-sm">Loading…</div>

      <div v-else-if="bundles.length === 0" class="rounded border border-surface-border bg-surface-raised p-8 text-center">
@ -19,12 +38,11 @@
          v-for="b in bundles"
          :key="b.id"
          class="rounded border bg-surface-raised overflow-hidden"
-        :class="selected?.id === b.id ? 'border-accent' : 'border-surface-border'"
+          :class="selectedReceived?.id === b.id ? 'border-accent' : 'border-surface-border'"
        >
-        <!-- Bundle header row -->
          <div
            class="flex flex-wrap items-center gap-2 sm:gap-3 px-3 sm:px-4 py-3 cursor-pointer hover:bg-surface transition-colors"
-          @click="toggleBundle(b)"
+            @click="toggleReceived(b)"
          >
            <span class="font-mono text-xs text-accent bg-surface px-1.5 py-0.5 rounded border border-surface-border shrink-0">
              {{ b.issue_type || 'untyped' }}
@ -34,11 +52,10 @@
            <span class="px-2 py-0.5 rounded text-xs font-medium border shrink-0" :style="severityStyle(b.severity)">{{ b.severity }}</span>
            <span class="text-text-dim text-xs shrink-0">{{ b.entry_count }} entries</span>
            <span class="text-text-dim text-xs shrink-0 hidden sm:inline">{{ formatTs(b.bundled_at) }}</span>
-          <span class="text-text-dim text-xs shrink-0">{{ selected?.id === b.id ? '▲' : '▼' }}</span>
+            <span class="text-text-dim text-xs shrink-0">{{ selectedReceived?.id === b.id ? '▲' : '▼' }}</span>
          </div>

-        <!-- Expanded entries -->
-        <div v-if="selected?.id === b.id" class="border-t border-surface-border">
+          <div v-if="selectedReceived?.id === b.id" class="border-t border-surface-border">
            <div v-if="expandLoading" class="text-text-dim text-sm px-4 py-4">Loading entries…</div>
            <div v-else-if="expandedEntries.length === 0" class="text-text-dim text-sm px-4 py-4">No entries in bundle.</div>
            <div v-else class="p-4 space-y-1 max-h-[32rem] overflow-y-auto">
@ -66,6 +83,72 @@
          </div>
        </div>
      </div>
+    </div>
+
+    <!-- ── SENT TAB ── -->
+    <div v-if="activeTab === 'sent'">
+      <div v-if="sentLoading" class="text-text-dim py-8 text-center text-sm">Loading…</div>
+
+      <div v-else-if="sentBundles.length === 0" class="rounded border border-surface-border bg-surface-raised p-8 text-center">
+        <p class="text-text-muted text-base mb-1">No bundles sent yet.</p>
+        <p class="text-text-dim text-sm">Bundles you export or send to a remote instance appear here for review.</p>
+      </div>
+
+      <div v-else class="space-y-3">
+        <div
+          v-for="s in sentBundles"
+          :key="s.id"
+          class="rounded border bg-surface-raised overflow-hidden"
+          :class="selectedSent?.id === s.id ? 'border-accent' : 'border-surface-border'"
+        >
+          <div
+            class="flex flex-wrap items-center gap-2 sm:gap-3 px-3 sm:px-4 py-3 cursor-pointer hover:bg-surface transition-colors"
+            @click="toggleSent(s)"
+          >
+            <span class="font-mono text-xs text-text-dim bg-surface px-1.5 py-0.5 rounded border border-surface-border shrink-0">
+              {{ sentIncidentLabel(s) }}
+            </span>
+            <span class="text-text-primary text-sm flex-1 min-w-0 truncate">{{ sentIncidentType(s) }}</span>
+            <span
+              class="px-2 py-0.5 rounded text-xs font-medium border shrink-0"
+              :class="s.sanitized ? 'text-green-400 border-green-400/30 bg-green-400/10' : 'text-text-dim border-surface-border'"
+            >
+              {{ s.sanitized ? 'sanitized' : 'raw' }}
+            </span>
+            <span class="text-text-dim text-xs shrink-0">{{ s.entry_count }} entries</span>
+            <span class="text-text-dim text-xs shrink-0 hidden sm:inline">{{ formatTs(s.exported_at) }}</span>
+            <button
+              @click.stop="redownloadSent(s)"
+              class="text-xs px-2 py-0.5 rounded border border-surface-border text-text-muted hover:text-accent hover:border-accent transition-colors shrink-0"
+            >
+              ↓
+            </button>
+            <span class="text-text-dim text-xs shrink-0">{{ selectedSent?.id === s.id ? '▲' : '▼' }}</span>
+          </div>
+
+          <div v-if="selectedSent?.id === s.id" class="border-t border-surface-border">
+            <div class="p-4 space-y-1 max-h-[32rem] overflow-y-auto">
+              <div class="flex items-center gap-2 mb-3">
+                <!-- sentExpandedEntries is capped at 5, so the total comes from s.entry_count -->
+                <p class="text-text-dim text-xs">{{ s.entry_count }} log entries (first {{ sentExpandedEntries.length }} shown)</p>
+                <p v-if="s.sanitized" class="text-xs text-green-400 ml-auto">PII patterns redacted</p>
+                <p v-else class="text-xs text-text-dim ml-auto">Not sanitized — contains raw log text</p>
+              </div>
+              <div
+                v-for="entry in sentExpandedEntries"
+                :key="entry.entry_id"
+                class="font-mono text-xs py-1 px-2 rounded bg-surface border border-surface-border"
+              >
+                <span class="text-text-dim mr-2">{{ shortTs(entry.timestamp_iso) }}</span>
+                <span :class="['mr-2', severityClass(entry.severity)]">{{ entry.severity || '?' }}</span>
+                <span class="text-text-muted">{{ lastPart(entry.source_id) }}</span>
+                <span class="text-text-dim mx-1">|</span>
+                <span class="text-text-primary">{{ entry.text.slice(0, 200) }}</span>
+              </div>
+            </div>
+          </div>
+        </div>
+      </div>
+    </div>

  </div>
 </template>
@ -87,6 +170,15 @@ interface BundleSummary {
  bundle_json: string
 }

+/** One row from `/api/sent-bundles`: a bundle this instance exported or sent. */
+interface SentBundleSummary {
+  id: string
+  incident_id: string
+  // Displayed through formatTs in the sent list.
+  exported_at: string
+  // Only ever tested for truthiness in this view.
+  // NOTE(review): the backing column is an INTEGER; confirm the API returns a real boolean.
+  sanitized: boolean
+  entry_count: number
+  // The exported bundle as a JSON string; parsed client-side for the preview,
+  // the incident label/type, and the re-download.
+  bundle_json: string
+}
+
 interface LogEntry {
  entry_id: string
  source_id: string
@ -96,41 +188,69 @@ interface LogEntry {
  matched_patterns: string[]
 }

+// The two panels of this view; `activeTab` decides which one is rendered.
+type TabKey = 'received' | 'sent'
+const tabs: { key: TabKey; label: string }[] = [
+  { key: 'received', label: 'Received' },
+  { key: 'sent',     label: 'Sent' },
+]
+// The view opens on the Received tab.
+const activeTab = ref<TabKey>('received')
+
+// Received
 const bundles          = ref<BundleSummary[]>([])
 const loading          = ref(true)
-const selected      = ref<BundleSummary | null>(null)
+const selectedReceived = ref<BundleSummary | null>(null)
 const expandedEntries  = ref<LogEntry[]>([])
 const expandLoading    = ref(false)

+// Sent
+const sentBundles         = ref<SentBundleSummary[]>([])
+const sentLoading         = ref(true)
+const selectedSent        = ref<SentBundleSummary | null>(null)
+const sentExpandedEntries = ref<LogEntry[]>([])
+
 onMounted(async () => {
   try {
-    const res = await fetch(`${BASE}/api/bundles`)
-    if (res.ok) bundles.value = (await res.json()).bundles
+    // Load both lists in parallel.
+    const [recRes, sentRes] = await Promise.all([
+      fetch(`${BASE}/api/bundles`),
+      fetch(`${BASE}/api/sent-bundles`),
+    ])
+    // `?? []` keeps the refs arrays even if a response omits `bundles`;
+    // the template reads `.length` on both.
+    if (recRes.ok)  bundles.value     = (await recRes.json()).bundles ?? []
+    if (sentRes.ok) sentBundles.value = (await sentRes.json()).bundles ?? []
   } finally {
+    // Always clear both spinners: a rejected fetch or malformed body must not
+    // leave either tab stuck on "Loading…".
     loading.value     = false
+    sentLoading.value = false
   }
 })

-async function toggleBundle(b: BundleSummary) {
-  if (selected.value?.id === b.id) {
-    selected.value = null
+/**
+ * Expand the given received bundle, or collapse it if it is already expanded.
+ * Expanding fills `expandedEntries` from the bundle's inline `bundle_json`.
+ */
+async function toggleReceived(b: BundleSummary) {
+  if (selectedReceived.value?.id === b.id) {
+    selectedReceived.value = null
     expandedEntries.value = []
     return
   }
-  selected.value = b
+  selectedReceived.value = b
   expandedEntries.value = []
   expandLoading.value = true
   try {
-    // bundle_json is stored inline — parse it directly, no round-trip needed
+    // bundle_json is carried on the summary, so it is parsed locally without
+    // another request. A parse error is not caught here: it rejects the
+    // returned promise, and `finally` still clears the loading flag.
     const parsed = JSON.parse(b.bundle_json)
     expandedEntries.value = parsed.log_entries ?? []
-  } catch {
-    expandLoading.value = false
   } finally {
     expandLoading.value = false
   }
 }

+/**
+ * Expand the given sent bundle, or collapse it if it is already expanded.
+ * Expanding previews at most the first five log entries; a bundle whose JSON
+ * cannot be read previews as empty.
+ */
+function toggleSent(s: SentBundleSummary) {
+  const isCollapsing = selectedSent.value?.id === s.id
+  if (isCollapsing) {
+    selectedSent.value = null
+    sentExpandedEntries.value = []
+    return
+  }
+
+  selectedSent.value = s
+  let preview: LogEntry[]
+  try {
+    const bundle = JSON.parse(s.bundle_json)
+    preview = (bundle.log_entries ?? []).slice(0, 5)
+  } catch {
+    preview = []
+  }
+  sentExpandedEntries.value = preview
+}
+
 function exportBundle(b: BundleSummary) {
  const blob = new Blob([b.bundle_json], { type: 'application/json' })
  const url  = URL.createObjectURL(blob)
@ -141,6 +261,26 @@ function exportBundle(b: BundleSummary) {
  URL.revokeObjectURL(url)
 }

+/**
+ * Download the stored `bundle_json` of a sent bundle as a `.json` file named
+ * `sent-<issue_type|bundle>-<first 8 chars of id>.json`.
+ */
+function redownloadSent(s: SentBundleSummary) {
+  // The parsed bundle is only needed for the file name, so malformed JSON
+  // must not block the download — fall back to the generic label.
+  let label = 'bundle'
+  try {
+    label = JSON.parse(s.bundle_json).incident?.issue_type || 'bundle'
+  } catch {
+    // keep the default label
+  }
+  const blob    = new Blob([s.bundle_json], { type: 'application/json' })
+  const url     = URL.createObjectURL(blob)
+  const a       = document.createElement('a')
+  a.href        = url
+  a.download    = `sent-${label}-${s.id.slice(0, 8)}.json`
+  a.click()
+  URL.revokeObjectURL(url)
+}
+
+/** Incident label from the bundle JSON, or the first 8 characters of the incident id when it is missing or unreadable. */
+function sentIncidentLabel(s: SentBundleSummary): string {
+  const shortId = () => s.incident_id.slice(0, 8)
+  try {
+    const bundle = JSON.parse(s.bundle_json)
+    return bundle.incident?.label ?? shortId()
+  } catch {
+    return shortId()
+  }
+}
+
+/** Issue type from the bundle JSON, or 'untyped' when it is empty, missing, or unreadable. */
+function sentIncidentType(s: SentBundleSummary): string {
+  try {
+    const bundle = JSON.parse(s.bundle_json)
+    return bundle.incident?.issue_type || 'untyped'
+  } catch {
+    return 'untyped'
+  }
+}
+
 function severityStyle(sev: string): Record<string, string> {
  const k = sev?.toLowerCase() ?? 'low'
  const known = ['low', 'medium', 'high', 'critical']
--- a/web/src/views/IncidentsView.vue
+++ b/web/src/views/IncidentsView.vue
@ -74,6 +74,10 @@
          <span v-if="selected.issue_type" class="font-mono text-xs text-accent">{{ selected.issue_type }}</span>
        </div>
        <div class="flex flex-wrap items-center gap-2 sm:gap-3 mt-1 sm:mt-0">
+          <label class="flex items-center gap-1.5 text-xs text-text-dim cursor-pointer select-none">
+            <input type="checkbox" v-model="sanitizeBundle" class="accent-accent" />
+            Sanitize PII
+          </label>
          <button
            @click="sendBundle(selected.id)"
            :disabled="sending"
@ -181,6 +185,7 @@ const selectedEntries = ref<Entry[]>([])
 const entriesLoading  = ref(false)
 const sending         = ref(false)
 const sendStatus      = ref<{ ok: boolean; msg: string } | null>(null)
+const sanitizeBundle  = ref(false)

 async function selectIncident(inc: Incident) {
  selected.value = inc
@ -202,15 +207,17 @@ async function sendBundle(id: string) {
  sending.value = true
  sendStatus.value = null
  try {
-    const res = await fetch(`${BASE}/api/incidents/${id}/send`, { method: 'POST' })
+    const params = sanitizeBundle.value ? '?sanitize=true' : ''
+    const res = await fetch(`${BASE}/api/incidents/${id}/send${params}`, { method: 'POST' })
    if (res.ok) {
      const data = await res.json()
-      sendStatus.value = { ok: true, msg: `Sent ${data.entry_count} entries` }
+      const tag = sanitizeBundle.value ? ' (sanitized)' : ''
+      sendStatus.value = { ok: true, msg: `Sent ${data.entry_count} entries${tag}` }
    } else {
      const err = await res.json().catch(() => ({ detail: res.statusText }))
      sendStatus.value = { ok: false, msg: err.detail ?? 'Send failed' }
    }
-  } catch (e) {
+  } catch {
    sendStatus.value = { ok: false, msg: 'Network error' }
  } finally {
    sending.value = false
--- a/web/src/views/SourcesView.vue
+++ b/web/src/views/SourcesView.vue
@ -5,11 +5,35 @@
        <h1 class="text-text-primary text-xl font-semibold mb-1">Log Sources</h1>
        <p class="text-text-dim text-sm">All hosts and services in the gleaned corpus.</p>
      </div>
-      <label class="btn-secondary text-sm cursor-pointer shrink-0">
+      <div class="flex items-center gap-2 shrink-0">
+        <button
+          @click="showAddPanel = !showAddPanel"
+          class="btn-secondary text-sm"
+        >
+          + Add Source
+        </button>
+        <label class="btn-secondary text-sm cursor-pointer">
          <span>Upload log file</span>
          <input type="file" class="hidden" @change="handleUpload" />
        </label>
      </div>
+    </div>
+
+    <!-- First-run wizard -->
+    <div v-if="showWizard" class="mb-6">
+      <SetupWizard
+        @done="onWizardDone"
+        @skip="showWizard = false; loadSources()"
+      />
+    </div>
+
+    <!-- Post-setup Add Source panel (condensed wizard steps 1-2) -->
+    <div v-else-if="showAddPanel" class="mb-6">
+      <SetupWizard
+        @done="showAddPanel = false; loadSources()"
+        @skip="showAddPanel = false"
+      />
+    </div>

    <!-- Upload / action feedback -->
    <div v-if="actionMsg" class="mb-4 text-sm rounded border px-4 py-2.5"
@ -17,14 +41,14 @@
      {{ actionMsg }}
    </div>

-    <div v-if="loading" class="text-text-dim py-8 text-center text-sm">Loading…</div>
+    <div v-if="!showWizard && loading" class="text-text-dim py-8 text-center text-sm">Loading…</div>

-    <div v-else-if="sources.length === 0" class="text-text-dim py-12 text-center">
+    <div v-else-if="!showWizard && sources.length === 0" class="text-text-dim py-12 text-center">
      <p class="mb-1">No log sources found.</p>
-      <p class="text-sm">Run the glean pipeline: <code class="bg-surface-raised px-1 rounded">python scripts/glean_corpus.py</code></p>
+      <p class="text-sm">Use <strong>Add Source</strong> above or edit <code class="bg-surface-raised px-1 rounded">sources.yaml</code> directly.</p>
    </div>

-    <div v-else class="rounded border border-surface-border overflow-hidden">
+    <div v-else-if="!showWizard && sources.length > 0" class="rounded border border-surface-border overflow-hidden">
      <div class="overflow-x-auto">
        <table class="w-full text-sm min-w-[620px]">
          <thead class="bg-surface-raised border-b border-surface-border">
@ -121,6 +145,7 @@

 <script setup lang="ts">
 import { ref, onMounted } from 'vue'
+import SetupWizard from '@/components/SetupWizard.vue'

 // Unified source row shown in the table (merges configured + DB-only sources).
 interface SourceRow {
@ -155,9 +180,28 @@ const loading = ref(true)
 const busy = ref(new Set<string>())
 const actionMsg = ref('')
 const actionError = ref(false)
+const showWizard = ref(false)
+const showAddPanel = ref(false)

 const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')

+/**
+ * Ask `/api/setup/status` whether sources are configured and switch on the
+ * first-run wizard when they are not. Any failure leaves the page as it is.
+ */
+async function checkSetupStatus(): Promise<void> {
+  try {
+    const res = await fetch(`${BASE}/api/setup/status`)
+    if (!res.ok) return
+    const { configured } = await res.json()
+    if (!configured) showWizard.value = true
+  } catch {
+    // If the check fails, don't block the page
+  }
+}
+
+/** Hide the first-run wizard and start reloading the source table (not awaited). */
+function onWizardDone(): void {
+  showWizard.value = false
+  void loadSources()
+}
+
 async function loadSources(): Promise<void> {
  try {
    // Primary list: configured sources from sources.yaml (enriched with DB stats).
@ -211,7 +255,10 @@ async function loadSources(): Promise<void> {
  }
 }

-onMounted(loadSources)
+// On mount, check the setup status first; the source table is only loaded
+// when the first-run wizard is not being shown.
+onMounted(async () => {
+  await checkSetupStatus()
+  if (showWizard.value) return
+  await loadSources()
+})

 function setBusy(id: string, on: boolean): void {
  const next = new Set(busy.value)