feat: bundle PII sanitization, onboarding wizard, NL source addition (#51, #52, #53)

Bundle export (#51):
- _redact_text() with 5 compiled regex patterns (IPv4, email, user=, host=, password=)
- build_bundle(sanitize=False) — per-entry redaction at export time
- sent_bundles table tracks every outgoing export (GET and POST /send)
- GET /api/sent-bundles exposes history; SentBundle model added
- BundlesView: Received/Sent tabs, sanitized badge, 5-entry preview, re-download
- IncidentsView: Sanitize PII checkbox next to Send Bundle

Onboarding wizard (#52):
- app/services/discover.py: journald/Docker/file detection (best-effort, safe in containers)
- GET /api/setup/status, /discover, POST /api/setup/write (additive, appends to existing)
- SetupWizard.vue: 3-step Detect → Select → Confirm
  - Step 1 shows grouped summary (journald/file/docker counts)
  - Step 2: collapsible groups with All/None section toggles
    - journald + file: pre-selected; docker: collapsed, none pre-selected
  - Step 3: YAML preview before write
- SourcesView: shows wizard on first run; Add Source button reuses it

NL source addition (#53):
- app/services/nl_source.py: keyword shortcut (13 well-known apps) + LLM fallback
- POST /api/setup/interpret: keyword → LLM → null (graceful fallback)
- NL field in wizard step 2; manual form shown when interpretation fails
- Added sources appear in grouped list immediately
This commit is contained in:
pyr0ball 2026-05-29 14:14:28 -07:00
parent ae922ef6c6
commit f0fbe245f0
11 changed files with 1381 additions and 90 deletions

View file

@ -72,6 +72,17 @@ CREATE TABLE IF NOT EXISTS received_bundles (
CREATE INDEX IF NOT EXISTS idx_bundles_bundled ON received_bundles(bundled_at);
CREATE INDEX IF NOT EXISTS idx_bundles_type ON received_bundles(issue_type);
-- Outgoing bundle history: one row per bundle built for download or send.
CREATE TABLE IF NOT EXISTS sent_bundles (
    id TEXT PRIMARY KEY,                     -- identifier generated per export
    incident_id TEXT NOT NULL,               -- incident the bundle was built from
    exported_at TEXT NOT NULL,               -- timestamp of the export
    sanitized INTEGER NOT NULL DEFAULT 0,    -- 1 when PII redaction was applied, else 0
    entry_count INTEGER NOT NULL DEFAULT 0,  -- number of log entries in the bundle
    bundle_json TEXT NOT NULL                -- full bundle serialized as a JSON string
);
-- Lookups by incident and by export time.
CREATE INDEX IF NOT EXISTS idx_sent_bundles_incident ON sent_bundles(incident_id);
CREATE INDEX IF NOT EXISTS idx_sent_bundles_time ON sent_bundles(exported_at);
-- context tables moved to ensure_context_schema() / CONTEXT_DB_PATH
-- kept here as no-ops so legacy single-file deployments still work
CREATE TABLE IF NOT EXISTS context_facts (

View file

@ -10,7 +10,9 @@ import asyncio
import dataclasses
import hmac
import json
import logging
import os
import time
# Offline mode: must be set before any HuggingFace library is imported.
# Both flags must agree — HF hub and transformers each check independently.
@ -48,6 +50,8 @@ from app.services.blocklist import (
update_candidate_status,
)
from app.services.pihole import PiholeClient
from app.services.discover import discover_all, build_sources_yaml, validate_source
from app.services.nl_source import interpret as _nl_interpret
from app.services.incidents import (
build_bundle,
create_incident,
@ -57,6 +61,8 @@ from app.services.incidents import (
get_incident_entries,
list_bundles,
list_incidents,
list_sent_bundles,
record_sent_bundle,
store_bundle,
)
from app.services.search import (
@ -123,6 +129,10 @@ _compiled_patterns: list = []
@asynccontextmanager
async def _lifespan(app: FastAPI):
global _compiled_patterns
# Route turnstone.audit through uvicorn's own handler so it appears in api.log.
_audit_log.setLevel(logging.INFO)
for h in logging.getLogger("uvicorn.error").handlers:
_audit_log.addHandler(h)
ensure_schema(DB_PATH)
ensure_context_schema(CONTEXT_DB_PATH)
_compiled_patterns = load_compiled_patterns(PATTERN_FILE)
@ -172,6 +182,27 @@ app.add_middleware(
allow_headers=["*"],
)
_audit_log = logging.getLogger("turnstone.audit")
@app.middleware("http")
async def _audit_middleware(request: Request, call_next):
    """Emit one audit line per API request.

    The line carries the HTTP method, path, query string, response status
    code and elapsed milliseconds. Request bodies and response payloads are
    not logged. Requests whose path does not start with ``/turnstone/api``
    pass through without producing a log line.
    """
    started = time.monotonic()
    response = await call_next(request)

    url = request.url
    if not url.path.startswith("/turnstone/api"):
        return response

    elapsed_ms = int((time.monotonic() - started) * 1000)
    query_suffix = "?" + url.query if url.query else ""
    _audit_log.info(
        "%s %s%s %d %dms",
        request.method,
        url.path,
        query_suffix,
        response.status_code,
        elapsed_ms,
    )
    return response
_PREFS_DEFAULTS: dict = {
"entry_point_style": "topbar",
@ -643,6 +674,96 @@ class BatchGleanRequest(BaseModel):
entries: list[BatchEntry]
# ── Setup / Onboarding wizard ──────────────────────────────────────────────
class SetupWriteBody(BaseModel):
    """Request body for POST /api/setup/write."""

    # Source definitions to persist; setup_write() checks each one with
    # validate_source() before anything is written.
    sources: list[dict]
class NLInterpretBody(BaseModel):
    """Request body for POST /api/setup/interpret."""

    # Plain-English description of a log source, passed to the NL interpreter.
    description: str
@router.get("/api/setup/status")
def setup_status() -> dict:
    """Report whether ``sources.yaml`` is present under ``PATTERN_DIR``.

    The UI uses the ``configured`` flag as its wizard completion gate.
    """
    configured = (PATTERN_DIR / "sources.yaml").exists()
    return {"configured": configured}
@router.get("/api/setup/discover")
def setup_discover() -> dict:
    """Return the log-source candidates reported by ``discover_all()``."""
    detected = discover_all()
    return detected
@router.post("/api/setup/write")
def setup_write(body: SetupWriteBody, background_tasks: BackgroundTasks) -> dict:
    """Validate the selected sources and persist them to ``sources.yaml``.

    Every submitted source is validated first; any failure aborts the whole
    request with HTTP 422. When ``sources.yaml`` already exists, sources whose
    ``id`` is already present are skipped and the rest are appended, so
    post-setup additions do not remove configured sources.

    Returns a dict with ``written`` and ``skipped`` counts (plus ``message``
    when nothing new was added).

    Raises:
        HTTPException: 422 when a source fails validation; 409 when the
            existing ``sources.yaml`` cannot be parsed or is not a mapping.

    NOTE(review): the merged list is re-rendered by ``build_sources_yaml()``,
    which only emits id/type/path/container/runtime/unit. Hand-written
    comments and any other keys in an existing file are not preserved.
    """
    errors = [err for err in map(validate_source, body.sources) if err]
    if errors:
        raise HTTPException(status_code=422, detail="; ".join(errors))

    sources_file = PATTERN_DIR / "sources.yaml"
    file_exists = sources_file.exists()
    existing: list[dict] = []
    if file_exists:
        import yaml as _yaml

        try:
            current = _yaml.safe_load(sources_file.read_text(encoding="utf-8")) or {}
        except _yaml.YAMLError as exc:
            raise HTTPException(
                status_code=409,
                detail=f"Existing sources.yaml is not valid YAML: {exc}",
            ) from exc
        if not isinstance(current, dict):
            raise HTTPException(
                status_code=409,
                detail="Existing sources.yaml is not a YAML mapping",
            )
        # A bare "sources:" key loads as None, so `or []` is required here;
        # .get("sources", []) would hand None to the loop below.
        existing = [s for s in (current.get("sources") or []) if isinstance(s, dict)]

    # Skip ids already on disk and ids repeated within this request.
    seen_ids = {s.get("id") for s in existing}
    new_sources = []
    for src in body.sources:
        src_id = src.get("id")
        if src_id in seen_ids:
            continue
        seen_ids.add(src_id)
        new_sources.append(src)

    if file_exists and not new_sources:
        return {
            "written": 0,
            "skipped": len(body.sources),
            "message": "All sources already configured",
        }

    content = build_sources_yaml(existing + new_sources)
    PATTERN_DIR.mkdir(parents=True, exist_ok=True)
    sources_file.write_text(content, encoding="utf-8")

    # Trigger a background glean of new sources
    if GLEAN_INTERVAL > 0:
        background_tasks.add_task(
            _glean_file,
            sources_file, DB_PATH, PATTERN_FILE, 1,
        )
    return {"written": len(new_sources), "skipped": len(body.sources) - len(new_sources)}
@router.post("/api/setup/interpret")
def setup_interpret(body: NLInterpretBody) -> dict:
    """Turn a plain-English source description into a source definition.

    The interpreter is called with the LLM settings stored in the user
    preferences (empty values are passed as None). When it returns nothing,
    or returns a definition that fails ``validate_source()``, the response
    carries ``source: None`` and ``fallback: True`` so the UI can show the
    manual form; a validation failure also includes ``validation_error``.
    """
    prefs = _load_prefs()
    llm_settings = {
        "llm_url": prefs.get("llm_url") or None,
        "llm_model": prefs.get("llm_model") or None,
        "api_key": prefs.get("llm_api_key") or None,
    }
    source = _nl_interpret(description=body.description, **llm_settings)
    if source is None:
        return {"source": None, "fallback": True}

    problem = validate_source(source)
    if problem:
        return {"source": None, "fallback": True, "validation_error": problem}
    return {"source": source, "fallback": False}
@router.post("/api/glean/batch")
def glean_batch(payload: BatchGleanRequest, background_tasks: BackgroundTasks) -> dict:
"""Accept pre-parsed log entries from a remote Turnstone instance (submission protocol).
@ -839,21 +960,30 @@ def delete_incident_endpoint(incident_id: str) -> dict:
@router.get("/api/incidents/{incident_id}/bundle")
def get_incident_bundle(incident_id: str) -> dict:
def get_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
incident = get_incident(DB_PATH, incident_id)
if not incident:
raise HTTPException(status_code=404, detail="Incident not found")
return build_bundle(DB_PATH, incident, source_host=SOURCE_HOST)
bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
record_sent_bundle(DB_PATH, incident_id, bundle, sanitized=sanitize)
return bundle
@router.get("/api/sent-bundles")
def list_sent_bundles_endpoint() -> dict:
    """Return every recorded outgoing bundle, each converted to a plain dict."""
    payload = []
    for record in list_sent_bundles(DB_PATH):
        payload.append(dataclasses.asdict(record))
    return {"bundles": payload}
@router.post("/api/incidents/{incident_id}/send")
def send_incident_bundle(incident_id: str) -> dict:
def send_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
if not BUNDLE_ENDPOINT:
raise HTTPException(status_code=503, detail="TURNSTONE_BUNDLE_ENDPOINT not configured")
incident = get_incident(DB_PATH, incident_id)
if not incident:
raise HTTPException(status_code=404, detail="Incident not found")
bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST)
bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
record_sent_bundle(DB_PATH, incident_id, bundle, sanitized=sanitize)
payload = json.dumps(bundle).encode()
req = urllib.request.Request(
BUNDLE_ENDPOINT,

173
app/services/discover.py Normal file
View file

@ -0,0 +1,173 @@
"""Environment auto-discovery for the onboarding wizard.
All checks are best-effort — every function returns an empty list on failure
so the wizard degrades gracefully in containers, VMs, and minimal environments.
"""
from __future__ import annotations
import json
import logging
import os
import shutil
import subprocess
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
# Common log file candidates: (id, path, description).
# Two entries share the id "syslog"; discover_files() walks this list in order
# and keeps only the first existing path per id, so order here is significant.
_KNOWN_PATHS: list[tuple[str, str, str]] = [
    ("syslog", "/var/log/syslog", "System syslog (Debian/Ubuntu)"),
    ("syslog", "/var/log/messages", "System messages (RHEL/Rocky)"),
    ("auth", "/var/log/auth.log", "Auth log"),
    ("kern", "/var/log/kern.log", "Kernel log"),
    ("nginx-access", "/var/log/nginx/access.log", "Nginx access log"),
    ("nginx-error", "/var/log/nginx/error.log", "Nginx error log"),
    ("apache", "/var/log/apache2/access.log", "Apache access log"),
    ("apache-error", "/var/log/apache2/error.log", "Apache error log"),
    ("caddy", "/var/log/caddy/access.log", "Caddy access log"),
    ("docker-daemon","/var/log/docker.log", "Docker daemon log"),
    ("fail2ban", "/var/log/fail2ban.log", "Fail2ban log"),
    ("ufw", "/var/log/ufw.log", "UFW firewall log"),
]
def _run(cmd: list[str], timeout: float = 5.0) -> str | None:
"""Run a command and return stdout, or None on any error."""
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
return result.stdout if result.returncode == 0 else None
except Exception:
return None
def discover_journald() -> list[dict[str, Any]]:
    """Return a single journald source candidate if ``journalctl`` is on PATH.

    The candidate id and label embed the output of the ``hostname`` command.
    When that command fails or prints only whitespace, "localhost" is used so
    the id never degenerates to a bare "journal:".
    """
    if not shutil.which("journalctl"):
        return []
    # Strip before applying the fallback: whitespace-only output is truthy.
    hostname = (_run(["hostname"]) or "").strip() or "localhost"
    return [{
        "type": "journald",
        "id": f"journal:{hostname}",
        "label": f"System journal ({hostname})",
        "description": "All systemd journal output from this host",
        "available": True,
    }]
def discover_docker() -> list[dict[str, Any]]:
    """Return container candidates from the first runtime that lists any.

    ``docker`` is tried first, then ``podman``. A runtime is skipped when its
    binary is not on PATH, when ``ps`` fails, or when it reports no usable
    containers. Output lines that are not JSON objects are ignored, so
    unexpected output cannot abort discovery.
    """
    for runtime in ("docker", "podman"):
        if not shutil.which(runtime):
            continue
        out = _run([runtime, "ps", "--format", "{{json .}}"])
        if out is None:
            continue
        containers = []
        for line in out.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(obj, dict):
                # e.g. a JSON array or scalar; only objects carry a name.
                continue
            name = obj.get("Names") or obj.get("Name") or obj.get("ID", "unknown")
            # podman returns a list for Names
            if isinstance(name, list):
                name = name[0] if name else "unknown"
            name = str(name).lstrip("/") or "unknown"
            containers.append({
                "type": "docker",
                "id": f"{runtime}:{name}",
                # Separator added: the bare concatenation rendered "Dockernginx".
                "label": f"{runtime.capitalize()}: {name}",
                "description": f"Container log stream for {name}",
                "container": name,
                "runtime": runtime,
                "available": True,
            })
        if containers:
            return containers
    return []
def discover_files() -> list[dict[str, Any]]:
    """Collect file-source candidates from ``_KNOWN_PATHS`` that exist on disk.

    When several known paths share an id, only the first existing one (in
    list order) is reported.
    """
    by_id: dict[str, dict[str, Any]] = {}
    for source_id, path, description in _KNOWN_PATHS:
        # Skip ids already claimed by an earlier path, and paths not present.
        if source_id in by_id or not os.path.exists(path):
            continue
        by_id[source_id] = {
            "type": "file",
            "id": source_id,
            "label": description,
            "path": path,
            "description": f"Read from {path}",
            "available": True,
        }
    return list(by_id.values())
def discover_all() -> dict[str, Any]:
    """Run every discovery probe and summarise which source types were found.

    Candidates are ordered journald, then containers, then files. The
    ``has_*`` flags say whether at least one candidate of that type exists.
    """
    candidates: list[dict[str, Any]] = [
        *discover_journald(),
        *discover_docker(),
        *discover_files(),
    ]
    kinds = {candidate["type"] for candidate in candidates}
    return {
        "candidates": candidates,
        "has_journald": "journald" in kinds,
        "has_docker": "docker" in kinds,
        "has_files": "file" in kinds,
    }
# Plain scalars that a YAML loader may resolve to a bool/null instead of a string.
_YAML_RESERVED_WORDS = frozenset({"true", "false", "yes", "no", "on", "off", "null", "~"})


def _yaml_scalar(value: Any) -> str:
    """Render *value* as a YAML scalar, double-quoting it unless it is plainly safe.

    A value stays unquoted only when it starts with a letter or "/", consists
    of letters, digits and ``/._-:@+`` or spaces, contains no ": " sequence,
    does not end in a space or colon, and is not a reserved word. Everything
    else (newlines, "#", quotes, empty strings, numbers, ...) is emitted as a
    JSON string, which is also a valid YAML double-quoted scalar.
    """
    text = str(value)
    plain_ok = (
        bool(text)
        and (text[0].isalpha() or text[0] == "/")
        and not text.endswith((" ", ":"))
        and ": " not in text
        and all(ch.isalnum() or ch in "/._-:@+ " for ch in text)
        and text.lower() not in _YAML_RESERVED_WORDS
    )
    return text if plain_ok else json.dumps(text, ensure_ascii=False)


def build_sources_yaml(selected: list[dict[str, Any]]) -> str:
    """Generate sources.yaml content from a list of selected candidates.

    Each item must have: type, id, and type-specific fields (path, container,
    etc.). ``journald`` entries emit an optional ``unit``; ``docker`` entries
    emit ``runtime`` and ``container``; any other type is written as a file
    entry with ``path``. Values are quoted when needed so that request-supplied
    text cannot break the document or add keys. An empty selection yields
    ``sources: []`` rather than a null ``sources:`` key.
    """
    lines = [
        "# Turnstone log sources — generated by the setup wizard.",
        "# Edit this file to add, remove, or modify sources.",
    ]
    if not selected:
        lines.append("sources: []")
        return "\n".join(lines) + "\n"

    lines.append("sources:")
    for src in selected:
        src_type = src.get("type", "file")
        src_id = str(src.get("id", "unknown"))
        # List items sit two spaces under "sources:", their keys four.
        lines.append(f"  - id: {_yaml_scalar(src_id)}")
        if src_type == "journald":
            lines.append("    type: journald")
            unit = src.get("unit")
            if unit:
                lines.append(f"    unit: {_yaml_scalar(unit)}")
        elif src_type == "docker":
            runtime = src.get("runtime", "docker")
            # Fall back to the part of the id after the last ":".
            container = src.get("container", src_id.split(":")[-1])
            lines.append("    type: docker")
            lines.append(f"    runtime: {_yaml_scalar(runtime)}")
            lines.append(f"    container: {_yaml_scalar(container)}")
        else:
            path = src.get("path", "")
            lines.append(f"    path: {_yaml_scalar(path)}")
    return "\n".join(lines) + "\n"
# Source types that build_sources_yaml() knows how to render.
_SUPPORTED_SOURCE_TYPES = ("file", "journald", "docker")


def validate_source(src: dict[str, Any]) -> str | None:
    """Return an error string if the source definition is invalid, else None.

    Checks, in order: ``id`` is present and is a string; ``type`` (default
    "file") is one of the supported types; file sources have ``path``; docker
    sources have ``container``. Unsupported types are rejected because the
    YAML builder would otherwise write them as a file entry with no path.
    """
    src_id = src.get("id")
    if not src_id:
        return "Source is missing 'id'"
    if not isinstance(src_id, str):
        return f"Source id must be a string, got {type(src_id).__name__}"

    src_type = src.get("type", "file")
    if src_type not in _SUPPORTED_SOURCE_TYPES:
        return f"Source '{src_id}' has unsupported type {src_type!r}"
    if src_type == "file" and not src.get("path"):
        return f"File source '{src_id}' is missing 'path'"
    if src_type == "docker" and not src.get("container"):
        return f"Docker source '{src_id}' is missing 'container'"
    return None

View file

@ -2,14 +2,29 @@
from __future__ import annotations
import json
import re
import sqlite3
import uuid
from pathlib import Path
from app.glean.base import now_iso
from app.services.models import Incident, ReceivedBundle
from app.services.models import Incident, ReceivedBundle, SentBundle
from app.services.search import SearchResult, entries_in_window, search
_REDACT_PATTERNS: list[tuple[re.Pattern, str]] = [
(re.compile(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"), "[IP]"),
(re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}"), "[EMAIL]"),
(re.compile(r"(?i)\b(user(?:name)?|uid)\s*[=:]\s*\S+"), r"\1=[USER]"),
(re.compile(r"(?i)\bhost\s*[=:]\s*\S+"), "host=[HOST]"),
(re.compile(r"(?i)\bpassword\s*[=:]\s*\S+"), "password=[REDACTED]"),
]
def _redact_text(text: str) -> str:
for pattern, replacement in _REDACT_PATTERNS:
text = pattern.sub(replacement, text)
return text
def _row_to_incident(row: sqlite3.Row) -> Incident:
return Incident(
@ -142,6 +157,7 @@ def build_bundle(
incident: Incident,
source_host: str,
limit: int = 200,
sanitize: bool = False,
) -> dict:
"""Assemble a labeled bundle: incident metadata + related log entries."""
entries = get_incident_entries(db_path, incident, limit=limit)
@ -149,6 +165,7 @@ def build_bundle(
"bundle_version": 1,
"source_host": source_host,
"bundled_at": now_iso(),
"sanitized": sanitize,
"incident": {
"id": incident.id,
"label": incident.label,
@ -164,7 +181,7 @@ def build_bundle(
"source_id": e.source_id,
"timestamp_iso": e.timestamp_iso,
"severity": e.severity,
"text": e.text,
"text": _redact_text(e.text) if sanitize else e.text,
"matched_patterns": list(e.matched_patterns),
}
for e in entries
@ -172,6 +189,51 @@ def build_bundle(
}
def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized: bool) -> SentBundle:
    """Log an outgoing bundle export to the sent_bundles table.

    Builds a ``SentBundle`` with a fresh UUID and the current timestamp,
    inserts it, commits, and returns the record. The connection is closed even
    when the insert or commit raises; the exception still propagates.
    """
    record = SentBundle(
        id=str(uuid.uuid4()),
        incident_id=incident_id,
        exported_at=now_iso(),
        sanitized=sanitized,
        # `or []` also covers a bundle whose "log_entries" value is None.
        entry_count=len(bundle.get("log_entries") or []),
        bundle_json=json.dumps(bundle),
    )
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute(
            "INSERT INTO sent_bundles (id, incident_id, exported_at, sanitized, entry_count, bundle_json) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (record.id, record.incident_id, record.exported_at, int(record.sanitized),
             record.entry_count, record.bundle_json),
        )
        conn.commit()
    finally:
        conn.close()
    return record
def list_sent_bundles(db_path: Path) -> list[SentBundle]:
    """Return all rows of sent_bundles as ``SentBundle`` records, newest export first.

    The connection is closed even when the query raises.
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT id, incident_id, exported_at, sanitized, entry_count, bundle_json "
            "FROM sent_bundles ORDER BY exported_at DESC"
        ).fetchall()
    finally:
        conn.close()
    return [
        SentBundle(
            id=r["id"],
            incident_id=r["incident_id"],
            exported_at=r["exported_at"],
            # Stored as INTEGER 0/1; convert back to bool for the model.
            sanitized=bool(r["sanitized"]),
            entry_count=r["entry_count"],
            bundle_json=r["bundle_json"],
        )
        for r in rows
    ]
def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
"""Store an incoming bundle from a remote Turnstone instance."""
inc = bundle.get("incident", {})

View file

@ -60,3 +60,15 @@ class ReceivedBundle:
bundled_at: str
entry_count: int
bundle_json: str # full bundle serialized as JSON string
@dataclass(frozen=True)
class SentBundle:
    """A record of a bundle exported or sent from this instance."""

    id: str  # unique identifier of this export record
    incident_id: str  # incident the bundle was built from
    exported_at: str  # timestamp string of the export
    sanitized: bool  # True when PII redaction was applied to the entries
    entry_count: int  # number of log entries contained in the bundle
    bundle_json: str  # full bundle serialized as JSON string

134
app/services/nl_source.py Normal file
View file

@ -0,0 +1,134 @@
"""Natural-language log source interpretation (LLM path for #53).
BSL-gated feature: the structured form fallback is MIT; the LLM interpretation
requires the LLM service to be configured. The caller always validates the
output against the source schema before writing anything.
"""
from __future__ import annotations
import json
import logging
import re
from typing import Any
import httpx
logger = logging.getLogger(__name__)
_SYSTEM_PROMPT = """\
You are a Turnstone log-source configuration assistant.
The operator will describe a log source in plain English.
Respond ONLY with a JSON object matching this schema no prose, no markdown:
{
"id": "short-kebab-case identifier",
"type": "file" | "journald" | "docker",
"path": "/absolute/path (file type only)",
"container": "container-name (docker type only)",
"runtime": "docker" | "podman" (docker type only, default docker)",
"unit": "service.service (journald type only, omit for all-journal)",
"label": "Human-readable name for the UI"
}
Rules:
- For well-known apps (nginx, apache, caddy, sonarr, radarr, qbittorrent, plex, jellyfin),
use the conventional default log path.
- If the operator mentions a Docker/Podman container, use type=docker.
- If the operator mentions journald or a systemd service, use type=journald.
- If uncertain, use type=file with the most likely path.
- The "id" must be lowercase, hyphens only (no spaces, slashes, dots).
- Never include trailing commas or comments in your JSON.
"""
# Well-known source templates for common apps, keyed by the keyword that
# triggers them. _keyword_match() consults this table before any LLM call and
# returns the first entry (in the order listed here) whose keyword occurs
# anywhere in the lower-cased description.
_KNOWN_APPS: dict[str, dict[str, Any]] = {
    "nginx": {"id": "nginx-access", "type": "file", "path": "/var/log/nginx/access.log"},
    "apache": {"id": "apache", "type": "file", "path": "/var/log/apache2/access.log"},
    "caddy": {"id": "caddy", "type": "file", "path": "/var/log/caddy/access.log"},
    "sonarr": {"id": "sonarr", "type": "file", "path": "/var/log/sonarr/sonarr.0.txt"},
    "radarr": {"id": "radarr", "type": "file", "path": "/var/log/radarr/radarr.0.txt"},
    "qbittorrent": {"id": "qbittorrent", "type": "file", "path": "/var/log/qbittorrent/qbittorrent.log"},
    "plex": {"id": "plex", "type": "file", "path": "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs/Plex Media Server.log"},
    "jellyfin": {"id": "jellyfin", "type": "file", "path": "/var/log/jellyfin/jellyfin.log"},
    "syslog": {"id": "syslog", "type": "file", "path": "/var/log/syslog"},
    "auth": {"id": "auth", "type": "file", "path": "/var/log/auth.log"},
    "fail2ban": {"id": "fail2ban", "type": "file", "path": "/var/log/fail2ban.log"},
    "docker": {"id": "docker-daemon", "type": "file", "path": "/var/log/docker.log"},
    "journal": {"id": "journal", "type": "journald"},
    "journald": {"id": "journal", "type": "journald"},
    "systemd": {"id": "journal", "type": "journald"},
}
def _keyword_match(description: str) -> dict[str, Any] | None:
    """Resolve *description* against ``_KNOWN_APPS`` without calling the LLM.

    The first keyword (in table order) that occurs as a substring of the
    lower-cased description wins. The result is a copy of its template with a
    default ``label`` filled in when the template has none. Returns None when
    no keyword occurs.
    """
    haystack = description.lower()
    hit = next((keyword for keyword in _KNOWN_APPS if keyword in haystack), None)
    if hit is None:
        return None
    source = dict(_KNOWN_APPS[hit])
    source.setdefault("label", hit.capitalize() + " log")
    return source
def _extract_json(text: str) -> dict[str, Any] | None:
"""Pull the first {...} block out of an LLM response."""
match = re.search(r"\{[^{}]+\}", text, re.DOTALL)
if not match:
return None
try:
return json.loads(match.group())
except json.JSONDecodeError:
return None
def interpret(
    description: str,
    llm_url: str | None,
    llm_model: str | None,
    api_key: str | None = None,
    timeout: float = 30.0,
) -> dict[str, Any] | None:
    """Turn a natural-language source description into a source dict.

    A keyword match is returned immediately when one exists. Otherwise, if
    both *llm_url* and *llm_model* are set, the description is posted to the
    ``/v1/chat/completions`` endpoint under *llm_url* and the first JSON
    object in the reply is returned, with ``label`` defaulting to the first
    60 characters of the description. Returns None when no LLM is configured,
    when the call fails for any reason, or when no JSON can be extracted.

    The result is not validated here; callers must run it through
    discover.validate_source() before writing anything to disk.
    """
    # 1. Keyword shortcut — no LLM needed for well-known apps
    shortcut = _keyword_match(description)
    if shortcut:
        logger.debug("NL source: keyword match for %r", description)
        return shortcut

    # 2. LLM path
    if not (llm_url and llm_model):
        logger.debug("NL source: no LLM configured, returning None")
        return None

    request_headers = {"Content-Type": "application/json"}
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"
    payload = {
        "model": llm_model,
        "messages": [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": description},
        ],
        "stream": False,
        "max_tokens": 256,
    }

    try:
        # Everything from URL construction to response parsing is best-effort.
        endpoint = f"{llm_url.rstrip('/')}/v1/chat/completions"
        resp = httpx.post(endpoint, json=payload, headers=request_headers, timeout=timeout)
        resp.raise_for_status()
        reply = resp.json()["choices"][0]["message"]["content"]
        parsed = _extract_json(reply)
        if not parsed:
            logger.warning("NL source: could not extract JSON from LLM response")
        else:
            parsed.setdefault("label", description[:60])
            return parsed
    except Exception as exc:
        logger.warning("NL source: LLM call failed (%s): %s", type(exc).__name__, exc)
    return None

View file

@ -0,0 +1,154 @@
# Turnstone Compliance Checklist
**Last reviewed:** 2026-05-28
**Applies to:** All deployments handling log data in compliance-sensitive environments.
Symbols: ✅ satisfied by code, ⚙️ operator action required, ⚠️ known limitation, 🔲 not implemented.
---
## Data Isolation
### Source-level query isolation
✅ **`source_filter` enforced on all log-returning endpoints.**
Every endpoint that returns log entries accepts a `source` parameter. Both the FTS5 keyword search path and the time-window scan path apply `source_id LIKE ?` before returning results. No cross-source data leakage is possible through the API.
Relevant code: `app/services/search.py` — `search()` and `entries_in_window()`.
### FTS5 cross-source leakage
✅ **FTS5 index includes `source_id` as an UNINDEXED column; all queries filter on it.**
The virtual table schema stores `source_id` alongside each entry. Query functions always join back to the base table or filter the FTS result set by `source_id`. There is no full-corpus FTS path that ignores source.
### SQLite file permissions
⚙️ **Operator responsibility — not enforced by Turnstone.**
Turnstone does not set file permissions on the database. Recommended posture for multi-user hosts:
```bash
# Restrict DB to the Turnstone process user only
chmod 600 /devl/turnstone-cluster/data/turnstone.db
chmod 600 /devl/turnstone-cluster/data/turnstone-context.db
chown turnstone:turnstone /devl/turnstone-cluster/data/
```
Run Turnstone as a dedicated non-root user via systemd `User=turnstone`.
---
## Audit Logging
### API query logging
✅ **Implemented as FastAPI middleware (`turnstone.audit` logger).**
Every request to `/turnstone/api/*` is logged at INFO level with:
- Timestamp (from the logging handler)
- HTTP method
- Path + query string
- Response status code
- Request duration (ms)
Body content is never logged. Example output:
```
2026-05-28 14:23:01 INFO turnstone.audit GET /turnstone/api/diagnose/stream?source=heimdall-journal 200 1843ms
```
To capture audit logs to a separate file, configure the `turnstone.audit` logger in your logging config:
```python
# In your uvicorn startup or log config YAML:
logging.getLogger("turnstone.audit").addHandler(
logging.FileHandler("/var/log/turnstone/audit.log")
)
```
### Glean operation logging
✅ **Glean scheduler logs source ID, entry count, and duration at INFO level.**
Relevant logger: `app.tasks.glean_scheduler` — logs start, per-source stats, and errors.
Log example:
```
INFO app.tasks.glean_scheduler Batch glean complete in 12.4s — {'heimdall-journal': 847, 'plex': 12}
```
### Error logging
✅ **Errors logged with source context but without PII in message fields.**
Exception handlers in `rest.py` log at ERROR level with the endpoint path and error type. Raw log entry text is not included in error messages. Stack traces go to the `uvicorn.error` logger.
---
## LLM / PII Egress
### Multi-agent pipeline (recommended path, `TURNSTONE_MULTI_AGENT_DIAGNOSE=true`)
✅ **Raw log message text is NOT sent to the LLM.**
Stage 5 (synthesizer) sends only:
- The operator's query string
- Timeline statistics (cluster counts, burst counts, gap counts — no entry text)
- Hypothesis titles from Stage 3 (derived labels, not raw messages)
- Runbook context from the operator's own uploaded documents
No raw `MESSAGE` field content reaches the LLM in this path. Review: `app/services/diagnose/synthesizer.py`.
### Legacy single-call path (`TURNSTONE_MULTI_AGENT_DIAGNOSE` unset or `false`)
⚠️ **Raw log message text (truncated to 200 chars) IS sent to the LLM.**
The legacy `summarize()` function in `app/services/llm.py` builds a prompt that includes up to 25 log entries with their `text` field (truncated). If log entries contain hostnames, usernames, IP addresses, or other PII, those values are included in the LLM call.
**Operator action for PII-sensitive deployments:** Enable `TURNSTONE_MULTI_AGENT_DIAGNOSE=true` to use the pipeline path, which does not expose raw log text.
### Avocet harvester (corpus export)
✅ **Only pattern-tagged entries are exported; export can be disabled.**
The harvester (`harvester/harvester.py`) only POSTs entries that matched at least one named pattern. It does not export the full corpus. Disable by leaving `TURNSTONE_SUBMIT_ENDPOINT` unset (the default).
### External telemetry
✅ **None.** Turnstone makes no calls to Sentry, Segment, Amplitude, or any analytics service. The only outbound network calls are:
- Your configured `GPU_SERVER_URL` (LLM inference, operator-controlled)
- HuggingFace Hub (model downloads — disable with `TURNSTONE_OFFLINE_MODE=1`)
- SSH connections to configured remote log sources (operator-defined)
---
## Configuration Hardening
For compliance deployments, set these in `.env`:
```bash
# Block HuggingFace network access (model weights pre-downloaded)
TURNSTONE_OFFLINE_MODE=1
# Require bearer token for all API calls
TURNSTONE_API_KEY=<strong-random-token>
# Use multi-agent pipeline (no raw log text to LLM)
TURNSTONE_MULTI_AGENT_DIAGNOSE=true
# Disable Avocet corpus push if not needed
# (leave TURNSTONE_SUBMIT_ENDPOINT unset)
```
---
## Outstanding Items
🔲 **Per-user access control** — all authenticated clients share the same API key. There is no per-user identity, role separation, or per-source ACL. Track as a future enhancement.
🔲 **Audit log retention policy** — Turnstone writes audit events to the logging system but does not manage log rotation or retention. Operator must configure log rotation (logrotate, systemd journal limits, etc.).
🔲 **Encrypted DB at rest** — SQLite does not support transparent encryption. For encryption at rest, use full-disk encryption (LUKS) or an encrypted filesystem on the host.
🔲 **TLS between client and Turnstone** — Turnstone binds to HTTP by default. For production, place Caddy or nginx in front and terminate TLS there. Do not expose port 8534 directly over untrusted networks.
---
## Data Subject Rights (GDPR / CCPA)
### Right to erasure — anonymized records
⚠️ **Anonymized log data cannot be selectively deleted on a per-subject basis.**
When PII sanitization is applied to a bundle export (redacting IPv4 addresses, email addresses, usernames, hostnames, and password values), the resulting data is no longer linked to a specific data subject. As a consequence, Turnstone cannot identify which stored log entries relate to that subject and cannot fulfill a targeted deletion request for records that have already been anonymized.
**Operators must clearly disclose this limitation to data subjects before export:**
> "Anonymized log data exported or submitted from this system cannot be individually identified or selectively deleted. If data was exported in anonymized form, Turnstone cannot distinguish your records from others in the exported set. The right to erasure does not apply to data that is no longer personally identifiable."
This is consistent with GDPR Recital 26, which excludes anonymized data from the regulation's scope. However, the original (pre-anonymization) records in Turnstone's local SQLite database *can* be deleted by source ID via the Sources view (Delete all entries for source) or directly via the database.
**Recommended operator practice:**
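- Maintain a log of which bundles were exported, when, and to whom. The audit log (`turnstone.audit`) records each export request, and the `sent_bundles` table (exposed via `GET /api/sent-bundles`) records every exported bundle with its timestamp and sanitized flag; neither records the recipient, so track that separately.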
- Provide data subjects with the bundle export timestamp and source scope so they can verify what was shared.
- For full erasure of pre-anonymization records: use `DELETE /api/sources/{source_id}` to purge all entries for a given source from the local DB.

View file

@ -0,0 +1,421 @@
<template>
<div class="rounded border border-accent bg-surface-raised p-6 sm:p-8 max-w-2xl mx-auto">
<!-- Step indicator -->
<div class="flex items-center gap-2 mb-6">
<span v-for="(label, i) in stepLabels" :key="i" class="flex items-center gap-2">
<span
class="w-6 h-6 rounded-full flex items-center justify-center text-xs font-semibold border"
:class="i + 1 === step
? 'bg-accent text-bg border-accent'
: i + 1 < step
? 'bg-accent/20 text-accent border-accent/40'
: 'bg-surface text-text-dim border-surface-border'"
>{{ i + 1 }}</span>
<span class="text-xs hidden sm:inline" :class="i + 1 === step ? 'text-text-primary' : 'text-text-dim'">{{ label }}</span>
<span v-if="i < stepLabels.length - 1" class="text-text-dim text-xs"></span>
</span>
</div>
<!-- Step 1: Detect -->
<div v-if="step === 1">
<h2 class="text-text-primary text-base font-semibold mb-1">Detecting log sources</h2>
<p class="text-text-dim text-sm mb-5">Turnstone is scanning for available log sources on this host.</p>
<div v-if="discovering" class="flex items-center gap-2 text-text-dim text-sm py-4">
<svg class="animate-spin w-4 h-4 text-accent" viewBox="0 0 24 24" fill="none">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"/>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v8z"/>
</svg>
Scanning
</div>
<div v-else-if="discoverError" class="text-sev-error text-sm py-4">
{{ discoverError }}
<button @click="runDiscover" class="ml-2 underline text-accent text-xs">Retry</button>
</div>
<div v-else>
<div v-if="candidates.length === 0" class="text-text-dim text-sm py-3 mb-4">
No sources auto-detected. You can add sources manually in the next step.
</div>
<div v-else class="space-y-1 text-sm mb-4">
<div v-for="g in groups" :key="g.type" class="flex items-center gap-2 text-text-muted">
<span class="font-mono text-xs text-text-dim px-1.5 py-0.5 rounded border border-surface-border">{{ g.type }}</span>
<span><strong class="text-text-primary">{{ g.items.length }}</strong> {{ g.label }}</span>
</div>
</div>
<div class="flex justify-between items-center mt-6">
<a @click.prevent="$emit('skip')" href="#" class="text-text-dim text-xs hover:text-text-muted">
Skip I'll edit sources.yaml manually
</a>
<button @click="step = 2" class="btn-primary text-sm">Continue </button>
</div>
</div>
</div>
<!-- Step 2: Select -->
<div v-if="step === 2">
<h2 class="text-text-primary text-base font-semibold mb-1">Select log sources</h2>
<p class="text-text-dim text-sm mb-4">Choose which sources to monitor. You can add more later.</p>
<!-- Grouped source list -->
<div class="space-y-3 mb-4">
<div v-for="g in groups" :key="g.type" class="rounded border border-surface-border overflow-hidden">
<!-- Group header -->
<div class="flex items-center gap-3 px-3 py-2 bg-surface border-b border-surface-border">
<button @click="toggleGroupOpen(g.type)" class="flex items-center gap-2 flex-1 min-w-0 text-left">
<span class="text-text-dim text-xs">{{ groupOpen[g.type] ? '▾' : '▸' }}</span>
<span class="text-text-primary text-sm font-medium">{{ g.label }}</span>
<span class="text-text-dim text-xs">({{ g.items.length }})</span>
<span v-if="groupSelectedCount(g.type) > 0" class="text-accent text-xs ml-1">
{{ groupSelectedCount(g.type) }} selected
</span>
</button>
<div class="flex items-center gap-2 shrink-0">
<button
@click="selectGroup(g.type)"
class="text-xs px-2 py-0.5 rounded border border-surface-border text-text-dim hover:text-accent hover:border-accent transition-colors"
>All</button>
<button
@click="deselectGroup(g.type)"
class="text-xs px-2 py-0.5 rounded border border-surface-border text-text-dim hover:text-sev-error hover:border-sev-error transition-colors"
>None</button>
</div>
</div>
<!-- Group items -->
<div v-if="groupOpen[g.type]" class="divide-y divide-surface-border max-h-64 overflow-y-auto">
<label
v-for="c in g.items"
:key="c.id"
class="flex items-start gap-3 px-3 py-2.5 cursor-pointer transition-colors"
:class="isSelected(c) ? 'bg-accent/5' : 'hover:bg-surface'"
>
<input
type="checkbox"
:checked="isSelected(c)"
@change="toggleCandidate(c)"
class="mt-0.5 accent-accent shrink-0"
/>
<div class="min-w-0 flex-1">
<div class="text-text-primary text-sm">{{ c.label }}</div>
<div v-if="c.path" class="font-mono text-xs text-text-dim mt-0.5 truncate">{{ c.path }}</div>
<div v-else-if="c.container" class="font-mono text-xs text-text-dim mt-0.5">{{ c.container }}</div>
</div>
</label>
</div>
</div>
</div>
<!-- NL / manual add -->
<div class="border border-surface-border rounded p-4 mb-4">
<p class="text-text-muted text-xs font-medium mb-2">Add a source by description</p>
<div class="flex gap-2">
<input
v-model="nlDescription"
type="text"
placeholder="e.g. nginx access log, qbittorrent, sonarr"
class="flex-1 bg-surface border border-surface-border rounded px-3 py-1.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent"
@keydown.enter="interpretNL"
/>
<button
@click="interpretNL"
:disabled="!nlDescription.trim() || interpreting"
class="btn-secondary text-xs px-3 disabled:opacity-40 disabled:cursor-not-allowed"
>{{ interpreting ? '…' : 'Add' }}</button>
</div>
<div v-if="nlError" class="text-sev-error text-xs mt-2">{{ nlError }}</div>
<div v-if="showManualForm" class="mt-3 space-y-2">
<p class="text-text-dim text-xs">Couldn't interpret that fill in manually:</p>
<div class="flex gap-2">
<input v-model="manualId" placeholder="id (e.g. nginx)" class="flex-1 input-sm" />
<input v-model="manualPath" placeholder="/path/to/log.txt" class="flex-1 input-sm" />
</div>
<button @click="addManual" class="btn-secondary text-xs mt-1">Add manually</button>
</div>
</div>
<div class="flex justify-between items-center">
<button @click="step = 1" class="text-text-dim text-xs hover:text-text-muted"> Back</button>
<div class="flex items-center gap-3">
<span class="text-text-dim text-xs">
{{ selected.length }} source{{ selected.length === 1 ? '' : 's' }} selected
</span>
<button
@click="step = 3"
:disabled="selected.length === 0"
class="btn-primary text-sm disabled:opacity-40 disabled:cursor-not-allowed"
>Review </button>
</div>
</div>
</div>
<!-- Step 3: Confirm -->
<div v-if="step === 3">
<h2 class="text-text-primary text-base font-semibold mb-1">Confirm and write</h2>
<p class="text-text-dim text-sm mb-4">Review the <code class="bg-surface px-1 rounded">sources.yaml</code> that will be written.</p>
<pre class="bg-surface border border-surface-border rounded p-3 text-xs font-mono text-text-primary overflow-x-auto max-h-64 mb-5 whitespace-pre">{{ previewYaml }}</pre>
<div v-if="writeError" class="text-sev-error text-sm mb-4">{{ writeError }}</div>
<div v-if="writeSuccess" class="text-green-400 text-sm mb-4">{{ writeSuccess }}</div>
<div class="flex justify-between items-center">
<button @click="step = 2" class="text-text-dim text-xs hover:text-text-muted"> Back</button>
<button
@click="writeAndFinish"
:disabled="writing"
class="btn-primary text-sm disabled:opacity-40 disabled:cursor-not-allowed"
>{{ writing ? 'Writing…' : 'Write sources.yaml' }}</button>
</div>
</div>
</div>
</template>
<script setup lang="ts">
import { ref, computed, reactive, onMounted } from 'vue'
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
const emit = defineEmits<{ done: []; skip: [] }>()
// A log source the wizard can offer for selection: returned by discovery,
// interpreted from a free-text description, or entered manually.
interface Candidate {
// Source kind; used as the grouping key and to choose the YAML shape in the preview.
type: string
// Identity used for selection and de-duplication.
id: string
// Display name shown in the source list.
label: string
description: string
// Log file path; shown under the label and emitted as `path:` for non-journald, non-docker sources.
path?: string
// Container name; the preview falls back to the last ':'-separated segment of `id` when absent.
container?: string
// Container runtime; the preview defaults it to 'docker' when absent.
runtime?: string
// Emitted as `unit:` for journald sources when present.
unit?: string
// NOTE(review): set to true for interpreted/manual sources; not read in the visible part of this component — confirm meaning against the API.
available: boolean
}
// Candidates of one source type, bundled for display as a collapsible section.
interface Group {
// Source type shared by every item; also the key into groupOpen.
type: string
// Heading text: the GROUP_META label, or the raw type when no metadata exists.
label: string
items: Candidate[]
}
// Per-type presentation rules for the grouped source list:
//   label       - section heading
//   order       - sort position (types missing here sort as 99)
//   defaultOpen - whether the section starts expanded after discovery
//   preselect   - whether discovered candidates of this type start selected
const GROUP_META: Record<string, { label: string; order: number; defaultOpen: boolean; preselect: boolean }> = {
journald: { label: 'System journal', order: 0, defaultOpen: true, preselect: true },
file: { label: 'Log files', order: 1, defaultOpen: true, preselect: true },
docker: { label: 'Docker containers', order: 2, defaultOpen: false, preselect: false },
}
// Names of the three wizard steps, in order.
const stepLabels = ['Detect', 'Select', 'Confirm']
// Current step, 1-based (1 = Detect, 2 = Select, 3 = Confirm).
const step = ref(1)
// True while the discovery request is in flight.
const discovering = ref(false)
// Message from the last failed discovery, or null.
const discoverError = ref<string | null>(null)
// Every source the wizard knows about (discovered, interpreted, or manual).
const candidates = ref<Candidate[]>([])
// Subset of candidates the user has chosen to write; membership is matched by id.
const selected = ref<Candidate[]>([])
// Track which groups are expanded
const groupOpen = reactive<Record<string, boolean>>({})
// Candidates bucketed by source type and ordered by GROUP_META.order.
// Types without metadata use their raw type as the label and sort last (99).
const groups = computed<Group[]>(() => {
  const byType: Record<string, Candidate[]> = {}
  for (const candidate of candidates.value) {
    if (byType[candidate.type] == null) byType[candidate.type] = []
    byType[candidate.type].push(candidate)
  }

  const rank = (type: string): number => GROUP_META[type]?.order ?? 99

  const result: Group[] = []
  for (const [type, items] of Object.entries(byType)) {
    result.push({ type, label: GROUP_META[type]?.label ?? type, items })
  }
  return result.sort((left, right) => rank(left.type) - rank(right.type))
})
// Number of currently selected candidates in the group of the given type
// (0 when no such group exists).
function groupSelectedCount(type: string): number {
  const match = groups.value.find(g => g.type === type)
  return match ? match.items.filter(item => isSelected(item)).length : 0
}
// Flip the expanded/collapsed state of one group; an unknown type becomes open.
function toggleGroupOpen(type: string) {
  const wasOpen = Boolean(groupOpen[type])
  groupOpen[type] = !wasOpen
}
// "All" button: select every candidate of the group that is not selected yet,
// then expand the group so the result is visible.
function selectGroup(type: string) {
  const target = groups.value.find(g => g.type === type)
  if (target === undefined) return

  const alreadyChosen = new Set<string>()
  for (const entry of selected.value) alreadyChosen.add(entry.id)

  const missing = target.items.filter(item => !alreadyChosen.has(item.id))
  selected.value = selected.value.concat(missing)
  groupOpen[type] = true
}
// "None" button: drop every candidate of the given group from the selection.
function deselectGroup(type: string) {
  const target = groups.value.find(g => g.type === type)
  if (target === undefined) return

  const groupIds = new Set<string>()
  for (const item of target.items) groupIds.add(item.id)

  selected.value = selected.value.filter(entry => groupIds.has(entry.id) === false)
}
// NL / manual add
// Free-text description typed by the user.
const nlDescription = ref('')
// True while the interpret request is in flight.
const interpreting = ref(false)
// Message shown when interpretation fails or is rejected, or null.
const nlError = ref<string | null>(null)
// Whether the manual id/path fallback form is visible.
const showManualForm = ref(false)
const manualId = ref('')
const manualPath = ref('')
// Write
// True while the write request is in flight.
const writing = ref(false)
const writeError = ref<string | null>(null)
const writeSuccess = ref<string | null>(null)
// Client-side preview of the sources.yaml entries for the current selection.
// journald and docker sources get their type-specific keys; any other type is
// rendered with just an id and a path.
const previewYaml = computed(() => {
  if (!selected.value.length) return '# No sources selected'
  const lines = ['sources:']
  for (const src of selected.value) {
    // Each list item starts at two spaces and its remaining keys at four, so
    // they line up under `id` and stay inside the same mapping. Keys placed
    // at the dash's own column would not be valid block YAML.
    lines.push(`  - id: ${src.id}`)
    if (src.type === 'journald') {
      lines.push(`    type: journald`)
      if (src.unit) lines.push(`    unit: ${src.unit}`)
    } else if (src.type === 'docker') {
      lines.push(`    type: docker`)
      lines.push(`    runtime: ${src.runtime ?? 'docker'}`)
      // Fall back to the last ':'-separated segment of the id as the container name.
      lines.push(`    container: ${src.container ?? src.id.split(':').pop()}`)
    } else if (src.path) {
      // Omit the key rather than print the literal text "undefined".
      lines.push(`    path: ${src.path}`)
    }
  }
  return lines.join('\n')
})
// Whether a candidate with the same id is in the current selection.
function isSelected(c: Candidate): boolean {
  return selected.value.findIndex(entry => entry.id === c.id) !== -1
}
// Checkbox handler: remove the candidate if it is selected, otherwise append it.
function toggleCandidate(c: Candidate) {
  selected.value = isSelected(c)
    ? selected.value.filter(entry => entry.id !== c.id)
    : selected.value.concat([c])
}
// Fetch auto-detected sources and reset the wizard's list state from them:
// candidates, per-group expanded state, and the initial selection.
async function runDiscover() {
  discovering.value = true
  discoverError.value = null
  try {
    const response = await fetch(`${BASE}/api/setup/discover`)
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`)
    }
    const payload = await response.json()
    candidates.value = payload.candidates ?? []

    // Known groups start in their configured expanded/collapsed state.
    Object.keys(GROUP_META).forEach(type => {
      groupOpen[type] = GROUP_META[type].defaultOpen
    })
    // Types without metadata start collapsed unless a state already exists.
    candidates.value.forEach(found => {
      if (!(found.type in groupOpen)) groupOpen[found.type] = false
    })

    // Only groups flagged preselect start out selected.
    selected.value = candidates.value.filter(found => GROUP_META[found.type]?.preselect ?? false)
  } catch (e: any) {
    discoverError.value = e.message ?? 'Discovery failed'
  } finally {
    discovering.value = false
  }
}
// Ask the backend to turn the free-text description into a source definition.
// On success the source is added to the candidate list (if new) and selected.
// Otherwise the manual entry form is revealed with an explanatory message.
async function interpretNL() {
  // Enter on the text field bypasses the disabled Add button, so guard here
  // against starting a second request while one is still in flight.
  if (interpreting.value) return
  if (!nlDescription.value.trim()) return
  interpreting.value = true
  nlError.value = null
  showManualForm.value = false
  try {
    const res = await fetch(`${BASE}/api/setup/interpret`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ description: nlDescription.value }),
    })
    const data = await res.json()
    if (data.source) {
      const candidate: Candidate = { available: true, ...data.source }
      // Add to candidates so it appears in the correct group
      if (!candidates.value.some(c => c.id === candidate.id)) {
        candidates.value = [...candidates.value, candidate]
        // Always expand the group, as addManual does, so the new source is
        // visible even when its group was collapsed.
        groupOpen[candidate.type] = true
      }
      if (!isSelected(candidate)) selected.value = [...selected.value, candidate]
      nlDescription.value = ''
    } else {
      showManualForm.value = true
      nlError.value = data.validation_error
        ? `Validation: ${data.validation_error}`
        : 'Could not interpret — fill in manually below.'
    }
  } catch {
    // Network failure or a non-JSON response: fall back to manual entry.
    showManualForm.value = true
    nlError.value = 'Interpretation failed — fill in manually below.'
  } finally {
    interpreting.value = false
  }
}
// Manual fallback: build a file source from the id/path fields, add it to the
// candidate list (if its id is new), select it, and reset the add-source form.
function addManual() {
  const id = manualId.value.trim()
  const path = manualPath.value.trim()
  if (id === '' || path === '') return

  const candidate: Candidate = {
    type: 'file',
    id,
    path,
    label: id,
    description: `Read from ${path}`,
    available: true,
  }

  const known = candidates.value.some(existing => existing.id === candidate.id)
  if (!known) {
    candidates.value = candidates.value.concat([candidate])
    groupOpen['file'] = true
  }
  if (!isSelected(candidate)) {
    selected.value = selected.value.concat([candidate])
  }

  // Clear the manual form and any leftover NL input/error.
  manualId.value = ''
  manualPath.value = ''
  showManualForm.value = false
  nlDescription.value = ''
  nlError.value = null
}
// POST the selected sources to the write endpoint. On success, show a short
// confirmation and emit `done` after a brief delay. On failure, surface the
// server's `detail` (or a generic message) in writeError.
async function writeAndFinish() {
  // Ignore clicks while a write is in flight or after one has already
  // succeeded. The button is re-enabled during the delay before `done`, and
  // the endpoint appends to the existing file, so a repeat would duplicate entries.
  if (writing.value || writeSuccess.value) return
  writing.value = true
  writeError.value = null
  writeSuccess.value = null
  try {
    const res = await fetch(`${BASE}/api/setup/write`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ sources: selected.value }),
    })
    if (!res.ok) {
      // The error body may not be JSON; fall back to the HTTP status text.
      const err = await res.json().catch(() => ({ detail: res.statusText }))
      writeError.value = err.detail ?? 'Write failed'
      return
    }
    const data = await res.json()
    writeSuccess.value = `Wrote ${data.written} source${data.written === 1 ? '' : 's'} to sources.yaml.`
    // Leave the confirmation on screen briefly before handing control back.
    setTimeout(() => emit('done'), 1200)
  } catch (e: any) {
    writeError.value = e.message ?? 'Network error'
  } finally {
    writing.value = false
  }
}
onMounted(runDiscover)
</script>
<style scoped>
.input-sm {
@apply bg-surface border border-surface-border rounded px-2 py-1 text-xs text-text-primary focus:outline-none focus:border-accent;
}
</style>

View file

@ -3,10 +3,29 @@
<!-- Header -->
<div class="mb-6">
<h1 class="text-text-primary text-xl font-semibold mb-1">Received Bundles</h1>
<p class="text-text-dim text-sm">Labeled incident bundles sent from remote Turnstone instances. Use these to build detection signatures.</p>
<h1 class="text-text-primary text-xl font-semibold mb-1">Bundles</h1>
<p class="text-text-dim text-sm">Incident bundles sent to and received from remote Turnstone instances.</p>
</div>
<!-- Tabs -->
<div class="flex gap-1 mb-5 border-b border-surface-border">
<button
v-for="tab in tabs"
:key="tab.key"
@click="activeTab = tab.key"
class="px-4 py-2 text-sm font-medium border-b-2 -mb-px transition-colors"
:class="activeTab === tab.key
? 'border-accent text-accent'
: 'border-transparent text-text-dim hover:text-text-muted'"
>
{{ tab.label }}
<span v-if="tab.key === 'received' && bundles.length" class="ml-1.5 text-xs bg-surface px-1.5 py-0.5 rounded text-text-dim border border-surface-border">{{ bundles.length }}</span>
<span v-if="tab.key === 'sent' && sentBundles.length" class="ml-1.5 text-xs bg-surface px-1.5 py-0.5 rounded text-text-dim border border-surface-border">{{ sentBundles.length }}</span>
</button>
</div>
<!-- RECEIVED TAB -->
<div v-if="activeTab === 'received'">
<div v-if="loading" class="text-text-dim py-8 text-center text-sm">Loading</div>
<div v-else-if="bundles.length === 0" class="rounded border border-surface-border bg-surface-raised p-8 text-center">
@ -19,12 +38,11 @@
v-for="b in bundles"
:key="b.id"
class="rounded border bg-surface-raised overflow-hidden"
:class="selected?.id === b.id ? 'border-accent' : 'border-surface-border'"
:class="selectedReceived?.id === b.id ? 'border-accent' : 'border-surface-border'"
>
<!-- Bundle header row -->
<div
class="flex flex-wrap items-center gap-2 sm:gap-3 px-3 sm:px-4 py-3 cursor-pointer hover:bg-surface transition-colors"
@click="toggleBundle(b)"
@click="toggleReceived(b)"
>
<span class="font-mono text-xs text-accent bg-surface px-1.5 py-0.5 rounded border border-surface-border shrink-0">
{{ b.issue_type || 'untyped' }}
@ -34,11 +52,10 @@
<span class="px-2 py-0.5 rounded text-xs font-medium border shrink-0" :style="severityStyle(b.severity)">{{ b.severity }}</span>
<span class="text-text-dim text-xs shrink-0">{{ b.entry_count }} entries</span>
<span class="text-text-dim text-xs shrink-0 hidden sm:inline">{{ formatTs(b.bundled_at) }}</span>
<span class="text-text-dim text-xs shrink-0">{{ selected?.id === b.id ? '▲' : '▼' }}</span>
<span class="text-text-dim text-xs shrink-0">{{ selectedReceived?.id === b.id ? '▲' : '▼' }}</span>
</div>
<!-- Expanded entries -->
<div v-if="selected?.id === b.id" class="border-t border-surface-border">
<div v-if="selectedReceived?.id === b.id" class="border-t border-surface-border">
<div v-if="expandLoading" class="text-text-dim text-sm px-4 py-4">Loading entries</div>
<div v-else-if="expandedEntries.length === 0" class="text-text-dim text-sm px-4 py-4">No entries in bundle.</div>
<div v-else class="p-4 space-y-1 max-h-[32rem] overflow-y-auto">
@ -66,6 +83,72 @@
</div>
</div>
</div>
</div>
<!-- SENT TAB -->
<div v-if="activeTab === 'sent'">
<div v-if="sentLoading" class="text-text-dim py-8 text-center text-sm">Loading</div>
<div v-else-if="sentBundles.length === 0" class="rounded border border-surface-border bg-surface-raised p-8 text-center">
<p class="text-text-muted text-base mb-1">No bundles sent yet.</p>
<p class="text-text-dim text-sm">Bundles you export or send to a remote instance appear here for review.</p>
</div>
<div v-else class="space-y-3">
<div
v-for="s in sentBundles"
:key="s.id"
class="rounded border bg-surface-raised overflow-hidden"
:class="selectedSent?.id === s.id ? 'border-accent' : 'border-surface-border'"
>
<div
class="flex flex-wrap items-center gap-2 sm:gap-3 px-3 sm:px-4 py-3 cursor-pointer hover:bg-surface transition-colors"
@click="toggleSent(s)"
>
<span class="font-mono text-xs text-text-dim bg-surface px-1.5 py-0.5 rounded border border-surface-border shrink-0">
{{ sentIncidentLabel(s) }}
</span>
<span class="text-text-primary text-sm flex-1 min-w-0 truncate">{{ sentIncidentType(s) }}</span>
<span
class="px-2 py-0.5 rounded text-xs font-medium border shrink-0"
:class="s.sanitized ? 'text-green-400 border-green-400/30 bg-green-400/10' : 'text-text-dim border-surface-border'"
>
{{ s.sanitized ? 'sanitized' : 'raw' }}
</span>
<span class="text-text-dim text-xs shrink-0">{{ s.entry_count }} entries</span>
<span class="text-text-dim text-xs shrink-0 hidden sm:inline">{{ formatTs(s.exported_at) }}</span>
<button
@click.stop="redownloadSent(s)"
class="text-xs px-2 py-0.5 rounded border border-surface-border text-text-muted hover:text-accent hover:border-accent transition-colors shrink-0"
>
</button>
<span class="text-text-dim text-xs shrink-0">{{ selectedSent?.id === s.id ? '▲' : '▼' }}</span>
</div>
<div v-if="selectedSent?.id === s.id" class="border-t border-surface-border">
<div class="p-4 space-y-1 max-h-[32rem] overflow-y-auto">
<div class="flex items-center gap-2 mb-3">
<p class="text-text-dim text-xs">{{ sentExpandedEntries.length }} log entries (first 5 shown)</p>
<p v-if="s.sanitized" class="text-xs text-green-400 ml-auto">PII patterns redacted</p>
<p v-else class="text-xs text-text-dim ml-auto">Not sanitized contains raw log text</p>
</div>
<div
v-for="entry in sentExpandedEntries"
:key="entry.entry_id"
class="font-mono text-xs py-1 px-2 rounded bg-surface border border-surface-border"
>
<span class="text-text-dim mr-2">{{ shortTs(entry.timestamp_iso) }}</span>
<span :class="['mr-2', severityClass(entry.severity)]">{{ entry.severity || '?' }}</span>
<span class="text-text-muted">{{ lastPart(entry.source_id) }}</span>
<span class="text-text-dim mx-1">|</span>
<span class="text-text-primary">{{ entry.text.slice(0, 200) }}</span>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</template>
@ -87,6 +170,15 @@ interface BundleSummary {
bundle_json: string
}
// One row of the sent-bundles history as returned by /api/sent-bundles.
interface SentBundleSummary {
id: string
// Incident the bundle was exported from; its first 8 characters serve as a fallback label.
incident_id: string
exported_at: string
// Whether the bundle was exported with PII redaction; drives the sanitized/raw badge.
sanitized: boolean
entry_count: number
// Serialized bundle; parsed client-side for `incident` and `log_entries`.
bundle_json: string
}
interface LogEntry {
entry_id: string
source_id: string
@ -96,41 +188,69 @@ interface LogEntry {
matched_patterns: string[]
}
type TabKey = 'received' | 'sent'
const tabs: { key: TabKey; label: string }[] = [
{ key: 'received', label: 'Received' },
{ key: 'sent', label: 'Sent' },
]
const activeTab = ref<TabKey>('received')
// Received
const bundles = ref<BundleSummary[]>([])
const loading = ref(true)
const selected = ref<BundleSummary | null>(null)
const selectedReceived = ref<BundleSummary | null>(null)
const expandedEntries = ref<LogEntry[]>([])
const expandLoading = ref(false)
// Sent
const sentBundles = ref<SentBundleSummary[]>([])
const sentLoading = ref(true)
const selectedSent = ref<SentBundleSummary | null>(null)
const sentExpandedEntries = ref<LogEntry[]>([])
onMounted(async () => {
try {
const res = await fetch(`${BASE}/api/bundles`)
if (res.ok) bundles.value = (await res.json()).bundles
} finally {
const [recRes, sentRes] = await Promise.all([
fetch(`${BASE}/api/bundles`),
fetch(`${BASE}/api/sent-bundles`),
])
if (recRes.ok) bundles.value = (await recRes.json()).bundles
if (sentRes.ok) sentBundles.value = (await sentRes.json()).bundles
loading.value = false
}
sentLoading.value = false
})
async function toggleBundle(b: BundleSummary) {
if (selected.value?.id === b.id) {
selected.value = null
async function toggleReceived(b: BundleSummary) {
if (selectedReceived.value?.id === b.id) {
selectedReceived.value = null
expandedEntries.value = []
return
}
selected.value = b
selectedReceived.value = b
expandedEntries.value = []
expandLoading.value = true
try {
// bundle_json is stored inline, so parse it directly; no round-trip is needed
const parsed = JSON.parse(b.bundle_json)
expandedEntries.value = parsed.log_entries ?? []
} catch {
expandLoading.value = false
} finally {
expandLoading.value = false
}
}
// Expand or collapse a sent bundle. Expanding parses the stored JSON and keeps
// the first five log entries as a preview; unparseable JSON gives an empty preview.
function toggleSent(s: SentBundleSummary) {
  const isOpen = selectedSent.value?.id === s.id
  if (isOpen) {
    selectedSent.value = null
    sentExpandedEntries.value = []
    return
  }

  selectedSent.value = s
  try {
    const bundle = JSON.parse(s.bundle_json)
    const entries = bundle.log_entries ?? []
    sentExpandedEntries.value = entries.slice(0, 5)
  } catch {
    sentExpandedEntries.value = []
  }
}
function exportBundle(b: BundleSummary) {
const blob = new Blob([b.bundle_json], { type: 'application/json' })
const url = URL.createObjectURL(blob)
@ -141,6 +261,26 @@ function exportBundle(b: BundleSummary) {
URL.revokeObjectURL(url)
}
// Download the stored bundle JSON again as `sent-<issue_type>-<id prefix>.json`.
function redownloadSent(s: SentBundleSummary) {
  // The parse is only needed for a nicer filename. A malformed stored bundle
  // must not prevent downloading the raw text, so fall back to a generic label.
  let label = 'bundle'
  try {
    label = JSON.parse(s.bundle_json).incident?.issue_type || 'bundle'
  } catch {
    // keep the default label
  }
  const blob = new Blob([s.bundle_json], { type: 'application/json' })
  const url = URL.createObjectURL(blob)
  const a = document.createElement('a')
  a.href = url
  a.download = `sent-${label}-${s.id.slice(0, 8)}.json`
  a.click()
  URL.revokeObjectURL(url)
}
// Label for a sent bundle: the incident label stored inside the bundle JSON,
// or the first 8 characters of the incident id when it is missing or unparseable.
function sentIncidentLabel(s: SentBundleSummary): string {
  const shortId = (): string => s.incident_id.slice(0, 8)
  let label: string | null | undefined
  try {
    label = JSON.parse(s.bundle_json).incident?.label
  } catch {
    return shortId()
  }
  return label ?? shortId()
}
// Issue type stored inside the bundle JSON, or 'untyped' when it is empty,
// missing, or the JSON cannot be parsed.
function sentIncidentType(s: SentBundleSummary): string {
  let issueType: string | null | undefined
  try {
    issueType = JSON.parse(s.bundle_json).incident?.issue_type
  } catch {
    return 'untyped'
  }
  return issueType || 'untyped'
}
function severityStyle(sev: string): Record<string, string> {
const k = sev?.toLowerCase() ?? 'low'
const known = ['low', 'medium', 'high', 'critical']

View file

@ -74,6 +74,10 @@
<span v-if="selected.issue_type" class="font-mono text-xs text-accent">{{ selected.issue_type }}</span>
</div>
<div class="flex flex-wrap items-center gap-2 sm:gap-3 mt-1 sm:mt-0">
<label class="flex items-center gap-1.5 text-xs text-text-dim cursor-pointer select-none">
<input type="checkbox" v-model="sanitizeBundle" class="accent-accent" />
Sanitize PII
</label>
<button
@click="sendBundle(selected.id)"
:disabled="sending"
@ -181,6 +185,7 @@ const selectedEntries = ref<Entry[]>([])
const entriesLoading = ref(false)
const sending = ref(false)
const sendStatus = ref<{ ok: boolean; msg: string } | null>(null)
const sanitizeBundle = ref(false)
async function selectIncident(inc: Incident) {
selected.value = inc
@ -202,15 +207,17 @@ async function sendBundle(id: string) {
sending.value = true
sendStatus.value = null
try {
const res = await fetch(`${BASE}/api/incidents/${id}/send`, { method: 'POST' })
const params = sanitizeBundle.value ? '?sanitize=true' : ''
const res = await fetch(`${BASE}/api/incidents/${id}/send${params}`, { method: 'POST' })
if (res.ok) {
const data = await res.json()
sendStatus.value = { ok: true, msg: `Sent ${data.entry_count} entries` }
const tag = sanitizeBundle.value ? ' (sanitized)' : ''
sendStatus.value = { ok: true, msg: `Sent ${data.entry_count} entries${tag}` }
} else {
const err = await res.json().catch(() => ({ detail: res.statusText }))
sendStatus.value = { ok: false, msg: err.detail ?? 'Send failed' }
}
} catch (e) {
} catch {
sendStatus.value = { ok: false, msg: 'Network error' }
} finally {
sending.value = false

View file

@ -5,11 +5,35 @@
<h1 class="text-text-primary text-xl font-semibold mb-1">Log Sources</h1>
<p class="text-text-dim text-sm">All hosts and services in the gleaned corpus.</p>
</div>
<label class="btn-secondary text-sm cursor-pointer shrink-0">
<div class="flex items-center gap-2 shrink-0">
<button
@click="showAddPanel = !showAddPanel"
class="btn-secondary text-sm"
>
+ Add Source
</button>
<label class="btn-secondary text-sm cursor-pointer">
<span>Upload log file</span>
<input type="file" class="hidden" @change="handleUpload" />
</label>
</div>
</div>
<!-- First-run wizard -->
<div v-if="showWizard" class="mb-6">
<SetupWizard
@done="onWizardDone"
@skip="showWizard = false; loadSources()"
/>
</div>
<!-- Post-setup Add Source panel (condensed wizard steps 1-2) -->
<div v-else-if="showAddPanel" class="mb-6">
<SetupWizard
@done="showAddPanel = false; loadSources()"
@skip="showAddPanel = false"
/>
</div>
<!-- Upload / action feedback -->
<div v-if="actionMsg" class="mb-4 text-sm rounded border px-4 py-2.5"
@ -17,14 +41,14 @@
{{ actionMsg }}
</div>
<div v-if="loading" class="text-text-dim py-8 text-center text-sm">Loading</div>
<div v-if="!showWizard && loading" class="text-text-dim py-8 text-center text-sm">Loading</div>
<div v-else-if="sources.length === 0" class="text-text-dim py-12 text-center">
<div v-else-if="!showWizard && sources.length === 0" class="text-text-dim py-12 text-center">
<p class="mb-1">No log sources found.</p>
<p class="text-sm">Run the glean pipeline: <code class="bg-surface-raised px-1 rounded">python scripts/glean_corpus.py</code></p>
<p class="text-sm">Use <strong>Add Source</strong> above or edit <code class="bg-surface-raised px-1 rounded">sources.yaml</code> directly.</p>
</div>
<div v-else class="rounded border border-surface-border overflow-hidden">
<div v-else-if="!showWizard && sources.length > 0" class="rounded border border-surface-border overflow-hidden">
<div class="overflow-x-auto">
<table class="w-full text-sm min-w-[620px]">
<thead class="bg-surface-raised border-b border-surface-border">
@ -121,6 +145,7 @@
<script setup lang="ts">
import { ref, onMounted } from 'vue'
import SetupWizard from '@/components/SetupWizard.vue'
// Unified source row shown in the table (merges configured + DB-only sources).
interface SourceRow {
@ -155,9 +180,28 @@ const loading = ref(true)
const busy = ref(new Set<string>())
const actionMsg = ref('')
const actionError = ref(false)
const showWizard = ref(false)
const showAddPanel = ref(false)
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
// Ask the backend whether sources are configured and show the first-run
// wizard when they are not. Any failure leaves the page as it is.
async function checkSetupStatus(): Promise<void> {
  try {
    const response = await fetch(`${BASE}/api/setup/status`)
    if (!response.ok) return
    const status = await response.json()
    if (!status.configured) {
      showWizard.value = true
    }
  } catch {
    // If the check fails, don't block the page
  }
}
// Wizard finished: hide it, then reload the source table.
function onWizardDone(): void {
  showWizard.value = false
  // Fire-and-forget; the returned promise is intentionally not awaited.
  void loadSources()
}
async function loadSources(): Promise<void> {
try {
// Primary list: configured sources from sources.yaml (enriched with DB stats).
@ -211,7 +255,10 @@ async function loadSources(): Promise<void> {
}
}
onMounted(loadSources)
onMounted(async () => {
await checkSetupStatus()
if (!showWizard.value) await loadSources()
})
function setBusy(id: string, on: boolean): void {
const next = new Set(busy.value)