Bundle export (#51): - _redact_text() with 5 compiled regex patterns (IPv4, email, user=, host=, password=) - build_bundle(sanitize=False) — per-entry redaction at export time - sent_bundles table tracks every outgoing export (GET and POST /send) - GET /api/sent-bundles exposes history; SentBundle model added - BundlesView: Received/Sent tabs, sanitized badge, 5-entry preview, re-download - IncidentsView: Sanitize PII checkbox next to Send Bundle Onboarding wizard (#52): - app/services/discover.py: journald/Docker/file detection (best-effort, safe in containers) - GET /api/setup/status, /discover, POST /api/setup/write (additive, appends to existing) - SetupWizard.vue: 3-step Detect → Select → Confirm - Step 1 shows grouped summary (journald/file/docker counts) - Step 2: collapsible groups with All/None section toggles - journald + file: pre-selected; docker: collapsed, none pre-selected - Step 3: YAML preview before write - SourcesView: shows wizard on first run; Add Source button reuses it NL source addition (#53): - app/services/nl_source.py: keyword shortcut (13 well-known apps) + LLM fallback - POST /api/setup/interpret: keyword → LLM → null (graceful fallback) - NL field in wizard step 2; manual form shown when interpretation fails - Added sources appear in grouped list immediately
134 lines
5.4 KiB
Python
134 lines
5.4 KiB
Python
"""Natural-language log source interpretation (LLM path for #53).
|
|
|
|
BSL-gated feature: the structured form fallback is MIT; the LLM interpretation
|
|
requires the LLM service to be configured. The caller always validates the
|
|
output against the source schema before writing anything.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_SYSTEM_PROMPT = """\
|
|
You are a Turnstone log-source configuration assistant.
|
|
The operator will describe a log source in plain English.
|
|
Respond ONLY with a JSON object matching this schema — no prose, no markdown:
|
|
|
|
{
|
|
"id": "short-kebab-case identifier",
|
|
"type": "file" | "journald" | "docker",
|
|
"path": "/absolute/path (file type only)",
|
|
"container": "container-name (docker type only)",
|
|
"runtime": "docker" | "podman" (docker type only, default docker)",
|
|
"unit": "service.service (journald type only, omit for all-journal)",
|
|
"label": "Human-readable name for the UI"
|
|
}
|
|
|
|
Rules:
|
|
- For well-known apps (nginx, apache, caddy, sonarr, radarr, qbittorrent, plex, jellyfin),
|
|
use the conventional default log path.
|
|
- If the operator mentions a Docker/Podman container, use type=docker.
|
|
- If the operator mentions journald or a systemd service, use type=journald.
|
|
- If uncertain, use type=file with the most likely path.
|
|
- The "id" must be lowercase, hyphens only (no spaces, slashes, dots).
|
|
- Never include trailing commas or comments in your JSON.
|
|
"""
|
|
|
|
# Well-known path lookup for common apps — used as a deterministic fallback
|
|
_KNOWN_APPS: dict[str, dict[str, Any]] = {
|
|
"nginx": {"id": "nginx-access", "type": "file", "path": "/var/log/nginx/access.log"},
|
|
"apache": {"id": "apache", "type": "file", "path": "/var/log/apache2/access.log"},
|
|
"caddy": {"id": "caddy", "type": "file", "path": "/var/log/caddy/access.log"},
|
|
"sonarr": {"id": "sonarr", "type": "file", "path": "/var/log/sonarr/sonarr.0.txt"},
|
|
"radarr": {"id": "radarr", "type": "file", "path": "/var/log/radarr/radarr.0.txt"},
|
|
"qbittorrent": {"id": "qbittorrent", "type": "file", "path": "/var/log/qbittorrent/qbittorrent.log"},
|
|
"plex": {"id": "plex", "type": "file", "path": "/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs/Plex Media Server.log"},
|
|
"jellyfin": {"id": "jellyfin", "type": "file", "path": "/var/log/jellyfin/jellyfin.log"},
|
|
"syslog": {"id": "syslog", "type": "file", "path": "/var/log/syslog"},
|
|
"auth": {"id": "auth", "type": "file", "path": "/var/log/auth.log"},
|
|
"fail2ban": {"id": "fail2ban", "type": "file", "path": "/var/log/fail2ban.log"},
|
|
"docker": {"id": "docker-daemon", "type": "file", "path": "/var/log/docker.log"},
|
|
"journal": {"id": "journal", "type": "journald"},
|
|
"journald": {"id": "journal", "type": "journald"},
|
|
"systemd": {"id": "journal", "type": "journald"},
|
|
}
|
|
|
|
|
|
def _keyword_match(description: str) -> dict[str, Any] | None:
|
|
"""Try a simple keyword match before spending an LLM call."""
|
|
lower = description.lower()
|
|
for keyword, template in _KNOWN_APPS.items():
|
|
if keyword in lower:
|
|
result = dict(template)
|
|
result.setdefault("label", keyword.capitalize() + " log")
|
|
return result
|
|
return None
|
|
|
|
|
|
def _extract_json(text: str) -> dict[str, Any] | None:
|
|
"""Pull the first {...} block out of an LLM response."""
|
|
match = re.search(r"\{[^{}]+\}", text, re.DOTALL)
|
|
if not match:
|
|
return None
|
|
try:
|
|
return json.loads(match.group())
|
|
except json.JSONDecodeError:
|
|
return None
|
|
|
|
|
|
def interpret(
    description: str,
    llm_url: str | None,
    llm_model: str | None,
    api_key: str | None = None,
    timeout: float = 30.0,
) -> dict[str, Any] | None:
    """Interpret a natural-language source description.

    Resolution order: keyword shortcut first, then a chat-completions request
    to the configured LLM, otherwise ``None``.

    Returns a source dict or None if interpretation fails.
    The caller must validate the result with discover.validate_source()
    before writing anything to disk.

    Args:
        description: Free-text description typed by the operator.
        llm_url: Base URL of the LLM service; ``/v1/chat/completions`` is
            appended to it.  ``None``/empty disables the LLM path.
        llm_model: Model name sent in the request.  ``None``/empty disables
            the LLM path.
        api_key: Optional bearer token sent in the ``Authorization`` header.
        timeout: Request timeout in seconds passed to ``httpx.post``.

    Returns:
        The interpreted source dict (with a ``"label"`` filled in when
        missing), or ``None`` when the description is blank, no LLM is
        configured, the request fails, or no JSON object can be extracted.
    """
    # Nothing to interpret: do not scan keywords or spend an LLM call on a
    # blank description.
    if not description or not description.strip():
        return None

    # 1. Keyword shortcut — no LLM needed for well-known apps
    kw = _keyword_match(description)
    if kw:
        logger.debug("NL source: keyword match for %r", description)
        return kw

    # 2. LLM path
    if not llm_url or not llm_model:
        logger.debug("NL source: no LLM configured, returning None")
        return None

    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": description},
    ]
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        resp = httpx.post(
            f"{llm_url.rstrip('/')}/v1/chat/completions",
            json={"model": llm_model, "messages": messages, "stream": False, "max_tokens": 256},
            headers=headers,
            timeout=timeout,
        )
        resp.raise_for_status()
        content = resp.json()["choices"][0]["message"]["content"]
        # The content field is not guaranteed to be text (it can be null);
        # only text can hold the JSON object we asked for.
        parsed = _extract_json(content) if isinstance(content, str) else None
        if parsed:
            # Fall back to the operator's own words as the UI label.
            parsed.setdefault("label", description[:60])
            return parsed
        logger.warning("NL source: could not extract JSON from LLM response")
    except Exception as exc:
        # Deliberate best-effort boundary: any transport, HTTP-status or
        # response-shape problem degrades to "no interpretation".
        logger.warning("NL source: LLM call failed (%s): %s", type(exc).__name__, exc)

    return None
|