Compare commits
No commits in common. "v0.6.1" and "main" have entirely different histories.
70 changed files with 8794 additions and 822 deletions
53
.env.example
53
.env.example
|
|
@ -10,7 +10,7 @@
|
||||||
# GPU_SERVER_URL — URL of your GPU inference server (Ollama, vLLM, or cf-orch coordinator).
|
# GPU_SERVER_URL — URL of your GPU inference server (Ollama, vLLM, or cf-orch coordinator).
|
||||||
# Paid+ users: leave unset to auto-default to https://orch.circuitforge.tech via CF_LICENSE_KEY.
|
# Paid+ users: leave unset to auto-default to https://orch.circuitforge.tech via CF_LICENSE_KEY.
|
||||||
# Local Ollama (default if unset): http://localhost:11434
|
# Local Ollama (default if unset): http://localhost:11434
|
||||||
# Local cf-orch coordinator: http://10.1.10.71:7700
|
# Local cf-orch coordinator: http://<YOUR_HOST_IP>:7700
|
||||||
# CF_ORCH_URL is also accepted as a backward-compatible alias.
|
# CF_ORCH_URL is also accepted as a backward-compatible alias.
|
||||||
# GPU_SERVER_URL=http://localhost:11434
|
# GPU_SERVER_URL=http://localhost:11434
|
||||||
|
|
||||||
|
|
@ -23,6 +23,15 @@
|
||||||
# Remote endpoint to push diagnostic bundles for escalation.
|
# Remote endpoint to push diagnostic bundles for escalation.
|
||||||
# TURNSTONE_BUNDLE_ENDPOINT=https://example.com/api/bundles
|
# TURNSTONE_BUNDLE_ENDPOINT=https://example.com/api/bundles
|
||||||
|
|
||||||
|
# --- Log corpus export to Avocet (optional) ---
|
||||||
|
# Push ERROR/CRITICAL entries and labeled incidents to the Avocet corpus endpoint
|
||||||
|
# for log-reading fine-tune training. Requires a consent token issued by CF.
|
||||||
|
# Contact alan@circuitforge.tech to register your node and receive a token.
|
||||||
|
# Watermarks are stored at data/corpus_watermark.txt and data/incident_watermark.txt.
|
||||||
|
# AVOCET_CORPUS_ENDPOINT=https://avocet.circuitforge.tech/api/corpus/log-batch
|
||||||
|
# AVOCET_CONSENT_TOKEN=your-uuid-token-here
|
||||||
|
# TURNSTONE_SOURCE_HOST=my-server-name # defaults to system hostname if unset
|
||||||
|
|
||||||
# --- Periodic batch glean ---
|
# --- Periodic batch glean ---
|
||||||
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
|
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
|
||||||
# TURNSTONE_GLEAN_INTERVAL=900
|
# TURNSTONE_GLEAN_INTERVAL=900
|
||||||
|
|
@ -42,6 +51,32 @@
|
||||||
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
||||||
# TURNSTONE_EMBED_DEVICE=cpu
|
# TURNSTONE_EMBED_DEVICE=cpu
|
||||||
|
|
||||||
|
# --- Cybersec scoring pipeline (zero-shot, second-pass on flagged entries) ---
|
||||||
|
# Runs a zero-shot classifier on entries already flagged by the anomaly scorer
|
||||||
|
# or that have pattern matches — a focused second opinion using cybersec vocabulary.
|
||||||
|
# The DeBERTa-v3-base-mnli model (required by the diagnose pipeline) is the recommended
|
||||||
|
# zero-shot classifier — it produces human-readable cybersec labels with no fine-tuning.
|
||||||
|
# TURNSTONE_CYBERSEC_MODEL=MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
|
||||||
|
# TURNSTONE_CYBERSEC_DEVICE=cpu
|
||||||
|
# TURNSTONE_CYBERSEC_THRESHOLD=0.60 # lower than anomaly threshold (zero-shot is calibrated differently)
|
||||||
|
|
||||||
|
# --- Anomaly scoring pipeline (IDS / watchdog) ---
|
||||||
|
# Batch-scores every ingested log entry after each glean cycle.
|
||||||
|
# Any HuggingFace text-classification model works; the byviz classifier (already
|
||||||
|
# required by the diagnose pipeline) is the recommended starting point.
|
||||||
|
# Detections above the threshold are inserted into the detections table and
|
||||||
|
# surfaced in the Security Alerts tab.
|
||||||
|
#
|
||||||
|
# Set TURNSTONE_ANOMALY_MODEL to enable; leave unset to disable (safe default).
|
||||||
|
# TURNSTONE_ANOMALY_MODEL=byviz/bylastic_classification_logs
|
||||||
|
# TURNSTONE_ANOMALY_DEVICE=cpu # or "cuda" / "mps" for GPU inference
|
||||||
|
# TURNSTONE_ANOMALY_THRESHOLD=0.80 # confidence floor for detection insertion
|
||||||
|
# TURNSTONE_ANOMALY_INTERVAL=0 # standalone loop (0 = glean-triggered only)
|
||||||
|
#
|
||||||
|
# HuggingFace model cache — share with the host to avoid re-downloading models.
|
||||||
|
# HF_HOME=/hf_cache # inside container (set in docker-compose)
|
||||||
|
# HF_CACHE_PATH=/Library/Assets/LLM # host bind-mount source (docker-compose only)
|
||||||
|
|
||||||
# --- Air-gapped / offline deployment ---
|
# --- Air-gapped / offline deployment ---
|
||||||
# Set to 1 to block all HuggingFace hub network access at runtime.
|
# Set to 1 to block all HuggingFace hub network access at runtime.
|
||||||
# Pre-download models to ~/.cache/huggingface/ before deploying — see docs/air-gapped-deployment.md.
|
# Pre-download models to ~/.cache/huggingface/ before deploying — see docs/air-gapped-deployment.md.
|
||||||
|
|
@ -51,3 +86,19 @@
|
||||||
# When set, all /api/ requests require: Authorization: Bearer <token>
|
# When set, all /api/ requests require: Authorization: Bearer <token>
|
||||||
# Generate a token: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
# Generate a token: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
||||||
# TURNSTONE_API_KEY=your-secret-token-here
|
# TURNSTONE_API_KEY=your-secret-token-here
|
||||||
|
|
||||||
|
# --- The Orchard (harvest receiver only) ---
|
||||||
|
# Set on the central harvest.circuitforge.tech instance to enable branch management.
|
||||||
|
# TURNSTONE_ORCHARD_ADMIN_KEY=your-admin-secret-here
|
||||||
|
# TURNSTONE_ORCHARD_DATA_ROOT=/devl/docker/turnstone-submissions
|
||||||
|
# TURNSTONE_ORCHARD_CADDYFILE=/devl/caddy-proxy/Caddyfile
|
||||||
|
# TURNSTONE_ORCHARD_CADDY_CONTAINER=caddy-proxy
|
||||||
|
# TURNSTONE_ORCHARD_HARVEST_HOST=https://harvest.circuitforge.tech
|
||||||
|
# TURNSTONE_ORCHARD_PORT_BASE=8538
|
||||||
|
# TURNSTONE_ORCHARD_IMAGE=localhost/turnstone:latest
|
||||||
|
|
||||||
|
# --- Orchard branch (submitting node) ---
|
||||||
|
# Set TURNSTONE_SUBMIT_ENDPOINT to push pattern-matched log entries to the harvest receiver.
|
||||||
|
# Generate your branch slug and API key via: POST /api/orchard/graft on the harvest instance.
|
||||||
|
# TURNSTONE_SUBMIT_ENDPOINT=https://harvest.circuitforge.tech/your-slug
|
||||||
|
# TURNSTONE_BRANCH_KEY=api-key-from-graft-response
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,13 @@
|
||||||
"""Context fact and document CRUD — MIT licensed."""
|
"""Context fact and document CRUD — MIT licensed."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sqlite3
|
|
||||||
import uuid
|
import uuid
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class ContextFact:
|
class ContextFact:
|
||||||
|
|
@ -28,19 +29,8 @@ class ContextDocument:
|
||||||
uploaded_at: str
|
uploaded_at: str
|
||||||
|
|
||||||
|
|
||||||
def _connect(db_path: Path) -> sqlite3.Connection:
|
|
||||||
# timeout=30: retry for up to 30 s when another writer (e.g. the glean
|
|
||||||
# collector) holds a WAL write lock. PRAGMA busy_timeout is a SQLite-level
|
|
||||||
# hint that operates after the connection is open; the Python sqlite3 module's
|
|
||||||
# own retry loop is controlled solely by this timeout= argument.
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
|
||||||
conn.execute("PRAGMA foreign_keys=ON")
|
|
||||||
conn.row_factory = sqlite3.Row
|
|
||||||
return conn
|
|
||||||
|
|
||||||
|
|
||||||
def add_fact(db_path: Path, category: str, key: str, value: str, source: str | None = None) -> ContextFact:
|
def add_fact(db_path: Path, category: str, key: str, value: str, source: str | None = None) -> ContextFact:
|
||||||
|
tid = resolve_tenant_id()
|
||||||
fact = ContextFact(
|
fact = ContextFact(
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
category=category,
|
category=category,
|
||||||
|
|
@ -49,27 +39,28 @@ def add_fact(db_path: Path, category: str, key: str, value: str, source: str | N
|
||||||
source=source,
|
source=source,
|
||||||
created_at=datetime.now(timezone.utc).isoformat(),
|
created_at=datetime.now(timezone.utc).isoformat(),
|
||||||
)
|
)
|
||||||
conn = _connect(db_path)
|
with get_conn(db_path) as conn:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT INTO context_facts(id, category, key, value, source, created_at) VALUES (?,?,?,?,?,?)",
|
"INSERT INTO context_facts(id, tenant_id, category, key, value, source, created_at) VALUES (?,?,?,?,?,?,?)",
|
||||||
(fact.id, fact.category, fact.key, fact.value, fact.source, fact.created_at),
|
(fact.id, tid, fact.category, fact.key, fact.value, fact.source, fact.created_at),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
|
||||||
return fact
|
return fact
|
||||||
|
|
||||||
|
|
||||||
def list_facts(db_path: Path, category: str | None = None) -> list[ContextFact]:
|
def list_facts(db_path: Path, category: str | None = None) -> list[ContextFact]:
|
||||||
conn = _connect(db_path)
|
tid = resolve_tenant_id()
|
||||||
if category:
|
with get_conn(db_path) as conn:
|
||||||
rows = conn.execute(
|
if category:
|
||||||
"SELECT * FROM context_facts WHERE category=? ORDER BY created_at", (category,)
|
rows = conn.execute(
|
||||||
).fetchall()
|
"SELECT * FROM context_facts WHERE category=? AND (tenant_id=? OR tenant_id='') ORDER BY created_at",
|
||||||
else:
|
(category, tid),
|
||||||
rows = conn.execute(
|
).fetchall()
|
||||||
"SELECT * FROM context_facts ORDER BY category, created_at"
|
else:
|
||||||
).fetchall()
|
rows = conn.execute(
|
||||||
conn.close()
|
"SELECT * FROM context_facts WHERE (tenant_id=? OR tenant_id='') ORDER BY category, created_at",
|
||||||
|
(tid,),
|
||||||
|
).fetchall()
|
||||||
return [
|
return [
|
||||||
ContextFact(
|
ContextFact(
|
||||||
id=r["id"], category=r["category"], key=r["key"],
|
id=r["id"], category=r["category"], key=r["key"],
|
||||||
|
|
@ -80,10 +71,13 @@ def list_facts(db_path: Path, category: str | None = None) -> list[ContextFact]:
|
||||||
|
|
||||||
|
|
||||||
def delete_fact(db_path: Path, fact_id: str) -> bool:
|
def delete_fact(db_path: Path, fact_id: str) -> bool:
|
||||||
conn = _connect(db_path)
|
tid = resolve_tenant_id()
|
||||||
cursor = conn.execute("DELETE FROM context_facts WHERE id=?", (fact_id,))
|
with get_conn(db_path) as conn:
|
||||||
conn.commit()
|
cursor = conn.execute(
|
||||||
conn.close()
|
"DELETE FROM context_facts WHERE id=? AND (tenant_id=? OR tenant_id='')",
|
||||||
|
(fact_id, tid),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
return cursor.rowcount > 0
|
return cursor.rowcount > 0
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -94,6 +88,7 @@ def add_document(
|
||||||
full_text: str,
|
full_text: str,
|
||||||
file_size: int | None = None,
|
file_size: int | None = None,
|
||||||
) -> ContextDocument:
|
) -> ContextDocument:
|
||||||
|
tid = resolve_tenant_id()
|
||||||
doc = ContextDocument(
|
doc = ContextDocument(
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
filename=filename,
|
filename=filename,
|
||||||
|
|
@ -102,24 +97,24 @@ def add_document(
|
||||||
file_size=file_size,
|
file_size=file_size,
|
||||||
uploaded_at=datetime.now(timezone.utc).isoformat(),
|
uploaded_at=datetime.now(timezone.utc).isoformat(),
|
||||||
)
|
)
|
||||||
conn = _connect(db_path)
|
with get_conn(db_path) as conn:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT INTO context_documents(id, filename, doc_type, full_text, file_size, uploaded_at)"
|
"INSERT INTO context_documents(id, tenant_id, filename, doc_type, full_text, file_size, uploaded_at)"
|
||||||
" VALUES (?,?,?,?,?,?)",
|
" VALUES (?,?,?,?,?,?,?)",
|
||||||
(doc.id, doc.filename, doc.doc_type, doc.full_text, doc.file_size, doc.uploaded_at),
|
(doc.id, tid, doc.filename, doc.doc_type, doc.full_text, doc.file_size, doc.uploaded_at),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
def list_documents(db_path: Path) -> list[ContextDocument]:
|
def list_documents(db_path: Path) -> list[ContextDocument]:
|
||||||
conn = _connect(db_path)
|
tid = resolve_tenant_id()
|
||||||
rows = conn.execute(
|
with get_conn(db_path) as conn:
|
||||||
"SELECT id, filename, doc_type, full_text, file_size, uploaded_at"
|
rows = conn.execute(
|
||||||
" FROM context_documents ORDER BY uploaded_at DESC"
|
"SELECT id, filename, doc_type, full_text, file_size, uploaded_at"
|
||||||
).fetchall()
|
" FROM context_documents WHERE (tenant_id=? OR tenant_id='') ORDER BY uploaded_at DESC",
|
||||||
conn.close()
|
(tid,),
|
||||||
|
).fetchall()
|
||||||
return [
|
return [
|
||||||
ContextDocument(
|
ContextDocument(
|
||||||
id=r["id"], filename=r["filename"], doc_type=r["doc_type"],
|
id=r["id"], filename=r["filename"], doc_type=r["doc_type"],
|
||||||
|
|
@ -130,8 +125,11 @@ def list_documents(db_path: Path) -> list[ContextDocument]:
|
||||||
|
|
||||||
|
|
||||||
def delete_document(db_path: Path, doc_id: str) -> bool:
|
def delete_document(db_path: Path, doc_id: str) -> bool:
|
||||||
conn = _connect(db_path)
|
tid = resolve_tenant_id()
|
||||||
cursor = conn.execute("DELETE FROM context_documents WHERE id=?", (doc_id,))
|
with get_conn(db_path) as conn:
|
||||||
conn.commit()
|
cursor = conn.execute(
|
||||||
conn.close()
|
"DELETE FROM context_documents WHERE id=? AND (tenant_id=? OR tenant_id='')",
|
||||||
|
(doc_id, tid),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
return cursor.rowcount > 0
|
return cursor.rowcount > 0
|
||||||
|
|
|
||||||
36
app/db/__init__.py
Normal file
36
app/db/__init__.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
"""Turnstone database abstraction — unified SQLite / Postgres interface.
|
||||||
|
|
||||||
|
Public API:
|
||||||
|
BACKEND — Backend.SQLITE or Backend.POSTGRES
|
||||||
|
get_conn(path) — context manager yielding a DbConn
|
||||||
|
resolve_tenant_id() — this node's tenant ID (env or hostname)
|
||||||
|
q(sql) — rewrite ? placeholders to %s for Postgres
|
||||||
|
frag — SQL fragment helpers (insert_or_ignore, source_group_expr, …)
|
||||||
|
ensure_schema — idempotent schema init
|
||||||
|
close_pool — call during shutdown when using Postgres
|
||||||
|
"""
|
||||||
|
from app.db.backend import BACKEND, Backend
|
||||||
|
from app.db.conn import DbConn, close_pool, get_conn
|
||||||
|
from app.db.dialect import frag, q
|
||||||
|
from app.db.schema import (
|
||||||
|
ensure_context_schema,
|
||||||
|
ensure_incidents_schema,
|
||||||
|
ensure_schema,
|
||||||
|
migrate_incidents_to_dedicated_db,
|
||||||
|
)
|
||||||
|
from app.db.tenant import resolve_tenant_id
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"BACKEND",
|
||||||
|
"Backend",
|
||||||
|
"DbConn",
|
||||||
|
"close_pool",
|
||||||
|
"get_conn",
|
||||||
|
"frag",
|
||||||
|
"q",
|
||||||
|
"ensure_schema",
|
||||||
|
"ensure_context_schema",
|
||||||
|
"ensure_incidents_schema",
|
||||||
|
"migrate_incidents_to_dedicated_db",
|
||||||
|
"resolve_tenant_id",
|
||||||
|
]
|
||||||
20
app/db/backend.py
Normal file
20
app/db/backend.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
"""Backend detection — SQLITE (default) or POSTGRES based on DATABASE_URL."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class Backend(Enum):
|
||||||
|
SQLITE = "sqlite"
|
||||||
|
POSTGRES = "postgres"
|
||||||
|
|
||||||
|
|
||||||
|
def _detect() -> Backend:
|
||||||
|
url = os.environ.get("DATABASE_URL", "")
|
||||||
|
if url.startswith(("postgresql://", "postgres://", "postgresql+psycopg://")):
|
||||||
|
return Backend.POSTGRES
|
||||||
|
return Backend.SQLITE
|
||||||
|
|
||||||
|
|
||||||
|
BACKEND: Backend = _detect()
|
||||||
137
app/db/conn.py
Normal file
137
app/db/conn.py
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
"""Uniform connection wrapper over sqlite3 and psycopg3.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
with get_conn(db_path) as conn:
|
||||||
|
conn.execute("SELECT ...", (param,))
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
For Postgres, db_path is ignored — all connections go through the shared pool.
|
||||||
|
The pool is initialized lazily on first use from DATABASE_URL.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Generator
|
||||||
|
|
||||||
|
from app.db.backend import BACKEND, Backend
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_pool: Any = None # psycopg_pool.ConnectionPool, typed as Any to avoid import-time errors
|
||||||
|
|
||||||
|
|
||||||
|
class _NopCursor:
|
||||||
|
"""Returned when a PRAGMA or other SQLite-only statement is skipped on Postgres."""
|
||||||
|
rowcount = 0
|
||||||
|
|
||||||
|
def fetchall(self) -> list:
|
||||||
|
return []
|
||||||
|
|
||||||
|
def fetchone(self) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter([])
|
||||||
|
|
||||||
|
|
||||||
|
class DbConn:
    """Wraps a raw sqlite3 or psycopg connection with a uniform execute API.

    SQL is written with SQLite-style ``?`` placeholders. On Postgres the
    wrapper rewrites them to ``%s`` and silently skips PRAGMA statements.

    Row access is always dict-like:
      - SQLite:   conn.row_factory = sqlite3.Row  (supports row["col"] and row[0])
      - Postgres: row_factory = dict_row           (returns plain dicts)
    """

    __slots__ = ("_c", "_backend")

    def __init__(self, raw: Any, backend: Backend) -> None:
        """Store the raw driver connection and the backend it belongs to."""
        self._c = raw
        self._backend = backend

    @staticmethod
    def _rewrite_placeholders(sql: str) -> str:
        """Rewrite ``?`` placeholders to ``%s``, leaving quoted text untouched.

        A blanket ``str.replace("?", "%s")`` also rewrites question marks
        inside single-quoted string literals and double-quoted identifiers,
        which corrupts the SQL text and makes the driver expect more
        parameters than were supplied. Quoted spans (with ``''`` / ``""``
        as escaped quotes) are therefore matched first and copied verbatim.

        SQL already using ``%s`` passes through unchanged. Comments
        (``--`` and ``/* */``) are not parsed; an unterminated quote falls
        back to plain replacement for the rest of the statement.
        """
        import re  # local import: keeps this block self-contained

        token = re.compile(r"'(?:[^']|'')*'|\"(?:[^\"]|\"\")*\"|\?")
        return token.sub(
            lambda m: "%s" if m.group(0) == "?" else m.group(0),
            sql,
        )

    def _prep(self, sql: str) -> str | None:
        """Return None to skip (PRAGMA on Postgres), else return ready-to-execute SQL."""
        stripped = sql.strip()
        if self._backend != Backend.POSTGRES:
            return stripped
        # PRAGMA is SQLite-only; there is nothing to run on Postgres.
        if stripped.lower().startswith("pragma"):
            return None
        return self._rewrite_placeholders(stripped)

    def execute(self, sql: str, params: Any = ()) -> Any:
        """Run one statement and return the driver cursor (or a _NopCursor if skipped)."""
        prepared = self._prep(sql)
        if prepared is None:
            return _NopCursor()
        return self._c.execute(prepared, params)

    def executemany(self, sql: str, params_seq: Any) -> Any:
        """Run one statement per parameter set; returns a _NopCursor if skipped."""
        prepared = self._prep(sql)
        if prepared is None:
            return _NopCursor()
        return self._c.executemany(prepared, params_seq)

    def commit(self) -> None:
        """Commit the current transaction on the underlying connection."""
        self._c.commit()

    def close(self) -> None:
        """Close the underlying connection."""
        self._c.close()

    def __enter__(self) -> "DbConn":
        return self

    def __exit__(self, *_: Any) -> None:
        # Closes unconditionally; does not commit and does not suppress exceptions.
        self.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pool() -> Any:
    """Return the shared Postgres connection pool, creating it on first use.

    The pool is built from the DATABASE_URL environment variable and cached
    in the module-level ``_pool``; later calls return the cached pool.

    Returns:
        The psycopg_pool.ConnectionPool instance (typed Any so this module
        imports cleanly when psycopg is not installed).

    Raises:
        RuntimeError: if psycopg_pool cannot be imported, or DATABASE_URL is unset.
    """
    global _pool
    if _pool is not None:
        return _pool

    # Only the import is guarded, so an ImportError raised later during pool
    # construction is not misreported as "package not installed".
    try:
        from psycopg_pool import ConnectionPool  # type: ignore[import]
    except ImportError as exc:
        raise RuntimeError(
            "psycopg[binary,pool] is required for Postgres backend. "
            "Run: pip install 'psycopg[binary,pool]'"
        ) from exc

    # Explicit lookup instead of catching KeyError around everything: a
    # KeyError from inside the pool must not be reported as a missing variable.
    url = os.environ.get("DATABASE_URL")
    if url is None:
        raise RuntimeError("DATABASE_URL must be set when using Postgres backend")

    # Backend detection accepts the SQLAlchemy-style "postgresql+psycopg://"
    # scheme, but libpq only recognises "postgresql://" and "postgres://".
    # Drop the driver suffix so such URLs actually connect.
    sqlalchemy_prefix = "postgresql+psycopg://"
    if url.startswith(sqlalchemy_prefix):
        url = "postgresql://" + url[len(sqlalchemy_prefix):]

    _pool = ConnectionPool(url, min_size=2, max_size=10, open=True)
    logger.info("Postgres connection pool opened (DATABASE_URL set)")
    return _pool
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def get_conn(db_path: Path | None = None) -> Generator[DbConn, None, None]:
    """Context manager yielding a DbConn for the active backend.

    SQLite: opens a fresh connection to ``db_path`` (required), applies the
    WAL / busy-timeout / foreign-key pragmas, and closes it on exit.
    Postgres: borrows a connection from the shared pool; ``db_path`` is unused.

    Raises:
        ValueError: on the SQLite backend when ``db_path`` is None.
    """
    if BACKEND != Backend.POSTGRES:
        if db_path is None:
            raise ValueError("db_path is required for SQLite backend")
        sqlite_conn = sqlite3.connect(str(db_path), timeout=90.0)
        sqlite_conn.row_factory = sqlite3.Row
        try:
            for pragma in (
                "PRAGMA journal_mode=WAL",
                "PRAGMA busy_timeout=90000",
                "PRAGMA foreign_keys=ON",
            ):
                sqlite_conn.execute(pragma)
            yield DbConn(sqlite_conn, BACKEND)
        finally:
            sqlite_conn.close()
        return

    # Resolve the pool before importing psycopg so a missing driver surfaces
    # through the pool helper's error first.
    shared_pool = _get_pool()
    from psycopg.rows import dict_row  # type: ignore[import]

    with shared_pool.connection() as pooled:
        pooled.row_factory = dict_row
        yield DbConn(pooled, BACKEND)
|
||||||
|
|
||||||
|
|
||||||
|
def close_pool() -> None:
    """Shut down the Postgres pool if one was opened; intended for application shutdown.

    Does nothing when no pool exists. After closing, the cached pool is
    cleared so a later request for the pool builds a new one.
    """
    global _pool
    if _pool is None:
        return
    _pool.close()
    _pool = None
    logger.info("Postgres connection pool closed")
|
||||||
93
app/db/dialect.py
Normal file
93
app/db/dialect.py
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
"""Per-backend SQL fragments and placeholder rewriting.
|
||||||
|
|
||||||
|
All production SQL should be written with SQLite-style `?` placeholders.
|
||||||
|
Call q(sql) before passing to execute/executemany — it rewrites to %s for
|
||||||
|
Postgres and leaves SQLite queries untouched.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.db.backend import BACKEND, Backend
|
||||||
|
|
||||||
|
|
||||||
|
def q(sql: str) -> str:
    """Translate SQLite-style ``?`` placeholders for the active backend.

    On Postgres every ``?`` becomes ``%s``; on SQLite the SQL is returned
    unchanged.
    """
    return sql.replace("?", "%s") if BACKEND == Backend.POSTGRES else sql
|
||||||
|
|
||||||
|
|
||||||
|
class _Fragments:
    """SQL fragments that differ between backends.

    Each member returns the text for the backend selected by the module-level
    BACKEND. Note the mixed access style: ``insert_or_ignore`` and
    ``on_conflict_ignore`` are properties, everything else is a method.
    Postgres variants that carry placeholders already use ``%s``.
    """

    @property
    def insert_or_ignore(self) -> str:
        # Postgres has no "INSERT OR IGNORE"; it gets a plain INSERT here, so
        # the caller must append an ON CONFLICT ... DO NOTHING clause itself.
        return "INSERT" if BACKEND == Backend.POSTGRES else "INSERT OR IGNORE"

    @property
    def on_conflict_ignore(self) -> str:
        # Always empty: a generic ON CONFLICT clause cannot be produced because
        # Postgres needs the conflict column name(s), which vary per table.
        # For log_entries: ON CONFLICT (tenant_id, id) DO NOTHING
        # For log_entries, use insert_ignore_entries() together with
        # entries_conflict_clause() instead of this sentinel.
        return ""

    def insert_ignore_entries(self) -> str:
        """Leading ``INSERT ... INTO log_entries`` text for a duplicate-tolerant insert.

        On Postgres this is a plain INSERT; pair it with
        entries_conflict_clause() to ignore duplicates.
        """
        if BACKEND == Backend.POSTGRES:
            return "INSERT INTO log_entries"
        return "INSERT OR IGNORE INTO log_entries"

    def entries_conflict_clause(self) -> str:
        """Trailing conflict clause for log_entries inserts; empty on SQLite."""
        if BACKEND == Backend.POSTGRES:
            return "ON CONFLICT (tenant_id, id) DO NOTHING"
        return ""

    def fingerprint_upsert(self) -> str:
        """Complete upsert statement for glean_fingerprints.

        Takes five parameters in order: tenant_id, path, mtime, size, gleaned_at.
        """
        if BACKEND == Backend.POSTGRES:
            return (
                "INSERT INTO glean_fingerprints (tenant_id, path, mtime, size, gleaned_at)"
                " VALUES (%s, %s, %s, %s, %s)"
                " ON CONFLICT (tenant_id, path)"
                " DO UPDATE SET mtime=EXCLUDED.mtime, size=EXCLUDED.size, gleaned_at=EXCLUDED.gleaned_at"
            )
        return (
            "INSERT OR REPLACE INTO glean_fingerprints (tenant_id, path, mtime, size, gleaned_at)"
            " VALUES (?,?,?,?,?)"
        )

    def source_group_expr(self, col: str = "source_id") -> str:
        """SQL expression that collapses prefix:host:unit → prefix:host stem.

        ``col`` is interpolated directly into the SQL text, so it must be a
        trusted column name, never user input.
        """
        if BACKEND == Backend.POSTGRES:
            return f"""
                CASE
                    WHEN array_length(string_to_array({col}, ':'), 1) >= 3
                    THEN split_part({col}, ':', 1) || ':' || split_part({col}, ':', 2)
                    ELSE {col}
                END
            """
        # SQLite has no split_part: find the second ':' with nested INSTR/SUBSTR
        # and keep everything before it.
        return f"""
            CASE
                WHEN INSTR(SUBSTR({col}, INSTR({col}, ':')+1), ':') > 0
                THEN SUBSTR({col}, 1,
                            INSTR({col}, ':')
                            + INSTR(SUBSTR({col}, INSTR({col}, ':')+1), ':')
                            - 1)
                ELSE {col}
            END
        """

    def fts_match_clause(self) -> str:
        """WHERE clause fragment for FTS query. Caller supplies the query param."""
        if BACKEND == Backend.POSTGRES:
            return "text_tsv @@ websearch_to_tsquery('english', %s)"
        return "log_fts MATCH ?"

    def fts_rank_expr(self) -> str:
        """ORDER BY expression for FTS rank (best match first). Postgres needs the query twice."""
        if BACKEND == Backend.POSTGRES:
            # ts_rank returns 0..1 where higher is better; pass the query again as param
            return "ts_rank(text_tsv, websearch_to_tsquery('english', %s)) DESC"
        # FTS5 rank is negative BM25; ASC = most-negative = best match
        return "rank ASC"
|
||||||
|
|
||||||
|
|
||||||
|
frag = _Fragments()
|
||||||
537
app/db/schema.py
Normal file
537
app/db/schema.py
Normal file
|
|
@ -0,0 +1,537 @@
|
||||||
|
"""Schema creation and idempotent migrations for all Turnstone databases.
|
||||||
|
|
||||||
|
Three logical databases (main, context, incidents) map to:
|
||||||
|
- SQLite: three separate .db files (avoids write-lock contention)
|
||||||
|
- Postgres: three table-groups in one physical DB (row-level locking makes separation unnecessary)
|
||||||
|
|
||||||
|
All ensure_* functions are idempotent: safe to call on every startup.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.db.backend import BACKEND, Backend
|
||||||
|
from app.db.conn import get_conn
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SQLite DDL — kept as executescript strings (SQLite only)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_MAIN_SCHEMA_SQLITE = """
|
||||||
|
CREATE TABLE IF NOT EXISTS log_entries (
|
||||||
|
id TEXT NOT NULL,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
source_id TEXT NOT NULL,
|
||||||
|
sequence INTEGER NOT NULL,
|
||||||
|
timestamp_raw TEXT,
|
||||||
|
timestamp_iso TEXT,
|
||||||
|
ingest_time TEXT NOT NULL,
|
||||||
|
severity TEXT,
|
||||||
|
repeat_count INTEGER DEFAULT 1,
|
||||||
|
out_of_order INTEGER DEFAULT 0,
|
||||||
|
matched_patterns TEXT DEFAULT '[]',
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
anomaly_score REAL,
|
||||||
|
anomaly_label TEXT,
|
||||||
|
anomaly_scored_at TEXT,
|
||||||
|
ml_score REAL,
|
||||||
|
ml_label TEXT,
|
||||||
|
ml_scored_at TEXT,
|
||||||
|
PRIMARY KEY (tenant_id, id)
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_tenant_src ON log_entries(tenant_id, source_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_timestamp ON log_entries(timestamp_iso);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_ts_repeat ON log_entries(timestamp_iso, repeat_count);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_severity ON log_entries(tenant_id, severity);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_ml_scored ON log_entries(tenant_id, ml_scored_at);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS detections (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
entry_id TEXT NOT NULL,
|
||||||
|
source_id TEXT NOT NULL,
|
||||||
|
anomaly_label TEXT NOT NULL,
|
||||||
|
anomaly_score REAL NOT NULL,
|
||||||
|
severity TEXT NOT NULL,
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
timestamp_iso TEXT,
|
||||||
|
detected_at TEXT NOT NULL,
|
||||||
|
acknowledged INTEGER NOT NULL DEFAULT 0,
|
||||||
|
acknowledged_at TEXT,
|
||||||
|
notes TEXT NOT NULL DEFAULT '',
|
||||||
|
scorer TEXT NOT NULL DEFAULT 'anomaly'
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_detections_scorer ON detections(scorer);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS glean_fingerprints (
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
path TEXT NOT NULL,
|
||||||
|
mtime REAL NOT NULL,
|
||||||
|
size INTEGER NOT NULL,
|
||||||
|
gleaned_at TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (tenant_id, path)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS incidents (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
label TEXT NOT NULL,
|
||||||
|
issue_type TEXT NOT NULL DEFAULT '',
|
||||||
|
started_at TEXT,
|
||||||
|
ended_at TEXT,
|
||||||
|
notes TEXT NOT NULL DEFAULT '',
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
severity TEXT NOT NULL DEFAULT 'medium'
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_incidents_time ON incidents(started_at, ended_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_incidents_tenant ON incidents(tenant_id);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS received_bundles (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
source_host TEXT NOT NULL,
|
||||||
|
issue_type TEXT NOT NULL DEFAULT '',
|
||||||
|
label TEXT NOT NULL,
|
||||||
|
severity TEXT NOT NULL DEFAULT 'medium',
|
||||||
|
started_at TEXT,
|
||||||
|
bundled_at TEXT NOT NULL,
|
||||||
|
entry_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
bundle_json TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_bundles_bundled ON received_bundles(bundled_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_bundles_type ON received_bundles(issue_type);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS sent_bundles (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
incident_id TEXT NOT NULL,
|
||||||
|
exported_at TEXT NOT NULL,
|
||||||
|
sanitized INTEGER NOT NULL DEFAULT 0,
|
||||||
|
entry_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
bundle_json TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_sent_bundles_incident ON sent_bundles(incident_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_sent_bundles_time ON sent_bundles(exported_at);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS blocklist_candidates (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
domain_or_ip TEXT NOT NULL,
|
||||||
|
source_device_ip TEXT,
|
||||||
|
source_device_name TEXT,
|
||||||
|
first_seen TEXT NOT NULL,
|
||||||
|
last_seen TEXT NOT NULL,
|
||||||
|
hit_count INTEGER DEFAULT 1,
|
||||||
|
status TEXT DEFAULT 'pending',
|
||||||
|
pushed_at TEXT,
|
||||||
|
log_evidence TEXT DEFAULT '[]',
|
||||||
|
matched_rule TEXT,
|
||||||
|
llm_score REAL,
|
||||||
|
llm_reason TEXT
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_blocklist_device ON blocklist_candidates(source_device_ip);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_blocklist_status ON blocklist_candidates(status);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_blocklist_domain ON blocklist_candidates(domain_or_ip);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_blocklist_tenant ON blocklist_candidates(tenant_id);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS ssh_targets (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
label TEXT NOT NULL,
|
||||||
|
host TEXT NOT NULL,
|
||||||
|
port INTEGER NOT NULL DEFAULT 22,
|
||||||
|
user TEXT NOT NULL,
|
||||||
|
key_path TEXT NOT NULL,
|
||||||
|
last_tested TEXT,
|
||||||
|
last_ok INTEGER DEFAULT NULL,
|
||||||
|
last_error TEXT,
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
|
_CONTEXT_SCHEMA_SQLITE = """
|
||||||
|
CREATE TABLE IF NOT EXISTS context_facts (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
category TEXT NOT NULL,
|
||||||
|
key TEXT NOT NULL,
|
||||||
|
value TEXT NOT NULL,
|
||||||
|
source TEXT,
|
||||||
|
created_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_facts_category ON context_facts(category);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_facts_key ON context_facts(key);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_facts_tenant ON context_facts(tenant_id);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS context_documents (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
doc_type TEXT NOT NULL,
|
||||||
|
full_text TEXT NOT NULL,
|
||||||
|
file_size INTEGER,
|
||||||
|
uploaded_at TEXT NOT NULL
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_docs_tenant ON context_documents(tenant_id);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS context_chunks (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
document_id TEXT NOT NULL REFERENCES context_documents(id) ON DELETE CASCADE,
|
||||||
|
chunk_index INTEGER NOT NULL,
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
embedding BLOB
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_chunks_doc ON context_chunks(document_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_chunks_tenant ON context_chunks(tenant_id);
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Postgres DDL — executed statement-by-statement
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_MAIN_SCHEMA_PG_STMTS = [
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS log_entries (
|
||||||
|
id TEXT NOT NULL,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
source_id TEXT NOT NULL,
|
||||||
|
sequence INTEGER NOT NULL,
|
||||||
|
timestamp_raw TEXT,
|
||||||
|
timestamp_iso TEXT,
|
||||||
|
ingest_time TEXT NOT NULL,
|
||||||
|
severity TEXT,
|
||||||
|
repeat_count INTEGER DEFAULT 1,
|
||||||
|
out_of_order INTEGER DEFAULT 0,
|
||||||
|
matched_patterns TEXT DEFAULT '[]',
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
text_tsv tsvector,
|
||||||
|
anomaly_score DOUBLE PRECISION,
|
||||||
|
anomaly_label TEXT,
|
||||||
|
anomaly_scored_at TEXT,
|
||||||
|
ml_score DOUBLE PRECISION,
|
||||||
|
ml_label TEXT,
|
||||||
|
ml_scored_at TEXT,
|
||||||
|
PRIMARY KEY (tenant_id, id)
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_tenant_src ON log_entries(tenant_id, source_id)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_timestamp ON log_entries(timestamp_iso)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_severity ON log_entries(tenant_id, severity)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_fts_gin ON log_entries USING GIN(text_tsv)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_ml_scored ON log_entries(tenant_id, ml_scored_at)",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS detections (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
entry_id TEXT NOT NULL,
|
||||||
|
source_id TEXT NOT NULL,
|
||||||
|
anomaly_label TEXT NOT NULL,
|
||||||
|
anomaly_score DOUBLE PRECISION NOT NULL,
|
||||||
|
severity TEXT NOT NULL,
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
timestamp_iso TEXT,
|
||||||
|
detected_at TEXT NOT NULL,
|
||||||
|
acknowledged INTEGER NOT NULL DEFAULT 0,
|
||||||
|
acknowledged_at TEXT,
|
||||||
|
notes TEXT NOT NULL DEFAULT '',
|
||||||
|
scorer TEXT NOT NULL DEFAULT 'anomaly'
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_detections_scorer ON detections(scorer)",
|
||||||
|
"""
|
||||||
|
CREATE OR REPLACE FUNCTION _ts_update_text_tsv() RETURNS trigger AS $$
|
||||||
|
BEGIN
|
||||||
|
NEW.text_tsv := to_tsvector('english', COALESCE(NEW.text, ''));
|
||||||
|
RETURN NEW;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql
|
||||||
|
""",
|
||||||
|
"""
|
||||||
|
DO $$ BEGIN
|
||||||
|
IF NOT EXISTS (
|
||||||
|
SELECT 1 FROM pg_trigger WHERE tgname = 'trig_log_entries_tsv'
|
||||||
|
) THEN
|
||||||
|
CREATE TRIGGER trig_log_entries_tsv
|
||||||
|
BEFORE INSERT OR UPDATE OF text ON log_entries
|
||||||
|
FOR EACH ROW EXECUTE FUNCTION _ts_update_text_tsv();
|
||||||
|
END IF;
|
||||||
|
END $$
|
||||||
|
""",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS glean_fingerprints (
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
path TEXT NOT NULL,
|
||||||
|
mtime DOUBLE PRECISION NOT NULL,
|
||||||
|
size BIGINT NOT NULL,
|
||||||
|
gleaned_at TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (tenant_id, path)
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS incidents (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
label TEXT NOT NULL,
|
||||||
|
issue_type TEXT NOT NULL DEFAULT '',
|
||||||
|
started_at TEXT,
|
||||||
|
ended_at TEXT,
|
||||||
|
notes TEXT NOT NULL DEFAULT '',
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
severity TEXT NOT NULL DEFAULT 'medium'
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_incidents_time ON incidents(started_at, ended_at)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_incidents_tenant ON incidents(tenant_id)",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS received_bundles (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
source_host TEXT NOT NULL,
|
||||||
|
issue_type TEXT NOT NULL DEFAULT '',
|
||||||
|
label TEXT NOT NULL,
|
||||||
|
severity TEXT NOT NULL DEFAULT 'medium',
|
||||||
|
started_at TEXT,
|
||||||
|
bundled_at TEXT NOT NULL,
|
||||||
|
entry_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
bundle_json TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_bundles_bundled ON received_bundles(bundled_at)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_bundles_type ON received_bundles(issue_type)",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS sent_bundles (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
incident_id TEXT NOT NULL,
|
||||||
|
exported_at TEXT NOT NULL,
|
||||||
|
sanitized INTEGER NOT NULL DEFAULT 0,
|
||||||
|
entry_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
bundle_json TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_sent_bundles_incident ON sent_bundles(incident_id)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_sent_bundles_time ON sent_bundles(exported_at)",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS blocklist_candidates (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
domain_or_ip TEXT NOT NULL,
|
||||||
|
source_device_ip TEXT,
|
||||||
|
source_device_name TEXT,
|
||||||
|
first_seen TEXT NOT NULL,
|
||||||
|
last_seen TEXT NOT NULL,
|
||||||
|
hit_count INTEGER DEFAULT 1,
|
||||||
|
status TEXT DEFAULT 'pending',
|
||||||
|
pushed_at TEXT,
|
||||||
|
log_evidence TEXT DEFAULT '[]',
|
||||||
|
matched_rule TEXT,
|
||||||
|
llm_score DOUBLE PRECISION,
|
||||||
|
llm_reason TEXT
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_blocklist_device ON blocklist_candidates(source_device_ip)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_blocklist_status ON blocklist_candidates(status)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_blocklist_domain ON blocklist_candidates(domain_or_ip)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_blocklist_tenant ON blocklist_candidates(tenant_id)",
|
||||||
|
]
|
||||||
|
|
||||||
|
_CONTEXT_SCHEMA_PG_STMTS = [
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS context_facts (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
category TEXT NOT NULL,
|
||||||
|
key TEXT NOT NULL,
|
||||||
|
value TEXT NOT NULL,
|
||||||
|
source TEXT,
|
||||||
|
created_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_facts_category ON context_facts(category)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_facts_key ON context_facts(key)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_facts_tenant ON context_facts(tenant_id)",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS context_documents (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
doc_type TEXT NOT NULL,
|
||||||
|
full_text TEXT NOT NULL,
|
||||||
|
file_size BIGINT,
|
||||||
|
uploaded_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_docs_tenant ON context_documents(tenant_id)",
|
||||||
|
"""
|
||||||
|
CREATE TABLE IF NOT EXISTS context_chunks (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
document_id TEXT NOT NULL REFERENCES context_documents(id) ON DELETE CASCADE,
|
||||||
|
chunk_index INTEGER NOT NULL,
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
embedding BYTEA
|
||||||
|
)
|
||||||
|
""",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_chunks_doc ON context_chunks(document_id)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_chunks_tenant ON context_chunks(tenant_id)",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SQLite additive column migrations — applied after CREATE TABLE on every boot
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_MAIN_MIGRATIONS_SQLITE = [
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE incidents ADD COLUMN issue_type TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE incidents ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE received_bundles ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE sent_bundles ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE blocklist_candidates ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE glean_fingerprints ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE glean_fingerprints ADD COLUMN mtime REAL",
|
||||||
|
"ALTER TABLE glean_fingerprints ADD COLUMN size INTEGER",
|
||||||
|
"ALTER TABLE glean_fingerprints ADD COLUMN gleaned_at TEXT",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN anomaly_score REAL",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN anomaly_label TEXT",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN anomaly_scored_at TEXT",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN ml_score REAL",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN ml_label TEXT",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN ml_scored_at TEXT",
|
||||||
|
"ALTER TABLE detections ADD COLUMN scorer TEXT NOT NULL DEFAULT 'anomaly'",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN anonymized INTEGER DEFAULT NULL",
|
||||||
|
]
|
||||||
|
|
||||||
|
_CONTEXT_MIGRATIONS_SQLITE = [
|
||||||
|
"ALTER TABLE context_facts ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE context_documents ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
"ALTER TABLE context_chunks ADD COLUMN tenant_id TEXT NOT NULL DEFAULT ''",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _run_sqlite_migrations(conn: sqlite3.Connection, stmts: list[str]) -> None:
|
||||||
|
for stmt in stmts:
|
||||||
|
try:
|
||||||
|
conn.execute(stmt)
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass # column already exists or table not present yet — both are fine
|
||||||
|
|
||||||
|
|
||||||
|
def _run_pg_stmts(stmts: list[str]) -> None:
    """Run Postgres DDL statements one at a time on an autocommit connection.

    The connection URL is read from the ``DATABASE_URL`` environment variable.
    Because the connection is opened with ``autocommit=True``, each statement
    is committed on its own. Statements that are empty after stripping
    whitespace are skipped.
    """
    import os

    from psycopg import connect as pg_connect  # type: ignore[import]

    dsn = os.environ["DATABASE_URL"]
    with pg_connect(dsn, autocommit=True) as pg:
        for sql in (raw.strip() for raw in stmts):
            if not sql:
                continue
            pg.execute(sql)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def ensure_schema(db_path: Path) -> None:
    """Ensure main log/incidents/blocklist tables exist. Idempotent.

    Args:
        db_path: Path of the SQLite main database file. Unused on Postgres,
            where the statements in ``_MAIN_SCHEMA_PG_STMTS`` are executed
            instead.
    """
    if BACKEND == Backend.POSTGRES:
        _run_pg_stmts(_MAIN_SCHEMA_PG_STMTS)
        logger.debug("Postgres main schema verified")
        return

    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # Pass 1 — existing databases: add tenant_id (and other new columns)
        # to pre-existing tables BEFORE index creation touches them.
        _run_sqlite_migrations(conn, _MAIN_MIGRATIONS_SQLITE)
        conn.commit()
        conn.executescript(_MAIN_SCHEMA_SQLITE)
        # Pass 2 — fresh databases: pass 1 found no tables to alter, so any
        # column that exists only in the migration list (e.g. anonymized on
        # log_entries) would otherwise be missing until the next startup.
        # Re-running is safe because already-present columns are skipped.
        _run_sqlite_migrations(conn, _MAIN_MIGRATIONS_SQLITE)
        conn.commit()
    finally:
        # Release the file handle even when a statement above raises.
        conn.close()
    logger.debug("SQLite main schema verified at %s", db_path)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_context_schema(db_path: Path) -> None:
    """Ensure context KB tables exist. Idempotent.

    Args:
        db_path: Path of the SQLite context database file. Unused on
            Postgres, where the statements in ``_CONTEXT_SCHEMA_PG_STMTS``
            are executed instead.
    """
    if BACKEND == Backend.POSTGRES:
        _run_pg_stmts(_CONTEXT_SCHEMA_PG_STMTS)
        logger.debug("Postgres context schema verified")
        return

    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        # Migrations run first so tenant_id exists on pre-existing tables
        # before the schema script creates indexes that reference it.
        _run_sqlite_migrations(conn, _CONTEXT_MIGRATIONS_SQLITE)
        conn.commit()
        conn.executescript(_CONTEXT_SCHEMA_SQLITE)
    finally:
        # Release the file handle even when a statement above raises.
        conn.close()
    logger.debug("SQLite context schema verified at %s", db_path)
|
||||||
|
|
||||||
|
|
||||||
|
def migrate_incidents_to_dedicated_db(main_db: Path, incidents_db: Path) -> int:
    """One-shot migration: copy incidents/bundles rows from main DB to incidents DB.

    Safe to call on every startup — rows already in incidents_db are skipped
    via INSERT OR IGNORE. No-op for Postgres (single DB, no migration needed).

    Args:
        main_db: Path of the SQLite main database to read from.
        incidents_db: Path of the SQLite incidents database to write to.

    Returns:
        Number of rows actually inserted into incidents_db by this call
        (rows skipped as already present are not counted); 0 on Postgres.
    """
    if BACKEND == Backend.POSTGRES:
        return 0

    src = sqlite3.connect(str(main_db), timeout=30.0)
    try:
        src.row_factory = sqlite3.Row
        dst = sqlite3.connect(str(incidents_db), timeout=30.0)
        try:
            # total_changes only grows for rows really written, so the delta
            # excludes rows that INSERT OR IGNORE skipped.
            changes_before = dst.total_changes
            for table in ("incidents", "received_bundles", "sent_bundles"):
                try:
                    rows = src.execute(f"SELECT * FROM {table}").fetchall()  # noqa: S608
                except sqlite3.OperationalError:
                    continue  # table could not be read from the main DB
                if not rows:
                    continue
                column_names = rows[0].keys()
                cols = ", ".join(column_names)
                placeholders = ", ".join("?" * len(column_names))
                dst.executemany(
                    f"INSERT OR IGNORE INTO {table} ({cols}) VALUES ({placeholders})",  # noqa: S608
                    [tuple(r) for r in rows],
                )
            dst.commit()
            return dst.total_changes - changes_before
        finally:
            dst.close()
    finally:
        # Both handles are released even if a copy step raises.
        src.close()
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_incidents_schema(db_path: Path) -> None:
    """Ensure incidents/bundles tables exist. Idempotent.

    For Postgres, incidents live in the same DB as log_entries (already created by
    ensure_schema), so this is a no-op.

    Args:
        db_path: Path of the SQLite incidents database file. Unused on Postgres.
    """
    if BACKEND == Backend.POSTGRES:
        return

    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # Same migration list and schema script as the main DB are applied
        # here; migrations for tables absent from this file are skipped.
        _run_sqlite_migrations(conn, _MAIN_MIGRATIONS_SQLITE)
        conn.commit()
        conn.executescript(_MAIN_SCHEMA_SQLITE)
    finally:
        # Release the file handle even when a statement above raises.
        conn.close()
    logger.debug("SQLite incidents schema verified at %s", db_path)
|
||||||
12
app/db/tenant.py
Normal file
12
app/db/tenant.py
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
"""Tenant ID resolution — TURNSTONE_TENANT_ID env var, hostname fallback."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def resolve_tenant_id() -> str:
    """Resolve the tenant ID for this node and memoize it.

    A non-empty ``TURNSTONE_TENANT_ID`` environment variable takes priority;
    when it is unset or empty the system hostname is used. The first result
    is cached, so later changes to the environment are not observed.
    """
    configured = os.environ.get("TURNSTONE_TENANT_ID")
    if configured:
        return configured
    return socket.gethostname()
|
||||||
|
|
@ -1,18 +1,19 @@
|
||||||
"""Upload adapter: processes file bytes and writes to context store — MIT licensed."""
|
"""Upload adapter: processes file bytes and writes to context store — MIT licensed."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sqlite3
|
|
||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from app.context.chunker import process_upload
|
from app.context.chunker import process_upload
|
||||||
from app.context.store import add_document, add_fact
|
from app.context.store import add_document, add_fact
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
|
|
||||||
|
|
||||||
def glean_upload(db_path: Path, filename: str, content: bytes) -> dict[str, Any]:
|
def glean_upload(db_path: Path, filename: str, content: bytes) -> dict[str, Any]:
|
||||||
"""Process an uploaded file and write to context store. Returns result summary."""
|
"""Process an uploaded file and write to context store. Returns result summary."""
|
||||||
doc_type, facts, chunks = process_upload(filename, content)
|
doc_type, facts, chunks = process_upload(filename, content)
|
||||||
|
tid = resolve_tenant_id()
|
||||||
|
|
||||||
doc = add_document(
|
doc = add_document(
|
||||||
db_path,
|
db_path,
|
||||||
|
|
@ -25,15 +26,13 @@ def glean_upload(db_path: Path, filename: str, content: bytes) -> dict[str, Any]
|
||||||
for fact in facts:
|
for fact in facts:
|
||||||
add_fact(db_path, fact.category, fact.key, fact.value, source="upload")
|
add_fact(db_path, fact.category, fact.key, fact.value, source="upload")
|
||||||
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
with get_conn(db_path) as conn:
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
for i, chunk_text in enumerate(chunks):
|
||||||
for i, chunk_text in enumerate(chunks):
|
conn.execute(
|
||||||
conn.execute(
|
"INSERT INTO context_chunks(id, tenant_id, document_id, chunk_index, text) VALUES (?,?,?,?,?)",
|
||||||
"INSERT INTO context_chunks(id, document_id, chunk_index, text) VALUES (?,?,?,?)",
|
(str(uuid.uuid4()), tid, doc.id, i, chunk_text),
|
||||||
(str(uuid.uuid4()), doc.id, i, chunk_text),
|
)
|
||||||
)
|
conn.commit()
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"document_id": doc.id,
|
"document_id": doc.id,
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,24 @@
|
||||||
"""Glean pipeline: auto-detect format, parse, write to SQLite."""
|
"""Glean pipeline: auto-detect format, parse, write to SQLite or Postgres."""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3 # still used in migrate_incidents_to_dedicated_db (SQLite-only migration)
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator
|
from typing import Any, Iterator
|
||||||
|
|
||||||
|
from app.db import (
|
||||||
|
frag,
|
||||||
|
get_conn,
|
||||||
|
resolve_tenant_id,
|
||||||
|
)
|
||||||
|
from app.db.schema import (
|
||||||
|
ensure_context_schema,
|
||||||
|
ensure_incidents_schema,
|
||||||
|
ensure_schema,
|
||||||
|
migrate_incidents_to_dedicated_db,
|
||||||
|
)
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
|
|
@ -38,7 +50,8 @@ CREATE TABLE IF NOT EXISTS log_entries (
|
||||||
repeat_count INTEGER DEFAULT 1,
|
repeat_count INTEGER DEFAULT 1,
|
||||||
out_of_order INTEGER DEFAULT 0,
|
out_of_order INTEGER DEFAULT 0,
|
||||||
matched_patterns TEXT DEFAULT '[]',
|
matched_patterns TEXT DEFAULT '[]',
|
||||||
text TEXT NOT NULL
|
text TEXT NOT NULL,
|
||||||
|
anonymized INTEGER DEFAULT NULL
|
||||||
);
|
);
|
||||||
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
|
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
|
||||||
CREATE INDEX IF NOT EXISTS idx_timestamp ON log_entries(timestamp_iso);
|
CREATE INDEX IF NOT EXISTS idx_timestamp ON log_entries(timestamp_iso);
|
||||||
|
|
@ -169,127 +182,13 @@ CREATE INDEX IF NOT EXISTS idx_chunks_doc ON context_chunks(document_id);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def ensure_schema(db_path: Path) -> None:
|
# ensure_schema / ensure_context_schema / ensure_incidents_schema / migrate_incidents_to_dedicated_db
|
||||||
"""Create all tables and apply additive migrations. Safe to call on every startup."""
|
# are now implemented in app/db/schema.py and re-exported via app/db/__init__.py.
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
# The imports at the top of this file bring them in; these names are kept as module-level
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
# symbols so existing callers (rest.py, tests) still find them here without changes.
|
||||||
conn.executescript(_SCHEMA)
|
|
||||||
# Additive column migrations — ALTER TABLE silently skips if column exists
|
|
||||||
for stmt in [
|
|
||||||
"ALTER TABLE incidents ADD COLUMN issue_type TEXT NOT NULL DEFAULT ''",
|
|
||||||
]:
|
|
||||||
try:
|
|
||||||
conn.execute(stmt)
|
|
||||||
except sqlite3.OperationalError:
|
|
||||||
pass
|
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_context_schema(db_path: Path) -> None:
|
# _INCIDENTS_SCHEMA and its ensure_/migrate_ functions moved to app/db/schema.py
|
||||||
"""Create context KB tables in a dedicated database file.
|
|
||||||
|
|
||||||
Using a separate file from the main log DB means context fact writes never
|
|
||||||
contend with the high-throughput glean scheduler, which can hold the main
|
|
||||||
DB write lock for seconds at a time when flushing large journal batches.
|
|
||||||
"""
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
|
||||||
conn.execute("PRAGMA foreign_keys=ON")
|
|
||||||
conn.executescript(_CONTEXT_SCHEMA)
|
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
# DDL for the dedicated incidents database: locally labelled incidents plus
# the diagnostic bundles received from / sent to other nodes.
_INCIDENTS_SCHEMA = """
CREATE TABLE IF NOT EXISTS incidents (
    id TEXT PRIMARY KEY,
    label TEXT NOT NULL,
    issue_type TEXT NOT NULL DEFAULT '',
    started_at TEXT,
    ended_at TEXT,
    notes TEXT NOT NULL DEFAULT '',
    created_at TEXT NOT NULL,
    severity TEXT NOT NULL DEFAULT 'medium'
);
CREATE INDEX IF NOT EXISTS idx_incidents_time ON incidents(started_at, ended_at);

CREATE TABLE IF NOT EXISTS received_bundles (
    id TEXT PRIMARY KEY,
    source_host TEXT NOT NULL,
    issue_type TEXT NOT NULL DEFAULT '',
    label TEXT NOT NULL,
    severity TEXT NOT NULL DEFAULT 'medium',
    started_at TEXT,
    bundled_at TEXT NOT NULL,
    entry_count INTEGER NOT NULL DEFAULT 0,
    bundle_json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_bundles_bundled ON received_bundles(bundled_at);
CREATE INDEX IF NOT EXISTS idx_bundles_type ON received_bundles(issue_type);

CREATE TABLE IF NOT EXISTS sent_bundles (
    id TEXT PRIMARY KEY,
    incident_id TEXT NOT NULL,
    exported_at TEXT NOT NULL,
    sanitized INTEGER NOT NULL DEFAULT 0,
    entry_count INTEGER NOT NULL DEFAULT 0,
    bundle_json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_sent_bundles_incident ON sent_bundles(incident_id);
CREATE INDEX IF NOT EXISTS idx_sent_bundles_time ON sent_bundles(exported_at);
"""


def ensure_incidents_schema(db_path: Path) -> None:
    """Create incidents tables in a dedicated database file.

    Using a separate file from the main log DB means incident writes never
    contend with the FTS5 bulk-insert write lock held by the glean scheduler.
    Mirrors the context_facts split (CONTEXT_DB_PATH / turnstone-context.db).

    Safe to call repeatedly: every CREATE uses ``IF NOT EXISTS`` and the
    column-add migrations tolerate the column already being present.

    Args:
        db_path: Location of the incidents SQLite file (created if missing).

    Raises:
        sqlite3.OperationalError: For any failure other than a migration
            column that already exists (e.g. locked or unreadable database).
    """
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.executescript(_INCIDENTS_SCHEMA)
        # Column additions for databases created before the column existed.
        for stmt in [
            "ALTER TABLE incidents ADD COLUMN issue_type TEXT NOT NULL DEFAULT ''",
        ]:
            try:
                conn.execute(stmt)
            except sqlite3.OperationalError as exc:
                # Only "column already there" is expected; anything else
                # (locked DB, corrupt file) must not be silently swallowed.
                if "duplicate column name" not in str(exc).lower():
                    raise
        conn.commit()
    finally:
        # Always release the handle, even when schema creation fails.
        conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_incidents_to_dedicated_db(main_db: Path, incidents_db: Path) -> int:
    """One-shot migration: copy incidents/bundles rows from main DB to incidents DB.

    Safe to call on every startup — rows already present in incidents_db are
    skipped via INSERT OR IGNORE.

    Args:
        main_db: SQLite file that may still hold the legacy tables.
        incidents_db: Dedicated incidents SQLite file; its tables must
            already exist.

    Returns:
        The number of rows actually inserted into ``incidents_db``. Rows
        skipped because their primary key already exists are not counted, so
        a repeat run with nothing new returns 0.
    """
    src = sqlite3.connect(str(main_db), timeout=30.0)
    try:
        src.row_factory = sqlite3.Row
        dst = sqlite3.connect(str(incidents_db), timeout=30.0)
        try:
            # total_changes only grows for rows really written, which is what
            # distinguishes a fresh insert from an OR IGNORE no-op.
            changes_before = dst.total_changes
            for table in ("incidents", "received_bundles", "sent_bundles"):
                try:
                    rows = src.execute(f"SELECT * FROM {table}").fetchall()  # noqa: S608
                except sqlite3.OperationalError:
                    # Table absent from the main DB: nothing to migrate.
                    continue
                if not rows:
                    continue
                columns = rows[0].keys()
                cols = ", ".join(columns)
                placeholders = ", ".join("?" * len(columns))
                dst.executemany(
                    f"INSERT OR IGNORE INTO {table} ({cols}) VALUES ({placeholders})",  # noqa: S608
                    [tuple(r) for r in rows],
                )
            dst.commit()
            return dst.total_changes - changes_before
        finally:
            dst.close()
    finally:
        src.close()
|
|
||||||
|
|
||||||
|
|
||||||
def _fingerprint(path: Path) -> tuple[float, int]:
|
def _fingerprint(path: Path) -> tuple[float, int]:
|
||||||
|
|
@ -298,36 +197,28 @@ def _fingerprint(path: Path) -> tuple[float, int]:
|
||||||
return st.st_mtime, st.st_size
|
return st.st_mtime, st.st_size
|
||||||
|
|
||||||
|
|
||||||
def _fp_unchanged(conn: sqlite3.Connection, path: Path, mtime: float, size: int) -> bool:
|
def _fp_unchanged(conn: Any, path: Path, mtime: float, size: int) -> bool:
|
||||||
"""Return True only when the stored fingerprint exactly matches (mtime, size).
|
"""Return True only when the stored fingerprint exactly matches (mtime, size)."""
|
||||||
|
tid = resolve_tenant_id()
|
||||||
A smaller size (log rotation) or a larger size (new lines appended) both
|
|
||||||
return False so the caller re-gleams the file.
|
|
||||||
"""
|
|
||||||
row = conn.execute(
|
row = conn.execute(
|
||||||
"SELECT mtime, size FROM glean_fingerprints WHERE path = ?",
|
"SELECT mtime, size FROM glean_fingerprints WHERE path = ? AND (tenant_id = ? OR tenant_id = '')",
|
||||||
(str(path),),
|
(str(path), tid),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
if row is None:
|
if row is None:
|
||||||
return False
|
return False
|
||||||
return row[0] == mtime and row[1] == size
|
return row["mtime"] == mtime and row["size"] == size
|
||||||
|
|
||||||
|
|
||||||
def _save_fingerprint(
|
def _save_fingerprint(
|
||||||
conn: sqlite3.Connection,
|
conn: Any,
|
||||||
path: Path,
|
path: Path,
|
||||||
mtime: float,
|
mtime: float,
|
||||||
size: int,
|
size: int,
|
||||||
gleaned_at: str,
|
gleaned_at: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Upsert the fingerprint for *path* after a successful glean."""
|
"""Upsert the fingerprint for *path* after a successful glean."""
|
||||||
conn.execute(
|
tid = resolve_tenant_id()
|
||||||
"""
|
conn.execute(frag.fingerprint_upsert(), (tid, str(path), mtime, size, gleaned_at))
|
||||||
INSERT OR REPLACE INTO glean_fingerprints (path, mtime, size, gleaned_at)
|
|
||||||
VALUES (?, ?, ?, ?)
|
|
||||||
""",
|
|
||||||
(str(path), mtime, size, gleaned_at),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _detect_format(first_line: str) -> str:
|
def _detect_format(first_line: str) -> str:
|
||||||
|
|
@ -400,18 +291,22 @@ def _parse_file(
|
||||||
yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time)
|
yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time)
|
||||||
|
|
||||||
|
|
||||||
def _write_batch(conn: sqlite3.Connection, batch: list[RetrievedEntry]) -> None:
|
def _write_batch(conn: Any, batch: list[RetrievedEntry]) -> None:
|
||||||
conn.executemany(
|
tid = resolve_tenant_id()
|
||||||
"""
|
conflict = frag.entries_conflict_clause()
|
||||||
INSERT OR IGNORE INTO log_entries
|
sql = f"""
|
||||||
(id, source_id, sequence, timestamp_raw, timestamp_iso,
|
{frag.insert_ignore_entries()}
|
||||||
|
(tenant_id, id, source_id, sequence, timestamp_raw, timestamp_iso,
|
||||||
ingest_time, severity, repeat_count, out_of_order,
|
ingest_time, severity, repeat_count, out_of_order,
|
||||||
matched_patterns, text)
|
matched_patterns, text)
|
||||||
VALUES (?,?,?,?,?,?,?,?,?,?,?)
|
VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
|
||||||
""",
|
{conflict}
|
||||||
|
"""
|
||||||
|
conn.executemany(
|
||||||
|
sql,
|
||||||
[
|
[
|
||||||
(
|
(
|
||||||
e.entry_id, e.source_id, e.sequence,
|
tid, e.entry_id, e.source_id, e.sequence,
|
||||||
e.timestamp_raw, e.timestamp_iso, e.ingest_time,
|
e.timestamp_raw, e.timestamp_iso, e.ingest_time,
|
||||||
e.severity, e.repeat_count, int(e.out_of_order),
|
e.severity, e.repeat_count, int(e.out_of_order),
|
||||||
json.dumps(list(e.matched_patterns)), e.text,
|
json.dumps(list(e.matched_patterns)), e.text,
|
||||||
|
|
@ -435,46 +330,41 @@ def _glean_files(
|
||||||
ingest_time = now_iso()
|
ingest_time = now_iso()
|
||||||
source_id_map = source_id_map or {}
|
source_id_map = source_id_map or {}
|
||||||
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
ensure_schema(db_path)
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
|
||||||
conn.executescript(_SCHEMA)
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
stats: dict[str, int] = {}
|
with get_conn(db_path) as conn:
|
||||||
skipped: list[str] = []
|
stats: dict[str, int] = {}
|
||||||
|
skipped: list[str] = []
|
||||||
|
|
||||||
for log_file in files:
|
for log_file in files:
|
||||||
source_id = source_id_map.get(log_file, log_file.stem)
|
source_id = source_id_map.get(log_file, log_file.stem)
|
||||||
|
|
||||||
# Fingerprint check — skip files whose mtime+size haven't changed.
|
mtime, size = _fingerprint(log_file)
|
||||||
mtime, size = _fingerprint(log_file)
|
if not force and _fp_unchanged(conn, log_file, mtime, size):
|
||||||
if not force and _fp_unchanged(conn, log_file, mtime, size):
|
logger.debug("Skipping unchanged file: %s", log_file.name)
|
||||||
logger.debug("Skipping unchanged file: %s", log_file.name)
|
skipped.append(log_file.name)
|
||||||
skipped.append(log_file.name)
|
stats[source_id] = stats.get(source_id, 0)
|
||||||
stats[source_id] = stats.get(source_id, 0)
|
continue
|
||||||
continue
|
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
batch: list[RetrievedEntry] = []
|
batch: list[RetrievedEntry] = []
|
||||||
for entry in _parse_file(log_file, compiled, ingest_time, source_id=source_id):
|
for entry in _parse_file(log_file, compiled, ingest_time, source_id=source_id):
|
||||||
batch.append(entry)
|
batch.append(entry)
|
||||||
if len(batch) >= batch_size:
|
if len(batch) >= batch_size:
|
||||||
|
_write_batch(conn, batch)
|
||||||
|
conn.commit()
|
||||||
|
count += len(batch)
|
||||||
|
batch.clear()
|
||||||
|
if batch:
|
||||||
_write_batch(conn, batch)
|
_write_batch(conn, batch)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
count += len(batch)
|
count += len(batch)
|
||||||
batch.clear()
|
|
||||||
if batch:
|
_save_fingerprint(conn, log_file, mtime, size, ingest_time)
|
||||||
_write_batch(conn, batch)
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
count += len(batch)
|
|
||||||
|
|
||||||
_save_fingerprint(conn, log_file, mtime, size, ingest_time)
|
stats[source_id] = stats.get(source_id, 0) + count
|
||||||
conn.commit()
|
logger.info("Gleaned %d entries from %s (source: %s)", count, log_file.name, source_id)
|
||||||
|
|
||||||
stats[source_id] = stats.get(source_id, 0) + count
|
|
||||||
logger.info("Gleaned %d entries from %s (source: %s)", count, log_file.name, source_id)
|
|
||||||
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
if skipped:
|
if skipped:
|
||||||
logger.info("Skipped %d unchanged file(s): %s", len(skipped), ", ".join(skipped))
|
logger.info("Skipped %d unchanged file(s): %s", len(skipped), ", ".join(skipped))
|
||||||
|
|
@ -493,7 +383,7 @@ def _stream_and_write(
|
||||||
source_id: str,
|
source_id: str,
|
||||||
compiled: list[tuple[LogPattern, object]],
|
compiled: list[tuple[LogPattern, object]],
|
||||||
ingest_time: str,
|
ingest_time: str,
|
||||||
conn: sqlite3.Connection,
|
conn: Any,
|
||||||
batch_size: int,
|
batch_size: int,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Stream *cmd* output through *parser* and write entries to *conn*.
|
"""Stream *cmd* output through *parser* and write entries to *conn*.
|
||||||
|
|
@ -525,7 +415,7 @@ def _glean_ssh_source(
|
||||||
src: dict, # type: ignore[type-arg]
|
src: dict, # type: ignore[type-arg]
|
||||||
compiled: list[tuple[LogPattern, object]],
|
compiled: list[tuple[LogPattern, object]],
|
||||||
ingest_time: str,
|
ingest_time: str,
|
||||||
conn: sqlite3.Connection,
|
conn: Any,
|
||||||
batch_size: int,
|
batch_size: int,
|
||||||
) -> dict[str, int]:
|
) -> dict[str, int]:
|
||||||
"""Open one SSHTransport connection for *src* and glean all its glean items.
|
"""Open one SSHTransport connection for *src* and glean all its glean items.
|
||||||
|
|
@ -618,15 +508,9 @@ def glean_ssh_source(
|
||||||
compiled = _compile(load_patterns(effective_pattern_file))
|
compiled = _compile(load_patterns(effective_pattern_file))
|
||||||
ingest_time = now_iso()
|
ingest_time = now_iso()
|
||||||
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
ensure_schema(db_path)
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
conn.executescript(_SCHEMA)
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
try:
|
|
||||||
stats = _glean_ssh_source(src, compiled, ingest_time, conn, batch_size)
|
stats = _glean_ssh_source(src, compiled, ingest_time, conn, batch_size)
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
logger.info("Rebuilding FTS index after SSH source glean...")
|
logger.info("Rebuilding FTS index after SSH source glean...")
|
||||||
build_fts_index(db_path)
|
build_fts_index(db_path)
|
||||||
|
|
@ -645,7 +529,7 @@ def glean_dir(
|
||||||
Pass ``force=True`` to bypass fingerprint checks and re-glean all files
|
Pass ``force=True`` to bypass fingerprint checks and re-glean all files
|
||||||
regardless of whether they have changed since the last run.
|
regardless of whether they have changed since the last run.
|
||||||
"""
|
"""
|
||||||
files = sorted(corpus_dir.glob("*.jsonl")) + sorted(corpus_dir.glob("*.log"))
|
files = sorted(corpus_dir.rglob("*.jsonl")) + sorted(corpus_dir.rglob("*.log"))
|
||||||
return _glean_files(files, db_path, pattern_file, batch_size, force=force)
|
return _glean_files(files, db_path, pattern_file, batch_size, force=force)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -740,18 +624,13 @@ def glean_sources(
|
||||||
compiled = _compile(load_patterns(effective_pattern_file))
|
compiled = _compile(load_patterns(effective_pattern_file))
|
||||||
ingest_time = now_iso()
|
ingest_time = now_iso()
|
||||||
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
ensure_schema(db_path)
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
conn.executescript(_SCHEMA)
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
try:
|
|
||||||
for src in ssh_sources:
|
for src in ssh_sources:
|
||||||
ssh_stats = _glean_ssh_source(src, compiled, ingest_time, conn, batch_size)
|
ssh_stats = _glean_ssh_source(src, compiled, ingest_time, conn, batch_size)
|
||||||
for k, v in ssh_stats.items():
|
for k, v in ssh_stats.items():
|
||||||
stats[k] = stats.get(k, 0) + v
|
stats[k] = stats.get(k, 0) + v
|
||||||
finally:
|
conn.commit()
|
||||||
conn.close()
|
|
||||||
|
|
||||||
# Rebuild FTS only when SSH sources added entries (_glean_files already
|
# Rebuild FTS only when SSH sources added entries (_glean_files already
|
||||||
# rebuilds when local sources are present; safe to call again if both ran).
|
# rebuilds when local sources are present; safe to call again if both ran).
|
||||||
|
|
|
||||||
|
|
@ -32,10 +32,11 @@ def _extract_ts(line: str) -> tuple[str, str]:
|
||||||
if m:
|
if m:
|
||||||
ts_raw = m.group("ts")
|
ts_raw = m.group("ts")
|
||||||
try:
|
try:
|
||||||
# Strip fractional seconds / TZ for strptime compat
|
# Strip fractional seconds / TZ for strptime compat.
|
||||||
|
# Normalise ISO 8601 T-separator to space so strptime format matches.
|
||||||
clean = re.sub(r"(\.\d+)?([Zz]|[+-]\d{2}:?\d{2})?$", "", ts_raw).strip()
|
clean = re.sub(r"(\.\d+)?([Zz]|[+-]\d{2}:?\d{2})?$", "", ts_raw).strip()
|
||||||
clean = clean.replace("T", " ")
|
clean = clean.replace("T", " ")
|
||||||
dt = datetime.strptime(clean, fmt)
|
dt = datetime.strptime(clean, fmt.replace("T", " "))
|
||||||
if dt.year == 1900:
|
if dt.year == 1900:
|
||||||
dt = dt.replace(year=datetime.now().year)
|
dt = dt.replace(year=datetime.now().year)
|
||||||
dt = dt.astimezone(timezone.utc)
|
dt = dt.astimezone(timezone.utc)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3 # still used for the pre-index-check on SQLite backend
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
@ -53,15 +53,15 @@ _index_ready = False
|
||||||
|
|
||||||
|
|
||||||
def _ensure_index() -> None:
|
def _ensure_index() -> None:
|
||||||
"""Build FTS index on first use; skip if already present."""
|
"""Build FTS index on first use; skip if already present (SQLite only)."""
|
||||||
global _index_ready
|
global _index_ready
|
||||||
if _index_ready:
|
if _index_ready:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
|
raw = sqlite3.connect(str(DB_PATH), timeout=30.0)
|
||||||
count = conn.execute("SELECT COUNT(*) FROM log_fts").fetchone()[0]
|
count = raw.execute("SELECT COUNT(*) FROM log_fts").fetchone()[0]
|
||||||
conn.close()
|
raw.close()
|
||||||
if count > 0:
|
if count > 0:
|
||||||
_index_ready = True
|
_index_ready = True
|
||||||
logger.info("FTS index present (%d entries)", count)
|
logger.info("FTS index present (%d entries)", count)
|
||||||
|
|
@ -93,7 +93,7 @@ def search_logs(
|
||||||
Example: '"connection refused" OR "connection lost"'
|
Example: '"connection refused" OR "connection lost"'
|
||||||
severity: Filter by level — EMERGENCY, ALERT, CRITICAL, ERROR, WARN, NOTICE, INFO, DEBUG.
|
severity: Filter by level — EMERGENCY, ALERT, CRITICAL, ERROR, WARN, NOTICE, INFO, DEBUG.
|
||||||
source: Partial match on source_id. Format is 'corpus:host:service'.
|
source: Partial match on source_id. Format is 'corpus:host:service'.
|
||||||
Example: 'xanderland:caddy' matches all Caddy entries from xanderland.
|
Example: 'myserver:caddy' matches all Caddy entries from myserver.
|
||||||
pattern: Filter by named pattern tag applied at glean time.
|
pattern: Filter by named pattern tag applied at glean time.
|
||||||
Known tags: auth_failure, connection_lost, oom, segfault, disk_full,
|
Known tags: auth_failure, connection_lost, oom, segfault, disk_full,
|
||||||
timeout, caddy_tls_error, caddy_config_error, caddy_auth_error,
|
timeout, caddy_tls_error, caddy_config_error, caddy_auth_error,
|
||||||
|
|
|
||||||
474
app/rest.py
474
app/rest.py
|
|
@ -12,6 +12,7 @@ import hmac
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
# Offline mode: must be set before any HuggingFace library is imported.
|
# Offline mode: must be set before any HuggingFace library is imported.
|
||||||
|
|
@ -29,13 +30,14 @@ from typing import Annotated
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from fastapi import APIRouter, BackgroundTasks, Depends, FastAPI, HTTPException, Query, Request, UploadFile
|
from fastapi import APIRouter, BackgroundTasks, Depends, FastAPI, Header, HTTPException, Query, Request, UploadFile
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
|
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from app.glean.pipeline import ensure_schema, ensure_context_schema, ensure_incidents_schema, migrate_incidents_to_dedicated_db, glean_file as _glean_file, glean_ssh_source as _glean_ssh_source
|
from app.db import close_pool, ensure_schema, ensure_context_schema, ensure_incidents_schema, migrate_incidents_to_dedicated_db
|
||||||
|
from app.glean.pipeline import glean_file as _glean_file, glean_ssh_source as _glean_ssh_source
|
||||||
from app.glean.base import load_compiled_patterns, now_iso
|
from app.glean.base import load_compiled_patterns, now_iso
|
||||||
from app.glean.tautulli import parse_webhook as _parse_tautulli
|
from app.glean.tautulli import parse_webhook as _parse_tautulli
|
||||||
from app.glean.wazuh import is_wazuh_alert as _is_wazuh_alert, parse as _parse_wazuh
|
from app.glean.wazuh import is_wazuh_alert as _is_wazuh_alert, parse as _parse_wazuh
|
||||||
|
|
@ -50,8 +52,10 @@ from app.services.blocklist import (
|
||||||
update_candidate_status,
|
update_candidate_status,
|
||||||
)
|
)
|
||||||
from app.services.pihole import PiholeClient
|
from app.services.pihole import PiholeClient
|
||||||
from app.services.discover import discover_all, build_sources_yaml, validate_source
|
from app.services.discover import discover_all, build_sources_yaml, validate_source, scan_log_directories
|
||||||
from app.services.nl_source import interpret as _nl_interpret
|
from app.services.nl_source import interpret as _nl_interpret
|
||||||
|
from app.services import orchard as _orchard
|
||||||
|
from app.services import ssh_targets as _ssh_targets
|
||||||
from app.services.incidents import (
|
from app.services.incidents import (
|
||||||
build_bundle,
|
build_bundle,
|
||||||
create_incident,
|
create_incident,
|
||||||
|
|
@ -87,6 +91,10 @@ from app.glean.doc_upload import glean_upload as _glean_upload
|
||||||
from app.context.wizard import get_schema as _wizard_schema, advance_step, is_complete, apply_session
|
from app.context.wizard import get_schema as _wizard_schema, advance_step, is_complete, apply_session
|
||||||
from app.context.chunker import UnsupportedDocType, FileTooLarge
|
from app.context.chunker import UnsupportedDocType, FileTooLarge
|
||||||
from app.tasks.glean_scheduler import get_state as _glean_state, run_once as _run_glean, scheduler_loop as _scheduler_loop, submit_matched as _submit_matched
|
from app.tasks.glean_scheduler import get_state as _glean_state, run_once as _run_glean, scheduler_loop as _scheduler_loop, submit_matched as _submit_matched
|
||||||
|
from app.tasks.anomaly_scorer import get_state as _scorer_state, run_once as _run_scorer
|
||||||
|
from app.tasks.cybersec_scorer import get_state as _cybersec_state, run_once as _run_cybersec
|
||||||
|
from app.services.anomaly import list_detections as _list_detections, acknowledge_detection as _ack_detection
|
||||||
|
from app.services.cybersec import list_cybersec_detections as _list_cybersec, CYBERSEC_LABELS
|
||||||
from app.glean.mqtt_subscriber import run_mqtt_subscribers as _run_mqtt_subscribers
|
from app.glean.mqtt_subscriber import run_mqtt_subscribers as _run_mqtt_subscribers
|
||||||
|
|
||||||
DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
|
DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
|
||||||
|
|
@ -108,9 +116,19 @@ PATTERN_DIR = Path(os.environ.get("TURNSTONE_PATTERNS", Path(__file__).parent.pa
|
||||||
PATTERN_FILE = PATTERN_DIR / "default.yaml"
|
PATTERN_FILE = PATTERN_DIR / "default.yaml"
|
||||||
GLEAN_INTERVAL = int(os.environ.get("TURNSTONE_GLEAN_INTERVAL", "900"))
|
GLEAN_INTERVAL = int(os.environ.get("TURNSTONE_GLEAN_INTERVAL", "900"))
|
||||||
SUBMIT_ENDPOINT = os.environ.get("TURNSTONE_SUBMIT_ENDPOINT", "").rstrip("/")
|
SUBMIT_ENDPOINT = os.environ.get("TURNSTONE_SUBMIT_ENDPOINT", "").rstrip("/")
|
||||||
|
ANOMALY_MODEL = os.environ.get("TURNSTONE_ANOMALY_MODEL", "")
|
||||||
|
ANOMALY_DEVICE = os.environ.get("TURNSTONE_ANOMALY_DEVICE", "cpu")
|
||||||
|
ANOMALY_THRESHOLD = float(os.environ.get("TURNSTONE_ANOMALY_THRESHOLD", "0.75"))
|
||||||
|
CYBERSEC_MODEL = os.environ.get("TURNSTONE_CYBERSEC_MODEL", "")
|
||||||
|
CYBERSEC_DEVICE = os.environ.get("TURNSTONE_CYBERSEC_DEVICE", "cpu")
|
||||||
|
CYBERSEC_THRESHOLD = float(os.environ.get("TURNSTONE_CYBERSEC_THRESHOLD", "0.60"))
|
||||||
|
AUTO_INCIDENT = os.environ.get("TURNSTONE_AUTO_INCIDENT", "true").lower() not in ("0", "false", "no")
|
||||||
# When set, all /api/ routes require Authorization: Bearer <key>.
|
# When set, all /api/ routes require Authorization: Bearer <key>.
|
||||||
# Unset (default) means no authentication — suitable for local-only deployments.
|
# Unset (default) means no authentication — suitable for local-only deployments.
|
||||||
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
|
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
|
||||||
|
# Admin key for The Orchard graft/deactivate endpoints on the harvest receiver.
|
||||||
|
# If unset, the orchard management endpoints return 501.
|
||||||
|
_ORCHARD_ADMIN_KEY: str | None = os.environ.get("TURNSTONE_ORCHARD_ADMIN_KEY") or None
|
||||||
|
|
||||||
# GPU inference server URL.
|
# GPU inference server URL.
|
||||||
# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → orch.circuitforge.tech (Paid+).
|
# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → orch.circuitforge.tech (Paid+).
|
||||||
|
|
@ -164,6 +182,14 @@ async def _lifespan(app: FastAPI):
|
||||||
sources_file, DB_PATH, PATTERN_FILE, GLEAN_INTERVAL,
|
sources_file, DB_PATH, PATTERN_FILE, GLEAN_INTERVAL,
|
||||||
submit_endpoint=SUBMIT_ENDPOINT or None,
|
submit_endpoint=SUBMIT_ENDPOINT or None,
|
||||||
source_host=SOURCE_HOST,
|
source_host=SOURCE_HOST,
|
||||||
|
anomaly_model=ANOMALY_MODEL,
|
||||||
|
anomaly_device=ANOMALY_DEVICE,
|
||||||
|
anomaly_threshold=ANOMALY_THRESHOLD,
|
||||||
|
cybersec_model=CYBERSEC_MODEL,
|
||||||
|
cybersec_device=CYBERSEC_DEVICE,
|
||||||
|
cybersec_threshold=CYBERSEC_THRESHOLD,
|
||||||
|
incidents_db_path=INCIDENTS_DB_PATH,
|
||||||
|
auto_incident=AUTO_INCIDENT,
|
||||||
),
|
),
|
||||||
name="glean-scheduler",
|
name="glean-scheduler",
|
||||||
)
|
)
|
||||||
|
|
@ -185,9 +211,10 @@ async def _lifespan(app: FastAPI):
|
||||||
await task
|
await task
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
pass
|
pass
|
||||||
|
close_pool() # no-op if SQLite backend
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(title="Turnstone API", version="0.6.1", docs_url="/turnstone/docs", redoc_url=None, lifespan=_lifespan)
|
app = FastAPI(title="Turnstone API", version="0.7.0", docs_url="/turnstone/docs", redoc_url=None, lifespan=_lifespan)
|
||||||
|
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
CORSMiddleware,
|
CORSMiddleware,
|
||||||
|
|
@ -256,6 +283,10 @@ class DiagnoseRequest(BaseModel):
|
||||||
source: str | None = None
|
source: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
class SourceSuggestRequest(BaseModel):
|
||||||
|
query: str
|
||||||
|
|
||||||
|
|
||||||
class SeverityOverride(BaseModel):
|
class SeverityOverride(BaseModel):
|
||||||
name: str
|
name: str
|
||||||
pattern: str
|
pattern: str
|
||||||
|
|
@ -276,6 +307,13 @@ class SettingsBody(BaseModel):
|
||||||
pihole_api_key: str | None = None
|
pihole_api_key: str | None = None
|
||||||
router_source_ids: str | None = None
|
router_source_ids: str | None = None
|
||||||
device_names: str | None = None
|
device_names: str | None = None
|
||||||
|
notion_token: str | None = None
|
||||||
|
notion_database_id: str | None = None
|
||||||
|
jira_url: str | None = None
|
||||||
|
jira_email: str | None = None
|
||||||
|
jira_api_token: str | None = None
|
||||||
|
jira_project_key: str | None = None
|
||||||
|
jira_issue_type: str | None = None
|
||||||
|
|
||||||
|
|
||||||
class IncidentCreate(BaseModel):
|
class IncidentCreate(BaseModel):
|
||||||
|
|
@ -502,6 +540,71 @@ async def diagnose_post_stream(body: DiagnoseRequest) -> StreamingResponse:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Common English / filler words that carry no signal about which host or
# service a problem description refers to.
_SUGGEST_STOPWORDS = frozenset({
    "the", "and", "that", "this", "with", "have", "from", "they",
    "been", "their", "what", "when", "there", "some", "would", "make",
    "like", "into", "time", "look", "just", "know", "take", "year",
    "your", "good", "could", "them", "then", "very", "also",
    "back", "after", "work", "need", "even", "much", "most", "tell",
    "does", "more", "once", "help", "seem", "here", "about", "issue",
    "thing", "logs", "error", "again", "still", "these", "those",
    "getting", "having", "trying", "going", "where", "which", "cant",
    "now", "set", "kind", "weird", "stable", "huge", "real", "nice",
})


def _suggest_parts(text: str) -> set[str]:
    """Split *text* on ``:``, ``-``, ``_`` and digit runs; keep lower-cased pieces longer than 2 chars."""
    return {
        p.lower()
        for seg in re.split(r"[:\-_\d]+", text)
        for p in [seg.strip()]
        if len(p) > 2
    }


@router.post("/api/sources/suggest")
def suggest_sources(body: SourceSuggestRequest) -> dict:
    """Return source IDs ranked by relevance to a natural-language problem description.

    Source IDs are broken into pieces on ``:``, ``-``, ``_`` and digits. Query
    words are compared both verbatim and after the same splitting, so a query
    mentioning ``web-01`` matches a source such as ``lab:web-01:nginx``.

    Also returns ``untracked_names`` — query tokens that look like hostnames or
    service names but do not appear in any monitored source, so the UI can
    prompt the user to add them.

    Returns:
        A dict with ``suggested`` (list of ``source_id`` / ``score`` /
        ``matched_tokens`` entries, best score first), ``untracked_names``
        (sorted list of query words with no counterpart in any source ID) and
        ``all_source_ids``.
    """
    all_sources = _list_sources(DB_PATH)

    raw_tokens = {
        t.lower()
        for t in re.findall(r"[a-zA-Z][a-zA-Z0-9_-]*", body.query)
        if len(t) > 2 and t.lower() not in _SUGGEST_STOPWORDS
    }
    # For each query word, the forms it may match under: the word itself plus
    # the pieces left after applying the source-ID separators.
    token_forms: dict[str, set[str]] = {
        raw: ({raw} | _suggest_parts(raw)) - _SUGGEST_STOPWORDS
        for raw in raw_tokens
    }
    query_tokens: set[str] = set().union(*token_forms.values())

    # Build a flat set of every token present in any source ID
    all_source_tokens: set[str] = set()
    source_token_map: dict[str, set[str]] = {}
    for src in all_sources:
        src_id: str = src["source_id"]
        parts = _suggest_parts(src_id)
        source_token_map[src_id] = parts
        all_source_tokens |= parts

    suggestions = []
    for src_id, parts in source_token_map.items():
        matched = query_tokens & parts
        if matched:
            # Fraction of the source's own tokens that the query mentioned.
            score = round(len(matched) / max(len(parts), 1), 3)
            suggestions.append({
                "source_id": src_id,
                "score": score,
                "matched_tokens": sorted(matched),
            })

    suggestions.sort(key=lambda x: x["score"], reverse=True)

    # Tokens that look like host/service names but aren't in any source:
    # a query word is tracked as soon as any of its forms is a source token.
    untracked = sorted(
        raw for raw, forms in token_forms.items()
        if not (forms & all_source_tokens)
    )

    return {
        "suggested": suggestions,
        "untracked_names": untracked,
        "all_source_ids": [s["source_id"] for s in all_sources],
    }
|
||||||
|
|
||||||
|
|
||||||
@router.get("/api/settings")
|
@router.get("/api/settings")
|
||||||
def get_settings() -> dict:
|
def get_settings() -> dict:
|
||||||
return _load_prefs()
|
return _load_prefs()
|
||||||
|
|
@ -538,6 +641,20 @@ def patch_settings(body: SettingsBody) -> dict:
|
||||||
prefs["router_source_ids"] = body.router_source_ids
|
prefs["router_source_ids"] = body.router_source_ids
|
||||||
if body.device_names is not None:
|
if body.device_names is not None:
|
||||||
prefs["device_names"] = body.device_names
|
prefs["device_names"] = body.device_names
|
||||||
|
if body.notion_token is not None:
|
||||||
|
prefs["notion_token"] = body.notion_token
|
||||||
|
if body.notion_database_id is not None:
|
||||||
|
prefs["notion_database_id"] = body.notion_database_id
|
||||||
|
if body.jira_url is not None:
|
||||||
|
prefs["jira_url"] = body.jira_url
|
||||||
|
if body.jira_email is not None:
|
||||||
|
prefs["jira_email"] = body.jira_email
|
||||||
|
if body.jira_api_token is not None:
|
||||||
|
prefs["jira_api_token"] = body.jira_api_token
|
||||||
|
if body.jira_project_key is not None:
|
||||||
|
prefs["jira_project_key"] = body.jira_project_key
|
||||||
|
if body.jira_issue_type is not None:
|
||||||
|
prefs["jira_issue_type"] = body.jira_issue_type
|
||||||
_save_prefs(prefs)
|
_save_prefs(prefs)
|
||||||
return prefs
|
return prefs
|
||||||
|
|
||||||
|
|
@ -722,6 +839,89 @@ class BatchGleanRequest(BaseModel):
|
||||||
entries: list[BatchEntry]
|
entries: list[BatchEntry]
|
||||||
|
|
||||||
|
|
||||||
|
# ── SSH target manager ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class SshTargetCreate(BaseModel):
    """Request body for registering a new SSH target.

    All fields are required except ``port``, which falls back to 22.
    """

    # Human-readable name for the target.
    label: str
    # Hostname or address to connect to.
    host: str
    # TCP port of the SSH daemon.
    port: int = 22
    # Remote login name.
    user: str
    # Path to the private key file (a path, not the key material).
    key_path: str
|
||||||
|
|
||||||
|
|
||||||
|
class SshTargetUpdate(BaseModel):
    """Request body for a partial update of an SSH target.

    Every field is optional and defaults to ``None``.
    """

    # Same fields as SshTargetCreate, each defaulting to None.
    label: str | None = None
    host: str | None = None
    port: int | None = None
    user: str | None = None
    key_path: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/api/ssh-targets")
def list_ssh_targets() -> dict:
    """Return all configured SSH targets under the ``targets`` key (never returns key contents)."""
    serialized = []
    for target in _ssh_targets.list_targets(DB_PATH):
        serialized.append(_ssh_targets.target_to_dict(target, include_warning=True))
    return {"targets": serialized}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/ssh-targets")
def create_ssh_target(body: SshTargetCreate) -> dict:
    """Create a new SSH target and return its serialized form.

    Raises:
        HTTPException: 422 when the target service rejects the values with a
            ``ValueError``; the original error is kept as ``__cause__``.
    """
    try:
        target = _ssh_targets.create_target(
            DB_PATH,
            label=body.label,
            host=body.host,
            port=body.port,
            user=body.user,
            key_path=body.key_path,
        )
    except ValueError as exc:
        # Chain explicitly so logs show the validation failure as the cause
        # rather than as an unrelated error raised during handling.
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    return _ssh_targets.target_to_dict(target, include_warning=True)
|
||||||
|
|
||||||
|
|
||||||
|
@router.patch("/api/ssh-targets/{target_id}")
def update_ssh_target(target_id: str, body: SshTargetUpdate) -> dict:
    """Update an existing SSH target and return its serialized form.

    Raises:
        HTTPException: 422 when the target service rejects the values with a
            ``ValueError`` (kept as ``__cause__``); 404 when the service
            returns ``None`` for ``target_id``.
    """
    try:
        target = _ssh_targets.update_target(
            DB_PATH,
            target_id,
            label=body.label,
            host=body.host,
            port=body.port,
            user=body.user,
            key_path=body.key_path,
        )
    except ValueError as exc:
        # Chain explicitly so the validation failure is recorded as the cause.
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    if target is None:
        raise HTTPException(status_code=404, detail=f"SSH target {target_id!r} not found")
    return _ssh_targets.target_to_dict(target, include_warning=True)
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/api/ssh-targets/{target_id}")
def delete_ssh_target(target_id: str) -> dict:
    """Delete one SSH target.

    Responds with ``{"deleted": target_id}`` on success and raises a 404
    ``HTTPException`` when ``delete_target`` returns a falsy value.
    """
    removed = _ssh_targets.delete_target(DB_PATH, target_id)
    if removed:
        return {"deleted": target_id}
    raise HTTPException(status_code=404, detail=f"SSH target {target_id!r} not found")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/ssh-targets/{target_id}/test")
def test_ssh_target(target_id: str) -> dict:
    """Test an SSH connection by running a no-op remote command.

    Records the result in the DB so the UI can show a persistent status badge.

    Args:
        target_id: Identifier taken from the URL path.

    Returns:
        Whatever ``_ssh_targets.test_connection`` returns.

    Raises:
        HTTPException: 404 when ``test_connection`` raises ``KeyError``.
    """
    try:
        return _ssh_targets.test_connection(DB_PATH, target_id)
    except KeyError as exc:
        # str(KeyError("msg")) yields "'msg'" (the repr of the key), which put
        # stray quotes into the response detail; use the raw argument instead.
        detail = str(exc.args[0]) if exc.args else f"SSH target {target_id!r} not found"
        raise HTTPException(status_code=404, detail=detail) from exc
|
||||||
|
|
||||||
|
|
||||||
# ── Setup / Onboarding wizard ──────────────────────────────────────────────
|
# ── Setup / Onboarding wizard ──────────────────────────────────────────────
|
||||||
|
|
||||||
class SetupWriteBody(BaseModel):
|
class SetupWriteBody(BaseModel):
|
||||||
|
|
@ -745,6 +945,28 @@ def setup_discover() -> dict:
|
||||||
return discover_all()
|
return discover_all()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/api/setup/scan")
def setup_scan(
    query: str = "",
    dirs: str = "",
    max_results: int = 25,
) -> dict:
    """Scan the filesystem for log files ranked by recency and keyword match.

    Args:
        query: Optional problem context used to weight results toward matching
            files (e.g. 'nginx 502', 'docker timeout', 'ssh refused'). An empty
            string is passed to the scanner as ``None``.
        dirs: Optional comma-separated list overriding the default scan
            directories (/var/log, /opt). Blank items are dropped; an empty
            list is passed as ``None``.
        max_results: Maximum number of candidates; clamped to the range 0..100.

    Returns:
        ``{"candidates": <scanner result>, "query": <query or None>}``.
    """
    scan_dirs = [d.strip() for d in dirs.split(",") if d.strip()] or None
    # Clamp both ends: the upper bound was already enforced, the lower bound
    # stops a negative query parameter from being forwarded to the scanner.
    capped_results = max(0, min(max_results, 100))
    candidates = scan_log_directories(
        query=query or None,
        dirs=scan_dirs,
        max_results=capped_results,
    )
    return {"candidates": candidates, "query": query or None}
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/setup/write")
|
@router.post("/api/setup/write")
|
||||||
def setup_write(body: SetupWriteBody, background_tasks: BackgroundTasks) -> dict:
|
def setup_write(body: SetupWriteBody, background_tasks: BackgroundTasks) -> dict:
|
||||||
"""Validate and write sources.yaml from a list of selected source definitions.
|
"""Validate and write sources.yaml from a list of selected source definitions.
|
||||||
|
|
@ -813,12 +1035,24 @@ def setup_interpret(body: NLInterpretBody) -> dict:
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/glean/batch")
|
@router.post("/api/glean/batch")
|
||||||
def glean_batch(payload: BatchGleanRequest, background_tasks: BackgroundTasks) -> dict:
|
def glean_batch(
|
||||||
|
payload: BatchGleanRequest,
|
||||||
|
background_tasks: BackgroundTasks,
|
||||||
|
authorization: str | None = Header(default=None),
|
||||||
|
) -> dict:
|
||||||
"""Accept pre-parsed log entries from a remote Turnstone instance (submission protocol).
|
"""Accept pre-parsed log entries from a remote Turnstone instance (submission protocol).
|
||||||
|
|
||||||
Used by nodes with TURNSTONE_SUBMIT_ENDPOINT configured to push their
|
Used by nodes with TURNSTONE_SUBMIT_ENDPOINT configured to push their
|
||||||
pattern-matched entries to a central receiving instance.
|
pattern-matched entries to a central receiving instance.
|
||||||
|
|
||||||
|
When TURNSTONE_ORCHARD_ADMIN_KEY is set on the receiver, requests must
|
||||||
|
include Authorization: Bearer <api_key> where the key was issued at graft time.
|
||||||
"""
|
"""
|
||||||
|
branch_key_env = os.environ.get("TURNSTONE_BRANCH_KEY", "")
|
||||||
|
if branch_key_env:
|
||||||
|
provided = (authorization or "").removeprefix("Bearer ").strip()
|
||||||
|
if not provided or provided != branch_key_env:
|
||||||
|
raise HTTPException(status_code=401, detail="Invalid branch API key")
|
||||||
if not payload.entries:
|
if not payload.entries:
|
||||||
return {"gleaned": 0}
|
return {"gleaned": 0}
|
||||||
conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
|
conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
|
||||||
|
|
@ -854,6 +1088,86 @@ def glean_batch(payload: BatchGleanRequest, background_tasks: BackgroundTasks) -
|
||||||
return {"gleaned": len(payload.entries), "source_host": payload.source_host}
|
return {"gleaned": len(payload.entries), "source_host": payload.source_host}
|
||||||
|
|
||||||
|
|
||||||
|
def _require_orchard_admin(authorization: str | None) -> None:
    """Reject the request unless it carries the Orchard admin key.

    Args:
        authorization: Raw ``Authorization`` header value, or ``None`` when the
            header is absent. A leading ``"Bearer "`` prefix is stripped.

    Raises:
        HTTPException: 501 when ``_ORCHARD_ADMIN_KEY`` is ``None``;
            401 when the supplied token does not match the key.
    """
    if _ORCHARD_ADMIN_KEY is None:
        raise HTTPException(status_code=501, detail="Orchard management not enabled on this instance — set TURNSTONE_ORCHARD_ADMIN_KEY")
    provided = (authorization or "").removeprefix("Bearer ").strip()
    # hmac.compare_digest() raises TypeError for str arguments that contain
    # non-ASCII characters, which turned a malformed header into a 500.
    # Comparing the UTF-8 bytes keeps the constant-time check and yields 401.
    if not hmac.compare_digest(_ORCHARD_ADMIN_KEY.encode("utf-8"), provided.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Invalid Orchard admin key")
|
||||||
|
|
||||||
|
|
||||||
|
class GraftRequest(BaseModel):
    """Request body for ``POST /api/orchard/graft``.

    The three values are passed positionally to ``_orchard.graft`` by the
    route handler.
    """

    slug: str
    contact_email: str
    # Defaults to False when the client omits it.
    agreed_to_terms: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/orchard/graft")
def orchard_graft(
    body: GraftRequest,
    authorization: str | None = Header(default=None),
) -> dict:
    """Provision a new Orchard branch node.

    Admin-only: requires Authorization: Bearer <TURNSTONE_ORCHARD_ADMIN_KEY>.
    Returns the submit endpoint and a one-time API key.

    Raises:
        HTTPException: 401/501 from the admin check; 422 when
            ``_orchard.graft`` raises ``ValueError``; 500 for any other failure.
    """
    _require_orchard_admin(authorization)
    try:
        result = _orchard.graft(body.slug, body.contact_email, body.agreed_to_terms)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    except Exception as exc:
        # logger.exception records the traceback, which the previous
        # "%s"-formatted error line discarded.
        logger.exception("Orchard graft failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return result
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/api/orchard/branches")
def orchard_list_branches(
    authorization: str | None = Header(default=None),
) -> dict:
    """List all Orchard branches. Admin-only.

    The ``api_key_hash`` field is removed from every branch before the list
    is returned under the ``"branches"`` key.
    """
    _require_orchard_admin(authorization)
    safe = []
    for branch in _orchard.list_branches():
        # Copy every item except the key hash, which must not leave the server.
        public_view = {}
        for field, value in branch.items():
            if field != "api_key_hash":
                public_view[field] = value
        safe.append(public_view)
    return {"branches": safe}
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/api/orchard/branches/{slug}")
def orchard_deactivate(
    slug: str,
    authorization: str | None = Header(default=None),
) -> dict:
    """Deactivate a branch: stop its container and remove its Caddy route. Admin-only.

    Raises:
        HTTPException: 401/501 from the admin check; 404 when
            ``_orchard.deactivate`` raises ``KeyError``; 500 for any other failure.
    """
    _require_orchard_admin(authorization)
    try:
        return _orchard.deactivate(slug)
    except KeyError as exc:
        # str(KeyError("msg")) is the repr of the key (extra quotes); report
        # the raw argument instead.
        detail = str(exc.args[0]) if exc.args else str(exc)
        raise HTTPException(status_code=404, detail=detail) from exc
    except Exception as exc:
        # Keep the traceback in the log rather than only the message.
        logger.exception("Orchard deactivate failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/orchard/branches/{slug}/anonymize")
def orchard_anonymize(
    slug: str,
    authorization: str | None = Header(default=None),
) -> dict:
    """Run the anonymization worker over a branch DB. Admin-only.

    Raises:
        HTTPException: 401/501 from the admin check; 404 when
            ``_orchard.run_anonymization`` raises ``KeyError``; 500 for any
            other failure.
    """
    _require_orchard_admin(authorization)
    try:
        return _orchard.run_anonymization(slug)
    except KeyError as exc:
        # str(KeyError("msg")) is the repr of the key (extra quotes); report
        # the raw argument instead.
        detail = str(exc.args[0]) if exc.args else str(exc)
        raise HTTPException(status_code=404, detail=detail) from exc
    except Exception as exc:
        # Keep the traceback in the log rather than only the message.
        logger.exception("Orchard anonymize failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
@router.get("/api/tasks/glean/status")
|
@router.get("/api/tasks/glean/status")
|
||||||
def glean_task_status() -> dict:
|
def glean_task_status() -> dict:
|
||||||
"""Return the current state of the periodic glean scheduler."""
|
"""Return the current state of the periodic glean scheduler."""
|
||||||
|
|
@ -993,7 +1307,7 @@ def get_incident_endpoint(incident_id: str) -> dict:
|
||||||
incident = get_incident(INCIDENTS_DB_PATH, incident_id)
|
incident = get_incident(INCIDENTS_DB_PATH, incident_id)
|
||||||
if not incident:
|
if not incident:
|
||||||
raise HTTPException(status_code=404, detail="Incident not found")
|
raise HTTPException(status_code=404, detail="Incident not found")
|
||||||
entries = get_incident_entries(INCIDENTS_DB_PATH, incident)
|
entries = get_incident_entries(DB_PATH, incident)
|
||||||
return {
|
return {
|
||||||
**dataclasses.asdict(incident),
|
**dataclasses.asdict(incident),
|
||||||
"entries": [dataclasses.asdict(e) for e in entries],
|
"entries": [dataclasses.asdict(e) for e in entries],
|
||||||
|
|
@ -1012,7 +1326,7 @@ def get_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
|
||||||
incident = get_incident(INCIDENTS_DB_PATH, incident_id)
|
incident = get_incident(INCIDENTS_DB_PATH, incident_id)
|
||||||
if not incident:
|
if not incident:
|
||||||
raise HTTPException(status_code=404, detail="Incident not found")
|
raise HTTPException(status_code=404, detail="Incident not found")
|
||||||
bundle = build_bundle(INCIDENTS_DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
|
bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
|
||||||
record_sent_bundle(INCIDENTS_DB_PATH, incident_id, bundle, sanitized=sanitize)
|
record_sent_bundle(INCIDENTS_DB_PATH, incident_id, bundle, sanitized=sanitize)
|
||||||
return bundle
|
return bundle
|
||||||
|
|
||||||
|
|
@ -1030,7 +1344,7 @@ def send_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
|
||||||
incident = get_incident(INCIDENTS_DB_PATH, incident_id)
|
incident = get_incident(INCIDENTS_DB_PATH, incident_id)
|
||||||
if not incident:
|
if not incident:
|
||||||
raise HTTPException(status_code=404, detail="Incident not found")
|
raise HTTPException(status_code=404, detail="Incident not found")
|
||||||
bundle = build_bundle(INCIDENTS_DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
|
bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
|
||||||
record_sent_bundle(INCIDENTS_DB_PATH, incident_id, bundle, sanitized=sanitize)
|
record_sent_bundle(INCIDENTS_DB_PATH, incident_id, bundle, sanitized=sanitize)
|
||||||
payload = json.dumps(bundle).encode()
|
payload = json.dumps(bundle).encode()
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
|
|
@ -1048,6 +1362,41 @@ def send_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
|
||||||
raise HTTPException(status_code=502, detail=f"Send failed: {exc}") from exc
|
raise HTTPException(status_code=502, detail=f"Send failed: {exc}") from exc
|
||||||
|
|
||||||
|
|
||||||
|
class TicketExportRequest(BaseModel):
    """Request body for ``POST /api/incidents/{incident_id}/export``."""

    # Name of the destination tracker: "notion" | "jira"
    target: str
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/incidents/{incident_id}/export")
def export_incident_ticket(incident_id: str, body: TicketExportRequest) -> dict:
    """Push an incident to an external ticket tracker (Notion or Jira).

    Responds 404 for an unknown incident, 422 for an unsupported target or a
    ``ValueError`` from the exporter, and 502 for a ``RuntimeError`` from it.
    On success returns the target name plus the exporter's ``url`` and
    ``ticket_id``.
    """
    from app.services.ticket_export import export_incident, available_targets

    incident = get_incident(INCIDENTS_DB_PATH, incident_id)
    if not incident:
        raise HTTPException(status_code=404, detail="Incident not found")
    if body.target not in available_targets():
        raise HTTPException(status_code=422, detail=f"Unknown target. Supported: {available_targets()}")

    # Tracker credentials live in the saved preferences; missing keys become "".
    prefs = _load_prefs()
    config_keys = (
        "notion_token", "notion_database_id",
        "jira_url", "jira_email", "jira_api_token", "jira_project_key", "jira_issue_type",
    )
    config = {}
    for key in config_keys:
        config[key] = prefs.get(key, "")

    from app.services.incidents import get_incident_entries

    entries = []
    for entry in get_incident_entries(DB_PATH, incident):
        entries.append(dataclasses.asdict(entry))
    incident_dict = dataclasses.asdict(incident)

    try:
        result = export_incident(body.target, incident_dict, entries, config)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    except RuntimeError as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc

    response = {"target": body.target}
    response["url"] = result["url"]
    response["ticket_id"] = result["ticket_id"]
    return response
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/bundles")
|
@router.post("/api/bundles")
|
||||||
def receive_bundle(bundle: dict) -> dict:
|
def receive_bundle(bundle: dict) -> dict:
|
||||||
record = store_bundle(INCIDENTS_DB_PATH, bundle)
|
record = store_bundle(INCIDENTS_DB_PATH, bundle)
|
||||||
|
|
@ -1316,6 +1665,115 @@ async def debug_search(q: str):
|
||||||
app.include_router(_ctx)
|
app.include_router(_ctx)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Anomaly scoring endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_anomaly = APIRouter(
    prefix="/turnstone/api/anomaly",
    dependencies=[Depends(_check_api_key)],
)


@_anomaly.get("/status")
async def anomaly_status():
    """Report the anomaly scorer's configuration merged with its current state.

    The attributes of the object returned by ``_scorer_state()`` are added
    last, so they win over a configuration key of the same name.
    """
    state = _scorer_state()
    payload = {
        "model": ANOMALY_MODEL or None,
        "threshold": ANOMALY_THRESHOLD,
        "device": ANOMALY_DEVICE,
        "enabled": bool(ANOMALY_MODEL),
    }
    payload.update(vars(state))
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@_anomaly.post("/run")
async def anomaly_run(background_tasks: BackgroundTasks):
    """Queue a manual anomaly scoring pass as a background task.

    Responds 400 when no anomaly model is configured. The scorer is invoked
    with a batch size of 256.
    """
    if ANOMALY_MODEL:
        background_tasks.add_task(
            _run_scorer,
            DB_PATH,
            ANOMALY_MODEL,
            ANOMALY_DEVICE,
            256,
            ANOMALY_THRESHOLD,
        )
        return {"ok": True, "message": "scorer triggered"}
    raise HTTPException(status_code=400, detail="TURNSTONE_ANOMALY_MODEL not configured")
|
||||||
|
|
||||||
|
|
||||||
|
@_anomaly.get("/detections")
async def anomaly_detections(
    limit: int = Query(100, ge=1, le=1000),
    unacked_only: bool = Query(False),
    label: str | None = Query(None),
    scorer: str | None = Query(None),
):
    """List detections ordered by detected_at DESC. Optionally filter by scorer ('anomaly'|'cybersec').

    The query runs in the event loop's default executor so it does not block
    the loop. ``total`` is the number of rows returned in this response.
    """

    def _query():
        return _list_detections(
            DB_PATH,
            limit=limit,
            unacked_only=unacked_only,
            label=label,
            scorer=scorer,
        )

    rows = await asyncio.get_running_loop().run_in_executor(None, _query)
    return {"detections": rows, "total": len(rows)}
|
||||||
|
|
||||||
|
|
||||||
|
@_anomaly.post("/detections/{detection_id}/acknowledge")
async def acknowledge_detection(detection_id: str, notes: str = ""):
    """Mark one detection as reviewed; 404 when no row was updated.

    The update runs in the event loop's default executor.
    """

    def _acknowledge():
        return _ack_detection(DB_PATH, detection_id, notes)

    updated = await asyncio.get_running_loop().run_in_executor(None, _acknowledge)
    if updated:
        return {"ok": True}
    raise HTTPException(status_code=404, detail="Detection not found")


app.include_router(_anomaly)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Cybersec scoring endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_cybersec_router = APIRouter(
    prefix="/turnstone/api/cybersec",
    dependencies=[Depends(_check_api_key)],
)


@_cybersec_router.get("/status")
async def cybersec_status():
    """Report the cybersec scorer's configuration merged with its current state.

    The mapping returned by ``_cybersec_state()`` is merged last, so its keys
    win over a configuration key of the same name.
    """
    payload = {
        "model": CYBERSEC_MODEL or None,
        "threshold": CYBERSEC_THRESHOLD,
        "device": CYBERSEC_DEVICE,
        "enabled": bool(CYBERSEC_MODEL),
        "candidate_labels": CYBERSEC_LABELS,
    }
    return {**payload, **_cybersec_state()}
|
||||||
|
|
||||||
|
|
||||||
|
@_cybersec_router.post("/run")
async def cybersec_run(background_tasks: BackgroundTasks):
    """Queue a manual cybersec scoring pass as a background task.

    Responds 400 when no cybersec model is configured. The scorer is invoked
    with a batch size of 32.
    """
    if CYBERSEC_MODEL:
        background_tasks.add_task(
            _run_cybersec,
            DB_PATH,
            CYBERSEC_MODEL,
            CYBERSEC_DEVICE,
            32,
            CYBERSEC_THRESHOLD,
        )
        return {"ok": True, "message": "cybersec scorer triggered"}
    raise HTTPException(status_code=400, detail="TURNSTONE_CYBERSEC_MODEL not configured")
|
||||||
|
|
||||||
|
|
||||||
|
@_cybersec_router.get("/detections")
async def cybersec_detections(
    limit: int = Query(100, ge=1, le=1000),
    unacked_only: bool = Query(False),
    label: str | None = Query(None),
):
    """List cybersec detections ordered by detected_at DESC.

    The query runs in the event loop's default executor. ``total`` is the
    number of rows returned in this response.
    """

    def _query():
        return _list_cybersec(DB_PATH, limit=limit, unacked_only=unacked_only, label=label)

    rows = await asyncio.get_running_loop().run_in_executor(None, _query)
    return {"detections": rows, "total": len(rows)}


app.include_router(_cybersec_router)
|
||||||
|
|
||||||
|
|
||||||
# Root redirect → /turnstone/
|
# Root redirect → /turnstone/
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
def root_redirect() -> RedirectResponse:
|
def root_redirect() -> RedirectResponse:
|
||||||
|
|
|
||||||
305
app/services/anomaly.py
Normal file
305
app/services/anomaly.py
Normal file
|
|
@ -0,0 +1,305 @@
|
||||||
|
"""Anomaly scoring pipeline — batch-score log_entries with a HF classifier.
|
||||||
|
|
||||||
|
Designed to run after each glean cycle (or standalone). When no model is
|
||||||
|
configured the scorer is a no-op and returns immediately, so it is always
|
||||||
|
safe to wire into the glean pipeline.
|
||||||
|
|
||||||
|
Model: any HuggingFace text-classification model. The existing Hybrid-BERT
|
||||||
|
label map (from diagnose/classifier.py) is reused when the model produces
|
||||||
|
NORMAL/SECURITY_ANOMALY/… outputs; other models get a generic severity map.
|
||||||
|
|
||||||
|
Scoring strategy
|
||||||
|
----------------
|
||||||
|
- Query unscored rows in batches (WHERE anomaly_scored_at IS NULL)
|
||||||
|
- Run each entry text through the HF pipeline
|
||||||
|
- Write anomaly_score + anomaly_label + anomaly_scored_at back
|
||||||
|
- INSERT high-confidence hits (score >= threshold) into detections table,
|
||||||
|
skipping duplicates so the scorer is safe to re-run
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
|
from app.db.dialect import q
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Label maps — reuse Hybrid-BERT vocabulary from diagnose/classifier.py
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Hybrid-BERT output label -> log severity.
_HYBRID_BERT_SEVERITY: dict[str, str] = dict(
    NORMAL="INFO",
    SECURITY_ANOMALY="ERROR",
    SYSTEM_FAILURE="CRITICAL",
    PERFORMANCE_ISSUE="WARN",
    NETWORK_ANOMALY="WARN",
    CONFIG_ERROR="ERROR",
    HARDWARE_ISSUE="CRITICAL",
)

# Fallback map for models that emit plain severity names.
_GENERIC_SEVERITY: dict[str, str] = dict(
    CRITICAL="CRITICAL",
    ERROR="ERROR",
    WARNING="WARN",
    WARN="WARN",
    INFO="INFO",
    DEBUG="DEBUG",
)

# Labels that count as anomalous when deciding whether to record a detection.
_ANOMALOUS_LABELS: frozenset[str] = frozenset(
    (
        "SECURITY_ANOMALY",
        "SYSTEM_FAILURE",
        "PERFORMANCE_ISSUE",
        "NETWORK_ANOMALY",
        "CONFIG_ERROR",
        "HARDWARE_ISSUE",
        "CRITICAL",
        "ERROR",
    )
)
|
||||||
|
|
||||||
|
# Scorer defaults, read once from the environment at import time.
_DEFAULT_THRESHOLD = float(os.getenv("TURNSTONE_ANOMALY_THRESHOLD", "0.75"))
_DEFAULT_MODEL = os.getenv("TURNSTONE_ANOMALY_MODEL", "")  # empty string disables scoring
_DEFAULT_DEVICE = os.getenv("TURNSTONE_ANOMALY_DEVICE", "cpu")
_DEFAULT_BATCH = int(os.getenv("TURNSTONE_ANOMALY_BATCH", "256"))
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ML singleton
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_pipeline: Any | None = None
# (model_id, device) the cached pipeline was built for; None while nothing is cached.
_pipeline_key: tuple[str, str] | None = None


def _get_pipeline(model_id: str, device: str) -> Any:
    """Return the cached text-classification pipeline, loading it on demand.

    The pipeline is rebuilt when nothing is cached or when the requested
    ``(model_id, device)`` differs from the pair the cached pipeline was built
    for. Previously the first pipeline was returned for every later call,
    whatever model or device was requested.

    Args:
        model_id: Model identifier passed to the ``transformers`` pipeline factory.
        device: Device argument passed to the ``transformers`` pipeline factory.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    global _pipeline, _pipeline_key  # noqa: PLW0603
    requested = (model_id, device)
    if _pipeline is None or _pipeline_key != requested:
        # Imported lazily so the module can be imported without transformers.
        from transformers import pipeline as hf_pipeline  # type: ignore[import-untyped]

        _pipeline = hf_pipeline("text-classification", model=model_id, device=device)
        _pipeline_key = requested
    return _pipeline
|
||||||
|
|
||||||
|
|
||||||
|
def reset_pipeline() -> None:
    """Drop the cached pipeline so the next lookup loads it again (test helper)."""
    global _pipeline  # noqa: PLW0603
    _pipeline = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Result types
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ScoringResult:
    """Outcome of one ``score_unscored`` run."""

    # Number of log entries that received a score.
    scored: int = 0
    # Number of rows inserted into the detections table.
    detections: int = 0
    # True when the run returned early because no model is configured.
    skipped: bool = False
    # Error text when model loading or inference failed, otherwise None.
    error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Internal helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _map_label(raw_label: str, score: float) -> tuple[str, str]:
    """Translate a raw model label into ``(upper-cased label, severity)``.

    The Hybrid-BERT map is consulted first, then the generic severity map;
    a label found in neither gets severity ``"WARN"``. ``score`` is accepted
    but not used.
    """
    normalised = raw_label.upper()
    severity = _HYBRID_BERT_SEVERITY.get(normalised)
    if severity is None:
        severity = _GENERIC_SEVERITY.get(normalised, "WARN")
    return normalised, severity
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_unscored(conn: Any, tenant_id: str, limit: int) -> list[dict]:
    """Fetch up to ``limit`` log entries that have no anomaly score yet.

    Rows belong to ``tenant_id`` or have an empty tenant id, and are ordered
    by ``ingest_time`` descending. Each row is converted to a plain dict.
    """
    sql = q("""
        SELECT id, source_id, text, timestamp_iso, severity
        FROM log_entries
        WHERE anomaly_scored_at IS NULL
          AND (tenant_id = ? OR tenant_id = '')
        ORDER BY ingest_time DESC
        LIMIT ?
    """)
    cursor = conn.execute(sql, (tenant_id, limit))
    return [dict(record) for record in cursor.fetchall()]
|
||||||
|
|
||||||
|
|
||||||
|
def _write_scores(
    conn: Any,
    rows: list[dict],
    scored_at: str,
) -> None:
    """Write score, label and scoring timestamp back to ``log_entries``.

    One UPDATE per row is issued through ``executemany``, keyed on the row id.
    Committing is left to the caller.
    """
    statement = q("UPDATE log_entries SET anomaly_score = ?, anomaly_label = ?, anomaly_scored_at = ? WHERE id = ?")
    parameters = []
    for entry in rows:
        parameters.append((entry["anomaly_score"], entry["anomaly_label"], scored_at, entry["id"]))
    conn.executemany(statement, parameters)
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_detections(conn: Any, rows: list[dict], tenant_id: str, detected_at: str) -> int:
    """Insert one ``detections`` row per entry and return how many succeeded.

    Args:
        conn: Open database connection; committing is left to the caller.
        rows: Enriched log entries carrying ``id``, ``source_id``,
            ``anomaly_label``, ``anomaly_score``, ``severity`` and ``text``
            (``timestamp_iso`` is optional).
        tenant_id: Tenant id stored on every inserted row.
        detected_at: Timestamp string stored as ``detected_at``.

    Returns:
        Number of rows inserted. A row whose insert fails is skipped so that
        re-running the scorer over already-detected entries stays harmless.
    """
    # The statement is loop-invariant, so build it once.
    statement = q("""
        INSERT INTO detections
            (id, tenant_id, entry_id, source_id, anomaly_label, anomaly_score,
             severity, text, timestamp_iso, detected_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """)
    inserted = 0
    for r in rows:
        try:
            conn.execute(
                statement,
                (
                    str(uuid.uuid4()),
                    tenant_id,
                    r["id"],
                    r["source_id"],
                    r["anomaly_label"],
                    r["anomaly_score"],
                    r["severity"],
                    r["text"][:2000],  # stored text is capped at 2000 characters
                    r.get("timestamp_iso"),
                    detected_at,
                ),
            )
        except Exception as exc:  # noqa: BLE001
            # Best-effort: a duplicate entry_id or constraint violation is an
            # expected skip, but log it so unexpected failures are not
            # silently lost.
            # NOTE(review): if the backend aborts the transaction after a
            # failed statement, later inserts in this batch would fail too;
            # confirm against the driver behind get_conn.
            logger.debug("Skipped detection for entry %r: %s", r.get("id"), exc)
            continue
        inserted += 1
    return inserted
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def score_unscored(
    db_path: Path,
    model_id: str = _DEFAULT_MODEL,
    device: str = _DEFAULT_DEVICE,
    batch_size: int = _DEFAULT_BATCH,
    threshold: float = _DEFAULT_THRESHOLD,
) -> ScoringResult:
    """Score every unscored log entry, one batch at a time.

    With an empty ``model_id`` the function returns ``ScoringResult(skipped=True)``
    straight away, so it can be wired in unconditionally. A model-load or
    inference failure is logged and reported through ``ScoringResult.error``
    together with the totals accumulated so far. A write that fails with
    "database is locked" is retried up to three times, 10 seconds apart;
    any other write failure, or a fourth locked failure, is re-raised.
    """
    if not model_id:
        return ScoringResult(skipped=True)

    try:
        classifier = _get_pipeline(model_id, device)
    except Exception as exc:
        logger.error("Failed to load anomaly model %r: %s", model_id, exc)
        return ScoringResult(error=str(exc))

    tenant_id = resolve_tenant_id()
    scored_total = 0
    detections_total = 0

    while True:
        with get_conn(db_path) as conn:
            pending = _fetch_unscored(conn, tenant_id, batch_size)
        if not pending:
            break

        # Entry text is cut to 512 characters before it reaches the model.
        snippets = [entry["text"][:512] for entry in pending]
        try:
            predictions = classifier(snippets, truncation=True, max_length=512)
        except Exception as exc:
            logger.error("Inference error on batch of %d: %s", len(pending), exc)
            return ScoringResult(scored=scored_total, detections=detections_total, error=str(exc))

        stamp = datetime.now(tz=timezone.utc).isoformat()
        enriched_rows: list[dict] = []
        for entry, prediction in zip(pending, predictions):
            label, severity = _map_label(prediction["label"], prediction["score"])
            enriched_rows.append(
                {
                    **entry,
                    "anomaly_score": prediction["score"],
                    "anomaly_label": label,
                    "severity": severity,
                }
            )
        # Only anomalous labels at or above the threshold become detections.
        flagged_rows = [
            row
            for row in enriched_rows
            if row["anomaly_label"] in _ANOMALOUS_LABELS and row["anomaly_score"] >= threshold
        ]

        attempt = 0
        while True:
            try:
                with get_conn(db_path) as conn:
                    _write_scores(conn, enriched_rows, stamp)
                    new_detections = _insert_detections(conn, flagged_rows, tenant_id, stamp)
                    conn.commit()
                break
            except Exception as exc:
                is_locked = "database is locked" in str(exc).lower()
                if not is_locked or attempt >= 3:
                    raise
                logger.warning("DB locked, retrying write in 10s (attempt %d/4)", attempt + 1)
                time.sleep(10)
                attempt += 1

        scored_total += len(enriched_rows)
        detections_total += new_detections
        logger.info(
            "Scored %d entries, %d detections (threshold=%.2f)",
            len(enriched_rows), new_detections, threshold,
        )

        # A short batch means the backlog is exhausted.
        if len(pending) < batch_size:
            break

    return ScoringResult(scored=scored_total, detections=detections_total)
|
||||||
|
|
||||||
|
|
||||||
|
def list_detections(
    db_path: Path,
    limit: int = 100,
    unacked_only: bool = False,
    label: str | None = None,
    scorer: str | None = None,
) -> list[dict]:
    """Return up to ``limit`` detections, newest ``detected_at`` first.

    Rows are restricted to the resolved tenant (or an empty tenant id).
    Optional filters: unacknowledged rows only, an exact label (upper-cased
    before comparison) and an exact scorer name (lower-cased before comparison).
    """
    clauses = ["(tenant_id = ? OR tenant_id = '')"]
    args: list[Any] = [resolve_tenant_id()]

    if unacked_only:
        clauses.append("acknowledged = 0")
    if label:
        clauses.append(q("anomaly_label = ?"))
        args.append(label.upper())
    if scorer:
        clauses.append(q("scorer = ?"))
        args.append(scorer.lower())

    where = " AND ".join(clauses)
    # Only fixed clause text is interpolated; every value is bound as a parameter.
    sql = q(f"SELECT * FROM detections WHERE {where} ORDER BY detected_at DESC LIMIT ?")  # noqa: S608
    args.append(limit)
    with get_conn(db_path) as conn:
        found = conn.execute(sql, tuple(args)).fetchall()
    return [dict(record) for record in found]
|
||||||
|
|
||||||
|
|
||||||
|
def acknowledge_detection(db_path: Path, detection_id: str, notes: str = "") -> bool:
    """Flag a detection as acknowledged and store the reviewer's notes.

    The update is limited to rows of the resolved tenant (or an empty tenant
    id) and is committed before returning. Returns ``True`` when at least one
    row was updated.
    """
    tenant_id = resolve_tenant_id()
    acked_at = datetime.now(tz=timezone.utc).isoformat()
    statement = q("""
        UPDATE detections
        SET acknowledged = 1, acknowledged_at = ?, notes = ?
        WHERE id = ? AND (tenant_id = ? OR tenant_id = '')
    """)
    bindings = (acked_at, notes, detection_id, tenant_id)
    with get_conn(db_path) as conn:
        cursor = conn.execute(statement, bindings)
        conn.commit()
    changed = cursor.rowcount
    return changed > 0
|
||||||
|
|
@ -4,10 +4,12 @@ from __future__ import annotations
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
|
|
@ -91,26 +93,26 @@ def _now_iso() -> str:
|
||||||
return datetime.now(timezone.utc).isoformat()
|
return datetime.now(timezone.utc).isoformat()
|
||||||
|
|
||||||
|
|
||||||
def _row_to_candidate(row: tuple) -> BlocklistCandidate:
|
def _row_to_candidate(row: Any) -> BlocklistCandidate:
|
||||||
return BlocklistCandidate(
|
return BlocklistCandidate(
|
||||||
id=row[0],
|
id=row["id"],
|
||||||
domain_or_ip=row[1],
|
domain_or_ip=row["domain_or_ip"],
|
||||||
source_device_ip=row[2],
|
source_device_ip=row["source_device_ip"],
|
||||||
source_device_name=row[3],
|
source_device_name=row["source_device_name"],
|
||||||
first_seen=row[4],
|
first_seen=row["first_seen"],
|
||||||
last_seen=row[5],
|
last_seen=row["last_seen"],
|
||||||
hit_count=row[6],
|
hit_count=row["hit_count"],
|
||||||
status=row[7],
|
status=row["status"],
|
||||||
pushed_at=row[8],
|
pushed_at=row["pushed_at"],
|
||||||
log_evidence=json.loads(row[9] or "[]"),
|
log_evidence=json.loads(row["log_evidence"] or "[]"),
|
||||||
matched_rule=row[10],
|
matched_rule=row["matched_rule"],
|
||||||
llm_score=row[11],
|
llm_score=row["llm_score"],
|
||||||
llm_reason=row[12],
|
llm_reason=row["llm_reason"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _upsert_candidate(
|
def _upsert_candidate(
|
||||||
conn: sqlite3.Connection,
|
conn: Any,
|
||||||
domain_or_ip: str,
|
domain_or_ip: str,
|
||||||
source_device_ip: str | None,
|
source_device_ip: str | None,
|
||||||
source_device_name: str | None,
|
source_device_name: str | None,
|
||||||
|
|
@ -119,26 +121,29 @@ def _upsert_candidate(
|
||||||
now: str,
|
now: str,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Insert or update a candidate. Returns True if a new row was created."""
|
"""Insert or update a candidate. Returns True if a new row was created."""
|
||||||
|
tid = resolve_tenant_id()
|
||||||
row = conn.execute(
|
row = conn.execute(
|
||||||
"SELECT id, hit_count, log_evidence FROM blocklist_candidates "
|
"SELECT id, hit_count, log_evidence FROM blocklist_candidates "
|
||||||
"WHERE domain_or_ip = ? AND source_device_ip IS ?",
|
"WHERE domain_or_ip = ? AND source_device_ip IS ? AND (tenant_id = ? OR tenant_id = '')",
|
||||||
(domain_or_ip, source_device_ip),
|
(domain_or_ip, source_device_ip, tid),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
|
|
||||||
if row is None:
|
if row is None:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"""INSERT INTO blocklist_candidates
|
"""INSERT INTO blocklist_candidates
|
||||||
(id, domain_or_ip, source_device_ip, source_device_name,
|
(id, tenant_id, domain_or_ip, source_device_ip, source_device_name,
|
||||||
first_seen, last_seen, hit_count, status, pushed_at, log_evidence, matched_rule)
|
first_seen, last_seen, hit_count, status, pushed_at, log_evidence, matched_rule)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, 1, 'pending', NULL, ?, ?)""",
|
VALUES (?, ?, ?, ?, ?, ?, ?, 1, 'pending', NULL, ?, ?)""",
|
||||||
(
|
(
|
||||||
str(uuid.uuid4()), domain_or_ip, source_device_ip, source_device_name,
|
str(uuid.uuid4()), tid, domain_or_ip, source_device_ip, source_device_name,
|
||||||
now, now, json.dumps([entry_id]), matched_rule,
|
now, now, json.dumps([entry_id]), matched_rule,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
existing_id, hit_count, existing_evidence = row
|
existing_id = row["id"]
|
||||||
|
hit_count = row["hit_count"]
|
||||||
|
existing_evidence = row["log_evidence"]
|
||||||
evidence = json.loads(existing_evidence or "[]")
|
evidence = json.loads(existing_evidence or "[]")
|
||||||
if entry_id not in evidence:
|
if entry_id not in evidence:
|
||||||
evidence.append(entry_id)
|
evidence.append(entry_id)
|
||||||
|
|
@ -172,14 +177,16 @@ def run_scan(
|
||||||
now = _now_iso()
|
now = _now_iso()
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
try:
|
with get_conn(db_path) as conn:
|
||||||
rows = conn.execute(
|
rows = conn.execute(
|
||||||
f"SELECT id, text FROM log_entries WHERE source_id IN ({placeholders})",
|
f"SELECT id, text FROM log_entries WHERE source_id IN ({placeholders}) AND (tenant_id = ? OR tenant_id = '')", # noqa: S608
|
||||||
router_source_ids,
|
(*router_source_ids, tid),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
|
|
||||||
for entry_id, text in rows:
|
for row in rows:
|
||||||
|
entry_id, text = row["id"], row["text"]
|
||||||
|
# rest of loop body follows unchanged
|
||||||
src_ip: str | None = None
|
src_ip: str | None = None
|
||||||
dst: str | None = None
|
dst: str | None = None
|
||||||
|
|
||||||
|
|
@ -204,8 +211,6 @@ def run_scan(
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
@ -226,26 +231,27 @@ def list_candidates(
|
||||||
status: str | None = None,
|
status: str | None = None,
|
||||||
device_ip: str | None = None,
|
device_ip: str | None = None,
|
||||||
) -> list[BlocklistCandidate]:
|
) -> list[BlocklistCandidate]:
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
try:
|
conditions = ["(tenant_id = ? OR tenant_id = '')"]
|
||||||
query = f"{_CANDIDATE_SELECT} WHERE 1=1"
|
params: list = [tid]
|
||||||
params: list = []
|
if status and status != "all":
|
||||||
if status and status != "all":
|
conditions.append("status = ?")
|
||||||
query += " AND status = ?"
|
params.append(status)
|
||||||
params.append(status)
|
if device_ip:
|
||||||
if device_ip:
|
conditions.append("source_device_ip = ?")
|
||||||
query += " AND source_device_ip = ?"
|
params.append(device_ip)
|
||||||
params.append(device_ip)
|
where = " AND ".join(conditions)
|
||||||
query += " ORDER BY last_seen DESC"
|
with get_conn(db_path) as conn:
|
||||||
rows = conn.execute(query, params).fetchall()
|
rows = conn.execute(
|
||||||
finally:
|
f"{_CANDIDATE_SELECT} WHERE {where} ORDER BY last_seen DESC", # noqa: S608
|
||||||
conn.close()
|
params,
|
||||||
|
).fetchall()
|
||||||
return [_row_to_candidate(r) for r in rows]
|
return [_row_to_candidate(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
def _get_candidate(conn: sqlite3.Connection, candidate_id: str) -> BlocklistCandidate:
|
def _get_candidate(conn: Any, candidate_id: str) -> BlocklistCandidate:
|
||||||
row = conn.execute(
|
row = conn.execute(
|
||||||
f"{_CANDIDATE_SELECT} WHERE id=?",
|
f"{_CANDIDATE_SELECT} WHERE id=?", # noqa: S608
|
||||||
(candidate_id,),
|
(candidate_id,),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
if row is None:
|
if row is None:
|
||||||
|
|
@ -255,43 +261,31 @@ def _get_candidate(conn: sqlite3.Connection, candidate_id: str) -> BlocklistCand
|
||||||
|
|
||||||
def get_candidate(db_path: Path, candidate_id: str) -> BlocklistCandidate:
    """Fetch a single candidate by ID, restricted to the current tenant.

    The lookup applies the same tenant predicate used by the list and scan
    queries in this module, so an ID belonging to another tenant is treated
    as not found instead of being returned.

    Args:
        db_path: Database location handed to ``get_conn``.
        candidate_id: Primary key of the candidate row.

    Returns:
        The matching candidate.

    Raises:
        KeyError: No candidate with this ID is visible to the current tenant.
    """
    tid = resolve_tenant_id()
    with get_conn(db_path) as conn:
        row = conn.execute(
            f"{_CANDIDATE_SELECT} WHERE id = ? AND (tenant_id = ? OR tenant_id = '')",  # noqa: S608
            (candidate_id, tid),
        ).fetchone()
    if row is None:
        raise KeyError(candidate_id)
    return _row_to_candidate(row)
|
|
||||||
|
|
||||||
|
|
||||||
def update_candidate_status(db_path: Path, candidate_id: str, new_status: str) -> BlocklistCandidate:
    """Set a candidate's status and return the refreshed row.

    The UPDATE is scoped to the current tenant (or legacy rows with an empty
    tenant_id) so one tenant cannot change another tenant's candidate.

    Args:
        db_path: Database location handed to ``get_conn``.
        candidate_id: Primary key of the candidate row.
        new_status: Target status; must be a member of ``_VALID_STATUSES``.

    Returns:
        The candidate as stored after the update.

    Raises:
        ValueError: ``new_status`` is not a valid status.
        KeyError: No candidate with this ID is visible to the current tenant.
    """
    if new_status not in _VALID_STATUSES:
        raise ValueError(f"Invalid status {new_status!r}. Must be one of {_VALID_STATUSES}")
    tid = resolve_tenant_id()
    with get_conn(db_path) as conn:
        cur = conn.execute(
            "UPDATE blocklist_candidates SET status=? "
            "WHERE id=? AND (tenant_id = ? OR tenant_id = '')",
            (new_status, candidate_id, tid),
        )
        # Zero matched rows means the ID is unknown or belongs to another tenant.
        if cur.rowcount == 0:
            raise KeyError(candidate_id)
        conn.commit()
        return _get_candidate(conn, candidate_id)
|
|
||||||
|
|
||||||
|
|
||||||
def mark_pushed(db_path: Path, candidate_id: str) -> BlocklistCandidate:
    """Mark a candidate as pushed, stamping ``pushed_at`` with the current time.

    The UPDATE is scoped to the current tenant (or legacy rows with an empty
    tenant_id) so one tenant cannot change another tenant's candidate.

    Args:
        db_path: Database location handed to ``get_conn``.
        candidate_id: Primary key of the candidate row.

    Returns:
        The candidate as stored after the update.

    Raises:
        KeyError: No candidate with this ID is visible to the current tenant.
    """
    tid = resolve_tenant_id()
    with get_conn(db_path) as conn:
        cur = conn.execute(
            "UPDATE blocklist_candidates SET status='pushed', pushed_at=? "
            "WHERE id=? AND (tenant_id = ? OR tenant_id = '')",
            (_now_iso(), candidate_id, tid),
        )
        # Zero matched rows means the ID is unknown or belongs to another tenant.
        if cur.rowcount == 0:
            raise KeyError(candidate_id)
        conn.commit()
        return _get_candidate(conn, candidate_id)
|
|
||||||
|
|
||||||
|
|
||||||
def mark_unblocked(db_path: Path, candidate_id: str) -> BlocklistCandidate:
    """Mark a candidate as unblocked and return the refreshed row.

    The UPDATE is scoped to the current tenant (or legacy rows with an empty
    tenant_id) so one tenant cannot change another tenant's candidate.

    Args:
        db_path: Database location handed to ``get_conn``.
        candidate_id: Primary key of the candidate row.

    Returns:
        The candidate as stored after the update.

    Raises:
        KeyError: No candidate with this ID is visible to the current tenant.
    """
    tid = resolve_tenant_id()
    with get_conn(db_path) as conn:
        cur = conn.execute(
            "UPDATE blocklist_candidates SET status='unblocked' "
            "WHERE id=? AND (tenant_id = ? OR tenant_id = '')",
            (candidate_id, tid),
        )
        # Zero matched rows means the ID is unknown or belongs to another tenant.
        if cur.rowcount == 0:
            raise KeyError(candidate_id)
        conn.commit()
        return _get_candidate(conn, candidate_id)
|
|
||||||
|
|
|
||||||
241
app/services/cybersec.py
Normal file
241
app/services/cybersec.py
Normal file
|
|
@ -0,0 +1,241 @@
|
||||||
|
"""Cybersecurity-focused scoring pipeline using zero-shot classification.
|
||||||
|
|
||||||
|
Runs a second-pass analysis on entries that were already flagged by the
|
||||||
|
anomaly scorer or that have pattern matches. Uses a zero-shot classification
|
||||||
|
model (DeBERTa-v3-base-mnli is cached locally) so no fine-tuning is needed.
|
||||||
|
|
||||||
|
The scorer writes ml_score / ml_label / ml_scored_at to log_entries and
|
||||||
|
inserts high-confidence non-normal hits into the detections table tagged
|
||||||
|
with scorer='cybersec'.
|
||||||
|
|
||||||
|
Env vars
|
||||||
|
--------
|
||||||
|
TURNSTONE_CYBERSEC_MODEL — HF model id for zero-shot classification.
|
||||||
|
Recommended: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
|
||||||
|
(already cached from the diagnose pipeline).
|
||||||
|
Set to empty string to disable (safe default).
|
||||||
|
TURNSTONE_CYBERSEC_DEVICE — 'cpu' (default) or 'cuda'
|
||||||
|
TURNSTONE_CYBERSEC_THRESHOLD — float confidence floor for detection insertion (default 0.60)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
|
from app.db.dialect import q
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Candidate labels — cybersec vocabulary for zero-shot inference
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
CYBERSEC_LABELS: list[str] = [
|
||||||
|
"authentication failure or brute force attack",
|
||||||
|
"privilege escalation or unauthorized access",
|
||||||
|
"network intrusion or port scan",
|
||||||
|
"malware or suspicious process activity",
|
||||||
|
"data exfiltration or unusual outbound traffic",
|
||||||
|
"normal system operation",
|
||||||
|
]
|
||||||
|
|
||||||
|
_NORMAL_LABEL = "normal system operation"
|
||||||
|
|
||||||
|
_LABEL_SEVERITY: dict[str, str] = {
|
||||||
|
"authentication failure or brute force attack": "ERROR",
|
||||||
|
"privilege escalation or unauthorized access": "CRITICAL",
|
||||||
|
"network intrusion or port scan": "ERROR",
|
||||||
|
"malware or suspicious process activity": "CRITICAL",
|
||||||
|
"data exfiltration or unusual outbound traffic":"CRITICAL",
|
||||||
|
"normal system operation": "INFO",
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pipeline singleton
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_pipeline: Any = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pipeline(model_id: str, device: str) -> Any:
    """Return the process-wide zero-shot pipeline, building it on first use.

    The pipeline is cached in the module-level ``_pipeline``. Once it is set,
    later calls return it as-is, even if they pass a different ``model_id``
    or ``device``. ``transformers`` is imported only when the pipeline
    actually has to be built.
    """
    global _pipeline  # noqa: PLW0603
    if _pipeline is not None:
        return _pipeline

    from transformers import pipeline as build_pipeline  # type: ignore[import-untyped]

    logger.info("loading cybersec zero-shot pipeline: %s on %s", model_id, device)
    # Device index 0 for "cuda"; -1 for any other value.
    device_index = 0 if device == "cuda" else -1
    _pipeline = build_pipeline(
        "zero-shot-classification",
        model=model_id,
        device=device_index,
    )
    logger.info("cybersec pipeline ready")
    return _pipeline
|
||||||
|
|
||||||
|
|
||||||
|
def reset_pipeline() -> None:
    """Drop the cached pipeline so the next lookup rebuilds it (testing only)."""
    globals()["_pipeline"] = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Result type
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class CybersecResult:
    """Outcome of one cybersec scoring run."""

    # Number of log entries that received an ml_score / ml_label.
    scored: int = 0
    # Number of rows inserted into the detections table.
    detections: int = 0
    # True when the run did no work (scoring disabled or nothing to score).
    skipped: bool = False
    # Error text when the run failed; None otherwise.
    error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Core scoring function
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def score_security_entries(
    db_path: Path,
    model_id: str,
    device: str = "cpu",
    batch_size: int = 32,
    threshold: float = 0.60,
) -> CybersecResult:
    """Score entries that were anomaly-flagged or pattern-matched.

    Only entries with ml_scored_at IS NULL are selected, so entries that were
    already scored are not processed again. Each scored entry gets
    ml_score / ml_label / ml_scored_at written; entries whose top label is not
    the normal label and whose score reaches ``threshold`` also get a row in
    detections with scorer='cybersec'.

    Args:
        db_path: Database location handed to ``get_conn``.
        model_id: Model id for the zero-shot pipeline; an empty value disables
            scoring.
        device: Passed to ``_get_pipeline`` ('cpu' or 'cuda').
        batch_size: Number of texts per inference call. At most
            ``batch_size * 10`` entries are fetched per run.
        threshold: Minimum top-label score for inserting a detection.

    Returns:
        A CybersecResult. ``skipped`` is set when scoring is disabled or there
        is nothing to score; ``error`` is set when the pipeline fails to load
        or a database error aborts the run (counts reflect work done so far).
    """
    if not model_id:
        return CybersecResult(skipped=True)

    tenant_id = resolve_tenant_id()
    try:
        pipe = _get_pipeline(model_id, device)
    except Exception as exc:
        logger.error("failed to load cybersec pipeline: %s", exc)
        return CybersecResult(error=str(exc))

    total_scored = 0
    total_detections = 0

    try:
        with get_conn(db_path) as conn:
            # Only score entries that are worth a second look:
            # anomaly-flagged (non-normal) OR have at least one pattern match.
            rows = conn.execute(
                q("""
                    SELECT id, source_id, text, timestamp_iso
                    FROM log_entries
                    WHERE (tenant_id = ? OR tenant_id = '')
                      AND ml_scored_at IS NULL
                      AND (
                        (anomaly_label IS NOT NULL AND anomaly_label != 'NORMAL')
                        OR (matched_patterns IS NOT NULL AND matched_patterns != '[]' AND matched_patterns != '')
                      )
                    LIMIT ?
                """),
                (tenant_id, batch_size * 10),
            ).fetchall()

        if not rows:
            return CybersecResult(skipped=True)

        # Process in chunks to avoid OOM on large backlogs
        for i in range(0, len(rows), batch_size):
            chunk = rows[i : i + batch_size]
            texts = [r["text"] for r in chunk]

            try:
                results = pipe(texts, candidate_labels=CYBERSEC_LABELS, multi_label=False)
            except Exception as exc:
                # The chunk keeps ml_scored_at NULL and is picked up by a later run.
                logger.warning("zero-shot inference error on chunk %d: %s", i, exc)
                continue

            # NOTE(review): some pipeline versions may return a bare dict for a
            # single input; normalise so zip() pairs rows with results — confirm.
            if isinstance(results, dict):
                results = [results]

            now = datetime.now(tz=timezone.utc).isoformat()

            with get_conn(db_path) as conn:
                for row, result in zip(chunk, results):
                    top_label: str = result["labels"][0]
                    top_score: float = result["scores"][0]

                    conn.execute(
                        q("""
                            UPDATE log_entries
                            SET ml_score = ?, ml_label = ?, ml_scored_at = ?
                            WHERE id = ? AND (tenant_id = ? OR tenant_id = '')
                        """),
                        (top_score, top_label, now, row["id"], tenant_id),
                    )
                    total_scored += 1

                    if top_score >= threshold and top_label != _NORMAL_LABEL:
                        severity = _LABEL_SEVERITY.get(top_label, "WARN")
                        try:
                            conn.execute(
                                q("""
                                    INSERT INTO detections
                                        (id, tenant_id, entry_id, source_id, anomaly_label,
                                         anomaly_score, severity, text, timestamp_iso,
                                         detected_at, scorer)
                                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'cybersec')
                                """),
                                (
                                    str(uuid.uuid4()),
                                    tenant_id,
                                    row["id"],
                                    row["source_id"],
                                    top_label,
                                    top_score,
                                    severity,
                                    row["text"],
                                    row["timestamp_iso"],
                                    now,
                                ),
                            )
                            total_detections += 1
                        except Exception as exc:
                            # Best-effort: the entry may already have a detection.
                            # Keep going, but leave a trace so real failures
                            # are not invisible.
                            logger.warning(
                                "cybersec detection insert skipped for entry %s: %s",
                                row["id"],
                                exc,
                            )

                conn.commit()

    except Exception as exc:
        logger.error("cybersec scoring failed: %s", exc, exc_info=True)
        return CybersecResult(scored=total_scored, detections=total_detections, error=str(exc))

    return CybersecResult(scored=total_scored, detections=total_detections)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Query helpers (used by REST layer)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def list_cybersec_detections(
    db_path: Path,
    limit: int = 100,
    unacked_only: bool = False,
    label: str | None = None,
) -> list[dict]:
    """Return cybersec detections ordered by detected_at DESC.

    Args:
        db_path: Database location handed to ``get_conn``.
        limit: Maximum number of rows to return.
        unacked_only: When true, only rows with ``acknowledged = 0``.
        label: When given, only rows whose ``anomaly_label`` equals it.

    Returns:
        One dict per detection row, restricted to the current tenant (or rows
        with an empty tenant_id) and to scorer='cybersec'.
    """
    tenant_id = resolve_tenant_id()
    conditions = ["(tenant_id = ? OR tenant_id = '')", "scorer = 'cybersec'"]
    params: list[Any] = [tenant_id]

    if unacked_only:
        conditions.append("acknowledged = 0")
    if label:
        # Plain fragment, like the tenant condition: q() is applied once to
        # the assembled statement below, not to individual pieces.
        conditions.append("anomaly_label = ?")
        params.append(label)

    where = " AND ".join(conditions)
    with get_conn(db_path) as conn:
        rows = conn.execute(
            q(f"SELECT * FROM detections WHERE {where} ORDER BY detected_at DESC LIMIT ?"),  # noqa: S608
            (*params, limit),
        ).fetchall()
    return [dict(r) for r in rows]
|
||||||
|
|
@ -23,16 +23,30 @@ _JSON_FENCE_RE = re.compile(
|
||||||
re.MULTILINE,
|
re.MULTILINE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Reasoning models (DeepSeek-R1, Qwen QwQ, Llama thinking variants) embed
|
||||||
|
# chain-of-thought inside <think>…</think> tags in the content field.
|
||||||
|
# Strip them so only the final response reaches the UI.
|
||||||
|
_THINK_TAG_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_thinking(text: str) -> str:
|
||||||
|
"""Remove <think>…</think> blocks and trim surrounding whitespace."""
|
||||||
|
return _THINK_TAG_RE.sub("", text).strip()
|
||||||
|
|
||||||
|
|
||||||
def extract_content(resp_json: dict) -> str | None:
    """Pull text content from an OpenAI-compat chat completion response.

    Strips reasoning-model thinking tags before returning.

    Returns None when the response has no choices, when the first choice has
    no (or a null) ``message``, or when the content is empty after stripping.
    """
    choices = resp_json.get("choices") or []
    if not choices:
        return None
    # A present-but-null "message" must not raise; treat it as no content.
    message = choices[0].get("message") or {}
    raw = (message.get("content") or "").strip()
    if not raw:
        return None
    return _strip_thinking(raw) or None
|
||||||
|
|
||||||
|
|
||||||
def strip_json_fences(raw: str) -> str:
|
def strip_json_fences(raw: str) -> str:
|
||||||
|
|
@ -101,7 +115,7 @@ def call_llm(
|
||||||
- The task endpoint is unreachable (connection error, timeout, etc.).
|
- The task endpoint is unreachable (connection error, timeout, etc.).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
llm_url: Base URL of the LLM backend (e.g. ``http://10.1.10.71:7700``).
|
llm_url: Base URL of the LLM backend (e.g. ``http://<YOUR_HOST_IP>:7700``).
|
||||||
llm_model: Model identifier used in the OpenAI-compat fallback call.
|
llm_model: Model identifier used in the OpenAI-compat fallback call.
|
||||||
llm_api_key: Optional bearer token for authenticated endpoints.
|
llm_api_key: Optional bearer token for authenticated endpoints.
|
||||||
messages: OpenAI-style message list (system + user turns).
|
messages: OpenAI-style message list (system + user turns).
|
||||||
|
|
|
||||||
|
|
@ -64,13 +64,43 @@ def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str:
|
||||||
h = rh.hypothesis
|
h = rh.hypothesis
|
||||||
conf_pct = int(h.confidence * 100)
|
conf_pct = int(h.confidence * 100)
|
||||||
novelty = f"{rh.novelty_score:.2f}"
|
novelty = f"{rh.novelty_score:.2f}"
|
||||||
|
desc = f"\n {h.description}" if h.description else ""
|
||||||
lines.append(
|
lines.append(
|
||||||
f"- [{h.severity}, {conf_pct}%] {h.title}\n"
|
f"- [{h.severity}, {conf_pct}% conf, novelty {novelty}] {h.title}{desc}"
|
||||||
f" Novelty: {novelty}"
|
|
||||||
)
|
)
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_timeline_block(timeline: TimelineResult) -> str:
|
||||||
|
"""Build a sequenced cluster block so the synthesizer can narrate what happened.
|
||||||
|
|
||||||
|
Mirrors the format used by the hypothesizer, but adds gap information so the
|
||||||
|
LLM can reason about silence windows between bursts.
|
||||||
|
"""
|
||||||
|
if not timeline.clusters:
|
||||||
|
return "(no clusters)"
|
||||||
|
lines: list[str] = []
|
||||||
|
for i, c in enumerate(timeline.clusters):
|
||||||
|
ts = c.start_iso or "unknown"
|
||||||
|
sources = ", ".join(list(c.source_ids)[:3])
|
||||||
|
tags = ", ".join(list(c.pattern_tags)[:4])
|
||||||
|
burst_label = " [BURST]" if c.burst else ""
|
||||||
|
gap_label = (
|
||||||
|
f" (+{int(c.gap_before_seconds)}s silence)"
|
||||||
|
if c.gap_before_seconds > 30
|
||||||
|
else ""
|
||||||
|
)
|
||||||
|
text_preview = c.representative_text[:200]
|
||||||
|
line = (
|
||||||
|
f"Cluster {i + 1}{burst_label}{gap_label} @ {ts} [{c.severity}] "
|
||||||
|
f"({sources}) — {text_preview}"
|
||||||
|
)
|
||||||
|
if tags:
|
||||||
|
line += f" [patterns: {tags}]"
|
||||||
|
lines.append(line)
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def _build_context_block(ctx: RetrievedContext) -> str:
|
def _build_context_block(ctx: RetrievedContext) -> str:
|
||||||
"""Build the runbook context block for the prompt."""
|
"""Build the runbook context block for the prompt."""
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
|
|
@ -144,17 +174,18 @@ class SummarySynthesizer:
|
||||||
|
|
||||||
system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])
|
system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])
|
||||||
hypothesis_block = _build_hypothesis_block(ranked)
|
hypothesis_block = _build_hypothesis_block(ranked)
|
||||||
|
timeline_block = _build_timeline_block(timeline)
|
||||||
context_block = _build_context_block(ctx)
|
context_block = _build_context_block(ctx)
|
||||||
dominant = ", ".join(timeline.dominant_sources[:5]) or "none"
|
dominant = ", ".join(timeline.dominant_sources[:5]) or "none"
|
||||||
|
|
||||||
user_message = (
|
user_message = (
|
||||||
f"Query: {query}\n\n"
|
f"Query: {query}\n\n"
|
||||||
f"Timeline summary:\n"
|
f"Timeline ({len(timeline.clusters)} clusters, "
|
||||||
f"- {len(timeline.clusters)} clusters, "
|
|
||||||
f"{timeline.burst_count} bursts, "
|
f"{timeline.burst_count} bursts, "
|
||||||
f"{timeline.gap_count} silence gaps\n"
|
f"{timeline.gap_count} silence gaps; "
|
||||||
f"- Primary sources: {dominant}\n\n"
|
f"primary sources: {dominant}):\n"
|
||||||
f"Top hypotheses:\n{hypothesis_block}\n\n"
|
f"{timeline_block}\n\n"
|
||||||
|
f"Root-cause hypotheses:\n{hypothesis_block}\n\n"
|
||||||
f"Context from runbooks:\n{context_block}"
|
f"Context from runbooks:\n{context_block}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,10 @@ from __future__ import annotations
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
@ -171,3 +173,113 @@ def validate_source(src: dict[str, Any]) -> str | None:
|
||||||
if src_type == "docker" and not src.get("container"):
|
if src_type == "docker" and not src.get("container"):
|
||||||
return f"Docker source '{src['id']}' is missing 'container'"
|
return f"Docker source '{src['id']}' is missing 'container'"
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Extensions considered as log files in the filesystem scanner.
|
||||||
|
_LOG_EXTENSIONS = {"", ".log", ".txt", ".out", ".err"}
|
||||||
|
# Max file size to consider (500 MB).
|
||||||
|
_MAX_SIZE = 500 * 1024 * 1024
|
||||||
|
# Recency window in days — the recency score falls linearly from 1 to 0 over this many days (despite the name, not a half-life).
|
||||||
|
_RECENCY_HALFLIFE_DAYS = 30
|
||||||
|
|
||||||
|
|
||||||
|
def _path_to_source_id(path: Path) -> str:
|
||||||
|
"""Convert an absolute path to a kebab-case source ID."""
|
||||||
|
raw = re.sub(r"[^a-zA-Z0-9]+", "-", str(path)).strip("-").lower()
|
||||||
|
return raw[:64]
|
||||||
|
|
||||||
|
|
||||||
|
def scan_log_directories(
    query: str | None = None,
    dirs: list[str] | None = None,
    max_depth: int = 4,
    max_results: int = 25,
) -> list[dict[str, Any]]:
    """Scan filesystem directories for log files ranked by recency and keyword match.

    Scoring weights:
    - Recency (0-1): 1.0 for a file modified just now, falling linearly to 0
      at ``_RECENCY_HALFLIFE_DAYS`` days old
    - Size (0-1): prefer 1 KB – 50 MB; tiny or very large files score lower,
      empty files and files above ``_MAX_SIZE`` are excluded
    - Keyword (0-1): fraction of query words (3+ characters) that occur in
      the lowercased path

    With a query the total is 0.4 recency + 0.2 size + 0.4 keyword; without
    one it is 0.7 recency + 0.3 size.

    Hidden entries, symlinks, unreadable files and files whose extension is
    not in ``_LOG_EXTENSIONS`` are skipped. Entries that cannot be inspected
    are skipped rather than aborting the scan, and a file reachable through
    more than one entry in ``dirs`` is reported once.

    Args:
        query: Free-text hint used for keyword scoring; optional.
        dirs: Root directories to walk; defaults to /var/log and /opt.
        max_depth: Maximum directory depth below each root.
        max_results: Maximum number of candidates returned.

    Returns up to *max_results* candidates sorted by descending score.
    """
    if dirs is None:
        dirs = ["/var/log", "/opt"]

    now = time.time()
    query_stems: list[str] = []
    if query:
        query_stems = [w.lower() for w in re.split(r"\W+", query) if len(w) >= 3]

    candidates: list[dict[str, Any]] = []
    seen_paths: set[str] = set()

    def _score(size: int, mtime: float, path_lower: str) -> float:
        """Combine recency, size and keyword scores into one ranking value."""
        age_days = (now - mtime) / 86400
        # Clamp both ends: a future mtime (clock skew) must not exceed 1.0.
        recency = min(1.0, max(0.0, 1.0 - age_days / _RECENCY_HALFLIFE_DAYS))

        if size < 1024:
            size_score = 0.3
        elif size <= 50 * 1024 * 1024:
            size_score = 1.0
        else:
            # Large files: score shrinks with size above 50 MB, floored at 0.1
            size_score = max(0.1, 1.0 - (size - 50 * 1024 * 1024) / _MAX_SIZE)

        if not query_stems:
            return recency * 0.7 + size_score * 0.3

        matches = sum(1 for stem in query_stems if stem in path_lower)
        keyword_score = min(1.0, matches / max(len(query_stems), 1))
        return recency * 0.4 + size_score * 0.2 + keyword_score * 0.4

    def _walk(root: Path, depth: int) -> None:
        if depth > max_depth:
            return
        try:
            entries = list(root.iterdir())
        except OSError:
            return
        for entry in entries:
            if entry.name.startswith("."):
                continue
            # Type checks stat the entry and can raise (e.g. permission
            # denied); skip that entry instead of failing the whole scan.
            try:
                if entry.is_symlink():
                    continue
                is_directory = entry.is_dir()
                is_regular = (not is_directory) and entry.is_file()
            except OSError:
                continue
            if is_directory:
                _walk(entry, depth + 1)
                continue
            if not is_regular:
                continue
            if entry.suffix.lower() not in _LOG_EXTENSIONS:
                continue
            # Skip compressed archives
            if entry.name.endswith((".gz", ".bz2", ".xz", ".zst")):
                continue
            try:
                stat = entry.stat()
            except OSError:
                continue
            if stat.st_size == 0 or stat.st_size > _MAX_SIZE:
                continue
            if not os.access(entry, os.R_OK):
                continue

            path_str = str(entry)
            # Overlapping roots would otherwise list the same file twice.
            if path_str in seen_paths:
                continue
            seen_paths.add(path_str)

            total = _score(stat.st_size, stat.st_mtime, path_str.lower())

            candidates.append({
                "type": "file",
                "id": _path_to_source_id(entry),
                "path": path_str,
                "label": entry.name,
                "size_bytes": stat.st_size,
                "mtime": stat.st_mtime,
                "score": round(total, 3),
                "available": True,
            })

    for d in dirs:
        _walk(Path(d), depth=0)

    candidates.sort(key=lambda c: c["score"], reverse=True)
    return candidates[:max_results]
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,10 @@ from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
|
||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
from app.glean.base import now_iso
|
from app.glean.base import now_iso
|
||||||
from app.services.models import Incident, ReceivedBundle, SentBundle
|
from app.services.models import Incident, ReceivedBundle, SentBundle
|
||||||
from app.services.search import SearchResult, entries_in_window, search
|
from app.services.search import SearchResult, entries_in_window, search
|
||||||
|
|
@ -26,7 +26,7 @@ def _redact_text(text: str) -> str:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def _row_to_incident(row: sqlite3.Row) -> Incident:
|
def _row_to_incident(row) -> Incident:
|
||||||
return Incident(
|
return Incident(
|
||||||
id=row["id"],
|
id=row["id"],
|
||||||
label=row["label"],
|
label=row["label"],
|
||||||
|
|
@ -39,7 +39,7 @@ def _row_to_incident(row: sqlite3.Row) -> Incident:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _row_to_bundle(row: sqlite3.Row) -> ReceivedBundle:
|
def _row_to_bundle(row) -> ReceivedBundle:
|
||||||
return ReceivedBundle(
|
return ReceivedBundle(
|
||||||
id=row["id"],
|
id=row["id"],
|
||||||
source_host=row["source_host"],
|
source_host=row["source_host"],
|
||||||
|
|
@ -62,6 +62,7 @@ def create_incident(
|
||||||
notes: str = "",
|
notes: str = "",
|
||||||
severity: str = "medium",
|
severity: str = "medium",
|
||||||
) -> Incident:
|
) -> Incident:
|
||||||
|
tid = resolve_tenant_id()
|
||||||
incident = Incident(
|
incident = Incident(
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
label=label,
|
label=label,
|
||||||
|
|
@ -72,47 +73,45 @@ def create_incident(
|
||||||
created_at=now_iso(),
|
created_at=now_iso(),
|
||||||
severity=severity,
|
severity=severity,
|
||||||
)
|
)
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
with get_conn(db_path) as conn:
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
conn.execute(
|
||||||
conn.execute(
|
"INSERT INTO incidents (id, tenant_id, label, issue_type, started_at, ended_at, notes, created_at, severity) "
|
||||||
"INSERT INTO incidents (id, label, issue_type, started_at, ended_at, notes, created_at, severity) "
|
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
"VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
|
(incident.id, tid, incident.label, incident.issue_type, incident.started_at,
|
||||||
(incident.id, incident.label, incident.issue_type, incident.started_at,
|
incident.ended_at, incident.notes, incident.created_at, incident.severity),
|
||||||
incident.ended_at, incident.notes, incident.created_at, incident.severity),
|
)
|
||||||
)
|
conn.commit()
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
return incident
|
return incident
|
||||||
|
|
||||||
|
|
||||||
def list_incidents(db_path: Path) -> list[Incident]:
|
def list_incidents(db_path: Path) -> list[Incident]:
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
conn.row_factory = sqlite3.Row
|
rows = conn.execute(
|
||||||
rows = conn.execute(
|
"SELECT * FROM incidents WHERE (tenant_id = ? OR tenant_id = '') ORDER BY created_at DESC",
|
||||||
"SELECT * FROM incidents ORDER BY created_at DESC"
|
(tid,),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
conn.close()
|
|
||||||
return [_row_to_incident(r) for r in rows]
|
return [_row_to_incident(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
def get_incident(db_path: Path, incident_id: str) -> Incident | None:
|
def get_incident(db_path: Path, incident_id: str) -> Incident | None:
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
conn.row_factory = sqlite3.Row
|
row = conn.execute(
|
||||||
row = conn.execute(
|
"SELECT * FROM incidents WHERE id = ? AND (tenant_id = ? OR tenant_id = '')",
|
||||||
"SELECT * FROM incidents WHERE id = ?", (incident_id,)
|
(incident_id, tid),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
conn.close()
|
|
||||||
return _row_to_incident(row) if row else None
|
return _row_to_incident(row) if row else None
|
||||||
|
|
||||||
|
|
||||||
def delete_incident(db_path: Path, incident_id: str) -> bool:
|
def delete_incident(db_path: Path, incident_id: str) -> bool:
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
cur = conn.execute("DELETE FROM incidents WHERE id = ?", (incident_id,))
|
cur = conn.execute(
|
||||||
conn.commit()
|
"DELETE FROM incidents WHERE id = ? AND (tenant_id = ? OR tenant_id = '')",
|
||||||
conn.close()
|
(incident_id, tid),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
return cur.rowcount > 0
|
return cur.rowcount > 0
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -191,6 +190,7 @@ def build_bundle(
|
||||||
|
|
||||||
def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized: bool) -> SentBundle:
|
def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized: bool) -> SentBundle:
|
||||||
"""Log an outgoing bundle export to the sent_bundles table."""
|
"""Log an outgoing bundle export to the sent_bundles table."""
|
||||||
|
tid = resolve_tenant_id()
|
||||||
record = SentBundle(
|
record = SentBundle(
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
incident_id=incident_id,
|
incident_id=incident_id,
|
||||||
|
|
@ -199,28 +199,25 @@ def record_sent_bundle(db_path: Path, incident_id: str, bundle: dict, sanitized:
|
||||||
entry_count=len(bundle.get("log_entries", [])),
|
entry_count=len(bundle.get("log_entries", [])),
|
||||||
bundle_json=json.dumps(bundle),
|
bundle_json=json.dumps(bundle),
|
||||||
)
|
)
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
with get_conn(db_path) as conn:
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
conn.execute(
|
||||||
conn.execute(
|
"INSERT INTO sent_bundles (id, tenant_id, incident_id, exported_at, sanitized, entry_count, bundle_json) "
|
||||||
"INSERT INTO sent_bundles (id, incident_id, exported_at, sanitized, entry_count, bundle_json) "
|
"VALUES (?, ?, ?, ?, ?, ?, ?)",
|
||||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
(record.id, tid, record.incident_id, record.exported_at,
|
||||||
(record.id, record.incident_id, record.exported_at, int(record.sanitized),
|
int(record.sanitized), record.entry_count, record.bundle_json),
|
||||||
record.entry_count, record.bundle_json),
|
)
|
||||||
)
|
conn.commit()
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
|
||||||
def list_sent_bundles(db_path: Path) -> list[SentBundle]:
|
def list_sent_bundles(db_path: Path) -> list[SentBundle]:
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
conn.row_factory = sqlite3.Row
|
rows = conn.execute(
|
||||||
rows = conn.execute(
|
"SELECT id, incident_id, exported_at, sanitized, entry_count, bundle_json "
|
||||||
"SELECT id, incident_id, exported_at, sanitized, entry_count, bundle_json "
|
"FROM sent_bundles WHERE (tenant_id = ? OR tenant_id = '') ORDER BY exported_at DESC",
|
||||||
"FROM sent_bundles ORDER BY exported_at DESC"
|
(tid,),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
conn.close()
|
|
||||||
return [
|
return [
|
||||||
SentBundle(
|
SentBundle(
|
||||||
id=r["id"],
|
id=r["id"],
|
||||||
|
|
@ -236,6 +233,7 @@ def list_sent_bundles(db_path: Path) -> list[SentBundle]:
|
||||||
|
|
||||||
def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
|
def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
|
||||||
"""Store an incoming bundle from a remote Turnstone instance."""
|
"""Store an incoming bundle from a remote Turnstone instance."""
|
||||||
|
tid = resolve_tenant_id()
|
||||||
inc = bundle.get("incident", {})
|
inc = bundle.get("incident", {})
|
||||||
record = ReceivedBundle(
|
record = ReceivedBundle(
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
|
|
@ -248,38 +246,34 @@ def store_bundle(db_path: Path, bundle: dict) -> ReceivedBundle:
|
||||||
entry_count=len(bundle.get("log_entries", [])),
|
entry_count=len(bundle.get("log_entries", [])),
|
||||||
bundle_json=json.dumps(bundle),
|
bundle_json=json.dumps(bundle),
|
||||||
)
|
)
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
with get_conn(db_path) as conn:
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
conn.execute(
|
||||||
conn.execute(
|
"INSERT INTO received_bundles "
|
||||||
"INSERT INTO received_bundles "
|
"(id, tenant_id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json) "
|
||||||
"(id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json) "
|
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||||
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
(record.id, tid, record.source_host, record.issue_type, record.label,
|
||||||
(record.id, record.source_host, record.issue_type, record.label,
|
record.severity, record.started_at, record.bundled_at, record.entry_count, record.bundle_json),
|
||||||
record.severity, record.started_at, record.bundled_at, record.entry_count, record.bundle_json),
|
)
|
||||||
)
|
conn.commit()
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
|
||||||
def list_bundles(db_path: Path) -> list[ReceivedBundle]:
|
def list_bundles(db_path: Path) -> list[ReceivedBundle]:
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
conn.row_factory = sqlite3.Row
|
rows = conn.execute(
|
||||||
rows = conn.execute(
|
"SELECT id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json "
|
||||||
"SELECT id, source_host, issue_type, label, severity, started_at, bundled_at, entry_count, bundle_json "
|
"FROM received_bundles WHERE (tenant_id = ? OR tenant_id = '') ORDER BY bundled_at DESC",
|
||||||
"FROM received_bundles ORDER BY bundled_at DESC"
|
(tid,),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
conn.close()
|
|
||||||
return [_row_to_bundle(r) for r in rows]
|
return [_row_to_bundle(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
def get_bundle(db_path: Path, bundle_id: str) -> ReceivedBundle | None:
|
def get_bundle(db_path: Path, bundle_id: str) -> ReceivedBundle | None:
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
with get_conn(db_path) as conn:
|
||||||
conn.row_factory = sqlite3.Row
|
row = conn.execute(
|
||||||
row = conn.execute(
|
"SELECT * FROM received_bundles WHERE id = ? AND (tenant_id = ? OR tenant_id = '')",
|
||||||
"SELECT * FROM received_bundles WHERE id = ?", (bundle_id,)
|
(bundle_id, tid),
|
||||||
).fetchone()
|
).fetchone()
|
||||||
conn.close()
|
|
||||||
return _row_to_bundle(row) if row else None
|
return _row_to_bundle(row) if row else None
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,7 @@ def summarize(
|
||||||
logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc)
|
logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc)
|
||||||
|
|
||||||
# Fallback: OpenAI-compat endpoint with explicit model name (local instances,
|
# Fallback: OpenAI-compat endpoint with explicit model name (local instances,
|
||||||
# xanderland, or any cf-orch that doesn't have task assignments loaded).
|
# or any cf-orch node that doesn't have task assignments loaded).
|
||||||
try:
|
try:
|
||||||
resp = httpx.post(
|
resp = httpx.post(
|
||||||
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
f"{llm_url.rstrip('/')}/v1/chat/completions",
|
||||||
|
|
|
||||||
327
app/services/orchard.py
Normal file
327
app/services/orchard.py
Normal file
|
|
@ -0,0 +1,327 @@
|
||||||
|
"""The Orchard — auto-enrollment of new Turnstone branch nodes.
|
||||||
|
|
||||||
|
A "branch" is an external Turnstone instance that submits pattern-matched log
|
||||||
|
entries to a central harvest receiver (harvest.circuitforge.tech). Grafting
|
||||||
|
provisions the receiving infrastructure for a new branch:
|
||||||
|
|
||||||
|
1. Creates a data dir at ORCHARD_DATA_ROOT/<slug>/
|
||||||
|
2. Starts a new turnstone-submissions-<slug> Docker container
|
||||||
|
3. Injects a handle_path block into the Caddyfile marker section
|
||||||
|
4. Restarts caddy-proxy to activate the route
|
||||||
|
5. Persists the branch registry to orchard-branches.yaml
|
||||||
|
|
||||||
|
Admin auth: the graft/deactivate endpoints require
|
||||||
|
Authorization: Bearer <TURNSTONE_ORCHARD_ADMIN_KEY>
|
||||||
|
|
||||||
|
Set TURNSTONE_ORCHARD_ADMIN_KEY in the environment on the harvest instance.
|
||||||
|
If unset, the endpoints return 501 Not Implemented (feature is off).
|
||||||
|
|
||||||
|
Anonymization: a separate pass (run_anonymization) replaces IPs, hostnames,
|
||||||
|
and usernames in branch DBs with stable pseudonyms before Avocet reads them.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import ipaddress
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import secrets
|
||||||
|
import sqlite3
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Config (read from env on the harvest instance)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
ORCHARD_DATA_ROOT = Path(os.environ.get("TURNSTONE_ORCHARD_DATA_ROOT", "/devl/docker/turnstone-submissions"))
|
||||||
|
ORCHARD_CADDYFILE = Path(os.environ.get("TURNSTONE_ORCHARD_CADDYFILE", "/devl/caddy-proxy/Caddyfile"))
|
||||||
|
ORCHARD_CADDY_CONTAINER = os.environ.get("TURNSTONE_ORCHARD_CADDY_CONTAINER", "caddy-proxy")
|
||||||
|
ORCHARD_HARVEST_HOST = os.environ.get("TURNSTONE_ORCHARD_HARVEST_HOST", "https://harvest.circuitforge.tech")
|
||||||
|
ORCHARD_IMAGE = os.environ.get("TURNSTONE_ORCHARD_IMAGE", "localhost/turnstone:latest")
|
||||||
|
|
||||||
|
# Ports for submission containers start here and scan upward.
|
||||||
|
ORCHARD_PORT_BASE = int(os.environ.get("TURNSTONE_ORCHARD_PORT_BASE", "8538"))
|
||||||
|
|
||||||
|
_REGISTRY_FILE = ORCHARD_DATA_ROOT / "orchard-branches.yaml"
|
||||||
|
|
||||||
|
_CADDY_BRANCH_START = "# --- ORCHARD BRANCHES: auto-managed by POST /api/orchard/graft, do not edit manually ---"
|
||||||
|
_CADDY_BRANCH_END = "# --- END ORCHARD BRANCHES ---"
|
||||||
|
|
||||||
|
_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9-]{1,30}[a-z0-9]$")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Branch registry
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_registry() -> list[dict[str, Any]]:
    """Load the branch registry from ``_REGISTRY_FILE``.

    Returns:
        The list stored under the ``branches`` key. An empty list is returned
        when the file does not exist, cannot be read/parsed, or does not have
        the expected ``{"branches": [...]}`` shape.
    """
    if not _REGISTRY_FILE.exists():
        return []
    import yaml as _yaml

    try:
        data = _yaml.safe_load(_REGISTRY_FILE.read_text()) or {}
    except Exception:
        # Still best-effort (callers get an empty registry), but never silent:
        # a caller that saves afterwards would overwrite the damaged file, so
        # the failure must at least be visible in the logs.
        logger.exception("Failed to read orchard registry at %s", _REGISTRY_FILE)
        return []

    if not isinstance(data, dict):
        logger.warning("Orchard registry at %s is not a mapping; ignoring it", _REGISTRY_FILE)
        return []

    # A bare ``branches:`` key parses to None; normalise it to an empty list so
    # callers can always iterate over the result.
    branches = data.get("branches") or []
    if not isinstance(branches, list):
        logger.warning("Orchard registry at %s has a non-list 'branches' value; ignoring it", _REGISTRY_FILE)
        return []
    return branches
|
||||||
|
|
||||||
|
|
||||||
|
def _save_registry(branches: list[dict[str, Any]]) -> None:
    """Persist *branches* to ``_REGISTRY_FILE`` as ``{"branches": [...]}`` YAML.

    The parent directory is created if needed. The YAML is written to a sibling
    temporary file and then moved into place with ``os.replace`` so that an
    interrupted write cannot leave a truncated registry behind.
    """
    import yaml as _yaml

    _REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
    payload = _yaml.dump({"branches": branches}, default_flow_style=False)

    # Same directory as the target, so the final rename stays on one filesystem.
    tmp_path = _REGISTRY_FILE.with_name(_REGISTRY_FILE.name + ".tmp")
    tmp_path.write_text(payload)
    os.replace(tmp_path, _REGISTRY_FILE)
|
||||||
|
|
||||||
|
|
||||||
|
def list_branches() -> list[dict[str, Any]]:
    """Return every branch record currently stored in the registry."""
    registry = _load_registry()
    return registry
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Port allocation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _next_free_port() -> int:
    """Return the lowest port >= ``ORCHARD_PORT_BASE`` not recorded in the registry.

    Only ports recorded on registry entries are considered.
    """
    taken = set()
    for entry in _load_registry():
        if "port" in entry:
            taken.add(entry["port"])

    candidate = ORCHARD_PORT_BASE
    while True:
        if candidate not in taken:
            return candidate
        candidate += 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Caddy route injection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _build_branch_block(slug: str, port: int) -> str:
|
||||||
|
return (
|
||||||
|
f" handle_path /{slug}/* {{\n"
|
||||||
|
f" reverse_proxy http://host.docker.internal:{port} {{\n"
|
||||||
|
f" header_up X-Real-IP {{remote_host}}\n"
|
||||||
|
f" header_up X-Forwarded-Proto {{scheme}}\n"
|
||||||
|
f" flush_interval -1\n"
|
||||||
|
f" transport http {{\n"
|
||||||
|
f" response_header_timeout 0\n"
|
||||||
|
f" read_timeout 0\n"
|
||||||
|
f" }}\n"
|
||||||
|
f" }}\n"
|
||||||
|
f" }}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_caddy_branches(branches: list[dict[str, Any]]) -> None:
    """Replace the auto-managed section in the Caddyfile with current branches.

    Everything between ``_CADDY_BRANCH_START`` and ``_CADDY_BRANCH_END``
    (markers included) is regenerated from the entries in *branches* whose
    ``active`` flag is true or absent.

    Raises:
        RuntimeError: If the Caddyfile does not exist, or the start marker is
            missing, or no end marker follows the start marker.
    """
    if not ORCHARD_CADDYFILE.exists():
        raise RuntimeError(f"Caddyfile not found at {ORCHARD_CADDYFILE}")

    text = ORCHARD_CADDYFILE.read_text()
    start_idx = text.find(_CADDY_BRANCH_START)
    # Search for the end marker only *after* the start marker: an end marker
    # that appears earlier in the file would otherwise make the splice below
    # duplicate or drop unrelated configuration.
    end_idx = -1
    if start_idx != -1:
        end_idx = text.find(_CADDY_BRANCH_END, start_idx + len(_CADDY_BRANCH_START))
    if start_idx == -1 or end_idx == -1:
        raise RuntimeError("Caddyfile is missing the ORCHARD BRANCHES marker section")

    active = [b for b in branches if b.get("active", True)]
    blocks = "\n".join(_build_branch_block(b["slug"], b["port"]) for b in active)
    replacement = f"{_CADDY_BRANCH_START}\n{blocks}\n {_CADDY_BRANCH_END}"

    new_text = text[:start_idx] + replacement + text[end_idx + len(_CADDY_BRANCH_END):]
    ORCHARD_CADDYFILE.write_text(new_text)
    logger.info("Caddyfile updated with %d active branch routes", len(active))
|
||||||
|
|
||||||
|
|
||||||
|
def _reload_caddy() -> None:
    """Restart the Caddy container via ``docker restart``.

    Raises:
        RuntimeError: If ``docker restart`` exits with a non-zero status.
    """
    argv = ["docker", "restart", ORCHARD_CADDY_CONTAINER]
    completed = subprocess.run(argv, capture_output=True, text=True, timeout=30)
    if completed.returncode == 0:
        logger.info("Restarted %s", ORCHARD_CADDY_CONTAINER)
        return
    raise RuntimeError(f"docker restart {ORCHARD_CADDY_CONTAINER} failed: {completed.stderr}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Container provisioning
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _start_branch_container(slug: str, port: int, data_dir: Path) -> None:
    """Create the branch's data directories and start its submission container.

    Args:
        slug: Branch slug; used in the container name and as
            ``TURNSTONE_SOURCE_HOST`` inside the container.
        port: Host port published to the container's port 8534.
        data_dir: Host directory mounted at ``/data``; its ``patterns``
            subdirectory is mounted at ``/patterns``.

    Raises:
        RuntimeError: If ``docker run`` exits with a non-zero status.
        subprocess.TimeoutExpired: If a docker command exceeds its timeout.
    """
    data_dir.mkdir(parents=True, exist_ok=True)
    patterns_dir = data_dir / "patterns"
    patterns_dir.mkdir(parents=True, exist_ok=True)

    # Seed default patterns if not already present
    repo_patterns = Path(__file__).parent.parent.parent / "patterns"
    for yaml_file in ("default.yaml", "sources-example.yaml"):
        src = repo_patterns / yaml_file
        dst = patterns_dir / yaml_file
        if src.exists() and not dst.exists():
            dst.write_text(src.read_text())

    container_name = f"turnstone-submissions-{slug}"
    cmd = [
        "docker", "run", "-d",
        "--name", container_name,
        "--restart", "unless-stopped",
        "-p", f"{port}:8534",
        "-v", f"{data_dir}:/data",
        "-v", f"{patterns_dir}:/patterns",
        "-e", "TURNSTONE_DB=/data/turnstone.db",
        "-e", f"TURNSTONE_SOURCE_HOST={slug}",
        "-e", "PYTHONUNBUFFERED=1",
        "-e", "TZ=America/Los_Angeles",
        ORCHARD_IMAGE,
    ]
    # Remove any stale container with the same name first. The exit status is
    # deliberately ignored; the timeout matches _stop_branch_container so a
    # hung docker daemon cannot block the caller indefinitely.
    subprocess.run(["docker", "rm", "-f", container_name], capture_output=True, timeout=30)
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    if result.returncode != 0:
        raise RuntimeError(f"docker run for {container_name} failed: {result.stderr}")
    logger.info("Started container %s on port %d", container_name, port)
|
||||||
|
|
||||||
|
|
||||||
|
def _stop_branch_container(slug: str) -> None:
    """Force-remove the submission container belonging to *slug*.

    The exit status of ``docker rm -f`` is not inspected.
    """
    name = f"turnstone-submissions-{slug}"
    argv = ["docker", "rm", "-f", name]
    subprocess.run(argv, capture_output=True, timeout=30)
    logger.info("Removed container %s", name)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def graft(slug: str, contact_email: str, agreed_to_terms: bool) -> dict[str, Any]:
    """Provision a new Orchard branch and return connection details.

    Args:
        slug: Branch identifier; must match ``_SLUG_RE`` (3-32 characters,
            lowercase alphanumerics and hyphens, no leading/trailing hyphen).
        contact_email: Contact address stored in the registry entry.
        agreed_to_terms: Must be true, otherwise the call is rejected.

    Returns:
        A dict with ``slug``, ``submit_endpoint``, ``api_key`` and ``port``.
        The plaintext API key is only available in this return value; the
        registry stores its SHA-256 hash.

    Raises:
        ValueError: If terms were not agreed to, the slug is invalid, or a
            branch with this slug already exists (active or not).
        RuntimeError: Propagated from container start or Caddy update.
    """
    if not agreed_to_terms:
        raise ValueError("agreed_to_terms must be true")
    if not _SLUG_RE.match(slug):
        # _SLUG_RE requires first char + 1..30 middle chars + last char, i.e.
        # a minimum of 3 characters; the message states that actual bound.
        raise ValueError(
            f"Invalid slug {slug!r}: must be 3-32 lowercase alphanumeric/hyphen, "
            "cannot start or end with a hyphen"
        )

    branches = _load_registry()
    if any(b["slug"] == slug for b in branches):
        raise ValueError(f"Branch {slug!r} already exists")

    port = _next_free_port()
    data_dir = ORCHARD_DATA_ROOT / slug
    api_key = secrets.token_urlsafe(32)

    branch: dict[str, Any] = {
        "slug": slug,
        "port": port,
        "contact_email": contact_email,
        "api_key_hash": hashlib.sha256(api_key.encode()).hexdigest(),
        "grafted_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "active": True,
    }

    # NOTE(review): these steps are not transactional — if the Caddy update
    # fails, the container is already running and the registry entry is
    # already saved. Confirm whether callers need a rollback.
    _start_branch_container(slug, port, data_dir)
    branches.append(branch)
    _save_registry(branches)

    _rewrite_caddy_branches(branches)
    _reload_caddy()

    submit_endpoint = f"{ORCHARD_HARVEST_HOST}/{slug}"
    logger.info("Grafted branch %r at %s", slug, submit_endpoint)
    return {
        "slug": slug,
        "submit_endpoint": submit_endpoint,
        "api_key": api_key,
        "port": port,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def deactivate(slug: str) -> dict[str, Any]:
    """Take a branch offline.

    Removes the branch's container, flags the registry entry as inactive,
    regenerates the Caddy routes and restarts Caddy.

    Raises:
        KeyError: If no registry entry has this slug.
    """
    registry = _load_registry()

    target = None
    for entry in registry:
        if entry["slug"] == slug:
            target = entry
            break
    if target is None:
        raise KeyError(f"Branch {slug!r} not found")

    _stop_branch_container(slug)
    target["active"] = False
    _save_registry(registry)
    _rewrite_caddy_branches(registry)
    _reload_caddy()

    outcome: dict[str, Any] = {"slug": slug, "deactivated": True}
    return outcome
|
||||||
|
|
||||||
|
|
||||||
|
def verify_api_key(slug: str, key: str) -> bool:
    """Return True when *key* hashes to the stored hash of the active branch *slug*.

    Unknown or inactive branches always yield False. The comparison uses
    ``hmac.compare_digest`` on the hex SHA-256 digests.
    """
    for entry in _load_registry():
        if entry["slug"] == slug and entry.get("active"):
            stored_hash = entry.get("api_key_hash", "")
            candidate_hash = hashlib.sha256(key.encode()).hexdigest()
            return hmac.compare_digest(stored_hash, candidate_hash)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Anonymization worker
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_IP_RE = re.compile(
|
||||||
|
r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b"
|
||||||
|
)
|
||||||
|
_USERNAME_RE = re.compile(r"\bfor\s+(\w+)\b|\buser\s+(\w+)\b|\bsession\s+opened\s+for\s+(\w+)\b", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def _pseudonym(value: str, salt: bytes, prefix: str) -> str:
|
||||||
|
digest = hmac.new(salt, value.encode(), "sha256").hexdigest()[:10]
|
||||||
|
return f"{prefix}-{digest}"
|
||||||
|
|
||||||
|
|
||||||
|
def _anonymize_text(text: str, salt: bytes) -> str:
|
||||||
|
def replace_ip(m: re.Match) -> str:
|
||||||
|
return _pseudonym(m.group(), salt, "ip")
|
||||||
|
|
||||||
|
def replace_user(m: re.Match) -> str:
|
||||||
|
user = next(g for g in m.groups() if g)
|
||||||
|
return m.group().replace(user, _pseudonym(user, salt, "user"))
|
||||||
|
|
||||||
|
text = _IP_RE.sub(replace_ip, text)
|
||||||
|
text = _USERNAME_RE.sub(replace_user, text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def run_anonymization(slug: str) -> dict[str, Any]:
    """Anonymize IPs and usernames in a branch DB in-place.

    Processes every ``log_entries`` row whose ``anonymized`` column is NULL or
    0, rewrites ``text`` where anonymization changed it, and marks each
    processed row with ``anonymized = 1``.

    Uses a stable per-branch salt so pseudonyms are consistent across runs.
    NOTE(review): the salt is derived from ``api_key_hash``, which is stored in
    the registry file — confirm that is an acceptable secrecy level for the
    "not reversible without the salt" guarantee.

    Returns:
        ``{"slug", "anonymized", "total_processed"}`` where ``anonymized`` is
        the number of rows whose text changed.

    Raises:
        KeyError: If no registry entry has this slug.
    """
    branch = next((b for b in _load_registry() if b["slug"] == slug), None)
    if branch is None:
        raise KeyError(f"Branch {slug!r} not found")

    db_path = ORCHARD_DATA_ROOT / slug / "turnstone.db"
    if not db_path.exists():
        # Same keys as the normal return so callers can read either result.
        return {"slug": slug, "anonymized": 0, "total_processed": 0}

    # Per-branch salt derived from api_key_hash for stability
    salt = branch["api_key_hash"].encode()[:32].ljust(32, b"0")

    conn = sqlite3.connect(str(db_path), timeout=30)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        rows = conn.execute("SELECT id, text FROM log_entries WHERE anonymized IS NULL OR anonymized = 0").fetchall()

        updated = 0
        for row_id, text in rows:
            # Compare against the normalised original so a NULL text is not
            # rewritten to "" and miscounted as an anonymized entry.
            original = text or ""
            clean = _anonymize_text(original, salt)
            if clean != original:
                conn.execute("UPDATE log_entries SET text = ?, anonymized = 1 WHERE id = ?", (clean, row_id))
                updated += 1
            else:
                conn.execute("UPDATE log_entries SET anonymized = 1 WHERE id = ?", (row_id,))

        conn.commit()
    finally:
        # Always release the connection, including when a query fails
        # (e.g. the table or column does not exist).
        conn.close()

    logger.info("Anonymized %d/%d entries in branch %r", updated, len(rows), slug)
    return {"slug": slug, "anonymized": updated, "total_processed": len(rows)}
|
||||||
|
|
@ -1,4 +1,8 @@
|
||||||
"""FTS5-based log search with optional hybrid BM25 + vector re-ranking."""
|
"""FTS-based log search with optional hybrid BM25 + vector re-ranking.
|
||||||
|
|
||||||
|
SQLite backend: FTS5 virtual table with Porter stemmer.
|
||||||
|
Postgres backend: tsvector column with GIN index + websearch_to_tsquery.
|
||||||
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|
@ -6,8 +10,11 @@ import logging
|
||||||
import re
|
import re
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.db import BACKEND, Backend, frag, get_conn, resolve_tenant_id
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -28,48 +35,47 @@ class SearchResult:
|
||||||
def build_fts_index(db_path: Path) -> None:
|
def build_fts_index(db_path: Path) -> None:
|
||||||
"""Build (or rebuild) the FTS5 index from log_entries. Safe to re-run.
|
"""Build (or rebuild) the FTS5 index from log_entries. Safe to re-run.
|
||||||
|
|
||||||
Drops and recreates the table if the schema is stale (missing sequence column).
|
For Postgres, the tsvector column is maintained by a trigger — this is a no-op.
|
||||||
"""
|
"""
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
if BACKEND == Backend.POSTGRES:
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
return
|
||||||
|
|
||||||
# Check whether existing table has the sequence column; rebuild if not.
|
with get_conn(db_path) as conn:
|
||||||
needs_rebuild = False
|
needs_rebuild = False
|
||||||
try:
|
try:
|
||||||
conn.execute("SELECT sequence FROM log_fts LIMIT 0")
|
conn.execute("SELECT sequence FROM log_fts LIMIT 0")
|
||||||
except sqlite3.OperationalError:
|
except Exception:
|
||||||
needs_rebuild = True
|
needs_rebuild = True
|
||||||
|
|
||||||
if needs_rebuild:
|
if needs_rebuild:
|
||||||
conn.execute("DROP TABLE IF EXISTS log_fts")
|
conn.execute("DROP TABLE IF EXISTS log_fts")
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
conn.executescript("""
|
conn.execute("""
|
||||||
CREATE VIRTUAL TABLE IF NOT EXISTS log_fts USING fts5(
|
CREATE VIRTUAL TABLE IF NOT EXISTS log_fts USING fts5(
|
||||||
text,
|
text,
|
||||||
entry_id UNINDEXED,
|
entry_id UNINDEXED,
|
||||||
source_id UNINDEXED,
|
source_id UNINDEXED,
|
||||||
sequence UNINDEXED,
|
sequence UNINDEXED,
|
||||||
severity UNINDEXED,
|
severity UNINDEXED,
|
||||||
timestamp_iso UNINDEXED,
|
timestamp_iso UNINDEXED,
|
||||||
matched_patterns UNINDEXED,
|
matched_patterns UNINDEXED,
|
||||||
repeat_count UNINDEXED,
|
repeat_count UNINDEXED,
|
||||||
out_of_order UNINDEXED,
|
out_of_order UNINDEXED,
|
||||||
tokenize = 'porter ascii'
|
tokenize = 'porter ascii'
|
||||||
);
|
)
|
||||||
""")
|
""")
|
||||||
# Only insert rows not already indexed
|
conn.execute("""
|
||||||
conn.execute("""
|
INSERT INTO log_fts(text, entry_id, source_id, sequence, severity,
|
||||||
INSERT INTO log_fts(text, entry_id, source_id, sequence, severity,
|
timestamp_iso, matched_patterns,
|
||||||
timestamp_iso, matched_patterns,
|
repeat_count, out_of_order)
|
||||||
repeat_count, out_of_order)
|
SELECT e.text, e.id, e.source_id, e.sequence, e.severity,
|
||||||
SELECT e.text, e.id, e.source_id, e.sequence, e.severity,
|
e.timestamp_iso, e.matched_patterns,
|
||||||
e.timestamp_iso, e.matched_patterns,
|
e.repeat_count, e.out_of_order
|
||||||
e.repeat_count, e.out_of_order
|
FROM log_entries e
|
||||||
FROM log_entries e
|
WHERE e.id NOT IN (SELECT entry_id FROM log_fts WHERE entry_id IS NOT NULL)
|
||||||
WHERE e.id NOT IN (SELECT entry_id FROM log_fts WHERE entry_id IS NOT NULL)
|
""")
|
||||||
""")
|
conn.commit()
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_fts_query(raw: str, or_mode: bool = False) -> str:
|
def _sanitize_fts_query(raw: str, or_mode: bool = False) -> str:
|
||||||
|
|
@ -198,54 +204,88 @@ def _bm25_search(
|
||||||
include_repeats: bool = False,
|
include_repeats: bool = False,
|
||||||
or_mode: bool = False,
|
or_mode: bool = False,
|
||||||
) -> list[SearchResult]:
|
) -> list[SearchResult]:
|
||||||
"""Pure BM25 FTS5 search — internal helper used by both search() and _hybrid_search()."""
|
"""FTS search — BM25 via FTS5 (SQLite) or tsvector (Postgres)."""
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
|
||||||
conn.row_factory = sqlite3.Row
|
|
||||||
|
|
||||||
|
if BACKEND == Backend.POSTGRES:
|
||||||
|
return _pg_fts_search(
|
||||||
|
db_path, query, tid,
|
||||||
|
severity=severity, source_filter=source_filter,
|
||||||
|
pattern_filter=pattern_filter, since=since, until=until,
|
||||||
|
limit=limit, include_repeats=include_repeats,
|
||||||
|
)
|
||||||
|
|
||||||
|
return _sqlite_fts_search(
|
||||||
|
db_path, query, tid,
|
||||||
|
severity=severity, source_filter=source_filter,
|
||||||
|
pattern_filter=pattern_filter, since=since, until=until,
|
||||||
|
limit=limit, include_repeats=include_repeats, or_mode=or_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _sqlite_fts_search(
|
||||||
|
db_path: Path,
|
||||||
|
query: str,
|
||||||
|
tid: str,
|
||||||
|
severity: str | None,
|
||||||
|
source_filter: str | None,
|
||||||
|
pattern_filter: str | None,
|
||||||
|
since: str | None,
|
||||||
|
until: str | None,
|
||||||
|
limit: int,
|
||||||
|
include_repeats: bool,
|
||||||
|
or_mode: bool,
|
||||||
|
) -> list[SearchResult]:
|
||||||
fts_query = _sanitize_fts_query(query, or_mode=or_mode)
|
fts_query = _sanitize_fts_query(query, or_mode=or_mode)
|
||||||
conditions = ["log_fts MATCH ?"]
|
conditions = [
|
||||||
params: list = [fts_query]
|
"log_fts MATCH ?",
|
||||||
|
"(e.tenant_id = ? OR e.tenant_id = '')",
|
||||||
|
]
|
||||||
|
params: list = [fts_query, tid]
|
||||||
|
|
||||||
if severity:
|
if severity:
|
||||||
conditions.append("severity = ?")
|
conditions.append("f.severity = ?")
|
||||||
params.append(severity.upper())
|
params.append(severity.upper())
|
||||||
if source_filter:
|
if source_filter:
|
||||||
conditions.append("source_id LIKE ?")
|
conditions.append("f.source_id LIKE ?")
|
||||||
params.append(f"%{source_filter}%")
|
params.append(f"%{source_filter}%")
|
||||||
if pattern_filter:
|
if pattern_filter:
|
||||||
conditions.append("matched_patterns LIKE ?")
|
conditions.append("f.matched_patterns LIKE ?")
|
||||||
params.append(f'%"{pattern_filter}"%')
|
params.append(f'%"{pattern_filter}"%')
|
||||||
if since:
|
if since:
|
||||||
conditions.append("timestamp_iso >= ?")
|
conditions.append("f.timestamp_iso >= ?")
|
||||||
params.append(since)
|
params.append(since)
|
||||||
if until:
|
if until:
|
||||||
conditions.append("timestamp_iso <= ?")
|
conditions.append("f.timestamp_iso <= ?")
|
||||||
params.append(until)
|
params.append(until)
|
||||||
if not include_repeats:
|
if not include_repeats:
|
||||||
conditions.append("repeat_count = 1")
|
conditions.append("f.repeat_count = 1")
|
||||||
|
|
||||||
where = " AND ".join(conditions)
|
where = " AND ".join(conditions)
|
||||||
params.append(limit)
|
params.append(limit)
|
||||||
|
|
||||||
|
raw = sqlite3.connect(str(db_path), timeout=30.0)
|
||||||
|
raw.row_factory = sqlite3.Row
|
||||||
try:
|
try:
|
||||||
rows = conn.execute(
|
rows = raw.execute(
|
||||||
f"""
|
f"""
|
||||||
SELECT entry_id, source_id, sequence, timestamp_iso, severity,
|
SELECT f.entry_id, f.source_id, f.sequence, f.timestamp_iso, f.severity,
|
||||||
repeat_count, out_of_order, matched_patterns, text, rank
|
f.repeat_count, f.out_of_order, f.matched_patterns, f.text, f.rank
|
||||||
FROM log_fts
|
FROM log_fts f
|
||||||
|
JOIN log_entries e ON e.id = f.entry_id
|
||||||
WHERE {where}
|
WHERE {where}
|
||||||
ORDER BY rank
|
ORDER BY f.rank
|
||||||
LIMIT ?
|
LIMIT ?
|
||||||
""",
|
""",
|
||||||
params,
|
params,
|
||||||
).fetchall()
|
).fetchall()
|
||||||
except sqlite3.OperationalError as e:
|
except sqlite3.OperationalError as exc:
|
||||||
logger.warning("FTS query failed (%s) — index may not be built yet", e)
|
logger.warning("FTS query failed (%s) — index may not be built yet", exc)
|
||||||
conn.close()
|
|
||||||
return []
|
return []
|
||||||
|
finally:
|
||||||
|
raw.close()
|
||||||
|
|
||||||
results = [
|
return [
|
||||||
SearchResult(
|
SearchResult(
|
||||||
entry_id=r["entry_id"],
|
entry_id=r["entry_id"],
|
||||||
source_id=r["source_id"],
|
source_id=r["source_id"],
|
||||||
|
|
@ -256,12 +296,83 @@ def _bm25_search(
|
||||||
out_of_order=bool(r["out_of_order"]),
|
out_of_order=bool(r["out_of_order"]),
|
||||||
matched_patterns=json.loads(r["matched_patterns"] or "[]"),
|
matched_patterns=json.loads(r["matched_patterns"] or "[]"),
|
||||||
text=r["text"],
|
text=r["text"],
|
||||||
rank=r["rank"],
|
rank=float(r["rank"]),
|
||||||
|
)
|
||||||
|
for r in rows
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _pg_fts_search(
    db_path: Path,
    query: str,
    tid: str,
    severity: str | None,
    source_filter: str | None,
    pattern_filter: str | None,
    since: str | None,
    until: str | None,
    limit: int,
    include_repeats: bool,
) -> list[SearchResult]:
    """Postgres FTS via tsvector column and websearch_to_tsquery.

    Rows match when ``text_tsv`` satisfies the tsquery built from *query* and
    the row belongs to tenant *tid* (or has an empty tenant id). The optional
    filters narrow by severity (upper-cased), source id substring, matched
    pattern name, and timestamp bounds; ``include_repeats=False`` keeps only
    rows with ``repeat_count = 1``. Results are ordered by ``ts_rank``
    descending and capped at *limit*.
    """
    tsq = "websearch_to_tsquery('english', %s)"
    conditions = [
        f"text_tsv @@ {tsq}",
        "(tenant_id = %s OR tenant_id = '')",
    ]
    where_params: list = [query, tid]

    if severity:
        conditions.append("severity = %s")
        where_params.append(severity.upper())
    if source_filter:
        conditions.append("source_id LIKE %s")
        where_params.append(f"%{source_filter}%")
    if pattern_filter:
        conditions.append("matched_patterns LIKE %s")
        where_params.append(f'%"{pattern_filter}"%')
    if since:
        conditions.append("timestamp_iso >= %s")
        where_params.append(since)
    if until:
        conditions.append("timestamp_iso <= %s")
        where_params.append(until)
    if not include_repeats:
        conditions.append("repeat_count = 1")

    where = " AND ".join(conditions)
    # Positional placeholders are bound in the order they appear in the SQL
    # text. The ts_rank() tsquery lives in the SELECT list, i.e. *before* the
    # WHERE clause, so its parameter must come first, then the WHERE
    # parameters, then the LIMIT.
    params: list = [query, *where_params, limit]

    with get_conn(db_path) as conn:
        rows = conn.execute(
            f"""
            SELECT id AS entry_id, source_id, sequence, timestamp_iso, severity,
                   repeat_count, out_of_order, matched_patterns, text,
                   ts_rank(text_tsv, {tsq}) AS rank
            FROM log_entries
            WHERE {where}
            ORDER BY rank DESC
            LIMIT %s
            """,
            params,
        ).fetchall()

    return [
        SearchResult(
            entry_id=r["entry_id"],
            source_id=r["source_id"],
            sequence=r["sequence"],
            timestamp_iso=r["timestamp_iso"],
            severity=r["severity"],
            repeat_count=r["repeat_count"],
            out_of_order=bool(r["out_of_order"]),
            matched_patterns=json.loads(r["matched_patterns"] or "[]"),
            text=r["text"],
            rank=float(r["rank"]),
        )
        for r in rows
    ]
|
||||||
conn.close()
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def entries_in_window(
|
def entries_in_window(
|
||||||
|
|
@ -282,12 +393,12 @@ def entries_in_window(
|
||||||
(e.g. network-syslog) don't crowd out lower-volume but more interesting ones.
|
(e.g. network-syslog) don't crowd out lower-volume but more interesting ones.
|
||||||
Errors/warnings are ranked first within each source partition.
|
Errors/warnings are ranked first within each source partition.
|
||||||
"""
|
"""
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
conditions: list[str] = [
|
||||||
conn.row_factory = sqlite3.Row
|
"repeat_count = 1",
|
||||||
|
"(tenant_id = ? OR tenant_id = '')",
|
||||||
conditions: list[str] = ["repeat_count = 1"]
|
]
|
||||||
params: list = []
|
params: list = [tid]
|
||||||
|
|
||||||
if since:
|
if since:
|
||||||
conditions.append("timestamp_iso >= ?")
|
conditions.append("timestamp_iso >= ?")
|
||||||
|
|
@ -305,8 +416,7 @@ def entries_in_window(
|
||||||
where = " AND ".join(conditions)
|
where = " AND ".join(conditions)
|
||||||
|
|
||||||
if per_source_cap is not None:
|
if per_source_cap is not None:
|
||||||
# Use a window function to cap rows per source, errors/warnings first.
|
sql = f"""
|
||||||
query = f"""
|
|
||||||
WITH ranked AS (
|
WITH ranked AS (
|
||||||
SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
|
SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
|
||||||
repeat_count, out_of_order, matched_patterns, text, 0.0 as rank,
|
repeat_count, out_of_order, matched_patterns, text, 0.0 as rank,
|
||||||
|
|
@ -333,7 +443,7 @@ def entries_in_window(
|
||||||
"""
|
"""
|
||||||
params.extend([per_source_cap, limit])
|
params.extend([per_source_cap, limit])
|
||||||
else:
|
else:
|
||||||
query = f"""
|
sql = f"""
|
||||||
SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
|
SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
|
||||||
repeat_count, out_of_order, matched_patterns, text, 0.0 as rank
|
repeat_count, out_of_order, matched_patterns, text, 0.0 as rank
|
||||||
FROM log_entries
|
FROM log_entries
|
||||||
|
|
@ -343,8 +453,8 @@ def entries_in_window(
|
||||||
"""
|
"""
|
||||||
params.append(limit)
|
params.append(limit)
|
||||||
|
|
||||||
rows = conn.execute(query, params).fetchall()
|
with get_conn(db_path) as conn:
|
||||||
conn.close()
|
rows = conn.execute(sql, params).fetchall()
|
||||||
|
|
||||||
return [
|
return [
|
||||||
SearchResult(
|
SearchResult(
|
||||||
|
|
@ -357,7 +467,7 @@ def entries_in_window(
|
||||||
out_of_order=bool(r["out_of_order"]),
|
out_of_order=bool(r["out_of_order"]),
|
||||||
matched_patterns=json.loads(r["matched_patterns"] or "[]"),
|
matched_patterns=json.loads(r["matched_patterns"] or "[]"),
|
||||||
text=r["text"],
|
text=r["text"],
|
||||||
rank=r["rank"],
|
rank=float(r["rank"]),
|
||||||
)
|
)
|
||||||
for r in rows
|
for r in rows
|
||||||
]
|
]
|
||||||
|
|
@ -376,16 +486,14 @@ def recent_source_errors(
|
||||||
Bypasses FTS ranking so text content doesn't affect which errors surface.
|
Bypasses FTS ranking so text content doesn't affect which errors surface.
|
||||||
Used by diagnose when FTS keyword search returns nothing for a known source.
|
Used by diagnose when FTS keyword search returns nothing for a known source.
|
||||||
"""
|
"""
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
|
||||||
conn.row_factory = sqlite3.Row
|
|
||||||
|
|
||||||
conditions = [
|
conditions = [
|
||||||
"source_id LIKE ?",
|
"source_id LIKE ?",
|
||||||
"severity = ?",
|
"severity = ?",
|
||||||
"repeat_count = 1",
|
"repeat_count = 1",
|
||||||
|
"(tenant_id = ? OR tenant_id = '')",
|
||||||
]
|
]
|
||||||
params: list = [f"%{source_filter}%", severity.upper()]
|
params: list = [f"%{source_filter}%", severity.upper(), tid]
|
||||||
|
|
||||||
if since:
|
if since:
|
||||||
conditions.append("timestamp_iso >= ?")
|
conditions.append("timestamp_iso >= ?")
|
||||||
|
|
@ -397,18 +505,18 @@ def recent_source_errors(
|
||||||
params.append(limit)
|
params.append(limit)
|
||||||
where = " AND ".join(conditions)
|
where = " AND ".join(conditions)
|
||||||
|
|
||||||
rows = conn.execute(
|
with get_conn(db_path) as conn:
|
||||||
f"""
|
rows = conn.execute(
|
||||||
SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
|
f"""
|
||||||
repeat_count, out_of_order, matched_patterns, text, 0.0 as rank
|
SELECT id as entry_id, source_id, sequence, timestamp_iso, severity,
|
||||||
FROM log_entries
|
repeat_count, out_of_order, matched_patterns, text, 0.0 as rank
|
||||||
WHERE {where}
|
FROM log_entries
|
||||||
ORDER BY timestamp_iso DESC
|
WHERE {where}
|
||||||
LIMIT ?
|
ORDER BY timestamp_iso DESC
|
||||||
""",
|
LIMIT ?
|
||||||
params,
|
""",
|
||||||
).fetchall()
|
params,
|
||||||
conn.close()
|
).fetchall()
|
||||||
|
|
||||||
return [
|
return [
|
||||||
SearchResult(
|
SearchResult(
|
||||||
|
|
@ -421,7 +529,7 @@ def recent_source_errors(
|
||||||
out_of_order=bool(r["out_of_order"]),
|
out_of_order=bool(r["out_of_order"]),
|
||||||
matched_patterns=json.loads(r["matched_patterns"] or "[]"),
|
matched_patterns=json.loads(r["matched_patterns"] or "[]"),
|
||||||
text=r["text"],
|
text=r["text"],
|
||||||
rank=r["rank"],
|
rank=float(r["rank"]),
|
||||||
)
|
)
|
||||||
for r in rows
|
for r in rows
|
||||||
]
|
]
|
||||||
|
|
@ -436,37 +544,34 @@ def list_sources(db_path: Path) -> list[dict]:
|
||||||
returned as-is. ``unit_count`` reports how many distinct sub-units were
|
returned as-is. ``unit_count`` reports how many distinct sub-units were
|
||||||
merged into each row.
|
merged into each row.
|
||||||
"""
|
"""
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
group_expr = frag.source_group_expr("source_id")
|
||||||
rows = conn.execute("""
|
with get_conn(db_path) as conn:
|
||||||
SELECT
|
rows = conn.execute(
|
||||||
CASE
|
f"""
|
||||||
WHEN INSTR(SUBSTR(source_id, INSTR(source_id, ':')+1), ':') > 0
|
SELECT
|
||||||
THEN SUBSTR(source_id, 1,
|
{group_expr} AS group_id,
|
||||||
INSTR(source_id, ':')
|
COUNT(DISTINCT source_id) AS unit_count,
|
||||||
+ INSTR(SUBSTR(source_id, INSTR(source_id, ':')+1), ':')
|
COUNT(*) AS entry_count,
|
||||||
- 1)
|
MIN(timestamp_iso) AS earliest,
|
||||||
ELSE source_id
|
MAX(timestamp_iso) AS latest,
|
||||||
END AS group_id,
|
SUM(CASE WHEN severity IN ('ERROR','CRITICAL','EMERGENCY','ALERT')
|
||||||
COUNT(DISTINCT source_id) AS unit_count,
|
THEN 1 ELSE 0 END) AS error_count
|
||||||
COUNT(*) AS entry_count,
|
FROM log_entries
|
||||||
MIN(timestamp_iso) AS earliest,
|
WHERE (tenant_id = ? OR tenant_id = '')
|
||||||
MAX(timestamp_iso) AS latest,
|
GROUP BY group_id
|
||||||
SUM(CASE WHEN severity IN ('ERROR','CRITICAL','EMERGENCY','ALERT')
|
ORDER BY entry_count DESC
|
||||||
THEN 1 ELSE 0 END) AS error_count
|
""",
|
||||||
FROM log_entries
|
(tid,),
|
||||||
GROUP BY group_id
|
).fetchall()
|
||||||
ORDER BY entry_count DESC
|
|
||||||
""").fetchall()
|
|
||||||
conn.close()
|
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
"source_id": r[0],
|
"source_id": r["group_id"],
|
||||||
"unit_count": r[1],
|
"unit_count": r["unit_count"],
|
||||||
"entry_count": r[2],
|
"entry_count": r["entry_count"],
|
||||||
"earliest": r[3],
|
"earliest": r["earliest"],
|
||||||
"latest": r[4],
|
"latest": r["latest"],
|
||||||
"error_count": r[5],
|
"error_count": r["error_count"],
|
||||||
}
|
}
|
||||||
for r in rows
|
for r in rows
|
||||||
]
|
]
|
||||||
|
|
@ -498,47 +603,80 @@ def stats_summary(db_path: Path, window_hours: int = 24, severity_overrides: lis
|
||||||
Queries plain log_entries (not FTS) so it works even before the index is built.
|
Queries plain log_entries (not FTS) so it works even before the index is built.
|
||||||
"""
|
"""
|
||||||
rules = _compile_overrides(severity_overrides or [])
|
rules = _compile_overrides(severity_overrides or [])
|
||||||
|
tid = resolve_tenant_id()
|
||||||
|
group_expr = frag.source_group_expr("source_id")
|
||||||
|
since_iso = (
|
||||||
|
datetime.now(timezone.utc) - timedelta(hours=window_hours)
|
||||||
|
).strftime("%Y-%m-%dT%H:%M:%S")
|
||||||
|
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
with get_conn(db_path) as conn:
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
row = conn.execute(
|
||||||
conn.row_factory = sqlite3.Row
|
"""
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS total,
|
||||||
|
SUM(CASE WHEN severity = 'CRITICAL' THEN 1 ELSE 0 END) AS criticals,
|
||||||
|
SUM(CASE WHEN severity IN ('ERROR','CRITICAL','EMERGENCY','ALERT') THEN 1 ELSE 0 END) AS errors
|
||||||
|
FROM log_entries
|
||||||
|
WHERE timestamp_iso >= ?
|
||||||
|
AND repeat_count = 1
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
|
""",
|
||||||
|
(since_iso, tid),
|
||||||
|
).fetchone()
|
||||||
|
total_24h = int(row["total"] or 0)
|
||||||
|
criticals_24h = int(row["criticals"] or 0)
|
||||||
|
errors_24h = int(row["errors"] or 0)
|
||||||
|
|
||||||
since_expr = f"strftime('%Y-%m-%dT%H:%M:%S', 'now', '-{window_hours} hours')"
|
source_rows = conn.execute(
|
||||||
|
f"""
|
||||||
|
SELECT
|
||||||
|
{group_expr} AS group_id,
|
||||||
|
COUNT(*) AS entry_count,
|
||||||
|
SUM(CASE WHEN severity IN ('ERROR','CRITICAL','EMERGENCY','ALERT') THEN 1 ELSE 0 END) AS error_count,
|
||||||
|
MAX(timestamp_iso) AS latest
|
||||||
|
FROM log_entries
|
||||||
|
WHERE timestamp_iso >= ?
|
||||||
|
AND repeat_count = 1
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
|
GROUP BY group_id
|
||||||
|
ORDER BY error_count DESC, entry_count DESC
|
||||||
|
""",
|
||||||
|
(since_iso, tid),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
# Overall counts in window
|
crit_rows = conn.execute(
|
||||||
row = conn.execute(f"""
|
"""
|
||||||
SELECT
|
SELECT id as entry_id, source_id, timestamp_iso, severity, text
|
||||||
COUNT(*) AS total,
|
FROM log_entries
|
||||||
SUM(CASE WHEN severity = 'CRITICAL' THEN 1 ELSE 0 END) AS criticals,
|
WHERE severity = 'CRITICAL'
|
||||||
SUM(CASE WHEN severity IN ('ERROR','CRITICAL','EMERGENCY','ALERT') THEN 1 ELSE 0 END) AS errors
|
AND repeat_count = 1
|
||||||
FROM log_entries
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
WHERE timestamp_iso >= {since_expr}
|
ORDER BY timestamp_iso DESC
|
||||||
AND repeat_count = 1
|
LIMIT 25
|
||||||
""").fetchone()
|
""",
|
||||||
total_24h = int(row["total"] or 0)
|
(tid,),
|
||||||
criticals_24h = int(row["criticals"] or 0)
|
).fetchall()
|
||||||
errors_24h = int(row["errors"] or 0)
|
|
||||||
|
timeline_rows = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT id as entry_id, source_id, timestamp_iso, severity, text
|
||||||
|
FROM log_entries
|
||||||
|
WHERE severity IN ('CRITICAL','ERROR','WARN','WARNING','EMERGENCY','ALERT')
|
||||||
|
AND timestamp_iso >= ?
|
||||||
|
AND timestamp_iso IS NOT NULL
|
||||||
|
AND repeat_count = 1
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
|
ORDER BY timestamp_iso DESC
|
||||||
|
LIMIT 300
|
||||||
|
""",
|
||||||
|
(since_iso, tid),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
last_row = conn.execute(
|
||||||
|
"SELECT MAX(ingest_time) AS t FROM log_entries WHERE (tenant_id = ? OR tenant_id = '')",
|
||||||
|
(tid,),
|
||||||
|
).fetchone()
|
||||||
|
|
||||||
# Per-source breakdown — grouped by prefix:host stem (same logic as list_sources).
|
|
||||||
source_rows = conn.execute(f"""
|
|
||||||
SELECT
|
|
||||||
CASE
|
|
||||||
WHEN INSTR(SUBSTR(source_id, INSTR(source_id, ':')+1), ':') > 0
|
|
||||||
THEN SUBSTR(source_id, 1,
|
|
||||||
INSTR(source_id, ':')
|
|
||||||
+ INSTR(SUBSTR(source_id, INSTR(source_id, ':')+1), ':')
|
|
||||||
- 1)
|
|
||||||
ELSE source_id
|
|
||||||
END AS group_id,
|
|
||||||
COUNT(*) AS entry_count,
|
|
||||||
SUM(CASE WHEN severity IN ('ERROR','CRITICAL','EMERGENCY','ALERT') THEN 1 ELSE 0 END) AS error_count,
|
|
||||||
MAX(timestamp_iso) AS latest
|
|
||||||
FROM log_entries
|
|
||||||
WHERE timestamp_iso >= {since_expr}
|
|
||||||
AND repeat_count = 1
|
|
||||||
GROUP BY group_id
|
|
||||||
ORDER BY error_count DESC, entry_count DESC
|
|
||||||
""").fetchall()
|
|
||||||
source_health = [
|
source_health = [
|
||||||
{
|
{
|
||||||
"source_id": r["group_id"],
|
"source_id": r["group_id"],
|
||||||
|
|
@ -549,16 +687,6 @@ def stats_summary(db_path: Path, window_hours: int = 24, severity_overrides: lis
|
||||||
for r in source_rows
|
for r in source_rows
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fetch candidate criticals (fetch more so filtering doesn't leave us with too few)
|
|
||||||
crit_rows = conn.execute("""
|
|
||||||
SELECT id as entry_id, source_id, timestamp_iso, severity, text
|
|
||||||
FROM log_entries
|
|
||||||
WHERE severity = 'CRITICAL' AND repeat_count = 1
|
|
||||||
ORDER BY timestamp_iso DESC
|
|
||||||
LIMIT 25
|
|
||||||
""").fetchall()
|
|
||||||
|
|
||||||
# Apply overrides: skip entries whose effective severity is no longer CRITICAL
|
|
||||||
suppressed = 0
|
suppressed = 0
|
||||||
recent_criticals = []
|
recent_criticals = []
|
||||||
for r in crit_rows:
|
for r in crit_rows:
|
||||||
|
|
@ -576,10 +704,18 @@ def stats_summary(db_path: Path, window_hours: int = 24, severity_overrides: lis
|
||||||
else:
|
else:
|
||||||
suppressed += 1
|
suppressed += 1
|
||||||
|
|
||||||
last_row = conn.execute("SELECT MAX(ingest_time) AS t FROM log_entries").fetchone()
|
timeline_events = [
|
||||||
last_gleaned: str | None = last_row["t"] if last_row else None
|
{
|
||||||
|
"entry_id": r["entry_id"],
|
||||||
|
"source_id": r["source_id"],
|
||||||
|
"timestamp_iso": r["timestamp_iso"],
|
||||||
|
"severity": r["severity"],
|
||||||
|
"text": r["text"],
|
||||||
|
}
|
||||||
|
for r in timeline_rows
|
||||||
|
]
|
||||||
|
|
||||||
conn.close()
|
last_gleaned: str | None = last_row["t"] if last_row else None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"window_hours": window_hours,
|
"window_hours": window_hours,
|
||||||
|
|
@ -590,6 +726,7 @@ def stats_summary(db_path: Path, window_hours: int = 24, severity_overrides: lis
|
||||||
"recent_criticals": recent_criticals,
|
"recent_criticals": recent_criticals,
|
||||||
"suppressed_criticals": suppressed,
|
"suppressed_criticals": suppressed,
|
||||||
"last_gleaned": last_gleaned,
|
"last_gleaned": last_gleaned,
|
||||||
|
"timeline_events": timeline_events,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
265
app/services/ssh_targets.py
Normal file
265
app/services/ssh_targets.py
Normal file
|
|
@ -0,0 +1,265 @@
|
||||||
|
"""SSH target registry — persisted in the main SQLite DB.
|
||||||
|
|
||||||
|
Targets are stored as path references only. The private key is never
|
||||||
|
read into the database, logged, or returned by any API response.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import stat
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SshTarget:
    """In-memory form of one ``ssh_targets`` row.

    The record carries the *path* to a private key only; key contents are
    not part of it.
    """

    # Identity and connection coordinates.
    id: str
    label: str
    host: str
    port: int
    user: str
    key_path: str  # filesystem path to the private key file
    # Outcome of the latest connection test; None when the column is NULL.
    last_tested: str | None
    last_ok: bool | None
    last_error: str | None
    # Row bookkeeping timestamps, kept as the strings stored in the database.
    created_at: str
    updated_at: str
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_target(row: tuple) -> SshTarget:
    """Build an ``SshTarget`` from a positional result row.

    The row must list its columns in the same order as the names below
    (the order used by the SELECT statements in this module). ``last_ok`` is
    converted to a bool, with NULL preserved as ``None``.
    """
    column_order = (
        "id",
        "label",
        "host",
        "port",
        "user",
        "key_path",
        "last_tested",
        "last_ok",
        "last_error",
        "created_at",
        "updated_at",
    )
    values = {name: row[position] for position, name in enumerate(column_order)}
    if values["last_ok"] is not None:
        values["last_ok"] = bool(values["last_ok"])
    return SshTarget(**values)
|
||||||
|
|
||||||
|
|
||||||
|
def _now() -> str:
|
||||||
|
return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CRUD
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def list_targets(db_path: Path) -> list[SshTarget]:
    """Return every row of ``ssh_targets`` as an ``SshTarget``, ordered by label.

    The connection is closed even when the query raises (for example when
    the table does not exist yet), so a failing call does not leak a handle.
    """
    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        rows = conn.execute(
            "SELECT id, label, host, port, user, key_path, last_tested, last_ok, last_error, created_at, updated_at "
            "FROM ssh_targets ORDER BY label"
        ).fetchall()
    finally:
        conn.close()
    return [_row_to_target(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def get_target(db_path: Path, target_id: str) -> SshTarget | None:
    """Look up one SSH target by id.

    Returns the matching ``SshTarget``, or ``None`` when no row has that id.
    The connection is closed even when the query raises.
    """
    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        row = conn.execute(
            "SELECT id, label, host, port, user, key_path, last_tested, last_ok, last_error, created_at, updated_at "
            "FROM ssh_targets WHERE id = ?",
            (target_id,),
        ).fetchone()
    finally:
        conn.close()
    return _row_to_target(row) if row else None
|
||||||
|
|
||||||
|
|
||||||
|
def create_target(
    db_path: Path,
    label: str,
    host: str,
    port: int,
    user: str,
    key_path: str,
) -> SshTarget:
    """Insert a new SSH target and return it as stored.

    The key path is validated first; ``ValueError`` from
    ``_validate_key_path`` propagates before anything is written. A fresh
    UUID4 string is used as the id and both timestamps are set to the same
    "now" value. The connection is closed even when the insert fails.
    """
    resolved = _validate_key_path(key_path)
    now = _now()
    target_id = str(uuid.uuid4())
    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        conn.execute(
            "INSERT INTO ssh_targets (id, label, host, port, user, key_path, created_at, updated_at) "
            "VALUES (?,?,?,?,?,?,?,?)",
            (target_id, label, host, port, user, str(resolved), now, now),
        )
        conn.commit()
    finally:
        conn.close()
    # Re-read so the caller sees exactly what the database holds.
    return get_target(db_path, target_id)  # type: ignore[return-value]
|
||||||
|
|
||||||
|
|
||||||
|
def update_target(
    db_path: Path,
    target_id: str,
    *,
    label: str | None = None,
    host: str | None = None,
    port: int | None = None,
    user: str | None = None,
    key_path: str | None = None,
) -> SshTarget | None:
    """Update selected fields of an existing SSH target.

    Fields passed as ``None`` keep their stored value. Returns the updated
    target, or ``None`` when *target_id* does not exist. A new *key_path* is
    validated first, so ``ValueError`` from ``_validate_key_path`` propagates
    before anything is written. The connection is closed even when the
    update fails.
    """
    existing = get_target(db_path, target_id)
    if existing is None:
        return None

    # NOTE: truthiness (not ``is not None``) means an empty-string key_path
    # is treated as "leave unchanged", unlike the other fields.
    resolved_key = str(_validate_key_path(key_path)) if key_path else existing.key_path
    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        conn.execute(
            "UPDATE ssh_targets SET label=?, host=?, port=?, user=?, key_path=?, updated_at=? WHERE id=?",
            (
                label if label is not None else existing.label,
                host if host is not None else existing.host,
                port if port is not None else existing.port,
                user if user is not None else existing.user,
                resolved_key,
                _now(),
                target_id,
            ),
        )
        conn.commit()
    finally:
        conn.close()
    return get_target(db_path, target_id)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_target(db_path: Path, target_id: str) -> bool:
    """Delete the SSH target with the given id.

    Returns ``True`` when a row was removed and ``False`` when no row had
    that id. The connection is closed even when the statement raises.
    """
    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        cur = conn.execute("DELETE FROM ssh_targets WHERE id = ?", (target_id,))
        conn.commit()
        return cur.rowcount > 0
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test connection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_connection(db_path: Path, target_id: str) -> dict[str, Any]:
    """Attempt an SSH no-op and record the result.

    Runs `true` on the remote host — no data is pulled. Returns
    {ok: bool, error: str|null, tested_at: str}.

    Raises ``KeyError`` when *target_id* is not registered. Every other
    failure (paramiko missing, authentication, SSH or socket errors) is
    reported through the returned dict and stored via ``_record_test``
    rather than raised.
    """
    target = get_target(db_path, target_id)
    if target is None:
        raise KeyError(f"SSH target {target_id!r} not found")

    # Lazy import — paramiko is optional
    try:
        from paramiko import SSHClient, AutoAddPolicy, AuthenticationException, SSHException
    except ImportError:
        _record_test(db_path, target_id, ok=False, error="paramiko not installed")
        return {"ok": False, "error": "paramiko not installed — run: pip install paramiko", "tested_at": _now()}

    key_path = str(Path(target.key_path).expanduser())
    error: str | None = None
    ok = False
    client = None

    try:
        client = SSHClient()
        # NOTE(security): AutoAddPolicy accepts any unknown host key without
        # verification, so this check cannot detect a man-in-the-middle.
        # Kept as-is to preserve behavior; consider loading known_hosts.
        client.set_missing_host_key_policy(AutoAddPolicy())
        client.connect(
            hostname=target.host,
            port=target.port,
            username=target.user,
            key_filename=key_path,
            timeout=10,
            banner_timeout=10,
        )
        _stdin, stdout, _stderr = client.exec_command("true", timeout=10)
        exit_code = stdout.channel.recv_exit_status()
        ok = exit_code == 0
        if not ok:
            error = f"Remote command exited with code {exit_code}"
    except AuthenticationException:
        error = "Authentication failed — check key path and remote authorized_keys"
    except SSHException as exc:
        error = f"SSH error: {exc}"
    except OSError as exc:
        error = f"Connection failed: {exc}"
    except Exception as exc:
        error = f"Unexpected error: {exc}"
    finally:
        # Close on every path: previously a failure after the client was
        # created left the client (and any open transport) unclosed.
        if client is not None:
            try:
                client.close()
            except Exception:
                pass  # best-effort cleanup; the test outcome is already decided

    tested_at = _now()
    _record_test(db_path, target_id, ok=ok, error=error, tested_at=tested_at)
    return {"ok": ok, "error": error, "tested_at": tested_at}
|
||||||
|
|
||||||
|
|
||||||
|
def _record_test(
    db_path: Path,
    target_id: str,
    *,
    ok: bool,
    error: str | None,
    tested_at: str | None = None,
) -> None:
    """Store the outcome of a connection test on the target's row.

    Writes ``last_tested``, ``last_ok`` (as 1/0), ``last_error`` and a fresh
    ``updated_at``. *tested_at* defaults to the current time. The connection
    is closed even when the update raises.
    """
    if tested_at is None:
        tested_at = _now()
    conn = sqlite3.connect(str(db_path), timeout=10)
    try:
        conn.execute(
            "UPDATE ssh_targets SET last_tested=?, last_ok=?, last_error=?, updated_at=? WHERE id=?",
            (tested_at, 1 if ok else 0, error, _now(), target_id),
        )
        conn.commit()
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Validation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _validate_key_path(raw: str) -> Path:
|
||||||
|
"""Resolve and validate the SSH key path.
|
||||||
|
|
||||||
|
Returns the resolved Path. Raises ValueError with a user-readable message
|
||||||
|
on any problem (does not raise on world-readable — just returns a warning
|
||||||
|
to the caller so the UI can display it non-blocking).
|
||||||
|
"""
|
||||||
|
p = Path(raw).expanduser()
|
||||||
|
if not p.exists():
|
||||||
|
raise ValueError(f"Key file not found: {p}")
|
||||||
|
if not p.is_file():
|
||||||
|
raise ValueError(f"Key path is not a file: {p}")
|
||||||
|
return p
|
||||||
|
|
||||||
|
|
||||||
|
def key_path_warning(key_path: str) -> str | None:
    """Return a warning string if the key file has overly permissive mode, else None.

    Any group or other permission bit (read, write or execute) counts as too
    open, matching the ``mode & 077`` check OpenSSH applies to private keys.
    A path that cannot be stat'ed yields ``None``: this is advisory only and
    existence is validated elsewhere.
    """
    try:
        p = Path(key_path).expanduser()
        mode = p.stat().st_mode
    except OSError:
        # Deliberately best-effort: a missing or unreadable file gives no warning.
        return None
    if mode & (stat.S_IRWXG | stat.S_IRWXO):
        perms = oct(mode & 0o777)
        return f"Key file permissions are too open ({perms}). SSH may refuse to use it — run: chmod 600 {p}"
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def target_to_dict(t: SshTarget, include_warning: bool = False) -> dict[str, Any]:
    """Turn a target into a plain dict for API responses.

    Only the fields listed below are exported; the key is represented by its
    path, never its contents. With ``include_warning=True`` an extra
    ``key_warning`` entry holds the result of ``key_path_warning``.
    """
    exported_fields = (
        "id",
        "label",
        "host",
        "port",
        "user",
        "key_path",
        "last_tested",
        "last_ok",
        "last_error",
        "created_at",
        "updated_at",
    )
    payload: dict[str, Any] = {name: getattr(t, name) for name in exported_fields}
    if include_warning:
        payload["key_warning"] = key_path_warning(t.key_path)
    return payload
|
||||||
213
app/services/ticket_export.py
Normal file
213
app/services/ticket_export.py
Normal file
|
|
@ -0,0 +1,213 @@
|
||||||
|
"""Incident ticket export — push Turnstone incidents to external trackers.
|
||||||
|
|
||||||
|
Supported targets: "notion", "jira"
|
||||||
|
|
||||||
|
Each exporter receives the incident dict and a list of log entry dicts,
|
||||||
|
and returns {"url": str, "ticket_id": str}.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Notion exporter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _notion_export(
|
||||||
|
incident: dict[str, Any],
|
||||||
|
entries: list[dict[str, Any]],
|
||||||
|
token: str,
|
||||||
|
database_id: str,
|
||||||
|
) -> dict[str, str]:
|
||||||
|
"""Create a Notion page in *database_id* from an incident.
|
||||||
|
|
||||||
|
Notion block types used: heading_2, bulleted_list_item, paragraph.
|
||||||
|
Rich text max length is 2000 chars per block.
|
||||||
|
"""
|
||||||
|
if not token or not database_id:
|
||||||
|
raise ValueError("Notion not configured — set notion_token and notion_database_id in Settings")
|
||||||
|
|
||||||
|
def _text(s: str, bold: bool = False) -> dict:
|
||||||
|
chunk: dict[str, Any] = {"type": "text", "text": {"content": s[:2000]}}
|
||||||
|
if bold:
|
||||||
|
chunk["annotations"] = {"bold": True}
|
||||||
|
return chunk
|
||||||
|
|
||||||
|
log_blocks: list[dict] = []
|
||||||
|
for e in entries[:50]: # Notion has page size limits
|
||||||
|
line = f"[{e.get('severity') or '?'}] {e.get('source_id', '')} — {e.get('text', '')[:160]}"
|
||||||
|
log_blocks.append({"object": "block", "type": "bulleted_list_item",
|
||||||
|
"bulleted_list_item": {"rich_text": [_text(line)]}})
|
||||||
|
|
||||||
|
sev = incident.get("severity", "medium").upper()
|
||||||
|
issue_type = incident.get("issue_type") or "—"
|
||||||
|
window = f"{incident.get('started_at') or '?'} → {incident.get('ended_at') or 'ongoing'}"
|
||||||
|
|
||||||
|
children: list[dict] = [
|
||||||
|
{"object": "block", "type": "heading_2",
|
||||||
|
"heading_2": {"rich_text": [_text("Incident Details", bold=True)]}},
|
||||||
|
{"object": "block", "type": "paragraph",
|
||||||
|
"paragraph": {"rich_text": [
|
||||||
|
_text("Severity: ", bold=True), _text(sev),
|
||||||
|
_text(" Type: ", bold=True), _text(issue_type),
|
||||||
|
_text(" Window: ", bold=True), _text(window),
|
||||||
|
]}},
|
||||||
|
]
|
||||||
|
if incident.get("notes"):
|
||||||
|
children.append({"object": "block", "type": "paragraph",
|
||||||
|
"paragraph": {"rich_text": [_text("Notes: ", bold=True), _text(incident["notes"])]}})
|
||||||
|
|
||||||
|
children.append({"object": "block", "type": "heading_2",
|
||||||
|
"heading_2": {"rich_text": [_text("Log Evidence")]}})
|
||||||
|
children.extend(log_blocks)
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"parent": {"database_id": database_id},
|
||||||
|
"properties": {
|
||||||
|
"title": {"title": [_text(incident.get("label", "Unnamed Incident"))]},
|
||||||
|
},
|
||||||
|
"children": children,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp = httpx.post(
|
||||||
|
"https://api.notion.com/v1/pages",
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {token}",
|
||||||
|
"Notion-Version": "2022-06-28",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
json=payload,
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
if not resp.is_success:
|
||||||
|
raise RuntimeError(f"Notion API error {resp.status_code}: {resp.text[:300]}")
|
||||||
|
|
||||||
|
page = resp.json()
|
||||||
|
page_id = page["id"]
|
||||||
|
url = page.get("url") or f"https://notion.so/{page_id.replace('-', '')}"
|
||||||
|
return {"url": url, "ticket_id": page_id}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Jira exporter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _jira_export(
|
||||||
|
incident: dict[str, Any],
|
||||||
|
entries: list[dict[str, Any]],
|
||||||
|
jira_url: str,
|
||||||
|
email: str,
|
||||||
|
api_token: str,
|
||||||
|
project_key: str,
|
||||||
|
issue_type: str = "Bug",
|
||||||
|
) -> dict[str, str]:
|
||||||
|
"""Create a Jira issue via REST API v3 (cloud or Server 8.4+)."""
|
||||||
|
if not jira_url or not email or not api_token or not project_key:
|
||||||
|
raise ValueError("Jira not configured — set jira_url, jira_email, jira_api_token, and jira_project_key in Settings")
|
||||||
|
|
||||||
|
base = jira_url.rstrip("/")
|
||||||
|
sev = incident.get("severity", "medium").upper()
|
||||||
|
inc_type = incident.get("issue_type") or "incident"
|
||||||
|
window = f"{incident.get('started_at') or '?'} → {incident.get('ended_at') or 'ongoing'}"
|
||||||
|
|
||||||
|
log_lines = "\n".join(
|
||||||
|
f"[{e.get('severity') or '?'}] {e.get('source_id', '')} — {e.get('text', '')[:160]}"
|
||||||
|
for e in entries[:40]
|
||||||
|
)
|
||||||
|
description = (
|
||||||
|
f"*Severity:* {sev} | *Type:* {inc_type} | *Window:* {window}\n\n"
|
||||||
|
+ (f"*Notes:* {incident['notes']}\n\n" if incident.get("notes") else "")
|
||||||
|
+ "h2. Log Evidence\n\n{{code}}\n" + log_lines + "\n{{code}}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Jira REST v3 uses Atlassian Document Format for description
|
||||||
|
adf_body = {
|
||||||
|
"type": "doc",
|
||||||
|
"version": 1,
|
||||||
|
"content": [
|
||||||
|
{"type": "paragraph", "content": [{"type": "text", "text": description}]},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"fields": {
|
||||||
|
"project": {"key": project_key},
|
||||||
|
"summary": incident.get("label", "Unnamed Incident"),
|
||||||
|
"issuetype": {"name": issue_type},
|
||||||
|
"description": adf_body,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
import base64 as _b64
|
||||||
|
creds = _b64.b64encode(f"{email}:{api_token}".encode()).decode()
|
||||||
|
resp = httpx.post(
|
||||||
|
f"{base}/rest/api/3/issue",
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Basic {creds}",
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Accept": "application/json",
|
||||||
|
},
|
||||||
|
json=payload,
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
if not resp.is_success:
|
||||||
|
raise RuntimeError(f"Jira API error {resp.status_code}: {resp.text[:300]}")
|
||||||
|
|
||||||
|
data = resp.json()
|
||||||
|
issue_key = data["key"]
|
||||||
|
url = f"{base}/browse/{issue_key}"
|
||||||
|
return {"url": url, "ticket_id": issue_key}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_EXPORTERS = {
|
||||||
|
"notion": _notion_export,
|
||||||
|
"jira": _jira_export,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def available_targets() -> list[str]:
    """Return the names of all registered ticket-export targets, in registration order."""
    return [target_name for target_name in _EXPORTERS]
|
||||||
|
|
||||||
|
|
||||||
|
def export_incident(
    target: str,
    incident: dict[str, Any],
    entries: list[dict[str, Any]],
    config: dict[str, str],
) -> dict[str, str]:
    """Dispatch to the appropriate exporter.

    *config* is pulled from the settings pref dict — callers pass the relevant
    subset so this service stays stateless and testable.

    Args:
        target: Exporter name; must be a key of ``_EXPORTERS``.
        incident: Incident record handed to the exporter unchanged.
        entries: Log entries handed to the exporter unchanged.
        config: Settings; missing keys are passed on as empty strings, and a
            missing or empty ``jira_issue_type`` falls back to ``"Bug"``.

    Returns {"url": str, "ticket_id": str}.
    Raises ValueError for unknown target or missing config.
    Raises RuntimeError on API-level failures.
    """
    if target not in _EXPORTERS:
        raise ValueError(f"Unknown ticket target: {target!r}. Supported: {list(_EXPORTERS)}")

    if target == "notion":
        return _notion_export(
            incident, entries,
            token=config.get("notion_token", ""),
            database_id=config.get("notion_database_id", ""),
        )
    if target == "jira":
        return _jira_export(
            incident, entries,
            jira_url=config.get("jira_url", ""),
            email=config.get("jira_email", ""),
            api_token=config.get("jira_api_token", ""),
            project_key=config.get("jira_project_key", ""),
            # An empty setting means "unset", like every other key here; a
            # .get default alone would forward "" and Jira would reject it.
            issue_type=config.get("jira_issue_type") or "Bug",
        )
    # Only reachable if a target is registered in _EXPORTERS without a branch above.
    raise ValueError(f"Unhandled target: {target!r}")
|
||||||
114
app/tasks/anomaly_scorer.py
Normal file
114
app/tasks/anomaly_scorer.py
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
"""Background anomaly scoring task.
|
||||||
|
|
||||||
|
Runs score_unscored() after each glean cycle (triggered by glean_scheduler)
|
||||||
|
or on its own interval when TURNSTONE_ANOMALY_INTERVAL is set.
|
||||||
|
|
||||||
|
Set TURNSTONE_ANOMALY_MODEL to a HuggingFace model ID to activate.
|
||||||
|
When the env var is empty (default) the scorer is a no-op.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.services.anomaly import ScoringResult, score_unscored
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_DEFAULT_INTERVAL = int(os.environ.get("TURNSTONE_ANOMALY_INTERVAL", "0"))
|
||||||
|
|
||||||
|
_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ScorerState:
    """Mutable status record for the anomaly scorer, updated by run_once() and scorer_loop()."""

    # ISO-8601 UTC timestamp taken when the most recent run started.
    last_run_at: str | None = None
    # Wall-clock duration of the most recent run, in seconds (rounded to 2 places).
    last_duration_s: float | None = None
    # `scored` count reported by the most recent successful run.
    last_scored: int = 0
    # `detections` count reported by the most recent successful run.
    last_detections: int = 0
    # Error from the most recent run: the result's `error`, or str(exception) on a crash.
    last_error: str | None = None
    # Number of runs recorded, whether they returned a result or raised.
    run_count: int = 0
    # ISO-8601 UTC timestamp of the next loop iteration; reset to None when the loop is cancelled.
    next_run_at: str | None = None
    # True while run_once() is executing inside the lock.
    running: bool = False
    # Running sum of `scored` across all runs that returned a result.
    total_scored: int = 0
    # Running sum of `detections` across all runs that returned a result.
    total_detections: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
_state = ScorerState()
|
||||||
|
|
||||||
|
|
||||||
|
def get_state() -> ScorerState:
    """Return the module-level ScorerState instance (the live object, not a copy)."""
    return _state
|
||||||
|
|
||||||
|
|
||||||
|
async def run_once(
    db_path: Path,
    model_id: str = "",
    device: str = "cpu",
    batch_size: int = 256,
    threshold: float = 0.75,
) -> ScoringResult:
    """Run a single anomaly-scoring pass and record its outcome in the module state.

    If another pass currently holds the lock, a skipped ScoringResult is
    returned immediately and the state is left untouched. The blocking
    score_unscored() call runs in the default executor. Any exception is
    caught, logged, stored in ``last_error`` and reported through the returned
    ScoringResult rather than raised.
    """
    if _lock.locked():
        return ScoringResult(skipped=True, error="scorer already running")

    def _score():
        return score_unscored(db_path, model_id, device, batch_size, threshold)

    def _seconds_since(moment: datetime) -> float:
        return (datetime.now(tz=timezone.utc) - moment).total_seconds()

    async with _lock:
        _state.running = True
        began = datetime.now(tz=timezone.utc)
        try:
            outcome: ScoringResult = await asyncio.get_running_loop().run_in_executor(None, _score)
            took = _seconds_since(began)

            # Snapshot of this run.
            _state.last_run_at = began.isoformat()
            _state.last_duration_s = round(took, 2)
            _state.last_scored = outcome.scored
            _state.last_detections = outcome.detections
            _state.last_error = outcome.error

            # Lifetime counters.
            _state.run_count += 1
            _state.total_scored += outcome.scored
            _state.total_detections += outcome.detections

            if not outcome.skipped:
                logger.info(
                    "Anomaly scorer: %d scored, %d detections in %.1fs",
                    outcome.scored, outcome.detections, took,
                )
            return outcome
        except Exception as exc:
            took = _seconds_since(began)
            _state.last_run_at = began.isoformat()
            _state.last_duration_s = round(took, 2)
            _state.last_error = str(exc)
            _state.run_count += 1
            logger.error("Anomaly scorer failed: %s", exc)
            return ScoringResult(error=str(exc))
        finally:
            # Always clear the flag, whichever path was taken.
            _state.running = False
|
||||||
|
|
||||||
|
|
||||||
|
async def scorer_loop(
    db_path: Path,
    model_id: str,
    device: str,
    interval_s: int,
    batch_size: int = 256,
    threshold: float = 0.75,
) -> None:
    """Repeat run_once() forever, sleeping interval_s seconds between passes.

    After each pass the expected time of the next one is published in
    ``_state.next_run_at``. When the sleep is cancelled, that field is cleared
    and the CancelledError is re-raised so the task ends as cancelled.
    """
    logger.info("Anomaly scorer loop started — interval %ds, model: %s", interval_s, model_id)
    while True:
        await run_once(db_path, model_id, device, batch_size, threshold)

        wake_at = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s)
        _state.next_run_at = wake_at.isoformat()

        try:
            await asyncio.sleep(interval_s)
        except asyncio.CancelledError:
            logger.info("Anomaly scorer loop cancelled")
            _state.next_run_at = None
            raise
|
||||||
84
app/tasks/cybersec_scorer.py
Normal file
84
app/tasks/cybersec_scorer.py
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
"""Background task wrapper for the cybersec zero-shot scoring pipeline."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.services.cybersec import score_security_entries
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CybersecState:
    """Mutable status record for the cybersec scorer, updated by run_once()."""

    # ISO-8601 UTC timestamp taken when the most recently recorded pass started.
    last_run_at: str | None = None
    # Wall-clock duration of the most recently recorded pass, in seconds.
    last_duration_s: float | None = None
    # `scored` count reported by the most recently recorded pass.
    last_scored: int = 0
    # `detections` count reported by the most recently recorded pass.
    last_detections: int = 0
    # `error` value reported by the most recently recorded pass.
    last_error: str | None = None
    # Number of passes recorded.
    run_count: int = 0
    # True while run_once() is executing inside the lock.
    running: bool = False
    # Running sum of `scored` across recorded passes.
    total_scored: int = 0
    # Running sum of `detections` across recorded passes.
    total_detections: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
_state = CybersecState()
|
||||||
|
|
||||||
|
|
||||||
|
def get_state() -> dict:
    """Return a plain-dict snapshot of the cybersec scorer state."""
    exposed = (
        "last_run_at",
        "last_duration_s",
        "last_scored",
        "last_detections",
        "last_error",
        "run_count",
        "running",
        "total_scored",
        "total_detections",
    )
    return {attr: getattr(_state, attr) for attr in exposed}
|
||||||
|
|
||||||
|
|
||||||
|
async def run_once(
    db_path: Path,
    model_id: str,
    device: str = "cpu",
    batch_size: int = 32,
    threshold: float = 0.60,
) -> None:
    """Single cybersec scoring pass — no-op if already running or no model set.

    The blocking score_security_entries() call runs in the default executor
    and its outcome is recorded in the module-level state. An exception raised
    by the scorer is logged and stored in ``last_error`` instead of
    propagating, so a failing model cannot take down the caller's loop.

    Args:
        db_path: Database path forwarded to the scorer.
        model_id: Model identifier; an empty value disables the pass.
        device: Device string forwarded to the scorer.
        batch_size: Batch size forwarded to the scorer.
        threshold: Detection threshold forwarded to the scorer.
    """
    if not model_id or _lock.locked():
        return

    async with _lock:
        _state.running = True
        started = datetime.now(tz=timezone.utc)
        try:
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                lambda: score_security_entries(db_path, model_id, device, batch_size, threshold),
            )
            elapsed = (datetime.now(tz=timezone.utc) - started).total_seconds()
            _state.last_run_at = started.isoformat()
            _state.last_duration_s = round(elapsed, 2)
            _state.last_scored = result.scored
            _state.last_detections = result.detections
            _state.last_error = result.error
            _state.run_count += 1
            _state.total_scored += result.scored
            _state.total_detections += result.detections
            if result.error:
                logger.error("cybersec scorer error: %s", result.error)
            elif not result.skipped:
                logger.info(
                    "cybersec scorer: scored=%d detections=%d in %.1fs",
                    result.scored, result.detections, elapsed,
                )
        except Exception as exc:
            # Record the failure and swallow it; cancellation is a
            # BaseException and still propagates.
            elapsed = (datetime.now(tz=timezone.utc) - started).total_seconds()
            _state.last_run_at = started.isoformat()
            _state.last_duration_s = round(elapsed, 2)
            _state.last_error = str(exc)
            _state.run_count += 1
            logger.error("cybersec scorer failed: %s", exc)
        finally:
            _state.running = False
|
||||||
|
|
@ -11,7 +11,7 @@ from __future__ import annotations
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import sqlite3
|
from app.db import get_conn, resolve_tenant_id
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -20,6 +20,9 @@ from typing import Any
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from app.glean.pipeline import glean_sources
|
from app.glean.pipeline import glean_sources
|
||||||
|
from app.tasks.anomaly_scorer import run_once as _run_scorer
|
||||||
|
from app.tasks.cybersec_scorer import run_once as _run_cybersec
|
||||||
|
from app.tasks.incident_detector import run_once as _run_incident_detector
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -49,9 +52,8 @@ def get_state() -> IngestState:
|
||||||
|
|
||||||
def _query_matched_since(db_path: Path, since: str | None) -> list[dict]:
|
def _query_matched_since(db_path: Path, since: str | None) -> list[dict]:
|
||||||
"""Return entries with non-empty matched_patterns, optionally filtered by ingest_time."""
|
"""Return entries with non-empty matched_patterns, optionally filtered by ingest_time."""
|
||||||
conn = sqlite3.connect(str(db_path), timeout=30.0)
|
tid = resolve_tenant_id()
|
||||||
conn.row_factory = sqlite3.Row
|
with get_conn(db_path) as conn:
|
||||||
try:
|
|
||||||
if since:
|
if since:
|
||||||
rows = conn.execute(
|
rows = conn.execute(
|
||||||
"""
|
"""
|
||||||
|
|
@ -59,11 +61,13 @@ def _query_matched_since(db_path: Path, since: str | None) -> list[dict]:
|
||||||
ingest_time, severity, repeat_count, out_of_order,
|
ingest_time, severity, repeat_count, out_of_order,
|
||||||
matched_patterns, text
|
matched_patterns, text
|
||||||
FROM log_entries
|
FROM log_entries
|
||||||
WHERE matched_patterns != '[]' AND ingest_time > ?
|
WHERE matched_patterns != '[]'
|
||||||
|
AND ingest_time > ?
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
ORDER BY ingest_time
|
ORDER BY ingest_time
|
||||||
LIMIT 5000
|
LIMIT 5000
|
||||||
""",
|
""",
|
||||||
(since,),
|
(since, tid),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
else:
|
else:
|
||||||
rows = conn.execute(
|
rows = conn.execute(
|
||||||
|
|
@ -73,13 +77,13 @@ def _query_matched_since(db_path: Path, since: str | None) -> list[dict]:
|
||||||
matched_patterns, text
|
matched_patterns, text
|
||||||
FROM log_entries
|
FROM log_entries
|
||||||
WHERE matched_patterns != '[]'
|
WHERE matched_patterns != '[]'
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
ORDER BY ingest_time DESC
|
ORDER BY ingest_time DESC
|
||||||
LIMIT 5000
|
LIMIT 5000
|
||||||
""",
|
""",
|
||||||
|
(tid,),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
return [dict(r) for r in rows]
|
return [dict(r) for r in rows]
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def submit_matched(
|
async def submit_matched(
|
||||||
|
|
@ -122,6 +126,14 @@ async def run_once(
|
||||||
submit_endpoint: str | None = None,
|
submit_endpoint: str | None = None,
|
||||||
source_host: str = "unknown",
|
source_host: str = "unknown",
|
||||||
force: bool = False,
|
force: bool = False,
|
||||||
|
anomaly_model: str = "",
|
||||||
|
anomaly_device: str = "cpu",
|
||||||
|
anomaly_threshold: float = 0.75,
|
||||||
|
cybersec_model: str = "",
|
||||||
|
cybersec_device: str = "cpu",
|
||||||
|
cybersec_threshold: float = 0.60,
|
||||||
|
incidents_db_path: Path | None = None,
|
||||||
|
auto_incident: bool = True,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Ingest all sources once, then submit matched entries if configured.
|
"""Ingest all sources once, then submit matched entries if configured.
|
||||||
|
|
||||||
|
|
@ -162,6 +174,18 @@ async def run_once(
|
||||||
if submit_endpoint:
|
if submit_endpoint:
|
||||||
await submit_matched(db_path, submit_endpoint, source_host, since=_state.last_submitted_at)
|
await submit_matched(db_path, submit_endpoint, source_host, since=_state.last_submitted_at)
|
||||||
|
|
||||||
|
if anomaly_model:
|
||||||
|
await _run_scorer(db_path, anomaly_model, anomaly_device, threshold=anomaly_threshold)
|
||||||
|
|
||||||
|
if cybersec_model:
|
||||||
|
await _run_cybersec(db_path, cybersec_model, cybersec_device, threshold=cybersec_threshold)
|
||||||
|
|
||||||
|
if auto_incident and incidents_db_path:
|
||||||
|
glean_started_iso = _state.last_run_at
|
||||||
|
result = await _run_incident_detector(db_path, incidents_db_path, since=glean_started_iso)
|
||||||
|
if result["created"]:
|
||||||
|
logger.info("Incident detector: %d incident(s) auto-created", result["created"])
|
||||||
|
|
||||||
return {"ok": True, "stats": _state.last_stats, "duration_s": _state.last_duration_s}
|
return {"ok": True, "stats": _state.last_stats, "duration_s": _state.last_duration_s}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -172,13 +196,37 @@ async def scheduler_loop(
|
||||||
interval_s: int,
|
interval_s: int,
|
||||||
submit_endpoint: str | None = None,
|
submit_endpoint: str | None = None,
|
||||||
source_host: str = "unknown",
|
source_host: str = "unknown",
|
||||||
|
anomaly_model: str = "",
|
||||||
|
anomaly_device: str = "cpu",
|
||||||
|
anomaly_threshold: float = 0.75,
|
||||||
|
cybersec_model: str = "",
|
||||||
|
cybersec_device: str = "cpu",
|
||||||
|
cybersec_threshold: float = 0.60,
|
||||||
|
incidents_db_path: Path | None = None,
|
||||||
|
auto_incident: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Run glean + optional submission every interval_s seconds until cancelled."""
|
"""Run glean + optional submission + optional anomaly/cybersec scoring every interval_s seconds."""
|
||||||
logger.info("Ingest scheduler started — interval %ds, sources: %s", interval_s, sources_file)
|
logger.info("Ingest scheduler started — interval %ds, sources: %s", interval_s, sources_file)
|
||||||
if submit_endpoint:
|
if submit_endpoint:
|
||||||
logger.info("Submission enabled — endpoint: %s", submit_endpoint)
|
logger.info("Submission enabled — endpoint: %s", submit_endpoint)
|
||||||
|
if anomaly_model:
|
||||||
|
logger.info("Anomaly scoring enabled — model: %s", anomaly_model)
|
||||||
|
if cybersec_model:
|
||||||
|
logger.info("Cybersec scoring enabled — model: %s", cybersec_model)
|
||||||
|
if auto_incident and incidents_db_path:
|
||||||
|
logger.info("Auto-incident detection enabled")
|
||||||
while True:
|
while True:
|
||||||
await run_once(sources_file, db_path, pattern_file, submit_endpoint, source_host)
|
await run_once(
|
||||||
|
sources_file, db_path, pattern_file, submit_endpoint, source_host,
|
||||||
|
anomaly_model=anomaly_model,
|
||||||
|
anomaly_device=anomaly_device,
|
||||||
|
anomaly_threshold=anomaly_threshold,
|
||||||
|
cybersec_model=cybersec_model,
|
||||||
|
cybersec_device=cybersec_device,
|
||||||
|
cybersec_threshold=cybersec_threshold,
|
||||||
|
incidents_db_path=incidents_db_path,
|
||||||
|
auto_incident=auto_incident,
|
||||||
|
)
|
||||||
next_run = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s)
|
next_run = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s)
|
||||||
_state.next_run_at = next_run.isoformat()
|
_state.next_run_at = next_run.isoformat()
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
188
app/tasks/incident_detector.py
Normal file
188
app/tasks/incident_detector.py
Normal file
|
|
@ -0,0 +1,188 @@
|
||||||
|
"""Post-glean automatic incident detection.
|
||||||
|
|
||||||
|
After each batch glean, scan entries ingested since the last run for
|
||||||
|
ERROR/CRITICAL clusters. If a source produces >= threshold errors within
|
||||||
|
window_s seconds, auto-create an incident unless one already exists for
|
||||||
|
that source in that time window.
|
||||||
|
|
||||||
|
Environment variables (all optional):
|
||||||
|
TURNSTONE_AUTO_INCIDENT_THRESHOLD integer, default 5
|
||||||
|
TURNSTONE_AUTO_INCIDENT_WINDOW seconds, default 600 (10 min)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
|
from app.services.incidents import create_incident
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_THRESHOLD = int(os.environ.get("TURNSTONE_AUTO_INCIDENT_THRESHOLD", "5"))
|
||||||
|
_WINDOW_S = int(os.environ.get("TURNSTONE_AUTO_INCIDENT_WINDOW", "600"))
|
||||||
|
|
||||||
|
# Severity rank — used to pick the cluster's worst severity
|
||||||
|
_SEV_RANK = {"CRITICAL": 3, "ERROR": 2, "WARN": 1, "INFO": 0, "DEBUG": 0}
|
||||||
|
|
||||||
|
|
||||||
|
def _query_recent_errors(db_path: Path, since: str | None) -> list[dict]:
|
||||||
|
tid = resolve_tenant_id()
|
||||||
|
with get_conn(db_path) as conn:
|
||||||
|
if since:
|
||||||
|
rows = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT source_id, timestamp_iso, severity
|
||||||
|
FROM log_entries
|
||||||
|
WHERE severity IN ('ERROR', 'CRITICAL')
|
||||||
|
AND ingest_time > ?
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
|
ORDER BY source_id, timestamp_iso ASC
|
||||||
|
""",
|
||||||
|
(since, tid),
|
||||||
|
).fetchall()
|
||||||
|
else:
|
||||||
|
rows = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT source_id, timestamp_iso, severity
|
||||||
|
FROM log_entries
|
||||||
|
WHERE severity IN ('ERROR', 'CRITICAL')
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
|
ORDER BY source_id, timestamp_iso ASC
|
||||||
|
LIMIT 10000
|
||||||
|
""",
|
||||||
|
(tid,),
|
||||||
|
).fetchall()
|
||||||
|
return [dict(r) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_ts(iso: str | None) -> float | None:
|
||||||
|
"""Parse ISO timestamp to epoch seconds; return None on failure."""
|
||||||
|
if not iso:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(iso.replace("Z", "+00:00"))
|
||||||
|
return dt.timestamp()
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _find_clusters(
|
||||||
|
events: list[dict], window_s: int, threshold: int
|
||||||
|
) -> list[tuple[str, str, str]]:
|
||||||
|
"""Return (started_at_iso, ended_at_iso, worst_severity) for each cluster."""
|
||||||
|
# Filter to events with parseable timestamps, sorted ascending
|
||||||
|
timed = []
|
||||||
|
for e in events:
|
||||||
|
t = _parse_ts(e["timestamp_iso"])
|
||||||
|
if t is not None:
|
||||||
|
timed.append((t, e["timestamp_iso"], e["severity"]))
|
||||||
|
timed.sort()
|
||||||
|
|
||||||
|
clusters: list[tuple[str, str, str]] = []
|
||||||
|
i = 0
|
||||||
|
while i < len(timed):
|
||||||
|
j = i
|
||||||
|
while j < len(timed) and timed[j][0] - timed[i][0] <= window_s:
|
||||||
|
j += 1
|
||||||
|
count = j - i
|
||||||
|
if count >= threshold:
|
||||||
|
worst = max((timed[k][2] for k in range(i, j)), key=lambda s: _SEV_RANK.get(s, 0))
|
||||||
|
clusters.append((timed[i][1], timed[j - 1][1], worst))
|
||||||
|
i = j # skip past the cluster to avoid overlap
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
return clusters
|
||||||
|
|
||||||
|
|
||||||
|
def _incident_exists_for_cluster(
|
||||||
|
incidents_db_path: Path, source_id: str, started_at: str, ended_at: str
|
||||||
|
) -> bool:
|
||||||
|
"""Return True if an auto-incident for this source already covers the window."""
|
||||||
|
issue_type = f"auto:{source_id}"
|
||||||
|
start_ts = _parse_ts(started_at)
|
||||||
|
end_ts = _parse_ts(ended_at)
|
||||||
|
if start_ts is None or end_ts is None:
|
||||||
|
return False
|
||||||
|
tid = resolve_tenant_id()
|
||||||
|
with get_conn(incidents_db_path) as conn:
|
||||||
|
rows = conn.execute(
|
||||||
|
"""
|
||||||
|
SELECT started_at, ended_at FROM incidents
|
||||||
|
WHERE issue_type = ?
|
||||||
|
AND (tenant_id = ? OR tenant_id = '')
|
||||||
|
""",
|
||||||
|
(issue_type, tid),
|
||||||
|
).fetchall()
|
||||||
|
for row in rows:
|
||||||
|
ex_start = _parse_ts(row["started_at"])
|
||||||
|
ex_end = _parse_ts(row["ended_at"])
|
||||||
|
if ex_start is None or ex_end is None:
|
||||||
|
continue
|
||||||
|
# Overlap check: two intervals [a,b] and [c,d] overlap when a<=d and b>=c
|
||||||
|
if ex_start <= end_ts and ex_end >= start_ts:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def detect_and_create(
    db_path: Path,
    incidents_db_path: Path,
    since: str | None,
    threshold: int = _THRESHOLD,
    window_s: int = _WINDOW_S,
) -> dict[str, int]:
    """Find per-source error clusters and open an incident for each new one.

    Recent ERROR/CRITICAL entries are grouped by ``source_id`` and clustered
    with _find_clusters(). A cluster is skipped when an auto-incident for the
    same source already overlaps its window. Returns ``{"created": N}`` with
    the number of incidents created.
    """
    recent = _query_recent_errors(db_path, since)
    if not recent:
        return {"created": 0}

    grouped: dict[str, list[dict]] = defaultdict(list)
    for row in recent:
        grouped[row["source_id"]].append(row)

    created = 0
    for source_id, events in grouped.items():
        # The label reports every error fetched for this source in the glean
        # window, not only the members of the individual cluster.
        n = len(events)
        for started_at, ended_at, worst_sev in _find_clusters(events, window_s, threshold):
            if _incident_exists_for_cluster(incidents_db_path, source_id, started_at, ended_at):
                continue

            if worst_sev == "CRITICAL":
                sev_label = "critical"
            else:
                sev_label = "high"

            create_incident(
                incidents_db_path,
                label=f"Auto: {source_id} — {n} errors",
                issue_type=f"auto:{source_id}",
                started_at=started_at,
                ended_at=ended_at,
                notes="Auto-detected error cluster. Review and label as needed.",
                severity=sev_label,
            )
            logger.info(
                "Auto-incident created: source=%s window=[%s, %s] severity=%s",
                source_id, started_at, ended_at, sev_label,
            )
            created += 1

    if created:
        logger.info("Incident detector: %d new incident(s) created", created)
    return {"created": created}
|
||||||
|
|
||||||
|
|
||||||
|
async def run_once(
    db_path: Path,
    incidents_db_path: Path,
    since: str | None,
    threshold: int = _THRESHOLD,
    window_s: int = _WINDOW_S,
) -> dict[str, int]:
    """Run detect_and_create() in the default executor so the event loop is not blocked.

    Returns whatever detect_and_create() returns.
    """
    def _detect() -> dict[str, int]:
        return detect_and_create(db_path, incidents_db_path, since, threshold, window_s)

    running_loop = asyncio.get_running_loop()
    outcome = await running_loop.run_in_executor(None, _detect)
    return outcome
|
||||||
|
|
@ -8,7 +8,6 @@ from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import sqlite3
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import threading
|
import threading
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
@ -21,17 +20,16 @@ import yaml
|
||||||
from app.glean import journald as journald_parser, syslog as syslog_parser
|
from app.glean import journald as journald_parser, syslog as syslog_parser
|
||||||
from app.glean import plaintext as plaintext_parser, servarr as servarr_parser, plex as plex_parser
|
from app.glean import plaintext as plaintext_parser, servarr as servarr_parser, plex as plex_parser
|
||||||
from app.glean import qbittorrent as qbit_parser, caddy as caddy_parser
|
from app.glean import qbittorrent as qbit_parser, caddy as caddy_parser
|
||||||
from app.glean.pipeline import _detect_format
|
from app.db import get_conn
|
||||||
|
from app.db.schema import ensure_schema
|
||||||
|
from app.glean.pipeline import _detect_format, _write_batch
|
||||||
from app.glean.base import _compile, load_patterns, now_iso
|
from app.glean.base import _compile, load_patterns, now_iso
|
||||||
from app.glean.pipeline import _write_batch, _SCHEMA
|
|
||||||
from app.services.search import build_fts_index
|
|
||||||
from app.services.models import RetrievedEntry
|
from app.services.models import RetrievedEntry
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
FLUSH_INTERVAL_SEC = 10
|
FLUSH_INTERVAL_SEC = 10
|
||||||
FLUSH_BATCH_SIZE = 100
|
FLUSH_BATCH_SIZE = 100
|
||||||
FTS_SYNC_EVERY_N_FLUSHES = 3 # sync FTS every ~30s under normal load
|
|
||||||
|
|
||||||
|
|
||||||
# ── Config ────────────────────────────────────────────────────────────────────
|
# ── Config ────────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -111,10 +109,7 @@ class WatchSource:
|
||||||
patterns = load_patterns(self.pattern_file)
|
patterns = load_patterns(self.pattern_file)
|
||||||
compiled = _compile(patterns)
|
compiled = _compile(patterns)
|
||||||
|
|
||||||
conn = sqlite3.connect(str(self.db_path), timeout=30.0)
|
ensure_schema(self.db_path)
|
||||||
conn.execute("PRAGMA journal_mode=WAL")
|
|
||||||
conn.executescript(_SCHEMA)
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cmd = self._build_command()
|
cmd = self._build_command()
|
||||||
|
|
@ -127,12 +122,10 @@ class WatchSource:
|
||||||
text=True,
|
text=True,
|
||||||
bufsize=1,
|
bufsize=1,
|
||||||
)
|
)
|
||||||
self._drain(conn, compiled)
|
self._drain(compiled)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self._error = str(exc)
|
self._error = str(exc)
|
||||||
logger.error("Watch source %r crashed: %s", self.config.source_id, exc)
|
logger.error("Watch source %r crashed: %s", self.config.source_id, exc)
|
||||||
finally:
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
def _build_command(self) -> list[str] | None:
|
def _build_command(self) -> list[str] | None:
|
||||||
t = self.config.source_type
|
t = self.config.source_type
|
||||||
|
|
@ -193,7 +186,7 @@ class WatchSource:
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _drain(self, conn: sqlite3.Connection, compiled) -> None:
|
def _drain(self, compiled) -> None:
|
||||||
"""Read lines from the subprocess and flush to DB periodically."""
|
"""Read lines from the subprocess and flush to DB periodically."""
|
||||||
assert self._proc is not None
|
assert self._proc is not None
|
||||||
buffer: list[str] = []
|
buffer: list[str] = []
|
||||||
|
|
@ -221,29 +214,28 @@ class WatchSource:
|
||||||
should_flush = len(buffer) >= FLUSH_BATCH_SIZE or elapsed >= FLUSH_INTERVAL_SEC
|
should_flush = len(buffer) >= FLUSH_BATCH_SIZE or elapsed >= FLUSH_INTERVAL_SEC
|
||||||
|
|
||||||
if buffer and should_flush:
|
if buffer and should_flush:
|
||||||
flush_count = self._flush(conn, buffer, compiled, flush_count)
|
flush_count = self._flush(buffer, compiled, flush_count)
|
||||||
buffer.clear()
|
buffer.clear()
|
||||||
last_flush = datetime.now(tz=timezone.utc)
|
last_flush = datetime.now(tz=timezone.utc)
|
||||||
|
|
||||||
# Flush remainder
|
# Flush remainder
|
||||||
if buffer:
|
if buffer:
|
||||||
self._flush(conn, buffer, compiled, flush_count)
|
self._flush(buffer, compiled, flush_count)
|
||||||
|
|
||||||
def _flush(self, conn: sqlite3.Connection, lines: list[str], compiled, flush_count: int) -> int:
|
def _flush(self, lines: list[str], compiled, flush_count: int) -> int:
|
||||||
ingest_time = now_iso()
|
ingest_time = now_iso()
|
||||||
try:
|
try:
|
||||||
entries = self._parse_lines(lines, ingest_time, compiled)
|
entries = self._parse_lines(lines, ingest_time, compiled)
|
||||||
if entries:
|
if entries:
|
||||||
_write_batch(conn, entries)
|
with get_conn(self.db_path) as conn:
|
||||||
conn.commit()
|
_write_batch(conn, entries)
|
||||||
|
conn.commit()
|
||||||
self._entry_count += len(entries)
|
self._entry_count += len(entries)
|
||||||
self._last_event = now_iso()
|
self._last_event = now_iso()
|
||||||
if entries:
|
if entries:
|
||||||
self._last_event = entries[-1].timestamp_iso or self._last_event
|
self._last_event = entries[-1].timestamp_iso or self._last_event
|
||||||
|
|
||||||
flush_count += 1
|
flush_count += 1
|
||||||
if flush_count % FTS_SYNC_EVERY_N_FLUSHES == 0:
|
|
||||||
build_fts_index(self.db_path)
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("Flush error for %r: %s", self.config.source_id, exc)
|
logger.warning("Flush error for %r: %s", self.config.source_id, exc)
|
||||||
return flush_count
|
return flush_count
|
||||||
|
|
|
||||||
|
|
@ -6,33 +6,33 @@
|
||||||
# databases for training data.
|
# databases for training data.
|
||||||
#
|
#
|
||||||
# Ports:
|
# Ports:
|
||||||
# 8536 → submissions-daniel (harvest.circuitforge.tech/daniel/*)
|
# 8536 → submissions-contrib1 (harvest.circuitforge.tech/contrib1/*)
|
||||||
# 8537 → submissions-xander (harvest.circuitforge.tech/xander/*)
|
# 8537 → submissions-contrib2 (harvest.circuitforge.tech/contrib2/*)
|
||||||
#
|
#
|
||||||
# Deploy on Heimdall:
|
# Deploy on Heimdall:
|
||||||
# docker compose -f docker-compose.submissions.yml up -d
|
# docker compose -f docker-compose.submissions.yml up -d
|
||||||
#
|
#
|
||||||
# Database locations:
|
# Database locations:
|
||||||
# /devl/docker/turnstone-submissions/daniel/turnstone.db
|
# /devl/docker/turnstone-submissions/contrib1/turnstone.db
|
||||||
# /devl/docker/turnstone-submissions/xander/turnstone.db
|
# /devl/docker/turnstone-submissions/contrib2/turnstone.db
|
||||||
#
|
#
|
||||||
# These instances have TURNSTONE_INGEST_INTERVAL=0 — they only receive POSTs,
|
# These instances have TURNSTONE_INGEST_INTERVAL=0 — they only receive POSTs,
|
||||||
# they do not run their own scheduled ingest.
|
# they do not run their own scheduled ingest.
|
||||||
|
|
||||||
services:
|
services:
|
||||||
submissions-daniel:
|
submissions-contrib1:
|
||||||
image: turnstone:latest
|
image: turnstone:latest
|
||||||
container_name: turnstone-submissions-daniel
|
container_name: turnstone-submissions-contrib1
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
- "8536:8534"
|
- "8536:8534"
|
||||||
volumes:
|
volumes:
|
||||||
- /devl/docker/turnstone-submissions/daniel:/data:z
|
- /devl/docker/turnstone-submissions/contrib1:/data:z
|
||||||
- /devl/docker/turnstone-submissions/daniel/patterns:/patterns:ro
|
- /devl/docker/turnstone-submissions/contrib1/patterns:/patterns:ro
|
||||||
environment:
|
environment:
|
||||||
TURNSTONE_DB: /data/turnstone.db
|
TURNSTONE_DB: /data/turnstone.db
|
||||||
TURNSTONE_PATTERNS: /patterns
|
TURNSTONE_PATTERNS: /patterns
|
||||||
TURNSTONE_SOURCE_HOST: submissions-daniel
|
TURNSTONE_SOURCE_HOST: submissions-contrib1
|
||||||
TURNSTONE_INGEST_INTERVAL: "0"
|
TURNSTONE_INGEST_INTERVAL: "0"
|
||||||
PYTHONUNBUFFERED: "1"
|
PYTHONUNBUFFERED: "1"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|
@ -44,19 +44,19 @@ services:
|
||||||
networks:
|
networks:
|
||||||
- caddy-internal
|
- caddy-internal
|
||||||
|
|
||||||
submissions-xander:
|
submissions-contrib2:
|
||||||
image: turnstone:latest
|
image: turnstone:latest
|
||||||
container_name: turnstone-submissions-xander
|
container_name: turnstone-submissions-contrib2
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
- "8537:8534"
|
- "8537:8534"
|
||||||
volumes:
|
volumes:
|
||||||
- /devl/docker/turnstone-submissions/xander:/data:z
|
- /devl/docker/turnstone-submissions/contrib2:/data:z
|
||||||
- /devl/docker/turnstone-submissions/xander/patterns:/patterns:ro
|
- /devl/docker/turnstone-submissions/contrib2/patterns:/patterns:ro
|
||||||
environment:
|
environment:
|
||||||
TURNSTONE_DB: /data/turnstone.db
|
TURNSTONE_DB: /data/turnstone.db
|
||||||
TURNSTONE_PATTERNS: /patterns
|
TURNSTONE_PATTERNS: /patterns
|
||||||
TURNSTONE_SOURCE_HOST: submissions-xander
|
TURNSTONE_SOURCE_HOST: submissions-contrib2
|
||||||
TURNSTONE_INGEST_INTERVAL: "0"
|
TURNSTONE_INGEST_INTERVAL: "0"
|
||||||
PYTHONUNBUFFERED: "1"
|
PYTHONUNBUFFERED: "1"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|
|
||||||
68
docker-compose.yml
Normal file
68
docker-compose.yml
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
version: "3.9"
|
||||||
|
|
||||||
|
# Turnstone with external Postgres DB.
|
||||||
|
# Data lives in the named volume `turnstone_pgdata` — survives image rebuilds.
|
||||||
|
# To adopt an EXISTING Postgres install, set DATABASE_URL to point at it and
|
||||||
|
# remove the `db` service and `depends_on` blocks.
|
||||||
|
#
|
||||||
|
# Quick start:
|
||||||
|
# docker compose up -d
|
||||||
|
# # Then open http://localhost:8520
|
||||||
|
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: turnstone
|
||||||
|
POSTGRES_USER: turnstone
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-turnstone_dev}
|
||||||
|
volumes:
|
||||||
|
- turnstone_pgdata:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U turnstone -d turnstone"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
turnstone:
|
||||||
|
build: .
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "${TURNSTONE_PORT:-8520}:8520"
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
environment:
|
||||||
|
# Backend selection — comment out DATABASE_URL to fall back to SQLite
|
||||||
|
DATABASE_URL: postgresql://turnstone:${POSTGRES_PASSWORD:-turnstone_dev}@db:5432/turnstone
|
||||||
|
TURNSTONE_TENANT_ID: ${TURNSTONE_TENANT_ID:-}
|
||||||
|
TURNSTONE_API_KEY: ${TURNSTONE_API_KEY:-}
|
||||||
|
TURNSTONE_GLEAN_INTERVAL: ${TURNSTONE_GLEAN_INTERVAL:-900}
|
||||||
|
TURNSTONE_SOURCE_HOST: ${TURNSTONE_SOURCE_HOST:-}
|
||||||
|
TURNSTONE_SUBMIT_ENDPOINT: ${TURNSTONE_SUBMIT_ENDPOINT:-}
|
||||||
|
# --- Multi-agent diagnose pipeline ---
|
||||||
|
TURNSTONE_MULTI_AGENT_DIAGNOSE: ${TURNSTONE_MULTI_AGENT_DIAGNOSE:-false}
|
||||||
|
TURNSTONE_CLASSIFIER_MODEL: ${TURNSTONE_CLASSIFIER_MODEL:-}
|
||||||
|
TURNSTONE_EMBED_BACKEND: ${TURNSTONE_EMBED_BACKEND:-}
|
||||||
|
TURNSTONE_EMBED_MODEL: ${TURNSTONE_EMBED_MODEL:-}
|
||||||
|
TURNSTONE_EMBED_DEVICE: ${TURNSTONE_EMBED_DEVICE:-cpu}
|
||||||
|
# --- Cybersec scoring pipeline ---
|
||||||
|
TURNSTONE_CYBERSEC_MODEL: ${TURNSTONE_CYBERSEC_MODEL:-}
|
||||||
|
TURNSTONE_CYBERSEC_DEVICE: ${TURNSTONE_CYBERSEC_DEVICE:-cpu}
|
||||||
|
TURNSTONE_CYBERSEC_THRESHOLD: ${TURNSTONE_CYBERSEC_THRESHOLD:-0.60}
|
||||||
|
# --- Anomaly scoring pipeline ---
|
||||||
|
TURNSTONE_ANOMALY_MODEL: ${TURNSTONE_ANOMALY_MODEL:-}
|
||||||
|
TURNSTONE_ANOMALY_DEVICE: ${TURNSTONE_ANOMALY_DEVICE:-cpu}
|
||||||
|
TURNSTONE_ANOMALY_THRESHOLD: ${TURNSTONE_ANOMALY_THRESHOLD:-0.75}
|
||||||
|
TURNSTONE_ANOMALY_INTERVAL: ${TURNSTONE_ANOMALY_INTERVAL:-0}
|
||||||
|
# --- HuggingFace model cache ---
|
||||||
|
HF_HOME: /hf_cache
|
||||||
|
volumes:
|
||||||
|
- ./patterns:/app/patterns:ro
|
||||||
|
- ./data:/app/data # optional: persists SQLite files if DATABASE_URL unset
|
||||||
|
- ${HF_CACHE_PATH:-/Library/Assets/LLM}:/hf_cache:ro # shared model cache
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
turnstone_pgdata:
|
||||||
|
name: turnstone_pgdata
|
||||||
|
|
@ -62,7 +62,10 @@ set -euo pipefail
|
||||||
REPO_DIR="${HOME}/turnstone"
|
REPO_DIR="${HOME}/turnstone"
|
||||||
DATA_DIR="${REPO_DIR}/data"
|
DATA_DIR="${REPO_DIR}/data"
|
||||||
PATTERNS_DIR="${REPO_DIR}/patterns"
|
PATTERNS_DIR="${REPO_DIR}/patterns"
|
||||||
HF_CACHE_DIR="${REPO_DIR}/hf-cache" # persists downloaded ML models across restarts
|
# HF_CACHE_DIR: override to a shared cache directory to avoid re-downloading models.
|
||||||
|
# Example (Heimdall, where byviz/bylastic_classification_logs is already cached):
|
||||||
|
# export HF_CACHE_DIR=/Library/Assets/LLM
|
||||||
|
HF_CACHE_DIR="${HF_CACHE_DIR:-${REPO_DIR}/hf-cache}"
|
||||||
|
|
||||||
TZ="${TZ:-America/Los_Angeles}"
|
TZ="${TZ:-America/Los_Angeles}"
|
||||||
|
|
||||||
|
|
@ -79,15 +82,25 @@ TZ="${TZ:-America/Los_Angeles}"
|
||||||
# receiving instance after each glean run. Only matched entries are sent —
|
# receiving instance after each glean run. Only matched entries are sent —
|
||||||
# no raw log content. Used to build Avocet training data.
|
# no raw log content. Used to build Avocet training data.
|
||||||
#
|
#
|
||||||
# export TURNSTONE_SUBMIT_ENDPOINT=https://harvest.circuitforge.tech/daniel
|
# export TURNSTONE_SUBMIT_ENDPOINT=https://harvest.circuitforge.tech/your-node-id
|
||||||
|
# bash ~/turnstone/docker-standalone.sh
|
||||||
|
#
|
||||||
|
|
||||||
|
# ── Anomaly scoring pipeline (IDS / watchdog) ────────────────────────────────
|
||||||
|
# Set TURNSTONE_ANOMALY_MODEL to enable automatic anomaly scoring after each
|
||||||
|
# glean run. The byviz classifier (already used by the diagnose pipeline) is
|
||||||
|
# a good default — it's cached alongside the other models.
|
||||||
|
#
|
||||||
|
# export TURNSTONE_ANOMALY_MODEL=byviz/bylastic_classification_logs
|
||||||
|
# export TURNSTONE_ANOMALY_THRESHOLD=0.80 # confidence floor (default 0.75)
|
||||||
# bash ~/turnstone/docker-standalone.sh
|
# bash ~/turnstone/docker-standalone.sh
|
||||||
#
|
#
|
||||||
|
|
||||||
# ── Multi-agent diagnose pipeline ────────────────────────────────────────────
|
# ── Multi-agent diagnose pipeline ────────────────────────────────────────────
|
||||||
# Enable the 5-stage ML pipeline to get smarter diagnose results.
|
# Enable the 5-stage ML pipeline to get smarter diagnose results.
|
||||||
#
|
#
|
||||||
# If your host has WireGuard to Heimdall's LAN (e.g. Huginn):
|
# If your host has WireGuard to Heimdall's LAN:
|
||||||
# export GPU_SERVER_URL=http://10.1.10.71:7700
|
# export GPU_SERVER_URL=http://<HEIMDALL_LAN_IP>:7700
|
||||||
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
|
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
|
||||||
# bash ~/turnstone/docker-standalone.sh
|
# bash ~/turnstone/docker-standalone.sh
|
||||||
#
|
#
|
||||||
|
|
@ -134,6 +147,13 @@ docker run -d \
|
||||||
-e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
|
-e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
|
||||||
-e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
|
-e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
|
||||||
-e TURNSTONE_EMBED_DEVICE="${TURNSTONE_EMBED_DEVICE:-cpu}" \
|
-e TURNSTONE_EMBED_DEVICE="${TURNSTONE_EMBED_DEVICE:-cpu}" \
|
||||||
|
-e TURNSTONE_CYBERSEC_MODEL="${TURNSTONE_CYBERSEC_MODEL:-}" \
|
||||||
|
-e TURNSTONE_CYBERSEC_DEVICE="${TURNSTONE_CYBERSEC_DEVICE:-cpu}" \
|
||||||
|
-e TURNSTONE_CYBERSEC_THRESHOLD="${TURNSTONE_CYBERSEC_THRESHOLD:-0.60}" \
|
||||||
|
-e TURNSTONE_ANOMALY_MODEL="${TURNSTONE_ANOMALY_MODEL:-}" \
|
||||||
|
-e TURNSTONE_ANOMALY_DEVICE="${TURNSTONE_ANOMALY_DEVICE:-cpu}" \
|
||||||
|
-e TURNSTONE_ANOMALY_THRESHOLD="${TURNSTONE_ANOMALY_THRESHOLD:-0.75}" \
|
||||||
|
-e TURNSTONE_ANOMALY_INTERVAL="${TURNSTONE_ANOMALY_INTERVAL:-0}" \
|
||||||
localhost/turnstone:latest
|
localhost/turnstone:latest
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
#
|
#
|
||||||
# domain: groups patterns into service health domains for triage-level summaries.
|
# domain: groups patterns into service health domains for triage-level summaries.
|
||||||
# Valid domains: service_health | networking | auth | storage | memory |
|
# Valid domains: service_health | networking | auth | storage | memory |
|
||||||
# kernel | power | web_proxy | media | gpu
|
# kernel | power | web_proxy | media | gpu | audio
|
||||||
#
|
#
|
||||||
# Patterns are applied in order; multiple can match a single entry.
|
# Patterns are applied in order; multiple can match a single entry.
|
||||||
|
|
||||||
|
|
@ -211,10 +211,10 @@ patterns:
|
||||||
domain: media
|
domain: media
|
||||||
description: Plex EasyAudioEncoder (EAC3 Dolby audio transcoder) crashed — service restart required
|
description: Plex EasyAudioEncoder (EAC3 Dolby audio transcoder) crashed — service restart required
|
||||||
|
|
||||||
# - name: avcx_device_error
|
# - name: ext_device_error
|
||||||
# pattern: "ERR-\d{4}"
|
# pattern: "ERR-\d{4}"
|
||||||
# severity: ERROR
|
# severity: ERROR
|
||||||
# description: AVCX device error code
|
# description: vendor device structured error code
|
||||||
|
|
||||||
# ── VPN / tunnel patterns ──────────────────────────────────────────────────
|
# ── VPN / tunnel patterns ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
@ -275,3 +275,41 @@ patterns:
|
||||||
severity: ERROR
|
severity: ERROR
|
||||||
domain: power
|
domain: power
|
||||||
description: Undervoltage event — instability risk, check PSU and cable connections
|
description: Undervoltage event — instability risk, check PSU and cable connections
|
||||||
|
|
||||||
|
# ── Audio / PipeWire / ALSA ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
- name: pipewire_overflow
|
||||||
|
pattern: "(OVERFLOW channel|stream.*OVERFLOW|protocol.pulse.*OVERFLOW)"
|
||||||
|
severity: WARN
|
||||||
|
domain: audio
|
||||||
|
description: PipeWire-Pulse stream buffer overflow — client not draining audio fast enough; usually indicates a quantum/period-size mismatch or CPU scheduling issue
|
||||||
|
|
||||||
|
- name: pipewire_underrun
|
||||||
|
pattern: "(pw\\.node.*underrun|spa\\.alsa.*underrun|alsa.*underrun|UNDERRUN)"
|
||||||
|
severity: WARN
|
||||||
|
domain: audio
|
||||||
|
description: PipeWire/ALSA buffer underrun (xrun) — audio thread missed its deadline; increase quantum or period-size for the affected device
|
||||||
|
|
||||||
|
- name: alsa_xrun
|
||||||
|
pattern: "(ALSA.*[Xx][Rr][Uu][Nn]|alsa.*xrun|snd_pcm.*xrun|pcm.*underrun|pcm.*overrun)"
|
||||||
|
severity: WARN
|
||||||
|
domain: audio
|
||||||
|
description: ALSA xrun (hardware buffer overrun/underrun) — increase api.alsa.period-size via WirePlumber rule or raise clock.min-quantum
|
||||||
|
|
||||||
|
- name: pipewire_quantum_mismatch
|
||||||
|
pattern: "(quantum.*mismatch|rate.*mismatch|sample.rate.*mismatch|resampl.*fail|can.*t adapt quantum)"
|
||||||
|
severity: WARN
|
||||||
|
domain: audio
|
||||||
|
description: PipeWire quantum or sample-rate mismatch between nodes — check for mixed 44100/48000 streams; may need per-device WirePlumber rules
|
||||||
|
|
||||||
|
- name: pipewire_node_error
|
||||||
|
pattern: "(pw\\.node.*error|node.*ERROR|pipewire.*failed to set|spa\\.alsa.*error|alsa_sink.*error|alsa_source.*error)"
|
||||||
|
severity: ERROR
|
||||||
|
domain: audio
|
||||||
|
description: PipeWire node error — device may be unavailable or misconfigured
|
||||||
|
|
||||||
|
- name: pipewire_jackdbus_missing
|
||||||
|
pattern: "(jackdbus.*reply|jackaudio.*service.*not.*provided|org\\.jackaudio\\.service)"
|
||||||
|
severity: INFO
|
||||||
|
domain: audio
|
||||||
|
description: PipeWire JACK D-Bus probe — JACK not running; benign on non-JACK systems, fires once per PipeWire restart
|
||||||
|
|
|
||||||
|
|
@ -48,8 +48,8 @@ sources:
|
||||||
# ── Network syslog (router, switches, UniFi APs) ─────────────────────────────
|
# ── Network syslog (router, switches, UniFi APs) ─────────────────────────────
|
||||||
# Written by syslog-receiver.service (UDP 5140 → /devl/turnstone-cluster/data/network-syslog.txt).
|
# Written by syslog-receiver.service (UDP 5140 → /devl/turnstone-cluster/data/network-syslog.txt).
|
||||||
# Configure devices to send syslog to Heimdall:5140.
|
# Configure devices to send syslog to Heimdall:5140.
|
||||||
# UniFi: Settings → System → Remote Logging → Syslog Host = 10.1.10.71:5140
|
# UniFi: Settings → System → Remote Logging → Syslog Host = <YOUR_HOST_IP>:5140
|
||||||
# Ubiquiti EdgeRouter: set system syslog host 10.1.10.71 facility all level debug
|
# Ubiquiti EdgeRouter: set system syslog host <YOUR_HOST_IP> facility all level debug
|
||||||
# Managed switches: varies by vendor — target 10.1.10.71 UDP 5140
|
# Managed switches: varies by vendor — target <YOUR_HOST_IP> UDP 5140
|
||||||
- id: network-syslog
|
- id: network-syslog
|
||||||
path: /data/network-syslog.txt
|
path: /data/network-syslog.txt
|
||||||
|
|
|
||||||
50
patterns/sources-example.yaml
Normal file
50
patterns/sources-example.yaml
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
# Turnstone log sources — example node (Docker/Podman, self-hosted media stack)
|
||||||
|
#
|
||||||
|
# Copy this file to your patterns directory and edit for your setup.
|
||||||
|
# Container paths: /opt and /var/log are bind-mounted read-only.
|
||||||
|
# journal-export.jsonl is written to /data/ by export_journal.sh (run via cron before glean).
|
||||||
|
#
|
||||||
|
# Add or remove sources freely. Missing paths are skipped with a warning.
|
||||||
|
|
||||||
|
sources:
|
||||||
|
# ── System ────────────────────────────────────────────────────────────────
|
||||||
|
# Requires: cron job to run export_journal.sh before each glean.
|
||||||
|
# Example cron (every 15 min — edit paths for your install):
|
||||||
|
# */15 * * * * /opt/turnstone/scripts/export_journal.sh \
|
||||||
|
# /opt/turnstone-data/
|
||||||
|
- id: system-journal
|
||||||
|
path: /data/journal-export.jsonl
|
||||||
|
|
||||||
|
- id: dmesg
|
||||||
|
path: /data/dmesg-export.txt
|
||||||
|
|
||||||
|
# ── Servarr stack ─────────────────────────────────────────────────────────
|
||||||
|
- id: sonarr
|
||||||
|
path: /opt/sonarr/config/logs/sonarr.0.txt
|
||||||
|
|
||||||
|
- id: radarr
|
||||||
|
path: /opt/radarr/config/logs/radarr.0.txt
|
||||||
|
|
||||||
|
- id: bazarr
|
||||||
|
path: /opt/bazarr/config/log/bazarr.log
|
||||||
|
|
||||||
|
- id: prowlarr
|
||||||
|
path: /opt/prowlarr/config/logs/prowlarr.0.txt
|
||||||
|
|
||||||
|
# ── Media server / tracking ────────────────────────────────────────────────
|
||||||
|
- id: tautulli
|
||||||
|
path: /opt/tautulli/config/logs/plex_websocket.log
|
||||||
|
|
||||||
|
# ── Download automation ────────────────────────────────────────────────────
|
||||||
|
- id: autoscan
|
||||||
|
path: /opt/autoscan/config/autoscan.log
|
||||||
|
|
||||||
|
# ── Web / proxy ────────────────────────────────────────────────────────────
|
||||||
|
- id: organizr-nginx
|
||||||
|
path: /opt/organizr/log/nginx/error.log
|
||||||
|
|
||||||
|
- id: organizr-app
|
||||||
|
path: /opt/organizr/www/organizr/server.log
|
||||||
|
|
||||||
|
- id: nextcloud-nginx
|
||||||
|
path: /opt/nextcloud/config/log/nginx/error.log
|
||||||
|
|
@ -46,7 +46,7 @@
|
||||||
# ── Adding Caddy reverse proxy ────────────────────────────────────────────────
|
# ── Adding Caddy reverse proxy ────────────────────────────────────────────────
|
||||||
# Add to /etc/caddy/Caddyfile:
|
# Add to /etc/caddy/Caddyfile:
|
||||||
#
|
#
|
||||||
# turnstone.xanderland.tv {
|
# turnstone.your-domain.example {
|
||||||
# import protected
|
# import protected
|
||||||
# reverse_proxy 10.0.0.10:8534
|
# reverse_proxy 10.0.0.10:8534
|
||||||
# import cloudflare
|
# import cloudflare
|
||||||
|
|
@ -59,11 +59,14 @@
|
||||||
#
|
#
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
REPO_DIR=/opt/turnstone
|
# Auto-detect repo from script location — works whether cloned to /opt/turnstone
|
||||||
DATA_DIR=/opt/turnstone/data
|
# or to /Library/Development/CircuitForge/turnstone or any other path.
|
||||||
PATTERNS_DIR=/opt/turnstone/patterns
|
REPO_DIR="${TURNSTONE_REPO_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}"
|
||||||
HF_CACHE_DIR=/opt/turnstone/hf-cache # persists downloaded ML models across restarts
|
# Data and patterns live OUTSIDE the repo so they survive git pulls.
|
||||||
TZ=America/Los_Angeles
|
DATA_DIR="${TURNSTONE_DATA_DIR:-/opt/turnstone-data}"
|
||||||
|
PATTERNS_DIR="${TURNSTONE_PATTERNS_DIR:-${DATA_DIR}/patterns}"
|
||||||
|
HF_CACHE_DIR="${TURNSTONE_HF_CACHE:-${DATA_DIR}/hf-cache}"
|
||||||
|
TZ="${TZ:-America/Los_Angeles}"
|
||||||
|
|
||||||
# ── Bundle push configuration ────────────────────────────────────────────────
|
# ── Bundle push configuration ────────────────────────────────────────────────
|
||||||
# Set TURNSTONE_BUNDLE_ENDPOINT before running this script to enable the
|
# Set TURNSTONE_BUNDLE_ENDPOINT before running this script to enable the
|
||||||
|
|
@ -77,7 +80,7 @@ TZ=America/Los_Angeles
|
||||||
# receiving instance after each glean run. Only matched entries are sent —
|
# receiving instance after each glean run. Only matched entries are sent —
|
||||||
# no raw log content. Used to build Avocet training data.
|
# no raw log content. Used to build Avocet training data.
|
||||||
#
|
#
|
||||||
# export TURNSTONE_SUBMIT_ENDPOINT=https://harvest.circuitforge.tech/xander
|
# export TURNSTONE_SUBMIT_ENDPOINT=https://harvest.circuitforge.tech/your-node-id
|
||||||
# bash /opt/turnstone/podman-standalone.sh
|
# bash /opt/turnstone/podman-standalone.sh
|
||||||
#
|
#
|
||||||
# TURNSTONE_SOURCE_HOST is auto-detected from `hostname` — override if needed.
|
# TURNSTONE_SOURCE_HOST is auto-detected from `hostname` — override if needed.
|
||||||
|
|
@ -91,15 +94,14 @@ TZ=America/Los_Angeles
|
||||||
# ML models are downloaded on first diagnose run and cached in HF_CACHE_DIR.
|
# ML models are downloaded on first diagnose run and cached in HF_CACHE_DIR.
|
||||||
# On a CPU-only host (no GPU) set TURNSTONE_EMBED_DEVICE=cpu (default).
|
# On a CPU-only host (no GPU) set TURNSTONE_EMBED_DEVICE=cpu (default).
|
||||||
#
|
#
|
||||||
# For Xander's instance (xanderland.tv) — no WireGuard to Heimdall LAN,
|
# If your host has no WireGuard to Heimdall — use the public cf-orch endpoint:
|
||||||
# use the public cf-orch endpoint instead:
|
|
||||||
# export GPU_SERVER_URL=https://orch.circuitforge.tech
|
# export GPU_SERVER_URL=https://orch.circuitforge.tech
|
||||||
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
|
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
|
||||||
# sudo bash /opt/turnstone/podman-standalone.sh
|
# sudo bash /opt/turnstone/podman-standalone.sh
|
||||||
#
|
#
|
||||||
# For Daniel's instance (Huginn) — WireGuard reaches Heimdall LAN directly,
|
# For WireGuard-connected Docker hosts — WireGuard reaches Heimdall LAN directly,
|
||||||
# use docker-standalone.sh (not this script — Docker host):
|
# use docker-standalone.sh (not this script — Docker host):
|
||||||
# export GPU_SERVER_URL=http://10.1.10.71:7700
|
# export GPU_SERVER_URL=http://<YOUR_HOST_IP>:7700
|
||||||
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
|
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
|
||||||
# bash ~/turnstone/docker-standalone.sh
|
# bash ~/turnstone/docker-standalone.sh
|
||||||
|
|
||||||
|
|
@ -114,13 +116,26 @@ TZ=America/Los_Angeles
|
||||||
# Must be run as root (sudo bash podman-standalone.sh) — rootful Podman only.
|
# Must be run as root (sudo bash podman-standalone.sh) — rootful Podman only.
|
||||||
#
|
#
|
||||||
|
|
||||||
|
# Bootstrap data and patterns dirs if this is a first run
|
||||||
|
mkdir -p "${DATA_DIR}" "${PATTERNS_DIR}" "${HF_CACHE_DIR}"
|
||||||
|
# Copy default patterns if the dir is empty (first run only)
|
||||||
|
if [ -z "$(ls -A "${PATTERNS_DIR}")" ]; then
|
||||||
|
cp "${REPO_DIR}/patterns/default.yaml" "${PATTERNS_DIR}/"
|
||||||
|
# Copy host-specific sources if present, otherwise copy the generic template
|
||||||
|
HOST_SOURCES="${REPO_DIR}/patterns/sources-$(hostname).yaml"
|
||||||
|
if [ -f "${HOST_SOURCES}" ]; then
|
||||||
|
cp "${HOST_SOURCES}" "${PATTERNS_DIR}/sources.yaml"
|
||||||
|
echo "==> Installed host-specific sources: ${HOST_SOURCES}"
|
||||||
|
else
|
||||||
|
cp "${REPO_DIR}/patterns/sources.yaml" "${PATTERNS_DIR}/"
|
||||||
|
echo "==> Installed default sources.yaml — edit ${PATTERNS_DIR}/sources.yaml for this host"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Build image from current source (bakes app/ code into the image)
|
# Build image from current source (bakes app/ code into the image)
|
||||||
echo "Building Turnstone image..."
|
echo "Building Turnstone image..."
|
||||||
podman build -t localhost/turnstone:latest "${REPO_DIR}"
|
podman build -t localhost/turnstone:latest "${REPO_DIR}"
|
||||||
|
|
||||||
# Create HF model cache dir if not present (persists across container rebuilds)
|
|
||||||
mkdir -p "${HF_CACHE_DIR}"
|
|
||||||
|
|
||||||
# Remove existing container if present (safe re-run)
|
# Remove existing container if present (safe re-run)
|
||||||
podman rm -f turnstone 2>/dev/null || true
|
podman rm -f turnstone 2>/dev/null || true
|
||||||
|
|
||||||
|
|
@ -142,6 +157,9 @@ podman run -d \
|
||||||
-e TURNSTONE_MULTI_AGENT_DIAGNOSE="${TURNSTONE_MULTI_AGENT_DIAGNOSE:-false}" \
|
-e TURNSTONE_MULTI_AGENT_DIAGNOSE="${TURNSTONE_MULTI_AGENT_DIAGNOSE:-false}" \
|
||||||
-e GPU_SERVER_URL="${GPU_SERVER_URL:-}" \
|
-e GPU_SERVER_URL="${GPU_SERVER_URL:-}" \
|
||||||
-e HF_HOME=/hf-cache \
|
-e HF_HOME=/hf-cache \
|
||||||
|
-e TURNSTONE_AUTO_INCIDENT="${TURNSTONE_AUTO_INCIDENT:-true}" \
|
||||||
|
-e TURNSTONE_AUTO_INCIDENT_THRESHOLD="${TURNSTONE_AUTO_INCIDENT_THRESHOLD:-5}" \
|
||||||
|
-e TURNSTONE_AUTO_INCIDENT_WINDOW="${TURNSTONE_AUTO_INCIDENT_WINDOW:-600}" \
|
||||||
-e TURNSTONE_CLASSIFIER_MODEL="${TURNSTONE_CLASSIFIER_MODEL:-byviz/bylastic_classification_logs}" \
|
-e TURNSTONE_CLASSIFIER_MODEL="${TURNSTONE_CLASSIFIER_MODEL:-byviz/bylastic_classification_logs}" \
|
||||||
-e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
|
-e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
|
||||||
-e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
|
-e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
fastapi>=0.110.0
|
fastapi>=0.110.0
|
||||||
uvicorn[standard]>=0.27.0
|
uvicorn[standard]>=0.27.0
|
||||||
|
# Postgres backend — optional; SQLite is used when DATABASE_URL is unset
|
||||||
|
psycopg[binary,pool]>=3.1.0
|
||||||
pydantic>=2.0.0
|
pydantic>=2.0.0
|
||||||
pyyaml>=6.0
|
pyyaml>=6.0
|
||||||
aiofiles>=23.0.0
|
aiofiles>=23.0.0
|
||||||
|
|
|
||||||
383
scripts/gen_corpus.py
Normal file
383
scripts/gen_corpus.py
Normal file
|
|
@ -0,0 +1,383 @@
|
||||||
|
"""Synthetic log corpus generator.
|
||||||
|
|
||||||
|
Produces realistic-but-entirely-artificial log files for demos, load tests,
|
||||||
|
and parser regression suites — no production data required.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/gen_corpus.py --days 7 --out /tmp/demo-corpus/
|
||||||
|
python scripts/gen_corpus.py --days 1 --out /tmp/test-run/ --seed 42 --error-rate 0.15
|
||||||
|
python scripts/gen_corpus.py --help
|
||||||
|
|
||||||
|
Output tree:
|
||||||
|
<out>/journald/system.jsonl — systemd/kernel journald JSON
|
||||||
|
<out>/docker/services.jsonl — containerised app stdout
|
||||||
|
<out>/qbittorrent/qbt.log — hotio-format qBittorrent log
|
||||||
|
<out>/ext_device/device.log — vendor device plaintext log
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
# ── Severity distribution ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_SYSLOG_PRIORITY = {
|
||||||
|
"CRITICAL": "2",
|
||||||
|
"ERROR": "3",
|
||||||
|
"WARN": "4",
|
||||||
|
"INFO": "6",
|
||||||
|
"DEBUG": "7",
|
||||||
|
}
|
||||||
|
|
||||||
|
_SEVERITY_WEIGHTS = {
|
||||||
|
"INFO": 0.70,
|
||||||
|
"DEBUG": 0.10,
|
||||||
|
"WARN": 0.12,
|
||||||
|
"ERROR": 0.06,
|
||||||
|
"CRITICAL": 0.02,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_severity(rng: random.Random, error_rate: float) -> str:
|
||||||
|
"""Return a severity string, boosting ERROR/CRITICAL by error_rate."""
|
||||||
|
weights = dict(_SEVERITY_WEIGHTS)
|
||||||
|
boost = error_rate * 0.08 # distribute extra weight to error tiers
|
||||||
|
weights["ERROR"] += boost
|
||||||
|
weights["CRITICAL"] += boost / 2
|
||||||
|
weights["INFO"] -= boost * 1.2
|
||||||
|
weights["DEBUG"] -= boost * 0.3
|
||||||
|
choices = list(weights.keys())
|
||||||
|
probs = [max(0.0, weights[k]) for k in choices]
|
||||||
|
return rng.choices(choices, weights=probs, k=1)[0]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Timestamp helpers ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _ts_seq(start: datetime, end: datetime, rng: random.Random) -> list[datetime]:
|
||||||
|
"""Return a sorted list of random timestamps between start and end."""
|
||||||
|
total_seconds = (end - start).total_seconds()
|
||||||
|
# Roughly 1 event every ~4 seconds on average across all sources
|
||||||
|
count = int(total_seconds / 4)
|
||||||
|
offsets = sorted(rng.uniform(0, total_seconds) for _ in range(count))
|
||||||
|
return [start + timedelta(seconds=o) for o in offsets]
|
||||||
|
|
||||||
|
|
||||||
|
def _micros(dt: datetime) -> str:
|
||||||
|
"""Journald __REALTIME_TIMESTAMP: microseconds since epoch, as string."""
|
||||||
|
return str(int(dt.timestamp() * 1_000_000))
|
||||||
|
|
||||||
|
|
||||||
|
# ── Message libraries ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_JOURNALD_UNITS = [
|
||||||
|
"sshd.service", "nginx.service", "docker.service", "systemd-resolved.service",
|
||||||
|
"cron.service", "systemd-journald.service", "NetworkManager.service",
|
||||||
|
"turnstone.service", "podman.service", "fail2ban.service",
|
||||||
|
]
|
||||||
|
|
||||||
|
_JOURNALD_MESSAGES: dict[str, list[str]] = {
|
||||||
|
"INFO": [
|
||||||
|
"Started {unit}.",
|
||||||
|
"Listening on {port}/tcp.",
|
||||||
|
"Reloaded configuration for {unit}.",
|
||||||
|
"New connection from {ip}:{port}",
|
||||||
|
"Session opened for user {user} by (uid=0)",
|
||||||
|
"Accepted publickey for {user} from {ip} port {port}",
|
||||||
|
"System time synchronized from NTP server {ip}",
|
||||||
|
"Unit {unit} entered active state.",
|
||||||
|
"Loaded kernel module {module}.",
|
||||||
|
"DNS query resolved: {host} -> {ip}",
|
||||||
|
],
|
||||||
|
"DEBUG": [
|
||||||
|
"Polling interval set to {n}ms",
|
||||||
|
"Cache hit for key '{key}'",
|
||||||
|
"Heartbeat OK from {host}",
|
||||||
|
"Timer {n} fired",
|
||||||
|
"Worker {n} idle",
|
||||||
|
],
|
||||||
|
"WARN": [
|
||||||
|
"High memory usage on {unit}: {pct}% used",
|
||||||
|
"Slow DNS response ({ms}ms) for {host}",
|
||||||
|
"Deprecated option '{key}' in config — will be removed in next release",
|
||||||
|
"Retrying connection to {host} (attempt {n}/5)",
|
||||||
|
"Journal size limit reached, rotating",
|
||||||
|
"Disk usage at {pct}% on /dev/sda1",
|
||||||
|
],
|
||||||
|
"ERROR": [
|
||||||
|
"Failed to start {unit}: exit code {n}",
|
||||||
|
"Connection refused to {host}:{port}",
|
||||||
|
"Segmentation fault in {unit} (core dumped)",
|
||||||
|
"Authentication failure for user {user} from {ip}",
|
||||||
|
"Timeout waiting for {unit} to become ready",
|
||||||
|
"Failed to bind {port}/tcp: address already in use",
|
||||||
|
],
|
||||||
|
"CRITICAL": [
|
||||||
|
"Kernel panic — not syncing: {msg}",
|
||||||
|
"Out of memory: killed process {n} ({unit})",
|
||||||
|
"Hardware error on /dev/sda1: I/O error",
|
||||||
|
"Disk quota exceeded on /home for user {user}",
|
||||||
|
"Critical service {unit} failed; system may be unstable",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
_DOCKER_SERVICES = [
|
||||||
|
"caddy", "postgres", "redis", "turnstone", "avocet",
|
||||||
|
"prometheus", "grafana", "loki", "minio", "vllm",
|
||||||
|
]
|
||||||
|
|
||||||
|
_DOCKER_MESSAGES: dict[str, list[str]] = {
|
||||||
|
"INFO": [
|
||||||
|
"level=info msg=\"Server listening on 0.0.0.0:{port}\"",
|
||||||
|
"level=info msg=\"Connected to database at {host}:5432\"",
|
||||||
|
'level=info msg="GET /api/health 200 {ms}ms" user={user}',
|
||||||
|
'level=info msg="POST /api/v1/jobs 201 {ms}ms"',
|
||||||
|
"INFO: Worker pool size: {n}",
|
||||||
|
"INFO: Cache warmed — {n} entries loaded",
|
||||||
|
"INFO: Startup complete in {ms}ms",
|
||||||
|
"INFO: Scheduled job '{key}' executed successfully",
|
||||||
|
],
|
||||||
|
"DEBUG": [
|
||||||
|
"DEBUG: SQL query took {ms}ms: SELECT * FROM {key}",
|
||||||
|
"DEBUG: Redis HIT for key {key}",
|
||||||
|
"level=debug msg=\"span {key} completed\" duration={ms}ms",
|
||||||
|
"DEBUG: Trace ID {key}: handler returned 200",
|
||||||
|
],
|
||||||
|
"WARN": [
|
||||||
|
"level=warn msg=\"Slow query ({ms}ms) on table {key}\"",
|
||||||
|
"WARN: Connection pool at {pct}% capacity",
|
||||||
|
"WARN: Rate limit approaching for client {ip}",
|
||||||
|
"WARN: Deprecated endpoint /v1/{key} called by {ip}",
|
||||||
|
"level=warn msg=\"GC pause {ms}ms — possible memory pressure\"",
|
||||||
|
],
|
||||||
|
"ERROR": [
|
||||||
|
"level=error msg=\"Unhandled exception in handler '{key}'\" err={msg}",
|
||||||
|
"ERROR: Database connection lost: {msg}",
|
||||||
|
"level=error msg=\"Failed to acquire lock on {key} after {ms}ms\"",
|
||||||
|
"ERROR: HTTP 500 POST /api/v1/{key}: internal server error",
|
||||||
|
"ERROR: Redis NOAUTH: authentication required",
|
||||||
|
],
|
||||||
|
"CRITICAL": [
|
||||||
|
"level=critical msg=\"Panic: nil pointer dereference in {key}\"",
|
||||||
|
"CRITICAL: Fatal: cannot open database: {msg}",
|
||||||
|
"CRITICAL: OOM killer invoked — process {n} terminated",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
_QBT_MESSAGES: dict[str, list[str]] = {
|
||||||
|
"INFO": [
|
||||||
|
"Successfully listening on IP: 0.0.0.0; port: {port}",
|
||||||
|
"Torrent '{key}' added to download queue",
|
||||||
|
"Download of '{key}' complete ({n} MB)",
|
||||||
|
"Seeding '{key}' at {n} KB/s",
|
||||||
|
"Tracker '{host}' working, {n} seeds",
|
||||||
|
"Peer {ip} connected to torrent '{key}'",
|
||||||
|
"Free disk space: {n} GB",
|
||||||
|
],
|
||||||
|
"WARN": [
|
||||||
|
"Tracker '{host}' is not working (retrying)",
|
||||||
|
"Slow download speed ({n} KB/s) for '{key}'",
|
||||||
|
"Too many open files — reducing connection limit",
|
||||||
|
"DHT bootstrap failed, retrying in {n}s",
|
||||||
|
],
|
||||||
|
"CRITICAL": [
|
||||||
|
"Not enough space on disk to download '{key}'",
|
||||||
|
"File I/O error for torrent '{key}': {msg}",
|
||||||
|
"Unable to bind listen port {port}",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
_EXT_DEVICE_CODES: dict[str, list[str]] = {
|
||||||
|
"INFO": [
|
||||||
|
"SYS-0100 Device boot complete, firmware v{n}.{n}.{n}",
|
||||||
|
"SYS-0101 Sensor array calibration OK",
|
||||||
|
"NET-0200 Link established on interface eth{n}",
|
||||||
|
"CFG-0300 Configuration loaded from flash",
|
||||||
|
"HW-0400 Fan speed nominal: {n} RPM",
|
||||||
|
],
|
||||||
|
"WARN": [
|
||||||
|
"NET-0210 Link quality degraded: RSSI -{n} dBm",
|
||||||
|
"HW-0410 Fan speed elevated: {n} RPM (threshold: {n} RPM)",
|
||||||
|
"CFG-0310 Unknown config key '{key}' ignored",
|
||||||
|
"SYS-0110 Watchdog near timeout — {n}ms remaining",
|
||||||
|
],
|
||||||
|
"ERROR": [
|
||||||
|
"ERR-1001 Sensor read failure on channel {n}: timeout",
|
||||||
|
"ERR-1002 I2C bus {n} NACK from address 0x{key}",
|
||||||
|
"ERR-2001 Network tx queue overflow — dropped {n} packets",
|
||||||
|
"ERR-3001 Flash write error at sector {n}",
|
||||||
|
],
|
||||||
|
"CRITICAL": [
|
||||||
|
"ERR-9001 Thermal runaway detected — initiating shutdown",
|
||||||
|
"ERR-9002 Supply voltage out of range: {n}mV",
|
||||||
|
"ERR-9003 Memory parity error at address 0x{key}",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Template substitution ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_HOSTS = ["node1", "node2", "node3", "node4", "gateway", "remotehost"]
|
||||||
|
_USERS = ["alan", "root", "deployer", "backup", "nobody"]
|
||||||
|
_MODULES = ["btrfs", "xfs", "nf_conntrack", "ip6table_filter", "overlay"]
|
||||||
|
|
||||||
|
def _fill(template: str, rng: random.Random) -> str:
|
||||||
|
"""Replace {placeholder} tokens with plausible random values."""
|
||||||
|
def _sub(m: re.Match) -> str:
|
||||||
|
import re
|
||||||
|
key = m.group(1)
|
||||||
|
if key == "ip": return f"10.{rng.randint(0,255)}.{rng.randint(0,255)}.{rng.randint(1,254)}"
|
||||||
|
if key == "port": return str(rng.randint(1024, 65535))
|
||||||
|
if key == "n": return str(rng.randint(1, 9999))
|
||||||
|
if key == "pct": return str(rng.randint(50, 99))
|
||||||
|
if key == "ms": return str(rng.randint(1, 5000))
|
||||||
|
if key == "unit": return rng.choice(_JOURNALD_UNITS)
|
||||||
|
if key == "user": return rng.choice(_USERS)
|
||||||
|
if key == "host": return rng.choice(_HOSTS)
|
||||||
|
if key == "module": return rng.choice(_MODULES)
|
||||||
|
if key == "msg": return rng.choice(["unexpected EOF", "connection reset", "no such file"])
|
||||||
|
if key == "key": return rng.choice(["auth", "jobs", "cache", "index", "sessions", "queue"])
|
||||||
|
return m.group(0)
|
||||||
|
import re
|
||||||
|
return re.sub(r"\{(\w+)\}", _sub, template)
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_msg(library: dict[str, list[str]], severity: str, rng: random.Random) -> str:
    """Choose a random template for ``severity`` from ``library`` and fill it in.

    When the library has no (or an empty) list for ``severity``, the "INFO"
    templates are used instead; if those are missing too, the literal
    "log entry" is used.
    """
    pool = library.get(severity)
    if not pool:
        pool = library.get("INFO", ["log entry"])
    template = rng.choice(pool)
    return _fill(template, rng)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Per-format generators ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def gen_journald(path: Path, start: datetime, end: datetime, rng: random.Random, error_rate: float) -> int:
    """Write journald-style JSON lines (``-o json`` format) to ``path``.

    One hostname is chosen for the whole file; each record gets its own
    severity, systemd unit and message. Returns the number of lines written.
    """
    written = 0
    host = rng.choice(_HOSTS)
    with path.open("w") as out:
        for stamp in _ts_seq(start, end, rng):
            level = _pick_severity(rng, error_rate)
            unit_name = rng.choice(_JOURNALD_UNITS)
            text = _pick_msg(_JOURNALD_MESSAGES, level, rng)
            record = {
                "__REALTIME_TIMESTAMP": _micros(stamp),
                "MESSAGE": text,
                "PRIORITY": _SYSLOG_PRIORITY.get(level, "6"),
                "_HOSTNAME": host,
                "_SYSTEMD_UNIT": unit_name,
                "SYSLOG_IDENTIFIER": unit_name.replace(".service", ""),
            }
            out.write(f"{json.dumps(record)}\n")
            written += 1
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def gen_docker(path: Path, start: datetime, end: datetime, rng: random.Random, error_rate: float) -> int:
    """Write Docker-format JSON lines (SOURCE + MESSAGE envelope) to ``path``.

    One record is produced per generated timestamp; the timestamp itself is
    not part of the record. Returns the number of lines written.
    """
    written = 0
    with path.open("w") as out:
        for _stamp in _ts_seq(start, end, rng):
            level = _pick_severity(rng, error_rate)
            container = rng.choice(_DOCKER_SERVICES)
            text = _pick_msg(_DOCKER_MESSAGES, level, rng)
            record = {"SOURCE": f"docker:{container}", "MESSAGE": text}
            out.write(f"{json.dumps(record)}\n")
            written += 1
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def gen_qbittorrent(path: Path, start: datetime, end: datetime, rng: random.Random, error_rate: float) -> int:
    """Emit hotio-format qBittorrent plaintext log.

    Each line has the form ``(<code>) <YYYY-MM-DDTHH:MM:SS> - <message>`` where
    the code is N, W or C.

    Args:
        path: Destination file; it is created or truncated.
        start: Start of the time window.
        end: End of the time window.
        rng: Random source for timestamps, severities and messages.
        error_rate: Passed to ``_pick_severity`` to skew towards errors.

    Returns:
        Number of lines written.
    """
    # Only three codes are emitted, so the five generic severities collapse:
    # ERROR is written as "C" and DEBUG as "N".
    _CODE = {"INFO": "N", "WARN": "W", "CRITICAL": "C", "ERROR": "C", "DEBUG": "N"}
    # Pick the message from the library bucket that matches the emitted code.
    # Without this, ERROR (absent from _QBT_MESSAGES) fell back to INFO text
    # and produced benign messages tagged "(C)".
    _LIBRARY_KEY = {"ERROR": "CRITICAL", "DEBUG": "INFO"}
    lines = 0
    # Templates contain non-ASCII characters ("—"); pin the encoding so the
    # output does not depend on the platform locale.
    with path.open("w", encoding="utf-8") as fh:
        for dt in _ts_seq(start, end, rng):
            severity = _pick_severity(rng, error_rate)
            msg = _pick_msg(_QBT_MESSAGES, _LIBRARY_KEY.get(severity, severity), rng)
            code = _CODE.get(severity, "N")
            ts_str = dt.strftime("%Y-%m-%dT%H:%M:%S")
            fh.write(f"({code}) {ts_str} - {msg}\n")
            lines += 1
    return lines
|
||||||
|
|
||||||
|
|
||||||
|
def gen_ext_device(path: Path, start: datetime, end: datetime, rng: random.Random, error_rate: float) -> int:
    """Emit vendor device plaintext log (ISO timestamp + level + ERR/SYS/NET code + message).

    Each line has the form ``<YYYY-MM-DDTHH:MM:SS> [<SEVERITY>] <message>``.

    Args:
        path: Destination file; it is created or truncated.
        start: Start of the time window.
        end: End of the time window.
        rng: Random source for timestamps, severities and messages.
        error_rate: Passed to ``_pick_severity`` to skew towards errors.

    Returns:
        Number of lines written.
    """
    lines = 0
    # Templates contain non-ASCII characters ("—"); pin the encoding so the
    # output does not depend on the platform locale.
    with path.open("w", encoding="utf-8") as fh:
        for dt in _ts_seq(start, end, rng):
            severity = _pick_severity(rng, error_rate)
            msg = _pick_msg(_EXT_DEVICE_CODES, severity, rng)
            ts_str = dt.strftime("%Y-%m-%dT%H:%M:%S")
            fh.write(f"{ts_str} [{severity}] {msg}\n")
            lines += 1
    return lines
|
||||||
|
|
||||||
|
|
||||||
|
# ── Orchestration ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
_GENERATORS: list[tuple[str, str, Callable]] = [
|
||||||
|
("journald", "system.jsonl", gen_journald),
|
||||||
|
("docker", "services.jsonl", gen_docker),
|
||||||
|
("qbittorrent", "qbt.log", gen_qbittorrent),
|
||||||
|
("ext_device", "device.log", gen_ext_device),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def generate(
    out: Path,
    days: int,
    seed: int | None,
    error_rate: float,
    reference_time: datetime | None = None,
) -> dict[str, int]:
    """Run every generator in ``_GENERATORS`` and write its file beneath ``out``.

    The time window ends at ``reference_time`` (or the current UTC time when
    that is not given) and covers ``days`` days. Returns a mapping from each
    output path, relative to ``out``, to the number of lines written.
    """
    master = random.Random(seed)
    window_end = reference_time or datetime.now(tz=timezone.utc)
    window_start = window_end - timedelta(days=days)

    counts: dict[str, int] = {}
    for subdir, filename, gen_fn in _GENERATORS:
        target = out / subdir / filename
        target.parent.mkdir(parents=True, exist_ok=True)
        # Each source gets its own seeded sub-RNG so streams are independent
        child_seed = master.randint(0, 2**31)
        n_lines = gen_fn(target, window_start, window_end, random.Random(child_seed), error_rate)
        rel = target.relative_to(out)
        counts[str(rel)] = n_lines
        print(f" {rel}: {n_lines:,} lines")

    return counts
|
||||||
|
|
||||||
|
|
||||||
|
# ── CLI ────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description="Generate a synthetic Turnstone log corpus for demos and testing."
|
||||||
|
)
|
||||||
|
parser.add_argument("--days", type=int, default=7, help="Days of history to generate (default: 7)")
|
||||||
|
parser.add_argument("--out", type=Path, required=True, help="Output directory")
|
||||||
|
parser.add_argument("--seed", type=int, default=None, help="RNG seed for reproducibility")
|
||||||
|
parser.add_argument("--error-rate", type=float, default=0.05, help="Error injection rate 0.0-1.0 (default: 0.05)")
|
||||||
|
args = parser.parse_args(argv)
|
||||||
|
|
||||||
|
if not 0.0 <= args.error_rate <= 1.0:
|
||||||
|
print("ERROR: --error-rate must be between 0.0 and 1.0", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
args.out.mkdir(parents=True, exist_ok=True)
|
||||||
|
print(f"Generating {args.days}-day corpus → {args.out} (seed={args.seed}, error_rate={args.error_rate})")
|
||||||
|
|
||||||
|
totals = generate(args.out, args.days, args.seed, args.error_rate)
|
||||||
|
total_lines = sum(totals.values())
|
||||||
|
print(f"Done — {total_lines:,} total log lines across {len(totals)} files")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
204
scripts/migrate_sqlite_to_postgres.py
Normal file
204
scripts/migrate_sqlite_to_postgres.py
Normal file
|
|
@ -0,0 +1,204 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""One-shot migration: copy data from existing SQLite DBs into Postgres.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
DATABASE_URL=postgresql://... python scripts/migrate_sqlite_to_postgres.py \
|
||||||
|
--main-db data/turnstone.db \
|
||||||
|
--context-db data/turnstone-context.db \
|
||||||
|
--incidents-db data/turnstone-incidents.db \
|
||||||
|
[--tenant-id heimdall]
|
||||||
|
|
||||||
|
The script is idempotent: rows already present in Postgres (same id) are skipped.
|
||||||
|
It must be run ONCE per node after deploying the shared Postgres backend.
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
pip install 'psycopg[binary,pool]'
|
||||||
|
Set DATABASE_URL to the target Postgres connection string.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Allow running from the project root without installing the package
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
|
||||||
|
def _pg_connect():
    """Open a non-autocommit psycopg connection to the URL in ``DATABASE_URL``.

    Prints an error to stderr and exits with status 1 when the variable is
    unset or empty.
    """
    import psycopg  # type: ignore[import]

    dsn = os.environ.get("DATABASE_URL")
    if dsn:
        return psycopg.connect(dsn, autocommit=False)

    print("ERROR: DATABASE_URL not set", file=sys.stderr)
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_schema_pg() -> None:
    """Run the three schema-creation helpers from ``app.db.schema``, then report success.

    Each helper receives ``/dev/null`` as its path argument; per the original
    note, that path is ignored when the backend is Postgres.
    """
    from app.db.schema import ensure_schema, ensure_context_schema, ensure_incidents_schema

    for ensure in (ensure_schema, ensure_context_schema, ensure_incidents_schema):
        ensure(Path("/dev/null"))  # db_path ignored for Postgres
    print("Postgres schema verified")
|
||||||
|
|
||||||
|
|
||||||
|
def _migrate_table(
|
||||||
|
src_conn: sqlite3.Connection,
|
||||||
|
dst_conn,
|
||||||
|
table: str,
|
||||||
|
tenant_id: str,
|
||||||
|
columns: list[str],
|
||||||
|
conflict_cols: list[str],
|
||||||
|
) -> int:
|
||||||
|
"""Copy rows from SQLite table to Postgres. Returns rows inserted."""
|
||||||
|
# Check if source table exists
|
||||||
|
try:
|
||||||
|
rows = src_conn.execute(f"SELECT * FROM {table} LIMIT 0").fetchall() # noqa: S608
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
print(f" {table}: not found in SQLite — skipping")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Fetch all rows
|
||||||
|
src_conn.row_factory = sqlite3.Row
|
||||||
|
rows = src_conn.execute(f"SELECT * FROM {table}").fetchall() # noqa: S608
|
||||||
|
if not rows:
|
||||||
|
print(f" {table}: empty — skipping")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Build INSERT ... ON CONFLICT DO NOTHING
|
||||||
|
col_list = ", ".join(columns)
|
||||||
|
placeholders = ", ".join("%s" for _ in columns)
|
||||||
|
conflict = ", ".join(conflict_cols)
|
||||||
|
sql = (
|
||||||
|
f"INSERT INTO {table} ({col_list}) VALUES ({placeholders}) " # noqa: S608
|
||||||
|
f"ON CONFLICT ({conflict}) DO NOTHING"
|
||||||
|
)
|
||||||
|
|
||||||
|
inserted = 0
|
||||||
|
with dst_conn.cursor() as cur:
|
||||||
|
for row in rows:
|
||||||
|
# Build values: inject tenant_id if not present in source row
|
||||||
|
vals = []
|
||||||
|
for col in columns:
|
||||||
|
if col == "tenant_id":
|
||||||
|
try:
|
||||||
|
val = row["tenant_id"] or tenant_id
|
||||||
|
except (IndexError, KeyError):
|
||||||
|
val = tenant_id
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
vals.append(row[col])
|
||||||
|
except (IndexError, KeyError):
|
||||||
|
vals.append(None)
|
||||||
|
continue
|
||||||
|
vals.append(val)
|
||||||
|
cur.execute(sql, vals)
|
||||||
|
inserted += cur.rowcount
|
||||||
|
|
||||||
|
dst_conn.commit()
|
||||||
|
print(f" {table}: {inserted}/{len(rows)} rows inserted ({len(rows) - inserted} skipped)")
|
||||||
|
return inserted
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Migrate the three Turnstone SQLite databases into Postgres.

    Parses the command line, resolves the tenant ID (``--tenant-id``, then
    ``TURNSTONE_TENANT_ID``, then the system hostname), ensures the Postgres
    schema exists and copies each known table. A SQLite file that does not
    exist is skipped with a message.

    Exits with status 1 when ``DATABASE_URL`` is unset or empty.
    """
    import socket
    from contextlib import closing

    parser = argparse.ArgumentParser(description="Migrate Turnstone SQLite → Postgres")
    parser.add_argument("--main-db", default="data/turnstone.db")
    parser.add_argument("--context-db", default="data/turnstone-context.db")
    parser.add_argument("--incidents-db", default="data/turnstone-incidents.db")
    parser.add_argument("--tenant-id", default=None, help="Override tenant ID (default: socket.gethostname())")
    args = parser.parse_args()

    if args.tenant_id:
        os.environ["TURNSTONE_TENANT_ID"] = args.tenant_id
    tenant_id = os.environ.get("TURNSTONE_TENANT_ID") or socket.gethostname()

    # Fail fast: the schema step below must not run without a Postgres target.
    if not os.environ.get("DATABASE_URL"):
        print("ERROR: DATABASE_URL not set", file=sys.stderr)
        sys.exit(1)

    print(f"Migrating as tenant_id={tenant_id!r}")

    # Ensure Postgres schema exists first
    _ensure_schema_pg()

    # (label, sqlite path, [(table, columns, conflict_cols), ...])
    plan = [
        ("main", Path(args.main_db), [
            ("log_entries",
             ["tenant_id", "id", "source_id", "sequence", "timestamp_raw",
              "timestamp_iso", "ingest_time", "severity", "repeat_count",
              "out_of_order", "matched_patterns", "text"],
             ["tenant_id", "id"]),
            ("glean_fingerprints",
             ["tenant_id", "path", "mtime", "size", "gleaned_at"],
             ["tenant_id", "path"]),
            ("blocklist_candidates",
             ["id", "tenant_id", "domain_or_ip", "source_device_ip", "source_device_name",
              "first_seen", "last_seen", "hit_count", "status", "pushed_at",
              "log_evidence", "matched_rule", "llm_score", "llm_reason"],
             ["id"]),
        ]),
        ("context", Path(args.context_db), [
            ("context_facts",
             ["id", "tenant_id", "category", "key", "value", "source", "created_at"],
             ["id"]),
            ("context_documents",
             ["id", "tenant_id", "filename", "doc_type", "full_text", "file_size", "uploaded_at"],
             ["id"]),
            ("context_chunks",
             ["id", "tenant_id", "document_id", "chunk_index", "text"],
             ["id"]),
        ]),
        ("incidents", Path(args.incidents_db), [
            ("incidents",
             ["id", "tenant_id", "label", "issue_type", "started_at", "ended_at",
              "notes", "created_at", "severity"],
             ["id"]),
            ("received_bundles",
             ["id", "tenant_id", "source_host", "issue_type", "label", "severity",
              "started_at", "bundled_at", "entry_count", "bundle_json"],
             ["id"]),
            ("sent_bundles",
             ["id", "tenant_id", "incident_id", "exported_at", "sanitized",
              "entry_count", "bundle_json"],
             ["id"]),
        ]),
    ]

    total = 0
    # closing() releases both connections even when a table migration raises.
    with closing(_pg_connect()) as pg:
        for label, db_path, tables in plan:
            if not db_path.exists():
                print(f"{label.capitalize()} DB not found at {db_path} — skipping")
                continue
            print(f"\nMigrating {label} DB: {db_path}")
            with closing(sqlite3.connect(str(db_path))) as src:
                src.row_factory = sqlite3.Row
                for table, columns, conflict_cols in tables:
                    total += _migrate_table(src, pg, table, tenant_id,
                                            columns=columns,
                                            conflict_cols=conflict_cols)

    print(f"\nDone. Total rows inserted: {total}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
@ -6,8 +6,10 @@
|
||||||
# sudo bash /opt/turnstone/scripts/update.sh feat/live-watch # test a branch
|
# sudo bash /opt/turnstone/scripts/update.sh feat/live-watch # test a branch
|
||||||
#
|
#
|
||||||
# Local files preserved across updates:
|
# Local files preserved across updates:
|
||||||
# patterns/watch.yaml — site-specific watch source config
|
# patterns/watch.yaml — site-specific watch source config
|
||||||
# data/ — database and live journal files (bind-mounted, untouched)
|
# data/corpus_watermark.txt — corpus export watermark (last exported rowid)
|
||||||
|
# data/incident_watermark.txt — incident export watermark (last exported timestamp)
|
||||||
|
# data/ — database and live journal files (bind-mounted, untouched)
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
|
@ -21,7 +23,9 @@ echo "==> Turnstone update: branch=$BRANCH"
|
||||||
|
|
||||||
# ── Preserve site-local config ────────────────────────────────────────────────
|
# ── Preserve site-local config ────────────────────────────────────────────────
|
||||||
# watch.yaml is tracked in git as a template but overridden per-host.
|
# watch.yaml is tracked in git as a template but overridden per-host.
|
||||||
# Back it up before the pull and restore it after.
|
# Corpus watermarks track the last exported entry/incident — must survive updates
|
||||||
|
# or the next export run will re-push everything from the beginning.
|
||||||
|
# Back them up before the pull and restore after.
|
||||||
WATCH_YAML="$REPO_DIR/patterns/watch.yaml"
|
WATCH_YAML="$REPO_DIR/patterns/watch.yaml"
|
||||||
WATCH_BACKUP=""
|
WATCH_BACKUP=""
|
||||||
if [ -f "$WATCH_YAML" ]; then
|
if [ -f "$WATCH_YAML" ]; then
|
||||||
|
|
@ -29,6 +33,19 @@ if [ -f "$WATCH_YAML" ]; then
|
||||||
cp "$WATCH_YAML" "$WATCH_BACKUP"
|
cp "$WATCH_YAML" "$WATCH_BACKUP"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
CORPUS_WM="$REPO_DIR/data/corpus_watermark.txt"
|
||||||
|
INCIDENT_WM="$REPO_DIR/data/incident_watermark.txt"
|
||||||
|
CORPUS_WM_BACKUP=""
|
||||||
|
INCIDENT_WM_BACKUP=""
|
||||||
|
if [ -f "$CORPUS_WM" ]; then
|
||||||
|
CORPUS_WM_BACKUP=$(mktemp /tmp/corpus-wm.XXXXXX)
|
||||||
|
cp "$CORPUS_WM" "$CORPUS_WM_BACKUP"
|
||||||
|
fi
|
||||||
|
if [ -f "$INCIDENT_WM" ]; then
|
||||||
|
INCIDENT_WM_BACKUP=$(mktemp /tmp/incident-wm.XXXXXX)
|
||||||
|
cp "$INCIDENT_WM" "$INCIDENT_WM_BACKUP"
|
||||||
|
fi
|
||||||
|
|
||||||
# ── Pull ──────────────────────────────────────────────────────────────────────
|
# ── Pull ──────────────────────────────────────────────────────────────────────
|
||||||
git fetch --all --tags --quiet
|
git fetch --all --tags --quiet
|
||||||
|
|
||||||
|
|
@ -50,6 +67,16 @@ if [ -n "$WATCH_BACKUP" ]; then
|
||||||
rm -f "$WATCH_BACKUP"
|
rm -f "$WATCH_BACKUP"
|
||||||
echo "==> Restored patterns/watch.yaml"
|
echo "==> Restored patterns/watch.yaml"
|
||||||
fi
|
fi
|
||||||
|
if [ -n "$CORPUS_WM_BACKUP" ]; then
|
||||||
|
cp "$CORPUS_WM_BACKUP" "$CORPUS_WM"
|
||||||
|
rm -f "$CORPUS_WM_BACKUP"
|
||||||
|
echo "==> Restored data/corpus_watermark.txt"
|
||||||
|
fi
|
||||||
|
if [ -n "$INCIDENT_WM_BACKUP" ]; then
|
||||||
|
cp "$INCIDENT_WM_BACKUP" "$INCIDENT_WM"
|
||||||
|
rm -f "$INCIDENT_WM_BACKUP"
|
||||||
|
echo "==> Restored data/incident_watermark.txt"
|
||||||
|
fi
|
||||||
|
|
||||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||||
echo "==> Building $IMAGE ..."
|
echo "==> Building $IMAGE ..."
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import sqlite3
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
import pytest
|
import pytest
|
||||||
|
from app.db.schema import ensure_schema, ensure_context_schema
|
||||||
from app.services.llm import summarize
|
from app.services.llm import summarize
|
||||||
from app.services.search import SearchResult
|
from app.services.search import SearchResult
|
||||||
|
|
||||||
|
|
@ -64,36 +65,14 @@ def test_summarize_without_context_block_unchanged():
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def db_with_facts(tmp_path):
|
def db_with_facts(tmp_path):
|
||||||
db_path = tmp_path / "t.db"
|
db_path = tmp_path / "t.db"
|
||||||
|
ensure_schema(db_path)
|
||||||
|
ensure_context_schema(db_path)
|
||||||
conn = sqlite3.connect(str(db_path))
|
conn = sqlite3.connect(str(db_path))
|
||||||
conn.executescript("""
|
conn.execute(
|
||||||
CREATE TABLE log_entries (
|
"INSERT INTO context_facts(id, tenant_id, category, key, value, source, created_at) "
|
||||||
id TEXT PRIMARY KEY, source_id TEXT NOT NULL, sequence INTEGER NOT NULL,
|
"VALUES (?,?,?,?,?,?,?)",
|
||||||
timestamp_raw TEXT, timestamp_iso TEXT, ingest_time TEXT NOT NULL,
|
("f1", "", "service", "plex", "port:32400", "wizard", "2026-05-13T00:00:00+00:00"),
|
||||||
severity TEXT, repeat_count INTEGER DEFAULT 1, out_of_order INTEGER DEFAULT 0,
|
)
|
||||||
matched_patterns TEXT DEFAULT '[]', text TEXT NOT NULL
|
|
||||||
);
|
|
||||||
CREATE VIRTUAL TABLE IF NOT EXISTS log_fts USING fts5(
|
|
||||||
text, entry_id UNINDEXED, source_id UNINDEXED, sequence UNINDEXED,
|
|
||||||
severity UNINDEXED, timestamp_iso UNINDEXED, matched_patterns UNINDEXED,
|
|
||||||
repeat_count UNINDEXED, out_of_order UNINDEXED, tokenize='porter ascii'
|
|
||||||
);
|
|
||||||
CREATE TABLE context_facts (
|
|
||||||
id TEXT PRIMARY KEY, category TEXT NOT NULL, key TEXT NOT NULL,
|
|
||||||
value TEXT NOT NULL, source TEXT, created_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE context_documents (
|
|
||||||
id TEXT PRIMARY KEY, filename TEXT NOT NULL, doc_type TEXT NOT NULL,
|
|
||||||
full_text TEXT NOT NULL, file_size INTEGER, uploaded_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE context_chunks (
|
|
||||||
id TEXT PRIMARY KEY, document_id TEXT NOT NULL
|
|
||||||
REFERENCES context_documents(id) ON DELETE CASCADE,
|
|
||||||
chunk_index INTEGER NOT NULL, text TEXT NOT NULL, embedding BLOB
|
|
||||||
);
|
|
||||||
INSERT INTO context_facts VALUES (
|
|
||||||
'f1','service','plex','port:32400','wizard','2026-05-13T00:00:00+00:00'
|
|
||||||
);
|
|
||||||
""")
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
return db_path
|
return db_path
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
"""End-to-end upload pipeline: file bytes → DB rows."""
|
"""End-to-end upload pipeline: file bytes → DB rows."""
|
||||||
import sqlite3
|
|
||||||
import pytest
|
import pytest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.db.schema import ensure_context_schema
|
||||||
from app.glean.doc_upload import glean_upload
|
from app.glean.doc_upload import glean_upload
|
||||||
from app.context.store import list_facts, list_documents
|
from app.context.store import list_facts, list_documents
|
||||||
from app.context.chunker import UnsupportedDocType
|
from app.context.chunker import UnsupportedDocType
|
||||||
|
|
@ -11,24 +11,7 @@ from app.context.chunker import UnsupportedDocType
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def db(tmp_path):
|
def db(tmp_path):
|
||||||
db_path = tmp_path / "t.db"
|
db_path = tmp_path / "t.db"
|
||||||
conn = sqlite3.connect(str(db_path))
|
ensure_context_schema(db_path)
|
||||||
conn.executescript("""
|
|
||||||
CREATE TABLE context_facts (
|
|
||||||
id TEXT PRIMARY KEY, category TEXT NOT NULL, key TEXT NOT NULL,
|
|
||||||
value TEXT NOT NULL, source TEXT, created_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE context_documents (
|
|
||||||
id TEXT PRIMARY KEY, filename TEXT NOT NULL, doc_type TEXT NOT NULL,
|
|
||||||
full_text TEXT NOT NULL, file_size INTEGER, uploaded_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE context_chunks (
|
|
||||||
id TEXT PRIMARY KEY, document_id TEXT NOT NULL
|
|
||||||
REFERENCES context_documents(id) ON DELETE CASCADE,
|
|
||||||
chunk_index INTEGER NOT NULL, text TEXT NOT NULL, embedding BLOB
|
|
||||||
);
|
|
||||||
""")
|
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
return db_path
|
return db_path
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
"""Verify the three new context tables are created by ensure_schema."""
|
"""Verify the three context tables are created by ensure_context_schema."""
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pytest
|
import pytest
|
||||||
from app.glean.pipeline import ensure_schema
|
from app.db.schema import ensure_context_schema
|
||||||
|
|
||||||
|
|
||||||
def test_context_tables_created(tmp_path):
|
def test_context_tables_created(tmp_path):
|
||||||
db = tmp_path / "t.db"
|
db = tmp_path / "t.db"
|
||||||
ensure_schema(db)
|
ensure_context_schema(db)
|
||||||
conn = sqlite3.connect(str(db))
|
conn = sqlite3.connect(str(db))
|
||||||
tables = {r[0] for r in conn.execute(
|
tables = {r[0] for r in conn.execute(
|
||||||
"SELECT name FROM sqlite_master WHERE type='table'"
|
"SELECT name FROM sqlite_master WHERE type='table'"
|
||||||
|
|
@ -20,5 +20,5 @@ def test_context_tables_created(tmp_path):
|
||||||
|
|
||||||
def test_context_schema_idempotent(tmp_path):
|
def test_context_schema_idempotent(tmp_path):
|
||||||
db = tmp_path / "t.db"
|
db = tmp_path / "t.db"
|
||||||
ensure_schema(db)
|
ensure_context_schema(db)
|
||||||
ensure_schema(db) # second call must not raise
|
ensure_context_schema(db) # second call must not raise
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import pytest
|
import pytest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from app.db.schema import ensure_context_schema
|
||||||
from app.context.store import (
|
from app.context.store import (
|
||||||
add_fact, list_facts, delete_fact,
|
add_fact, list_facts, delete_fact,
|
||||||
add_document, list_documents, delete_document,
|
add_document, list_documents, delete_document,
|
||||||
|
|
@ -12,24 +13,7 @@ from app.context.store import (
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def db(tmp_path):
|
def db(tmp_path):
|
||||||
db_path = tmp_path / "t.db"
|
db_path = tmp_path / "t.db"
|
||||||
conn = sqlite3.connect(str(db_path))
|
ensure_context_schema(db_path)
|
||||||
conn.executescript("""
|
|
||||||
CREATE TABLE context_facts (
|
|
||||||
id TEXT PRIMARY KEY, category TEXT NOT NULL, key TEXT NOT NULL,
|
|
||||||
value TEXT NOT NULL, source TEXT, created_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE context_documents (
|
|
||||||
id TEXT PRIMARY KEY, filename TEXT NOT NULL, doc_type TEXT NOT NULL,
|
|
||||||
full_text TEXT NOT NULL, file_size INTEGER, uploaded_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
CREATE TABLE context_chunks (
|
|
||||||
id TEXT PRIMARY KEY, document_id TEXT NOT NULL
|
|
||||||
REFERENCES context_documents(id) ON DELETE CASCADE,
|
|
||||||
chunk_index INTEGER NOT NULL, text TEXT NOT NULL, embedding BLOB
|
|
||||||
);
|
|
||||||
""")
|
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
return db_path
|
return db_path
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,21 +2,14 @@
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import pytest
|
import pytest
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from app.db.schema import ensure_context_schema
|
||||||
from app.context.wizard import get_schema, advance_step, is_complete, apply_session, TOTAL_STEPS
|
from app.context.wizard import get_schema, advance_step, is_complete, apply_session, TOTAL_STEPS
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def db(tmp_path):
|
def db(tmp_path):
|
||||||
db_path = tmp_path / "t.db"
|
db_path = tmp_path / "t.db"
|
||||||
conn = sqlite3.connect(str(db_path))
|
ensure_context_schema(db_path)
|
||||||
conn.executescript("""
|
|
||||||
CREATE TABLE context_facts (
|
|
||||||
id TEXT PRIMARY KEY, category TEXT NOT NULL, key TEXT NOT NULL,
|
|
||||||
value TEXT NOT NULL, source TEXT, created_at TEXT NOT NULL
|
|
||||||
);
|
|
||||||
""")
|
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
return db_path
|
return db_path
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
220
tests/test_anomaly.py
Normal file
220
tests/test_anomaly.py
Normal file
|
|
@ -0,0 +1,220 @@
|
||||||
|
"""Tests for app/services/anomaly.py — anomaly scoring pipeline."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import app.services.anomaly as anomaly_mod
|
||||||
|
from app.db.schema import ensure_schema
|
||||||
|
from app.services.anomaly import (
|
||||||
|
ScoringResult,
|
||||||
|
acknowledge_detection,
|
||||||
|
list_detections,
|
||||||
|
reset_pipeline,
|
||||||
|
score_unscored,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _reset_pipeline():
|
||||||
|
"""Ensure the ML singleton is cleared between tests."""
|
||||||
|
reset_pipeline()
|
||||||
|
yield
|
||||||
|
reset_pipeline()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def db(tmp_path: Path) -> Path:
|
||||||
|
db_path = tmp_path / "t.db"
|
||||||
|
ensure_schema(db_path)
|
||||||
|
return db_path
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_entry(db_path: Path, text: str, entry_id: str | None = None) -> str:
|
||||||
|
eid = entry_id or str(uuid.uuid4())
|
||||||
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO log_entries(id, tenant_id, source_id, sequence, ingest_time, text) "
|
||||||
|
"VALUES (?,?,?,?,?,?)",
|
||||||
|
(eid, "", "src", 1, "2026-01-01T00:00:00", text),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
return eid
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# score_unscored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_unscored_no_model_returns_skipped(db: Path):
|
||||||
|
result = score_unscored(db, model_id="")
|
||||||
|
assert result.skipped is True
|
||||||
|
assert result.scored == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_unscored_scores_entries(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "kernel panic — OOM killer invoked")
|
||||||
|
_insert_entry(db, "user login successful")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[
|
||||||
|
{"label": "SYSTEM_FAILURE", "score": 0.92},
|
||||||
|
{"label": "NORMAL", "score": 0.88},
|
||||||
|
])
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
result = score_unscored(db, model_id="fake-model", batch_size=10)
|
||||||
|
assert result.skipped is False
|
||||||
|
assert result.scored == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_unscored_creates_detection_above_threshold(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "segfault in service")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[
|
||||||
|
{"label": "SYSTEM_FAILURE", "score": 0.95},
|
||||||
|
])
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
result = score_unscored(db, model_id="fake-model", threshold=0.80)
|
||||||
|
assert result.detections == 1
|
||||||
|
|
||||||
|
detections = list_detections(db)
|
||||||
|
assert len(detections) == 1
|
||||||
|
assert detections[0]["anomaly_label"] == "SYSTEM_FAILURE"
|
||||||
|
assert detections[0]["anomaly_score"] == pytest.approx(0.95)
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_unscored_no_detection_below_threshold(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "warning: disk at 80%")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[
|
||||||
|
{"label": "PERFORMANCE_ISSUE", "score": 0.60},
|
||||||
|
])
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
result = score_unscored(db, model_id="fake-model", threshold=0.80)
|
||||||
|
assert result.detections == 0
|
||||||
|
assert result.scored == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_unscored_normal_label_never_detection(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "service started successfully")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[
|
||||||
|
{"label": "NORMAL", "score": 0.99},
|
||||||
|
])
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
result = score_unscored(db, model_id="fake-model", threshold=0.50)
|
||||||
|
assert result.detections == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_unscored_idempotent(db: Path, monkeypatch):
|
||||||
|
"""Entries already scored are not re-scored on subsequent runs."""
|
||||||
|
_insert_entry(db, "first entry")
|
||||||
|
|
||||||
|
call_count = 0
|
||||||
|
|
||||||
|
def _side_effect(texts, **_kwargs):
|
||||||
|
nonlocal call_count
|
||||||
|
call_count += 1
|
||||||
|
return [{"label": "NORMAL", "score": 0.90} for _ in texts]
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(side_effect=_side_effect)
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
score_unscored(db, model_id="fake-model")
|
||||||
|
score_unscored(db, model_id="fake-model")
|
||||||
|
|
||||||
|
assert call_count == 1 # second run finds no unscored rows
|
||||||
|
|
||||||
|
|
||||||
|
def test_score_unscored_pipeline_error_returns_error(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "some log line")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(side_effect=RuntimeError("CUDA OOM"))
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
result = score_unscored(db, model_id="fake-model")
|
||||||
|
assert result.error is not None
|
||||||
|
assert "CUDA OOM" in result.error
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# list_detections / acknowledge_detection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_list_detections_empty(db: Path):
|
||||||
|
assert list_detections(db) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_list_detections_filters_unacked(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "crash")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[{"label": "SYSTEM_FAILURE", "score": 0.91}])
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
score_unscored(db, model_id="fake-model", threshold=0.80)
|
||||||
|
|
||||||
|
all_dets = list_detections(db)
|
||||||
|
assert len(all_dets) == 1
|
||||||
|
unacked = list_detections(db, unacked_only=True)
|
||||||
|
assert len(unacked) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_acknowledge_detection(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "network anomaly")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[{"label": "NETWORK_ANOMALY", "score": 0.88}])
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
score_unscored(db, model_id="fake-model", threshold=0.80)
|
||||||
|
|
||||||
|
dets = list_detections(db)
|
||||||
|
assert len(dets) == 1
|
||||||
|
det_id = dets[0]["id"]
|
||||||
|
|
||||||
|
updated = acknowledge_detection(db, det_id, notes="benign test traffic")
|
||||||
|
assert updated is True
|
||||||
|
|
||||||
|
unacked = list_detections(db, unacked_only=True)
|
||||||
|
assert len(unacked) == 0
|
||||||
|
|
||||||
|
all_dets = list_detections(db)
|
||||||
|
assert all_dets[0]["acknowledged"] == 1
|
||||||
|
assert all_dets[0]["notes"] == "benign test traffic"
|
||||||
|
|
||||||
|
|
||||||
|
def test_acknowledge_detection_unknown_id(db: Path):
|
||||||
|
updated = acknowledge_detection(db, "nonexistent-id")
|
||||||
|
assert updated is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_list_detections_label_filter(db: Path, monkeypatch):
|
||||||
|
_insert_entry(db, "OOM kill")
|
||||||
|
_insert_entry(db, "network timeout")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(side_effect=[
|
||||||
|
[{"label": "SYSTEM_FAILURE", "score": 0.93}],
|
||||||
|
[{"label": "NETWORK_ANOMALY", "score": 0.85}],
|
||||||
|
])
|
||||||
|
monkeypatch.setattr(anomaly_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
score_unscored(db, model_id="fake-model", batch_size=1, threshold=0.80)
|
||||||
|
score_unscored(db, model_id="fake-model", batch_size=1, threshold=0.80)
|
||||||
|
|
||||||
|
sys_dets = list_detections(db, label="SYSTEM_FAILURE")
|
||||||
|
assert all(d["anomaly_label"] == "SYSTEM_FAILURE" for d in sys_dets)
|
||||||
|
|
||||||
|
net_dets = list_detections(db, label="NETWORK_ANOMALY")
|
||||||
|
assert all(d["anomaly_label"] == "NETWORK_ANOMALY" for d in net_dets)
|
||||||
233
tests/test_cybersec.py
Normal file
233
tests/test_cybersec.py
Normal file
|
|
@ -0,0 +1,233 @@
|
||||||
|
"""Tests for the cybersec zero-shot scoring pipeline."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.db.schema import ensure_schema
|
||||||
|
from app.services.cybersec import (
|
||||||
|
CybersecResult,
|
||||||
|
CYBERSEC_LABELS,
|
||||||
|
_NORMAL_LABEL,
|
||||||
|
reset_pipeline,
|
||||||
|
score_security_entries,
|
||||||
|
list_cybersec_detections,
|
||||||
|
)
|
||||||
|
import app.services.cybersec as cybersec_mod
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _reset(tmp_path):
|
||||||
|
reset_pipeline()
|
||||||
|
yield
|
||||||
|
reset_pipeline()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def db(tmp_path) -> Path:
|
||||||
|
path = tmp_path / "test.db"
|
||||||
|
ensure_schema(path)
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_entry(db: Path, entry_id: str, text: str,
|
||||||
|
anomaly_label: str | None = None,
|
||||||
|
matched_patterns: str = "[]") -> None:
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
conn.execute(
|
||||||
|
"""INSERT OR IGNORE INTO log_entries
|
||||||
|
(id, tenant_id, source_id, sequence, ingest_time, text,
|
||||||
|
anomaly_label, matched_patterns)
|
||||||
|
VALUES (?, '', 'test-src', 1, '2026-01-01T00:00:00Z', ?, ?, ?)""",
|
||||||
|
(entry_id, text, anomaly_label, matched_patterns),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# No model configured → skipped
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_no_model_returns_skipped(db):
|
||||||
|
result = score_security_entries(db, model_id="")
|
||||||
|
assert result.skipped is True
|
||||||
|
assert result.scored == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# No eligible entries → skipped
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_no_eligible_entries_skipped(db):
|
||||||
|
_insert_entry(db, "e1", "Started nginx.service", anomaly_label=None, matched_patterns="[]")
|
||||||
|
mock_pipe = MagicMock(return_value=[{"labels": [_NORMAL_LABEL], "scores": [0.99]}])
|
||||||
|
monkeypatch = pytest.MonkeyPatch()
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", mock_pipe)
|
||||||
|
result = score_security_entries(db, model_id="fake-model")
|
||||||
|
assert result.skipped is True
|
||||||
|
monkeypatch.undo()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Security entry gets scored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_security_entry_scored(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1",
|
||||||
|
"Failed password for root from 192.168.1.1 port 22 ssh2",
|
||||||
|
anomaly_label="SECURITY_ANOMALY")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack", _NORMAL_LABEL],
|
||||||
|
"scores": [0.85, 0.15],
|
||||||
|
}])
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.70)
|
||||||
|
assert result.scored == 1
|
||||||
|
assert result.detections == 1
|
||||||
|
assert result.error is None
|
||||||
|
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
row = conn.execute("SELECT ml_score, ml_label, ml_scored_at FROM log_entries WHERE id='e1'").fetchone()
|
||||||
|
assert row["ml_score"] == pytest.approx(0.85)
|
||||||
|
assert row["ml_label"] == "authentication failure or brute force attack"
|
||||||
|
assert row["ml_scored_at"] is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Detection created above threshold
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_detection_inserted_above_threshold(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "sudo: authentication failure", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["privilege escalation or unauthorized access", _NORMAL_LABEL],
|
||||||
|
"scores": [0.75, 0.25],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
dets = conn.execute("SELECT * FROM detections WHERE scorer='cybersec'").fetchall()
|
||||||
|
assert len(dets) == 1
|
||||||
|
assert dets[0]["anomaly_label"] == "privilege escalation or unauthorized access"
|
||||||
|
assert dets[0]["severity"] == "CRITICAL"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Normal label → no detection even above score threshold
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_normal_label_no_detection(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Started nginx.service", anomaly_label="INFO",
|
||||||
|
matched_patterns='["service_start"]')
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": [_NORMAL_LABEL, "network intrusion or port scan"],
|
||||||
|
"scores": [0.95, 0.05],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result.detections == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Below threshold → scored but no detection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_below_threshold_no_detection(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Some suspicious text", anomaly_label="WARN")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["network intrusion or port scan", _NORMAL_LABEL],
|
||||||
|
"scores": [0.45, 0.55],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result.scored == 1
|
||||||
|
assert result.detections == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pattern-matched entry (not anomaly-flagged) still gets scored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_pattern_matched_entry_scored(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "SSH port forwarding conflict detected",
|
||||||
|
anomaly_label=None,
|
||||||
|
matched_patterns='["ssh_forward_conflict"]')
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["network intrusion or port scan", _NORMAL_LABEL],
|
||||||
|
"scores": [0.70, 0.30],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result.scored == 1
|
||||||
|
assert result.detections == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Idempotency — re-run finds nothing unscored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_idempotent_rerun(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack"],
|
||||||
|
"scores": [0.80],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
result2 = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result2.skipped is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# list_cybersec_detections filters to scorer='cybersec'
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_list_cybersec_detections(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack"],
|
||||||
|
"scores": [0.90],
|
||||||
|
}]))
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
|
||||||
|
rows = list_cybersec_detections(db)
|
||||||
|
assert len(rows) == 1
|
||||||
|
assert rows[0]["scorer"] == "cybersec"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# list_detections scorer filter (anomaly service)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_list_detections_scorer_filter(db, monkeypatch):
|
||||||
|
from app.services.anomaly import list_detections
|
||||||
|
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack"],
|
||||||
|
"scores": [0.90],
|
||||||
|
}]))
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
|
||||||
|
all_dets = list_detections(db)
|
||||||
|
cybersec_dets = list_detections(db, scorer="cybersec")
|
||||||
|
anomaly_dets = list_detections(db, scorer="anomaly")
|
||||||
|
|
||||||
|
assert len(cybersec_dets) == 1
|
||||||
|
assert len(anomaly_dets) == 0
|
||||||
|
assert len(all_dets) >= 1
|
||||||
|
|
@ -7,8 +7,8 @@ from __future__ import annotations
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from app.context.retriever import RetrievedContext
|
from app.context.retriever import RetrievedContext
|
||||||
from app.services.diagnose.models import Hypothesis, RankedHypothesis, TimelineResult
|
from app.services.diagnose.models import EventCluster, Hypothesis, RankedHypothesis, TimelineResult
|
||||||
from app.services.diagnose.synthesizer import SummarySynthesizer
|
from app.services.diagnose.synthesizer import SummarySynthesizer, _build_timeline_block
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -50,12 +50,38 @@ def _make_ranked(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_cluster(
|
||||||
|
cluster_id: str = "c1",
|
||||||
|
start_iso: str | None = "2026-01-01T00:05:00+00:00",
|
||||||
|
severity: str = "ERROR",
|
||||||
|
source_ids: tuple[str, ...] = ("syslog",),
|
||||||
|
pattern_tags: tuple[str, ...] = ("ssh_auth_failure",),
|
||||||
|
burst: bool = False,
|
||||||
|
gap_before_seconds: float = 0.0,
|
||||||
|
representative_text: str = "Failed password for root from 1.2.3.4 port 22",
|
||||||
|
) -> EventCluster:
|
||||||
|
return EventCluster(
|
||||||
|
cluster_id=cluster_id,
|
||||||
|
entries=("e1",),
|
||||||
|
start_iso=start_iso,
|
||||||
|
end_iso=None,
|
||||||
|
duration_seconds=30.0,
|
||||||
|
source_ids=source_ids,
|
||||||
|
pattern_tags=pattern_tags,
|
||||||
|
severity=severity, # type: ignore[arg-type]
|
||||||
|
burst=burst,
|
||||||
|
gap_before_seconds=gap_before_seconds,
|
||||||
|
representative_text=representative_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _make_timeline(
|
def _make_timeline(
|
||||||
total_entries: int = 42,
|
total_entries: int = 42,
|
||||||
n_clusters: int = 3,
|
n_clusters: int = 3,
|
||||||
|
clusters: tuple[EventCluster, ...] | None = None,
|
||||||
) -> TimelineResult:
|
) -> TimelineResult:
|
||||||
return TimelineResult(
|
return TimelineResult(
|
||||||
clusters=tuple(),
|
clusters=clusters if clusters is not None else tuple(),
|
||||||
total_entries=total_entries,
|
total_entries=total_entries,
|
||||||
window_start="2026-01-01T00:00:00+00:00",
|
window_start="2026-01-01T00:00:00+00:00",
|
||||||
window_end="2026-01-01T01:00:00+00:00",
|
window_end="2026-01-01T01:00:00+00:00",
|
||||||
|
|
@ -283,3 +309,88 @@ class TestSynthesizerEmptyRanked:
|
||||||
|
|
||||||
assert isinstance(result, str)
|
assert isinstance(result, str)
|
||||||
assert len(result) > 0
|
assert len(result) > 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildTimelineBlock:
|
||||||
|
"""Unit tests for _build_timeline_block helper."""
|
||||||
|
|
||||||
|
def test_empty_clusters_returns_placeholder(self):
|
||||||
|
timeline = _make_timeline(clusters=tuple())
|
||||||
|
assert _build_timeline_block(timeline) == "(no clusters)"
|
||||||
|
|
||||||
|
def test_single_cluster_basic_fields(self):
|
||||||
|
cluster = _make_cluster(
|
||||||
|
start_iso="2026-01-01T00:05:00+00:00",
|
||||||
|
severity="ERROR",
|
||||||
|
source_ids=("syslog",),
|
||||||
|
representative_text="Failed password for root",
|
||||||
|
)
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "Cluster 1" in block
|
||||||
|
assert "2026-01-01T00:05:00+00:00" in block
|
||||||
|
assert "[ERROR]" in block
|
||||||
|
assert "syslog" in block
|
||||||
|
assert "Failed password for root" in block
|
||||||
|
|
||||||
|
def test_burst_label_applied(self):
|
||||||
|
cluster = _make_cluster(burst=True)
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "[BURST]" in block
|
||||||
|
|
||||||
|
def test_no_burst_label_when_not_burst(self):
|
||||||
|
cluster = _make_cluster(burst=False)
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "[BURST]" not in block
|
||||||
|
|
||||||
|
def test_gap_label_applied_when_over_threshold(self):
|
||||||
|
cluster = _make_cluster(gap_before_seconds=120.0)
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "silence" in block
|
||||||
|
assert "120s" in block
|
||||||
|
|
||||||
|
def test_gap_label_omitted_when_under_threshold(self):
|
||||||
|
cluster = _make_cluster(gap_before_seconds=10.0)
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "silence" not in block
|
||||||
|
|
||||||
|
def test_pattern_tags_included(self):
|
||||||
|
cluster = _make_cluster(pattern_tags=("ssh_auth_failure", "brute_force"))
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "ssh_auth_failure" in block
|
||||||
|
assert "brute_force" in block
|
||||||
|
|
||||||
|
def test_no_patterns_section_when_empty(self):
|
||||||
|
cluster = _make_cluster(pattern_tags=tuple())
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "[patterns:" not in block
|
||||||
|
|
||||||
|
def test_multiple_clusters_numbered(self):
|
||||||
|
c1 = _make_cluster(cluster_id="c1", representative_text="first event")
|
||||||
|
c2 = _make_cluster(cluster_id="c2", representative_text="second event")
|
||||||
|
timeline = _make_timeline(clusters=(c1, c2))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "Cluster 1" in block
|
||||||
|
assert "Cluster 2" in block
|
||||||
|
assert "first event" in block
|
||||||
|
assert "second event" in block
|
||||||
|
|
||||||
|
def test_representative_text_truncated_at_200_chars(self):
|
||||||
|
long_text = "x" * 300
|
||||||
|
cluster = _make_cluster(representative_text=long_text)
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "x" * 200 in block
|
||||||
|
assert "x" * 201 not in block
|
||||||
|
|
||||||
|
def test_null_start_iso_renders_as_unknown(self):
|
||||||
|
cluster = _make_cluster(start_iso=None)
|
||||||
|
timeline = _make_timeline(clusters=(cluster,))
|
||||||
|
block = _build_timeline_block(timeline)
|
||||||
|
assert "unknown" in block
|
||||||
|
|
|
||||||
133
tests/test_discover_scan.py
Normal file
133
tests/test_discover_scan.py
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
"""Tests for scan_log_directories in app.services.discover."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.services.discover import scan_log_directories, _path_to_source_id
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _make_log(tmp_path: Path, name: str, content: str = "hello\n", age_days: float = 0) -> Path:
|
||||||
|
p = tmp_path / name
|
||||||
|
p.write_text(content)
|
||||||
|
mtime = time.time() - age_days * 86400
|
||||||
|
os.utime(p, (mtime, mtime))
|
||||||
|
return p
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# _path_to_source_id
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_path_to_source_id_basic():
|
||||||
|
result = _path_to_source_id(Path("/var/log/nginx/access.log"))
|
||||||
|
assert result.startswith("var-log-nginx-access")
|
||||||
|
assert "/" not in result
|
||||||
|
assert " " not in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_path_to_source_id_max_length():
|
||||||
|
long_path = Path("/" + "a" * 200 + ".log")
|
||||||
|
assert len(_path_to_source_id(long_path)) <= 64
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# scan_log_directories
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_scan_finds_log_files(tmp_path):
|
||||||
|
_make_log(tmp_path, "app.log", "error: something\n")
|
||||||
|
_make_log(tmp_path, "system.log", "kernel: ok\n")
|
||||||
|
results = scan_log_directories(dirs=[str(tmp_path)])
|
||||||
|
paths = [r["path"] for r in results]
|
||||||
|
assert str(tmp_path / "app.log") in paths
|
||||||
|
assert str(tmp_path / "system.log") in paths
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_ignores_empty_files(tmp_path):
    """A zero-length log file does not appear in the scan results."""
    _make_log(tmp_path, "empty.log", "")
    labels = [entry["label"] for entry in scan_log_directories(dirs=[str(tmp_path)])]
    assert "empty.log" not in labels
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_ignores_non_log_extensions(tmp_path):
    """YAML and JSON files in the scanned directory are not reported."""
    fixtures = {"config.yaml": "key: value\n", "data.json": '{"a":1}\n'}
    for file_name, body in fixtures.items():
        (tmp_path / file_name).write_text(body)
    labels = [entry["label"] for entry in scan_log_directories(dirs=[str(tmp_path)])]
    for file_name in fixtures:
        assert file_name not in labels
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_ignores_compressed(tmp_path):
    """No result label ends in ``.gz`` when only a gzip-suffixed file exists."""
    _make_log(tmp_path, "old.log.gz", "compressed content")
    labels = [entry["label"] for entry in scan_log_directories(dirs=[str(tmp_path)])]
    gz_labels = [label for label in labels if label.endswith(".gz")]
    assert not gz_labels
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_respects_max_results(tmp_path):
    """``max_results`` caps the number of entries returned."""
    limit = 5
    for index in range(20):
        _make_log(tmp_path, f"app{index}.log", f"log line {index}\n")
    found = scan_log_directories(dirs=[str(tmp_path)], max_results=limit)
    assert len(found) <= limit
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_recent_files_score_higher(tmp_path):
    """A file modified now outscores one modified 60 days ago."""
    fresh_path = _make_log(tmp_path, "recent.log", "new stuff\n", age_days=0)
    aged_path = _make_log(tmp_path, "old.log", "old stuff\n", age_days=60)
    score_by_path = {}
    for entry in scan_log_directories(dirs=[str(tmp_path)]):
        score_by_path[entry["path"]] = entry["score"]
    assert score_by_path[str(fresh_path)] > score_by_path[str(aged_path)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_keyword_match_boosts_score(tmp_path):
    """With equal age, the file whose name matches a query keyword scores higher."""
    matching = _make_log(tmp_path, "nginx.log", "GET / 200\n", age_days=5)
    unrelated = _make_log(tmp_path, "kernel.log", "boot ok\n", age_days=5)
    found = scan_log_directories(query="nginx 502 error", dirs=[str(tmp_path)])
    score_by_path = {}
    for entry in found:
        score_by_path[entry["path"]] = entry["score"]
    assert score_by_path[str(matching)] > score_by_path[str(unrelated)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_returns_required_fields(tmp_path):
    """The first result is an available ``file`` entry carrying every expected key."""
    _make_log(tmp_path, "test.log", "data\n")
    found = scan_log_directories(dirs=[str(tmp_path)])
    assert found
    first = found[0]
    assert first["type"] == "file"
    for key in ("id", "path", "label", "size_bytes", "mtime", "score"):
        assert key in first
    assert first["available"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_missing_dir_is_graceful():
    """Scanning a directory that does not exist returns an empty list."""
    missing_dir = "/nonexistent/path/xyz"
    found = scan_log_directories(dirs=[missing_dir])
    assert found == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_subdirectory_recursive(tmp_path):
    """A log file one directory below the scanned root is reported."""
    nested_dir = tmp_path / "subapp"
    nested_dir.mkdir()
    _make_log(nested_dir, "subapp.log", "nested log\n")
    found = [entry["path"] for entry in scan_log_directories(dirs=[str(tmp_path)])]
    assert str(nested_dir / "subapp.log") in found
|
||||||
|
|
||||||
|
|
||||||
|
def test_scan_no_query_weights_recency_heavily(tmp_path):
    """Without a query, recency (0.7) dominates over size (0.3).

    A small fresh file must outscore a file 100x larger that is 20 days old.
    """
    small_fresh = _make_log(tmp_path, "fresh.log", "x" * 100, age_days=0)
    large_stale = _make_log(tmp_path, "stale.log", "x" * 10000, age_days=20)
    score_by_path = {}
    for entry in scan_log_directories(query=None, dirs=[str(tmp_path)]):
        score_by_path[entry["path"]] = entry["score"]
    assert score_by_path[str(small_fresh)] > score_by_path[str(large_stale)]
|
||||||
197
tests/test_gen_corpus.py
Normal file
197
tests/test_gen_corpus.py
Normal file
|
|
@ -0,0 +1,197 @@
|
||||||
|
"""Tests for scripts/gen_corpus.py synthetic log generator."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from scripts.gen_corpus import generate, main
|
||||||
|
|
||||||
|
# Fixed reference time keeps timestamps deterministic across test runs
|
||||||
|
_REF_TIME = datetime(2026, 6, 10, 12, 0, 0, tzinfo=timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _run(tmp_path: Path, days: int = 1, seed: int = 42, error_rate: float = 0.05) -> dict[str, int]:
    """Call ``generate`` into ``tmp_path`` with the fixed reference time.

    All arguments are forwarded unchanged; ``reference_time`` is always
    ``_REF_TIME``. Returns whatever ``generate`` returns.
    """
    options = {
        "days": days,
        "seed": seed,
        "error_rate": error_rate,
        "reference_time": _REF_TIME,
    }
    return generate(tmp_path, **options)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Output structure ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestOutputStructure:
    """Files written by a generator run and the totals it returns."""

    def test_creates_all_four_files(self, tmp_path: Path) -> None:
        """Each of the four expected corpus files exists after a run."""
        _run(tmp_path)
        expected = (
            ("journald", "system.jsonl"),
            ("docker", "services.jsonl"),
            ("qbittorrent", "qbt.log"),
            ("ext_device", "device.log"),
        )
        for folder, file_name in expected:
            assert (tmp_path / folder / file_name).exists()

    def test_returns_line_counts(self, tmp_path: Path) -> None:
        """The returned mapping has four entries, all strictly positive."""
        counts = _run(tmp_path)
        assert len(counts) == 4
        for value in counts.values():
            assert value > 0
|
||||||
|
|
||||||
|
|
||||||
|
# ── Reproducibility ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestReproducibility:
    """Seed handling: equal seeds repeat output, different seeds change it."""

    def test_same_seed_same_output(self, tmp_path: Path) -> None:
        """Two runs with seed 99 write identical journald and docker files."""
        first_dir, second_dir = tmp_path / "a", tmp_path / "b"
        _run(first_dir, seed=99)
        _run(second_dir, seed=99)
        for relative in ("journald/system.jsonl", "docker/services.jsonl"):
            first_text = (first_dir / relative).read_text()
            second_text = (second_dir / relative).read_text()
            assert first_text == second_text

    def test_different_seeds_differ(self, tmp_path: Path) -> None:
        """Seeds 1 and 2 write different journald files."""
        first_dir, second_dir = tmp_path / "a", tmp_path / "b"
        _run(first_dir, seed=1)
        _run(second_dir, seed=2)
        first_text = (first_dir / "journald/system.jsonl").read_text()
        second_text = (second_dir / "journald/system.jsonl").read_text()
        assert first_text != second_text
|
||||||
|
|
||||||
|
|
||||||
|
# ── Journald format ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestJournaldFormat:
    """Checks on the generated ``journald/system.jsonl`` file."""

    def test_valid_json_lines(self, tmp_path: Path) -> None:
        """The first 100 lines are JSON objects carrying the expected keys."""
        _run(tmp_path)
        lines = (tmp_path / "journald/system.jsonl").read_text().splitlines()
        # Without this guard the loop below checks nothing when the file is empty.
        assert lines
        for line in lines[:100]:
            obj = json.loads(line)
            assert "__REALTIME_TIMESTAMP" in obj
            assert "MESSAGE" in obj
            assert "PRIORITY" in obj

    def test_timestamp_is_microseconds(self, tmp_path: Path) -> None:
        """The first entry's timestamp is large enough to be in microseconds."""
        _run(tmp_path)
        lines = (tmp_path / "journald/system.jsonl").read_text().splitlines()
        ts = int(json.loads(lines[0])["__REALTIME_TIMESTAMP"])
        # microseconds since epoch — should be > year 2020
        assert ts > 1_577_836_800_000_000

    def test_parseable_by_journald_glean(self, tmp_path: Path) -> None:
        """The journald parser yields entries whose severities are all known values."""
        from app.glean.journald import parse

        _run(tmp_path)
        with (tmp_path / "journald/system.jsonl").open() as fh:
            entries = list(parse(fh, "test", []))
        assert len(entries) > 0
        severities = {e.severity for e in entries if e.severity}
        assert severities <= {"INFO", "DEBUG", "WARN", "ERROR", "CRITICAL"}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Docker format ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestDockerFormat:
    """Checks on the generated ``docker/services.jsonl`` file."""

    def test_valid_json_lines(self, tmp_path: Path) -> None:
        """The first 100 lines are JSON objects with ``SOURCE`` and ``MESSAGE``."""
        _run(tmp_path)
        lines = (tmp_path / "docker/services.jsonl").read_text().splitlines()
        # Without this guard the loop below checks nothing when the file is empty.
        assert lines
        for line in lines[:100]:
            obj = json.loads(line)
            assert "SOURCE" in obj
            assert "MESSAGE" in obj

    def test_parseable_by_docker_glean(self, tmp_path: Path) -> None:
        """The docker parser yields entries, more than 80% of them with a severity."""
        from app.glean.docker_log import parse

        _run(tmp_path)
        with (tmp_path / "docker/services.jsonl").open() as fh:
            entries = list(parse(fh, "test", []))
        assert len(entries) > 0
        # Severity should be detected in most entries (messages embed level= / LEVEL:)
        detected = [e for e in entries if e.severity is not None]
        assert len(detected) / len(entries) > 0.8
|
||||||
|
|
||||||
|
|
||||||
|
# ── qBittorrent format ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestQbittorrentFormat:
    """Checks on the generated ``qbittorrent/qbt.log`` file."""

    def test_hotio_format_lines(self, tmp_path: Path) -> None:
        """The first 50 lines match ``(<level>) <ISO timestamp> - <message>``."""
        import re

        _run(tmp_path)
        lines = (tmp_path / "qbittorrent/qbt.log").read_text().splitlines()
        pattern = re.compile(r"^\([NIWC]\) \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2} - .+$")
        # all() over an empty sequence is True, so require at least one line first.
        assert lines
        assert all(pattern.match(line) for line in lines[:50])

    def test_parseable_by_qbt_glean(self, tmp_path: Path) -> None:
        """The qBittorrent parser yields entries with only INFO/WARN/CRITICAL severities."""
        from app.glean.qbittorrent import parse

        _run(tmp_path)
        with (tmp_path / "qbittorrent/qbt.log").open() as fh:
            entries = list(parse(fh, "test", []))
        assert len(entries) > 0
        severities = {e.severity for e in entries if e.severity}
        assert severities <= {"INFO", "WARN", "CRITICAL"}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Vendor device format ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestAvcxFormat:
    """Checks on the generated ``ext_device/device.log`` file."""

    def test_iso_timestamp_prefix(self, tmp_path: Path) -> None:
        """The first 50 lines match ``<ISO timestamp> [<tag>] <message>``."""
        import re

        _run(tmp_path)
        lines = (tmp_path / "ext_device/device.log").read_text().splitlines()
        pattern = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2} \[.+\] .+$")
        # all() over an empty sequence is True, so require at least one line first.
        assert lines
        assert all(pattern.match(line) for line in lines[:50])

    def test_parseable_by_plaintext_glean(self, tmp_path: Path) -> None:
        """The plaintext parser yields entries, over 95% of them with a timestamp."""
        from app.glean.plaintext import parse

        _run(tmp_path)
        with (tmp_path / "ext_device/device.log").open() as fh:
            entries = list(parse(fh, "test", []))
        assert len(entries) > 0
        # ISO timestamps should parse cleanly
        timestamped = [e for e in entries if e.timestamp_iso]
        assert len(timestamped) / len(entries) > 0.95
|
||||||
|
|
||||||
|
|
||||||
|
# ── Error rate ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestErrorRate:
    """Effect and validation of the ``error_rate`` setting."""

    def test_high_error_rate_increases_errors(self, tmp_path: Path) -> None:
        """With the same seed, a 0.50 rate gives a higher ERROR/CRITICAL share than 0.01."""
        from app.glean.journald import parse

        def error_ratio(path: Path) -> float:
            # Share of parsed entries whose severity is ERROR or CRITICAL.
            with path.open() as fh:
                entries = list(parse(fh, "test", []))
            if not entries:
                return 0.0
            error_count = len([e for e in entries if e.severity in ("ERROR", "CRITICAL")])
            return error_count / len(entries)

        low_dir = tmp_path / "low"
        high_dir = tmp_path / "high"
        _run(low_dir, seed=7, error_rate=0.01)
        _run(high_dir, seed=7, error_rate=0.50)

        high_ratio = error_ratio(high_dir / "journald/system.jsonl")
        low_ratio = error_ratio(low_dir / "journald/system.jsonl")
        assert high_ratio > low_ratio

    def test_invalid_error_rate_returns_nonzero(self, tmp_path: Path) -> None:
        """``main`` returns a non-zero code when ``--error-rate`` is 1.5."""
        argv = ["--days", "1", "--out", str(tmp_path), "--error-rate", "1.5"]
        exit_code = main(argv)
        assert exit_code != 0
|
||||||
|
|
||||||
|
|
||||||
|
# ── CLI ────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestCLI:
    """End-to-end checks of the ``main`` command-line entry point."""

    def test_acceptance_criteria(self, tmp_path: Path) -> None:
        """Acceptance: --days 7 --out <dir> produces a gleanable corpus with varied severities."""
        from app.glean.journald import parse

        exit_code = main(["--days", "7", "--out", str(tmp_path)])
        assert exit_code == 0

        journal_path = tmp_path / "journald/system.jsonl"
        with journal_path.open() as fh:
            parsed = list(parse(fh, "test", []))

        seen = {entry.severity for entry in parsed if entry.severity}
        assert seen >= {"INFO", "WARN", "ERROR", "CRITICAL"}
        assert len(parsed) > 100_000  # 7 days of ~86k/day

    def test_missing_out_fails(self, tmp_path: Path, capsys: pytest.CaptureFixture) -> None:
        """Omitting ``--out`` raises ``SystemExit`` with a non-zero code."""
        with pytest.raises(SystemExit) as raised:
            main(["--days", "1"])
        assert raised.value.code != 0
|
||||||
|
|
@ -51,12 +51,14 @@ class TestFingerprintHelpers:
|
||||||
|
|
||||||
def test_fp_unchanged_returns_false_when_no_record(self, db_path: Path, log_file: Path) -> None:
|
def test_fp_unchanged_returns_false_when_no_record(self, db_path: Path, log_file: Path) -> None:
|
||||||
conn = sqlite3.connect(str(db_path))
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
mtime, size = _fingerprint(log_file)
|
mtime, size = _fingerprint(log_file)
|
||||||
assert _fp_unchanged(conn, log_file, mtime, size) is False
|
assert _fp_unchanged(conn, log_file, mtime, size) is False
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
def test_fp_unchanged_returns_true_after_save(self, db_path: Path, log_file: Path) -> None:
|
def test_fp_unchanged_returns_true_after_save(self, db_path: Path, log_file: Path) -> None:
|
||||||
conn = sqlite3.connect(str(db_path))
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
mtime, size = _fingerprint(log_file)
|
mtime, size = _fingerprint(log_file)
|
||||||
_save_fingerprint(conn, log_file, mtime, size, now_iso())
|
_save_fingerprint(conn, log_file, mtime, size, now_iso())
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
@ -65,6 +67,7 @@ class TestFingerprintHelpers:
|
||||||
|
|
||||||
def test_fp_unchanged_returns_false_on_size_change(self, db_path: Path, log_file: Path) -> None:
|
def test_fp_unchanged_returns_false_on_size_change(self, db_path: Path, log_file: Path) -> None:
|
||||||
conn = sqlite3.connect(str(db_path))
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
mtime, size = _fingerprint(log_file)
|
mtime, size = _fingerprint(log_file)
|
||||||
_save_fingerprint(conn, log_file, mtime, size, now_iso())
|
_save_fingerprint(conn, log_file, mtime, size, now_iso())
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
@ -74,6 +77,7 @@ class TestFingerprintHelpers:
|
||||||
|
|
||||||
def test_fp_unchanged_returns_false_on_mtime_change(self, db_path: Path, log_file: Path) -> None:
|
def test_fp_unchanged_returns_false_on_mtime_change(self, db_path: Path, log_file: Path) -> None:
|
||||||
conn = sqlite3.connect(str(db_path))
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
mtime, size = _fingerprint(log_file)
|
mtime, size = _fingerprint(log_file)
|
||||||
_save_fingerprint(conn, log_file, mtime, size, now_iso())
|
_save_fingerprint(conn, log_file, mtime, size, now_iso())
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
|
||||||
|
|
@ -4,24 +4,24 @@ from __future__ import annotations
|
||||||
from app.glean.syslog import is_syslog, parse
|
from app.glean.syslog import is_syslog, parse
|
||||||
|
|
||||||
SYSLOG_SAMPLE = """\
|
SYSLOG_SAMPLE = """\
|
||||||
May 11 14:23:01 xanderland sshd[1234]: Accepted publickey for x from 192.168.1.1 port 54321 ssh2
|
May 11 14:23:01 testhost sshd[1234]: Accepted publickey for x from 192.168.1.1 port 54321 ssh2
|
||||||
May 11 14:23:05 xanderland sshd[1234]: Failed password for invalid user admin from 10.0.0.99 port 22 ssh2
|
May 11 14:23:05 testhost sshd[1234]: Failed password for invalid user admin from 10.0.0.99 port 22 ssh2
|
||||||
May 11 14:23:10 xanderland sudo[5678]: x : TTY=pts/0 ; PWD=/home/x ; USER=root ; COMMAND=/usr/bin/apt update
|
May 11 14:23:10 testhost sudo[5678]: x : TTY=pts/0 ; PWD=/home/x ; USER=root ; COMMAND=/usr/bin/apt update
|
||||||
May 11 14:23:15 xanderland kernel: [12345.678] usb 1-1: USB disconnect, device number 2
|
May 11 14:23:15 testhost kernel: [12345.678] usb 1-1: USB disconnect, device number 2
|
||||||
May 1 04:00:00 xanderland CRON[9999]: (root) CMD (/usr/local/sbin/backup.sh)
|
May 1 04:00:00 testhost CRON[9999]: (root) CMD (/usr/local/sbin/backup.sh)
|
||||||
May 11 14:24:00 xanderland systemd[1]: Started NetworkManager.
|
May 11 14:24:00 testhost systemd[1]: Started NetworkManager.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class TestDetector:
|
class TestDetector:
|
||||||
def test_detects_standard_line(self):
|
def test_detects_standard_line(self):
|
||||||
assert is_syslog("May 11 14:23:01 xanderland sshd[1234]: message")
|
assert is_syslog("May 11 14:23:01 testhost sshd[1234]: message")
|
||||||
|
|
||||||
def test_detects_no_pid(self):
|
def test_detects_no_pid(self):
|
||||||
assert is_syslog("May 11 14:23:01 xanderland kernel: message")
|
assert is_syslog("May 11 14:23:01 testhost kernel: message")
|
||||||
|
|
||||||
def test_detects_space_padded_day(self):
|
def test_detects_space_padded_day(self):
|
||||||
assert is_syslog("May 1 04:00:00 xanderland CRON[9999]: message")
|
assert is_syslog("May 1 04:00:00 testhost CRON[9999]: message")
|
||||||
|
|
||||||
def test_rejects_servarr(self):
|
def test_rejects_servarr(self):
|
||||||
assert not is_syslog("2026-05-11 02:31:51.5|Info|ComponentName|Message")
|
assert not is_syslog("2026-05-11 02:31:51.5|Info|ComponentName|Message")
|
||||||
|
|
|
||||||
|
|
@ -33,12 +33,11 @@ def db(tmp_path: Path) -> Path:
|
||||||
("database connection refused backend gone away", "ERROR"),
|
("database connection refused backend gone away", "ERROR"),
|
||||||
("mDNS avahi heartbeat ok", "INFO"),
|
("mDNS avahi heartbeat ok", "INFO"),
|
||||||
]):
|
]):
|
||||||
# Columns: id, source_id, sequence, timestamp_raw, timestamp_iso,
|
|
||||||
# ingest_time, severity, repeat_count, out_of_order,
|
|
||||||
# matched_patterns, text
|
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT INTO log_entries VALUES (?,?,?,?,?,?,?,?,?,?,?)",
|
"INSERT INTO log_entries(id, tenant_id, source_id, sequence, timestamp_raw, "
|
||||||
(str(uuid.uuid4()), "src", i, None, None, "2026-01-01T00:00:00", sev, 1, 0, "[]", text),
|
"timestamp_iso, ingest_time, severity, repeat_count, out_of_order, "
|
||||||
|
"matched_patterns, text) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
|
||||||
|
(str(uuid.uuid4()), "", "src", i, None, None, "2026-01-01T00:00:00", sev, 1, 0, "[]", text),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
|
||||||
238
tests/test_incident_detector.py
Normal file
238
tests/test_incident_detector.py
Normal file
|
|
@ -0,0 +1,238 @@
|
||||||
|
"""Tests for app/tasks/incident_detector.py auto-incident detection."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from app.db import ensure_schema, ensure_incidents_schema
|
||||||
|
from app.services.incidents import create_incident, list_incidents
|
||||||
|
from app.tasks.incident_detector import (
|
||||||
|
_find_clusters,
|
||||||
|
_incident_exists_for_cluster,
|
||||||
|
_parse_ts,
|
||||||
|
detect_and_create,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _make_db(path: Path) -> None:
    """Run ``ensure_schema`` against the database file at ``path``."""
    ensure_schema(path)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_incidents_db(path: Path) -> None:
    """Run ``ensure_incidents_schema`` against the database file at ``path``."""
    ensure_incidents_schema(path)
|
||||||
|
|
||||||
|
|
||||||
|
def _iso(base: datetime, offset_s: float) -> str:
|
||||||
|
return (base + timedelta(seconds=offset_s)).isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_entry(db: Path, source_id: str, ts_iso: str, severity: str, ingest_time: str) -> None:
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO log_entries (id, source_id, sequence, timestamp_iso, ingest_time, "
|
||||||
|
"severity, text, repeat_count, out_of_order, matched_patterns, tenant_id) "
|
||||||
|
"VALUES (?,?,?,?,?,?,?,?,?,?,?)",
|
||||||
|
(
|
||||||
|
f"{source_id}-{ts_iso}", source_id, 0, ts_iso, ingest_time,
|
||||||
|
severity, "error text", 0, 0, "[]", "",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── _parse_ts ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestParseTs:
    """``_parse_ts`` on valid, missing and malformed timestamp input."""

    def test_parses_utc_iso(self) -> None:
        """An ISO string with a ``+00:00`` offset parses to a positive value."""
        parsed = _parse_ts("2026-06-11T12:00:00+00:00")
        assert parsed is not None
        assert parsed > 0

    def test_parses_z_suffix(self) -> None:
        """An ISO string with a ``Z`` suffix parses to a non-``None`` value."""
        parsed = _parse_ts("2026-06-11T12:00:00Z")
        assert parsed is not None

    def test_none_input(self) -> None:
        """``None`` input yields ``None``."""
        parsed = _parse_ts(None)
        assert parsed is None

    def test_invalid_input(self) -> None:
        """A string that is not a date yields ``None``."""
        parsed = _parse_ts("not-a-date")
        assert parsed is None
|
||||||
|
|
||||||
|
|
||||||
|
# ── _find_clusters ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestFindClusters:
    """``_find_clusters`` on dense, sparse, mixed-severity and untimestamped events."""

    # Reference instant; event timestamps are offsets in seconds from it.
    BASE = datetime(2026, 6, 11, 12, 0, 0, tzinfo=timezone.utc)

    def _events(self, offsets: list[float], severity: str = "ERROR") -> list[dict]:
        """Build one event dict per offset, all with the same severity."""
        return [{"timestamp_iso": _iso(self.BASE, o), "severity": severity} for o in offsets]

    def test_dense_cluster_detected(self) -> None:
        """Five errors inside one window form exactly one cluster."""
        events = self._events([0, 60, 120, 180, 240])  # 5 errors in 4 min
        clusters = _find_clusters(events, window_s=600, threshold=5)
        assert len(clusters) == 1

    def test_sparse_events_no_cluster(self) -> None:
        """Errors spaced wider than the window form no cluster."""
        events = self._events([0, 300, 600, 900, 1200])  # 5 errors, each 5 min apart
        clusters = _find_clusters(events, window_s=60, threshold=5)
        assert clusters == []

    def test_threshold_not_met(self) -> None:
        """Fewer events than the threshold form no cluster."""
        events = self._events([0, 10, 20, 30])  # only 4 events
        clusters = _find_clusters(events, window_s=600, threshold=5)
        assert clusters == []

    def test_critical_wins_over_error(self) -> None:
        """One CRITICAL event makes the cluster's third field ``CRITICAL``."""
        events = self._events([0, 10, 20, 30, 40], "ERROR")
        events[2]["severity"] = "CRITICAL"
        clusters = _find_clusters(events, window_s=600, threshold=5)
        # Fail with a readable assertion rather than an IndexError if nothing was found.
        assert clusters
        assert clusters[0][2] == "CRITICAL"

    def test_two_non_overlapping_clusters(self) -> None:
        """Two bursts an hour apart are reported as two clusters."""
        # Dense cluster at 0-4 min, then another at 60-64 min
        e1 = self._events([0, 60, 120, 180, 240])
        e2 = self._events([3600, 3660, 3720, 3780, 3840])
        clusters = _find_clusters(e1 + e2, window_s=600, threshold=5)
        assert len(clusters) == 2

    def test_no_timestamp_events_skipped(self) -> None:
        """Events without a timestamp never form a cluster."""
        # Build ten separate dicts; ``[d] * 10`` would repeat one shared object.
        events = [{"timestamp_iso": None, "severity": "ERROR"} for _ in range(10)]
        clusters = _find_clusters(events, window_s=600, threshold=5)
        assert clusters == []
|
||||||
|
|
||||||
|
|
||||||
|
# ── _incident_exists_for_cluster ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestIncidentExists:
    """``_incident_exists_for_cluster`` against an incidents database."""

    # Reference instant; incident and cluster bounds are offsets in seconds from it.
    BASE = datetime(2026, 6, 11, 12, 0, 0, tzinfo=timezone.utc)

    def test_no_existing_incidents(self, tmp_path: Path) -> None:
        """With no incidents stored, no cluster is reported as already covered."""
        incidents_db = tmp_path / "inc.db"
        _make_incidents_db(incidents_db)
        cluster_start = _iso(self.BASE, 0)
        cluster_end = _iso(self.BASE, 600)
        found = _incident_exists_for_cluster(incidents_db, "nginx", cluster_start, cluster_end)
        assert not found

    def test_exact_overlap_detected(self, tmp_path: Path) -> None:
        """A cluster inside a stored incident's window for the same source is matched."""
        incidents_db = tmp_path / "inc.db"
        _make_incidents_db(incidents_db)
        create_incident(
            incidents_db,
            label="Auto: nginx — 5 errors",
            issue_type="auto:nginx",
            started_at=_iso(self.BASE, 0),
            ended_at=_iso(self.BASE, 600),
            severity="high",
        )
        cluster_start = _iso(self.BASE, 100)
        cluster_end = _iso(self.BASE, 400)
        found = _incident_exists_for_cluster(incidents_db, "nginx", cluster_start, cluster_end)
        assert found

    def test_different_source_not_matched(self, tmp_path: Path) -> None:
        """An incident stored for ``caddy`` does not cover an ``nginx`` cluster."""
        incidents_db = tmp_path / "inc.db"
        _make_incidents_db(incidents_db)
        create_incident(
            incidents_db,
            label="Auto: caddy — 5 errors",
            issue_type="auto:caddy",
            started_at=_iso(self.BASE, 0),
            ended_at=_iso(self.BASE, 600),
            severity="high",
        )
        cluster_start = _iso(self.BASE, 0)
        cluster_end = _iso(self.BASE, 600)
        found = _incident_exists_for_cluster(incidents_db, "nginx", cluster_start, cluster_end)
        assert not found

    def test_non_overlapping_not_matched(self, tmp_path: Path) -> None:
        """A cluster that starts after the stored incident ended is not matched."""
        incidents_db = tmp_path / "inc.db"
        _make_incidents_db(incidents_db)
        create_incident(
            incidents_db,
            label="Auto: nginx — 5 errors",
            issue_type="auto:nginx",
            started_at=_iso(self.BASE, 0),
            ended_at=_iso(self.BASE, 300),
            severity="high",
        )
        # Cluster starts after existing incident ends
        cluster_start = _iso(self.BASE, 900)
        cluster_end = _iso(self.BASE, 1200)
        found = _incident_exists_for_cluster(incidents_db, "nginx", cluster_start, cluster_end)
        assert not found
|
||||||
|
|
||||||
|
|
||||||
|
# ── detect_and_create ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestDetectAndCreate:
    """End-to-end checks of ``detect_and_create`` on temporary databases."""

    # Reference instant; entry timestamps are offsets in seconds from it.
    BASE = datetime(2026, 6, 11, 12, 0, 0, tzinfo=timezone.utc)

    def _setup(self, tmp_path: Path) -> tuple[Path, Path]:
        """Create the log database and the incidents database; return both paths."""
        log_db = tmp_path / "ts.db"
        incidents_db = tmp_path / "incidents.db"
        _make_db(log_db)
        _make_incidents_db(incidents_db)
        return log_db, incidents_db

    def test_creates_incident_on_cluster(self, tmp_path: Path) -> None:
        """Six errors 30 s apart from one source create one ``auto:nginx`` incident."""
        log_db, incidents_db = self._setup(tmp_path)
        ingested_at = _iso(self.BASE, -60)
        for offset in range(0, 180, 30):
            _insert_entry(log_db, "nginx", _iso(self.BASE, offset), "ERROR", ingested_at)

        outcome = detect_and_create(log_db, incidents_db, since=_iso(self.BASE, -120))
        assert outcome["created"] == 1
        stored = list_incidents(incidents_db)
        assert len(stored) == 1
        only = stored[0]
        assert "nginx" in only.label
        assert only.issue_type == "auto:nginx"

    def test_no_incident_below_threshold(self, tmp_path: Path) -> None:
        """Four errors with a threshold of five create nothing."""
        log_db, incidents_db = self._setup(tmp_path)
        ingested_at = _iso(self.BASE, -60)
        for offset in range(0, 120, 30):  # only 4 errors — below default threshold of 5
            _insert_entry(log_db, "nginx", _iso(self.BASE, offset), "ERROR", ingested_at)

        outcome = detect_and_create(
            log_db, incidents_db, since=_iso(self.BASE, -120), threshold=5
        )
        assert outcome["created"] == 0

    def test_no_duplicate_incidents(self, tmp_path: Path) -> None:
        """Running detection twice over the same entries leaves one incident."""
        log_db, incidents_db = self._setup(tmp_path)
        ingested_at = _iso(self.BASE, -60)
        for offset in range(0, 180, 30):
            _insert_entry(log_db, "nginx", _iso(self.BASE, offset), "ERROR", ingested_at)

        window_start = _iso(self.BASE, -120)
        detect_and_create(log_db, incidents_db, since=window_start)
        detect_and_create(log_db, incidents_db, since=window_start)  # second run

        stored = list_incidents(incidents_db)
        assert len(stored) == 1

    def test_critical_severity_mapped_to_critical_label(self, tmp_path: Path) -> None:
        """A cluster containing a CRITICAL entry produces a ``critical`` incident."""
        log_db, incidents_db = self._setup(tmp_path)
        ingested_at = _iso(self.BASE, -60)
        for offset in range(0, 180, 30):
            level = "CRITICAL" if offset == 0 else "ERROR"
            _insert_entry(log_db, "sshd", _iso(self.BASE, offset), level, ingested_at)

        detect_and_create(log_db, incidents_db, since=_iso(self.BASE, -120))
        stored = list_incidents(incidents_db)
        assert stored[0].severity == "critical"

    def test_empty_db_returns_zero(self, tmp_path: Path) -> None:
        """Detection over an empty log database creates nothing."""
        log_db, incidents_db = self._setup(tmp_path)
        outcome = detect_and_create(log_db, incidents_db, since=None)
        assert outcome["created"] == 0

    def test_independent_sources_each_get_incident(self, tmp_path: Path) -> None:
        """Simultaneous bursts from two sources create two incidents."""
        log_db, incidents_db = self._setup(tmp_path)
        ingested_at = _iso(self.BASE, -60)
        for source in ("caddy", "nginx"):
            for offset in range(0, 180, 30):
                _insert_entry(log_db, source, _iso(self.BASE, offset), "ERROR", ingested_at)

        outcome = detect_and_create(log_db, incidents_db, since=_iso(self.BASE, -120))
        assert outcome["created"] == 2
|
||||||
87
tests/test_llm_client.py
Normal file
87
tests/test_llm_client.py
Normal file
|
|
@ -0,0 +1,87 @@
|
||||||
|
"""Tests for diagnose/_llm_client.py — thinking-tag stripping and content extraction."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _resp(content: str | None) -> dict:
|
||||||
|
if content is None:
|
||||||
|
return {"choices": []}
|
||||||
|
return {"choices": [{"message": {"content": content}}]}
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractContent:
    """``extract_content``: plain content, empty responses and ``<think>`` stripping."""

    def test_returns_plain_content(self):
        """Content without markup is returned as-is."""
        from app.services.diagnose._llm_client import extract_content

        response = _resp("hello world")
        assert extract_content(response) == "hello world"

    def test_returns_none_on_empty_choices(self):
        """A response with no choices yields ``None``."""
        from app.services.diagnose._llm_client import extract_content

        response = {"choices": []}
        assert extract_content(response) is None

    def test_returns_none_on_empty_content(self):
        """An empty content string yields ``None``."""
        from app.services.diagnose._llm_client import extract_content

        response = _resp("")
        assert extract_content(response) is None

    def test_strips_single_think_block(self):
        """A leading one-line think block is removed along with the newline after it."""
        from app.services.diagnose._llm_client import extract_content

        payload = "<think>Let me reason about this…</think>\nThe answer is 42."
        assert extract_content(_resp(payload)) == "The answer is 42."

    def test_strips_multi_line_think_block(self):
        """A think block spanning several lines is removed entirely."""
        from app.services.diagnose._llm_client import extract_content

        payload = "<think>\nStep 1: consider X\nStep 2: consider Y\n</think>\n\nFinal answer here."
        cleaned = extract_content(_resp(payload))
        assert cleaned == "Final answer here."
        assert "<think>" not in cleaned

    def test_strips_multiple_think_blocks(self):
        """Every think block is removed; the text between and after them is kept."""
        from app.services.diagnose._llm_client import extract_content

        payload = "<think>first</think> actual <think>second</think> content"
        cleaned = extract_content(_resp(payload))
        assert "<think>" not in cleaned
        for kept in ("actual", "content"):
            assert kept in cleaned

    def test_strips_case_insensitive(self):
        """Upper-case ``<THINK>`` tags are stripped as well."""
        from app.services.diagnose._llm_client import extract_content

        payload = "<THINK>hidden</THINK> visible"
        cleaned = extract_content(_resp(payload))
        assert cleaned == "visible"

    def test_returns_none_when_only_thinking_remains(self):
        """Content consisting solely of a think block yields ``None``."""
        from app.services.diagnose._llm_client import extract_content

        payload = "<think>only thinking, no output</think>"
        assert extract_content(_resp(payload)) is None

    def test_content_without_thinking_unchanged(self):
        """Content with no think block comes back byte-for-byte."""
        from app.services.diagnose._llm_client import extract_content

        payload = "Redis OOM at 03:00 — key eviction triggered by batch job."
        assert extract_content(_resp(payload)) == payload
|
||||||
|
|
||||||
|
|
||||||
|
class TestStripJsonFences:
    """Behaviour of ``strip_json_fences`` on Markdown-fenced input."""

    def test_strips_json_fence(self):
        """A ```` ```json ```` fence is removed, leaving the bare JSON text."""
        from app.services.diagnose._llm_client import strip_json_fences

        fenced = "```json\n[{\"a\": 1}]\n```"
        unfenced = strip_json_fences(fenced)
        assert unfenced == '[{"a": 1}]'

    def test_strips_plain_fence(self):
        """A fence without a language tag leaves no backtick fence behind."""
        from app.services.diagnose._llm_client import strip_json_fences

        unfenced = strip_json_fences("```\nhello\n```")
        assert "```" not in unfenced
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractFirstJsonArray:
    """Behaviour of ``extract_first_json_array`` on free-form model output."""

    def test_extracts_array_from_mixed_text(self):
        """The JSON array embedded in surrounding prose is returned and parseable."""
        import json

        from app.services.diagnose._llm_client import extract_first_json_array

        mixed = 'Here is the result:\n[{"id": 1}, {"id": 2}]\nThat is all.'
        items = json.loads(extract_first_json_array(mixed))
        assert len(items) == 2

    def test_returns_original_when_no_array(self):
        """Input without any array is handed back unchanged."""
        from app.services.diagnose._llm_client import extract_first_json_array

        text = "no array here"
        assert extract_first_json_array(text) == text
|
||||||
245
tests/test_ssh_targets.py
Normal file
245
tests/test_ssh_targets.py
Normal file
|
|
@ -0,0 +1,245 @@
|
||||||
|
"""Tests for ssh_targets service — CRUD, validation, serialization."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import stat
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _make_db(tmp_path: Path) -> Path:
    """Return the path of ``test.db`` under ``tmp_path`` after running ``ensure_schema`` on it."""
    from app.glean.pipeline import ensure_schema

    db_file = tmp_path / "test.db"
    ensure_schema(db_file)
    return db_file
|
||||||
|
|
||||||
|
|
||||||
|
def _make_key(tmp_path: Path, mode: int = 0o600) -> Path:
|
||||||
|
"""Write a fake SSH private key file with the given permission mode."""
|
||||||
|
key = tmp_path / "id_ed25519"
|
||||||
|
key.write_text("-----BEGIN OPENSSH PRIVATE KEY-----\nfake\n-----END OPENSSH PRIVATE KEY-----\n")
|
||||||
|
key.chmod(mode)
|
||||||
|
return key
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Schema
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestSchema:
    """Schema checks for the ``ssh_targets`` table in a DB built by ``_make_db``."""

    def test_ssh_targets_table_exists(self, tmp_path):
        """The database lists an ``ssh_targets`` table in ``sqlite_master``."""
        db = _make_db(tmp_path)
        conn = sqlite3.connect(str(db))
        try:
            rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
        finally:
            # Close before asserting so a failing assertion cannot leak the connection.
            conn.close()
        tables = {r[0] for r in rows}
        assert "ssh_targets" in tables

    def test_ssh_targets_columns(self, tmp_path):
        """``ssh_targets`` exposes at least the expected set of columns."""
        db = _make_db(tmp_path)
        conn = sqlite3.connect(str(db))
        try:
            rows = conn.execute("PRAGMA table_info(ssh_targets)").fetchall()
        finally:
            # Close before asserting so a failing assertion cannot leak the connection.
            conn.close()
        # Index 1 of each PRAGMA table_info row is the column name.
        cols = {r[1] for r in rows}
        assert cols >= {"id", "label", "host", "port", "user", "key_path",
                        "last_tested", "last_ok", "last_error", "created_at", "updated_at"}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CRUD
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestCrud:
    """Create / read / update / delete round-trips for ``app.services.ssh_targets``."""

    def test_create_and_list(self, tmp_path):
        """A created target echoes its fields and is the only entry listed."""
        from app.services.ssh_targets import create_target, list_targets

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        created = create_target(
            db, label="server-01", host="10.0.0.1", port=22, user="alan", key_path=str(key_file)
        )

        assert created.label == "server-01"
        assert created.host == "10.0.0.1"
        assert created.port == 22
        assert created.user == "alan"

        listed = list_targets(db)
        assert len(listed) == 1
        assert listed[0].id == created.id

    def test_create_resolves_tilde(self, tmp_path):
        """A ``~``-prefixed key path is stored without the tilde."""
        from unittest.mock import patch

        from app.services.ssh_targets import create_target

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        # Redirect expanduser so "~/id_ed25519" resolves to the temporary key file.
        with patch("pathlib.Path.expanduser", return_value=key_file):
            created = create_target(db, label="x", host="h", port=22, user="u", key_path="~/id_ed25519")
        assert "~" not in created.key_path

    def test_get_returns_none_for_missing(self, tmp_path):
        """Looking up an unknown id yields ``None``."""
        from app.services.ssh_targets import get_target

        db = _make_db(tmp_path)
        found = get_target(db, "nonexistent-id")
        assert found is None

    def test_update_partial(self, tmp_path):
        """Updating only the label leaves the other fields as they were."""
        from app.services.ssh_targets import create_target, update_target

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        created = create_target(
            db, label="old-label", host="10.0.0.1", port=22, user="alan", key_path=str(key_file)
        )

        changed = update_target(db, created.id, label="new-label")

        assert changed is not None
        assert changed.label == "new-label"
        assert changed.host == "10.0.0.1"  # unchanged

    def test_update_missing_target_returns_none(self, tmp_path):
        """Updating an unknown id yields ``None``."""
        from app.services.ssh_targets import update_target

        db = _make_db(tmp_path)
        outcome = update_target(db, "no-such-id", label="x")
        assert outcome is None

    def test_delete_returns_true_on_success(self, tmp_path):
        """Deleting an existing target returns ``True`` and empties the list."""
        from app.services.ssh_targets import create_target, delete_target, list_targets

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        created = create_target(db, label="x", host="h", port=22, user="u", key_path=str(key_file))

        assert delete_target(db, created.id) is True
        assert list_targets(db) == []

    def test_delete_returns_false_for_missing(self, tmp_path):
        """Deleting an unknown id returns ``False``."""
        from app.services.ssh_targets import delete_target

        db = _make_db(tmp_path)
        removed = delete_target(db, "no-such-id")
        assert removed is False

    def test_list_sorted_by_label(self, tmp_path):
        """Targets come back ordered by label regardless of insertion order."""
        from app.services.ssh_targets import create_target, list_targets

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        # Insert out of alphabetical order on purpose.
        for name in ("zebra", "alpha"):
            create_target(db, label=name, host="h", port=22, user="u", key_path=str(key_file))

        ordered = [target.label for target in list_targets(db)]
        assert ordered == ["alpha", "zebra"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Validation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestValidation:
    """Key-path validation performed by ``create_target`` and ``update_target``."""

    def test_create_raises_on_missing_key_file(self, tmp_path):
        """A key path that does not exist is rejected with a "not found" error."""
        from app.services.ssh_targets import create_target

        db = _make_db(tmp_path)
        missing_key = "/nonexistent/key"
        with pytest.raises(ValueError, match="not found"):
            create_target(db, label="x", host="h", port=22, user="u", key_path=missing_key)

    def test_create_raises_on_directory_as_key(self, tmp_path):
        """Pointing the key path at a directory is rejected with "not a file"."""
        from app.services.ssh_targets import create_target

        db = _make_db(tmp_path)
        directory = str(tmp_path)
        with pytest.raises(ValueError, match="not a file"):
            create_target(db, label="x", host="h", port=22, user="u", key_path=directory)

    def test_update_raises_on_bad_key_path(self, tmp_path):
        """Updating an existing target to a nonexistent key path raises ``ValueError``."""
        from app.services.ssh_targets import create_target, update_target

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        created = create_target(db, label="x", host="h", port=22, user="u", key_path=str(key_file))

        with pytest.raises(ValueError):
            update_target(db, created.id, key_path="/does/not/exist")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Key warning
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestKeyWarning:
    """Permission warnings produced by ``key_path_warning``."""

    def test_no_warning_for_600(self, tmp_path):
        """A key file with mode 0o600 produces no warning."""
        from app.services.ssh_targets import key_path_warning

        private_key = _make_key(tmp_path, mode=0o600)
        message = key_path_warning(str(private_key))
        assert message is None

    def test_warning_for_644(self, tmp_path):
        """A key file with mode 0o644 yields a warning that mentions ``chmod 600``."""
        from app.services.ssh_targets import key_path_warning

        loose_key = _make_key(tmp_path, mode=0o644)
        message = key_path_warning(str(loose_key))

        assert message is not None
        assert "chmod 600" in message

    def test_no_warning_for_nonexistent_file(self, tmp_path):
        """A missing file must not raise; the function simply returns ``None``."""
        from app.services.ssh_targets import key_path_warning

        message = key_path_warning("/nonexistent/path")
        assert message is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Serialization
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestTargetToDict:
    """Serialization of targets through ``target_to_dict``."""

    def test_basic_fields_present(self, tmp_path):
        """The dict carries the core fields and omits ``key_warning`` by default."""
        from app.services.ssh_targets import create_target, target_to_dict

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        created = create_target(
            db, label="server", host="10.0.0.1", port=2222, user="admin", key_path=str(key_file)
        )

        payload = target_to_dict(created)

        assert payload["label"] == "server"
        assert payload["host"] == "10.0.0.1"
        assert payload["port"] == 2222
        assert payload["user"] == "admin"
        assert "key_path" in payload
        assert "key_warning" not in payload  # not included by default

    def test_key_contents_never_in_dict(self, tmp_path):
        """No string value in the serialized dict contains the key file's contents."""
        from app.services.ssh_targets import create_target, target_to_dict

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        created = create_target(db, label="x", host="h", port=22, user="u", key_path=str(key_file))

        payload = target_to_dict(created, include_warning=True)

        # The fake key body starts with "-----BEGIN"; it must not appear in any string value.
        string_values = [value for value in payload.values() if isinstance(value, str)]
        for value in string_values:
            assert "BEGIN" not in value, "Key contents must never be included in serialized output"

    def test_include_warning_adds_field(self, tmp_path):
        """``include_warning=True`` adds a non-null ``key_warning`` for a 0o644 key."""
        from app.services.ssh_targets import create_target, target_to_dict

        db = _make_db(tmp_path)
        loose_key = _make_key(tmp_path, mode=0o644)
        created = create_target(db, label="x", host="h", port=22, user="u", key_path=str(loose_key))

        payload = target_to_dict(created, include_warning=True)

        assert "key_warning" in payload
        assert payload["key_warning"] is not None

    def test_last_ok_is_none_before_test(self, tmp_path):
        """A freshly created target has no recorded connection-test result."""
        from app.services.ssh_targets import create_target, target_to_dict

        db = _make_db(tmp_path)
        key_file = _make_key(tmp_path)
        created = create_target(db, label="x", host="h", port=22, user="u", key_path=str(key_file))

        payload = target_to_dict(created)

        assert payload["last_ok"] is None
        assert payload["last_tested"] is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# test_connection (paramiko not available path)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestConnectionNoParamiko:
    """``test_connection`` behaviour when paramiko is unavailable or the target is unknown."""

    def test_returns_error_when_paramiko_missing(self, tmp_path):
        """With paramiko hidden, the result reports failure and names paramiko."""
        from app.services.ssh_targets import create_target, test_connection
        import sys

        db = _make_db(tmp_path)
        key = _make_key(tmp_path)
        t = create_target(db, label="x", host="127.0.0.1", port=22, user="u", key_path=str(key))

        # Temporarily hide paramiko from the import system. A ``None`` entry in
        # ``sys.modules`` makes ``import paramiko`` fail. The sentinel separates
        # "no entry at all" from "entry already set to None", so the previous
        # state is restored exactly in both cases.
        absent = object()
        original = sys.modules.get("paramiko", absent)
        sys.modules["paramiko"] = None  # type: ignore[assignment]
        try:
            result = test_connection(db, t.id)
        finally:
            if original is absent:
                del sys.modules["paramiko"]
            else:
                sys.modules["paramiko"] = original

        assert result["ok"] is False
        assert "paramiko" in result["error"].lower()

    def test_raises_key_error_for_missing_target(self, tmp_path):
        """An unknown target id raises ``KeyError``."""
        from app.services.ssh_targets import test_connection

        db = _make_db(tmp_path)
        with pytest.raises(KeyError):
            test_connection(db, "no-such-id")
|
||||||
224
tests/test_ticket_export.py
Normal file
224
tests/test_ticket_export.py
Normal file
|
|
@ -0,0 +1,224 @@
|
||||||
|
"""Tests for ticket_export service — Notion and Jira exporters."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# Sample incident record passed to every exporter test in this module.
INCIDENT = {
    "id": "inc-1",
    "label": "Redis OOM — key eviction flood",
    "issue_type": "memory",
    "started_at": "2026-06-01T03:00:00Z",
    "ended_at": "2026-06-01T03:45:00Z",
    "notes": "Triggered by batch job at 03:00",
    "severity": "high",
}

# Two sample log entries that accompany INCIDENT in the exporter calls.
ENTRIES = [
    {
        "entry_id": "e1",
        "source_id": "host:redis",
        "severity": "ERROR",
        "text": "maxmemory reached, evicting keys",
    },
    {
        "entry_id": "e2",
        "source_id": "host:app",
        "severity": "WARN",
        "text": "Redis NOEVICTION response",
    },
]
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_response(status_code: int, body: dict):
|
||||||
|
resp = MagicMock()
|
||||||
|
resp.is_success = (status_code < 400)
|
||||||
|
resp.status_code = status_code
|
||||||
|
resp.json.return_value = body
|
||||||
|
resp.text = str(body)
|
||||||
|
return resp
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# available_targets
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_available_targets_lists_known_integrations():
    """Both the Notion and Jira integrations are advertised as export targets."""
    from app.services.ticket_export import available_targets

    advertised = available_targets()
    for integration in ("notion", "jira"):
        assert integration in advertised
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# export_incident dispatch
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_export_incident_raises_for_unknown_target():
    """Requesting a target name that is not registered raises ``ValueError``."""
    from app.services.ticket_export import export_incident

    unsupported = "linear"
    with pytest.raises(ValueError, match="Unknown ticket target"):
        export_incident(unsupported, INCIDENT, ENTRIES, {})
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Notion exporter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestNotionExport:
    """Notion exporter tests; ``httpx.post`` is patched so no network call is made."""

    def test_successful_export_returns_url_and_id(self):
        """A 200 response yields the page id as ``ticket_id`` plus a notion.so URL."""
        from app.services.ticket_export import export_incident
        page_id = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
        mock_resp = _mock_response(200, {"id": page_id, "url": f"https://notion.so/{page_id}"})
        with patch("app.services.ticket_export.httpx.post", return_value=mock_resp) as mock_post:
            result = export_incident("notion", INCIDENT, ENTRIES, {
                "notion_token": "secret_abc123",
                "notion_database_id": "db-id-xyz",
            })
        assert result["ticket_id"] == page_id
        assert "notion.so" in result["url"]
        mock_post.assert_called_once()

    def test_raises_value_error_when_not_configured(self):
        """An empty token is rejected before any request is attempted."""
        from app.services.ticket_export import export_incident
        with pytest.raises(ValueError, match="Notion not configured"):
            export_incident("notion", INCIDENT, ENTRIES, {
                "notion_token": "",
                "notion_database_id": "db-id",
            })

    def test_raises_value_error_when_database_id_missing(self):
        """An empty database id is rejected with the same configuration error."""
        from app.services.ticket_export import export_incident
        with pytest.raises(ValueError, match="Notion not configured"):
            export_incident("notion", INCIDENT, ENTRIES, {
                "notion_token": "secret_abc",
                "notion_database_id": "",
            })

    def test_raises_runtime_error_on_api_error(self):
        """A 401 from the API surfaces as ``RuntimeError`` naming the status code."""
        from app.services.ticket_export import export_incident
        mock_resp = _mock_response(401, {"message": "Unauthorized"})
        with patch("app.services.ticket_export.httpx.post", return_value=mock_resp):
            with pytest.raises(RuntimeError, match="Notion API error 401"):
                export_incident("notion", INCIDENT, ENTRIES, {
                    "notion_token": "bad-token",
                    "notion_database_id": "db-id",
                })

    def test_sends_correct_database_id(self):
        """The configured database id is sent as the page's parent."""
        from app.services.ticket_export import export_incident
        db_id = "my-database-uuid"
        mock_resp = _mock_response(200, {"id": "page-id", "url": "https://notion.so/page-id"})
        with patch("app.services.ticket_export.httpx.post", return_value=mock_resp) as mock_post:
            export_incident("notion", INCIDENT, ENTRIES, {
                "notion_token": "secret_abc123",
                "notion_database_id": db_id,
            })
        # httpx.post accepts ``json`` by keyword only, so read it from kwargs
        # exactly as the other tests in this class do.
        payload = mock_post.call_args.kwargs["json"]
        assert payload["parent"]["database_id"] == db_id

    def test_incident_label_becomes_page_title(self):
        """The incident label appears in the page's title property."""
        from app.services.ticket_export import export_incident
        mock_resp = _mock_response(200, {"id": "pid", "url": "https://notion.so/pid"})
        with patch("app.services.ticket_export.httpx.post", return_value=mock_resp) as mock_post:
            export_incident("notion", INCIDENT, ENTRIES, {
                "notion_token": "tok",
                "notion_database_id": "dbid",
            })
        payload = mock_post.call_args.kwargs["json"]
        title_text = payload["properties"]["title"]["title"][0]["text"]["content"]
        assert INCIDENT["label"] in title_text

    def test_url_falls_back_to_constructed_url(self):
        """When the response carries no ``url``, the result still has a usable one."""
        from app.services.ticket_export import export_incident
        page_id = "abc123"
        mock_resp = _mock_response(200, {"id": page_id})  # no 'url' in response
        with patch("app.services.ticket_export.httpx.post", return_value=mock_resp):
            result = export_incident("notion", INCIDENT, ENTRIES, {
                "notion_token": "tok",
                "notion_database_id": "dbid",
            })
        assert "notion.so" in result["url"] or page_id in result["url"]

    def test_long_text_truncated_to_notion_limit(self):
        """No bulleted-list rich-text item in the payload exceeds 2000 characters."""
        from app.services.ticket_export import export_incident
        mock_resp = _mock_response(200, {"id": "pid", "url": "https://notion.so/pid"})
        long_entries = [{"entry_id": f"e{i}", "source_id": "host:svc", "severity": "ERROR",
                         "text": "x" * 300} for i in range(60)]
        with patch("app.services.ticket_export.httpx.post", return_value=mock_resp) as mock_post:
            export_incident("notion", INCIDENT, long_entries, {
                "notion_token": "tok",
                "notion_database_id": "dbid",
            })
        payload = mock_post.call_args.kwargs["json"]
        # Only bulleted_list_item blocks are inspected; other block types are skipped.
        for block in payload.get("children", []):
            for rt in block.get("bulleted_list_item", {}).get("rich_text", []):
                assert len(rt["text"]["content"]) <= 2000
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Jira exporter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestJiraExport:
    """Jira exporter tests; ``httpx.post`` is patched so no network call is made."""

    # Complete, valid-looking configuration shared (read-only) by the tests below.
    _config = {
        "jira_url": "https://myorg.atlassian.net",
        "jira_email": "ops@example.com",
        "jira_api_token": "ATATT3xFfGF0abc123",
        "jira_project_key": "OPS",
        "jira_issue_type": "Bug",
    }

    def test_successful_export_returns_url_and_key(self):
        """A 201 response yields the issue key and a URL on the configured host."""
        from app.services.ticket_export import export_incident

        created = _mock_response(
            201,
            {"id": "10042", "key": "OPS-42", "self": "https://myorg.atlassian.net/rest/api/3/issue/10042"},
        )
        with patch("app.services.ticket_export.httpx.post", return_value=created):
            outcome = export_incident("jira", INCIDENT, ENTRIES, self._config)

        assert outcome["ticket_id"] == "OPS-42"
        assert "OPS-42" in outcome["url"]
        assert "myorg.atlassian.net" in outcome["url"]

    def test_raises_value_error_when_not_configured(self):
        """An empty ``jira_url`` is rejected as a configuration error."""
        from app.services.ticket_export import export_incident

        incomplete = {
            "jira_url": "",
            "jira_email": "a@b.com",
            "jira_api_token": "tok",
            "jira_project_key": "OPS",
        }
        with pytest.raises(ValueError, match="Jira not configured"):
            export_incident("jira", INCIDENT, ENTRIES, incomplete)

    def test_raises_runtime_error_on_api_error(self):
        """A 403 from the API surfaces as ``RuntimeError`` naming the status code."""
        from app.services.ticket_export import export_incident

        forbidden = _mock_response(403, {"errorMessages": ["Forbidden"]})
        with patch("app.services.ticket_export.httpx.post", return_value=forbidden):
            with pytest.raises(RuntimeError, match="Jira API error 403"):
                export_incident("jira", INCIDENT, ENTRIES, self._config)

    def test_sends_basic_auth_header(self):
        """The request carries a Basic ``Authorization`` header containing the email."""
        import base64

        from app.services.ticket_export import export_incident

        created = _mock_response(201, {"key": "OPS-1", "id": "1"})
        with patch("app.services.ticket_export.httpx.post", return_value=created) as posted:
            export_incident("jira", INCIDENT, ENTRIES, self._config)

        header = posted.call_args.kwargs["headers"]["Authorization"]
        assert header.startswith("Basic ")
        # Everything after the 6-character "Basic " prefix is the base64 credential pair.
        credentials = base64.b64decode(header[6:]).decode()
        assert "ops@example.com" in credentials

    def test_uses_correct_project_key(self):
        """The configured project key is sent in the issue fields."""
        from app.services.ticket_export import export_incident

        created = _mock_response(201, {"key": "OPS-7", "id": "7"})
        with patch("app.services.ticket_export.httpx.post", return_value=created) as posted:
            export_incident("jira", INCIDENT, ENTRIES, self._config)

        fields = posted.call_args.kwargs["json"]["fields"]
        assert fields["project"]["key"] == "OPS"

    def test_incident_label_becomes_summary(self):
        """The incident label is used verbatim as the issue summary."""
        from app.services.ticket_export import export_incident

        created = _mock_response(201, {"key": "OPS-8", "id": "8"})
        with patch("app.services.ticket_export.httpx.post", return_value=created) as posted:
            export_incident("jira", INCIDENT, ENTRIES, self._config)

        fields = posted.call_args.kwargs["json"]["fields"]
        assert fields["summary"] == INCIDENT["label"]

    def test_default_issue_type_is_bug(self):
        """Omitting ``jira_issue_type`` from the config defaults the issue type to Bug."""
        from app.services.ticket_export import export_incident

        without_type = {name: value for name, value in self._config.items() if name != "jira_issue_type"}
        created = _mock_response(201, {"key": "OPS-9", "id": "9"})
        with patch("app.services.ticket_export.httpx.post", return_value=created) as posted:
            export_incident("jira", INCIDENT, ENTRIES, without_type)

        fields = posted.call_args.kwargs["json"]["fields"]
        assert fields["issuetype"]["name"] == "Bug"
|
||||||
|
|
@ -76,6 +76,7 @@ const navLinks = [
|
||||||
{ to: '/search', label: 'Search' },
|
{ to: '/search', label: 'Search' },
|
||||||
{ to: '/diagnose', label: 'Diagnose' },
|
{ to: '/diagnose', label: 'Diagnose' },
|
||||||
{ to: '/incidents', label: 'Incidents' },
|
{ to: '/incidents', label: 'Incidents' },
|
||||||
|
{ to: '/alerts', label: 'Alerts' },
|
||||||
{ to: '/bundles', label: 'Bundles' },
|
{ to: '/bundles', label: 'Bundles' },
|
||||||
{ to: '/sources', label: 'Sources' },
|
{ to: '/sources', label: 'Sources' },
|
||||||
{ to: '/context', label: 'Context' },
|
{ to: '/context', label: 'Context' },
|
||||||
|
|
|
||||||
391
web/src/components/ChatDiagnose.vue
Normal file
391
web/src/components/ChatDiagnose.vue
Normal file
|
|
@ -0,0 +1,391 @@
|
||||||
|
<template>
|
||||||
|
<div class="flex flex-col h-full min-h-0">
|
||||||
|
|
||||||
|
<!-- Conversation history -->
|
||||||
|
<div
|
||||||
|
ref="scrollEl"
|
||||||
|
class="flex-1 overflow-y-auto space-y-6 pb-4 pr-1"
|
||||||
|
aria-live="polite"
|
||||||
|
aria-label="Conversation history"
|
||||||
|
>
|
||||||
|
<!-- Empty state -->
|
||||||
|
<div v-if="!turns.length" class="flex flex-col items-center justify-center py-16 text-center px-4">
|
||||||
|
<div class="text-4xl mb-3" aria-hidden="true">🪵</div>
|
||||||
|
<p class="text-text-primary text-base font-medium mb-1">Describe your issue</p>
|
||||||
|
<p class="text-text-dim text-sm max-w-md">
|
||||||
|
Write what you're seeing — however you'd say it. Turnstone will search
|
||||||
|
your logs and explain what it finds. Mention a service name to focus
|
||||||
|
the search ("meshtasticd keeps disconnecting after 4.10 update").
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Turn history -->
|
||||||
|
<template v-for="(turn, idx) in turns" :key="idx">
|
||||||
|
<!-- User bubble -->
|
||||||
|
<div class="flex justify-end">
|
||||||
|
<div class="max-w-[80%] rounded-2xl rounded-tr-sm bg-accent text-white px-4 py-2.5 text-sm whitespace-pre-wrap leading-relaxed">
|
||||||
|
{{ turn.query }}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Assistant response -->
|
||||||
|
<div class="flex flex-col gap-3">
|
||||||
|
<!-- Source chips -->
|
||||||
|
<div v-if="turn.sources?.length" class="flex flex-wrap gap-1.5 items-center">
|
||||||
|
<span class="text-xs text-text-dim">Searched:</span>
|
||||||
|
<span
|
||||||
|
v-for="s in turn.sources"
|
||||||
|
:key="s"
|
||||||
|
class="font-mono text-xs bg-surface-raised border border-surface-border rounded px-2 py-0.5 text-text-muted"
|
||||||
|
>{{ s }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Loading spinner -->
|
||||||
|
<div v-if="turn.loading" class="flex items-center gap-2 text-xs text-text-dim py-2">
|
||||||
|
<span class="inline-block w-3 h-3 rounded-full border-2 border-accent border-t-transparent animate-spin motion-reduce:animate-none" aria-hidden="true" />
|
||||||
|
<span>{{ turn.status ?? 'Searching logs…' }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- LLM reasoning -->
|
||||||
|
<div
|
||||||
|
v-if="turn.reasoning"
|
||||||
|
class="rounded-r border-l-4 border-accent bg-surface-raised px-4 py-3"
|
||||||
|
>
|
||||||
|
<div class="flex items-center gap-2 mb-2 text-xs text-accent font-semibold uppercase tracking-wide">
|
||||||
|
<span aria-hidden="true">⚡</span>
|
||||||
|
<span>Diagnosis</span>
|
||||||
|
</div>
|
||||||
|
<p class="text-sm text-text-primary leading-relaxed whitespace-pre-wrap">{{ turn.reasoning }}</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Summary bar -->
|
||||||
|
<div v-if="turn.summary" class="flex flex-wrap gap-x-5 gap-y-1 text-xs text-text-dim px-1">
|
||||||
|
<span class="font-medium text-text-muted">{{ turn.summary.total }} entr{{ turn.summary.total !== 1 ? 'ies' : 'y' }}</span>
|
||||||
|
<span v-if="turn.summary.window_start">
|
||||||
|
{{ fmtTs(turn.summary.window_start) }} → {{ fmtTs(turn.summary.window_end) }}
|
||||||
|
</span>
|
||||||
|
<button
|
||||||
|
v-if="turn.entries?.length && !turn.showEntries"
|
||||||
|
@click="turn.showEntries = true"
|
||||||
|
class="text-accent hover:underline"
|
||||||
|
>show {{ turn.entries.length }} log lines</button>
|
||||||
|
<button
|
||||||
|
v-if="turn.showEntries"
|
||||||
|
@click="turn.showEntries = false"
|
||||||
|
class="text-text-dim hover:text-text-primary"
|
||||||
|
>hide entries</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Log entries (collapsible) -->
|
||||||
|
<div
|
||||||
|
v-if="turn.showEntries && turn.entries?.length"
|
||||||
|
class="rounded border border-surface-border overflow-hidden"
|
||||||
|
>
|
||||||
|
<LogEntryRow
|
||||||
|
v-for="entry in turn.entries"
|
||||||
|
:key="entry.entry_id"
|
||||||
|
:entry="entry"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- No results -->
|
||||||
|
<p
|
||||||
|
v-if="!turn.loading && turn.summary?.total === 0"
|
||||||
|
class="text-sm text-text-dim px-1"
|
||||||
|
>
|
||||||
|
No log evidence found for that query. Check Sources to confirm data is
|
||||||
|
gleaned, or try different wording.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<!-- Save as incident -->
|
||||||
|
<div v-if="!turn.loading && (turn.entries?.length ?? 0) > 0 && !turn.saved" class="flex gap-3 mt-1">
|
||||||
|
<button
|
||||||
|
@click="saveIncident(turn)"
|
||||||
|
:disabled="turn.saving"
|
||||||
|
class="px-3 py-1.5 bg-surface-raised border border-surface-border rounded text-xs text-text-muted hover:text-text-primary hover:border-accent transition-colors disabled:opacity-40"
|
||||||
|
>
|
||||||
|
{{ turn.saving ? 'Saving…' : 'Save as incident' }}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<p v-if="turn.saved" class="text-xs text-green-400 px-1">
|
||||||
|
Saved —
|
||||||
|
<RouterLink to="/incidents" class="underline underline-offset-2 hover:text-green-300">view in Incidents</RouterLink>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Source suggestion pre-flight -->
|
||||||
|
<div
|
||||||
|
v-if="suggestedSources.length && !activeTurn"
|
||||||
|
class="mb-3 p-3 rounded border border-surface-border bg-surface-raised"
|
||||||
|
>
|
||||||
|
<p class="text-xs text-text-dim mb-2">Detected sources — deselect to exclude:</p>
|
||||||
|
<div class="flex flex-wrap gap-2">
|
||||||
|
<button
|
||||||
|
v-for="s in suggestedSources"
|
||||||
|
:key="s.source_id"
|
||||||
|
@click="toggleSource(s.source_id)"
|
||||||
|
:aria-pressed="!excludedSources.has(s.source_id)"
|
||||||
|
:class="[
|
||||||
|
'font-mono text-xs rounded px-2 py-1 border transition-colors',
|
||||||
|
excludedSources.has(s.source_id)
|
||||||
|
? 'bg-surface border-surface-border text-text-dim line-through'
|
||||||
|
: 'bg-accent/10 border-accent/40 text-accent'
|
||||||
|
]"
|
||||||
|
>{{ s.source_id }}</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Untracked name nudge -->
|
||||||
|
<div
|
||||||
|
v-if="untrackedNames.length && !activeTurn"
|
||||||
|
class="mb-3 p-3 rounded border border-yellow-700/40 bg-yellow-900/10"
|
||||||
|
>
|
||||||
|
<p class="text-xs text-yellow-400 mb-1">Not monitoring:
|
||||||
|
<span
|
||||||
|
v-for="name in untrackedNames"
|
||||||
|
:key="name"
|
||||||
|
class="font-mono ml-1 px-1.5 py-0.5 rounded bg-yellow-900/30 border border-yellow-700/30"
|
||||||
|
>{{ name }}</span>
|
||||||
|
</p>
|
||||||
|
<RouterLink
|
||||||
|
to="/sources"
|
||||||
|
class="text-xs text-accent hover:underline"
|
||||||
|
>Add as a log source →</RouterLink>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Input row -->
|
||||||
|
<div class="border-t border-surface-border pt-3">
|
||||||
|
<div class="flex gap-2 items-end">
|
||||||
|
<div class="flex-1">
|
||||||
|
<label :for="inputId" class="sr-only">Describe your issue</label>
|
||||||
|
<textarea
|
||||||
|
:id="inputId"
|
||||||
|
ref="textareaEl"
|
||||||
|
v-model="draft"
|
||||||
|
:disabled="!!activeTurn"
|
||||||
|
:placeholder="turns.length
|
||||||
|
? 'Follow up, or ask about something else…'
|
||||||
|
: 'Paste or type your issue — as much detail as you want…'"
|
||||||
|
rows="3"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded-xl px-4 py-2.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent transition-colors resize-none leading-relaxed disabled:opacity-50"
|
||||||
|
@input="onInput"
|
||||||
|
@keydown.enter.exact.prevent="submit"
|
||||||
|
@keydown.enter.shift.exact.stop
|
||||||
|
/>
|
||||||
|
<p class="text-right text-xs text-text-dim mt-1">Enter to search · Shift+Enter for new line</p>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
:disabled="!draft.trim() || !!activeTurn"
|
||||||
|
@click="submit"
|
||||||
|
class="shrink-0 px-4 py-2.5 rounded-xl bg-accent text-white text-sm font-semibold hover:bg-blue-400 transition-colors disabled:opacity-40 self-end mb-6"
|
||||||
|
aria-label="Search logs"
|
||||||
|
>
|
||||||
|
<span v-if="activeTurn">…</span>
|
||||||
|
<span v-else>Search</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, nextTick, onMounted } from 'vue'
|
||||||
|
import { RouterLink } from 'vue-router'
|
||||||
|
import LogEntryRow from '@/components/LogEntryRow.vue'
|
||||||
|
import type { LogEntry } from '@/stores/search'
|
||||||
|
|
||||||
|
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
||||||
|
const inputId = `chat-input-${Math.random().toString(36).slice(2, 7)}`
|
||||||
|
|
||||||
|
/**
 * Payload of a `summary` event from the diagnose stream; stored on
 * `Turn.summary` as-is.
 */
interface Summary {
|
||||||
|
// A value of 0 makes the template show the "No log evidence found" hint.
total: number
|
||||||
|
// Copied into `Turn.since` when the summary event arrives.
window_start: string | null
|
||||||
|
// Copied into `Turn.until` when the summary event arrives.
window_end: string | null
|
||||||
|
time_detected: boolean
|
||||||
|
by_severity: Record<string, number>
|
||||||
|
by_source: Record<string, number>
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One element of the `suggested` array returned by `/api/sources/suggest`;
 * rendered as a toggleable chip keyed by `source_id`.
 */
interface SuggestedSource {
|
||||||
|
// Used as the chip label, the v-for key, and the id tracked in `excludedSources`.
source_id: string
|
||||||
|
score: number
|
||||||
|
matched_tokens: string[]
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One question/answer exchange in the chat: the submitted query plus
 * everything streamed back for it and the state of its "Save as incident" action.
 */
interface Turn {
|
||||||
|
// Trimmed text the user submitted.
query: string
|
||||||
|
// True from submit until the stream finishes or fails.
loading: boolean
|
||||||
|
// Latest `status` event message; reset to null when the turn ends.
status: string | null
|
||||||
|
// Text of the `reasoning` event, or an "Error: …" message if the request failed.
reasoning: string | null
|
||||||
|
summary: Summary | null
|
||||||
|
entries: LogEntry[]
|
||||||
|
// Source ids that were selected (not excluded) when the query was submitted.
sources: string[]
|
||||||
|
// Set automatically when an `entries` event carries 1–10 entries.
showEntries: boolean
|
||||||
|
// True once the incident POST succeeded.
saved: boolean
|
||||||
|
// True while the incident POST is in flight.
saving: boolean
|
||||||
|
// `window_start` of the summary; sent as `started_at` when saving an incident.
since: string | null
|
||||||
|
// `window_end` of the summary; sent as `ended_at` when saving an incident.
until: string | null
|
||||||
|
}
|
||||||
|
|
||||||
|
const turns = ref<Turn[]>([])
|
||||||
|
const draft = ref('')
|
||||||
|
const suggestedSources = ref<SuggestedSource[]>([])
|
||||||
|
const untrackedNames = ref<string[]>([])
|
||||||
|
const excludedSources = ref(new Set<string>())
|
||||||
|
const activeTurn = ref<Turn | null>(null)
|
||||||
|
const scrollEl = ref<HTMLElement | null>(null)
|
||||||
|
const textareaEl = ref<HTMLTextAreaElement | null>(null)
|
||||||
|
|
||||||
|
let suggestTimer: ReturnType<typeof setTimeout> | null = null
|
||||||
|
|
||||||
|
onMounted(() => textareaEl.value?.focus())
|
||||||
|
|
||||||
|
/**
 * Textarea `input` handler: grows the box to fit its content and
 * (re)schedules the debounced source-suggestion lookup.
 */
function onInput() {
  const box = textareaEl.value
  if (box) {
    // Collapse first so scrollHeight reflects the content, then cap at 240px.
    box.style.height = 'auto'
    box.style.height = `${Math.min(box.scrollHeight, 240)}px`
  }

  // Every keystroke restarts the debounce window.
  if (suggestTimer) clearTimeout(suggestTimer)

  const longEnough = draft.value.trim().length > 8
  if (!longEnough) {
    // Too short to suggest anything — drop whatever was shown before.
    suggestedSources.value = []
    untrackedNames.value = []
    return
  }
  suggestTimer = setTimeout(fetchSuggestions, 400)
}
|
||||||
|
|
||||||
|
/**
 * Asks the backend which log sources match the current draft and updates
 * the suggestion chips and the "not monitoring" nudge.
 *
 * Best-effort: network or parse failures are ignored because suggestions are
 * only a convenience. A response is discarded when the draft changed while
 * the request was in flight (the user kept typing, or submitted), so stale
 * suggestions never overwrite newer state.
 */
async function fetchSuggestions() {
  const askedFor = draft.value
  try {
    const res = await fetch(`${BASE}/api/sources/suggest`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query: askedFor }),
    })
    if (!res.ok) return
    const data = await res.json()

    // The draft moved on while we were waiting — this answer is stale.
    if (draft.value !== askedFor) return

    const next: SuggestedSource[] = (data.suggested ?? []).slice(0, 6)
    const idsOf = (list: SuggestedSource[]) => list.map(s => s.source_id).join('\n')
    const changed = idsOf(suggestedSources.value) !== idsOf(next)

    suggestedSources.value = next
    untrackedNames.value = data.untracked_names ?? []
    // Reset exclusions only when the suggested set actually changed, so the
    // user's deselections survive a refresh that returns the same sources.
    if (changed) excludedSources.value = new Set()
  } catch { /* non-critical */ }
}
|
||||||
|
|
||||||
|
/**
 * Flips whether the suggested source `id` is excluded from the next query.
 * A fresh Set is assigned (rather than mutating in place) so the ref changes.
 */
function toggleSource(id: string) {
  const updated = new Set(excludedSources.value)
  const wasExcluded = updated.has(id)
  if (wasExcluded) {
    updated.delete(id)
  } else {
    updated.add(id)
  }
  excludedSources.value = updated
}
|
||||||
|
|
||||||
|
/** Scrolls the conversation pane to the bottom once pending DOM updates have flushed. */
async function scrollToLatest(): Promise<void> {
  await nextTick()
  scrollEl.value?.scrollTo({ top: scrollEl.value.scrollHeight, behavior: 'smooth' })
}

/**
 * Applies one blank-line-delimited frame of the diagnose stream to `turn`.
 * Frames that do not start with `data: ` are ignored; a frame whose payload
 * is not valid JSON throws.
 */
async function applyStreamFrame(turn: Turn, frame: string): Promise<void> {
  const line = frame.trim()
  if (!line.startsWith('data: ')) return
  const evt = JSON.parse(line.slice(6))
  if (evt.type === 'status') {
    turn.status = evt.message
  } else if (evt.type === 'summary') {
    turn.summary = evt.data
    turn.since = evt.data.window_start
    turn.until = evt.data.window_end
  } else if (evt.type === 'entries') {
    turn.entries = evt.data
    // Small result sets are expanded automatically.
    turn.showEntries = evt.data.length > 0 && evt.data.length <= 10
  } else if (evt.type === 'reasoning') {
    turn.reasoning = evt.text
    await scrollToLatest()
  } else if (evt.type === 'done') {
    turn.status = null
  }
}

/**
 * Submits the current draft as a new turn and streams the diagnosis into it.
 *
 * Does nothing when the draft is blank or another turn is still running.
 * Request or stream failures are reported in the turn's `reasoning` text.
 */
async function submit() {
  const text = draft.value.trim()
  if (!text || activeTurn.value) return

  // Determine the source scope from the non-excluded suggestions BEFORE the
  // suggestion state is cleared; reading it afterwards always yields [].
  const sources = suggestedSources.value
    .filter(s => !excludedSources.value.has(s.source_id))
    .map(s => s.source_id)

  // A debounced lookup scheduled for the old draft must not fire after submit.
  if (suggestTimer) {
    clearTimeout(suggestTimer)
    suggestTimer = null
  }
  draft.value = ''
  suggestedSources.value = []
  untrackedNames.value = []
  excludedSources.value = new Set()
  if (textareaEl.value) textareaEl.value.style.height = 'auto'

  turns.value.push({
    query: text,
    loading: true,
    status: 'Searching…',
    reasoning: null,
    summary: null,
    entries: [],
    sources,
    showEntries: false,
    saved: false,
    saving: false,
    since: null,
    until: null,
  })
  // Work on the reactive proxy held by `turns`, not the raw object literal:
  // writes to the raw object do not trigger re-renders, so streamed updates
  // would otherwise only become visible when the turn ends.
  const turn = turns.value[turns.value.length - 1]!
  activeTurn.value = turn
  await scrollToLatest()

  try {
    const res = await fetch(`${BASE}/api/diagnose/stream`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        query: text,
        // Only a single unambiguous source is sent as a scope.
        source: sources.length === 1 ? sources[0] : null,
      }),
    })
    if (!res.ok || !res.body) throw new Error(`API ${res.status}`)

    const reader = res.body.getReader()
    const decoder = new TextDecoder()
    let buf = ''

    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      buf += decoder.decode(value, { stream: true })
      // Frames are separated by a blank line; the last piece may be incomplete.
      const parts = buf.split('\n\n')
      buf = parts.pop() ?? ''
      for (const part of parts) await applyStreamFrame(turn, part)
    }

    // Flush the decoder and apply a final frame that arrived without a
    // trailing blank line. A truncated frame is dropped rather than reported.
    buf += decoder.decode()
    if (buf.trim()) {
      try {
        await applyStreamFrame(turn, buf)
      } catch { /* incomplete trailing frame — ignore */ }
    }
  } catch (e) {
    turn.reasoning = `Error: ${e instanceof Error ? e.message : String(e)}`
  } finally {
    turn.loading = false
    turn.status = null
    activeTurn.value = null
    await scrollToLatest()
    textareaEl.value?.focus()
  }
}
|
||||||
|
|
||||||
|
/**
 * Saves `turn` as an incident via POST `/api/incidents`.
 *
 * The label is the first 120 characters of the query, the window comes from
 * the turn's `since`/`until`, and the reasoning text becomes the notes.
 * On success `turn.saved` is set; on failure the chat keeps working and the
 * error is logged to the console so it is not lost entirely.
 */
async function saveIncident(turn: Turn) {
  // Ignore re-entry while a save is in flight or after it already succeeded.
  if (turn.saving || turn.saved) return
  turn.saving = true
  try {
    const res = await fetch(`${BASE}/api/incidents`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        label: turn.query.slice(0, 120),
        started_at: turn.since,
        ended_at: turn.until,
        severity: 'medium',
        notes: turn.reasoning ?? '',
      }),
    })
    if (!res.ok) throw new Error((await res.text()) || `API ${res.status}`)
    turn.saved = true
  } catch (e) {
    // Not worth crashing the chat, but leave a trace for debugging.
    console.warn('Failed to save incident:', e)
  } finally {
    turn.saving = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Formats an ISO timestamp as a short locale string (month, day, hour, minute).
 *
 * @param iso ISO timestamp, or null when unknown.
 * @returns '—' for null/empty input, the raw input when it cannot be parsed,
 *          otherwise the formatted date.
 */
function fmtTs(iso: string | null): string {
  if (!iso) return '—'
  const parsed = new Date(iso)
  // toLocaleString does not throw on an invalid date — it returns
  // "Invalid Date" — so unparseable input has to be detected explicitly.
  if (Number.isNaN(parsed.getTime())) return iso
  try {
    return parsed.toLocaleString(undefined, {
      month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit',
    })
  } catch { return iso }
}
|
||||||
|
</script>
|
||||||
375
web/src/components/IncidentTimeline.vue
Normal file
375
web/src/components/IncidentTimeline.vue
Normal file
|
|
@ -0,0 +1,375 @@
|
||||||
|
<template>
|
||||||
|
<div class="incident-timeline" v-if="hasData">
|
||||||
|
<!-- Axis labels -->
|
||||||
|
<div class="flex justify-between text-xs text-text-dim mb-1 px-1 font-mono">
|
||||||
|
<span>{{ startLabel }}</span>
|
||||||
|
<span class="text-center text-text-dim opacity-60 text-[10px]">{{ totalLabel }}</span>
|
||||||
|
<span>{{ endLabel }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- SVG strip -->
|
||||||
|
<div
|
||||||
|
class="relative rounded border bg-surface overflow-hidden"
|
||||||
|
:class="brushable ? 'border-accent/40 cursor-crosshair' : 'border-surface-border'"
|
||||||
|
style="height:64px"
|
||||||
|
>
|
||||||
|
<svg
|
||||||
|
:viewBox="`0 0 ${W} ${H}`"
|
||||||
|
preserveAspectRatio="none"
|
||||||
|
class="w-full h-full select-none"
|
||||||
|
@mousedown="onMouseDown"
|
||||||
|
@mousemove="onMouseMove"
|
||||||
|
@mouseup="onMouseUp"
|
||||||
|
@mouseleave="onMouseLeave"
|
||||||
|
>
|
||||||
|
<!-- Burst density bands (bin shading) -->
|
||||||
|
<rect
|
||||||
|
v-for="(bin, i) in densityBins"
|
||||||
|
:key="`bin-${i}`"
|
||||||
|
:x="bin.x"
|
||||||
|
:width="bin.w"
|
||||||
|
y="0"
|
||||||
|
:height="H"
|
||||||
|
:fill="bin.fill"
|
||||||
|
:fill-opacity="bin.opacity"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- Gap markers -->
|
||||||
|
<line
|
||||||
|
v-for="(gap, i) in gapMarkers"
|
||||||
|
:key="`gap-${i}`"
|
||||||
|
:x1="gap.x"
|
||||||
|
:x2="gap.x"
|
||||||
|
y1="4"
|
||||||
|
:y2="H - 4"
|
||||||
|
stroke="var(--color-text-dim)"
|
||||||
|
stroke-width="1"
|
||||||
|
stroke-dasharray="3,3"
|
||||||
|
opacity="0.5"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- Event ticks -->
|
||||||
|
<rect
|
||||||
|
v-for="(ev, i) in eventTicks"
|
||||||
|
:key="`ev-${i}`"
|
||||||
|
:x="ev.x - 1"
|
||||||
|
width="2"
|
||||||
|
:y="ev.y"
|
||||||
|
:height="ev.h"
|
||||||
|
:fill="ev.color"
|
||||||
|
:fill-opacity="ev.alpha"
|
||||||
|
:class="brushable ? '' : 'cursor-pointer'"
|
||||||
|
@click.stop="!brushable && $emit('select-entry', ev.index)"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- Brush selection rect -->
|
||||||
|
<rect
|
||||||
|
v-if="brushable && brushW > 4"
|
||||||
|
:x="brushLeft"
|
||||||
|
:width="brushW"
|
||||||
|
y="0"
|
||||||
|
:height="H"
|
||||||
|
fill="var(--color-accent)"
|
||||||
|
fill-opacity="0.18"
|
||||||
|
stroke="var(--color-accent)"
|
||||||
|
stroke-width="1"
|
||||||
|
stroke-opacity="0.5"
|
||||||
|
pointer-events="none"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<!-- Axis baseline -->
|
||||||
|
<line
|
||||||
|
x1="0" :x2="W" :y1="H - 6" :y2="H - 6"
|
||||||
|
stroke="var(--color-surface-border)"
|
||||||
|
stroke-width="1"
|
||||||
|
/>
|
||||||
|
</svg>
|
||||||
|
|
||||||
|
<!-- Hover tooltip (hidden while brushing) -->
|
||||||
|
<div
|
||||||
|
v-if="tooltip && !isDragging"
|
||||||
|
class="absolute pointer-events-none z-10 bg-surface-raised border border-surface-border rounded px-2 py-1 text-xs text-text-primary shadow-md max-w-xs truncate"
|
||||||
|
:style="{ left: `${tooltip.px}px`, top: '4px', transform: tooltip.flip ? 'translateX(-100%)' : '' }"
|
||||||
|
>
|
||||||
|
<span :class="severityClass(tooltip.severity)" class="mr-1 font-bold">{{ tooltip.severity }}</span>
|
||||||
|
<span class="text-text-dim mr-1">{{ tooltip.time }}</span>
|
||||||
|
<span class="text-text-muted">{{ tooltip.text }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Legend -->
|
||||||
|
<div class="flex gap-3 mt-1.5 text-[10px] text-text-dim px-1">
|
||||||
|
<span
|
||||||
|
v-for="sev in legendItems"
|
||||||
|
:key="sev.label"
|
||||||
|
class="flex items-center gap-1"
|
||||||
|
>
|
||||||
|
<span class="inline-block w-2 h-2 rounded-sm" :style="{ background: sev.color }"></span>
|
||||||
|
{{ sev.label }}
|
||||||
|
</span>
|
||||||
|
<span v-if="brushable" class="ml-auto text-text-dim opacity-70 italic">drag to filter</span>
|
||||||
|
<span v-else class="ml-auto">{{ entries.length }} events</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<script setup lang="ts">
|
||||||
|
import { computed, ref } from 'vue'
|
||||||
|
|
||||||
|
interface Entry {
|
||||||
|
entry_id: string
|
||||||
|
source_id: string
|
||||||
|
timestamp_iso: string | null
|
||||||
|
severity: string | null
|
||||||
|
text: string
|
||||||
|
}
|
||||||
|
|
||||||
|
const props = defineProps<{
|
||||||
|
entries: Entry[]
|
||||||
|
startedAt?: string | null
|
||||||
|
endedAt?: string | null
|
||||||
|
brushable?: boolean
|
||||||
|
}>()
|
||||||
|
|
||||||
|
const emit = defineEmits<{
|
||||||
|
'select-entry': [index: number]
|
||||||
|
'select-range': [range: { from: string; to: string } | null]
|
||||||
|
}>()
|
||||||
|
|
||||||
|
// ── brush state ─────────────────────────────────────────────────────────────
|
||||||
|
const isDragging = ref(false)
|
||||||
|
const brushAnchor = ref(0) // SVG-space X where drag started
|
||||||
|
const brushCursor = ref(0) // SVG-space X of current mouse position
|
||||||
|
|
||||||
|
const brushLeft = computed(() => Math.min(brushAnchor.value, brushCursor.value))
|
||||||
|
const brushW = computed(() => Math.abs(brushCursor.value - brushAnchor.value))
|
||||||
|
|
||||||
|
// SVG logical dimensions
|
||||||
|
const W = 1000
|
||||||
|
const H = 64
|
||||||
|
|
||||||
|
// ── colour map ─────────────────────────────────────────────────────────────
|
||||||
|
// Severity name (upper case) → CSS custom property holding its colour.
const SEV_COLORS: Record<string, string> = {
  DEBUG: 'var(--color-sev-debug)',
  INFO: 'var(--color-sev-info)',
  WARN: 'var(--color-sev-warn)',
  WARNING: 'var(--color-sev-warn)',
  ERROR: 'var(--color-sev-error)',
  CRITICAL: 'var(--color-sev-critical)',
}

/**
 * Resolves the colour for a severity, case-insensitively.
 * Null or unrecognised severities fall back to the dim text colour.
 */
function sevColor(sev: string | null): string {
  const key = (sev ?? '').toUpperCase()
  const known = SEV_COLORS[key]
  return known ?? 'var(--color-text-dim)'
}
|
||||||
|
|
||||||
|
// ── time range ──────────────────────────────────────────────────────────────
|
||||||
|
/**
 * Entries that carry a parseable timestamp, each extended with `ms`
 * (epoch milliseconds) and sorted oldest first.
 *
 * Entries with a missing or unparseable `timestamp_iso` are left out: a NaN
 * `ms` would make the sort order undefined and propagate into the time
 * range and every x position derived from it.
 */
const timed = computed(() =>
  props.entries
    .map(e => ({
      ...e,
      ms: e.timestamp_iso ? new Date(e.timestamp_iso).getTime() : Number.NaN,
    }))
    .filter(e => Number.isFinite(e.ms))
    .sort((a, b) => a.ms - b.ms)
)
|
||||||
|
|
||||||
|
/**
 * Left edge of the timeline in epoch ms: the `startedAt` prop when it is set
 * and parseable, otherwise the earliest timed entry, otherwise "now".
 */
const tMin = computed(() => {
  const explicit = props.startedAt ? new Date(props.startedAt).getTime() : Number.NaN
  // An unparseable prop must not turn the whole axis into NaN.
  if (Number.isFinite(explicit)) return explicit
  return timed.value[0]?.ms ?? Date.now()
})
|
||||||
|
|
||||||
|
/**
 * Right edge of the timeline in epoch ms: the `endedAt` prop when it is set
 * and parseable, otherwise the latest timed entry (or "now"), kept at least
 * one second after `tMin`.
 */
const tMax = computed(() => {
  const explicit = props.endedAt ? new Date(props.endedAt).getTime() : Number.NaN
  // An unparseable prop must not turn the whole axis into NaN.
  if (Number.isFinite(explicit)) return explicit
  const last = timed.value[timed.value.length - 1]?.ms ?? Date.now()
  return Math.max(last, tMin.value + 1000) // at least 1s span
})
|
||||||
|
|
||||||
|
/** Width of the visible window in ms, never below 1 so `xOf` cannot divide by zero. */
const span = computed(() => {
  const width = tMax.value - tMin.value
  return Math.max(width, 1)
})

/** Maps an epoch-ms instant to an x coordinate in SVG logical units (0…W across the window). */
function xOf(ms: number): number {
  const fraction = (ms - tMin.value) / span.value
  return fraction * W
}

/** True when at least one entry has a timestamp; the whole component is hidden otherwise. */
const hasData = computed(() => timed.value.length !== 0)
|
||||||
|
|
||||||
|
// ── axis labels ─────────────────────────────────────────────────────────────
|
||||||
|
/** Formats an epoch-ms instant as a locale time of day with two-digit hour, minute and second. */
function fmtTs(ms: number): string {
  const instant = new Date(ms)
  return instant.toLocaleTimeString(undefined, {
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit',
  })
}
|
||||||
|
|
||||||
|
/**
 * Renders a duration given in milliseconds as a compact label:
 * "42s" under a minute, "3m 7s" under an hour, "2h 15m" otherwise.
 * The input is rounded to whole seconds first.
 */
function fmtDuration(ms: number): string {
  const totalSec = Math.round(ms / 1000)
  if (totalSec < 60) {
    return `${totalSec}s`
  }
  if (totalSec < 3600) {
    const minutes = Math.floor(totalSec / 60)
    const seconds = totalSec % 60
    return `${minutes}m ${seconds}s`
  }
  const hours = Math.floor(totalSec / 3600)
  const minutes = Math.floor((totalSec % 3600) / 60)
  return `${hours}h ${minutes}m`
}
|
||||||
|
|
||||||
|
const startLabel = computed(() => fmtTs(tMin.value))
|
||||||
|
const endLabel = computed(() => fmtTs(tMax.value))
|
||||||
|
const totalLabel = computed(() => fmtDuration(span.value))
|
||||||
|
|
||||||
|
// ── density bins (burst shading) ────────────────────────────────────────────
|
||||||
|
const NUM_BINS = 50
|
||||||
|
|
||||||
|
/**
 * Background shading for the timeline: the window is split into NUM_BINS
 * equal slices and each slice is tinted in proportion to how many entries
 * fall into it (relative to the busiest slice).
 *
 * Only entries inside [tMin, tMax] are counted. Entries outside the window
 * (possible when `startedAt`/`endedAt` are narrower than the data) would
 * otherwise produce a negative bin index or be lumped into the last bin.
 */
const densityBins = computed(() => {
  const binW = span.value / NUM_BINS
  const counts = new Array<number>(NUM_BINS).fill(0)
  for (const e of timed.value) {
    const offset = e.ms - tMin.value
    // Written as a positive range test so a NaN offset is skipped as well.
    if (!(offset >= 0 && offset <= span.value)) continue
    // An entry exactly at tMax computes index NUM_BINS; keep it in the last bin.
    const idx = Math.min(Math.floor(offset / binW), NUM_BINS - 1)
    counts[idx] = (counts[idx] ?? 0) + 1
  }
  const maxCount = Math.max(...counts, 1)
  return counts.map((count, i) => ({
    x: (i / NUM_BINS) * W,
    // Slight overlap so adjacent bands leave no hairline gaps.
    w: W / NUM_BINS + 0.5,
    fill: count > 0 ? 'var(--color-accent)' : 'transparent',
    opacity: count > 0 ? Math.min(0.08 + (count / maxCount) * 0.25, 0.33) : 0,
  }))
})
|
||||||
|
|
||||||
|
// ── gap markers (silence periods of at least max(10% of span, 60s)) ────────
|
||||||
|
/**
 * X positions of dashed "silence" markers. A marker is placed at the midpoint
 * between two consecutive entries whose distance is at least the larger of
 * 10% of the visible span and 60 seconds.
 */
const gapMarkers = computed(() => {
  const events = timed.value
  if (events.length < 2) return []

  const threshold = Math.max(span.value * 0.1, 60_000)
  return events.slice(1).flatMap((curr, k) => {
    // `k` indexes the sliced array, so events[k] is the entry just before `curr`.
    const prev = events[k]!
    const gap = curr.ms - prev.ms
    return gap >= threshold ? [{ x: xOf(prev.ms + gap / 2) }] : []
  })
})
|
||||||
|
|
||||||
|
// ── event ticks ─────────────────────────────────────────────────────────────
|
||||||
|
const SEV_HEIGHT: Record<string, number> = {
|
||||||
|
DEBUG: 16, INFO: 24, WARN: 32, WARNING: 32, ERROR: 44, CRITICAL: 52,
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One vertical tick per timed entry. Tick height encodes severity (24 for
 * unknown severities) and ticks stand on the axis baseline at H - 6.
 *
 * NOTE(review): `index` is the position within the sorted, timestamp-filtered
 * `timed` list, not within `props.entries` — confirm that the `select-entry`
 * listener expects that.
 */
const eventTicks = computed(() =>
  timed.value.map(({ ms, severity }, index) => {
    const barHeight = SEV_HEIGHT[(severity ?? '').toUpperCase()] ?? 24
    const top = H - 6 - barHeight
    return {
      index,
      x: xOf(ms),
      y: top,
      h: barHeight,
      color: sevColor(severity),
      alpha: 0.85,
    }
  })
)
|
||||||
|
|
||||||
|
// ── legend items (only severities present in this incident) ─────────────────
|
||||||
|
/**
 * Legend entries for the severities that actually occur among the timed
 * entries, ordered from most to least severe.
 *
 * `WARNING` is folded into `WARN` (they share a colour and tick height), so
 * entries labelled `WARNING` get a legend item too.
 */
const legendItems = computed(() => {
  const seen = new Set(
    timed.value.map(e => {
      const sev = (e.severity ?? 'UNKNOWN').toUpperCase()
      return sev === 'WARNING' ? 'WARN' : sev
    })
  )
  return (['CRITICAL', 'ERROR', 'WARN', 'INFO', 'DEBUG'] as const)
    .filter(s => seen.has(s))
    .map(s => ({ label: s, color: sevColor(s) }))
})
|
||||||
|
|
||||||
|
// ── hover tooltip ────────────────────────────────────────────────────────────
|
||||||
|
/** State of the hover tooltip shown for the entry nearest to the mouse. */
interface Tooltip {
|
||||||
|
// Horizontal mouse offset in CSS pixels from the SVG's left edge; used as `left`.
px: number
|
||||||
|
// Set when the mouse is past 70% of the width; the tooltip is then shifted left by its own width.
flip: boolean
|
||||||
|
severity: string | null
|
||||||
|
// Entry time already formatted with fmtTs.
time: string
|
||||||
|
// First 120 characters of the entry text.
text: string
|
||||||
|
}
|
||||||
|
|
||||||
|
const tooltip = ref<Tooltip | null>(null)
|
||||||
|
|
||||||
|
/** Converts the mouse position of `e` to an x coordinate in SVG logical units (0…W). */
function _svgX(e: MouseEvent): number {
  const bounds = (e.currentTarget as SVGElement).getBoundingClientRect()
  const fraction = (e.clientX - bounds.left) / bounds.width
  return fraction * W
}
|
||||||
|
|
||||||
|
/** Horizontal mouse offset of `e`, in CSS pixels, from the left edge of the element handling the event. */
function _pxX(e: MouseEvent): number {
  const { left } = (e.currentTarget as SVGElement).getBoundingClientRect()
  return e.clientX - left
}
|
||||||
|
|
||||||
|
/** Starts a brush drag at the mouse position; ignored unless the timeline is brushable. */
function onMouseDown(e: MouseEvent) {
  if (props.brushable) {
    const startX = _svgX(e)
    // Anchor and cursor start together, giving a zero-width selection.
    brushAnchor.value = startX
    brushCursor.value = startX
    isDragging.value = true
    e.preventDefault()
  }
}
|
||||||
|
|
||||||
|
/**
 * Mouse-move handler with two modes: while a brush drag is active it extends
 * the selection (clamped to 0…W); otherwise it shows a tooltip for the entry
 * closest in time to the mouse, or hides it when nothing is close enough.
 */
function onMouseMove(e: MouseEvent) {
  if (props.brushable && isDragging.value) {
    const clampedHigh = Math.min(W, _svgX(e))
    brushCursor.value = Math.max(0, clampedHigh)
    return
  }

  const bounds = (e.currentTarget as SVGElement).getBoundingClientRect()
  const relX = (e.clientX - bounds.left) / bounds.width
  const hoveredMs = tMin.value + relX * span.value

  // Linear scan for the entry whose timestamp is nearest to the mouse.
  let best = timed.value[0]
  let bestDist = Infinity
  for (const candidate of timed.value) {
    const dist = Math.abs(candidate.ms - hoveredMs)
    if (dist < bestDist) {
      bestDist = dist
      best = candidate
    }
  }
  if (!best) return

  // Tolerance: 3% of the span plus a fixed 5 seconds.
  const tolerance = span.value * 0.03 + 5000
  if (bestDist > tolerance) {
    tooltip.value = null
    return
  }

  const px = e.clientX - bounds.left
  tooltip.value = {
    px,
    // Past 70% of the width the tooltip is flipped to the left of the mouse.
    flip: px > bounds.width * 0.7,
    severity: best.severity,
    time: fmtTs(best.ms),
    text: best.text.slice(0, 120),
  }
}
|
||||||
|
|
||||||
|
/**
 * Ends a brush drag. A drag narrower than 8 SVG units counts as a click and
 * clears the selection (`select-range` with null); anything wider emits the
 * selected window as ISO strings.
 */
function onMouseUp(e: MouseEvent) {
  const dragging = props.brushable && isDragging.value
  if (!dragging) return
  isDragging.value = false

  const anchor = brushAnchor.value
  const cursor = brushCursor.value

  if (Math.abs(cursor - anchor) < 8) {
    // Click without meaningful drag — clear selection
    brushAnchor.value = 0
    brushCursor.value = 0
    emit('select-range', null)
    return
  }

  // Convert an SVG x coordinate back to an ISO timestamp inside the window.
  const isoAt = (x: number) => new Date(tMin.value + (x / W) * span.value).toISOString()
  const from = isoAt(Math.min(anchor, cursor))
  const to = isoAt(Math.max(anchor, cursor))
  emit('select-range', { from, to })
}
|
||||||
|
|
||||||
|
/**
 * Hides the tooltip and cancels any brush drag in progress when the mouse
 * leaves the SVG.
 *
 * NOTE(review): a cancelled drag resets the brush rectangle but emits no
 * `select-range` event, so a previously emitted range stays active in the
 * parent — confirm this is intended.
 */
function onMouseLeave() {
  tooltip.value = null
  if (!isDragging.value) return
  isDragging.value = false
  brushAnchor.value = 0
  brushCursor.value = 0
}
|
||||||
|
|
||||||
|
/**
 * Text-colour utility class for a severity, matched case-insensitively.
 * DEBUG, null and unrecognised severities all use the dim text class.
 */
function severityClass(sev: string | null): string {
  const classBySeverity: Record<string, string> = {
    CRITICAL: 'text-sev-critical',
    ERROR: 'text-sev-error',
    WARNING: 'text-sev-warn',
    WARN: 'text-sev-warn',
    INFO: 'text-sev-info',
    DEBUG: 'text-text-dim',
  }
  const key = (sev ?? '').toUpperCase()
  return classBySeverity[key] ?? 'text-text-dim'
}
|
||||||
|
</script>
|
||||||
|
|
@ -33,15 +33,29 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Source scope badge -->
|
<!-- Source scope badge -->
|
||||||
<div v-if="sourceScope" class="flex items-center gap-2 mb-4 text-xs">
|
<div v-if="sourceScope || timeFrom" class="flex flex-wrap items-center gap-2 mb-4 text-xs">
|
||||||
<span class="text-text-dim">Scoped to:</span>
|
<template v-if="sourceScope">
|
||||||
<span class="font-mono text-surface bg-accent rounded px-2 py-0.5">{{ sourceScope }}</span>
|
<span class="text-text-dim">Scoped to:</span>
|
||||||
<button
|
<span class="font-mono text-surface bg-accent rounded px-2 py-0.5">{{ sourceScope }}</span>
|
||||||
@click="sourceScope = null"
|
<button
|
||||||
class="text-text-dim hover:text-text-primary ml-1"
|
@click="sourceScope = null"
|
||||||
title="Clear scope"
|
class="text-text-dim hover:text-text-primary"
|
||||||
aria-label="Clear source scope filter"
|
title="Clear scope"
|
||||||
>✕</button>
|
aria-label="Clear source scope filter"
|
||||||
|
>✕</button>
|
||||||
|
</template>
|
||||||
|
<template v-if="timeFrom">
|
||||||
|
<span class="text-text-dim ml-1">Window:</span>
|
||||||
|
<span class="font-mono text-surface bg-accent/80 rounded px-2 py-0.5">
|
||||||
|
{{ _fmtTs(timeFrom) }} → {{ timeTo ? _fmtTs(timeTo) : 'now' }}
|
||||||
|
</span>
|
||||||
|
<button
|
||||||
|
@click="timeFrom = null; timeTo = null"
|
||||||
|
class="text-text-dim hover:text-text-primary"
|
||||||
|
title="Clear time window"
|
||||||
|
aria-label="Clear time window filter"
|
||||||
|
>✕</button>
|
||||||
|
</template>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Error -->
|
<!-- Error -->
|
||||||
|
|
@ -192,6 +206,8 @@ interface Summary {
|
||||||
|
|
||||||
const query = ref('')
|
const query = ref('')
|
||||||
const sourceScope = ref<string | null>(null)
|
const sourceScope = ref<string | null>(null)
|
||||||
|
const timeFrom = ref<string | null>(null)
|
||||||
|
const timeTo = ref<string | null>(null)
|
||||||
const entries = ref<LogEntry[]>([])
|
const entries = ref<LogEntry[]>([])
|
||||||
const summary = ref<Summary | null>(null)
|
const summary = ref<Summary | null>(null)
|
||||||
const reasoning = ref<string | null>(null)
|
const reasoning = ref<string | null>(null)
|
||||||
|
|
@ -210,9 +226,19 @@ const severityFilter = ref<string | null>(null)
|
||||||
let capturedSince: string | null = null
|
let capturedSince: string | null = null
|
||||||
let capturedUntil: string | null = null
|
let capturedUntil: string | null = null
|
||||||
|
|
||||||
|
/**
 * Formats an ISO timestamp as a short locale string (month, day, hour, minute).
 * Returns the input unchanged when it cannot be parsed as a date.
 */
function _fmtTs(iso: string): string {
  const parsed = new Date(iso)
  // toLocaleString returns "Invalid Date" instead of throwing, so the
  // fallback to the raw string needs an explicit validity check.
  if (Number.isNaN(parsed.getTime())) return iso
  try {
    return parsed.toLocaleString(undefined, { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' })
  } catch { return iso }
}
|
||||||
|
|
||||||
onMounted(async () => {
|
onMounted(async () => {
|
||||||
const s = route.query.source
|
const s = route.query.source
|
||||||
if (typeof s === 'string' && s.trim()) sourceScope.value = s
|
if (typeof s === 'string' && s.trim()) sourceScope.value = s
|
||||||
|
const f = route.query.from
|
||||||
|
const t = route.query.to
|
||||||
|
if (typeof f === 'string' && f) timeFrom.value = f
|
||||||
|
if (typeof t === 'string' && t) timeTo.value = t
|
||||||
const q = route.query.q
|
const q = route.query.q
|
||||||
if (typeof q === 'string' && q.trim()) {
|
if (typeof q === 'string' && q.trim()) {
|
||||||
query.value = q
|
query.value = q
|
||||||
|
|
@ -258,7 +284,12 @@ async function run() {
|
||||||
const res = await fetch(`${BASE}/api/diagnose/stream`, {
|
const res = await fetch(`${BASE}/api/diagnose/stream`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ query: query.value, source: sourceScope.value }),
|
body: JSON.stringify({
|
||||||
|
query: query.value,
|
||||||
|
source: sourceScope.value,
|
||||||
|
since: timeFrom.value || undefined,
|
||||||
|
until: timeTo.value || undefined,
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
if (!res.ok) throw new Error(`API returned ${res.status}`)
|
if (!res.ok) throw new Error(`API returned ${res.status}`)
|
||||||
if (!res.body) throw new Error('No response body')
|
if (!res.body) throw new Error('No response body')
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import BundlesView from '@/views/BundlesView.vue'
|
||||||
import SettingsView from '@/views/SettingsView.vue'
|
import SettingsView from '@/views/SettingsView.vue'
|
||||||
import ContextView from '@/views/ContextView.vue'
|
import ContextView from '@/views/ContextView.vue'
|
||||||
import BlocklistView from '@/views/BlocklistView.vue'
|
import BlocklistView from '@/views/BlocklistView.vue'
|
||||||
|
import SecurityAlertsView from '@/views/SecurityAlertsView.vue'
|
||||||
|
|
||||||
export default createRouter({
|
export default createRouter({
|
||||||
history: createWebHistory(import.meta.env.BASE_URL),
|
history: createWebHistory(import.meta.env.BASE_URL),
|
||||||
|
|
@ -17,6 +18,7 @@ export default createRouter({
|
||||||
{ path: '/search', component: LogSearchView },
|
{ path: '/search', component: LogSearchView },
|
||||||
{ path: '/diagnose', component: DiagnoseView },
|
{ path: '/diagnose', component: DiagnoseView },
|
||||||
{ path: '/incidents', component: IncidentsView },
|
{ path: '/incidents', component: IncidentsView },
|
||||||
|
{ path: '/alerts', component: SecurityAlertsView },
|
||||||
{ path: '/bundles', component: BundlesView },
|
{ path: '/bundles', component: BundlesView },
|
||||||
{ path: '/sources', component: SourcesView },
|
{ path: '/sources', component: SourcesView },
|
||||||
{ path: '/context', component: ContextView },
|
{ path: '/context', component: ContextView },
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,13 @@
|
||||||
--badge-critical-bg: #450a0a; --badge-critical-text: #f87171;
|
--badge-critical-bg: #450a0a; --badge-critical-text: #f87171;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Reset UA button chrome — utility classes with higher specificity will override */
|
||||||
|
button {
|
||||||
|
background: transparent;
|
||||||
|
border: 0;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
/* Smooth theme transitions */
|
/* Smooth theme transitions */
|
||||||
@media (prefers-reduced-motion: no-preference) {
|
@media (prefers-reduced-motion: no-preference) {
|
||||||
*, *::before, *::after {
|
*, *::before, *::after {
|
||||||
|
|
@ -58,3 +65,27 @@
|
||||||
outline: 2px solid var(--color-accent);
|
outline: 2px solid var(--color-accent);
|
||||||
outline-offset: 2px;
|
outline-offset: 2px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Loading skeleton shimmer */
|
||||||
|
@keyframes shimmer {
|
||||||
|
0% { background-position: -200% 0; }
|
||||||
|
100% { background-position: 200% 0; }
|
||||||
|
}
|
||||||
|
|
||||||
|
.loading-shimmer {
|
||||||
|
background: linear-gradient(
|
||||||
|
90deg,
|
||||||
|
var(--color-surface-raised) 25%,
|
||||||
|
var(--color-surface-border) 50%,
|
||||||
|
var(--color-surface-raised) 75%
|
||||||
|
);
|
||||||
|
background-size: 200% 100%;
|
||||||
|
animation: shimmer 1.4s ease-in-out infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (prefers-reduced-motion: reduce) {
|
||||||
|
.loading-shimmer {
|
||||||
|
animation: none;
|
||||||
|
background: var(--color-surface-raised);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,22 @@
|
||||||
<template>
|
<template>
|
||||||
<div class="p-4 sm:p-6 max-w-5xl mx-auto space-y-8">
|
<div class="p-4 sm:p-6 max-w-5xl mx-auto space-y-8">
|
||||||
|
|
||||||
|
<!-- Timeline brush filter banner -->
|
||||||
|
<div
|
||||||
|
v-if="timelineRange"
|
||||||
|
class="flex items-center gap-3 rounded border border-accent/40 bg-surface-raised px-4 py-2.5 text-xs"
|
||||||
|
>
|
||||||
|
<span class="text-accent font-semibold">Filtered:</span>
|
||||||
|
<span class="text-text-primary font-mono">{{ shortTs(timelineRange.from) }}</span>
|
||||||
|
<span class="text-text-dim">→</span>
|
||||||
|
<span class="text-text-primary font-mono">{{ shortTs(timelineRange.to) }}</span>
|
||||||
|
<button
|
||||||
|
@click="timelineRange = null"
|
||||||
|
class="ml-auto text-text-dim hover:text-sev-error transition-colors"
|
||||||
|
aria-label="Clear time filter"
|
||||||
|
>✕ clear</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Watch status + freshness row -->
|
<!-- Watch status + freshness row -->
|
||||||
<div v-if="!loading && stats" class="space-y-2">
|
<div v-if="!loading && stats" class="space-y-2">
|
||||||
<!-- Live watch indicator -->
|
<!-- Live watch indicator -->
|
||||||
|
|
@ -29,8 +45,9 @@
|
||||||
<div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
|
<div class="grid grid-cols-1 sm:grid-cols-3 gap-4">
|
||||||
<div class="rounded border border-surface-border bg-surface-raised p-5">
|
<div class="rounded border border-surface-border bg-surface-raised p-5">
|
||||||
<p class="text-text-dim text-xs uppercase tracking-widest mb-2">Criticals (24h)</p>
|
<p class="text-text-dim text-xs uppercase tracking-widest mb-2">Criticals (24h)</p>
|
||||||
<p class="text-3xl font-semibold tabular-nums" :class="stats?.criticals_24h ? 'text-sev-critical' : 'text-text-muted'">
|
<div v-if="loading" class="loading-shimmer h-9 w-16 rounded mt-1" />
|
||||||
{{ loading ? '…' : (stats?.criticals_24h ?? 0) }}
|
<p v-else class="text-3xl font-semibold tabular-nums" :class="stats?.criticals_24h ? 'text-sev-critical' : 'text-text-muted'">
|
||||||
|
{{ stats?.criticals_24h ?? 0 }}
|
||||||
</p>
|
</p>
|
||||||
<p v-if="stats?.suppressed_criticals" class="text-xs text-text-dim mt-1">
|
<p v-if="stats?.suppressed_criticals" class="text-xs text-text-dim mt-1">
|
||||||
{{ stats.suppressed_criticals }} suppressed by overrides
|
{{ stats.suppressed_criticals }} suppressed by overrides
|
||||||
|
|
@ -38,8 +55,9 @@
|
||||||
</div>
|
</div>
|
||||||
<div class="rounded border border-surface-border bg-surface-raised p-5">
|
<div class="rounded border border-surface-border bg-surface-raised p-5">
|
||||||
<p class="text-text-dim text-xs uppercase tracking-widest mb-2">Errors (24h)</p>
|
<p class="text-text-dim text-xs uppercase tracking-widest mb-2">Errors (24h)</p>
|
||||||
<p class="text-3xl font-semibold tabular-nums" :class="stats?.errors_24h ? 'text-sev-error' : 'text-text-muted'">
|
<div v-if="loading" class="loading-shimmer h-9 w-16 rounded mt-1" />
|
||||||
{{ loading ? '…' : (stats?.errors_24h ?? 0) }}
|
<p v-else class="text-3xl font-semibold tabular-nums" :class="stats?.errors_24h ? 'text-sev-error' : 'text-text-muted'">
|
||||||
|
{{ stats?.errors_24h ?? 0 }}
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<RouterLink
|
<RouterLink
|
||||||
|
|
@ -48,17 +66,48 @@
|
||||||
:class="activeIncidents > 0 ? 'border-sev-warn' : 'border-surface-border'"
|
:class="activeIncidents > 0 ? 'border-sev-warn' : 'border-surface-border'"
|
||||||
>
|
>
|
||||||
<p class="text-text-dim text-xs uppercase tracking-widest mb-2">Active Incidents</p>
|
<p class="text-text-dim text-xs uppercase tracking-widest mb-2">Active Incidents</p>
|
||||||
<p class="text-3xl font-semibold tabular-nums" :class="activeIncidents > 0 ? 'text-sev-warn' : 'text-text-muted'">
|
<div v-if="incidentsLoading" class="loading-shimmer h-9 w-12 rounded mt-1" />
|
||||||
{{ incidentsLoading ? '…' : activeIncidents }}
|
<p v-else class="text-3xl font-semibold tabular-nums" :class="activeIncidents > 0 ? 'text-sev-warn' : 'text-text-muted'">
|
||||||
|
{{ activeIncidents }}
|
||||||
</p>
|
</p>
|
||||||
</RouterLink>
|
</RouterLink>
|
||||||
|
<RouterLink
|
||||||
|
to="/alerts"
|
||||||
|
class="rounded border bg-surface-raised p-5 block hover:bg-surface transition-colors"
|
||||||
|
:class="unackedAlerts > 0 ? 'border-sev-error' : 'border-surface-border'"
|
||||||
|
>
|
||||||
|
<p class="text-text-dim text-xs uppercase tracking-widest mb-2">Unreviewed Alerts</p>
|
||||||
|
<div v-if="alertsLoading" class="loading-shimmer h-9 w-12 rounded mt-1" />
|
||||||
|
<p v-else class="text-3xl font-semibold tabular-nums" :class="unackedAlerts > 0 ? 'text-sev-error' : 'text-text-muted'">
|
||||||
|
{{ unackedAlerts }}
|
||||||
|
</p>
|
||||||
|
</RouterLink>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Activity timeline -->
|
||||||
|
<div v-if="stats?.timeline_events?.length">
|
||||||
|
<h2 class="text-text-primary text-sm font-semibold uppercase tracking-wider mb-3">Activity Timeline — Last 24 Hours</h2>
|
||||||
|
<IncidentTimeline
|
||||||
|
:entries="stats.timeline_events"
|
||||||
|
:brushable="true"
|
||||||
|
@select-range="onTimelineRange"
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Source health (24h) -->
|
<!-- Source health (24h) -->
|
||||||
<div>
|
<div>
|
||||||
<h2 class="text-text-primary text-sm font-semibold uppercase tracking-wider mb-3">Source Health — Last 24 Hours</h2>
|
<h2 class="text-text-primary text-sm font-semibold uppercase tracking-wider mb-3">Source Health — Last 24 Hours</h2>
|
||||||
|
|
||||||
<div v-if="loading" class="text-text-dim text-sm py-4">Loading…</div>
|
<div v-if="loading" class="rounded border border-surface-border overflow-hidden divide-y divide-surface-border">
|
||||||
|
<div v-for="i in 4" :key="i" class="px-4 py-3 flex items-center gap-4">
|
||||||
|
<div class="loading-shimmer w-2 h-2 rounded-full shrink-0" />
|
||||||
|
<div class="loading-shimmer h-3.5 rounded" :style="`width: ${50 + (i * 23) % 80}px`" />
|
||||||
|
<div class="loading-shimmer h-3.5 w-10 rounded ml-auto" />
|
||||||
|
<div class="loading-shimmer h-3.5 w-8 rounded" />
|
||||||
|
<div class="loading-shimmer h-3.5 w-20 rounded" />
|
||||||
|
<div class="loading-shimmer h-6 w-16 rounded" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div v-else-if="!stats?.source_health?.length" class="text-text-dim text-sm py-4">
|
<div v-else-if="!stats?.source_health?.length" class="text-text-dim text-sm py-4">
|
||||||
No log entries in the last 24 hours.
|
No log entries in the last 24 hours.
|
||||||
|
|
@ -107,7 +156,7 @@
|
||||||
class="text-text-dim hover:text-accent text-xs px-2 py-1 rounded hover:bg-surface transition-colors"
|
class="text-text-dim hover:text-accent text-xs px-2 py-1 rounded hover:bg-surface transition-colors"
|
||||||
@click="diagnoseSource(src.source_id)"
|
@click="diagnoseSource(src.source_id)"
|
||||||
:aria-label="`Diagnose ${src.source_id}`"
|
:aria-label="`Diagnose ${src.source_id}`"
|
||||||
>diagnose</button>
|
>diagnose ↗</button>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
@ -123,14 +172,52 @@
|
||||||
<div
|
<div
|
||||||
v-for="entry in stats.recent_criticals"
|
v-for="entry in stats.recent_criticals"
|
||||||
:key="entry.entry_id"
|
:key="entry.entry_id"
|
||||||
class="border-b border-surface-border border-l-2 border-l-sev-critical px-4 py-3 hover:bg-surface-raised transition-colors"
|
class="border-b border-surface-border last:border-b-0"
|
||||||
>
|
>
|
||||||
<div class="flex items-center gap-2 mb-1 flex-wrap">
|
<!-- Entry header row (clickable to expand) -->
|
||||||
<span class="text-sev-critical text-xs font-semibold">CRITICAL</span>
|
<div
|
||||||
<span class="text-accent text-xs">{{ entry.source_id }}</span>
|
class="border-l-2 border-l-sev-critical px-4 py-3 hover:bg-surface-raised transition-colors cursor-pointer select-none flex items-start gap-2"
|
||||||
<span v-if="entry.timestamp_iso" class="text-text-dim text-xs">{{ shortTs(entry.timestamp_iso) }}</span>
|
:class="expandedEntryId === entry.entry_id ? 'bg-surface-raised' : ''"
|
||||||
|
@click="explainCritical(entry)"
|
||||||
|
:aria-expanded="expandedEntryId === entry.entry_id"
|
||||||
|
>
|
||||||
|
<div class="flex-1 min-w-0">
|
||||||
|
<div class="flex items-center gap-2 mb-1 flex-wrap">
|
||||||
|
<span class="text-sev-critical text-xs font-semibold">CRITICAL</span>
|
||||||
|
<span class="text-accent text-xs font-mono">{{ entry.source_id }}</span>
|
||||||
|
<span v-if="entry.timestamp_iso" class="text-text-dim text-xs">{{ shortTs(entry.timestamp_iso) }}</span>
|
||||||
|
</div>
|
||||||
|
<p
|
||||||
|
class="text-text-primary text-sm font-mono leading-relaxed"
|
||||||
|
:class="expandedEntryId !== entry.entry_id ? 'line-clamp-2' : ''"
|
||||||
|
>{{ entry.text }}</p>
|
||||||
|
</div>
|
||||||
|
<span class="text-text-dim text-[10px] shrink-0 mt-0.5 select-none opacity-60">
|
||||||
|
{{ expandedEntryId === entry.entry_id ? '▲' : '▼' }}
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<p class="text-text-primary text-sm font-mono leading-relaxed line-clamp-2">{{ entry.text }}</p>
|
|
||||||
|
<!-- Inline explain panel -->
|
||||||
|
<Transition name="expand">
|
||||||
|
<div
|
||||||
|
v-if="expandedEntryId === entry.entry_id"
|
||||||
|
class="border-l-2 border-l-accent/40 bg-surface px-4 py-3"
|
||||||
|
>
|
||||||
|
<div v-if="entryExplaining === entry.entry_id" class="flex items-center gap-2 text-xs text-text-dim py-1">
|
||||||
|
<span class="inline-block w-3 h-3 rounded-full border-2 border-accent border-t-transparent animate-spin motion-reduce:animate-none" aria-hidden="true" />
|
||||||
|
Analysing surrounding logs…
|
||||||
|
</div>
|
||||||
|
<div v-else-if="entryExplanations[entry.entry_id]" class="text-sm text-text-primary leading-relaxed whitespace-pre-wrap mb-3">
|
||||||
|
{{ entryExplanations[entry.entry_id] }}
|
||||||
|
</div>
|
||||||
|
<div class="flex gap-2 mt-2">
|
||||||
|
<button
|
||||||
|
@click.stop="diagnoseSource(entry.source_id)"
|
||||||
|
class="text-xs px-2 py-1 rounded border border-surface-border text-text-dim hover:text-accent hover:border-accent transition-colors"
|
||||||
|
>Diagnose source ↗</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</Transition>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<p v-if="stats.suppressed_criticals" class="text-xs text-text-dim mt-2">
|
<p v-if="stats.suppressed_criticals" class="text-xs text-text-dim mt-2">
|
||||||
|
|
@ -154,6 +241,7 @@
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, computed, onMounted } from 'vue'
|
import { ref, computed, onMounted } from 'vue'
|
||||||
import { useRouter, RouterLink } from 'vue-router'
|
import { useRouter, RouterLink } from 'vue-router'
|
||||||
|
import IncidentTimeline from '@/components/IncidentTimeline.vue'
|
||||||
|
|
||||||
const router = useRouter()
|
const router = useRouter()
|
||||||
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
||||||
|
|
@ -165,6 +253,14 @@ interface SourceHealth {
|
||||||
latest: string | null
|
latest: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface TimelineEvent {
|
||||||
|
entry_id: string
|
||||||
|
source_id: string
|
||||||
|
timestamp_iso: string | null
|
||||||
|
severity: string | null
|
||||||
|
text: string
|
||||||
|
}
|
||||||
|
|
||||||
interface StatsResponse {
|
interface StatsResponse {
|
||||||
window_hours: number
|
window_hours: number
|
||||||
total_24h: number
|
total_24h: number
|
||||||
|
|
@ -173,6 +269,7 @@ interface StatsResponse {
|
||||||
suppressed_criticals: number
|
suppressed_criticals: number
|
||||||
last_gleaned: string | null
|
last_gleaned: string | null
|
||||||
source_health: SourceHealth[]
|
source_health: SourceHealth[]
|
||||||
|
timeline_events: TimelineEvent[]
|
||||||
recent_criticals: Array<{
|
recent_criticals: Array<{
|
||||||
entry_id: string
|
entry_id: string
|
||||||
source_id: string
|
source_id: string
|
||||||
|
|
@ -193,6 +290,7 @@ interface WatchSourceStatus {
|
||||||
|
|
||||||
interface Incident {
|
interface Incident {
|
||||||
id: string
|
id: string
|
||||||
|
started_at: string | null
|
||||||
ended_at: string | null
|
ended_at: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -201,10 +299,28 @@ const loading = ref(true)
|
||||||
const incidents = ref<Incident[]>([])
|
const incidents = ref<Incident[]>([])
|
||||||
const incidentsLoading = ref(true)
|
const incidentsLoading = ref(true)
|
||||||
const watchSources = ref<WatchSourceStatus[]>([])
|
const watchSources = ref<WatchSourceStatus[]>([])
|
||||||
|
const unackedAlerts = ref(0)
|
||||||
|
const alertsLoading = ref(true)
|
||||||
|
const timelineRange = ref<{ from: string; to: string } | null>(null)
|
||||||
|
const expandedEntryId = ref<string | null>(null)
|
||||||
|
const entryExplanations = ref<Record<string, string>>({})
|
||||||
|
const entryExplaining = ref<string | null>(null)
|
||||||
|
|
||||||
const activeIncidents = computed(() =>
|
const activeIncidents = computed(() => {
|
||||||
incidents.value.filter(i => !i.ended_at).length
|
const open = incidents.value.filter(i => !i.ended_at)
|
||||||
)
|
if (!timelineRange.value) return open.length
|
||||||
|
const from = new Date(timelineRange.value.from).getTime()
|
||||||
|
const to = new Date(timelineRange.value.to).getTime()
|
||||||
|
return open.filter(i => {
|
||||||
|
if (!i.started_at) return true
|
||||||
|
const start = new Date(i.started_at).getTime()
|
||||||
|
return start <= to
|
||||||
|
}).length
|
||||||
|
})
|
||||||
|
|
||||||
|
function onTimelineRange(range: { from: string; to: string } | null) {
|
||||||
|
timelineRange.value = range
|
||||||
|
}
|
||||||
|
|
||||||
const watchActive = computed(() =>
|
const watchActive = computed(() =>
|
||||||
watchSources.value.some(s => s.running)
|
watchSources.value.some(s => s.running)
|
||||||
|
|
@ -217,7 +333,7 @@ const isStale = computed(() => {
|
||||||
})
|
})
|
||||||
|
|
||||||
onMounted(async () => {
|
onMounted(async () => {
|
||||||
await Promise.all([loadStats(), loadIncidents(), loadWatchStatus()])
|
await Promise.all([loadStats(), loadIncidents(), loadWatchStatus(), loadAlertCount()])
|
||||||
})
|
})
|
||||||
|
|
||||||
async function loadStats() {
|
async function loadStats() {
|
||||||
|
|
@ -245,6 +361,14 @@ async function loadWatchStatus() {
|
||||||
} catch { /* non-critical */ }
|
} catch { /* non-critical */ }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function loadAlertCount() {
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${BASE}/api/anomaly/detections?unacked_only=true&limit=1000`)
|
||||||
|
if (res.ok) unackedAlerts.value = (await res.json()).total ?? 0
|
||||||
|
} catch { /* non-critical — scorer may be disabled */ }
|
||||||
|
finally { alertsLoading.value = false }
|
||||||
|
}
|
||||||
|
|
||||||
function healthDot(errors: number, total: number): string {
|
function healthDot(errors: number, total: number): string {
|
||||||
if (errors === 0) return 'bg-green-500'
|
if (errors === 0) return 'bg-green-500'
|
||||||
const ratio = errors / Math.max(total, 1)
|
const ratio = errors / Math.max(total, 1)
|
||||||
|
|
@ -253,7 +377,74 @@ function healthDot(errors: number, total: number): string {
|
||||||
}
|
}
|
||||||
|
|
||||||
function diagnoseSource(sourceId: string) {
|
function diagnoseSource(sourceId: string) {
|
||||||
router.push({ path: '/diagnose', query: { source: sourceId } })
|
const query: Record<string, string> = {
|
||||||
|
tab: 'quick',
|
||||||
|
source: sourceId,
|
||||||
|
q: 'Summarize what errors or issues occurred — what went wrong and what is the likely cause?',
|
||||||
|
}
|
||||||
|
if (timelineRange.value) {
|
||||||
|
query.from = timelineRange.value.from
|
||||||
|
query.to = timelineRange.value.to
|
||||||
|
}
|
||||||
|
router.push({ path: '/diagnose', query })
|
||||||
|
}
|
||||||
|
|
||||||
|
type CriticalEntry = { entry_id: string; source_id: string; timestamp_iso: string | null; text: string }
|
||||||
|
|
||||||
|
async function explainCritical(entry: CriticalEntry) {
|
||||||
|
if (expandedEntryId.value === entry.entry_id) {
|
||||||
|
expandedEntryId.value = null
|
||||||
|
return
|
||||||
|
}
|
||||||
|
expandedEntryId.value = entry.entry_id
|
||||||
|
if (entryExplanations.value[entry.entry_id]) return
|
||||||
|
|
||||||
|
entryExplaining.value = entry.entry_id
|
||||||
|
let explanation = ''
|
||||||
|
try {
|
||||||
|
const sinceMs = entry.timestamp_iso ? new Date(entry.timestamp_iso).getTime() - 5 * 60_000 : null
|
||||||
|
const untilMs = entry.timestamp_iso ? new Date(entry.timestamp_iso).getTime() + 5 * 60_000 : null
|
||||||
|
const res = await fetch(`${BASE}/api/diagnose/stream`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({
|
||||||
|
query: `Explain this critical log error and its likely cause: ${entry.text.slice(0, 300)}`,
|
||||||
|
source: entry.source_id,
|
||||||
|
since: sinceMs ? new Date(sinceMs).toISOString() : undefined,
|
||||||
|
until: untilMs ? new Date(untilMs).toISOString() : undefined,
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`)
|
||||||
|
const reader = res.body.getReader()
|
||||||
|
const decoder = new TextDecoder()
|
||||||
|
let buf = ''
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read()
|
||||||
|
if (done) break
|
||||||
|
buf += decoder.decode(value, { stream: true })
|
||||||
|
const parts = buf.split('\n\n')
|
||||||
|
buf = parts.pop() ?? ''
|
||||||
|
for (const part of parts) {
|
||||||
|
const line = part.trim()
|
||||||
|
if (!line.startsWith('data: ')) continue
|
||||||
|
try {
|
||||||
|
const evt = JSON.parse(line.slice(6))
|
||||||
|
if (evt.type === 'reasoning') explanation = evt.text
|
||||||
|
} catch { /* malformed SSE chunk — skip */ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
entryExplanations.value = {
|
||||||
|
...entryExplanations.value,
|
||||||
|
[entry.entry_id]: explanation || 'No explanation returned — try the full diagnose view for more context.',
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
entryExplanations.value = {
|
||||||
|
...entryExplanations.value,
|
||||||
|
[entry.entry_id]: 'Failed to load explanation.',
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
entryExplaining.value = null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function shortTs(iso: string | null): string {
|
function shortTs(iso: string | null): string {
|
||||||
|
|
@ -266,3 +457,17 @@ function shortTs(iso: string | null): string {
|
||||||
} catch { return iso }
|
} catch { return iso }
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
<style scoped>
|
||||||
|
.expand-enter-active,
|
||||||
|
.expand-leave-active {
|
||||||
|
transition: opacity 0.15s ease, max-height 0.2s ease;
|
||||||
|
overflow: hidden;
|
||||||
|
max-height: 400px;
|
||||||
|
}
|
||||||
|
.expand-enter-from,
|
||||||
|
.expand-leave-to {
|
||||||
|
opacity: 0;
|
||||||
|
max-height: 0;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,20 @@
|
||||||
<template>
|
<template>
|
||||||
<div class="p-4 sm:p-6 max-w-4xl mx-auto">
|
<div
|
||||||
<div class="mb-5">
|
class="p-4 sm:p-6 mx-auto"
|
||||||
|
:class="activeTab === 'chat' ? 'max-w-3xl flex flex-col' : 'max-w-4xl'"
|
||||||
|
:style="activeTab === 'chat' ? 'height: calc(100vh - 5rem)' : ''"
|
||||||
|
>
|
||||||
|
<div class="mb-5 shrink-0">
|
||||||
<h1 class="text-text-primary text-xl font-semibold mb-1">Diagnose</h1>
|
<h1 class="text-text-primary text-xl font-semibold mb-1">Diagnose</h1>
|
||||||
<p class="text-text-dim text-sm">
|
<p class="text-text-dim text-sm">
|
||||||
Quick: describe a symptom to surface log evidence.
|
<template v-if="activeTab === 'chat'">Describe your issue in plain language — Turnstone searches your logs and explains what it finds.</template>
|
||||||
Structured: tag a timestamped incident record.
|
<template v-else-if="activeTab === 'quick'">Single-shot: describe a symptom to surface log evidence and LLM reasoning.</template>
|
||||||
|
<template v-else>Tag and timestamp a known issue to build an incident record.</template>
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Tab toggle -->
|
<!-- Tab strip -->
|
||||||
<div role="tablist" aria-label="Diagnose mode" class="flex gap-1 mb-6 border-b border-surface-border">
|
<div role="tablist" aria-label="Diagnose mode" class="flex gap-1 mb-6 border-b border-surface-border shrink-0">
|
||||||
<button
|
<button
|
||||||
v-for="(t, idx) in tabs"
|
v-for="(t, idx) in tabs"
|
||||||
:key="t.key"
|
:key="t.key"
|
||||||
|
|
@ -18,7 +23,7 @@
|
||||||
:id="`tab-${t.key}`"
|
:id="`tab-${t.key}`"
|
||||||
:aria-controls="`tabpanel-${t.key}`"
|
:aria-controls="`tabpanel-${t.key}`"
|
||||||
:tabindex="activeTab === t.key ? 0 : -1"
|
:tabindex="activeTab === t.key ? 0 : -1"
|
||||||
@click="activeTab = t.key as 'quick' | 'structured'"
|
@click="activeTab = t.key as TabKey"
|
||||||
@keydown="handleTabKey($event, t.key)"
|
@keydown="handleTabKey($event, t.key)"
|
||||||
:ref="(el) => { if (el) tabRefs[idx] = el as HTMLButtonElement }"
|
:ref="(el) => { if (el) tabRefs[idx] = el as HTMLButtonElement }"
|
||||||
:class="[
|
:class="[
|
||||||
|
|
@ -30,7 +35,18 @@
|
||||||
>{{ t.label }}</button>
|
>{{ t.label }}</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Quick tab panel -->
|
<!-- Chat tab — full-height flex layout -->
|
||||||
|
<div
|
||||||
|
v-show="activeTab === 'chat'"
|
||||||
|
role="tabpanel"
|
||||||
|
id="tabpanel-chat"
|
||||||
|
aria-labelledby="tab-chat"
|
||||||
|
class="flex-1 min-h-0"
|
||||||
|
>
|
||||||
|
<ChatDiagnose />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Quick tab -->
|
||||||
<div
|
<div
|
||||||
v-show="activeTab === 'quick'"
|
v-show="activeTab === 'quick'"
|
||||||
role="tabpanel"
|
role="tabpanel"
|
||||||
|
|
@ -41,7 +57,7 @@
|
||||||
<QuickCapture />
|
<QuickCapture />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Structured tab panel -->
|
<!-- Structured tab -->
|
||||||
<div
|
<div
|
||||||
v-show="activeTab === 'structured'"
|
v-show="activeTab === 'structured'"
|
||||||
role="tabpanel"
|
role="tabpanel"
|
||||||
|
|
@ -64,36 +80,42 @@
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, onMounted, watch, nextTick } from 'vue'
|
import { ref, onMounted, watch, nextTick } from 'vue'
|
||||||
import { useRoute, RouterLink } from 'vue-router'
|
import { useRoute, RouterLink } from 'vue-router'
|
||||||
import QuickCapture from '@/components/QuickCapture.vue'
|
import QuickCapture from '@/components/QuickCapture.vue'
|
||||||
import IncidentForm from '@/components/IncidentForm.vue'
|
import IncidentForm from '@/components/IncidentForm.vue'
|
||||||
|
import ChatDiagnose from '@/components/ChatDiagnose.vue'
|
||||||
|
|
||||||
const route = useRoute()
|
const route = useRoute()
|
||||||
const tabs: { key: 'quick' | 'structured'; label: string }[] = [
|
|
||||||
|
type TabKey = 'chat' | 'quick' | 'structured'
|
||||||
|
|
||||||
|
const tabs: { key: TabKey; label: string }[] = [
|
||||||
|
{ key: 'chat', label: 'Chat' },
|
||||||
{ key: 'quick', label: 'Quick' },
|
{ key: 'quick', label: 'Quick' },
|
||||||
{ key: 'structured', label: 'Structured' },
|
{ key: 'structured', label: 'Structured' },
|
||||||
]
|
]
|
||||||
const activeTab = ref<'quick' | 'structured'>('quick')
|
const activeTab = ref<TabKey>('chat')
|
||||||
const createdLabel = ref('')
|
const createdLabel = ref('')
|
||||||
const tabRefs = ref<HTMLButtonElement[]>([])
|
const tabRefs = ref<HTMLButtonElement[]>([])
|
||||||
|
|
||||||
function handleTabKey(e: KeyboardEvent, currentKey: 'quick' | 'structured') {
|
function handleTabKey(e: KeyboardEvent, currentKey: TabKey) {
|
||||||
const keys = tabs.map(t => t.key)
|
const keys = tabs.map(t => t.key)
|
||||||
const idx = keys.indexOf(currentKey)
|
const idx = keys.indexOf(currentKey)
|
||||||
let next = idx
|
let next = idx
|
||||||
if (e.key === 'ArrowRight') next = (idx + 1) % keys.length
|
if (e.key === 'ArrowRight') next = (idx + 1) % keys.length
|
||||||
else if (e.key === 'ArrowLeft') next = (idx - 1 + keys.length) % keys.length
|
else if (e.key === 'ArrowLeft') next = (idx - 1 + keys.length) % keys.length
|
||||||
else return
|
else return
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
activeTab.value = keys[next] as 'quick' | 'structured'
|
activeTab.value = keys[next] as TabKey
|
||||||
nextTick(() => tabRefs.value[next]?.focus())
|
nextTick(() => tabRefs.value[next]?.focus())
|
||||||
}
|
}
|
||||||
|
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
if (route.query.tab === 'structured') activeTab.value = 'structured'
|
const tab = route.query.tab as string | undefined
|
||||||
|
if (tab === 'structured' || tab === 'quick' || tab === 'chat') activeTab.value = tab
|
||||||
})
|
})
|
||||||
|
|
||||||
watch(() => route.query.tab, (tab) => {
|
watch(() => route.query.tab, (tab) => {
|
||||||
if (tab === 'structured' || tab === 'quick') activeTab.value = tab
|
if (tab === 'structured' || tab === 'quick' || tab === 'chat') activeTab.value = tab as TabKey
|
||||||
})
|
})
|
||||||
|
|
||||||
function onCreated(label: string) {
|
function onCreated(label: string) {
|
||||||
|
|
|
||||||
|
|
@ -86,6 +86,29 @@
|
||||||
{{ sending ? 'Sending…' : 'Send Bundle' }}
|
{{ sending ? 'Sending…' : 'Send Bundle' }}
|
||||||
</button>
|
</button>
|
||||||
<span v-if="sendStatus" :class="sendStatus.ok ? 'text-green-500' : 'text-sev-error'" class="text-xs">{{ sendStatus.msg }}</span>
|
<span v-if="sendStatus" :class="sendStatus.ok ? 'text-green-500' : 'text-sev-error'" class="text-xs">{{ sendStatus.msg }}</span>
|
||||||
|
<!-- Export to ticket tracker -->
|
||||||
|
<div class="relative" ref="exportMenuRef">
|
||||||
|
<button
|
||||||
|
@click="exportMenuOpen = !exportMenuOpen"
|
||||||
|
:disabled="exporting"
|
||||||
|
class="px-3 py-1.5 text-xs rounded border border-surface-border text-text-muted hover:text-accent hover:border-accent transition-colors disabled:opacity-40"
|
||||||
|
>{{ exporting ? 'Exporting…' : 'Export ticket ▾' }}</button>
|
||||||
|
<div
|
||||||
|
v-if="exportMenuOpen"
|
||||||
|
class="absolute right-0 top-full mt-1 w-32 bg-surface border border-surface-border rounded shadow-lg z-10"
|
||||||
|
>
|
||||||
|
<button
|
||||||
|
v-for="target in exportTargets"
|
||||||
|
:key="target.key"
|
||||||
|
@click="exportTicket(selected!.id, target.key)"
|
||||||
|
class="block w-full text-left px-3 py-2 text-xs text-text-primary hover:bg-surface-raised transition-colors"
|
||||||
|
>{{ target.label }}</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<span v-if="exportStatus" :class="exportStatus.ok ? 'text-green-400' : 'text-sev-error'" class="text-xs">
|
||||||
|
<a v-if="exportStatus.url" :href="exportStatus.url" target="_blank" rel="noopener" class="underline">{{ exportStatus.msg }}</a>
|
||||||
|
<span v-else>{{ exportStatus.msg }}</span>
|
||||||
|
</span>
|
||||||
<button @click="selected = null" class="text-text-dim hover:text-text-primary text-xs ml-auto sm:ml-0">✕ close</button>
|
<button @click="selected = null" class="text-text-dim hover:text-text-primary text-xs ml-auto sm:ml-0">✕ close</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -115,12 +138,25 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-else>
|
<div v-else>
|
||||||
<p class="text-text-dim text-xs mb-3">{{ selectedEntries.length }} entries in window</p>
|
<!-- Timeline visualizer -->
|
||||||
<div class="space-y-1 max-h-96 overflow-y-auto">
|
<IncidentTimeline
|
||||||
|
class="mb-4"
|
||||||
|
:entries="selectedEntries"
|
||||||
|
:started-at="selected.started_at"
|
||||||
|
:ended-at="selected.ended_at"
|
||||||
|
@select-entry="scrollToEntry"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<div
|
||||||
|
id="incident-entries"
|
||||||
|
class="space-y-1 max-h-96 overflow-y-auto"
|
||||||
|
>
|
||||||
<div
|
<div
|
||||||
v-for="entry in selectedEntries"
|
v-for="(entry, idx) in selectedEntries"
|
||||||
:key="entry.entry_id"
|
:key="entry.entry_id"
|
||||||
class="font-mono text-xs py-1 px-2 rounded bg-surface-raised border border-surface-border"
|
:id="`incident-entry-${idx}`"
|
||||||
|
class="font-mono text-xs py-1 px-2 rounded bg-surface-raised border border-surface-border transition-colors"
|
||||||
|
:class="{ 'ring-1 ring-accent': highlightIdx === idx }"
|
||||||
>
|
>
|
||||||
<span class="text-text-dim mr-2">{{ shortTs(entry.timestamp_iso) }}</span>
|
<span class="text-text-dim mr-2">{{ shortTs(entry.timestamp_iso) }}</span>
|
||||||
<span :class="['mr-2', severityTextClass(entry.severity)]">{{ entry.severity || '?' }}</span>
|
<span :class="['mr-2', severityTextClass(entry.severity)]">{{ entry.severity || '?' }}</span>
|
||||||
|
|
@ -136,8 +172,9 @@
|
||||||
</template>
|
</template>
|
||||||
|
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { ref, onMounted } from 'vue'
|
import { ref, onMounted, onBeforeUnmount } from 'vue'
|
||||||
import { RouterLink } from 'vue-router'
|
import { RouterLink } from 'vue-router'
|
||||||
|
import IncidentTimeline from '@/components/IncidentTimeline.vue'
|
||||||
|
|
||||||
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
||||||
|
|
||||||
|
|
@ -224,6 +261,57 @@ async function sendBundle(id: string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── ticket export ─────────────────────────────────────────────
|
||||||
|
const exportTargets = [
|
||||||
|
{ key: 'notion', label: 'Notion' },
|
||||||
|
{ key: 'jira', label: 'Jira' },
|
||||||
|
]
|
||||||
|
const exporting = ref(false)
|
||||||
|
const exportMenuOpen = ref(false)
|
||||||
|
const exportMenuRef = ref<HTMLElement | null>(null)
|
||||||
|
const exportStatus = ref<{ ok: boolean; msg: string; url?: string } | null>(null)
|
||||||
|
|
||||||
|
function handleExportClickOutside(e: MouseEvent) {
|
||||||
|
if (exportMenuRef.value && !exportMenuRef.value.contains(e.target as Node)) {
|
||||||
|
exportMenuOpen.value = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
onMounted(() => { document.addEventListener('click', handleExportClickOutside) })
|
||||||
|
onBeforeUnmount(() => { document.removeEventListener('click', handleExportClickOutside) })
|
||||||
|
|
||||||
|
async function exportTicket(incident_id: string, target: string) {
|
||||||
|
exportMenuOpen.value = false
|
||||||
|
exporting.value = true
|
||||||
|
exportStatus.value = null
|
||||||
|
try {
|
||||||
|
const res = await fetch(`${BASE}/api/incidents/${incident_id}/export`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ target }),
|
||||||
|
})
|
||||||
|
const data = await res.json()
|
||||||
|
if (res.ok) {
|
||||||
|
exportStatus.value = { ok: true, msg: `Created ${data.ticket_id} →`, url: data.url }
|
||||||
|
} else {
|
||||||
|
exportStatus.value = { ok: false, msg: data.detail ?? 'Export failed' }
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
exportStatus.value = { ok: false, msg: 'Network error' }
|
||||||
|
} finally {
|
||||||
|
exporting.value = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── timeline interaction ──────────────────────────────────────
|
||||||
|
const highlightIdx = ref<number | null>(null)
|
||||||
|
|
||||||
|
function scrollToEntry(idx: number) {
|
||||||
|
highlightIdx.value = idx
|
||||||
|
const el = document.getElementById(`incident-entry-${idx}`)
|
||||||
|
el?.scrollIntoView({ block: 'nearest', behavior: 'smooth' })
|
||||||
|
setTimeout(() => { highlightIdx.value = null }, 1500)
|
||||||
|
}
|
||||||
|
|
||||||
// ── helpers ───────────────────────────────────────────────────
|
// ── helpers ───────────────────────────────────────────────────
|
||||||
function severityStyle(sev: string): Record<string, string> {
|
function severityStyle(sev: string): Record<string, string> {
|
||||||
const k = sev?.toLowerCase() ?? 'low'
|
const k = sev?.toLowerCase() ?? 'low'
|
||||||
|
|
|
||||||
609
web/src/views/SecurityAlertsView.vue
Normal file
609
web/src/views/SecurityAlertsView.vue
Normal file
|
|
@ -0,0 +1,609 @@
|
||||||
|
<template>
|
||||||
|
<div class="p-4 sm:p-6 max-w-5xl mx-auto">
|
||||||
|
|
||||||
|
<!-- Header -->
|
||||||
|
<div class="mb-5 flex items-start justify-between gap-4 flex-wrap">
|
||||||
|
<div>
|
||||||
|
<h1 class="text-text-primary text-xl font-semibold mb-1">Security Alerts</h1>
|
||||||
|
<p class="text-text-dim text-sm">
|
||||||
|
Anomaly detections from the scoring pipeline.
|
||||||
|
Acknowledge entries after review to track your triage state.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Scorer controls -->
|
||||||
|
<div class="flex items-center gap-3 shrink-0 flex-wrap">
|
||||||
|
<!-- Status badge -->
|
||||||
|
<span
|
||||||
|
v-if="scorerStatus"
|
||||||
|
:class="[
|
||||||
|
'text-xs px-2 py-1 rounded border font-mono',
|
||||||
|
scorerStatus.enabled
|
||||||
|
? scorerStatus.running
|
||||||
|
? 'border-accent text-accent animate-pulse'
|
||||||
|
: 'border-surface-border text-text-dim'
|
||||||
|
: 'border-surface-border text-text-dim opacity-60'
|
||||||
|
]"
|
||||||
|
:title="scorerStatus.enabled ? `model: ${scorerStatus.model}` : 'TURNSTONE_ANOMALY_MODEL not set'"
|
||||||
|
>
|
||||||
|
{{ scorerStatus.running ? 'scoring…' : scorerStatus.enabled ? 'scorer ready' : 'scorer off' }}
|
||||||
|
</span>
|
||||||
|
|
||||||
|
<!-- Cybersec scorer status -->
|
||||||
|
<span
|
||||||
|
v-if="cybersecStatus"
|
||||||
|
:class="[
|
||||||
|
'text-xs px-2 py-1 rounded border font-mono',
|
||||||
|
cybersecStatus.enabled
|
||||||
|
? cybersecStatus.running
|
||||||
|
? 'border-accent text-accent animate-pulse'
|
||||||
|
: 'border-surface-border text-text-dim'
|
||||||
|
: 'border-surface-border text-text-dim opacity-40'
|
||||||
|
]"
|
||||||
|
:title="cybersecStatus.enabled ? `cybersec: ${cybersecStatus.model}` : 'TURNSTONE_CYBERSEC_MODEL not set'"
|
||||||
|
>
|
||||||
|
{{ cybersecStatus.running ? 'cybersec scoring…' : cybersecStatus.enabled ? 'cybersec on' : 'cybersec off' }}
|
||||||
|
</span>
|
||||||
|
|
||||||
|
<button
|
||||||
|
@click="runScorer"
|
||||||
|
:disabled="!scorerStatus?.enabled || triggerLoading || scorerStatus?.running"
|
||||||
|
class="px-3 py-1.5 bg-accent text-surface text-xs rounded font-medium hover:opacity-90 transition-opacity disabled:opacity-40"
|
||||||
|
title="Manually trigger an anomaly scoring pass"
|
||||||
|
>
|
||||||
|
{{ triggerLoading ? 'triggering…' : 'Run anomaly' }}
|
||||||
|
</button>
|
||||||
|
|
||||||
|
<button
|
||||||
|
@click="runCybersec"
|
||||||
|
:disabled="!cybersecStatus?.enabled || cybersecTriggerLoading || cybersecStatus?.running"
|
||||||
|
class="px-3 py-1.5 bg-accent text-surface text-xs rounded font-medium hover:opacity-90 transition-opacity disabled:opacity-40"
|
||||||
|
title="Manually trigger a cybersec scoring pass"
|
||||||
|
>
|
||||||
|
{{ cybersecTriggerLoading ? 'triggering…' : 'Run cybersec' }}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Scorer config warning (no model set) -->
|
||||||
|
<div
|
||||||
|
v-if="scorerStatus && !scorerStatus.enabled"
|
||||||
|
class="mb-5 px-4 py-3 rounded border border-sev-warn/40 bg-surface-raised text-sev-warn text-sm"
|
||||||
|
>
|
||||||
|
Anomaly scoring is disabled — set <code class="font-mono text-xs bg-surface px-1 py-0.5 rounded">TURNSTONE_ANOMALY_MODEL</code>
|
||||||
|
in your <code class="font-mono text-xs bg-surface px-1 py-0.5 rounded">.env</code> and restart Turnstone.
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Stats row -->
|
||||||
|
<div class="mb-5 flex flex-wrap gap-x-6 gap-y-2 text-xs text-text-dim">
|
||||||
|
<template v-if="scorerStatus?.enabled">
|
||||||
|
<span class="text-text-dim/60 uppercase tracking-wider font-medium">Anomaly:</span>
|
||||||
|
<span>scored <span class="text-text-primary font-mono">{{ scorerStatus.total_scored ?? '—' }}</span></span>
|
||||||
|
<span>detections <span class="text-text-primary font-mono">{{ scorerStatus.total_detections ?? '—' }}</span></span>
|
||||||
|
<span v-if="scorerStatus.last_run_at">
|
||||||
|
last run <span class="text-text-primary font-mono">{{ formatTs(scorerStatus.last_run_at) }}</span>
|
||||||
|
</span>
|
||||||
|
<span v-if="scorerStatus.last_error" class="text-sev-error">error: {{ scorerStatus.last_error }}</span>
|
||||||
|
</template>
|
||||||
|
<template v-if="cybersecStatus?.enabled">
|
||||||
|
<span class="text-text-dim/60 uppercase tracking-wider font-medium ml-2">Cybersec:</span>
|
||||||
|
<span>scored <span class="text-text-primary font-mono">{{ cybersecStatus.total_scored ?? '—' }}</span></span>
|
||||||
|
<span>detections <span class="text-text-primary font-mono">{{ cybersecStatus.total_detections ?? '—' }}</span></span>
|
||||||
|
<span v-if="cybersecStatus.last_run_at">
|
||||||
|
last run <span class="text-text-primary font-mono">{{ formatTs(cybersecStatus.last_run_at) }}</span>
|
||||||
|
</span>
|
||||||
|
<span v-if="cybersecStatus.last_error" class="text-sev-error">error: {{ cybersecStatus.last_error }}</span>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Filter / Tab bar -->
|
||||||
|
<div class="mb-4 flex flex-col sm:flex-row sm:items-center gap-3">
|
||||||
|
<!-- Tabs -->
|
||||||
|
<div role="tablist" aria-label="Filter by acknowledgement" class="flex gap-1 border-b border-surface-border flex-1">
|
||||||
|
<button
|
||||||
|
v-for="(tab, idx) in tabs"
|
||||||
|
:key="tab.value"
|
||||||
|
role="tab"
|
||||||
|
:aria-selected="activeTab === tab.value"
|
||||||
|
:tabindex="activeTab === tab.value ? 0 : -1"
|
||||||
|
@click="activeTab = tab.value as 'all' | 'unacked'; loadDetections()"
|
||||||
|
@keydown="handleTabKey($event, idx)"
|
||||||
|
:ref="(el) => collectTabRef(el as HTMLElement | null, idx)"
|
||||||
|
:class="[
|
||||||
|
'px-4 py-2 text-sm transition-colors border-b-2 -mb-px whitespace-nowrap',
|
||||||
|
activeTab === tab.value
|
||||||
|
? 'border-accent text-accent'
|
||||||
|
: 'border-transparent text-text-dim hover:text-text-primary'
|
||||||
|
]"
|
||||||
|
>
|
||||||
|
{{ tab.label }}
|
||||||
|
<span v-if="tab.count !== null" class="ml-1 text-xs opacity-70">({{ tab.count }})</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Scorer filter -->
|
||||||
|
<div class="flex items-center gap-2 shrink-0">
|
||||||
|
<label for="scorer-filter" class="text-xs text-text-dim whitespace-nowrap">Source:</label>
|
||||||
|
<select
|
||||||
|
id="scorer-filter"
|
||||||
|
v-model="scorerFilter"
|
||||||
|
@change="loadDetections()"
|
||||||
|
class="text-xs bg-surface border border-surface-border rounded px-2 py-1 text-text-primary focus:outline-none focus:border-accent"
|
||||||
|
>
|
||||||
|
<option value="">All</option>
|
||||||
|
<option value="anomaly">Anomaly scorer</option>
|
||||||
|
<option value="cybersec">Cybersec scorer</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Label filter -->
|
||||||
|
<div class="flex items-center gap-2 shrink-0">
|
||||||
|
<label for="label-filter" class="text-xs text-text-dim whitespace-nowrap">Label:</label>
|
||||||
|
<select
|
||||||
|
id="label-filter"
|
||||||
|
v-model="labelFilter"
|
||||||
|
@change="loadDetections()"
|
||||||
|
class="text-xs bg-surface border border-surface-border rounded px-2 py-1 text-text-primary focus:outline-none focus:border-accent"
|
||||||
|
>
|
||||||
|
<option value="">All</option>
|
||||||
|
<optgroup label="Anomaly labels">
|
||||||
|
<option v-for="lbl in anomalyLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="Cybersec labels">
|
||||||
|
<option v-for="lbl in cybersecLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
|
||||||
|
</optgroup>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Collapse dupes toggle -->
|
||||||
|
<button
|
||||||
|
@click="collapseDupes = !collapseDupes"
|
||||||
|
:class="[
|
||||||
|
'text-xs px-2 py-1 rounded border transition-colors shrink-0',
|
||||||
|
collapseDupes
|
||||||
|
? 'border-accent text-accent bg-accent/10'
|
||||||
|
: 'border-surface-border text-text-dim hover:text-text-primary'
|
||||||
|
]"
|
||||||
|
:title="collapseDupes ? 'Showing one per message — click to expand' : 'Click to collapse duplicate messages'"
|
||||||
|
>
|
||||||
|
{{ collapseDupes ? 'collapsed' : 'collapse similar' }}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Loading state -->
|
||||||
|
<div v-if="loading" class="rounded border border-surface-border overflow-hidden divide-y divide-surface-border">
|
||||||
|
<div v-for="i in 6" :key="i" class="px-4 py-3 flex items-center gap-4">
|
||||||
|
<div class="loading-shimmer h-4 w-14 rounded" />
|
||||||
|
<div class="loading-shimmer h-4 rounded" :style="`width: ${80 + (i * 37) % 100}px`" />
|
||||||
|
<div class="loading-shimmer h-3 w-10 rounded" />
|
||||||
|
<div class="loading-shimmer h-3 w-20 rounded" />
|
||||||
|
<div class="loading-shimmer h-3 flex-1 rounded" />
|
||||||
|
<div class="loading-shimmer h-3 w-24 rounded" />
|
||||||
|
<div class="loading-shimmer h-7 w-20 rounded" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Empty state -->
|
||||||
|
<div v-else-if="filteredDetections.length === 0" class="text-text-dim py-12 text-center text-sm">
  <!-- Keyed on the filtered list (the same list the table renders), so the
       "Unacknowledged" tab shows its message once every loaded detection
       has been acknowledged instead of an empty table. -->
  <p v-if="activeTab === 'unacked'">No unacknowledged detections — all clear.</p>
  <p v-else-if="!scorerStatus?.enabled">Enable anomaly scoring to start detecting.</p>
  <p v-else>No detections yet. Run the scorer after gleaning to populate this list.</p>
</div>
|
||||||
|
|
||||||
|
<!-- Detections table -->
|
||||||
|
<div v-else class="rounded border border-surface-border overflow-hidden">
|
||||||
|
<div class="overflow-x-auto">
|
||||||
|
<table class="w-full text-sm min-w-[700px]">
|
||||||
|
<thead class="bg-surface-raised border-b border-surface-border">
|
||||||
|
<tr>
|
||||||
|
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider w-20">Sev</th>
|
||||||
|
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Label</th>
|
||||||
|
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider w-16">Score</th>
|
||||||
|
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Source</th>
|
||||||
|
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider">Log entry</th>
|
||||||
|
<th class="text-left px-4 py-2.5 text-text-dim font-medium text-xs uppercase tracking-wider w-32">Detected</th>
|
||||||
|
<th class="px-4 py-2.5 w-28"></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr
|
||||||
|
v-for="det in filteredDetections"
|
||||||
|
:key="det.id"
|
||||||
|
:class="[
|
||||||
|
'border-b border-surface-border transition-colors cursor-pointer',
|
||||||
|
det.acknowledged ? 'opacity-50 hover:opacity-75' : 'hover:bg-surface-raised'
|
||||||
|
]"
|
||||||
|
@click="openDrawer(det)"
|
||||||
|
>
|
||||||
|
<td class="px-4 py-2.5">
|
||||||
|
<span :class="['text-xs font-semibold', severityTextClass(det.severity)]">
|
||||||
|
{{ det.severity }}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td class="px-4 py-2.5">
|
||||||
|
<div class="flex items-center gap-1.5 flex-wrap">
|
||||||
|
<span class="font-mono text-xs text-accent bg-surface px-1.5 py-0.5 rounded border border-surface-border">
|
||||||
|
{{ det.anomaly_label }}
|
||||||
|
</span>
|
||||||
|
<span
|
||||||
|
v-if="det.scorer === 'cybersec'"
|
||||||
|
class="text-xs px-1.5 py-0.5 rounded bg-surface-raised border border-surface-border text-text-dim font-mono"
|
||||||
|
>cybersec</span>
|
||||||
|
<span
|
||||||
|
v-if="collapseDupes && det.count && det.count > 1"
|
||||||
|
class="text-xs px-1.5 py-0.5 rounded bg-accent/10 border border-accent/40 text-accent font-mono"
|
||||||
|
:title="`${det.count} similar events collapsed`"
|
||||||
|
>×{{ det.count }}</span>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td class="px-4 py-2.5">
|
||||||
|
<div class="flex items-center gap-1.5">
|
||||||
|
<div class="w-10 h-1.5 bg-surface-raised rounded-full overflow-hidden">
|
||||||
|
<div
|
||||||
|
class="h-full rounded-full"
|
||||||
|
:class="scoreBarColor(det.anomaly_score)"
|
||||||
|
:style="{ width: `${Math.round(det.anomaly_score * 100)}%` }"
|
||||||
|
></div>
|
||||||
|
</div>
|
||||||
|
<span class="text-xs text-text-dim font-mono">{{ Math.round(det.anomaly_score * 100) }}%</span>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td class="px-4 py-2.5 text-text-dim text-xs font-mono truncate max-w-[120px]">{{ det.source_id }}</td>
|
||||||
|
<td class="px-4 py-2.5 text-text-dim text-xs truncate max-w-[260px]" :title="det.text">{{ det.text }}</td>
|
||||||
|
<td class="px-4 py-2.5 text-text-dim text-xs whitespace-nowrap">{{ formatTs(det.detected_at) }}</td>
|
||||||
|
<td class="px-4 py-2.5 text-right">
|
||||||
|
<span
|
||||||
|
v-if="det.acknowledged"
|
||||||
|
class="text-xs text-text-dim italic"
|
||||||
|
>reviewed</span>
|
||||||
|
<button
|
||||||
|
v-else
|
||||||
|
@click.stop="openDrawer(det)"
|
||||||
|
class="text-xs px-2 py-1 rounded border border-surface-border text-text-dim hover:text-text-primary hover:border-accent transition-colors"
|
||||||
|
>
|
||||||
|
Acknowledge
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Acknowledge drawer -->
|
||||||
|
<Transition name="drawer">
|
||||||
|
<div v-if="drawer" class="mt-6 rounded border border-accent bg-surface p-5">
|
||||||
|
<div class="flex items-start justify-between mb-4 gap-4">
|
||||||
|
<div class="min-w-0">
|
||||||
|
<div class="flex items-center gap-2 flex-wrap mb-1">
|
||||||
|
<span :class="['text-xs font-semibold', severityTextClass(drawer.severity)]">{{ drawer.severity }}</span>
|
||||||
|
<span class="font-mono text-xs text-accent bg-surface-raised px-1.5 py-0.5 rounded border border-surface-border">
|
||||||
|
{{ drawer.anomaly_label }}
|
||||||
|
</span>
|
||||||
|
<span class="text-xs text-text-dim font-mono">{{ Math.round(drawer.anomaly_score * 100) }}% confidence</span>
|
||||||
|
</div>
|
||||||
|
<p class="text-text-dim text-xs font-mono">
|
||||||
|
source: {{ drawer.source_id }}
|
||||||
|
<span v-if="drawer.timestamp_iso"> · {{ formatTs(drawer.timestamp_iso) }}</span>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
@click="drawer = null"
|
||||||
|
class="text-text-dim hover:text-text-primary transition-colors shrink-0 text-lg leading-none"
|
||||||
|
aria-label="Close drawer"
|
||||||
|
>✕</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Full log text -->
|
||||||
|
<div class="mb-4 bg-surface-raised rounded border border-surface-border p-3 text-xs font-mono text-text-primary break-all leading-relaxed max-h-40 overflow-y-auto">
|
||||||
|
{{ drawer.text }}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Already acknowledged -->
|
||||||
|
<div v-if="drawer.acknowledged" class="text-text-dim text-sm">
|
||||||
|
<p class="mb-1">Acknowledged <span class="text-text-primary">{{ formatTs(drawer.acknowledged_at) }}</span></p>
|
||||||
|
<p v-if="drawer.notes" class="text-xs italic">{{ drawer.notes }}</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Acknowledge form -->
|
||||||
|
<div v-else>
|
||||||
|
<label for="ack-notes" class="block text-xs text-text-dim mb-1.5">Notes (optional)</label>
|
||||||
|
<textarea
|
||||||
|
id="ack-notes"
|
||||||
|
v-model="ackNotes"
|
||||||
|
rows="2"
|
||||||
|
placeholder="False positive, known pattern, remediated…"
|
||||||
|
class="w-full bg-surface border border-surface-border rounded px-3 py-2 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent resize-none mb-3"
|
||||||
|
></textarea>
|
||||||
|
<div class="flex items-center gap-3">
|
||||||
|
<button
|
||||||
|
@click="acknowledge(drawer)"
|
||||||
|
:disabled="ackLoading"
|
||||||
|
class="px-4 py-2 bg-accent text-surface text-sm rounded font-medium hover:opacity-90 transition-opacity disabled:opacity-40"
|
||||||
|
>
|
||||||
|
{{ ackLoading ? 'Saving…' : 'Mark as reviewed' }}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
@click="drawer = null"
|
||||||
|
class="px-4 py-2 text-text-dim text-sm rounded border border-surface-border hover:text-text-primary transition-colors"
|
||||||
|
>
|
||||||
|
Cancel
|
||||||
|
</button>
|
||||||
|
<span v-if="ackError" class="text-xs text-sev-error">{{ ackError }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</Transition>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
|
||||||
|
<script setup lang="ts">
|
||||||
|
import { ref, computed, onMounted } from 'vue'
|
||||||
|
|
||||||
|
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
||||||
|
|
||||||
|
// ── Types ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * One detection row, as found in the `detections` array of the
 * `/api/anomaly/detections` response and rendered in the table and drawer.
 */
interface Detection {
  // ── identity ──
  id: string
  source_id: string
  entry_id: string

  // ── classification ──
  anomaly_label: string
  /** Rendered as a percentage via `score * 100`; presumably in 0..1 (confirm against the API). */
  anomaly_score: number
  severity: string
  /** Which scorer produced the row; the template checks for the value 'cybersec'. */
  scorer: string

  // ── log entry ──
  text: string
  timestamp_iso: string | null
  detected_at: string

  // ── triage ──
  /** May arrive as a number or boolean; loadDetections coerces it to a boolean. */
  acknowledged: number | boolean
  acknowledged_at: string | null
  notes: string

  /** Group size; only set on rows produced by the duplicate-collapsing view. */
  count?: number
}
|
||||||
|
|
||||||
|
/** Status payload assigned from the `/api/anomaly/status` response. */
interface ScorerStatus {
  // ── configuration ──
  enabled: boolean
  model: string | null
  threshold: number
  device: string

  // ── current / most recent run ──
  running: boolean
  last_run_at: string | null
  last_scored: number
  last_detections: number
  last_error: string | null

  // ── running totals ──
  total_scored: number
  total_detections: number
}

/**
 * Status payload assigned from the `/api/cybersec/status` response.
 * Same fields as ScorerStatus plus `last_duration_s`.
 */
interface CybersecStatus extends ScorerStatus {
  last_duration_s: number | null
}
|
||||||
|
|
||||||
|
// ── State ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Server data
const detections = ref<Detection[]>([])
const scorerStatus = ref<ScorerStatus | null>(null)
const cybersecStatus = ref<CybersecStatus | null>(null)

// In-flight flags (the detections list starts in the loading state)
const loading = ref(true)
const triggerLoading = ref(false)
const cybersecTriggerLoading = ref(false)
const ackLoading = ref(false)

// Acknowledge drawer: the detection being reviewed, the notes draft, and the last save error
const drawer = ref<Detection | null>(null)
const ackNotes = ref('')
const ackError = ref<string | null>(null)

// List filters and view options
const activeTab = ref<'all' | 'unacked'>('all')
const labelFilter = ref('')
const scorerFilter = ref('')
const collapseDupes = ref(true)

// Tab button elements, indexed like `tabs`, used for arrow-key focus movement
const tabRefs = ref<(HTMLElement | null)[]>([])
|
||||||
|
|
||||||
|
// Options listed under "Anomaly labels" in the label filter dropdown.
const anomalyLabels = [
  'SECURITY_ANOMALY',
  'SYSTEM_FAILURE',
  'PERFORMANCE_ISSUE',
  'NETWORK_ANOMALY',
  'CONFIG_ERROR',
  'HARDWARE_ISSUE',
  'CRITICAL',
  'ERROR',
]

// Options listed under "Cybersec labels" in the label filter dropdown.
const cybersecLabels = [
  'authentication failure or brute force attack',
  'privilege escalation or unauthorized access',
  'network intrusion or port scan',
  'malware or suspicious process activity',
  'data exfiltration or unusual outbound traffic',
]
|
||||||
|
|
||||||
|
// ── Tabs ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Number of loaded detections that have not been acknowledged yet.
const unackedCount = computed(() => {
  let pending = 0
  for (const det of detections.value) {
    if (!det.acknowledged) pending += 1
  }
  return pending
})

// Tab descriptors for the acknowledgement filter, each with its badge count.
const tabs = computed(() => {
  const total = detections.value.length
  return [
    { value: 'all', label: 'All', count: total },
    { value: 'unacked', label: 'Unacknowledged', count: unackedCount.value },
  ]
})
|
||||||
|
|
||||||
|
/**
 * Detections shown in the table.
 *
 * Applies the active tab (all / unacknowledged only). When `collapseDupes` is
 * on, rows sharing the same label and the same first 100 characters of text
 * are folded into one row: the highest-scoring member of the group, annotated
 * with `count` = group size. Groups keep the order in which they first appear.
 */
const filteredDetections = computed(() => {
  const visible = activeTab.value === 'unacked'
    ? detections.value.filter(d => !d.acknowledged)
    : detections.value
  if (!collapseDupes.value) return visible

  const groupKey = (d: Detection) => d.anomaly_label + '|' + d.text.slice(0, 100)
  const buckets = new Map<string, { best: Detection; count: number }>()

  for (const det of visible) {
    const key = groupKey(det)
    const bucket = buckets.get(key)
    if (bucket === undefined) {
      buckets.set(key, { best: det, count: 1 })
      continue
    }
    bucket.count += 1
    // Strictly greater: on a tie the earlier row stays the representative.
    if (det.anomaly_score > bucket.best.anomaly_score) bucket.best = det
  }

  return Array.from(buckets.values(), ({ best, count }) => ({ ...best, count }))
})
|
||||||
|
|
||||||
|
// ── Data loading ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Fetch up to 200 detections from `/api/anomaly/detections`, applying the
 * label and scorer filters when they are set, and store them in `detections`
 * with `acknowledged` coerced to a boolean.
 *
 * Failures are logged to the console and leave the current list untouched.
 * `loading` is true for the duration of the call.
 */
async function loadDetections() {
  loading.value = true

  const query = new URLSearchParams({ limit: '200' })
  const optionalFilters: [string, string][] = [
    ['label', labelFilter.value],
    ['scorer', scorerFilter.value],
  ]
  for (const [name, value] of optionalFilters) {
    // Empty string means "All", so the parameter is left off the request.
    if (value) query.set(name, value)
  }

  try {
    const response = await fetch(`${BASE}/api/anomaly/detections?${query}`)
    if (!response.ok) throw new Error(`HTTP ${response.status}`)
    const payload = await response.json()
    const rows: Detection[] = payload.detections ?? []
    detections.value = rows.map(row => ({
      ...row,
      acknowledged: Boolean(row.acknowledged),
    }))
  } catch (e) {
    console.error('Failed to load detections', e)
  } finally {
    loading.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Refresh `scorerStatus` and `cybersecStatus` from their status endpoints.
 *
 * Both requests are issued together; each ref is updated only when its own
 * response is OK. Any thrown error is swallowed on purpose.
 */
async function loadScorerStatus() {
  try {
    const anomalyReq = fetch(`${BASE}/api/anomaly/status`)
    const cybersecReq = fetch(`${BASE}/api/cybersec/status`)
    const [anomalyRes, cybersecRes] = await Promise.all([anomalyReq, cybersecReq])

    if (anomalyRes.ok) scorerStatus.value = await anomalyRes.json()
    if (cybersecRes.ok) cybersecStatus.value = await cybersecRes.json()
  } catch {
    // scorer status is non-critical — fail silently
  }
}
|
||||||
|
|
||||||
|
// Initial data load: kick off the scorer status and detections requests as
// soon as the component is mounted. Neither call is awaited here.
onMounted(function loadInitialData() {
  void loadScorerStatus()
  void loadDetections()
})
|
||||||
|
|
||||||
|
// ── Actions ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Manually trigger an anomaly scoring pass via `POST /api/anomaly/run`.
 *
 * On success, status and detections are refreshed after a 2 s delay. If the
 * request fails or the server answers with a non-OK status, the failure is
 * logged and only the scorer status is refreshed right away.
 * `triggerLoading` is true while the trigger request is in flight.
 */
async function runScorer() {
  triggerLoading.value = true
  try {
    const res = await fetch(`${BASE}/api/anomaly/run`, { method: 'POST' })
    // A rejected trigger must not be treated as a started run.
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    setTimeout(() => { loadScorerStatus(); loadDetections() }, 2000)
  } catch (e) {
    // Without this catch a failed request would surface as an unhandled
    // rejection from the click handler.
    console.error('Failed to trigger anomaly scorer', e)
    void loadScorerStatus()
  } finally {
    triggerLoading.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Manually trigger a cybersec scoring pass via `POST /api/cybersec/run`.
 *
 * On success, status and detections are refreshed after a 2 s delay. If the
 * request fails or the server answers with a non-OK status, the failure is
 * logged and only the scorer status is refreshed right away.
 * `cybersecTriggerLoading` is true while the trigger request is in flight.
 */
async function runCybersec() {
  cybersecTriggerLoading.value = true
  try {
    const res = await fetch(`${BASE}/api/cybersec/run`, { method: 'POST' })
    // A rejected trigger must not be treated as a started run.
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    setTimeout(() => { loadScorerStatus(); loadDetections() }, 2000)
  } catch (e) {
    // Without this catch a failed request would surface as an unhandled
    // rejection from the click handler.
    console.error('Failed to trigger cybersec scorer', e)
    void loadScorerStatus()
  } finally {
    cybersecTriggerLoading.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Open the acknowledge drawer for `det`.
 *
 * The notes field is pre-filled with the detection's existing notes (empty
 * string when there are none) and any previous save error is cleared before
 * the drawer is shown.
 */
function openDrawer(det: Detection) {
  const existingNotes = det.notes ?? ''
  ackError.value = null
  ackNotes.value = existingNotes
  drawer.value = det
}
|
||||||
|
|
||||||
|
/**
 * Mark a detection as reviewed.
 *
 * Sends `POST /api/anomaly/detections/{id}/acknowledge`, passing the trimmed
 * notes as a `notes` query parameter when non-empty. On success the matching
 * row in `detections` is replaced in place (acknowledged, with timestamp and
 * notes), the drawer is closed, the notes draft is cleared, and the scorer
 * status is refreshed. On failure `ackError` is set and the drawer stays open.
 * `ackLoading` is true for the duration of the call.
 *
 * @param det Detection to acknowledge; only its `id` is used.
 */
async function acknowledge(det: Detection) {
  ackLoading.value = true
  ackError.value = null
  // Snapshot the notes now so the local row matches what was actually sent,
  // even if the textarea is edited while the request is in flight.
  const notes = ackNotes.value.trim()
  try {
    const params = new URLSearchParams()
    if (notes) params.set('notes', notes)
    const res = await fetch(
      `${BASE}/api/anomaly/detections/${encodeURIComponent(det.id)}/acknowledge?${params}`,
      { method: 'POST' }
    )
    if (!res.ok) throw new Error(`HTTP ${res.status}`)

    // update in-place so the row dims without a full reload
    const idx = detections.value.findIndex(d => d.id === det.id)
    const existing = idx !== -1 ? detections.value[idx] : null
    if (existing) {
      detections.value.splice(idx, 1, {
        ...existing,
        acknowledged: true,
        // Client-side timestamp so the drawer's "Acknowledged <time>" line is
        // populated when the row is reopened before the next reload.
        acknowledged_at: new Date().toISOString(),
        notes,
      })
    }
    drawer.value = null
    ackNotes.value = ''
    loadScorerStatus()
  } catch (e) {
    ackError.value = 'Failed to save — try again'
    console.error(e)
  } finally {
    ackLoading.value = false
  }
}
|
||||||
|
|
||||||
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Map a severity name to its text colour class.
 *
 * Matching is case-insensitive. `null`, empty, and unrecognised severities
 * all fall back to the dim text class.
 */
function severityTextClass(sev: string | null): string {
  switch (sev?.toUpperCase() ?? '') {
    case 'CRITICAL':
      return 'text-sev-critical'
    case 'ERROR':
      return 'text-sev-error'
    case 'WARN':
    case 'WARNING':
      return 'text-sev-warn'
    case 'INFO':
      return 'text-sev-info'
    case 'DEBUG':
      return 'text-text-dim'
    default:
      return 'text-text-dim'
  }
}
|
||||||
|
|
||||||
|
/**
 * Pick the background class for a score bar.
 *
 * Bands are checked from highest to lowest: >= 0.90 critical, >= 0.80 error,
 * >= 0.65 warn; anything else (including NaN) gets the info colour.
 */
function scoreBarColor(score: number): string {
  const bands: ReadonlyArray<readonly [number, string]> = [
    [0.90, 'bg-sev-critical'],
    [0.80, 'bg-sev-error'],
    [0.65, 'bg-sev-warn'],
  ]
  const hit = bands.find(([floor]) => score >= floor)
  return hit ? hit[1] : 'bg-sev-info'
}
|
||||||
|
|
||||||
|
/**
 * Format an ISO timestamp for display as short month, day, hour and minute
 * in the user's locale.
 *
 * @param iso Timestamp string, or null.
 * @returns '—' for null/empty input; the input string unchanged when it
 *          cannot be parsed as a date; otherwise the localised timestamp.
 */
function formatTs(iso: string | null): string {
  if (!iso) return '—'
  const parsed = new Date(iso)
  // An unparseable string does not throw: it yields an invalid Date whose
  // toLocaleString() is the literal "Invalid Date". Fall back to the raw
  // input explicitly instead of relying on the catch below.
  if (Number.isNaN(parsed.getTime())) return iso
  try {
    return parsed.toLocaleString(undefined, {
      month: 'short', day: 'numeric',
      hour: '2-digit', minute: '2-digit',
    })
  } catch { return iso }
}
|
||||||
|
|
||||||
|
// ── Keyboard nav for tabs ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Template ref callback for the tab buttons: stores the element for tab `idx`
 * (or null, as passed in) at the same index in `tabRefs`.
 */
function collectTabRef(el: HTMLElement | null, idx: number) {
  const slots = tabRefs.value
  slots[idx] = el
}
|
||||||
|
|
||||||
|
/**
 * Arrow-key navigation for the tab list.
 *
 * ArrowRight / ArrowLeft move focus to the next / previous tab button,
 * wrapping around at either end. All other keys are left untouched. Only
 * focus is moved here; `activeTab` is not changed.
 *
 * @param e   The keydown event from a tab button.
 * @param idx Index of the tab that received the event.
 */
function handleTabKey(e: KeyboardEvent, idx: number) {
  const step = e.key === 'ArrowRight' ? 1 : e.key === 'ArrowLeft' ? -1 : 0
  if (step === 0) return

  const count = tabs.value.length
  // Adding `count` keeps the left-hand operand non-negative when stepping back from 0.
  const next = (idx + step + count) % count
  e.preventDefault()
  tabRefs.value[next]?.focus()
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<style scoped>
|
||||||
|
.drawer-enter-active,
|
||||||
|
.drawer-leave-active { transition: opacity 0.15s, transform 0.15s; }
|
||||||
|
.drawer-enter-from,
|
||||||
|
.drawer-leave-to { opacity: 0; transform: translateY(-6px); }
|
||||||
|
</style>
|
||||||
|
|
@ -27,7 +27,7 @@
|
||||||
:class="[
|
:class="[
|
||||||
'flex-1 px-4 py-3 rounded border text-sm transition-colors text-left',
|
'flex-1 px-4 py-3 rounded border text-sm transition-colors text-left',
|
||||||
prefs.entry_point_style === opt.value
|
prefs.entry_point_style === opt.value
|
||||||
? 'border-accent bg-accent/10 text-accent'
|
? 'border-accent bg-accent-muted text-accent'
|
||||||
: 'border-surface-border text-text-muted hover:text-text-primary hover:border-accent'
|
: 'border-surface-border text-text-muted hover:text-text-primary hover:border-accent'
|
||||||
]"
|
]"
|
||||||
>
|
>
|
||||||
|
|
@ -112,7 +112,7 @@
|
||||||
:class="[
|
:class="[
|
||||||
'flex-1 px-4 py-3 rounded border text-sm transition-colors text-left',
|
'flex-1 px-4 py-3 rounded border text-sm transition-colors text-left',
|
||||||
prefs.tech_level === opt.value
|
prefs.tech_level === opt.value
|
||||||
? 'border-accent bg-accent/10 text-accent'
|
? 'border-accent bg-accent-muted text-accent'
|
||||||
: 'border-surface-border text-text-muted hover:text-text-primary hover:border-accent'
|
: 'border-surface-border text-text-muted hover:text-text-primary hover:border-accent'
|
||||||
]"
|
]"
|
||||||
>
|
>
|
||||||
|
|
@ -146,7 +146,7 @@
|
||||||
]"
|
]"
|
||||||
:title="rule.enabled ? 'Enabled — click to disable' : 'Disabled — click to enable'"
|
:title="rule.enabled ? 'Enabled — click to disable' : 'Disabled — click to enable'"
|
||||||
>
|
>
|
||||||
<span :class="['absolute top-0.5 w-4 h-4 rounded-full bg-white shadow transition-transform', rule.enabled ? 'translate-x-4' : 'translate-x-0.5']"></span>
|
<span :class="['absolute top-0.5 left-0.5 w-4 h-4 rounded-full bg-white shadow transition-transform', rule.enabled ? 'translate-x-4' : 'translate-x-0']"></span>
|
||||||
</button>
|
</button>
|
||||||
<div class="flex-1 min-w-0">
|
<div class="flex-1 min-w-0">
|
||||||
<div class="flex items-center gap-2 flex-wrap">
|
<div class="flex items-center gap-2 flex-wrap">
|
||||||
|
|
@ -282,6 +282,200 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Remote Hosts (SSH targets) -->
|
||||||
|
<div>
|
||||||
|
<h2 class="text-text-primary text-sm font-semibold mb-1">Remote Hosts</h2>
|
||||||
|
<p class="text-text-dim text-xs mb-3">
|
||||||
|
SSH hosts to pull logs from. Private keys are stored as path references only — key contents are never read or transmitted.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<!-- Target list -->
|
||||||
|
<div v-if="sshTargets.length > 0" class="space-y-2 mb-3">
|
||||||
|
<div
|
||||||
|
v-for="t in sshTargets"
|
||||||
|
:key="t.id"
|
||||||
|
class="rounded border border-surface-border bg-surface p-3"
|
||||||
|
>
|
||||||
|
<div class="flex items-start gap-3">
|
||||||
|
<div class="flex-1 min-w-0">
|
||||||
|
<div class="flex items-center gap-2 flex-wrap">
|
||||||
|
<span class="text-sm text-text-primary font-medium">{{ t.label }}</span>
|
||||||
|
<span class="font-mono text-xs text-text-dim">{{ t.user }}@{{ t.host }}:{{ t.port }}</span>
|
||||||
|
<!-- Connection status badge -->
|
||||||
|
<span
|
||||||
|
v-if="t.last_ok === true"
|
||||||
|
class="text-[10px] px-1.5 py-0.5 rounded bg-green-900/30 text-green-400 border border-green-800/40"
|
||||||
|
>Connected</span>
|
||||||
|
<span
|
||||||
|
v-else-if="t.last_ok === false"
|
||||||
|
class="text-[10px] px-1.5 py-0.5 rounded bg-red-900/30 text-sev-error border border-red-800/40"
|
||||||
|
:title="t.last_error ?? ''"
|
||||||
|
>Unreachable</span>
|
||||||
|
<span
|
||||||
|
v-else
|
||||||
|
class="text-[10px] px-1.5 py-0.5 rounded bg-surface-raised text-text-dim border border-surface-border"
|
||||||
|
>Not tested</span>
|
||||||
|
</div>
|
||||||
|
<p class="text-xs text-text-dim font-mono mt-0.5 truncate">{{ t.key_path }}</p>
|
||||||
|
<p v-if="t.key_warning" class="text-xs text-yellow-400 mt-0.5">⚠ {{ t.key_warning }}</p>
|
||||||
|
<!-- Test result (persistent inline, not a toast) -->
|
||||||
|
<p
|
||||||
|
v-if="sshTestResults[t.id]"
|
||||||
|
class="text-xs mt-1"
|
||||||
|
:class="sshTestResults[t.id]!.ok ? 'text-green-400' : 'text-sev-error'"
|
||||||
|
>
|
||||||
|
{{ sshTestResults[t.id]!.ok ? 'Connection OK' : sshTestResults[t.id]!.error }}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center gap-2 shrink-0">
|
||||||
|
<button
|
||||||
|
@click="testSshTarget(t.id)"
|
||||||
|
:disabled="sshTesting.has(t.id)"
|
||||||
|
class="text-xs text-text-dim hover:text-accent transition-colors px-2 py-1 rounded hover:bg-surface disabled:opacity-40"
|
||||||
|
>{{ sshTesting.has(t.id) ? 'Testing…' : 'Test' }}</button>
|
||||||
|
<button
|
||||||
|
@click="editSshTarget(t)"
|
||||||
|
class="text-xs text-text-dim hover:text-accent transition-colors px-2 py-1 rounded hover:bg-surface"
|
||||||
|
>Edit</button>
|
||||||
|
<button
|
||||||
|
@click="deleteSshTarget(t.id, t.label)"
|
||||||
|
class="text-xs text-text-dim hover:text-sev-error transition-colors px-2 py-1 rounded hover:bg-surface"
|
||||||
|
>Delete</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<p v-else class="text-text-dim text-xs mb-3">
|
||||||
|
No remote hosts configured. Add an SSH host to pull logs from remote machines without manual file exports.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<!-- Add / Edit form -->
|
||||||
|
<div v-if="sshForm.open" class="rounded border border-surface-border bg-surface p-3 space-y-3 mb-3">
|
||||||
|
<h3 class="text-text-primary text-xs font-medium">{{ sshForm.editId ? 'Edit host' : 'Add remote host' }}</h3>
|
||||||
|
<div class="grid grid-cols-1 sm:grid-cols-2 gap-3">
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Display name</label>
|
||||||
|
<input v-model="sshForm.label" type="text" placeholder="e.g. rack-server-01"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Host</label>
|
||||||
|
<input v-model="sshForm.host" type="text" placeholder="192.168.1.10 or server.example.com"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Port</label>
|
||||||
|
<input v-model.number="sshForm.port" type="number" min="1" max="65535" placeholder="22"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm text-text-primary focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Username</label>
|
||||||
|
<input v-model="sshForm.user" type="text" placeholder="root or alan"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
<div class="sm:col-span-2">
|
||||||
|
<label class="block text-xs text-text-dim mb-1">SSH key path</label>
|
||||||
|
<input v-model="sshForm.key_path" type="text" placeholder="~/.ssh/id_ed25519"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm font-mono text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<p v-if="sshFormError" class="text-sev-error text-xs">{{ sshFormError }}</p>
|
||||||
|
<div class="flex gap-2">
|
||||||
|
<button @click="saveSshTarget" :disabled="sshFormSaving"
|
||||||
|
class="px-3 py-1.5 bg-accent text-surface text-xs rounded font-medium hover:opacity-90 transition-opacity disabled:opacity-50">
|
||||||
|
{{ sshFormSaving ? 'Saving…' : (sshForm.editId ? 'Save changes' : 'Add host') }}
|
||||||
|
</button>
|
||||||
|
<button @click="closeSshForm" class="text-text-dim hover:text-text-primary text-xs">Cancel</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button v-if="!sshForm.open" @click="sshForm.open = true" class="text-accent text-xs hover:underline">
|
||||||
|
+ Add remote host
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Ticket Trackers -->
|
||||||
|
<div>
|
||||||
|
<h2 class="text-text-primary text-sm font-semibold mb-1">Ticket Trackers</h2>
|
||||||
|
<p class="text-text-dim text-xs mb-4">
|
||||||
|
Connect external issue trackers to export incidents with one click from the Incidents view.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<!-- Notion -->
|
||||||
|
<div class="mb-4">
|
||||||
|
<h3 class="text-text-primary text-xs font-medium mb-2">Notion</h3>
|
||||||
|
<div class="grid grid-cols-1 sm:grid-cols-2 gap-3 mb-3">
|
||||||
|
<div class="sm:col-span-2">
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Integration token</label>
|
||||||
|
<div class="relative">
|
||||||
|
<input v-model="prefs.notion_token" :type="showNotionToken ? 'text' : 'password'"
|
||||||
|
placeholder="secret_xxxxxxxxxxxx"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm font-mono text-text-primary placeholder-text-dim focus:outline-none focus:border-accent pr-14" />
|
||||||
|
<button @click="showNotionToken = !showNotionToken"
|
||||||
|
class="absolute right-2 top-1/2 -translate-y-1/2 text-xs text-text-dim hover:text-accent">
|
||||||
|
{{ showNotionToken ? 'hide' : 'show' }}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="sm:col-span-2">
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Database ID</label>
|
||||||
|
<input v-model="prefs.notion_database_id" type="text"
|
||||||
|
placeholder="8-4-4-4-12 UUID from the database URL"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm font-mono text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Jira -->
|
||||||
|
<div class="mb-4">
|
||||||
|
<h3 class="text-text-primary text-xs font-medium mb-2">Jira</h3>
|
||||||
|
<div class="grid grid-cols-1 sm:grid-cols-2 gap-3 mb-3">
|
||||||
|
<div class="sm:col-span-2">
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Jira URL</label>
|
||||||
|
<input v-model="prefs.jira_url" type="url"
|
||||||
|
placeholder="https://yourorg.atlassian.net"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm font-mono text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Account email</label>
|
||||||
|
<input v-model="prefs.jira_email" type="email"
|
||||||
|
placeholder="you@example.com"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">API token</label>
|
||||||
|
<div class="relative">
|
||||||
|
<input v-model="prefs.jira_api_token" :type="showJiraToken ? 'text' : 'password'"
|
||||||
|
placeholder="Atlassian API token"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm font-mono text-text-primary placeholder-text-dim focus:outline-none focus:border-accent pr-14" />
|
||||||
|
<button @click="showJiraToken = !showJiraToken"
|
||||||
|
class="absolute right-2 top-1/2 -translate-y-1/2 text-xs text-text-dim hover:text-accent">
|
||||||
|
{{ showJiraToken ? 'hide' : 'show' }}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Project key</label>
|
||||||
|
<input v-model="prefs.jira_project_key" type="text"
|
||||||
|
placeholder="OPS"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm font-mono text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label class="block text-xs text-text-dim mb-1">Issue type</label>
|
||||||
|
<input v-model="prefs.jira_issue_type" type="text"
|
||||||
|
placeholder="Bug"
|
||||||
|
class="w-full bg-surface-raised border border-surface-border rounded px-2 py-1.5 text-sm text-text-primary placeholder-text-dim focus:outline-none focus:border-accent" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button @click="saveTicketTrackers"
|
||||||
|
class="px-4 py-2 bg-accent text-surface text-sm rounded font-medium hover:opacity-90 transition-opacity">
|
||||||
|
Save tracker settings
|
||||||
|
</button>
|
||||||
|
<span v-if="ticketSaveStatus" :class="ticketSaveStatus.ok ? 'text-green-400' : 'text-sev-error'" class="text-xs ml-3">{{ ticketSaveStatus.msg }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
<p
|
<p
|
||||||
v-if="saveStatus"
|
v-if="saveStatus"
|
||||||
role="status"
|
role="status"
|
||||||
|
|
@ -320,6 +514,26 @@ interface Prefs {
|
||||||
pihole_api_key: string
|
pihole_api_key: string
|
||||||
router_source_ids: string
|
router_source_ids: string
|
||||||
device_names: string
|
device_names: string
|
||||||
|
notion_token: string
|
||||||
|
notion_database_id: string
|
||||||
|
jira_url: string
|
||||||
|
jira_email: string
|
||||||
|
jira_api_token: string
|
||||||
|
jira_project_key: string
|
||||||
|
jira_issue_type: string
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * One remote SSH host entry, as loaded from `/api/ssh-targets` into the
 * settings list.
 */
interface SshTarget {
  // --- identity ---
  /** Identifier used in the `/api/ssh-targets/{id}` routes and as the key for test results. */
  id: string
  /** Display name shown in the list and in the delete confirmation. */
  label: string

  // --- connection parameters (rendered as `user@host:port`) ---
  user: string
  host: string
  port: number
  /** Path to the SSH private key; shown verbatim under the host row. */
  key_path: string
  /** Optional warning about the key, rendered next to a ⚠ marker when present. */
  key_warning?: string | null

  // --- outcome of the most recent connection test ---
  /** NOTE(review): presumably a timestamp of the last test — not read by the visible code; confirm format. */
  last_tested: string | null
  /** `true` → "Connected" badge, `false` → "Unreachable" badge, otherwise "Not tested". */
  last_ok: boolean | null
  /** Used as the tooltip of the "Unreachable" badge. */
  last_error: string | null
}
|
||||||
|
|
||||||
const techLevelOptions: { value: 'homelab' | 'sysadmin' | 'executive'; label: string; desc: string }[] = [
|
const techLevelOptions: { value: 'homelab' | 'sysadmin' | 'executive'; label: string; desc: string }[] = [
|
||||||
|
|
@ -356,13 +570,32 @@ async function setTechLevel(level: 'homelab' | 'sysadmin' | 'executive') {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const prefs = ref<Prefs>({ entry_point_style: 'topbar', llm_url: '', llm_model: '', llm_api_key: '', tech_level: 'sysadmin', severity_overrides: [], pihole_url: '', pihole_version: 'v6', pihole_api_key: '', router_source_ids: '', device_names: '' })
|
const prefs = ref<Prefs>({ entry_point_style: 'topbar', llm_url: '', llm_model: '', llm_api_key: '', tech_level: 'sysadmin', severity_overrides: [], pihole_url: '', pihole_version: 'v6', pihole_api_key: '', router_source_ids: '', device_names: '', notion_token: '', notion_database_id: '', jira_url: '', jira_email: '', jira_api_token: '', jira_project_key: '', jira_issue_type: 'Bug' })
|
||||||
const saveStatus = ref<{ ok: boolean; msg: string } | null>(null)
|
const saveStatus = ref<{ ok: boolean; msg: string } | null>(null)
|
||||||
const showAddOverride = ref(false)
|
const showAddOverride = ref(false)
|
||||||
const showApiKey = ref(false)
|
const showApiKey = ref(false)
|
||||||
const showPiholeKey = ref(false)
|
const showPiholeKey = ref(false)
|
||||||
|
const showNotionToken = ref(false)
|
||||||
|
const showJiraToken = ref(false)
|
||||||
const piholeStatus = ref<{ ok: boolean; msg: string } | null>(null)
|
const piholeStatus = ref<{ ok: boolean; msg: string } | null>(null)
|
||||||
|
const ticketSaveStatus = ref<{ ok: boolean; msg: string } | null>(null)
|
||||||
const newRule = ref<SeverityOverride>({ name: '', pattern: '', override_severity: 'WARN', enabled: true })
|
const newRule = ref<SeverityOverride>({ name: '', pattern: '', override_severity: 'WARN', enabled: true })
|
||||||
|
|
||||||
|
// SSH targets
|
||||||
|
const sshTargets = ref<SshTarget[]>([])
|
||||||
|
const sshTestResults = ref<Record<string, { ok: boolean; error: string | null }>>({})
|
||||||
|
const sshTesting = ref<Set<string>>(new Set())
|
||||||
|
const sshFormSaving = ref(false)
|
||||||
|
const sshFormError = ref<string | null>(null)
|
||||||
|
const sshForm = ref({
|
||||||
|
open: false,
|
||||||
|
editId: null as string | null,
|
||||||
|
label: '',
|
||||||
|
host: '',
|
||||||
|
port: 22,
|
||||||
|
user: '',
|
||||||
|
key_path: '',
|
||||||
|
})
|
||||||
const entryPointBtnRefs = ref<HTMLButtonElement[]>([])
|
const entryPointBtnRefs = ref<HTMLButtonElement[]>([])
|
||||||
|
|
||||||
const entryPointOptions = [
|
const entryPointOptions = [
|
||||||
|
|
@ -391,6 +624,7 @@ onMounted(async () => {
|
||||||
const res = await fetch(`${BASE}/api/settings`)
|
const res = await fetch(`${BASE}/api/settings`)
|
||||||
if (res.ok) prefs.value = await res.json()
|
if (res.ok) prefs.value = await res.json()
|
||||||
} catch { /* non-critical — defaults stay */ }
|
} catch { /* non-critical — defaults stay */ }
|
||||||
|
await loadSshTargets()
|
||||||
})
|
})
|
||||||
|
|
||||||
async function patch(body: Partial<Prefs>) {
|
async function patch(body: Partial<Prefs>) {
|
||||||
|
|
@ -490,4 +724,99 @@ async function testPihole() {
|
||||||
piholeStatus.value = { ok: false, msg: 'Network error' }
|
piholeStatus.value = { ok: false, msg: 'Network error' }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Ticket tracker settings ---
|
||||||
|
|
||||||
|
/**
 * Persist the Notion and Jira fields of the preferences through `patch()`
 * and show an inline status next to the save button.
 *
 * The success message clears itself after two seconds; the failure message
 * stays until the next save attempt.
 * NOTE(review): failure is only reported when `patch()` rejects — confirm
 * that `patch()` throws on non-2xx responses.
 */
async function saveTicketTrackers() {
  const current = prefs.value
  const trackerFields: Partial<Prefs> = {
    notion_token: current.notion_token,
    notion_database_id: current.notion_database_id,
    jira_url: current.jira_url,
    jira_email: current.jira_email,
    jira_api_token: current.jira_api_token,
    jira_project_key: current.jira_project_key,
    jira_issue_type: current.jira_issue_type,
  }

  ticketSaveStatus.value = null
  try {
    await patch(trackerFields)
  } catch {
    ticketSaveStatus.value = { ok: false, msg: 'Save failed — check server connection' }
    return
  }

  ticketSaveStatus.value = { ok: true, msg: 'Tracker settings saved' }
  setTimeout(() => {
    ticketSaveStatus.value = null
  }, 2000)
}
|
||||||
|
|
||||||
|
// --- SSH target management ---
|
||||||
|
|
||||||
|
/**
 * Refresh `sshTargets` from `GET /api/ssh-targets`.
 *
 * Best effort: a non-OK response or any thrown error leaves the current
 * list untouched and is not reported.
 */
async function loadSshTargets() {
  try {
    const response = await fetch(`${BASE}/api/ssh-targets`)
    if (!response.ok) return
    sshTargets.value = await response.json()
  } catch {
    /* non-critical */
  }
}
|
||||||
|
|
||||||
|
/**
 * Run a connection test for one SSH target and record the outcome in
 * `sshTestResults[id]`, then reload the target list so the status badge
 * reflects the new `last_ok` value.
 *
 * While the request is in flight the id is present in `sshTesting`
 * (the set is replaced rather than mutated so the ref change is observed).
 *
 * @param id Identifier of the target to test.
 */
async function testSshTarget(id: string) {
  sshTesting.value = new Set([...sshTesting.value, id])
  try {
    const res = await fetch(`${BASE}/api/ssh-targets/${id}/test`, { method: 'POST' })
    // Error responses may carry no JSON body at all; treat that as "no details".
    const data = await res.json().catch(() => null)

    let result: { ok: boolean; error: string | null }
    if (!res.ok) {
      // HTTP-level failure (e.g. unknown id, server error): the payload has no
      // `ok` flag, so build an explicit failed result instead of storing undefined.
      const detail = data?.detail
      result = { ok: false, error: typeof detail === 'string' ? detail : `Test failed (HTTP ${res.status})` }
    } else if (data?.ok === true) {
      result = { ok: true, error: null }
    } else {
      // The template prints `error` verbatim for failed tests, so never leave it empty.
      const reason = data?.error
      result = { ok: false, error: typeof reason === 'string' && reason ? reason : 'Connection failed' }
    }
    sshTestResults.value = { ...sshTestResults.value, [id]: result }

    // Refresh list so last_ok badge updates
    await loadSshTargets()
  } catch {
    sshTestResults.value = { ...sshTestResults.value, [id]: { ok: false, error: 'Network error' } }
  } finally {
    const next = new Set(sshTesting.value)
    next.delete(id)
    sshTesting.value = next
  }
}
|
||||||
|
|
||||||
|
/**
 * Open the add/edit form pre-filled with an existing target's values and
 * clear any leftover form error.
 *
 * @param t The target to edit; its id becomes the form's `editId`.
 */
function editSshTarget(t: SshTarget) {
  const { id, label, host, port, user, key_path } = t
  sshFormError.value = null
  sshForm.value = {
    open: true,
    editId: id,
    label,
    host,
    port,
    user,
    key_path,
  }
}
|
||||||
|
|
||||||
|
/**
 * Delete an SSH target after user confirmation, then reload the list.
 *
 * A failed delete (non-2xx response or network error) is reported to the
 * user instead of being dropped silently. On success the stored test
 * result for the target is discarded, and the edit form is closed if it
 * was editing this target (saving it would otherwise PATCH a removed id).
 *
 * @param id    Identifier of the target to delete.
 * @param label Display name, used in the confirmation and error dialogs.
 */
async function deleteSshTarget(id: string, label: string) {
  if (!confirm(`Delete remote host "${label}"?`)) return
  try {
    const res = await fetch(`${BASE}/api/ssh-targets/${id}`, { method: 'DELETE' })
    if (res.ok) {
      // Drop the stale inline test result for the removed target.
      sshTestResults.value = Object.fromEntries(
        Object.entries(sshTestResults.value).filter(([key]) => key !== id),
      )
      if (sshForm.value.editId === id) closeSshForm()
    } else {
      const body = await res.json().catch(() => null)
      const detail = typeof body?.detail === 'string' ? body.detail : `HTTP ${res.status}`
      alert(`Could not delete remote host "${label}": ${detail}`)
    }
    // Reload in both cases so the list matches the server (e.g. already-deleted target).
    await loadSshTargets()
  } catch {
    alert(`Could not delete remote host "${label}": network error`)
  }
}
|
||||||
|
|
||||||
|
/**
 * Validate the add/edit form and create (POST) or update (PATCH) an SSH
 * target, then close the form and reload the list.
 *
 * Validation failures and server/network errors are shown through
 * `sshFormError`; `sshFormSaving` is true while the request is in flight.
 * Text fields are sent trimmed, matching what the required-field check
 * actually validates.
 */
async function saveSshTarget() {
  const f = sshForm.value
  const label = f.label.trim()
  const host = f.host.trim()
  const user = f.user.trim()
  const keyPath = f.key_path.trim()
  if (!label || !host || !user || !keyPath) {
    sshFormError.value = 'All fields are required'
    return
  }
  // The port input is bound with v-model.number; a cleared field can reach us
  // as an empty string rather than a number, so normalise before checking.
  const port = Number(f.port)
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
    sshFormError.value = 'Port must be a whole number between 1 and 65535'
    return
  }

  sshFormSaving.value = true
  sshFormError.value = null
  try {
    const url = f.editId ? `${BASE}/api/ssh-targets/${f.editId}` : `${BASE}/api/ssh-targets`
    const method = f.editId ? 'PATCH' : 'POST'
    const res = await fetch(url, {
      method,
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ label, host, port, user, key_path: keyPath }),
    })
    if (!res.ok) {
      const err = await res.json().catch(() => null)
      // `detail` is only shown when it is plain text; structured error
      // payloads fall back to the generic message.
      const detail = err?.detail
      sshFormError.value = typeof detail === 'string' && detail ? detail : 'Save failed'
      return
    }
    closeSshForm()
    await loadSshTargets()
  } catch {
    sshFormError.value = 'Network error'
  } finally {
    sshFormSaving.value = false
  }
}
|
||||||
|
|
||||||
|
/** Hide the add/edit form, reset every field to its default and clear the form error. */
function closeSshForm() {
  sshFormError.value = null
  sshForm.value = {
    open: false,
    editId: null,
    label: '',
    host: '',
    port: 22,
    user: '',
    key_path: '',
  }
}
|
||||||
</script>
|
</script>
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,12 @@
|
||||||
<p class="text-text-dim text-sm">All hosts and services in the gleaned corpus.</p>
|
<p class="text-text-dim text-sm">All hosts and services in the gleaned corpus.</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex items-center gap-2 shrink-0">
|
<div class="flex items-center gap-2 shrink-0">
|
||||||
|
<button
|
||||||
|
@click="toggleScanPanel"
|
||||||
|
class="btn-secondary text-sm"
|
||||||
|
>
|
||||||
|
Scan
|
||||||
|
</button>
|
||||||
<button
|
<button
|
||||||
@click="showAddPanel = !showAddPanel"
|
@click="showAddPanel = !showAddPanel"
|
||||||
class="btn-secondary text-sm"
|
class="btn-secondary text-sm"
|
||||||
|
|
@ -27,6 +33,73 @@
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Filesystem scan panel -->
|
||||||
|
<div v-if="showScanPanel && !showWizard" class="mb-6 rounded border border-surface-border bg-surface-raised p-4">
|
||||||
|
<h2 class="text-text-primary font-medium text-sm mb-3">Scan for log files</h2>
|
||||||
|
<div class="flex gap-2 mb-4">
|
||||||
|
<input
|
||||||
|
v-model="scanQuery"
|
||||||
|
type="text"
|
||||||
|
placeholder="Optional: describe the problem (e.g. 'nginx 502 gateway error')"
|
||||||
|
class="input-field flex-1 text-sm"
|
||||||
|
@keydown.enter="runScan"
|
||||||
|
/>
|
||||||
|
<button @click="runScan" :disabled="scanning" class="btn-primary text-sm px-4">
|
||||||
|
{{ scanning ? 'Scanning…' : 'Scan' }}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-if="scanError" class="text-sev-error text-sm mb-3">{{ scanError }}</div>
|
||||||
|
|
||||||
|
<div v-if="scanCandidates.length > 0">
|
||||||
|
<p class="text-text-dim text-xs mb-2">
|
||||||
|
{{ scanCandidates.length }} file{{ scanCandidates.length === 1 ? '' : 's' }} found — ranked by recency{{ scanQuery ? ' and keyword match' : '' }}.
|
||||||
|
Select files to add as sources.
|
||||||
|
</p>
|
||||||
|
<div class="divide-y divide-surface-border border border-surface-border rounded overflow-hidden mb-3">
|
||||||
|
<label
|
||||||
|
v-for="c in scanCandidates"
|
||||||
|
:key="c.path"
|
||||||
|
class="flex items-start gap-3 px-3 py-2 hover:bg-surface cursor-pointer"
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
:value="c"
|
||||||
|
v-model="scanSelected"
|
||||||
|
class="mt-0.5 shrink-0"
|
||||||
|
/>
|
||||||
|
<div class="min-w-0 flex-1">
|
||||||
|
<div class="flex items-center gap-2 flex-wrap">
|
||||||
|
<span class="font-mono text-xs text-accent truncate">{{ c.path }}</span>
|
||||||
|
<span class="text-text-dim text-xs shrink-0">{{ formatBytes(c.size_bytes) }}</span>
|
||||||
|
<span class="text-text-dim text-xs shrink-0">{{ formatAge(c.mtime) }}</span>
|
||||||
|
<span
|
||||||
|
v-if="scanQuery"
|
||||||
|
class="text-text-dim text-xs shrink-0"
|
||||||
|
:title="`Relevance score: ${c.score}`"
|
||||||
|
>score {{ (c.score * 100).toFixed(0) }}%</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center gap-3">
|
||||||
|
<button
|
||||||
|
:disabled="scanSelected.length === 0 || scanAdding"
|
||||||
|
@click="addScanSelected"
|
||||||
|
class="btn-primary text-sm"
|
||||||
|
>
|
||||||
|
{{ scanAdding ? 'Adding…' : `Add ${scanSelected.length || ''} selected` }}
|
||||||
|
</button>
|
||||||
|
<button @click="scanSelected = []" class="btn-secondary text-sm">Deselect all</button>
|
||||||
|
<button @click="scanSelected = [...scanCandidates]" class="btn-secondary text-sm">Select all</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div v-else-if="scanRan && !scanning" class="text-text-dim text-sm">
|
||||||
|
No log files found in the scanned directories.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Post-setup Add Source panel (condensed wizard steps 1-2) -->
|
<!-- Post-setup Add Source panel (condensed wizard steps 1-2) -->
|
||||||
<div v-else-if="showAddPanel" class="mb-6">
|
<div v-else-if="showAddPanel" class="mb-6">
|
||||||
<SetupWizard
|
<SetupWizard
|
||||||
|
|
@ -184,6 +257,17 @@ interface DbSource {
|
||||||
latest: string | null
|
latest: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One log file proposed by `GET /api/setup/scan`. Selected candidates are
 * posted back unchanged as `sources` to `/api/setup/write`.
 */
interface ScanCandidate {
  /** NOTE(review): presumably the source kind — not read by the visible code; confirm. */
  type: string
  /** NOTE(review): presumably the source id to write — not read by the visible code; confirm. */
  id: string
  /** File path; displayed in the result list and used as the row key. */
  path: string
  /** NOTE(review): presumably a display name — not read by the visible code; confirm. */
  label: string
  /** File size in bytes, rendered through `formatBytes`. */
  size_bytes: number
  /** Modification time, rendered through `formatAge` (which compares it to the current time in seconds). */
  mtime: number
  /** Relevance score; displayed multiplied by 100 as a percentage when a query was given. */
  score: number
  /** NOTE(review): not read by the visible code — confirm meaning with the scan endpoint. */
  available: boolean
}
|
||||||
|
|
||||||
const sources = ref<SourceRow[]>([])
|
const sources = ref<SourceRow[]>([])
|
||||||
const loading = ref(true)
|
const loading = ref(true)
|
||||||
const busy = ref(new Set<string>())
|
const busy = ref(new Set<string>())
|
||||||
|
|
@ -191,6 +275,14 @@ const actionMsg = ref('')
|
||||||
const actionError = ref(false)
|
const actionError = ref(false)
|
||||||
const showWizard = ref(false)
|
const showWizard = ref(false)
|
||||||
const showAddPanel = ref(false)
|
const showAddPanel = ref(false)
|
||||||
|
const showScanPanel = ref(false)
|
||||||
|
const scanQuery = ref('')
|
||||||
|
const scanning = ref(false)
|
||||||
|
const scanRan = ref(false)
|
||||||
|
const scanError = ref('')
|
||||||
|
const scanCandidates = ref<ScanCandidate[]>([])
|
||||||
|
const scanSelected = ref<ScanCandidate[]>([])
|
||||||
|
const scanAdding = ref(false)
|
||||||
|
|
||||||
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
const BASE = import.meta.env.BASE_URL.replace(/\/$/, '')
|
||||||
|
|
||||||
|
|
@ -347,6 +439,82 @@ async function handleUpload(e: Event): Promise<void> {
|
||||||
;(e.target as HTMLInputElement).value = ''
|
;(e.target as HTMLInputElement).value = ''
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Show or hide the filesystem scan panel. Hiding it also discards the
 * previous scan's candidates, selection, "ran" flag and error text.
 */
function toggleScanPanel(): void {
  const nowOpen = !showScanPanel.value
  showScanPanel.value = nowOpen
  if (nowOpen) return

  // Panel was just closed: forget everything from the last scan.
  scanCandidates.value = []
  scanSelected.value = []
  scanRan.value = false
  scanError.value = ''
}
|
||||||
|
|
||||||
|
/**
 * Ask the server to scan for log files (`GET /api/setup/scan`, at most 30
 * results, optionally ranked against the free-text query) and publish the
 * result in `scanCandidates`.
 *
 * `scanRan` is set only after a successful scan, so the "no files found"
 * hint is never shown alongside an error. Errors are surfaced through
 * `scanError`.
 */
async function runScan(): Promise<void> {
  // The Scan button is disabled while scanning, but Enter in the query box is
  // not — ignore re-entrant calls so two scans cannot race each other.
  if (scanning.value) return

  scanning.value = true
  scanError.value = ''
  scanRan.value = false
  scanCandidates.value = []
  scanSelected.value = []
  try {
    const params = new URLSearchParams({ max_results: '30' })
    const query = scanQuery.value.trim()
    if (query) params.set('query', query)

    const res = await fetch(`${BASE}/api/setup/scan?${params}`)
    if (!res.ok) {
      const failure = await res.json().catch(() => null)
      const detail = failure?.detail
      scanError.value = typeof detail === 'string' && detail ? detail : 'Scan failed'
      return
    }

    const data = await res.json()
    // Guard against a malformed payload so the template's v-for always gets an array.
    scanCandidates.value = Array.isArray(data?.candidates) ? data.candidates : []
    scanRan.value = true
  } catch (err) {
    scanError.value = String(err)
  } finally {
    scanning.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Post the selected scan candidates to `/api/setup/write` and, on success,
 * close and reset the scan panel and reload the source list.
 *
 * The outcome is reported through `actionMsg` / `actionError`, including
 * network failures. `scanAdding` is true while the operation is in progress.
 */
async function addScanSelected(): Promise<void> {
  // Snapshot the selection: the checkboxes can change it while the request is in flight.
  const selected = scanSelected.value
  if (selected.length === 0 || scanAdding.value) return

  scanAdding.value = true
  actionMsg.value = ''
  try {
    let res: Response
    try {
      res = await fetch(`${BASE}/api/setup/write`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ sources: selected }),
      })
    } catch (err) {
      // Network-level failure: report it instead of leaving an unhandled rejection.
      actionMsg.value = `Failed to add sources: ${String(err)}`
      actionError.value = true
      return
    }

    if (!res.ok) {
      // The error body is optional and may not be JSON.
      const failure = await res.json().catch(() => null)
      const detail = failure?.detail
      actionMsg.value = typeof detail === 'string' && detail ? detail : 'Failed to add sources'
      actionError.value = true
      return
    }

    const count = selected.length
    actionMsg.value = `Added ${count} source${count === 1 ? '' : 's'} to sources.yaml`
    actionError.value = false
    showScanPanel.value = false
    scanCandidates.value = []
    scanSelected.value = []
    scanRan.value = false
    await loadSources()
  } finally {
    scanAdding.value = false
  }
}
|
||||||
|
|
||||||
|
/**
 * Format a byte count for display using binary (1024-based) units.
 *
 * Values below 1024 are shown as whole bytes; larger values are shown with
 * one decimal in KB, MB, GB or TB. The unit is chosen from the *rounded*
 * value, so the result never reads "1024.0 KB" (it becomes "1.0 MB").
 *
 * @param bytes Size in bytes.
 * @returns The formatted size, or '—' when `bytes` is not a finite number.
 */
function formatBytes(bytes: number): string {
  if (!Number.isFinite(bytes)) return '—'
  if (bytes < 1024) return `${bytes} B`

  const units = ['KB', 'MB', 'GB', 'TB']
  let value = bytes / 1024
  let unitIndex = 0
  // Step up while the value, as it would be printed, still reaches 1024.
  while (unitIndex < units.length - 1 && Number(value.toFixed(1)) >= 1024) {
    value /= 1024
    unitIndex += 1
  }
  return `${value.toFixed(1)} ${units[unitIndex]}`
}
|
||||||
|
|
||||||
|
/**
 * Describe how long ago a file was modified, relative to the current time.
 *
 * Under one day → 'today' (this also covers timestamps in the future),
 * under two days → 'yesterday', then whole days, 30-day months, and
 * 365-day years.
 *
 * @param mtime Modification time in seconds since the Unix epoch.
 * @returns A short relative age, or '—' when `mtime` is not a finite number.
 */
function formatAge(mtime: number): string {
  const ageDays = (Date.now() / 1000 - mtime) / 86400
  if (!Number.isFinite(ageDays)) return '—'
  if (ageDays < 1) return 'today'
  if (ageDays < 2) return 'yesterday'
  if (ageDays < 30) return `${Math.floor(ageDays)}d ago`
  if (ageDays < 365) return `${Math.floor(ageDays / 30)}mo ago`
  return `${Math.floor(ageDays / 365)}y ago`
}
|
||||||
|
|
||||||
function formatTs(iso: string | null): string {
|
function formatTs(iso: string | null): string {
|
||||||
if (!iso) return '—'
|
if (!iso) return '—'
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue