feat: cybersec zero-shot scoring pipeline (#9)

Second-pass cybersec classifier using DeBERTa-v3-base-mnli (already
cached — no download required). Runs after each anomaly scoring pass on
entries flagged by the anomaly scorer or with pattern matches.

Architecture:
- app/services/cybersec.py: zero-shot-classification pipeline with 5
  cybersec candidate labels (auth failure, privilege escalation, network
  intrusion, malware, data exfiltration). Writes ml_score/ml_label/
  ml_scored_at to log_entries; inserts high-confidence hits into
  detections with scorer='cybersec'.
- app/tasks/cybersec_scorer.py: async background task (same shape as
  anomaly_scorer.py).
- REST: GET/POST /turnstone/api/cybersec/status|run|detections.
  GET /turnstone/api/anomaly/detections now accepts scorer= filter.

Schema: ml_score, ml_label, ml_scored_at added to log_entries; scorer
column added to detections (idempotent migrations + DDL for both SQLite
and Postgres).

UI: Security Alerts view gains Source dropdown (All / Anomaly / Cybersec)
and cybersec scorer status badge. Label dropdown split into optgroups.

Deployment: TURNSTONE_CYBERSEC_MODEL/DEVICE/THRESHOLD vars added to
.env.example, docker-compose.yml, docker-standalone.sh.

Tests: 10 new tests — no model, no eligible entries, scoring, detection
creation, normal label suppression, threshold filtering, pattern-tag
filtering, idempotency, list filtering, scorer column filter.
416/416 passing.

Closes: #9
This commit is contained in:
pyr0ball 2026-06-10 01:03:25 -07:00
parent d1e6871525
commit b2e2f15d55
11 changed files with 730 additions and 11 deletions

View file

@ -42,6 +42,15 @@
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
# TURNSTONE_EMBED_DEVICE=cpu
# --- Cybersec scoring pipeline (zero-shot, second-pass on flagged entries) ---
# Runs a zero-shot classifier on entries already flagged by the anomaly scorer
# or that have pattern matches — a focused second opinion using cybersec vocabulary.
# The DeBERTa-v3-base-mnli model (required by the diagnose pipeline) is the recommended
# zero-shot classifier — it produces human-readable cybersec labels with no fine-tuning.
# TURNSTONE_CYBERSEC_MODEL=MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
# TURNSTONE_CYBERSEC_DEVICE=cpu
# TURNSTONE_CYBERSEC_THRESHOLD=0.60 # lower than anomaly threshold (zero-shot is calibrated differently)
# --- Anomaly scoring pipeline (IDS / watchdog) ---
# Batch-scores every ingested log entry after each glean cycle.
# Any HuggingFace text-classification model works; the byviz classifier (already

View file

@ -38,6 +38,9 @@ CREATE TABLE IF NOT EXISTS log_entries (
anomaly_score REAL,
anomaly_label TEXT,
anomaly_scored_at TEXT,
ml_score REAL,
ml_label TEXT,
ml_scored_at TEXT,
PRIMARY KEY (tenant_id, id)
);
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
@ -47,6 +50,7 @@ CREATE INDEX IF NOT EXISTS idx_ts_repeat ON log_entries(timestamp_iso, repeat_
CREATE INDEX IF NOT EXISTS idx_severity ON log_entries(tenant_id, severity);
CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns);
CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score);
CREATE INDEX IF NOT EXISTS idx_ml_scored ON log_entries(tenant_id, ml_scored_at);
CREATE TABLE IF NOT EXISTS detections (
id TEXT PRIMARY KEY,
@ -61,12 +65,14 @@ CREATE TABLE IF NOT EXISTS detections (
detected_at TEXT NOT NULL,
acknowledged INTEGER NOT NULL DEFAULT 0,
acknowledged_at TEXT,
notes TEXT NOT NULL DEFAULT ''
notes TEXT NOT NULL DEFAULT '',
scorer TEXT NOT NULL DEFAULT 'anomaly'
);
CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at);
CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged);
CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label);
CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id);
CREATE INDEX IF NOT EXISTS idx_detections_scorer ON detections(scorer);
CREATE TABLE IF NOT EXISTS glean_fingerprints (
tenant_id TEXT NOT NULL DEFAULT '',
@ -201,6 +207,9 @@ _MAIN_SCHEMA_PG_STMTS = [
anomaly_score DOUBLE PRECISION,
anomaly_label TEXT,
anomaly_scored_at TEXT,
ml_score DOUBLE PRECISION,
ml_label TEXT,
ml_scored_at TEXT,
PRIMARY KEY (tenant_id, id)
)
""",
@ -210,6 +219,7 @@ _MAIN_SCHEMA_PG_STMTS = [
"CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns)",
"CREATE INDEX IF NOT EXISTS idx_fts_gin ON log_entries USING GIN(text_tsv)",
"CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score)",
"CREATE INDEX IF NOT EXISTS idx_ml_scored ON log_entries(tenant_id, ml_scored_at)",
"""
CREATE TABLE IF NOT EXISTS detections (
id TEXT PRIMARY KEY,
@ -224,13 +234,15 @@ _MAIN_SCHEMA_PG_STMTS = [
detected_at TEXT NOT NULL,
acknowledged INTEGER NOT NULL DEFAULT 0,
acknowledged_at TEXT,
notes TEXT NOT NULL DEFAULT ''
notes TEXT NOT NULL DEFAULT '',
scorer TEXT NOT NULL DEFAULT 'anomaly'
)
""",
"CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at)",
"CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged)",
"CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label)",
"CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id)",
"CREATE INDEX IF NOT EXISTS idx_detections_scorer ON detections(scorer)",
"""
CREATE OR REPLACE FUNCTION _ts_update_text_tsv() RETURNS trigger AS $$
BEGIN
@ -388,6 +400,10 @@ _MAIN_MIGRATIONS_SQLITE = [
"ALTER TABLE log_entries ADD COLUMN anomaly_score REAL",
"ALTER TABLE log_entries ADD COLUMN anomaly_label TEXT",
"ALTER TABLE log_entries ADD COLUMN anomaly_scored_at TEXT",
"ALTER TABLE log_entries ADD COLUMN ml_score REAL",
"ALTER TABLE log_entries ADD COLUMN ml_label TEXT",
"ALTER TABLE log_entries ADD COLUMN ml_scored_at TEXT",
"ALTER TABLE detections ADD COLUMN scorer TEXT NOT NULL DEFAULT 'anomaly'",
]
_CONTEXT_MIGRATIONS_SQLITE = [

View file

@ -89,7 +89,9 @@ from app.context.wizard import get_schema as _wizard_schema, advance_step, is_co
from app.context.chunker import UnsupportedDocType, FileTooLarge
from app.tasks.glean_scheduler import get_state as _glean_state, run_once as _run_glean, scheduler_loop as _scheduler_loop, submit_matched as _submit_matched
from app.tasks.anomaly_scorer import get_state as _scorer_state, run_once as _run_scorer
from app.tasks.cybersec_scorer import get_state as _cybersec_state, run_once as _run_cybersec
from app.services.anomaly import list_detections as _list_detections, acknowledge_detection as _ack_detection
from app.services.cybersec import list_cybersec_detections as _list_cybersec, CYBERSEC_LABELS
from app.glean.mqtt_subscriber import run_mqtt_subscribers as _run_mqtt_subscribers
DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
@ -114,6 +116,9 @@ SUBMIT_ENDPOINT = os.environ.get("TURNSTONE_SUBMIT_ENDPOINT", "").rstrip("/")
ANOMALY_MODEL = os.environ.get("TURNSTONE_ANOMALY_MODEL", "")
ANOMALY_DEVICE = os.environ.get("TURNSTONE_ANOMALY_DEVICE", "cpu")
ANOMALY_THRESHOLD = float(os.environ.get("TURNSTONE_ANOMALY_THRESHOLD", "0.75"))
CYBERSEC_MODEL = os.environ.get("TURNSTONE_CYBERSEC_MODEL", "")
CYBERSEC_DEVICE = os.environ.get("TURNSTONE_CYBERSEC_DEVICE", "cpu")
CYBERSEC_THRESHOLD = float(os.environ.get("TURNSTONE_CYBERSEC_THRESHOLD", "0.60"))
# When set, all /api/ routes require Authorization: Bearer <key>.
# Unset (default) means no authentication — suitable for local-only deployments.
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
@ -173,6 +178,9 @@ async def _lifespan(app: FastAPI):
anomaly_model=ANOMALY_MODEL,
anomaly_device=ANOMALY_DEVICE,
anomaly_threshold=ANOMALY_THRESHOLD,
cybersec_model=CYBERSEC_MODEL,
cybersec_device=CYBERSEC_DEVICE,
cybersec_threshold=CYBERSEC_THRESHOLD,
),
name="glean-scheduler",
)
@ -1362,11 +1370,12 @@ async def anomaly_detections(
limit: int = Query(100, ge=1, le=1000),
unacked_only: bool = Query(False),
label: str | None = Query(None),
scorer: str | None = Query(None),
):
"""List anomaly detections ordered by detected_at DESC."""
"""List detections ordered by detected_at DESC. Optionally filter by scorer ('anomaly'|'cybersec')."""
loop = asyncio.get_running_loop()
rows = await loop.run_in_executor(
None, lambda: _list_detections(DB_PATH, limit=limit, unacked_only=unacked_only, label=label)
None, lambda: _list_detections(DB_PATH, limit=limit, unacked_only=unacked_only, label=label, scorer=scorer)
)
return {"detections": rows, "total": len(rows)}
@ -1386,6 +1395,54 @@ async def acknowledge_detection(detection_id: str, notes: str = ""):
app.include_router(_anomaly)
# ---------------------------------------------------------------------------
# Cybersec scoring endpoints
# ---------------------------------------------------------------------------
_cybersec_router = APIRouter(prefix="/turnstone/api/cybersec", dependencies=[Depends(_check_api_key)])
@_cybersec_router.get("/status")
async def cybersec_status():
"""Return cybersec scorer state and configuration."""
return {
"model": CYBERSEC_MODEL or None,
"threshold": CYBERSEC_THRESHOLD,
"device": CYBERSEC_DEVICE,
"enabled": bool(CYBERSEC_MODEL),
"candidate_labels": CYBERSEC_LABELS,
**_cybersec_state(),
}
@_cybersec_router.post("/run")
async def cybersec_run(background_tasks: BackgroundTasks):
"""Trigger a manual cybersec scoring pass (runs in background)."""
if not CYBERSEC_MODEL:
raise HTTPException(status_code=400, detail="TURNSTONE_CYBERSEC_MODEL not configured")
background_tasks.add_task(
_run_cybersec, DB_PATH, CYBERSEC_MODEL, CYBERSEC_DEVICE, 32, CYBERSEC_THRESHOLD
)
return {"ok": True, "message": "cybersec scorer triggered"}
@_cybersec_router.get("/detections")
async def cybersec_detections(
limit: int = Query(100, ge=1, le=1000),
unacked_only: bool = Query(False),
label: str | None = Query(None),
):
"""List cybersec detections ordered by detected_at DESC."""
loop = asyncio.get_running_loop()
rows = await loop.run_in_executor(
None, lambda: _list_cybersec(DB_PATH, limit=limit, unacked_only=unacked_only, label=label)
)
return {"detections": rows, "total": len(rows)}
app.include_router(_cybersec_router)
# Root redirect → /turnstone/
@app.get("/")
def root_redirect() -> RedirectResponse:

View file

@ -253,6 +253,7 @@ def list_detections(
limit: int = 100,
unacked_only: bool = False,
label: str | None = None,
scorer: str | None = None,
) -> list[dict]:
"""Return detections ordered by detected_at DESC."""
tenant_id = resolve_tenant_id()
@ -264,6 +265,9 @@ def list_detections(
if label:
conditions.append(q("anomaly_label = ?"))
params.append(label.upper())
if scorer:
conditions.append(q("scorer = ?"))
params.append(scorer.lower())
where = " AND ".join(conditions)
with get_conn(db_path) as conn:

241
app/services/cybersec.py Normal file
View file

@ -0,0 +1,241 @@
"""Cybersecurity-focused scoring pipeline using zero-shot classification.
Runs a second-pass analysis on entries that were already flagged by the
anomaly scorer or that have pattern matches. Uses a zero-shot classification
model (DeBERTa-v3-base-mnli is cached locally) so no fine-tuning is needed.
The scorer writes ml_score / ml_label / ml_scored_at to log_entries and
inserts high-confidence non-normal hits into the detections table tagged
with scorer='cybersec'.
Env vars
--------
TURNSTONE_CYBERSEC_MODEL HF model id for zero-shot classification.
Recommended: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
(already cached from the diagnose pipeline).
Set to empty string to disable (safe default).
TURNSTONE_CYBERSEC_DEVICE 'cpu' (default) or 'cuda'
TURNSTONE_CYBERSEC_THRESHOLD float confidence floor for detection insertion (default 0.60)
"""
from __future__ import annotations
import logging
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from app.db import get_conn, resolve_tenant_id
from app.db.dialect import q
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Candidate labels — cybersec vocabulary for zero-shot inference
# ---------------------------------------------------------------------------
CYBERSEC_LABELS: list[str] = [
"authentication failure or brute force attack",
"privilege escalation or unauthorized access",
"network intrusion or port scan",
"malware or suspicious process activity",
"data exfiltration or unusual outbound traffic",
"normal system operation",
]
_NORMAL_LABEL = "normal system operation"
_LABEL_SEVERITY: dict[str, str] = {
"authentication failure or brute force attack": "ERROR",
"privilege escalation or unauthorized access": "CRITICAL",
"network intrusion or port scan": "ERROR",
"malware or suspicious process activity": "CRITICAL",
"data exfiltration or unusual outbound traffic":"CRITICAL",
"normal system operation": "INFO",
}
# ---------------------------------------------------------------------------
# Pipeline singleton
# ---------------------------------------------------------------------------
_pipeline: Any = None
def _get_pipeline(model_id: str, device: str) -> Any:
global _pipeline # noqa: PLW0603
if _pipeline is None:
from transformers import pipeline # type: ignore[import-untyped]
logger.info("loading cybersec zero-shot pipeline: %s on %s", model_id, device)
_pipeline = pipeline(
"zero-shot-classification",
model=model_id,
device=0 if device == "cuda" else -1,
)
logger.info("cybersec pipeline ready")
return _pipeline
def reset_pipeline() -> None:
"""Clear the cached pipeline — for testing only."""
global _pipeline # noqa: PLW0603
_pipeline = None
# ---------------------------------------------------------------------------
# Result type
# ---------------------------------------------------------------------------
@dataclass
class CybersecResult:
scored: int = 0
detections: int = 0
skipped: bool = False
error: str | None = None
# ---------------------------------------------------------------------------
# Core scoring function
# ---------------------------------------------------------------------------
def score_security_entries(
db_path: Path,
model_id: str,
device: str = "cpu",
batch_size: int = 32,
threshold: float = 0.60,
) -> CybersecResult:
"""Score entries that were anomaly-flagged or pattern-matched.
Only entries with ml_scored_at IS NULL are processed (idempotent).
Writes ml_score / ml_label / ml_scored_at and inserts high-confidence
hits into detections with scorer='cybersec'.
"""
if not model_id:
return CybersecResult(skipped=True)
tenant_id = resolve_tenant_id()
try:
pipe = _get_pipeline(model_id, device)
except Exception as exc:
logger.error("failed to load cybersec pipeline: %s", exc)
return CybersecResult(error=str(exc))
total_scored = 0
total_detections = 0
try:
with get_conn(db_path) as conn:
# Only score entries that are worth a second look:
# anomaly-flagged (non-normal) OR have at least one pattern match.
rows = conn.execute(
q("""
SELECT id, source_id, text, timestamp_iso
FROM log_entries
WHERE (tenant_id = ? OR tenant_id = '')
AND ml_scored_at IS NULL
AND (
(anomaly_label IS NOT NULL AND anomaly_label != 'NORMAL')
OR (matched_patterns IS NOT NULL AND matched_patterns != '[]' AND matched_patterns != '')
)
LIMIT ?
"""),
(tenant_id, batch_size * 10),
).fetchall()
if not rows:
return CybersecResult(skipped=True)
# Process in chunks to avoid OOM on large backlogs
for i in range(0, len(rows), batch_size):
chunk = rows[i : i + batch_size]
texts = [r["text"] for r in chunk]
try:
results = pipe(texts, candidate_labels=CYBERSEC_LABELS, multi_label=False)
except Exception as exc:
logger.warning("zero-shot inference error on chunk %d: %s", i, exc)
continue
now = datetime.now(tz=timezone.utc).isoformat()
with get_conn(db_path) as conn:
for row, result in zip(chunk, results):
top_label: str = result["labels"][0]
top_score: float = result["scores"][0]
conn.execute(
q("""
UPDATE log_entries
SET ml_score = ?, ml_label = ?, ml_scored_at = ?
WHERE id = ? AND (tenant_id = ? OR tenant_id = '')
"""),
(top_score, top_label, now, row["id"], tenant_id),
)
total_scored += 1
if top_score >= threshold and top_label != _NORMAL_LABEL:
severity = _LABEL_SEVERITY.get(top_label, "WARN")
try:
conn.execute(
q("""
INSERT INTO detections
(id, tenant_id, entry_id, source_id, anomaly_label,
anomaly_score, severity, text, timestamp_iso,
detected_at, scorer)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'cybersec')
"""),
(
str(uuid.uuid4()),
tenant_id,
row["id"],
row["source_id"],
top_label,
top_score,
severity,
row["text"],
row["timestamp_iso"],
now,
),
)
total_detections += 1
except Exception:
pass # entry may already have a detection — skip
conn.commit()
except Exception as exc:
logger.error("cybersec scoring failed: %s", exc)
return CybersecResult(scored=total_scored, detections=total_detections, error=str(exc))
return CybersecResult(scored=total_scored, detections=total_detections)
# ---------------------------------------------------------------------------
# Query helpers (used by REST layer)
# ---------------------------------------------------------------------------
def list_cybersec_detections(
db_path: Path,
limit: int = 100,
unacked_only: bool = False,
label: str | None = None,
) -> list[dict]:
"""Return cybersec detections ordered by detected_at DESC."""
tenant_id = resolve_tenant_id()
conditions = ["(tenant_id = ? OR tenant_id = '')", "scorer = 'cybersec'"]
params: list[Any] = [tenant_id]
if unacked_only:
conditions.append("acknowledged = 0")
if label:
conditions.append(q("anomaly_label = ?"))
params.append(label)
where = " AND ".join(conditions)
with get_conn(db_path) as conn:
rows = conn.execute(
q(f"SELECT * FROM detections WHERE {where} ORDER BY detected_at DESC LIMIT ?"), # noqa: S608
(*params, limit),
).fetchall()
return [dict(r) for r in rows]

View file

@ -0,0 +1,84 @@
"""Background task wrapper for the cybersec zero-shot scoring pipeline."""
from __future__ import annotations
import asyncio
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from app.services.cybersec import score_security_entries
logger = logging.getLogger(__name__)
_lock = asyncio.Lock()
@dataclass
class CybersecState:
last_run_at: str | None = None
last_duration_s: float | None = None
last_scored: int = 0
last_detections: int = 0
last_error: str | None = None
run_count: int = 0
running: bool = False
total_scored: int = 0
total_detections: int = 0
_state = CybersecState()
def get_state() -> dict:
return {
"last_run_at": _state.last_run_at,
"last_duration_s":_state.last_duration_s,
"last_scored": _state.last_scored,
"last_detections":_state.last_detections,
"last_error": _state.last_error,
"run_count": _state.run_count,
"running": _state.running,
"total_scored": _state.total_scored,
"total_detections": _state.total_detections,
}
async def run_once(
db_path: Path,
model_id: str,
device: str = "cpu",
batch_size: int = 32,
threshold: float = 0.60,
) -> None:
"""Single cybersec scoring pass — no-op if already running or no model set."""
if not model_id or _lock.locked():
return
async with _lock:
_state.running = True
started = datetime.now(tz=timezone.utc)
try:
loop = asyncio.get_running_loop()
result = await loop.run_in_executor(
None,
lambda: score_security_entries(db_path, model_id, device, batch_size, threshold),
)
elapsed = (datetime.now(tz=timezone.utc) - started).total_seconds()
_state.last_run_at = started.isoformat()
_state.last_duration_s = elapsed
_state.last_scored = result.scored
_state.last_detections = result.detections
_state.last_error = result.error
_state.run_count += 1
_state.total_scored += result.scored
_state.total_detections += result.detections
if result.error:
logger.error("cybersec scorer error: %s", result.error)
elif not result.skipped:
logger.info(
"cybersec scorer: scored=%d detections=%d in %.1fs",
result.scored, result.detections, elapsed,
)
finally:
_state.running = False

View file

@ -21,6 +21,7 @@ import httpx
from app.glean.pipeline import glean_sources
from app.tasks.anomaly_scorer import run_once as _run_scorer
from app.tasks.cybersec_scorer import run_once as _run_cybersec
logger = logging.getLogger(__name__)
@ -127,6 +128,9 @@ async def run_once(
anomaly_model: str = "",
anomaly_device: str = "cpu",
anomaly_threshold: float = 0.75,
cybersec_model: str = "",
cybersec_device: str = "cpu",
cybersec_threshold: float = 0.60,
) -> dict[str, Any]:
"""Ingest all sources once, then submit matched entries if configured.
@ -170,6 +174,9 @@ async def run_once(
if anomaly_model:
await _run_scorer(db_path, anomaly_model, anomaly_device, threshold=anomaly_threshold)
if cybersec_model:
await _run_cybersec(db_path, cybersec_model, cybersec_device, threshold=cybersec_threshold)
return {"ok": True, "stats": _state.last_stats, "duration_s": _state.last_duration_s}
@ -183,19 +190,27 @@ async def scheduler_loop(
anomaly_model: str = "",
anomaly_device: str = "cpu",
anomaly_threshold: float = 0.75,
cybersec_model: str = "",
cybersec_device: str = "cpu",
cybersec_threshold: float = 0.60,
) -> None:
"""Run glean + optional submission + optional anomaly scoring every interval_s seconds."""
"""Run glean + optional submission + optional anomaly/cybersec scoring every interval_s seconds."""
logger.info("Ingest scheduler started — interval %ds, sources: %s", interval_s, sources_file)
if submit_endpoint:
logger.info("Submission enabled — endpoint: %s", submit_endpoint)
if anomaly_model:
logger.info("Anomaly scoring enabled — model: %s", anomaly_model)
if cybersec_model:
logger.info("Cybersec scoring enabled — model: %s", cybersec_model)
while True:
await run_once(
sources_file, db_path, pattern_file, submit_endpoint, source_host,
anomaly_model=anomaly_model,
anomaly_device=anomaly_device,
anomaly_threshold=anomaly_threshold,
cybersec_model=cybersec_model,
cybersec_device=cybersec_device,
cybersec_threshold=cybersec_threshold,
)
next_run = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s)
_state.next_run_at = next_run.isoformat()

View file

@ -47,6 +47,10 @@ services:
TURNSTONE_EMBED_BACKEND: ${TURNSTONE_EMBED_BACKEND:-}
TURNSTONE_EMBED_MODEL: ${TURNSTONE_EMBED_MODEL:-}
TURNSTONE_EMBED_DEVICE: ${TURNSTONE_EMBED_DEVICE:-cpu}
# --- Cybersec scoring pipeline ---
TURNSTONE_CYBERSEC_MODEL: ${TURNSTONE_CYBERSEC_MODEL:-}
TURNSTONE_CYBERSEC_DEVICE: ${TURNSTONE_CYBERSEC_DEVICE:-cpu}
TURNSTONE_CYBERSEC_THRESHOLD: ${TURNSTONE_CYBERSEC_THRESHOLD:-0.60}
# --- Anomaly scoring pipeline ---
TURNSTONE_ANOMALY_MODEL: ${TURNSTONE_ANOMALY_MODEL:-}
TURNSTONE_ANOMALY_DEVICE: ${TURNSTONE_ANOMALY_DEVICE:-cpu}

View file

@ -147,6 +147,9 @@ docker run -d \
-e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
-e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
-e TURNSTONE_EMBED_DEVICE="${TURNSTONE_EMBED_DEVICE:-cpu}" \
-e TURNSTONE_CYBERSEC_MODEL="${TURNSTONE_CYBERSEC_MODEL:-}" \
-e TURNSTONE_CYBERSEC_DEVICE="${TURNSTONE_CYBERSEC_DEVICE:-cpu}" \
-e TURNSTONE_CYBERSEC_THRESHOLD="${TURNSTONE_CYBERSEC_THRESHOLD:-0.60}" \
-e TURNSTONE_ANOMALY_MODEL="${TURNSTONE_ANOMALY_MODEL:-}" \
-e TURNSTONE_ANOMALY_DEVICE="${TURNSTONE_ANOMALY_DEVICE:-cpu}" \
-e TURNSTONE_ANOMALY_THRESHOLD="${TURNSTONE_ANOMALY_THRESHOLD:-0.75}" \

233
tests/test_cybersec.py Normal file
View file

@ -0,0 +1,233 @@
"""Tests for the cybersec zero-shot scoring pipeline."""
from __future__ import annotations
import sqlite3
import tempfile
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from app.db.schema import ensure_schema
from app.services.cybersec import (
CybersecResult,
CYBERSEC_LABELS,
_NORMAL_LABEL,
reset_pipeline,
score_security_entries,
list_cybersec_detections,
)
import app.services.cybersec as cybersec_mod
@pytest.fixture(autouse=True)
def _reset(tmp_path):
reset_pipeline()
yield
reset_pipeline()
@pytest.fixture
def db(tmp_path) -> Path:
path = tmp_path / "test.db"
ensure_schema(path)
return path
def _insert_entry(db: Path, entry_id: str, text: str,
anomaly_label: str | None = None,
matched_patterns: str = "[]") -> None:
with sqlite3.connect(db) as conn:
conn.execute(
"""INSERT OR IGNORE INTO log_entries
(id, tenant_id, source_id, sequence, ingest_time, text,
anomaly_label, matched_patterns)
VALUES (?, '', 'test-src', 1, '2026-01-01T00:00:00Z', ?, ?, ?)""",
(entry_id, text, anomaly_label, matched_patterns),
)
conn.commit()
# ---------------------------------------------------------------------------
# No model configured → skipped
# ---------------------------------------------------------------------------
def test_no_model_returns_skipped(db):
result = score_security_entries(db, model_id="")
assert result.skipped is True
assert result.scored == 0
# ---------------------------------------------------------------------------
# No eligible entries → skipped
# ---------------------------------------------------------------------------
def test_no_eligible_entries_skipped(db):
_insert_entry(db, "e1", "Started nginx.service", anomaly_label=None, matched_patterns="[]")
mock_pipe = MagicMock(return_value=[{"labels": [_NORMAL_LABEL], "scores": [0.99]}])
monkeypatch = pytest.MonkeyPatch()
monkeypatch.setattr(cybersec_mod, "_pipeline", mock_pipe)
result = score_security_entries(db, model_id="fake-model")
assert result.skipped is True
monkeypatch.undo()
# ---------------------------------------------------------------------------
# Security entry gets scored
# ---------------------------------------------------------------------------
def test_security_entry_scored(db, monkeypatch):
_insert_entry(db, "e1",
"Failed password for root from 192.168.1.1 port 22 ssh2",
anomaly_label="SECURITY_ANOMALY")
mock_pipe = MagicMock(return_value=[{
"labels": ["authentication failure or brute force attack", _NORMAL_LABEL],
"scores": [0.85, 0.15],
}])
monkeypatch.setattr(cybersec_mod, "_pipeline", mock_pipe)
result = score_security_entries(db, model_id="fake-model", threshold=0.70)
assert result.scored == 1
assert result.detections == 1
assert result.error is None
with sqlite3.connect(db) as conn:
conn.row_factory = sqlite3.Row
row = conn.execute("SELECT ml_score, ml_label, ml_scored_at FROM log_entries WHERE id='e1'").fetchone()
assert row["ml_score"] == pytest.approx(0.85)
assert row["ml_label"] == "authentication failure or brute force attack"
assert row["ml_scored_at"] is not None
# ---------------------------------------------------------------------------
# Detection created above threshold
# ---------------------------------------------------------------------------
def test_detection_inserted_above_threshold(db, monkeypatch):
_insert_entry(db, "e1", "sudo: authentication failure", anomaly_label="ERROR")
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
"labels": ["privilege escalation or unauthorized access", _NORMAL_LABEL],
"scores": [0.75, 0.25],
}]))
score_security_entries(db, model_id="fake-model", threshold=0.60)
with sqlite3.connect(db) as conn:
conn.row_factory = sqlite3.Row
dets = conn.execute("SELECT * FROM detections WHERE scorer='cybersec'").fetchall()
assert len(dets) == 1
assert dets[0]["anomaly_label"] == "privilege escalation or unauthorized access"
assert dets[0]["severity"] == "CRITICAL"
# ---------------------------------------------------------------------------
# Normal label → no detection even above score threshold
# ---------------------------------------------------------------------------
def test_normal_label_no_detection(db, monkeypatch):
_insert_entry(db, "e1", "Started nginx.service", anomaly_label="INFO",
matched_patterns='["service_start"]')
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
"labels": [_NORMAL_LABEL, "network intrusion or port scan"],
"scores": [0.95, 0.05],
}]))
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
assert result.detections == 0
# ---------------------------------------------------------------------------
# Below threshold → scored but no detection
# ---------------------------------------------------------------------------
def test_below_threshold_no_detection(db, monkeypatch):
_insert_entry(db, "e1", "Some suspicious text", anomaly_label="WARN")
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
"labels": ["network intrusion or port scan", _NORMAL_LABEL],
"scores": [0.45, 0.55],
}]))
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
assert result.scored == 1
assert result.detections == 0
# ---------------------------------------------------------------------------
# Pattern-matched entry (not anomaly-flagged) still gets scored
# ---------------------------------------------------------------------------
def test_pattern_matched_entry_scored(db, monkeypatch):
_insert_entry(db, "e1", "SSH port forwarding conflict detected",
anomaly_label=None,
matched_patterns='["ssh_forward_conflict"]')
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
"labels": ["network intrusion or port scan", _NORMAL_LABEL],
"scores": [0.70, 0.30],
}]))
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
assert result.scored == 1
assert result.detections == 1
# ---------------------------------------------------------------------------
# Idempotency — re-run finds nothing unscored
# ---------------------------------------------------------------------------
def test_idempotent_rerun(db, monkeypatch):
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
"labels": ["authentication failure or brute force attack"],
"scores": [0.80],
}]))
score_security_entries(db, model_id="fake-model", threshold=0.60)
result2 = score_security_entries(db, model_id="fake-model", threshold=0.60)
assert result2.skipped is True
# ---------------------------------------------------------------------------
# list_cybersec_detections filters to scorer='cybersec'
# ---------------------------------------------------------------------------
def test_list_cybersec_detections(db, monkeypatch):
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
"labels": ["authentication failure or brute force attack"],
"scores": [0.90],
}]))
score_security_entries(db, model_id="fake-model", threshold=0.60)
rows = list_cybersec_detections(db)
assert len(rows) == 1
assert rows[0]["scorer"] == "cybersec"
# ---------------------------------------------------------------------------
# list_detections scorer filter (anomaly service)
# ---------------------------------------------------------------------------
def test_list_detections_scorer_filter(db, monkeypatch):
from app.services.anomaly import list_detections
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
"labels": ["authentication failure or brute force attack"],
"scores": [0.90],
}]))
score_security_entries(db, model_id="fake-model", threshold=0.60)
all_dets = list_detections(db)
cybersec_dets = list_detections(db, scorer="cybersec")
anomaly_dets = list_detections(db, scorer="anomaly")
assert len(cybersec_dets) == 1
assert len(anomaly_dets) == 0
assert len(all_dets) >= 1

View file

@ -29,6 +29,20 @@
{{ scorerStatus.running ? 'scoring…' : scorerStatus.enabled ? 'scorer ready' : 'scorer off' }}
</span>
<!-- Cybersec scorer status -->
<span
v-if="cybersecStatus"
:class="[
'text-xs px-2 py-1 rounded border font-mono',
cybersecStatus.enabled
? 'border-surface-border text-text-dim'
: 'border-surface-border text-text-dim opacity-40'
]"
:title="cybersecStatus.enabled ? `cybersec: ${cybersecStatus.model}` : 'TURNSTONE_CYBERSEC_MODEL not set'"
>
{{ cybersecStatus.enabled ? 'cybersec on' : 'cybersec off' }}
</span>
<button
@click="runScorer"
:disabled="!scorerStatus?.enabled || triggerLoading || scorerStatus?.running"
@ -86,6 +100,21 @@
</button>
</div>
<!-- Scorer filter -->
<div class="flex items-center gap-2 shrink-0">
<label for="scorer-filter" class="text-xs text-text-dim whitespace-nowrap">Source:</label>
<select
id="scorer-filter"
v-model="scorerFilter"
@change="loadDetections()"
class="text-xs bg-surface border border-surface-border rounded px-2 py-1 text-text-primary focus:outline-none focus:border-accent"
>
<option value="">All</option>
<option value="anomaly">Anomaly scorer</option>
<option value="cybersec">Cybersec scorer</option>
</select>
</div>
<!-- Label filter -->
<div class="flex items-center gap-2 shrink-0">
<label for="label-filter" class="text-xs text-text-dim whitespace-nowrap">Label:</label>
@ -96,7 +125,12 @@
class="text-xs bg-surface border border-surface-border rounded px-2 py-1 text-text-primary focus:outline-none focus:border-accent"
>
<option value="">All</option>
<option v-for="lbl in knownLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
<optgroup label="Anomaly labels">
<option v-for="lbl in anomalyLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
</optgroup>
<optgroup label="Cybersec labels">
<option v-for="lbl in cybersecLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
</optgroup>
</select>
</div>
</div>
@ -288,6 +322,7 @@ interface ScorerStatus {
const detections = ref<Detection[]>([])
const scorerStatus = ref<ScorerStatus | null>(null)
const cybersecStatus = ref<Record<string, unknown> | null>(null)
const loading = ref(true)
const triggerLoading = ref(false)
const ackLoading = ref(false)
@ -296,14 +331,23 @@ const ackNotes = ref('')
const drawer = ref<Detection | null>(null)
const activeTab = ref<'all' | 'unacked'>('all')
const labelFilter = ref('')
const scorerFilter = ref('')
const tabRefs = ref<(HTMLElement | null)[]>([])
const knownLabels = [
const anomalyLabels = [
'SECURITY_ANOMALY', 'SYSTEM_FAILURE', 'PERFORMANCE_ISSUE',
'NETWORK_ANOMALY', 'CONFIG_ERROR', 'HARDWARE_ISSUE',
'CRITICAL', 'ERROR',
]
const cybersecLabels = [
'authentication failure or brute force attack',
'privilege escalation or unauthorized access',
'network intrusion or port scan',
'malware or suspicious process activity',
'data exfiltration or unusual outbound traffic',
]
// Tabs
const unackedCount = computed(() => detections.value.filter(d => !d.acknowledged).length)
@ -325,6 +369,7 @@ async function loadDetections() {
loading.value = true
const params = new URLSearchParams({ limit: '200' })
if (labelFilter.value) params.set('label', labelFilter.value)
if (scorerFilter.value) params.set('scorer', scorerFilter.value)
try {
const res = await fetch(`${BASE}/turnstone/api/anomaly/detections?${params}`)
if (!res.ok) throw new Error(`HTTP ${res.status}`)
@ -342,10 +387,18 @@ async function loadDetections() {
async function loadScorerStatus() {
try {
const res = await fetch(`${BASE}/turnstone/api/anomaly/status`)
if (!res.ok) return
const data = await res.json()
const [anomalyRes, cybersecRes] = await Promise.all([
fetch(`${BASE}/turnstone/api/anomaly/status`),
fetch(`${BASE}/turnstone/api/cybersec/status`),
])
if (anomalyRes.ok) {
const data = await anomalyRes.json()
scorerStatus.value = { ...data.state, ...data.config }
}
if (cybersecRes.ok) {
const data = await cybersecRes.json()
cybersecStatus.value = data
}
} catch {
// scorer status is non-critical fail silently
}