feat: cybersec zero-shot scoring pipeline (#9)
Second-pass cybersec classifier using DeBERTa-v3-base-mnli (already cached — no download required). Runs after each anomaly scoring pass on entries flagged by the anomaly scorer or with pattern matches. Architecture: - app/services/cybersec.py: zero-shot-classification pipeline with 5 cybersec candidate labels (auth failure, privilege escalation, network intrusion, malware, data exfiltration). Writes ml_score/ml_label/ ml_scored_at to log_entries; inserts high-confidence hits into detections with scorer='cybersec'. - app/tasks/cybersec_scorer.py: async background task (same shape as anomaly_scorer.py). - REST: GET/POST /turnstone/api/cybersec/status|run|detections. GET /turnstone/api/anomaly/detections now accepts scorer= filter. Schema: ml_score, ml_label, ml_scored_at added to log_entries; scorer column added to detections (idempotent migrations + DDL for both SQLite and Postgres). UI: Security Alerts view gains Source dropdown (All / Anomaly / Cybersec) and cybersec scorer status badge. Label dropdown split into optgroups. Deployment: TURNSTONE_CYBERSEC_MODEL/DEVICE/THRESHOLD vars added to .env.example, docker-compose.yml, docker-standalone.sh. Tests: 10 new tests — no model, no eligible entries, scoring, detection creation, normal label suppression, threshold filtering, pattern-tag filtering, idempotency, list filtering, scorer column filter. 416/416 passing. Closes: #9
This commit is contained in:
parent
6e228fe0bf
commit
cffe6bcd31
11 changed files with 730 additions and 11 deletions
|
|
@ -42,6 +42,15 @@
|
||||||
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
||||||
# TURNSTONE_EMBED_DEVICE=cpu
|
# TURNSTONE_EMBED_DEVICE=cpu
|
||||||
|
|
||||||
|
# --- Cybersec scoring pipeline (zero-shot, second-pass on flagged entries) ---
|
||||||
|
# Runs a zero-shot classifier on entries already flagged by the anomaly scorer
|
||||||
|
# or that have pattern matches — a focused second opinion using cybersec vocabulary.
|
||||||
|
# The DeBERTa-v3-base-mnli model (required by the diagnose pipeline) is the recommended
|
||||||
|
# zero-shot classifier — it produces human-readable cybersec labels with no fine-tuning.
|
||||||
|
# TURNSTONE_CYBERSEC_MODEL=MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
|
||||||
|
# TURNSTONE_CYBERSEC_DEVICE=cpu
|
||||||
|
# TURNSTONE_CYBERSEC_THRESHOLD=0.60 # lower than anomaly threshold (zero-shot is calibrated differently)
|
||||||
|
|
||||||
# --- Anomaly scoring pipeline (IDS / watchdog) ---
|
# --- Anomaly scoring pipeline (IDS / watchdog) ---
|
||||||
# Batch-scores every ingested log entry after each glean cycle.
|
# Batch-scores every ingested log entry after each glean cycle.
|
||||||
# Any HuggingFace text-classification model works; the byviz classifier (already
|
# Any HuggingFace text-classification model works; the byviz classifier (already
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,9 @@ CREATE TABLE IF NOT EXISTS log_entries (
|
||||||
anomaly_score REAL,
|
anomaly_score REAL,
|
||||||
anomaly_label TEXT,
|
anomaly_label TEXT,
|
||||||
anomaly_scored_at TEXT,
|
anomaly_scored_at TEXT,
|
||||||
|
ml_score REAL,
|
||||||
|
ml_label TEXT,
|
||||||
|
ml_scored_at TEXT,
|
||||||
PRIMARY KEY (tenant_id, id)
|
PRIMARY KEY (tenant_id, id)
|
||||||
);
|
);
|
||||||
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
|
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
|
||||||
|
|
@ -47,6 +50,7 @@ CREATE INDEX IF NOT EXISTS idx_ts_repeat ON log_entries(timestamp_iso, repeat_
|
||||||
CREATE INDEX IF NOT EXISTS idx_severity ON log_entries(tenant_id, severity);
|
CREATE INDEX IF NOT EXISTS idx_severity ON log_entries(tenant_id, severity);
|
||||||
CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns);
|
CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns);
|
||||||
CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score);
|
CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_ml_scored ON log_entries(tenant_id, ml_scored_at);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS detections (
|
CREATE TABLE IF NOT EXISTS detections (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
|
|
@ -61,12 +65,14 @@ CREATE TABLE IF NOT EXISTS detections (
|
||||||
detected_at TEXT NOT NULL,
|
detected_at TEXT NOT NULL,
|
||||||
acknowledged INTEGER NOT NULL DEFAULT 0,
|
acknowledged INTEGER NOT NULL DEFAULT 0,
|
||||||
acknowledged_at TEXT,
|
acknowledged_at TEXT,
|
||||||
notes TEXT NOT NULL DEFAULT ''
|
notes TEXT NOT NULL DEFAULT '',
|
||||||
|
scorer TEXT NOT NULL DEFAULT 'anomaly'
|
||||||
);
|
);
|
||||||
CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at);
|
CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at);
|
||||||
CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged);
|
CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged);
|
||||||
CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label);
|
CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label);
|
||||||
CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id);
|
CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_detections_scorer ON detections(scorer);
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS glean_fingerprints (
|
CREATE TABLE IF NOT EXISTS glean_fingerprints (
|
||||||
tenant_id TEXT NOT NULL DEFAULT '',
|
tenant_id TEXT NOT NULL DEFAULT '',
|
||||||
|
|
@ -201,6 +207,9 @@ _MAIN_SCHEMA_PG_STMTS = [
|
||||||
anomaly_score DOUBLE PRECISION,
|
anomaly_score DOUBLE PRECISION,
|
||||||
anomaly_label TEXT,
|
anomaly_label TEXT,
|
||||||
anomaly_scored_at TEXT,
|
anomaly_scored_at TEXT,
|
||||||
|
ml_score DOUBLE PRECISION,
|
||||||
|
ml_label TEXT,
|
||||||
|
ml_scored_at TEXT,
|
||||||
PRIMARY KEY (tenant_id, id)
|
PRIMARY KEY (tenant_id, id)
|
||||||
)
|
)
|
||||||
""",
|
""",
|
||||||
|
|
@ -210,6 +219,7 @@ _MAIN_SCHEMA_PG_STMTS = [
|
||||||
"CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns)",
|
"CREATE INDEX IF NOT EXISTS idx_patterns ON log_entries(matched_patterns)",
|
||||||
"CREATE INDEX IF NOT EXISTS idx_fts_gin ON log_entries USING GIN(text_tsv)",
|
"CREATE INDEX IF NOT EXISTS idx_fts_gin ON log_entries USING GIN(text_tsv)",
|
||||||
"CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score)",
|
"CREATE INDEX IF NOT EXISTS idx_anomaly ON log_entries(tenant_id, anomaly_score)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_ml_scored ON log_entries(tenant_id, ml_scored_at)",
|
||||||
"""
|
"""
|
||||||
CREATE TABLE IF NOT EXISTS detections (
|
CREATE TABLE IF NOT EXISTS detections (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
|
|
@ -224,13 +234,15 @@ _MAIN_SCHEMA_PG_STMTS = [
|
||||||
detected_at TEXT NOT NULL,
|
detected_at TEXT NOT NULL,
|
||||||
acknowledged INTEGER NOT NULL DEFAULT 0,
|
acknowledged INTEGER NOT NULL DEFAULT 0,
|
||||||
acknowledged_at TEXT,
|
acknowledged_at TEXT,
|
||||||
notes TEXT NOT NULL DEFAULT ''
|
notes TEXT NOT NULL DEFAULT '',
|
||||||
|
scorer TEXT NOT NULL DEFAULT 'anomaly'
|
||||||
)
|
)
|
||||||
""",
|
""",
|
||||||
"CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at)",
|
"CREATE INDEX IF NOT EXISTS idx_detections_tenant ON detections(tenant_id, detected_at)",
|
||||||
"CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged)",
|
"CREATE INDEX IF NOT EXISTS idx_detections_ack ON detections(acknowledged)",
|
||||||
"CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label)",
|
"CREATE INDEX IF NOT EXISTS idx_detections_label ON detections(anomaly_label)",
|
||||||
"CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id)",
|
"CREATE INDEX IF NOT EXISTS idx_detections_entry ON detections(entry_id)",
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_detections_scorer ON detections(scorer)",
|
||||||
"""
|
"""
|
||||||
CREATE OR REPLACE FUNCTION _ts_update_text_tsv() RETURNS trigger AS $$
|
CREATE OR REPLACE FUNCTION _ts_update_text_tsv() RETURNS trigger AS $$
|
||||||
BEGIN
|
BEGIN
|
||||||
|
|
@ -388,6 +400,10 @@ _MAIN_MIGRATIONS_SQLITE = [
|
||||||
"ALTER TABLE log_entries ADD COLUMN anomaly_score REAL",
|
"ALTER TABLE log_entries ADD COLUMN anomaly_score REAL",
|
||||||
"ALTER TABLE log_entries ADD COLUMN anomaly_label TEXT",
|
"ALTER TABLE log_entries ADD COLUMN anomaly_label TEXT",
|
||||||
"ALTER TABLE log_entries ADD COLUMN anomaly_scored_at TEXT",
|
"ALTER TABLE log_entries ADD COLUMN anomaly_scored_at TEXT",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN ml_score REAL",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN ml_label TEXT",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN ml_scored_at TEXT",
|
||||||
|
"ALTER TABLE detections ADD COLUMN scorer TEXT NOT NULL DEFAULT 'anomaly'",
|
||||||
]
|
]
|
||||||
|
|
||||||
_CONTEXT_MIGRATIONS_SQLITE = [
|
_CONTEXT_MIGRATIONS_SQLITE = [
|
||||||
|
|
|
||||||
61
app/rest.py
61
app/rest.py
|
|
@ -89,7 +89,9 @@ from app.context.wizard import get_schema as _wizard_schema, advance_step, is_co
|
||||||
from app.context.chunker import UnsupportedDocType, FileTooLarge
|
from app.context.chunker import UnsupportedDocType, FileTooLarge
|
||||||
from app.tasks.glean_scheduler import get_state as _glean_state, run_once as _run_glean, scheduler_loop as _scheduler_loop, submit_matched as _submit_matched
|
from app.tasks.glean_scheduler import get_state as _glean_state, run_once as _run_glean, scheduler_loop as _scheduler_loop, submit_matched as _submit_matched
|
||||||
from app.tasks.anomaly_scorer import get_state as _scorer_state, run_once as _run_scorer
|
from app.tasks.anomaly_scorer import get_state as _scorer_state, run_once as _run_scorer
|
||||||
|
from app.tasks.cybersec_scorer import get_state as _cybersec_state, run_once as _run_cybersec
|
||||||
from app.services.anomaly import list_detections as _list_detections, acknowledge_detection as _ack_detection
|
from app.services.anomaly import list_detections as _list_detections, acknowledge_detection as _ack_detection
|
||||||
|
from app.services.cybersec import list_cybersec_detections as _list_cybersec, CYBERSEC_LABELS
|
||||||
from app.glean.mqtt_subscriber import run_mqtt_subscribers as _run_mqtt_subscribers
|
from app.glean.mqtt_subscriber import run_mqtt_subscribers as _run_mqtt_subscribers
|
||||||
|
|
||||||
DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
|
DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
|
||||||
|
|
@ -114,6 +116,9 @@ SUBMIT_ENDPOINT = os.environ.get("TURNSTONE_SUBMIT_ENDPOINT", "").rstrip("/")
|
||||||
ANOMALY_MODEL = os.environ.get("TURNSTONE_ANOMALY_MODEL", "")
|
ANOMALY_MODEL = os.environ.get("TURNSTONE_ANOMALY_MODEL", "")
|
||||||
ANOMALY_DEVICE = os.environ.get("TURNSTONE_ANOMALY_DEVICE", "cpu")
|
ANOMALY_DEVICE = os.environ.get("TURNSTONE_ANOMALY_DEVICE", "cpu")
|
||||||
ANOMALY_THRESHOLD = float(os.environ.get("TURNSTONE_ANOMALY_THRESHOLD", "0.75"))
|
ANOMALY_THRESHOLD = float(os.environ.get("TURNSTONE_ANOMALY_THRESHOLD", "0.75"))
|
||||||
|
CYBERSEC_MODEL = os.environ.get("TURNSTONE_CYBERSEC_MODEL", "")
|
||||||
|
CYBERSEC_DEVICE = os.environ.get("TURNSTONE_CYBERSEC_DEVICE", "cpu")
|
||||||
|
CYBERSEC_THRESHOLD = float(os.environ.get("TURNSTONE_CYBERSEC_THRESHOLD", "0.60"))
|
||||||
# When set, all /api/ routes require Authorization: Bearer <key>.
|
# When set, all /api/ routes require Authorization: Bearer <key>.
|
||||||
# Unset (default) means no authentication — suitable for local-only deployments.
|
# Unset (default) means no authentication — suitable for local-only deployments.
|
||||||
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
|
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
|
||||||
|
|
@ -173,6 +178,9 @@ async def _lifespan(app: FastAPI):
|
||||||
anomaly_model=ANOMALY_MODEL,
|
anomaly_model=ANOMALY_MODEL,
|
||||||
anomaly_device=ANOMALY_DEVICE,
|
anomaly_device=ANOMALY_DEVICE,
|
||||||
anomaly_threshold=ANOMALY_THRESHOLD,
|
anomaly_threshold=ANOMALY_THRESHOLD,
|
||||||
|
cybersec_model=CYBERSEC_MODEL,
|
||||||
|
cybersec_device=CYBERSEC_DEVICE,
|
||||||
|
cybersec_threshold=CYBERSEC_THRESHOLD,
|
||||||
),
|
),
|
||||||
name="glean-scheduler",
|
name="glean-scheduler",
|
||||||
)
|
)
|
||||||
|
|
@ -1362,11 +1370,12 @@ async def anomaly_detections(
|
||||||
limit: int = Query(100, ge=1, le=1000),
|
limit: int = Query(100, ge=1, le=1000),
|
||||||
unacked_only: bool = Query(False),
|
unacked_only: bool = Query(False),
|
||||||
label: str | None = Query(None),
|
label: str | None = Query(None),
|
||||||
|
scorer: str | None = Query(None),
|
||||||
):
|
):
|
||||||
"""List anomaly detections ordered by detected_at DESC."""
|
"""List detections ordered by detected_at DESC. Optionally filter by scorer ('anomaly'|'cybersec')."""
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
rows = await loop.run_in_executor(
|
rows = await loop.run_in_executor(
|
||||||
None, lambda: _list_detections(DB_PATH, limit=limit, unacked_only=unacked_only, label=label)
|
None, lambda: _list_detections(DB_PATH, limit=limit, unacked_only=unacked_only, label=label, scorer=scorer)
|
||||||
)
|
)
|
||||||
return {"detections": rows, "total": len(rows)}
|
return {"detections": rows, "total": len(rows)}
|
||||||
|
|
||||||
|
|
@ -1386,6 +1395,54 @@ async def acknowledge_detection(detection_id: str, notes: str = ""):
|
||||||
app.include_router(_anomaly)
|
app.include_router(_anomaly)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Cybersec scoring endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_cybersec_router = APIRouter(prefix="/turnstone/api/cybersec", dependencies=[Depends(_check_api_key)])
|
||||||
|
|
||||||
|
|
||||||
|
@_cybersec_router.get("/status")
|
||||||
|
async def cybersec_status():
|
||||||
|
"""Return cybersec scorer state and configuration."""
|
||||||
|
return {
|
||||||
|
"model": CYBERSEC_MODEL or None,
|
||||||
|
"threshold": CYBERSEC_THRESHOLD,
|
||||||
|
"device": CYBERSEC_DEVICE,
|
||||||
|
"enabled": bool(CYBERSEC_MODEL),
|
||||||
|
"candidate_labels": CYBERSEC_LABELS,
|
||||||
|
**_cybersec_state(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@_cybersec_router.post("/run")
|
||||||
|
async def cybersec_run(background_tasks: BackgroundTasks):
|
||||||
|
"""Trigger a manual cybersec scoring pass (runs in background)."""
|
||||||
|
if not CYBERSEC_MODEL:
|
||||||
|
raise HTTPException(status_code=400, detail="TURNSTONE_CYBERSEC_MODEL not configured")
|
||||||
|
background_tasks.add_task(
|
||||||
|
_run_cybersec, DB_PATH, CYBERSEC_MODEL, CYBERSEC_DEVICE, 32, CYBERSEC_THRESHOLD
|
||||||
|
)
|
||||||
|
return {"ok": True, "message": "cybersec scorer triggered"}
|
||||||
|
|
||||||
|
|
||||||
|
@_cybersec_router.get("/detections")
|
||||||
|
async def cybersec_detections(
|
||||||
|
limit: int = Query(100, ge=1, le=1000),
|
||||||
|
unacked_only: bool = Query(False),
|
||||||
|
label: str | None = Query(None),
|
||||||
|
):
|
||||||
|
"""List cybersec detections ordered by detected_at DESC."""
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
rows = await loop.run_in_executor(
|
||||||
|
None, lambda: _list_cybersec(DB_PATH, limit=limit, unacked_only=unacked_only, label=label)
|
||||||
|
)
|
||||||
|
return {"detections": rows, "total": len(rows)}
|
||||||
|
|
||||||
|
|
||||||
|
app.include_router(_cybersec_router)
|
||||||
|
|
||||||
|
|
||||||
# Root redirect → /turnstone/
|
# Root redirect → /turnstone/
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
def root_redirect() -> RedirectResponse:
|
def root_redirect() -> RedirectResponse:
|
||||||
|
|
|
||||||
|
|
@ -253,6 +253,7 @@ def list_detections(
|
||||||
limit: int = 100,
|
limit: int = 100,
|
||||||
unacked_only: bool = False,
|
unacked_only: bool = False,
|
||||||
label: str | None = None,
|
label: str | None = None,
|
||||||
|
scorer: str | None = None,
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Return detections ordered by detected_at DESC."""
|
"""Return detections ordered by detected_at DESC."""
|
||||||
tenant_id = resolve_tenant_id()
|
tenant_id = resolve_tenant_id()
|
||||||
|
|
@ -264,6 +265,9 @@ def list_detections(
|
||||||
if label:
|
if label:
|
||||||
conditions.append(q("anomaly_label = ?"))
|
conditions.append(q("anomaly_label = ?"))
|
||||||
params.append(label.upper())
|
params.append(label.upper())
|
||||||
|
if scorer:
|
||||||
|
conditions.append(q("scorer = ?"))
|
||||||
|
params.append(scorer.lower())
|
||||||
|
|
||||||
where = " AND ".join(conditions)
|
where = " AND ".join(conditions)
|
||||||
with get_conn(db_path) as conn:
|
with get_conn(db_path) as conn:
|
||||||
|
|
|
||||||
241
app/services/cybersec.py
Normal file
241
app/services/cybersec.py
Normal file
|
|
@ -0,0 +1,241 @@
|
||||||
|
"""Cybersecurity-focused scoring pipeline using zero-shot classification.
|
||||||
|
|
||||||
|
Runs a second-pass analysis on entries that were already flagged by the
|
||||||
|
anomaly scorer or that have pattern matches. Uses a zero-shot classification
|
||||||
|
model (DeBERTa-v3-base-mnli is cached locally) so no fine-tuning is needed.
|
||||||
|
|
||||||
|
The scorer writes ml_score / ml_label / ml_scored_at to log_entries and
|
||||||
|
inserts high-confidence non-normal hits into the detections table tagged
|
||||||
|
with scorer='cybersec'.
|
||||||
|
|
||||||
|
Env vars
|
||||||
|
--------
|
||||||
|
TURNSTONE_CYBERSEC_MODEL — HF model id for zero-shot classification.
|
||||||
|
Recommended: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
|
||||||
|
(already cached from the diagnose pipeline).
|
||||||
|
Set to empty string to disable (safe default).
|
||||||
|
TURNSTONE_CYBERSEC_DEVICE — 'cpu' (default) or 'cuda'
|
||||||
|
TURNSTONE_CYBERSEC_THRESHOLD — float confidence floor for detection insertion (default 0.60)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.db import get_conn, resolve_tenant_id
|
||||||
|
from app.db.dialect import q
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Candidate labels — cybersec vocabulary for zero-shot inference
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
CYBERSEC_LABELS: list[str] = [
|
||||||
|
"authentication failure or brute force attack",
|
||||||
|
"privilege escalation or unauthorized access",
|
||||||
|
"network intrusion or port scan",
|
||||||
|
"malware or suspicious process activity",
|
||||||
|
"data exfiltration or unusual outbound traffic",
|
||||||
|
"normal system operation",
|
||||||
|
]
|
||||||
|
|
||||||
|
_NORMAL_LABEL = "normal system operation"
|
||||||
|
|
||||||
|
_LABEL_SEVERITY: dict[str, str] = {
|
||||||
|
"authentication failure or brute force attack": "ERROR",
|
||||||
|
"privilege escalation or unauthorized access": "CRITICAL",
|
||||||
|
"network intrusion or port scan": "ERROR",
|
||||||
|
"malware or suspicious process activity": "CRITICAL",
|
||||||
|
"data exfiltration or unusual outbound traffic":"CRITICAL",
|
||||||
|
"normal system operation": "INFO",
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pipeline singleton
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_pipeline: Any = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_pipeline(model_id: str, device: str) -> Any:
|
||||||
|
global _pipeline # noqa: PLW0603
|
||||||
|
if _pipeline is None:
|
||||||
|
from transformers import pipeline # type: ignore[import-untyped]
|
||||||
|
logger.info("loading cybersec zero-shot pipeline: %s on %s", model_id, device)
|
||||||
|
_pipeline = pipeline(
|
||||||
|
"zero-shot-classification",
|
||||||
|
model=model_id,
|
||||||
|
device=0 if device == "cuda" else -1,
|
||||||
|
)
|
||||||
|
logger.info("cybersec pipeline ready")
|
||||||
|
return _pipeline
|
||||||
|
|
||||||
|
|
||||||
|
def reset_pipeline() -> None:
|
||||||
|
"""Clear the cached pipeline — for testing only."""
|
||||||
|
global _pipeline # noqa: PLW0603
|
||||||
|
_pipeline = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Result type
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class CybersecResult:
|
||||||
|
scored: int = 0
|
||||||
|
detections: int = 0
|
||||||
|
skipped: bool = False
|
||||||
|
error: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Core scoring function
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def score_security_entries(
|
||||||
|
db_path: Path,
|
||||||
|
model_id: str,
|
||||||
|
device: str = "cpu",
|
||||||
|
batch_size: int = 32,
|
||||||
|
threshold: float = 0.60,
|
||||||
|
) -> CybersecResult:
|
||||||
|
"""Score entries that were anomaly-flagged or pattern-matched.
|
||||||
|
|
||||||
|
Only entries with ml_scored_at IS NULL are processed (idempotent).
|
||||||
|
Writes ml_score / ml_label / ml_scored_at and inserts high-confidence
|
||||||
|
hits into detections with scorer='cybersec'.
|
||||||
|
"""
|
||||||
|
if not model_id:
|
||||||
|
return CybersecResult(skipped=True)
|
||||||
|
|
||||||
|
tenant_id = resolve_tenant_id()
|
||||||
|
try:
|
||||||
|
pipe = _get_pipeline(model_id, device)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("failed to load cybersec pipeline: %s", exc)
|
||||||
|
return CybersecResult(error=str(exc))
|
||||||
|
|
||||||
|
total_scored = 0
|
||||||
|
total_detections = 0
|
||||||
|
|
||||||
|
try:
|
||||||
|
with get_conn(db_path) as conn:
|
||||||
|
# Only score entries that are worth a second look:
|
||||||
|
# anomaly-flagged (non-normal) OR have at least one pattern match.
|
||||||
|
rows = conn.execute(
|
||||||
|
q("""
|
||||||
|
SELECT id, source_id, text, timestamp_iso
|
||||||
|
FROM log_entries
|
||||||
|
WHERE (tenant_id = ? OR tenant_id = '')
|
||||||
|
AND ml_scored_at IS NULL
|
||||||
|
AND (
|
||||||
|
(anomaly_label IS NOT NULL AND anomaly_label != 'NORMAL')
|
||||||
|
OR (matched_patterns IS NOT NULL AND matched_patterns != '[]' AND matched_patterns != '')
|
||||||
|
)
|
||||||
|
LIMIT ?
|
||||||
|
"""),
|
||||||
|
(tenant_id, batch_size * 10),
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
return CybersecResult(skipped=True)
|
||||||
|
|
||||||
|
# Process in chunks to avoid OOM on large backlogs
|
||||||
|
for i in range(0, len(rows), batch_size):
|
||||||
|
chunk = rows[i : i + batch_size]
|
||||||
|
texts = [r["text"] for r in chunk]
|
||||||
|
|
||||||
|
try:
|
||||||
|
results = pipe(texts, candidate_labels=CYBERSEC_LABELS, multi_label=False)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("zero-shot inference error on chunk %d: %s", i, exc)
|
||||||
|
continue
|
||||||
|
|
||||||
|
now = datetime.now(tz=timezone.utc).isoformat()
|
||||||
|
|
||||||
|
with get_conn(db_path) as conn:
|
||||||
|
for row, result in zip(chunk, results):
|
||||||
|
top_label: str = result["labels"][0]
|
||||||
|
top_score: float = result["scores"][0]
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
q("""
|
||||||
|
UPDATE log_entries
|
||||||
|
SET ml_score = ?, ml_label = ?, ml_scored_at = ?
|
||||||
|
WHERE id = ? AND (tenant_id = ? OR tenant_id = '')
|
||||||
|
"""),
|
||||||
|
(top_score, top_label, now, row["id"], tenant_id),
|
||||||
|
)
|
||||||
|
total_scored += 1
|
||||||
|
|
||||||
|
if top_score >= threshold and top_label != _NORMAL_LABEL:
|
||||||
|
severity = _LABEL_SEVERITY.get(top_label, "WARN")
|
||||||
|
try:
|
||||||
|
conn.execute(
|
||||||
|
q("""
|
||||||
|
INSERT INTO detections
|
||||||
|
(id, tenant_id, entry_id, source_id, anomaly_label,
|
||||||
|
anomaly_score, severity, text, timestamp_iso,
|
||||||
|
detected_at, scorer)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'cybersec')
|
||||||
|
"""),
|
||||||
|
(
|
||||||
|
str(uuid.uuid4()),
|
||||||
|
tenant_id,
|
||||||
|
row["id"],
|
||||||
|
row["source_id"],
|
||||||
|
top_label,
|
||||||
|
top_score,
|
||||||
|
severity,
|
||||||
|
row["text"],
|
||||||
|
row["timestamp_iso"],
|
||||||
|
now,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
total_detections += 1
|
||||||
|
except Exception:
|
||||||
|
pass # entry may already have a detection — skip
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("cybersec scoring failed: %s", exc)
|
||||||
|
return CybersecResult(scored=total_scored, detections=total_detections, error=str(exc))
|
||||||
|
|
||||||
|
return CybersecResult(scored=total_scored, detections=total_detections)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Query helpers (used by REST layer)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def list_cybersec_detections(
|
||||||
|
db_path: Path,
|
||||||
|
limit: int = 100,
|
||||||
|
unacked_only: bool = False,
|
||||||
|
label: str | None = None,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Return cybersec detections ordered by detected_at DESC."""
|
||||||
|
tenant_id = resolve_tenant_id()
|
||||||
|
conditions = ["(tenant_id = ? OR tenant_id = '')", "scorer = 'cybersec'"]
|
||||||
|
params: list[Any] = [tenant_id]
|
||||||
|
|
||||||
|
if unacked_only:
|
||||||
|
conditions.append("acknowledged = 0")
|
||||||
|
if label:
|
||||||
|
conditions.append(q("anomaly_label = ?"))
|
||||||
|
params.append(label)
|
||||||
|
|
||||||
|
where = " AND ".join(conditions)
|
||||||
|
with get_conn(db_path) as conn:
|
||||||
|
rows = conn.execute(
|
||||||
|
q(f"SELECT * FROM detections WHERE {where} ORDER BY detected_at DESC LIMIT ?"), # noqa: S608
|
||||||
|
(*params, limit),
|
||||||
|
).fetchall()
|
||||||
|
return [dict(r) for r in rows]
|
||||||
84
app/tasks/cybersec_scorer.py
Normal file
84
app/tasks/cybersec_scorer.py
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
"""Background task wrapper for the cybersec zero-shot scoring pipeline."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from app.services.cybersec import score_security_entries
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class CybersecState:
|
||||||
|
last_run_at: str | None = None
|
||||||
|
last_duration_s: float | None = None
|
||||||
|
last_scored: int = 0
|
||||||
|
last_detections: int = 0
|
||||||
|
last_error: str | None = None
|
||||||
|
run_count: int = 0
|
||||||
|
running: bool = False
|
||||||
|
total_scored: int = 0
|
||||||
|
total_detections: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
_state = CybersecState()
|
||||||
|
|
||||||
|
|
||||||
|
def get_state() -> dict:
|
||||||
|
return {
|
||||||
|
"last_run_at": _state.last_run_at,
|
||||||
|
"last_duration_s":_state.last_duration_s,
|
||||||
|
"last_scored": _state.last_scored,
|
||||||
|
"last_detections":_state.last_detections,
|
||||||
|
"last_error": _state.last_error,
|
||||||
|
"run_count": _state.run_count,
|
||||||
|
"running": _state.running,
|
||||||
|
"total_scored": _state.total_scored,
|
||||||
|
"total_detections": _state.total_detections,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def run_once(
|
||||||
|
db_path: Path,
|
||||||
|
model_id: str,
|
||||||
|
device: str = "cpu",
|
||||||
|
batch_size: int = 32,
|
||||||
|
threshold: float = 0.60,
|
||||||
|
) -> None:
|
||||||
|
"""Single cybersec scoring pass — no-op if already running or no model set."""
|
||||||
|
if not model_id or _lock.locked():
|
||||||
|
return
|
||||||
|
|
||||||
|
async with _lock:
|
||||||
|
_state.running = True
|
||||||
|
started = datetime.now(tz=timezone.utc)
|
||||||
|
try:
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
result = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
lambda: score_security_entries(db_path, model_id, device, batch_size, threshold),
|
||||||
|
)
|
||||||
|
elapsed = (datetime.now(tz=timezone.utc) - started).total_seconds()
|
||||||
|
_state.last_run_at = started.isoformat()
|
||||||
|
_state.last_duration_s = elapsed
|
||||||
|
_state.last_scored = result.scored
|
||||||
|
_state.last_detections = result.detections
|
||||||
|
_state.last_error = result.error
|
||||||
|
_state.run_count += 1
|
||||||
|
_state.total_scored += result.scored
|
||||||
|
_state.total_detections += result.detections
|
||||||
|
if result.error:
|
||||||
|
logger.error("cybersec scorer error: %s", result.error)
|
||||||
|
elif not result.skipped:
|
||||||
|
logger.info(
|
||||||
|
"cybersec scorer: scored=%d detections=%d in %.1fs",
|
||||||
|
result.scored, result.detections, elapsed,
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
_state.running = False
|
||||||
|
|
@ -21,6 +21,7 @@ import httpx
|
||||||
|
|
||||||
from app.glean.pipeline import glean_sources
|
from app.glean.pipeline import glean_sources
|
||||||
from app.tasks.anomaly_scorer import run_once as _run_scorer
|
from app.tasks.anomaly_scorer import run_once as _run_scorer
|
||||||
|
from app.tasks.cybersec_scorer import run_once as _run_cybersec
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -127,6 +128,9 @@ async def run_once(
|
||||||
anomaly_model: str = "",
|
anomaly_model: str = "",
|
||||||
anomaly_device: str = "cpu",
|
anomaly_device: str = "cpu",
|
||||||
anomaly_threshold: float = 0.75,
|
anomaly_threshold: float = 0.75,
|
||||||
|
cybersec_model: str = "",
|
||||||
|
cybersec_device: str = "cpu",
|
||||||
|
cybersec_threshold: float = 0.60,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Ingest all sources once, then submit matched entries if configured.
|
"""Ingest all sources once, then submit matched entries if configured.
|
||||||
|
|
||||||
|
|
@ -170,6 +174,9 @@ async def run_once(
|
||||||
if anomaly_model:
|
if anomaly_model:
|
||||||
await _run_scorer(db_path, anomaly_model, anomaly_device, threshold=anomaly_threshold)
|
await _run_scorer(db_path, anomaly_model, anomaly_device, threshold=anomaly_threshold)
|
||||||
|
|
||||||
|
if cybersec_model:
|
||||||
|
await _run_cybersec(db_path, cybersec_model, cybersec_device, threshold=cybersec_threshold)
|
||||||
|
|
||||||
return {"ok": True, "stats": _state.last_stats, "duration_s": _state.last_duration_s}
|
return {"ok": True, "stats": _state.last_stats, "duration_s": _state.last_duration_s}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -183,19 +190,27 @@ async def scheduler_loop(
|
||||||
anomaly_model: str = "",
|
anomaly_model: str = "",
|
||||||
anomaly_device: str = "cpu",
|
anomaly_device: str = "cpu",
|
||||||
anomaly_threshold: float = 0.75,
|
anomaly_threshold: float = 0.75,
|
||||||
|
cybersec_model: str = "",
|
||||||
|
cybersec_device: str = "cpu",
|
||||||
|
cybersec_threshold: float = 0.60,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Run glean + optional submission + optional anomaly scoring every interval_s seconds."""
|
"""Run glean + optional submission + optional anomaly/cybersec scoring every interval_s seconds."""
|
||||||
logger.info("Ingest scheduler started — interval %ds, sources: %s", interval_s, sources_file)
|
logger.info("Ingest scheduler started — interval %ds, sources: %s", interval_s, sources_file)
|
||||||
if submit_endpoint:
|
if submit_endpoint:
|
||||||
logger.info("Submission enabled — endpoint: %s", submit_endpoint)
|
logger.info("Submission enabled — endpoint: %s", submit_endpoint)
|
||||||
if anomaly_model:
|
if anomaly_model:
|
||||||
logger.info("Anomaly scoring enabled — model: %s", anomaly_model)
|
logger.info("Anomaly scoring enabled — model: %s", anomaly_model)
|
||||||
|
if cybersec_model:
|
||||||
|
logger.info("Cybersec scoring enabled — model: %s", cybersec_model)
|
||||||
while True:
|
while True:
|
||||||
await run_once(
|
await run_once(
|
||||||
sources_file, db_path, pattern_file, submit_endpoint, source_host,
|
sources_file, db_path, pattern_file, submit_endpoint, source_host,
|
||||||
anomaly_model=anomaly_model,
|
anomaly_model=anomaly_model,
|
||||||
anomaly_device=anomaly_device,
|
anomaly_device=anomaly_device,
|
||||||
anomaly_threshold=anomaly_threshold,
|
anomaly_threshold=anomaly_threshold,
|
||||||
|
cybersec_model=cybersec_model,
|
||||||
|
cybersec_device=cybersec_device,
|
||||||
|
cybersec_threshold=cybersec_threshold,
|
||||||
)
|
)
|
||||||
next_run = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s)
|
next_run = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s)
|
||||||
_state.next_run_at = next_run.isoformat()
|
_state.next_run_at = next_run.isoformat()
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,10 @@ services:
|
||||||
TURNSTONE_EMBED_BACKEND: ${TURNSTONE_EMBED_BACKEND:-}
|
TURNSTONE_EMBED_BACKEND: ${TURNSTONE_EMBED_BACKEND:-}
|
||||||
TURNSTONE_EMBED_MODEL: ${TURNSTONE_EMBED_MODEL:-}
|
TURNSTONE_EMBED_MODEL: ${TURNSTONE_EMBED_MODEL:-}
|
||||||
TURNSTONE_EMBED_DEVICE: ${TURNSTONE_EMBED_DEVICE:-cpu}
|
TURNSTONE_EMBED_DEVICE: ${TURNSTONE_EMBED_DEVICE:-cpu}
|
||||||
|
# --- Cybersec scoring pipeline ---
|
||||||
|
TURNSTONE_CYBERSEC_MODEL: ${TURNSTONE_CYBERSEC_MODEL:-}
|
||||||
|
TURNSTONE_CYBERSEC_DEVICE: ${TURNSTONE_CYBERSEC_DEVICE:-cpu}
|
||||||
|
TURNSTONE_CYBERSEC_THRESHOLD: ${TURNSTONE_CYBERSEC_THRESHOLD:-0.60}
|
||||||
# --- Anomaly scoring pipeline ---
|
# --- Anomaly scoring pipeline ---
|
||||||
TURNSTONE_ANOMALY_MODEL: ${TURNSTONE_ANOMALY_MODEL:-}
|
TURNSTONE_ANOMALY_MODEL: ${TURNSTONE_ANOMALY_MODEL:-}
|
||||||
TURNSTONE_ANOMALY_DEVICE: ${TURNSTONE_ANOMALY_DEVICE:-cpu}
|
TURNSTONE_ANOMALY_DEVICE: ${TURNSTONE_ANOMALY_DEVICE:-cpu}
|
||||||
|
|
|
||||||
|
|
@ -147,6 +147,9 @@ docker run -d \
|
||||||
-e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
|
-e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
|
||||||
-e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
|
-e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
|
||||||
-e TURNSTONE_EMBED_DEVICE="${TURNSTONE_EMBED_DEVICE:-cpu}" \
|
-e TURNSTONE_EMBED_DEVICE="${TURNSTONE_EMBED_DEVICE:-cpu}" \
|
||||||
|
-e TURNSTONE_CYBERSEC_MODEL="${TURNSTONE_CYBERSEC_MODEL:-}" \
|
||||||
|
-e TURNSTONE_CYBERSEC_DEVICE="${TURNSTONE_CYBERSEC_DEVICE:-cpu}" \
|
||||||
|
-e TURNSTONE_CYBERSEC_THRESHOLD="${TURNSTONE_CYBERSEC_THRESHOLD:-0.60}" \
|
||||||
-e TURNSTONE_ANOMALY_MODEL="${TURNSTONE_ANOMALY_MODEL:-}" \
|
-e TURNSTONE_ANOMALY_MODEL="${TURNSTONE_ANOMALY_MODEL:-}" \
|
||||||
-e TURNSTONE_ANOMALY_DEVICE="${TURNSTONE_ANOMALY_DEVICE:-cpu}" \
|
-e TURNSTONE_ANOMALY_DEVICE="${TURNSTONE_ANOMALY_DEVICE:-cpu}" \
|
||||||
-e TURNSTONE_ANOMALY_THRESHOLD="${TURNSTONE_ANOMALY_THRESHOLD:-0.75}" \
|
-e TURNSTONE_ANOMALY_THRESHOLD="${TURNSTONE_ANOMALY_THRESHOLD:-0.75}" \
|
||||||
|
|
|
||||||
233
tests/test_cybersec.py
Normal file
233
tests/test_cybersec.py
Normal file
|
|
@ -0,0 +1,233 @@
|
||||||
|
"""Tests for the cybersec zero-shot scoring pipeline."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.db.schema import ensure_schema
|
||||||
|
from app.services.cybersec import (
|
||||||
|
CybersecResult,
|
||||||
|
CYBERSEC_LABELS,
|
||||||
|
_NORMAL_LABEL,
|
||||||
|
reset_pipeline,
|
||||||
|
score_security_entries,
|
||||||
|
list_cybersec_detections,
|
||||||
|
)
|
||||||
|
import app.services.cybersec as cybersec_mod
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _reset(tmp_path):
|
||||||
|
reset_pipeline()
|
||||||
|
yield
|
||||||
|
reset_pipeline()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def db(tmp_path) -> Path:
|
||||||
|
path = tmp_path / "test.db"
|
||||||
|
ensure_schema(path)
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def _insert_entry(db: Path, entry_id: str, text: str,
|
||||||
|
anomaly_label: str | None = None,
|
||||||
|
matched_patterns: str = "[]") -> None:
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
conn.execute(
|
||||||
|
"""INSERT OR IGNORE INTO log_entries
|
||||||
|
(id, tenant_id, source_id, sequence, ingest_time, text,
|
||||||
|
anomaly_label, matched_patterns)
|
||||||
|
VALUES (?, '', 'test-src', 1, '2026-01-01T00:00:00Z', ?, ?, ?)""",
|
||||||
|
(entry_id, text, anomaly_label, matched_patterns),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# No model configured → skipped
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_no_model_returns_skipped(db):
|
||||||
|
result = score_security_entries(db, model_id="")
|
||||||
|
assert result.skipped is True
|
||||||
|
assert result.scored == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# No eligible entries → skipped
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_no_eligible_entries_skipped(db):
|
||||||
|
_insert_entry(db, "e1", "Started nginx.service", anomaly_label=None, matched_patterns="[]")
|
||||||
|
mock_pipe = MagicMock(return_value=[{"labels": [_NORMAL_LABEL], "scores": [0.99]}])
|
||||||
|
monkeypatch = pytest.MonkeyPatch()
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", mock_pipe)
|
||||||
|
result = score_security_entries(db, model_id="fake-model")
|
||||||
|
assert result.skipped is True
|
||||||
|
monkeypatch.undo()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Security entry gets scored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_security_entry_scored(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1",
|
||||||
|
"Failed password for root from 192.168.1.1 port 22 ssh2",
|
||||||
|
anomaly_label="SECURITY_ANOMALY")
|
||||||
|
|
||||||
|
mock_pipe = MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack", _NORMAL_LABEL],
|
||||||
|
"scores": [0.85, 0.15],
|
||||||
|
}])
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", mock_pipe)
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.70)
|
||||||
|
assert result.scored == 1
|
||||||
|
assert result.detections == 1
|
||||||
|
assert result.error is None
|
||||||
|
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
row = conn.execute("SELECT ml_score, ml_label, ml_scored_at FROM log_entries WHERE id='e1'").fetchone()
|
||||||
|
assert row["ml_score"] == pytest.approx(0.85)
|
||||||
|
assert row["ml_label"] == "authentication failure or brute force attack"
|
||||||
|
assert row["ml_scored_at"] is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Detection created above threshold
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_detection_inserted_above_threshold(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "sudo: authentication failure", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["privilege escalation or unauthorized access", _NORMAL_LABEL],
|
||||||
|
"scores": [0.75, 0.25],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
|
||||||
|
with sqlite3.connect(db) as conn:
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
dets = conn.execute("SELECT * FROM detections WHERE scorer='cybersec'").fetchall()
|
||||||
|
assert len(dets) == 1
|
||||||
|
assert dets[0]["anomaly_label"] == "privilege escalation or unauthorized access"
|
||||||
|
assert dets[0]["severity"] == "CRITICAL"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Normal label → no detection even above score threshold
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_normal_label_no_detection(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Started nginx.service", anomaly_label="INFO",
|
||||||
|
matched_patterns='["service_start"]')
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": [_NORMAL_LABEL, "network intrusion or port scan"],
|
||||||
|
"scores": [0.95, 0.05],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result.detections == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Below threshold → scored but no detection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_below_threshold_no_detection(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Some suspicious text", anomaly_label="WARN")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["network intrusion or port scan", _NORMAL_LABEL],
|
||||||
|
"scores": [0.45, 0.55],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result.scored == 1
|
||||||
|
assert result.detections == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pattern-matched entry (not anomaly-flagged) still gets scored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_pattern_matched_entry_scored(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "SSH port forwarding conflict detected",
|
||||||
|
anomaly_label=None,
|
||||||
|
matched_patterns='["ssh_forward_conflict"]')
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["network intrusion or port scan", _NORMAL_LABEL],
|
||||||
|
"scores": [0.70, 0.30],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
result = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result.scored == 1
|
||||||
|
assert result.detections == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Idempotency — re-run finds nothing unscored
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_idempotent_rerun(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack"],
|
||||||
|
"scores": [0.80],
|
||||||
|
}]))
|
||||||
|
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
result2 = score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
assert result2.skipped is True
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# list_cybersec_detections filters to scorer='cybersec'
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_list_cybersec_detections(db, monkeypatch):
|
||||||
|
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack"],
|
||||||
|
"scores": [0.90],
|
||||||
|
}]))
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
|
||||||
|
rows = list_cybersec_detections(db)
|
||||||
|
assert len(rows) == 1
|
||||||
|
assert rows[0]["scorer"] == "cybersec"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# list_detections scorer filter (anomaly service)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_list_detections_scorer_filter(db, monkeypatch):
|
||||||
|
from app.services.anomaly import list_detections
|
||||||
|
_insert_entry(db, "e1", "Failed login", anomaly_label="ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr(cybersec_mod, "_pipeline", MagicMock(return_value=[{
|
||||||
|
"labels": ["authentication failure or brute force attack"],
|
||||||
|
"scores": [0.90],
|
||||||
|
}]))
|
||||||
|
score_security_entries(db, model_id="fake-model", threshold=0.60)
|
||||||
|
|
||||||
|
all_dets = list_detections(db)
|
||||||
|
cybersec_dets = list_detections(db, scorer="cybersec")
|
||||||
|
anomaly_dets = list_detections(db, scorer="anomaly")
|
||||||
|
|
||||||
|
assert len(cybersec_dets) == 1
|
||||||
|
assert len(anomaly_dets) == 0
|
||||||
|
assert len(all_dets) >= 1
|
||||||
|
|
@ -29,6 +29,20 @@
|
||||||
{{ scorerStatus.running ? 'scoring…' : scorerStatus.enabled ? 'scorer ready' : 'scorer off' }}
|
{{ scorerStatus.running ? 'scoring…' : scorerStatus.enabled ? 'scorer ready' : 'scorer off' }}
|
||||||
</span>
|
</span>
|
||||||
|
|
||||||
|
<!-- Cybersec scorer status -->
|
||||||
|
<span
|
||||||
|
v-if="cybersecStatus"
|
||||||
|
:class="[
|
||||||
|
'text-xs px-2 py-1 rounded border font-mono',
|
||||||
|
cybersecStatus.enabled
|
||||||
|
? 'border-surface-border text-text-dim'
|
||||||
|
: 'border-surface-border text-text-dim opacity-40'
|
||||||
|
]"
|
||||||
|
:title="cybersecStatus.enabled ? `cybersec: ${cybersecStatus.model}` : 'TURNSTONE_CYBERSEC_MODEL not set'"
|
||||||
|
>
|
||||||
|
{{ cybersecStatus.enabled ? 'cybersec on' : 'cybersec off' }}
|
||||||
|
</span>
|
||||||
|
|
||||||
<button
|
<button
|
||||||
@click="runScorer"
|
@click="runScorer"
|
||||||
:disabled="!scorerStatus?.enabled || triggerLoading || scorerStatus?.running"
|
:disabled="!scorerStatus?.enabled || triggerLoading || scorerStatus?.running"
|
||||||
|
|
@ -86,6 +100,21 @@
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Scorer filter -->
|
||||||
|
<div class="flex items-center gap-2 shrink-0">
|
||||||
|
<label for="scorer-filter" class="text-xs text-text-dim whitespace-nowrap">Source:</label>
|
||||||
|
<select
|
||||||
|
id="scorer-filter"
|
||||||
|
v-model="scorerFilter"
|
||||||
|
@change="loadDetections()"
|
||||||
|
class="text-xs bg-surface border border-surface-border rounded px-2 py-1 text-text-primary focus:outline-none focus:border-accent"
|
||||||
|
>
|
||||||
|
<option value="">All</option>
|
||||||
|
<option value="anomaly">Anomaly scorer</option>
|
||||||
|
<option value="cybersec">Cybersec scorer</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Label filter -->
|
<!-- Label filter -->
|
||||||
<div class="flex items-center gap-2 shrink-0">
|
<div class="flex items-center gap-2 shrink-0">
|
||||||
<label for="label-filter" class="text-xs text-text-dim whitespace-nowrap">Label:</label>
|
<label for="label-filter" class="text-xs text-text-dim whitespace-nowrap">Label:</label>
|
||||||
|
|
@ -96,7 +125,12 @@
|
||||||
class="text-xs bg-surface border border-surface-border rounded px-2 py-1 text-text-primary focus:outline-none focus:border-accent"
|
class="text-xs bg-surface border border-surface-border rounded px-2 py-1 text-text-primary focus:outline-none focus:border-accent"
|
||||||
>
|
>
|
||||||
<option value="">All</option>
|
<option value="">All</option>
|
||||||
<option v-for="lbl in knownLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
|
<optgroup label="Anomaly labels">
|
||||||
|
<option v-for="lbl in anomalyLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
|
||||||
|
</optgroup>
|
||||||
|
<optgroup label="Cybersec labels">
|
||||||
|
<option v-for="lbl in cybersecLabels" :key="lbl" :value="lbl">{{ lbl }}</option>
|
||||||
|
</optgroup>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -288,6 +322,7 @@ interface ScorerStatus {
|
||||||
|
|
||||||
const detections = ref<Detection[]>([])
|
const detections = ref<Detection[]>([])
|
||||||
const scorerStatus = ref<ScorerStatus | null>(null)
|
const scorerStatus = ref<ScorerStatus | null>(null)
|
||||||
|
const cybersecStatus = ref<Record<string, unknown> | null>(null)
|
||||||
const loading = ref(true)
|
const loading = ref(true)
|
||||||
const triggerLoading = ref(false)
|
const triggerLoading = ref(false)
|
||||||
const ackLoading = ref(false)
|
const ackLoading = ref(false)
|
||||||
|
|
@ -296,14 +331,23 @@ const ackNotes = ref('')
|
||||||
const drawer = ref<Detection | null>(null)
|
const drawer = ref<Detection | null>(null)
|
||||||
const activeTab = ref<'all' | 'unacked'>('all')
|
const activeTab = ref<'all' | 'unacked'>('all')
|
||||||
const labelFilter = ref('')
|
const labelFilter = ref('')
|
||||||
|
const scorerFilter = ref('')
|
||||||
const tabRefs = ref<(HTMLElement | null)[]>([])
|
const tabRefs = ref<(HTMLElement | null)[]>([])
|
||||||
|
|
||||||
const knownLabels = [
|
const anomalyLabels = [
|
||||||
'SECURITY_ANOMALY', 'SYSTEM_FAILURE', 'PERFORMANCE_ISSUE',
|
'SECURITY_ANOMALY', 'SYSTEM_FAILURE', 'PERFORMANCE_ISSUE',
|
||||||
'NETWORK_ANOMALY', 'CONFIG_ERROR', 'HARDWARE_ISSUE',
|
'NETWORK_ANOMALY', 'CONFIG_ERROR', 'HARDWARE_ISSUE',
|
||||||
'CRITICAL', 'ERROR',
|
'CRITICAL', 'ERROR',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
const cybersecLabels = [
|
||||||
|
'authentication failure or brute force attack',
|
||||||
|
'privilege escalation or unauthorized access',
|
||||||
|
'network intrusion or port scan',
|
||||||
|
'malware or suspicious process activity',
|
||||||
|
'data exfiltration or unusual outbound traffic',
|
||||||
|
]
|
||||||
|
|
||||||
// ── Tabs ─────────────────────────────────────────────────────────────────────
|
// ── Tabs ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
const unackedCount = computed(() => detections.value.filter(d => !d.acknowledged).length)
|
const unackedCount = computed(() => detections.value.filter(d => !d.acknowledged).length)
|
||||||
|
|
@ -325,6 +369,7 @@ async function loadDetections() {
|
||||||
loading.value = true
|
loading.value = true
|
||||||
const params = new URLSearchParams({ limit: '200' })
|
const params = new URLSearchParams({ limit: '200' })
|
||||||
if (labelFilter.value) params.set('label', labelFilter.value)
|
if (labelFilter.value) params.set('label', labelFilter.value)
|
||||||
|
if (scorerFilter.value) params.set('scorer', scorerFilter.value)
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${BASE}/turnstone/api/anomaly/detections?${params}`)
|
const res = await fetch(`${BASE}/turnstone/api/anomaly/detections?${params}`)
|
||||||
if (!res.ok) throw new Error(`HTTP ${res.status}`)
|
if (!res.ok) throw new Error(`HTTP ${res.status}`)
|
||||||
|
|
@ -342,10 +387,18 @@ async function loadDetections() {
|
||||||
|
|
||||||
async function loadScorerStatus() {
|
async function loadScorerStatus() {
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${BASE}/turnstone/api/anomaly/status`)
|
const [anomalyRes, cybersecRes] = await Promise.all([
|
||||||
if (!res.ok) return
|
fetch(`${BASE}/turnstone/api/anomaly/status`),
|
||||||
const data = await res.json()
|
fetch(`${BASE}/turnstone/api/cybersec/status`),
|
||||||
scorerStatus.value = { ...data.state, ...data.config }
|
])
|
||||||
|
if (anomalyRes.ok) {
|
||||||
|
const data = await anomalyRes.json()
|
||||||
|
scorerStatus.value = { ...data.state, ...data.config }
|
||||||
|
}
|
||||||
|
if (cybersecRes.ok) {
|
||||||
|
const data = await cybersecRes.json()
|
||||||
|
cybersecStatus.value = data
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// scorer status is non-critical — fail silently
|
// scorer status is non-critical — fail silently
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue