Second-pass cybersec classifier using DeBERTa-v3-base-mnli (already cached — no download required). Runs after each anomaly scoring pass on entries that were flagged by the anomaly scorer or that have pattern matches. Architecture: - app/services/cybersec.py: zero-shot-classification pipeline with 5 cybersec candidate labels (auth failure, privilege escalation, network intrusion, malware, data exfiltration). Writes ml_score/ml_label/ml_scored_at to log_entries; inserts high-confidence hits into detections with scorer='cybersec'. - app/tasks/cybersec_scorer.py: async background task (same shape as anomaly_scorer.py). - REST: GET/POST /turnstone/api/cybersec/status|run|detections. GET /turnstone/api/anomaly/detections now accepts scorer= filter. Schema: ml_score, ml_label, ml_scored_at added to log_entries; scorer column added to detections (idempotent migrations + DDL for both SQLite and Postgres). UI: Security Alerts view gains Source dropdown (All / Anomaly / Cybersec) and cybersec scorer status badge. Label dropdown split into optgroups. Deployment: TURNSTONE_CYBERSEC_MODEL/DEVICE/THRESHOLD vars added to .env.example, docker-compose.yml, docker-standalone.sh. Tests: 10 new tests — no model, no eligible entries, scoring, detection creation, normal label suppression, threshold filtering, pattern-tag filtering, idempotency, list filtering, scorer column filter. 416/416 passing. Closes: #9
84 lines
2.6 KiB
Python
84 lines
2.6 KiB
Python
"""Background task wrapper for the cybersec zero-shot scoring pipeline."""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
from app.services.cybersec import score_security_entries
|
|
|
|
# Module-level logger, named after this module; run_once reports scorer
# results and errors through it.
logger = logging.getLogger(__name__)
|
|
|
|
# Held by run_once for the whole duration of a scoring pass. run_once also
# checks _lock.locked() up front and returns immediately when a pass is
# already in flight, so overlapping invocations are skipped rather than queued.
# NOTE(review): on Python < 3.10 an asyncio.Lock binds to an event loop;
# confirm the deployed runtime version if this module is imported before the
# application's loop is running.
_lock = asyncio.Lock()
|
|
|
|
|
|
@dataclass
class CybersecState:
    """Bookkeeping for the cybersec scoring task.

    The ``last_*`` fields describe the most recent pass, while ``run_count``
    and the ``total_*`` fields accumulate across passes. All fields start
    out empty or zero.
    """

    # --- most recent pass ---------------------------------------------
    # ISO-8601 string of the UTC time at which the pass started.
    last_run_at: str | None = None
    # Duration of the pass in seconds.
    last_duration_s: float | None = None
    # Number of entries scored in the pass.
    last_scored: int = 0
    # Number of detections reported by the pass.
    last_detections: int = 0
    # Error reported by the pass, or None when there was none.
    last_error: str | None = None

    # --- lifetime counters and status -----------------------------------
    # Number of passes recorded so far.
    run_count: int = 0
    # True while a pass is in progress.
    running: bool = False
    # Running sum of last_scored over all recorded passes.
    total_scored: int = 0
    # Running sum of last_detections over all recorded passes.
    total_detections: int = 0
|
|
|
|
|
|
# Single shared state record for this module: run_once mutates it in place
# and get_state exposes its current values as a dict.
_state = CybersecState()
|
|
|
|
|
|
def get_state() -> dict:
    """Return the scorer's current bookkeeping as a plain dict.

    Every key is the name of an attribute on the module-level ``_state``
    record and maps to that attribute's value at the time of the call.
    """
    exported = (
        "last_run_at",
        "last_duration_s",
        "last_scored",
        "last_detections",
        "last_error",
        "run_count",
        "running",
        "total_scored",
        "total_detections",
    )
    return {attr: getattr(_state, attr) for attr in exported}
|
|
|
|
|
|
async def run_once(
    db_path: Path,
    model_id: str,
    device: str = "cpu",
    batch_size: int = 32,
    threshold: float = 0.60,
) -> None:
    """Run a single cybersec scoring pass and record its outcome in ``_state``.

    The call is a no-op when ``model_id`` is empty or when another pass
    currently holds the module lock. Otherwise ``score_security_entries`` is
    executed in the event loop's default executor so the blocking work does
    not stall the loop, and the module state is updated from its result.

    Args:
        db_path: Database path forwarded to ``score_security_entries``.
        model_id: Model identifier forwarded to the scorer; falsy disables
            the pass entirely.
        device: Device string forwarded to the scorer.
        batch_size: Batch size forwarded to the scorer.
        threshold: Threshold forwarded to the scorer.

    Raises:
        Exception: Whatever ``score_security_entries`` raises is re-raised
            unchanged, after the failure has been recorded in ``_state``.
    """
    if not model_id or _lock.locked():
        return

    # Function-local import: a monotonic clock is used for the duration so a
    # wall-clock adjustment during the pass cannot skew (or negate) it.
    import time

    async with _lock:
        _state.running = True
        started = datetime.now(tz=timezone.utc)
        tick = time.monotonic()
        try:
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                score_security_entries,
                db_path,
                model_id,
                device,
                batch_size,
                threshold,
            )
        except Exception as exc:
            # Without this branch a crash in the scorer would leave the
            # previous pass's values in place, and status would keep
            # reporting a healthy last run. Record the failure, then let
            # the exception propagate exactly as before.
            _state.last_run_at = started.isoformat()
            _state.last_duration_s = time.monotonic() - tick
            _state.last_scored = 0
            _state.last_detections = 0
            _state.last_error = f"{type(exc).__name__}: {exc}"
            _state.run_count += 1
            logger.exception("cybersec scorer crashed")
            raise
        else:
            elapsed = time.monotonic() - tick
            _state.last_run_at = started.isoformat()
            _state.last_duration_s = elapsed
            _state.last_scored = result.scored
            _state.last_detections = result.detections
            _state.last_error = result.error
            _state.run_count += 1
            _state.total_scored += result.scored
            _state.total_detections += result.detections
            if result.error:
                logger.error("cybersec scorer error: %s", result.error)
            elif not result.skipped:
                # Skipped passes are not logged.
                logger.info(
                    "cybersec scorer: scored=%d detections=%d in %.1fs",
                    result.scored, result.detections, elapsed,
                )
        finally:
            # Always clear the flag, including on cancellation.
            _state.running = False
|