Second-pass cybersec classifier using DeBERTa-v3-base-mnli (already cached — no download required). Runs after each anomaly scoring pass on entries flagged by the anomaly scorer or with pattern matches. Architecture: - app/services/cybersec.py: zero-shot-classification pipeline with 5 cybersec candidate labels (auth failure, privilege escalation, network intrusion, malware, data exfiltration). Writes ml_score/ml_label/ ml_scored_at to log_entries; inserts high-confidence hits into detections with scorer='cybersec'. - app/tasks/cybersec_scorer.py: async background task (same shape as anomaly_scorer.py). - REST: GET/POST /turnstone/api/cybersec/status|run|detections. GET /turnstone/api/anomaly/detections now accepts scorer= filter. Schema: ml_score, ml_label, ml_scored_at added to log_entries; scorer column added to detections (idempotent migrations + DDL for both SQLite and Postgres). UI: Security Alerts view gains Source dropdown (All / Anomaly / Cybersec) and cybersec scorer status badge. Label dropdown split into optgroups. Deployment: TURNSTONE_CYBERSEC_MODEL/DEVICE/THRESHOLD vars added to .env.example, docker-compose.yml, docker-standalone.sh. Tests: 10 new tests — no model, no eligible entries, scoring, detection creation, normal label suppression, threshold filtering, pattern-tag filtering, idempotency, list filtering, scorer column filter. 416/416 passing. Closes: #9
68 lines
2.6 KiB
YAML
68 lines
2.6 KiB
YAML
# NOTE(review): Compose v2 treats the top-level `version` key as obsolete and
# ignores it; it is kept here for older docker-compose releases — confirm
# whether those still need to be supported.
version: "3.9"

# Turnstone with external Postgres DB.
# Data lives in the named volume `turnstone_pgdata` — survives image rebuilds.
# To adopt an EXISTING Postgres install, set DATABASE_URL to point at it and
# remove the `db` service and `depends_on` blocks.
#
# Quick start:
#   docker compose up -d
#   # Then open http://localhost:8520

services:
  # Postgres backend for Turnstone. Data is stored in the `turnstone_pgdata`
  # named volume.
  db:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: turnstone
      POSTGRES_USER: turnstone
      # Falls back to a development-only password; set POSTGRES_PASSWORD in the
      # environment / .env for anything that is not a local test setup.
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-turnstone_dev}"
    volumes:
      - turnstone_pgdata:/var/lib/postgresql/data
    healthcheck:
      # Database and user must match POSTGRES_DB / POSTGRES_USER above.
      test: ["CMD-SHELL", "pg_isready -U turnstone -d turnstone"]
      interval: 5s
      timeout: 5s
      retries: 5

  # Turnstone application, built from the Dockerfile in this directory.
  turnstone:
    build: .
    restart: unless-stopped
    ports:
      # Host port is configurable; the container side is fixed at 8520.
      - "${TURNSTONE_PORT:-8520}:8520"
    depends_on:
      # Wait for the `db` healthcheck to pass before starting.
      db:
        condition: service_healthy
    environment:
      # Backend selection — comment out DATABASE_URL to fall back to SQLite
      # NOTE(review): the password is interpolated verbatim into the URL, so
      # characters with URL meaning (@, :, /, ...) presumably need
      # percent-encoding — confirm against the app's URL parsing.
      DATABASE_URL: "postgresql://turnstone:${POSTGRES_PASSWORD:-turnstone_dev}@db:5432/turnstone"
      TURNSTONE_TENANT_ID: "${TURNSTONE_TENANT_ID:-}"
      TURNSTONE_API_KEY: "${TURNSTONE_API_KEY:-}"
      TURNSTONE_GLEAN_INTERVAL: "${TURNSTONE_GLEAN_INTERVAL:-900}"
      TURNSTONE_SOURCE_HOST: "${TURNSTONE_SOURCE_HOST:-}"
      TURNSTONE_SUBMIT_ENDPOINT: "${TURNSTONE_SUBMIT_ENDPOINT:-}"
      # --- Multi-agent diagnose pipeline ---
      TURNSTONE_MULTI_AGENT_DIAGNOSE: "${TURNSTONE_MULTI_AGENT_DIAGNOSE:-false}"
      TURNSTONE_CLASSIFIER_MODEL: "${TURNSTONE_CLASSIFIER_MODEL:-}"
      TURNSTONE_EMBED_BACKEND: "${TURNSTONE_EMBED_BACKEND:-}"
      TURNSTONE_EMBED_MODEL: "${TURNSTONE_EMBED_MODEL:-}"
      TURNSTONE_EMBED_DEVICE: "${TURNSTONE_EMBED_DEVICE:-cpu}"
      # --- Cybersec scoring pipeline ---
      TURNSTONE_CYBERSEC_MODEL: "${TURNSTONE_CYBERSEC_MODEL:-}"
      TURNSTONE_CYBERSEC_DEVICE: "${TURNSTONE_CYBERSEC_DEVICE:-cpu}"
      TURNSTONE_CYBERSEC_THRESHOLD: "${TURNSTONE_CYBERSEC_THRESHOLD:-0.60}"
      # --- Anomaly scoring pipeline ---
      TURNSTONE_ANOMALY_MODEL: "${TURNSTONE_ANOMALY_MODEL:-}"
      TURNSTONE_ANOMALY_DEVICE: "${TURNSTONE_ANOMALY_DEVICE:-cpu}"
      TURNSTONE_ANOMALY_THRESHOLD: "${TURNSTONE_ANOMALY_THRESHOLD:-0.75}"
      TURNSTONE_ANOMALY_INTERVAL: "${TURNSTONE_ANOMALY_INTERVAL:-0}"
      # --- HuggingFace model cache ---
      # Must match the container path of the cache mount under `volumes`.
      HF_HOME: /hf_cache
    volumes:
      - ./patterns:/app/patterns:ro
      - ./data:/app/data  # optional: persists SQLite files if DATABASE_URL unset
      # NOTE(review): the cache is mounted read-only, so models presumably
      # must already exist on the host; the default host path looks
      # macOS-specific — set HF_CACHE_PATH on other hosts. Confirm both.
      - "${HF_CACHE_PATH:-/Library/Assets/LLM}:/hf_cache:ro"  # shared model cache

volumes:
  # Mounted by the `db` service at /var/lib/postgresql/data.
  turnstone_pgdata:
    # Explicit name, so the volume is called exactly `turnstone_pgdata`
    # rather than a project-prefixed name.
    name: turnstone_pgdata