diff --git a/.env.example b/.env.example index fff4a27..97b21e6 100644 --- a/.env.example +++ b/.env.example @@ -42,6 +42,23 @@ # TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5 # TURNSTONE_EMBED_DEVICE=cpu +# --- Anomaly scoring pipeline (IDS / watchdog) --- +# Batch-scores every ingested log entry after each glean cycle. +# Any HuggingFace text-classification model works; the byviz classifier (already +# required by the diagnose pipeline) is the recommended starting point. +# Entries scoring above the threshold are inserted into the detections table and +# surfaced in the Security Alerts tab. +# +# Set TURNSTONE_ANOMALY_MODEL to enable; leave unset to disable (safe default). +# TURNSTONE_ANOMALY_MODEL=byviz/bylastic_classification_logs +# TURNSTONE_ANOMALY_DEVICE=cpu # or "cuda" / "mps" for GPU inference +# TURNSTONE_ANOMALY_THRESHOLD=0.80 # confidence floor for detection insertion (Docker default: 0.75) +# TURNSTONE_ANOMALY_INTERVAL=0 # standalone scoring-loop interval (0 = glean-triggered only) +# +# HuggingFace model cache — share with the host to avoid re-downloading models. +# HF_HOME=/hf_cache # inside container (set in docker-compose) +# HF_CACHE_PATH=/Library/Assets/LLM # host bind-mount source, mounted read-only (docker-compose only) + # --- Air-gapped / offline deployment --- # Set to 1 to block all HuggingFace hub network access at runtime. # Pre-download models to ~/.cache/huggingface/ before deploying — see docs/air-gapped-deployment.md. 
diff --git a/docker-compose.yml b/docker-compose.yml index 8c9bf29..d197bc1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -41,9 +41,23 @@ services: TURNSTONE_GLEAN_INTERVAL: ${TURNSTONE_GLEAN_INTERVAL:-900} TURNSTONE_SOURCE_HOST: ${TURNSTONE_SOURCE_HOST:-} TURNSTONE_SUBMIT_ENDPOINT: ${TURNSTONE_SUBMIT_ENDPOINT:-} + # --- Multi-agent diagnose pipeline --- + TURNSTONE_MULTI_AGENT_DIAGNOSE: ${TURNSTONE_MULTI_AGENT_DIAGNOSE:-false} + TURNSTONE_CLASSIFIER_MODEL: ${TURNSTONE_CLASSIFIER_MODEL:-} + TURNSTONE_EMBED_BACKEND: ${TURNSTONE_EMBED_BACKEND:-} + TURNSTONE_EMBED_MODEL: ${TURNSTONE_EMBED_MODEL:-} + TURNSTONE_EMBED_DEVICE: ${TURNSTONE_EMBED_DEVICE:-cpu} + # --- Anomaly scoring pipeline --- + TURNSTONE_ANOMALY_MODEL: ${TURNSTONE_ANOMALY_MODEL:-} + TURNSTONE_ANOMALY_DEVICE: ${TURNSTONE_ANOMALY_DEVICE:-cpu} + TURNSTONE_ANOMALY_THRESHOLD: ${TURNSTONE_ANOMALY_THRESHOLD:-0.75} + TURNSTONE_ANOMALY_INTERVAL: ${TURNSTONE_ANOMALY_INTERVAL:-0} + # --- HuggingFace model cache --- + HF_HOME: /hf_cache volumes: - ./patterns:/app/patterns:ro - ./data:/app/data # optional: persists SQLite files if DATABASE_URL unset + - ${HF_CACHE_PATH:-/Library/Assets/LLM}:/hf_cache:ro # shared model cache, mounted read-only volumes: turnstone_pgdata: diff --git a/docker-standalone.sh b/docker-standalone.sh index 7098fa8..8d45406 100755 --- a/docker-standalone.sh +++ b/docker-standalone.sh @@ -62,7 +62,10 @@ set -euo pipefail REPO_DIR="${HOME}/turnstone" DATA_DIR="${REPO_DIR}/data" PATTERNS_DIR="${REPO_DIR}/patterns" -HF_CACHE_DIR="${REPO_DIR}/hf-cache" # persists downloaded ML models across restarts +# HF_CACHE_DIR: override to a shared cache directory to avoid re-downloading models. 
+# Example (Heimdall, where byviz/bylastic_classification_logs is already cached): +# export HF_CACHE_DIR=/Library/Assets/LLM +HF_CACHE_DIR="${HF_CACHE_DIR:-${REPO_DIR}/hf-cache}" TZ="${TZ:-America/Los_Angeles}" @@ -83,6 +86,16 @@ TZ="${TZ:-America/Los_Angeles}" # bash ~/turnstone/docker-standalone.sh # +# ── Anomaly scoring pipeline (IDS / watchdog) ──────────────────────────────── +# Set TURNSTONE_ANOMALY_MODEL to enable automatic anomaly scoring after each +# glean run. The byviz classifier (already used by the diagnose pipeline) is +# a good default — it's cached alongside the other models. +# +# export TURNSTONE_ANOMALY_MODEL=byviz/bylastic_classification_logs +# export TURNSTONE_ANOMALY_THRESHOLD=0.80 # confidence floor (default 0.75) +# bash ~/turnstone/docker-standalone.sh +# + # ── Multi-agent diagnose pipeline ──────────────────────────────────────────── # Enable the 5-stage ML pipeline to get smarter diagnose results. # @@ -134,6 +147,10 @@ docker run -d \ -e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \ -e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \ -e TURNSTONE_EMBED_DEVICE="${TURNSTONE_EMBED_DEVICE:-cpu}" \ + -e TURNSTONE_ANOMALY_MODEL="${TURNSTONE_ANOMALY_MODEL:-}" \ + -e TURNSTONE_ANOMALY_DEVICE="${TURNSTONE_ANOMALY_DEVICE:-cpu}" \ + -e TURNSTONE_ANOMALY_THRESHOLD="${TURNSTONE_ANOMALY_THRESHOLD:-0.75}" \ + -e TURNSTONE_ANOMALY_INTERVAL="${TURNSTONE_ANOMALY_INTERVAL:-0}" \ localhost/turnstone:latest echo ""