feat(container): multi-agent env vars, HF cache mount, and ML deps

podman-standalone.sh:
- Add HF_CACHE_DIR=/opt/turnstone/hf-cache with mkdir guard
- Mount HF_HOME=/hf-cache so model weights persist across restarts
- Forward all multi-agent env vars (TURNSTONE_MULTI_AGENT_DIAGNOSE, GPU_SERVER_URL, TURNSTONE_CLASSIFIER_MODEL, TURNSTONE_EMBED_*)
- Add documentation comments for Daniel/Xander remote instance setup

requirements.txt:
- Add torch (CPU-only), transformers, sentence-transformers for the 5-stage multi-agent diagnose pipeline (classifier + suppressor stages)
- Use --extra-index-url for cpu wheel to keep image ~2GB lighter
- Both modules keep ImportError guards so server starts without them, but container images should ship fully capable
2026-05-26 13:20:26 -07:00 · 2026-05-26 13:20:26 -07:00 · a9f9491355
commit a9f9491355
parent 64804b1378
2 changed files with 40 additions and 0 deletions
--- a/podman-standalone.sh
+++ b/podman-standalone.sh
@ -62,6 +62,7 @@ set -euo pipefail
 REPO_DIR=/opt/turnstone
 DATA_DIR=/opt/turnstone/data
 PATTERNS_DIR=/opt/turnstone/patterns
+HF_CACHE_DIR=/opt/turnstone/hf-cache   # persists downloaded ML models across restarts
 TZ=America/Los_Angeles

 # ── Bundle push configuration ────────────────────────────────────────────────
@ -80,6 +81,25 @@ TZ=America/Los_Angeles
 #   bash /opt/turnstone/podman-standalone.sh
 #
 # TURNSTONE_SOURCE_HOST is auto-detected from `hostname` — override if needed.
+#
+# ── Multi-agent diagnose pipeline ────────────────────────────────────────────
+# The 5-stage ML pipeline requires two env vars and a writable HF cache dir:
+#
+#   TURNSTONE_MULTI_AGENT_DIAGNOSE=true   — enable the pipeline
+#   GPU_SERVER_URL=http://<orch-host>:7700 — cf-orch coordinator or Ollama base URL
+#
+# ML models are downloaded on first diagnose run and cached in HF_CACHE_DIR.
+# TURNSTONE_EMBED_DEVICE defaults to cpu, so a CPU-only host (no GPU) needs no override.
+#
+# For Xander's instance (xanderland.tv) — Heimdall's cf-orch via WireGuard:
+#   export GPU_SERVER_URL=http://10.1.10.71:7700
+#   export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
+#   sudo -E bash /opt/turnstone/podman-standalone.sh   # -E: plain sudo drops the exported vars
+#
+# For Daniel's instance (Huginn) — same cf-orch via WireGuard:
+#   export GPU_SERVER_URL=http://10.1.10.71:7700
+#   export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
+#   bash /opt/turnstone/podman-standalone.sh   (or rebuild Docker container)

 # ── Turnstone container ───────────────────────────────────────────────────────
 # Image is built locally — no registry auto-update label.
@ -96,6 +116,9 @@ TZ=America/Los_Angeles
 echo "Building Turnstone image..."
 podman build -t localhost/turnstone:latest "${REPO_DIR}"

+# Create HF model cache dir if not present (persists across container rebuilds)
+mkdir -p "${HF_CACHE_DIR}"
+
 # Remove existing container if present (safe re-run)
 podman rm -f turnstone 2>/dev/null || true

@ -105,6 +128,7 @@ podman run -d \
  --net=host \
  -v "${DATA_DIR}:/data:Z" \
  -v "${PATTERNS_DIR}:/patterns:Z" \
+  -v "${HF_CACHE_DIR}:/hf-cache:Z" \
  -v /opt:/opt:ro \
  -v /var/log:/var/log:ro \
  -e TURNSTONE_DB=/data/turnstone.db \
@ -113,6 +137,13 @@ podman run -d \
  -e TURNSTONE_SUBMIT_ENDPOINT="${TURNSTONE_SUBMIT_ENDPOINT:-}" \
  -e PYTHONUNBUFFERED=1 \
  -e TZ="${TZ}" \
+  -e TURNSTONE_MULTI_AGENT_DIAGNOSE="${TURNSTONE_MULTI_AGENT_DIAGNOSE:-false}" \
+  -e GPU_SERVER_URL="${GPU_SERVER_URL:-}" \
+  -e HF_HOME=/hf-cache \
+  -e TURNSTONE_CLASSIFIER_MODEL="${TURNSTONE_CLASSIFIER_MODEL:-byviz/bylastic_classification_logs}" \
+  -e TURNSTONE_EMBED_BACKEND="${TURNSTONE_EMBED_BACKEND:-sentence_transformers}" \
+  -e TURNSTONE_EMBED_MODEL="${TURNSTONE_EMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}" \
+  -e TURNSTONE_EMBED_DEVICE="${TURNSTONE_EMBED_DEVICE:-cpu}" \
  --health-cmd="curl -f http://localhost:8534/turnstone/health || exit 1" \
  --health-interval=30s \
  --health-timeout=10s \
--- a/requirements.txt
+++ b/requirements.txt
@ -7,3 +7,12 @@ python-multipart>=0.0.9
 dateparser>=1.2.0
 httpx>=0.27.0
 paramiko
+
+# Multi-agent diagnose pipeline — ML deps
+# classifier.py and suppressor.py have ImportError guards and fall back gracefully,
+# but these are included unconditionally so container images are fully capable.
+# Install CPU-only torch to avoid pulling the ~2GB CUDA wheel into the image.
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch>=2.2.0
+transformers>=4.40.0
+sentence-transformers>=3.0.0