chore: sanitize internal hostnames and IP references

- Rename patterns/sources-example-node.yaml → patterns/sources-example.yaml
  and update header/comments to be host-agnostic
- Replace internal node names in gen_corpus.py _HOSTS with generic names
- Replace example-node hostname in syslog test fixtures with testhost
- Replace example-node hostname in the mcp_server.py docstring example with myserver
- Replace private LAN IP (<YOUR_HOST_IP>) in docker-standalone.sh with
  <HEIMDALL_LAN_IP> placeholder
- Replace private IPs in sources-cluster.yaml comments with <YOUR_HOST_IP>
- Remove instance-specific hostname from llm.py fallback comment
- Replace Caddy example domain in podman-standalone.sh with placeholder
This commit is contained in:
pyr0ball 2026-06-13 10:02:46 -07:00
parent eba1f825f6
commit b0e4d8d5f4
7 changed files with 20 additions and 20 deletions

View file

@ -93,7 +93,7 @@ def search_logs(
Example: '"connection refused" OR "connection lost"'
severity: Filter by level EMERGENCY, ALERT, CRITICAL, ERROR, WARN, NOTICE, INFO, DEBUG.
source: Partial match on source_id. Format is 'corpus:host:service'.
Example: 'example-node:caddy' matches all Caddy entries from example-node.
Example: 'myserver:caddy' matches all Caddy entries from myserver.
pattern: Filter by named pattern tag applied at glean time.
Known tags: auth_failure, connection_lost, oom, segfault, disk_full,
timeout, caddy_tls_error, caddy_config_error, caddy_auth_error,

View file

@ -88,7 +88,7 @@ def summarize(
logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc)
# Fallback: OpenAI-compat endpoint with explicit model name (local instances,
# example-node, or any cf-orch that doesn't have task assignments loaded).
# or any cf-orch node that doesn't have task assignments loaded).
try:
resp = httpx.post(
f"{llm_url.rstrip('/')}/v1/chat/completions",

View file

@ -99,8 +99,8 @@ TZ="${TZ:-America/Los_Angeles}"
# ── Multi-agent diagnose pipeline ────────────────────────────────────────────
# Enable the 5-stage ML pipeline to get smarter diagnose results.
#
# If your host has WireGuard to Heimdall's LAN (e.g. Huginn):
# export GPU_SERVER_URL=http://<YOUR_HOST_IP>:7700
# If your host has WireGuard to Heimdall's LAN:
# export GPU_SERVER_URL=http://<HEIMDALL_LAN_IP>:7700
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
# bash ~/turnstone/docker-standalone.sh
#

View file

@ -1,5 +1,6 @@
# Turnstone log sources — example-node.tv
# Turnstone log sources — example node (Docker/Podman, self-hosted media stack)
#
# Copy this file to your patterns directory and edit for your setup.
# Container paths: /opt and /var/log are bind-mounted read-only.
# journal-export.jsonl is written to /data/ by export_journal.sh (run via cron before glean).
#
@ -8,8 +9,8 @@
sources:
# ── System ────────────────────────────────────────────────────────────────
# Requires: cron job to run export_journal.sh before each glean.
# Example cron (every 15 min, run as x, add via: crontab -e):
# */15 * * * * /Library/Development/CircuitForge/turnstone/scripts/export_journal.sh \
# Example cron (every 15 min — edit paths for your install):
# */15 * * * * /opt/turnstone/scripts/export_journal.sh \
# /opt/turnstone-data/
- id: system-journal
path: /data/journal-export.jsonl

View file

@ -46,7 +46,7 @@
# ── Adding Caddy reverse proxy ────────────────────────────────────────────────
# Add to /etc/caddy/Caddyfile:
#
# turnstone.example-node.tv {
# turnstone.your-domain.example {
# import protected
# reverse_proxy 10.0.0.10:8534
# import cloudflare
@ -94,8 +94,7 @@ TZ="${TZ:-America/Los_Angeles}"
# ML models are downloaded on first diagnose run and cached in HF_CACHE_DIR.
# On a CPU-only host (no GPU) set TURNSTONE_EMBED_DEVICE=cpu (default).
#
# For Xander's instance (example-node.tv) — no WireGuard to Heimdall LAN,
# use the public cf-orch endpoint instead:
# If your host has no WireGuard to Heimdall — use the public cf-orch endpoint:
# export GPU_SERVER_URL=https://orch.circuitforge.tech
# export TURNSTONE_MULTI_AGENT_DIAGNOSE=true
# sudo bash /opt/turnstone/podman-standalone.sh

View file

@ -221,7 +221,7 @@ _AVCX_CODES: dict[str, list[str]] = {
# ── Template substitution ──────────────────────────────────────────────────────
_HOSTS = ["heimdall", "navi", "sif", "strahl", "bastion", "example-node"]
_HOSTS = ["node1", "node2", "node3", "node4", "gateway", "remotehost"]
_USERS = ["alan", "root", "deployer", "backup", "nobody"]
_MODULES = ["btrfs", "xfs", "nf_conntrack", "ip6table_filter", "overlay"]

View file

@ -4,24 +4,24 @@ from __future__ import annotations
from app.glean.syslog import is_syslog, parse
SYSLOG_SAMPLE = """\
May 11 14:23:01 example-node sshd[1234]: Accepted publickey for x from 192.168.1.1 port 54321 ssh2
May 11 14:23:05 example-node sshd[1234]: Failed password for invalid user admin from 10.0.0.99 port 22 ssh2
May 11 14:23:10 example-node sudo[5678]: x : TTY=pts/0 ; PWD=/home/x ; USER=root ; COMMAND=/usr/bin/apt update
May 11 14:23:15 example-node kernel: [12345.678] usb 1-1: USB disconnect, device number 2
May 1 04:00:00 example-node CRON[9999]: (root) CMD (/usr/local/sbin/backup.sh)
May 11 14:24:00 example-node systemd[1]: Started NetworkManager.
May 11 14:23:01 testhost sshd[1234]: Accepted publickey for x from 192.168.1.1 port 54321 ssh2
May 11 14:23:05 testhost sshd[1234]: Failed password for invalid user admin from 10.0.0.99 port 22 ssh2
May 11 14:23:10 testhost sudo[5678]: x : TTY=pts/0 ; PWD=/home/x ; USER=root ; COMMAND=/usr/bin/apt update
May 11 14:23:15 testhost kernel: [12345.678] usb 1-1: USB disconnect, device number 2
May 1 04:00:00 testhost CRON[9999]: (root) CMD (/usr/local/sbin/backup.sh)
May 11 14:24:00 testhost systemd[1]: Started NetworkManager.
"""
class TestDetector:
def test_detects_standard_line(self):
assert is_syslog("May 11 14:23:01 example-node sshd[1234]: message")
assert is_syslog("May 11 14:23:01 testhost sshd[1234]: message")
def test_detects_no_pid(self):
assert is_syslog("May 11 14:23:01 example-node kernel: message")
assert is_syslog("May 11 14:23:01 testhost kernel: message")
def test_detects_space_padded_day(self):
assert is_syslog("May 1 04:00:00 example-node CRON[9999]: message")
assert is_syslog("May 1 04:00:00 testhost CRON[9999]: message")
def test_rejects_servarr(self):
assert not is_syslog("2026-05-11 02:31:51.5|Info|ComponentName|Message")