From 7c76217149c6f912d1cb3851b5e52762f7cecd41 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sat, 13 Jun 2026 10:02:46 -0700 Subject: [PATCH] chore: sanitize internal hostnames and IP references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename patterns/sources-example-node.yaml → patterns/sources-example.yaml and update header/comments to be host-agnostic - Replace internal node names in gen_corpus.py _HOSTS with generic names - Replace example-node hostname in syslog test fixtures with testhost - Replace example-node example in mcp_server.py doc with myserver - Replace private LAN IP in docker-standalone.sh with placeholder - Replace private IPs in sources-cluster.yaml comments with placeholders - Remove instance-specific hostname from llm.py fallback comment - Replace Caddy example domain in podman-standalone.sh with placeholder --- app/mcp_server.py | 2 +- app/services/llm.py | 2 +- docker-standalone.sh | 4 ++-- patterns/sources-example.yaml | 7 ++++--- podman-standalone.sh | 5 ++--- scripts/gen_corpus.py | 2 +- tests/test_glean_syslog.py | 18 +++++++++--------- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/app/mcp_server.py b/app/mcp_server.py index 5eec5fd..38b55ec 100644 --- a/app/mcp_server.py +++ b/app/mcp_server.py @@ -93,7 +93,7 @@ def search_logs( Example: '"connection refused" OR "connection lost"' severity: Filter by level — EMERGENCY, ALERT, CRITICAL, ERROR, WARN, NOTICE, INFO, DEBUG. source: Partial match on source_id. Format is 'corpus:host:service'. - Example: 'example-node:caddy' matches all Caddy entries from example-node. + Example: 'myserver:caddy' matches all Caddy entries from myserver. pattern: Filter by named pattern tag applied at glean time. 
Known tags: auth_failure, connection_lost, oom, segfault, disk_full, timeout, caddy_tls_error, caddy_config_error, caddy_auth_error, diff --git a/app/services/llm.py b/app/services/llm.py index 0b04098..44c42ff 100644 --- a/app/services/llm.py +++ b/app/services/llm.py @@ -88,7 +88,7 @@ def summarize( logger.debug("Task endpoint unavailable (%s) — falling back to direct model", exc) # Fallback: OpenAI-compat endpoint with explicit model name (local instances, - # example-node, or any cf-orch that doesn't have task assignments loaded). + # or any cf-orch node that doesn't have task assignments loaded). try: resp = httpx.post( f"{llm_url.rstrip('/')}/v1/chat/completions", diff --git a/docker-standalone.sh b/docker-standalone.sh index fece648..3d77a9f 100755 --- a/docker-standalone.sh +++ b/docker-standalone.sh @@ -99,8 +99,8 @@ TZ="${TZ:-America/Los_Angeles}" # ── Multi-agent diagnose pipeline ──────────────────────────────────────────── # Enable the 5-stage ML pipeline to get smarter diagnose results. # -# If your host has WireGuard to Heimdall's LAN (e.g. Huginn): -# export GPU_SERVER_URL=http://:7700 +# If your host has WireGuard to Heimdall's LAN: +# export GPU_SERVER_URL=http://:7700 # export TURNSTONE_MULTI_AGENT_DIAGNOSE=true # bash ~/turnstone/docker-standalone.sh # diff --git a/patterns/sources-example.yaml b/patterns/sources-example.yaml index 3aefafc..804601b 100644 --- a/patterns/sources-example.yaml +++ b/patterns/sources-example.yaml @@ -1,5 +1,6 @@ -# Turnstone log sources — example-node.tv +# Turnstone log sources — example node (Docker/Podman, self-hosted media stack) # +# Copy this file to your patterns directory and edit for your setup. # Container paths: /opt and /var/log are bind-mounted read-only. # journal-export.jsonl is written to /data/ by export_journal.sh (run via cron before glean). 
# @@ -8,8 +9,8 @@ sources: # ── System ──────────────────────────────────────────────────────────────── # Requires: cron job to run export_journal.sh before each glean. - # Example cron (every 15 min, run as x, add via: crontab -e): - # */15 * * * * /Library/Development/CircuitForge/turnstone/scripts/export_journal.sh \ + # Example cron (every 15 min — edit paths for your install): + # */15 * * * * /opt/turnstone/scripts/export_journal.sh \ # /opt/turnstone-data/ - id: system-journal path: /data/journal-export.jsonl diff --git a/podman-standalone.sh b/podman-standalone.sh index 72b0fd9..5bfc7f8 100755 --- a/podman-standalone.sh +++ b/podman-standalone.sh @@ -46,7 +46,7 @@ # ── Adding Caddy reverse proxy ──────────────────────────────────────────────── # Add to /etc/caddy/Caddyfile: # -# turnstone.example-node.tv { +# turnstone.your-domain.example { # import protected # reverse_proxy 10.0.0.10:8534 # import cloudflare @@ -94,8 +94,7 @@ TZ="${TZ:-America/Los_Angeles}" # ML models are downloaded on first diagnose run and cached in HF_CACHE_DIR. # On a CPU-only host (no GPU) set TURNSTONE_EMBED_DEVICE=cpu (default). 
# -# For Contributor2's instance (example-node.tv) — no WireGuard to Heimdall LAN, -# use the public cf-orch endpoint instead: +# If your host has no WireGuard to Heimdall — use the public cf-orch endpoint: # export GPU_SERVER_URL=https://orch.circuitforge.tech # export TURNSTONE_MULTI_AGENT_DIAGNOSE=true # sudo bash /opt/turnstone/podman-standalone.sh diff --git a/scripts/gen_corpus.py b/scripts/gen_corpus.py index bdc4056..01b65f2 100644 --- a/scripts/gen_corpus.py +++ b/scripts/gen_corpus.py @@ -221,7 +221,7 @@ _EXT_DEVICE_CODES: dict[str, list[str]] = { # ── Template substitution ────────────────────────────────────────────────────── -_HOSTS = ["heimdall", "navi", "sif", "strahl", "bastion", "example-node"] +_HOSTS = ["node1", "node2", "node3", "node4", "gateway", "remotehost"] _USERS = ["alan", "root", "deployer", "backup", "nobody"] _MODULES = ["btrfs", "xfs", "nf_conntrack", "ip6table_filter", "overlay"] diff --git a/tests/test_glean_syslog.py b/tests/test_glean_syslog.py index cb3573d..b6115f1 100644 --- a/tests/test_glean_syslog.py +++ b/tests/test_glean_syslog.py @@ -4,24 +4,24 @@ from __future__ import annotations from app.glean.syslog import is_syslog, parse SYSLOG_SAMPLE = """\ -May 11 14:23:01 example-node sshd[1234]: Accepted publickey for x from 192.168.1.1 port 54321 ssh2 -May 11 14:23:05 example-node sshd[1234]: Failed password for invalid user admin from 10.0.0.99 port 22 ssh2 -May 11 14:23:10 example-node sudo[5678]: x : TTY=pts/0 ; PWD=/home/x ; USER=root ; COMMAND=/usr/bin/apt update -May 11 14:23:15 example-node kernel: [12345.678] usb 1-1: USB disconnect, device number 2 -May 1 04:00:00 example-node CRON[9999]: (root) CMD (/usr/local/sbin/backup.sh) -May 11 14:24:00 example-node systemd[1]: Started NetworkManager. 
+May 11 14:23:01 testhost sshd[1234]: Accepted publickey for x from 192.168.1.1 port 54321 ssh2 +May 11 14:23:05 testhost sshd[1234]: Failed password for invalid user admin from 10.0.0.99 port 22 ssh2 +May 11 14:23:10 testhost sudo[5678]: x : TTY=pts/0 ; PWD=/home/x ; USER=root ; COMMAND=/usr/bin/apt update +May 11 14:23:15 testhost kernel: [12345.678] usb 1-1: USB disconnect, device number 2 +May 1 04:00:00 testhost CRON[9999]: (root) CMD (/usr/local/sbin/backup.sh) +May 11 14:24:00 testhost systemd[1]: Started NetworkManager. """ class TestDetector: def test_detects_standard_line(self): - assert is_syslog("May 11 14:23:01 example-node sshd[1234]: message") + assert is_syslog("May 11 14:23:01 testhost sshd[1234]: message") def test_detects_no_pid(self): - assert is_syslog("May 11 14:23:01 example-node kernel: message") + assert is_syslog("May 11 14:23:01 testhost kernel: message") def test_detects_space_padded_day(self): - assert is_syslog("May 1 04:00:00 example-node CRON[9999]: message") + assert is_syslog("May 1 04:00:00 testhost CRON[9999]: message") def test_rejects_servarr(self): assert not is_syslog("2026-05-11 02:31:51.5|Info|ComponentName|Message")