feat(diagnose): tech-level post-processor, offline mode, API auth, context harvest

- synthesizer: 3 system prompts (sysadmin/homelab/executive) selected by tech_level pref
- settings: tech_level selector (UI + backend) persisted in preferences.json
- QuickCapture: shows active level label in diagnosis card header
- TURNSTONE_OFFLINE_MODE=1: sets HF_HUB_OFFLINE + TRANSFORMERS_OFFLINE before lib load
- TURNSTONE_API_KEY: bearer token auth on all /api/ routes (hmac.compare_digest)
- /health always open; unset key = no auth (backward compatible)
- docs/air-gapped-deployment.md: full offline deployment guide
- scripts/harvest_docs.py: generalized context doc bulk-uploader with manifest support
- scripts/manifests/: heimdall-devops.yaml (10 docs ingested) + example.yaml template
- fix: _ingest_upload -> _glean_upload in context doc upload endpoint (was 500)

Closes: #56
Closes: #45
Closes: #47
Closes: #49
Closes: #21
This commit is contained in:
pyr0ball 2026-05-28 08:51:05 -07:00
parent 3269aae960
commit 3fd9b6d5a2
11 changed files with 657 additions and 20 deletions

View file

@ -41,3 +41,13 @@
# TURNSTONE_EMBED_BACKEND=sentence_transformers
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
# TURNSTONE_EMBED_DEVICE=cpu
# --- Air-gapped / offline deployment ---
# Set to 1 to block all HuggingFace hub network access at runtime.
# Pre-download models to ~/.cache/huggingface/ before deploying — see docs/air-gapped-deployment.md.
# TURNSTONE_OFFLINE_MODE=1
# --- API authentication ---
# When set, all /api/ requests require: Authorization: Bearer <token>
# Generate a token: python -c "import secrets; print(secrets.token_urlsafe(32))"
# TURNSTONE_API_KEY=your-secret-token-here

View file

@ -11,6 +11,12 @@ import dataclasses
import hmac
import json
import os
# Offline mode: must be set before any HuggingFace library is imported.
# Both flags must agree — HF hub and transformers each check independently.
if os.environ.get("TURNSTONE_OFFLINE_MODE", "").lower() in ("1", "true", "yes"):
os.environ.setdefault("HF_HUB_OFFLINE", "1")
os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")
import sqlite3
import tempfile
import urllib.error
@ -21,7 +27,7 @@ from typing import Annotated
import yaml
from fastapi import APIRouter, BackgroundTasks, FastAPI, HTTPException, Query, Request, UploadFile
from fastapi import APIRouter, BackgroundTasks, Depends, FastAPI, HTTPException, Query, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
@ -91,6 +97,9 @@ PATTERN_DIR = Path(os.environ.get("TURNSTONE_PATTERNS", Path(__file__).parent.pa
PATTERN_FILE = PATTERN_DIR / "default.yaml"
GLEAN_INTERVAL = int(os.environ.get("TURNSTONE_GLEAN_INTERVAL", "900"))
SUBMIT_ENDPOINT = os.environ.get("TURNSTONE_SUBMIT_ENDPOINT", "").rstrip("/")
# When set, all /api/ routes require Authorization: Bearer <key>.
# Unset (default) means no authentication — suitable for local-only deployments.
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
# GPU inference server URL.
# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → orch.circuitforge.tech (Paid+).
@ -169,6 +178,7 @@ _PREFS_DEFAULTS: dict = {
"llm_url": GPU_SERVER_URL or "http://localhost:11434",
"llm_model": "llama3.1:8b",
"llm_api_key": "",
"tech_level": "sysadmin",
"severity_overrides": [
{
"name": "PAM auth noise",
@ -213,6 +223,7 @@ class SettingsBody(BaseModel):
llm_url: str | None = None
llm_model: str | None = None
llm_api_key: str | None = None
tech_level: str | None = None
tautulli_token: str | None = None
severity_overrides: list[SeverityOverride] | None = None
pihole_url: str | None = None
@ -251,8 +262,28 @@ class WizardApplyBody(BaseModel):
if (DIST_DIR / "assets").exists():
app.mount("/turnstone/assets", StaticFiles(directory=str(DIST_DIR / "assets")), name="assets")
def _check_api_key(request: Request) -> None:
    """Dependency: enforce bearer-token auth when TURNSTONE_API_KEY is configured.

    Does nothing when ``_API_KEY`` is None (authentication disabled). Requests
    whose path is ``/turnstone``, ``/turnstone/health`` or anything outside
    ``/turnstone/api`` are never challenged, so health checks work without
    credentials. Every other request must carry ``Authorization: Bearer <key>``.

    Raises:
        HTTPException: 401 when the Authorization header is absent or does not
            use the Bearer scheme; 403 when the presented token does not match.
    """
    if _API_KEY is None:
        return
    path = request.url.path
    if path.rstrip("/") in ("/turnstone/health", "/turnstone"):
        return
    if not path.startswith("/turnstone/api"):
        return
    auth = request.headers.get("Authorization", "")
    # Auth scheme names are case-insensitive, so accept "bearer", "BEARER", ...
    scheme, sep, token = auth.partition(" ")
    if not sep or scheme.lower() != "bearer":
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization: Bearer <token>",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # Compare as bytes: compare_digest() raises TypeError when given a str that
    # contains non-ASCII characters, which would turn a bad token into a 500.
    if not hmac.compare_digest(token.encode("utf-8"), _API_KEY.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid API key")
# API router — all routes accessible at /turnstone/api/* and /turnstone/health.
router = APIRouter(prefix="/turnstone")
router = APIRouter(prefix="/turnstone", dependencies=[Depends(_check_api_key)])
@router.get("/health")
@ -389,6 +420,7 @@ async def diagnose_post_stream(body: DiagnoseRequest) -> StreamingResponse:
llm_model=prefs.get("llm_model") or None,
llm_api_key=prefs.get("llm_api_key") or None,
context_db_path=CONTEXT_DB_PATH,
tech_level=prefs.get("tech_level", "sysadmin"),
):
yield f"data: {json.dumps(event)}\n\n"
@ -417,6 +449,10 @@ def patch_settings(body: SettingsBody) -> dict:
prefs["llm_model"] = body.llm_model
if body.llm_api_key is not None:
prefs["llm_api_key"] = body.llm_api_key
if body.tech_level is not None:
if body.tech_level not in ("homelab", "sysadmin", "executive"):
raise HTTPException(status_code=422, detail="tech_level must be 'homelab', 'sysadmin', or 'executive'")
prefs["tech_level"] = body.tech_level
if body.tautulli_token is not None:
prefs["tautulli_token"] = body.tautulli_token
if body.severity_overrides is not None:
@ -1007,7 +1043,7 @@ def test_pihole_connection() -> dict:
app.include_router(router)
_ctx = APIRouter(prefix="/turnstone/api/context")
_ctx = APIRouter(prefix="/turnstone/api/context", dependencies=[Depends(_check_api_key)])
@_ctx.post("/docs")
@ -1015,7 +1051,7 @@ async def upload_doc(file: UploadFile):
content = await file.read()
try:
result = await asyncio.to_thread(
lambda: _ingest_upload(CONTEXT_DB_PATH, file.filename or "upload", content)
lambda: _glean_upload(CONTEXT_DB_PATH, file.filename or "upload", content)
)
except UnsupportedDocType as e:
raise HTTPException(status_code=415, detail=str(e))

View file

@ -196,6 +196,7 @@ async def diagnose_stream(
llm_model: str | None = None,
llm_api_key: str | None = None,
context_db_path: Path | None = None,
tech_level: str = "sysadmin",
) -> AsyncGenerator[dict[str, Any], None]:
"""Async generator yielding SSE event dicts for the diagnose pipeline.
@ -316,6 +317,7 @@ async def diagnose_stream(
llm_url=llm_url,
llm_model=llm_model,
llm_api_key=llm_api_key,
tech_level=tech_level,
):
yield event
return # pipeline emits its own "done" event

View file

@ -37,6 +37,7 @@ async def run_pipeline(
llm_url: str | None,
llm_model: str | None,
llm_api_key: str | None,
tech_level: str = "sysadmin",
) -> AsyncGenerator[dict[str, Any], None]:
"""Async generator that runs all 5 pipeline stages and yields SSE event dicts.
@ -157,6 +158,7 @@ async def run_pipeline(
llm_url,
llm_model,
llm_api_key,
tech_level,
)
except Exception as exc:
logger.exception("Stage 5 (synthesizer) failed: %s", exc)

View file

@ -13,19 +13,45 @@ from app.services.diagnose.models import RankedHypothesis, TimelineResult
logger = logging.getLogger(__name__)
_SYSTEM_PROMPT = (
"You are a Linux sysadmin diagnosing a system incident. "
"Write a concise, actionable incident diagnosis.\n\n"
"Format your response exactly as:\n"
"1. VERDICT: [CRITICAL|ERROR|WARN|INFO] — <what happened> (<X>% confidence)\n"
"2. TIMELINE: <what the logs show in sequence, 2-3 sentences>\n"
"3. ROOT CAUSES:\n"
" - <hypothesis 1 title> (<confidence>%)\n"
" - <hypothesis 2 title> (<confidence>%)\n"
"4. RECOMMENDED ACTIONS:\n"
" - <action based on hypotheses>\n"
"5. INVESTIGATE FURTHER: <open questions, if any>"
)
# System prompts for the synthesizer, keyed by the `tech_level` preference.
# Each value fixes both the tone and the exact numbered output format the LLM
# is asked to follow.
_SYSTEM_PROMPTS: dict[str, str] = {
# Terse, technical 5-section format (VERDICT / TIMELINE / ROOT CAUSES /
# RECOMMENDED ACTIONS / INVESTIGATE FURTHER).
"sysadmin": (
"You are a Linux sysadmin diagnosing a system incident. "
"Write a concise, actionable incident diagnosis.\n\n"
"Format your response exactly as:\n"
"1. VERDICT: [CRITICAL|ERROR|WARN|INFO] — <what happened> (<X>% confidence)\n"
"2. TIMELINE: <what the logs show in sequence, 2-3 sentences>\n"
"3. ROOT CAUSES:\n"
" - <hypothesis 1 title> (<confidence>%)\n"
" - <hypothesis 2 title> (<confidence>%)\n"
"4. RECOMMENDED ACTIONS:\n"
" - <action based on hypotheses>\n"
"5. INVESTIGATE FURTHER: <open questions, if any>"
),
# Same five sections as "sysadmin", but asks the model to explain services,
# abbreviations and the reason behind each action.
"homelab": (
"You are explaining a system incident to a home lab enthusiast — someone "
"comfortable with Linux basics but not necessarily familiar with every daemon "
"or kernel subsystem. Be clear about what each service does; spell out "
"abbreviations; explain why each action helps.\n\n"
"Format your response exactly as:\n"
"1. VERDICT: [CRITICAL|ERROR|WARN|INFO] — <what happened in plain terms> (<X>% confidence)\n"
"2. TIMELINE: <what happened in sequence, 2-3 sentences; explain what each service is>\n"
"3. ROOT CAUSES:\n"
" - <hypothesis title — one sentence explaining what it means> (<confidence>%)\n"
"4. RECOMMENDED ACTIONS:\n"
" - <command or step — explain what it does and why>\n"
"5. INVESTIGATE FURTHER: <open questions in plain language>"
),
# Different 4-section, jargon-free format (WHAT HAPPENED / IMPACT /
# CONFIDENCE / ACTION NEEDED); note there is no severity-tagged VERDICT line.
"executive": (
"You are summarizing a technical system incident for a non-technical stakeholder. "
"Focus on what broke, what the business impact was, and what the technical team is doing about it. "
"Use plain English. Do not use daemon names, kernel terms, log syntax, or technical jargon.\n\n"
"Format your response exactly as:\n"
"1. WHAT HAPPENED: <1-2 sentences describing the problem in plain English>\n"
"2. IMPACT: <which services or users were affected, and how>\n"
"3. CONFIDENCE: <High / Medium / Low — how certain we are of the diagnosis>\n"
"4. ACTION NEEDED: <what the IT team is doing or should do, in plain terms>"
),
}
def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str:
@ -104,6 +130,7 @@ class SummarySynthesizer:
llm_url: str | None = None,
llm_model: str | None = None,
llm_api_key: str | None = None,
tech_level: str = "sysadmin",
) -> str:
"""Return synthesis text (single string, synchronous).
@ -115,6 +142,7 @@ class SummarySynthesizer:
if not llm_url or not llm_model:
return fallback
system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])
hypothesis_block = _build_hypothesis_block(ranked)
context_block = _build_context_block(ctx)
dominant = ", ".join(timeline.dominant_sources[:5]) or "none"
@ -131,7 +159,7 @@ class SummarySynthesizer:
)
messages = [
{"role": "system", "content": _SYSTEM_PROMPT},
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
]

View file

@ -0,0 +1,129 @@
# Air-Gapped Deployment Guide
Turnstone can run entirely without internet access. This guide covers pre-downloading
all model weights, configuring offline mode, and verifying that no outbound connections
are made at runtime.
## What requires network access by default
| Component | When | What it downloads |
|-----------|------|------------------|
| Stage 2 ML classifier | First diagnose run (if `TURNSTONE_CLASSIFIER_MODEL` is set) | HuggingFace model weights (~300 MB) |
| Stage 4 sentence-transformers embedder | First diagnose run (if `TURNSTONE_EMBED_BACKEND=sentence_transformers`) | Embedding model (~130 MB) |
| LLM inference | Every diagnose run | Nothing — calls your configured `GPU_SERVER_URL` only |
| Log glean | Every glean run | Nothing — reads local files or SSH sources |
If neither the classifier nor the sentence-transformers embedder is enabled, Turnstone
makes no outbound network calls at runtime (only local SQLite reads/writes and your
configured LLM endpoint).
## Step 1 — Pre-download models (on an internet-connected machine)
Run these commands in the `cf` conda environment before moving to the air-gapped host:
```bash
# Stage 2 ML classifier (only needed if TURNSTONE_CLASSIFIER_MODEL is set)
conda run -n cf python -c "
from transformers import pipeline
pipeline('text-classification', model='byviz/bylastic_classification_logs')
print('classifier cached')
"
# Stage 4 sentence-transformers embedder (only if TURNSTONE_EMBED_BACKEND=sentence_transformers)
conda run -n cf python -c "
from sentence_transformers import SentenceTransformer
SentenceTransformer('BAAI/bge-small-en-v1.5')
print('embedder cached')
"
```
Models are cached to `~/.cache/huggingface/`. Copy that directory to the air-gapped host
at the same path before deployment.
## Step 2 — Pre-ingest your documentation corpus
On the internet-connected machine, or before cutting the network:
```bash
# Write your manifest (see scripts/manifests/example.yaml)
# Then bulk-upload to the context DB:
conda run -n cf python scripts/harvest_docs.py --manifest scripts/manifests/your-site.yaml
```
The context DB (`turnstone-context.db`) is a plain SQLite file — copy it to the
air-gapped host alongside `turnstone.db`.
## Step 3 — Set offline environment variables
Add to your `.env` file (copy from `.env.example`):
```bash
# Block all HuggingFace hub network access
TURNSTONE_OFFLINE_MODE=1
# Point models at the pre-downloaded cache (usually the default)
# HF_HOME=/home/youruser/.cache/huggingface
```
`TURNSTONE_OFFLINE_MODE=1` sets both `HF_HUB_OFFLINE=1` and `TRANSFORMERS_OFFLINE=1`
before any model library loads. If the cache is missing or incomplete, the classifier
falls back to the pattern-tag / regex path and embedding is skipped — diagnose still
works, just without ML-assisted severity or suppression.
## Step 4 — Configure a local LLM endpoint
Turnstone's LLM reasoning calls your `GPU_SERVER_URL`. On an air-gapped host this
must be a local endpoint — either Ollama or a local cf-orch coordinator:
```bash
# Local Ollama
GPU_SERVER_URL=http://localhost:11434
# Local cf-orch coordinator
GPU_SERVER_URL=http://localhost:7700
```
Pull the Ollama model before cutting network access:
```bash
ollama pull llama3.1:8b
```
## Step 5 — Verify no outbound connections at runtime
Start Turnstone and run a diagnose query, then check for unexpected outbound connections:
```bash
# Watch for any connection to HuggingFace, PyPI, or other external hosts
ss -tp | grep python
# or
lsof -i -n -P | grep python | grep ESTABLISHED
```
Expected: only connections to your `GPU_SERVER_URL` and any SSH log sources.
No connections to `huggingface.co`, `cdn-lfs.huggingface.co`, or `pypi.org`.
## Deployment checklist
- [ ] `~/.cache/huggingface/` copied to air-gapped host (if using ML classifier or embedder)
- [ ] `TURNSTONE_OFFLINE_MODE=1` set in `.env`
- [ ] `GPU_SERVER_URL` points to a local inference endpoint
- [ ] Ollama model pulled locally (if using Ollama)
- [ ] Context DB pre-populated with runbooks via `harvest_docs.py`
- [ ] No internet access verified with `ss -tp` during a diagnose run
- [ ] `TURNSTONE_API_KEY` set if the host is accessible over the network (see the "API authentication" section of `.env.example`)
## Troubleshooting
**"OSError: We couldn't connect to huggingface.co…"**
The model is not in the local cache. Either download it on a connected machine and copy
`~/.cache/huggingface/`, or unset `TURNSTONE_CLASSIFIER_MODEL` to fall back to the
pattern-based classifier.
**Diagnose still works but no ML severity in pipeline stages**
Expected when running offline without a pre-cached model. Stage 2 falls back to
`pattern_tags` → regex severity detection automatically.
**LLM reasoning missing from diagnose output**
Check that `GPU_SERVER_URL` is reachable from the air-gapped host and that your local
Ollama/vLLM has the configured model pulled.

266
scripts/harvest_docs.py Normal file
View file

@ -0,0 +1,266 @@
#!/usr/bin/env python3
"""harvest_docs.py — Bulk-upload documentation into Turnstone's context RAG.
Reads a YAML manifest that describes which files or directories to upload,
then POSTs each file to the Turnstone /api/context/docs endpoint.
Usage:
# From a manifest file
python harvest_docs.py --manifest manifests/my-cluster.yaml
# Explicit files (no manifest needed)
python harvest_docs.py --base-url http://localhost:8534 file1.md dir/file2.yaml
# Dry run — show what would be uploaded without sending
python harvest_docs.py --manifest manifests/my-cluster.yaml --dry-run
Manifest format (YAML):
base_url: http://localhost:8534 # optional; overridden by --base-url
sources:
- path: /absolute/path/to/file.md
label: friendly-name # optional; overrides filename in DB
- path: /absolute/path/to/dir/
include: ["*.md", "*.yaml"] # glob patterns; default: see INCLUDE_EXTS
exclude: ["CLAUDE*", "SESSION_*", "*_keys*"]
recursive: false # default false
"""
from __future__ import annotations
import argparse
import fnmatch
import sys
import urllib.request
import urllib.error
from pathlib import Path
try:
import yaml
_HAS_YAML = True
except ImportError:
_HAS_YAML = False
# File extensions included when walking a directory with no explicit `include`.
INCLUDE_EXTS = {".md", ".yaml", ".yml", ".txt", ".conf", ".rst"}
# Default exclude patterns applied to every directory source (unless overridden).
DEFAULT_EXCLUDES = [
"CLAUDE*",
"SESSION_*",
"HANDOFF_*",
"*.key",
"*.pem",
"*.crt",
"node_modules",
".git",
"__pycache__",
]
UPLOAD_PATH = "/turnstone/api/context/docs"
# ---------------------------------------------------------------------------
# File collection
# ---------------------------------------------------------------------------
def _matches_any(name: str, patterns: list[str]) -> bool:
return any(fnmatch.fnmatch(name, p) for p in patterns)
def _collect_from_dir(
    root: Path,
    include: list[str],
    exclude: list[str],
    recursive: bool,
) -> list[Path]:
    """Return the sorted list of files under ``root`` that pass the filters.

    Args:
        root: Directory to scan.
        include: fnmatch patterns tested against each file name. When empty,
            files are kept only if their lowercased suffix is in INCLUDE_EXTS.
        exclude: fnmatch patterns; a file is dropped when any path component
            *below* ``root`` (sub-directory name or the file name) matches.
        recursive: Walk sub-directories (``**/*``) instead of only the
            immediate children (``*``).
    """
    pattern = "**/*" if recursive else "*"
    candidates: list[Path] = []
    for p in root.glob(pattern):
        if not p.is_file():
            continue
        # Match excludes only against the part of the path beneath `root`.
        # Testing the absolute path would drop every file whenever a *parent*
        # of root happens to match a pattern (e.g. root inside "SESSION_x/").
        relative_parts = p.relative_to(root).parts
        if any(_matches_any(part, exclude) for part in relative_parts):
            continue
        if include:
            if not _matches_any(p.name, include):
                continue
        elif p.suffix.lower() not in INCLUDE_EXTS:
            continue
        candidates.append(p)
    return sorted(candidates)
def resolve_sources(sources: list[dict]) -> list[tuple[Path, str]]:
    """Return list of (path, label) pairs from a manifest sources list.

    File entries are returned as-is, labelled with ``label`` or the file name.
    Directory entries are expanded through ``_collect_from_dir`` and each file
    is labelled with its own name. Entries that are malformed, have no
    ``path``, or point at something that does not exist are skipped with a
    warning on stderr.
    """
    results: list[tuple[Path, str]] = []
    for entry in sources:
        if not isinstance(entry, dict):
            print(f" [WARN] source entry is not a mapping, skipping: {entry!r}", file=sys.stderr)
            continue
        raw_path = entry.get("path") or ""
        if not raw_path:
            # Path("") resolves to the current working directory; without this
            # guard a missing `path:` key would bulk-upload whatever is in cwd.
            print(f" [WARN] source entry has no 'path', skipping: {entry!r}", file=sys.stderr)
            continue
        p = Path(raw_path).expanduser().resolve()
        label: str = entry.get("label") or ""
        if not p.exists():
            print(f" [WARN] path not found, skipping: {p}", file=sys.stderr)
            continue
        if p.is_file():
            results.append((p, label or p.name))
        elif p.is_dir():
            # A bare `include:` / `exclude:` key in YAML loads as None.
            include: list[str] = entry.get("include") or []
            exclude = entry.get("exclude")
            if exclude is None:
                # An explicit empty list still means "exclude nothing".
                exclude = DEFAULT_EXCLUDES
            recursive: bool = bool(entry.get("recursive", False))
            for f in _collect_from_dir(p, include, exclude, recursive):
                results.append((f, f.name))
        else:
            print(f" [WARN] not a file or directory, skipping: {p}", file=sys.stderr)
    return results
# ---------------------------------------------------------------------------
# Upload
# ---------------------------------------------------------------------------
def _build_multipart(boundary: bytes, filename: str, content: bytes) -> bytes:
"""Build a minimal multipart/form-data body for a single file field."""
lines: list[bytes] = [
b"--" + boundary,
f'Content-Disposition: form-data; name="file"; filename="{filename}"'.encode(),
b"Content-Type: application/octet-stream",
b"",
content,
b"--" + boundary + b"--",
b"",
]
return b"\r\n".join(lines)
def upload_file(base_url: str, path: Path, label: str, api_key: str | None = None) -> dict:
    """POST a file to Turnstone's context doc endpoint. Returns response dict.

    Args:
        base_url: Turnstone base URL; UPLOAD_PATH is appended to it.
        path: File to read and upload.
        label: File name to send; falls back to ``path.name`` when empty.
        api_key: Optional bearer token. When omitted, the TURNSTONE_API_KEY
            environment variable is used if set; otherwise no Authorization
            header is sent.

    Returns:
        The decoded JSON object on success, or ``{"error": "<message>"}`` on
        any failure (unreadable file, HTTP error, network error, bad JSON).
        This function does not raise.
    """
    import json
    import os
    import uuid

    url = base_url.rstrip("/") + UPLOAD_PATH
    try:
        content = path.read_bytes()
    except OSError as exc:
        return {"error": str(exc)}
    filename = label or path.name
    # Random per-request boundary: a fixed string could also occur inside an
    # uploaded document and truncate the part.
    boundary = f"----TurnstoneHarvest{uuid.uuid4().hex}".encode()
    body = _build_multipart(boundary, filename, content)
    headers = {"Content-Type": f"multipart/form-data; boundary={boundary.decode()}"}
    token = api_key or os.environ.get("TURNSTONE_API_KEY")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    req = urllib.request.Request(url, data=body, headers=headers, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            payload = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        body_text = e.read().decode(errors="replace")
        return {"error": f"HTTP {e.code}: {body_text[:200]}"}
    except Exception as exc:
        # Best effort: one bad file must not abort a bulk run.
        return {"error": str(exc)}
    if not isinstance(payload, dict):
        # Callers use .get() on the result, so never hand back a non-dict.
        return {"error": f"unexpected response: {str(payload)[:200]}"}
    return payload
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main() -> None:
    """CLI entry point: gather files from a manifest and/or argv, then upload.

    Base URL precedence: ``--base-url`` if given, then the manifest's
    ``base_url``, then ``http://localhost:8534``. Exits with status 1 when the
    manifest cannot be used or any upload fails, and with status 0 when there
    is nothing to upload.
    """
    default_base_url = "http://localhost:8534"

    parser = argparse.ArgumentParser(
        description="Bulk-upload docs into Turnstone context RAG.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--manifest", "-m",
        metavar="FILE",
        help="YAML manifest describing sources to upload",
    )
    parser.add_argument(
        "--base-url", "-u",
        # None (not the default URL) so an explicitly passed value can be told
        # apart from "not given" even when it equals the default.
        default=None,
        metavar="URL",
        help="Turnstone base URL (default: http://localhost:8534)",
    )
    parser.add_argument(
        "--dry-run", "-n",
        action="store_true",
        help="Show files that would be uploaded without actually uploading",
    )
    parser.add_argument(
        "files",
        nargs="*",
        metavar="FILE",
        help="Explicit files to upload (alternative to --manifest)",
    )
    args = parser.parse_args()

    manifest_base_url = None
    sources: list[tuple[Path, str]] = []

    if args.manifest:
        if not _HAS_YAML:
            print("ERROR: PyYAML is required for --manifest. Run: pip install pyyaml", file=sys.stderr)
            sys.exit(1)
        manifest_path = Path(args.manifest).expanduser().resolve()
        if not manifest_path.exists():
            print(f"ERROR: manifest not found: {manifest_path}", file=sys.stderr)
            sys.exit(1)
        # safe_load() returns None for an empty file; treat that as "no keys".
        data = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            print(f"ERROR: manifest must be a YAML mapping: {manifest_path}", file=sys.stderr)
            sys.exit(1)
        manifest_base_url = data.get("base_url")
        sources = resolve_sources(data.get("sources") or [])

    base_url = args.base_url or manifest_base_url or default_base_url

    for raw in args.files:
        p = Path(raw).expanduser().resolve()
        if not p.exists():
            print(f" [WARN] not found, skipping: {p}", file=sys.stderr)
            continue
        if p.is_file():
            sources.append((p, p.name))
        else:
            print(f" [WARN] {p} is a directory; use a manifest with recursive:true for directory sources", file=sys.stderr)

    if not sources:
        print("No files to upload. Pass --manifest or explicit file paths.")
        sys.exit(0)

    print(f"Turnstone: {base_url}")
    print(f"Files to upload: {len(sources)}")
    if args.dry_run:
        print("\n[DRY RUN] Would upload:")
    print()

    ok = 0
    failed = 0
    for path, label in sources:
        size_kb = path.stat().st_size / 1024
        if args.dry_run:
            print(f" {label} ({size_kb:.1f} KB) ← {path}")
            ok += 1
            continue
        print(f" Uploading {label} ({size_kb:.1f} KB)…", end=" ", flush=True)
        result = upload_file(base_url, path, label)
        if "error" in result:
            print(f"FAILED — {result['error']}")
            failed += 1
        else:
            # The server's key name is looked up under both spellings.
            chunks = result.get("chunks_written", result.get("chunks_created", "?"))
            facts = result.get("facts_written", 0)
            extra = f", {facts} facts" if facts else ""
            print(f"OK ({chunks} chunks{extra})")
            ok += 1

    print()
    if args.dry_run:
        print(f"Dry run complete. {ok} file(s) would be uploaded.")
    else:
        print(f"Done. {ok} uploaded, {failed} failed.")
        if failed:
            sys.exit(1)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,38 @@
# Turnstone context doc manifest — example / template
# Run: python scripts/harvest_docs.py --manifest scripts/manifests/example.yaml
#
# Copy this file, adjust paths and patterns for your environment.
# Keep manifests in version control alongside your docs so ingestion config
# is auditable and reproducible.
# Turnstone URL (can be overridden with --base-url on the command line)
base_url: http://localhost:8534
sources:
# ── Single file ────────────────────────────────────────────────────────────
- path: /path/to/runbooks/service-restart.md
label: runbook-service-restart.md # name stored in context DB (optional)
# ── Directory — include specific extensions, exclude sensitive patterns ─────
- path: /path/to/runbooks/
include: ["*.md", "*.yaml"] # only these extensions
exclude: # skip these filename patterns
- "CLAUDE*" # Claude session prompts
- "SESSION_*" # session summaries
- "HANDOFF_*" # handoff notes
- "*.key" # private keys
- "*.pem"
recursive: false # set true to walk subdirectories
# ── Recursive directory walk ───────────────────────────────────────────────
- path: /path/to/docs/
include: ["*.md"]
exclude:
- "CLAUDE*"
- "*.key"
- "node_modules"
- ".git"
recursive: true
# ── Minimal entry (single file: uploaded as-is; the INCLUDE_EXTS / DEFAULT_EXCLUDES defaults apply only to directory sources) -
- path: /path/to/infrastructure.md

View file

@ -0,0 +1,53 @@
# Turnstone context doc manifest — Heimdall home lab cluster
# Run: python scripts/harvest_docs.py --manifest scripts/manifests/heimdall-devops.yaml
#
# Sections:
# infrastructure/ — network topology, machine specs, service ports
# runbooks/ — incident postmortems and operational procedures
# tdarr/ — media transcoding failure modes and recovery
#
# Files intentionally excluded from this manifest:
# - WireGuard .conf files and KEYS.txt (contain private keys)
# - SESSION_* and HANDOFF_* files (Claude session prompts, not operational docs)
# - CLAUDE.md files (Claude context prompts, not operational docs)
# - Raw tdarr scan data (tdarr/data/*.txt — scan output, not prose)
# - projects/helmet-3d, projects/mycroft-precise (unrelated to cluster ops)
# - collapse-stack/ (resilience planning, not daily log triage material)
# - bastion/sdcard-config, bastion/rpi-config (one-time setup artifacts)
base_url: http://localhost:8534
sources:
# ── Service inventory (most immediately useful for log attribution) ────────
- path: /Library/Development/CircuitForge/circuitforge-infra/inventory/services.md
label: service-inventory.md
# ── Infrastructure topology (partially outdated — note added at top of file)
- path: /Library/Development/CircuitForge/circuitforge-infra/infrastructure/docs/INFRASTRUCTURE.md
label: infrastructure-topology.md
- path: /Library/Development/CircuitForge/circuitforge-infra/infrastructure/docs/GPU_CLUSTERING.md
label: gpu-clustering.md
- path: /Library/Development/CircuitForge/circuitforge-infra/infrastructure/ssh_configs/PROXYJUMP_CONFIG.md
label: ssh-proxyjump-config.md
# ── Runbooks ───────────────────────────────────────────────────────────────
- path: /Library/Development/CircuitForge/circuitforge-infra/runbooks/cf-orch-coordinator.md
label: runbook-cf-orch-coordinator.md
- path: /Library/Development/CircuitForge/circuitforge-infra/runbooks/docker-nfs-boot-race-and-image-security.md
label: runbook-docker-nfs-boot-race.md
- path: /Library/Development/CircuitForge/circuitforge-infra/runbooks/PIHOLE_DNS_HANDOFF.md
label: runbook-pihole-dns.md
# ── Media server / Tdarr ───────────────────────────────────────────────────
- path: /Library/Development/devl/Devops/tdarr/docs/TDARR_RECOVERY_README.md
label: tdarr-recovery.md
- path: /Library/Development/devl/Devops/tdarr/docs/NVENC_CORRUPTION_DETECTION.md
label: tdarr-nvenc-corruption.md
- path: /Library/Development/devl/Devops/tdarr/docs/TDARR_ROBUST_WORKFLOW.md
label: tdarr-robust-workflow.md

View file

@ -90,6 +90,7 @@
<div class="flex items-center gap-2 mb-2 text-xs text-accent font-semibold uppercase tracking-wide">
<span aria-hidden="true"></span>
<span>Diagnosis</span>
<span class="ml-auto text-text-dim font-normal normal-case tracking-normal capitalize">{{ techLevel }}</span>
</div>
<p class="text-sm text-text-primary leading-relaxed whitespace-pre-wrap">{{ reasoning }}</p>
</div>
@ -194,6 +195,7 @@ const sourceScope = ref<string | null>(null)
const entries = ref<LogEntry[]>([])
const summary = ref<Summary | null>(null)
const reasoning = ref<string | null>(null)
const techLevel = ref<'homelab' | 'sysadmin' | 'executive'>('sysadmin')
const loading = ref(false)
const statusMsg = ref<string | null>(null)
const error = ref<string | null>(null)
@ -208,7 +210,7 @@ const severityFilter = ref<string | null>(null)
let capturedSince: string | null = null
let capturedUntil: string | null = null
onMounted(() => {
onMounted(async () => {
const s = route.query.source
if (typeof s === 'string' && s.trim()) sourceScope.value = s
const q = route.query.q
@ -218,6 +220,13 @@ onMounted(() => {
} else if (sourceScope.value) {
run()
}
try {
const res = await fetch(`${BASE}/api/settings`)
if (res.ok) {
const prefs = await res.json()
if (prefs.tech_level) techLevel.value = prefs.tech_level
}
} catch { /* non-critical — default stays */ }
})
watch(() => route.query.source, (newS) => {

View file

@ -93,6 +93,35 @@
</div>
</div>
<!-- Diagnosis detail level -->
<div>
<h2 id="tech-level-label" class="text-text-primary text-sm font-semibold mb-1">Diagnosis Detail Level</h2>
<p class="text-text-dim text-xs mb-3">
Controls how the LLM formats its diagnosis — affects the level of technical detail and output structure.
</p>
<div role="radiogroup" aria-labelledby="tech-level-label" class="flex flex-col sm:flex-row gap-3">
<button
v-for="(opt, idx) in techLevelOptions"
:key="opt.value"
:ref="(el) => collectTechLevelRef(el, idx)"
role="radio"
:aria-checked="prefs.tech_level === opt.value"
:tabindex="prefs.tech_level === opt.value ? 0 : -1"
@click="setTechLevel(opt.value)"
@keydown="handleTechLevelKey($event, idx)"
:class="[
'flex-1 px-4 py-3 rounded border text-sm transition-colors text-left',
prefs.tech_level === opt.value
? 'border-accent bg-accent/10 text-accent'
: 'border-surface-border text-text-muted hover:text-text-primary hover:border-accent'
]"
>
<div class="font-medium">{{ opt.label }}</div>
<div class="text-xs text-text-dim mt-0.5">{{ opt.desc }}</div>
</button>
</div>
</div>
<!-- Severity overrides -->
<div>
<h2 class="text-text-primary text-sm font-semibold mb-1">Severity Overrides</h2>
@ -284,6 +313,7 @@ interface Prefs {
llm_url: string
llm_model: string
llm_api_key: string
tech_level: 'homelab' | 'sysadmin' | 'executive'
severity_overrides: SeverityOverride[]
pihole_url: string
pihole_version: string
@ -292,7 +322,41 @@ interface Prefs {
device_names: string
}
const prefs = ref<Prefs>({ entry_point_style: 'topbar', llm_url: '', llm_model: '', llm_api_key: '', severity_overrides: [], pihole_url: '', pihole_version: 'v6', pihole_api_key: '', router_source_ids: '', device_names: '' })
/**
 * Choices rendered as the "Diagnosis Detail Level" radio group, in display
 * order. `value` is what gets saved as the `tech_level` preference; `label`
 * and `desc` are the button's title and helper text.
 */
const techLevelOptions: { value: 'homelab' | 'sysadmin' | 'executive'; label: string; desc: string }[] = [
{ value: 'homelab', label: 'Homelab', desc: 'Clear explanations — spells out service names and why each action helps' },
{ value: 'sysadmin', label: 'Sysadmin', desc: 'Technical, structured 5-section diagnosis with commands and confidence scores' },
{ value: 'executive', label: 'Executive', desc: 'Plain English: what broke, who was affected, and what action is needed' },
]
const techLevelBtnRefs = ref<HTMLButtonElement[]>([])
/**
 * Function-ref callback for the tech-level radio buttons: stores the rendered
 * button at position `idx` so keyboard navigation can move focus to it.
 * Values that are not an HTMLButtonElement are ignored.
 */
function collectTechLevelRef(el: unknown, idx: number) {
  // `unknown` instead of `any`: the instanceof check is the only narrowing needed.
  if (el instanceof HTMLButtonElement) techLevelBtnRefs.value[idx] = el
}
/**
 * Arrow-key navigation for the tech-level radio group.
 *
 * ArrowRight/ArrowDown select the next option and ArrowLeft/ArrowUp the
 * previous one; the selection is saved and focus follows it. Movement is
 * clamped at both ends (no wrap-around). Other keys are left untouched.
 *
 * @param e   The keydown event from the focused radio button.
 * @param idx Index of that button within `techLevelOptions`.
 */
function handleTechLevelKey(e: KeyboardEvent, idx: number) {
  let next = idx
  if (e.key === 'ArrowRight' || e.key === 'ArrowDown') next = idx + 1
  else if (e.key === 'ArrowLeft' || e.key === 'ArrowUp') next = idx - 1
  else return
  e.preventDefault()
  const clamped = Math.max(0, Math.min(techLevelOptions.length - 1, next))
  const target = techLevelOptions[clamped]
  // Already at the end of the group (or no options): nothing changes, so do
  // not fire a redundant save request.
  if (!target || clamped === idx) return
  // setTechLevel handles its own errors; `void` marks the promise as intentionally un-awaited.
  void setTechLevel(target.value)
  techLevelBtnRefs.value[clamped]?.focus()
}
/**
 * Persist the chosen diagnosis detail level through `patch` and report the
 * outcome in `saveStatus`: a "Saved" notice that is cleared after two
 * seconds, or a failure message when the request rejects.
 */
async function setTechLevel(level: 'homelab' | 'sysadmin' | 'executive') {
  saveStatus.value = null
  try {
    await patch({ tech_level: level })
  } catch {
    saveStatus.value = { ok: false, msg: 'Save failed — check server connection' }
    return
  }
  saveStatus.value = { ok: true, msg: 'Saved' }
  setTimeout(() => {
    saveStatus.value = null
  }, 2000)
}
const prefs = ref<Prefs>({ entry_point_style: 'topbar', llm_url: '', llm_model: '', llm_api_key: '', tech_level: 'sysadmin', severity_overrides: [], pihole_url: '', pihole_version: 'v6', pihole_api_key: '', router_source_ids: '', device_names: '' })
const saveStatus = ref<{ ok: boolean; msg: string } | null>(null)
const showAddOverride = ref(false)
const showApiKey = ref(false)