feat(orchard): auto-enrollment API for branch node provisioning (#27)
Implements the Orchard branch grafting system for harvest.circuitforge.tech:
- POST /api/orchard/graft: provisions data dir, starts a new
turnstone-submissions-<slug> Docker container on the next free port
(ORCHARD_PORT_BASE=8538+), injects a handle_path block into the
Caddyfile dynamic-branches marker section, restarts caddy-proxy,
returns {submit_endpoint, api_key}
- GET /api/orchard/branches: list active/inactive branches (admin-only)
- DELETE /api/orchard/branches/<slug>: deactivate branch + stop container
- POST /api/orchard/branches/<slug>/anonymize: HMAC-based IP/username
pseudonymization worker over a branch DB
- POST /api/glean/batch: optional TURNSTONE_BRANCH_KEY auth guard
- anonymized column added to log_entries schema (migration-safe)
- Updated Caddyfile with /huginn/* route (port 8536), /node2/* (8537),
and dynamic-branch marker section
- All endpoints admin-gated via TURNSTONE_ORCHARD_ADMIN_KEY
Closes: #27
This commit is contained in:
parent
600e5a9eac
commit
7a2ab0bb46
5 changed files with 444 additions and 3 deletions
16
.env.example
16
.env.example
|
|
@ -86,3 +86,19 @@
|
||||||
# When set, all /api/ requests require: Authorization: Bearer <token>
|
# When set, all /api/ requests require: Authorization: Bearer <token>
|
||||||
# Generate a token: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
# Generate a token: python -c "import secrets; print(secrets.token_urlsafe(32))"
|
||||||
# TURNSTONE_API_KEY=your-secret-token-here
|
# TURNSTONE_API_KEY=your-secret-token-here
|
||||||
|
|
||||||
|
# --- The Orchard (harvest receiver only) ---
|
||||||
|
# Set on the central harvest.circuitforge.tech instance to enable branch management.
|
||||||
|
# TURNSTONE_ORCHARD_ADMIN_KEY=your-admin-secret-here
|
||||||
|
# TURNSTONE_ORCHARD_DATA_ROOT=/devl/docker/turnstone-submissions
|
||||||
|
# TURNSTONE_ORCHARD_CADDYFILE=/devl/caddy-proxy/Caddyfile
|
||||||
|
# TURNSTONE_ORCHARD_CADDY_CONTAINER=caddy-proxy
|
||||||
|
# TURNSTONE_ORCHARD_HARVEST_HOST=https://harvest.circuitforge.tech
|
||||||
|
# TURNSTONE_ORCHARD_PORT_BASE=8538
|
||||||
|
# TURNSTONE_ORCHARD_IMAGE=localhost/turnstone:latest
|
||||||
|
|
||||||
|
# --- Orchard branch (submitting node) ---
|
||||||
|
# Set TURNSTONE_SUBMIT_ENDPOINT to push pattern-matched log entries to the harvest receiver.
|
||||||
|
# Generate your branch slug and API key via: POST /api/orchard/graft on the harvest instance.
|
||||||
|
# TURNSTONE_SUBMIT_ENDPOINT=https://harvest.circuitforge.tech/your-slug
|
||||||
|
# TURNSTONE_BRANCH_KEY=api-key-from-graft-response
|
||||||
|
|
|
||||||
|
|
@ -404,6 +404,7 @@ _MAIN_MIGRATIONS_SQLITE = [
|
||||||
"ALTER TABLE log_entries ADD COLUMN ml_label TEXT",
|
"ALTER TABLE log_entries ADD COLUMN ml_label TEXT",
|
||||||
"ALTER TABLE log_entries ADD COLUMN ml_scored_at TEXT",
|
"ALTER TABLE log_entries ADD COLUMN ml_scored_at TEXT",
|
||||||
"ALTER TABLE detections ADD COLUMN scorer TEXT NOT NULL DEFAULT 'anomaly'",
|
"ALTER TABLE detections ADD COLUMN scorer TEXT NOT NULL DEFAULT 'anomaly'",
|
||||||
|
"ALTER TABLE log_entries ADD COLUMN anonymized INTEGER DEFAULT NULL",
|
||||||
]
|
]
|
||||||
|
|
||||||
_CONTEXT_MIGRATIONS_SQLITE = [
|
_CONTEXT_MIGRATIONS_SQLITE = [
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,8 @@ CREATE TABLE IF NOT EXISTS log_entries (
|
||||||
repeat_count INTEGER DEFAULT 1,
|
repeat_count INTEGER DEFAULT 1,
|
||||||
out_of_order INTEGER DEFAULT 0,
|
out_of_order INTEGER DEFAULT 0,
|
||||||
matched_patterns TEXT DEFAULT '[]',
|
matched_patterns TEXT DEFAULT '[]',
|
||||||
text TEXT NOT NULL
|
text TEXT NOT NULL,
|
||||||
|
anonymized INTEGER DEFAULT NULL
|
||||||
);
|
);
|
||||||
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
|
CREATE INDEX IF NOT EXISTS idx_source ON log_entries(source_id);
|
||||||
CREATE INDEX IF NOT EXISTS idx_timestamp ON log_entries(timestamp_iso);
|
CREATE INDEX IF NOT EXISTS idx_timestamp ON log_entries(timestamp_iso);
|
||||||
|
|
|
||||||
100
app/rest.py
100
app/rest.py
|
|
@ -30,7 +30,7 @@ from typing import Annotated
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from fastapi import APIRouter, BackgroundTasks, Depends, FastAPI, HTTPException, Query, Request, UploadFile
|
from fastapi import APIRouter, BackgroundTasks, Depends, FastAPI, Header, HTTPException, Query, Request, UploadFile
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
|
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
@ -54,6 +54,7 @@ from app.services.blocklist import (
|
||||||
from app.services.pihole import PiholeClient
|
from app.services.pihole import PiholeClient
|
||||||
from app.services.discover import discover_all, build_sources_yaml, validate_source, scan_log_directories
|
from app.services.discover import discover_all, build_sources_yaml, validate_source, scan_log_directories
|
||||||
from app.services.nl_source import interpret as _nl_interpret
|
from app.services.nl_source import interpret as _nl_interpret
|
||||||
|
from app.services import orchard as _orchard
|
||||||
from app.services.incidents import (
|
from app.services.incidents import (
|
||||||
build_bundle,
|
build_bundle,
|
||||||
create_incident,
|
create_incident,
|
||||||
|
|
@ -124,6 +125,9 @@ AUTO_INCIDENT = os.environ.get("TURNSTONE_AUTO_INCIDENT", "true").lower() not in
|
||||||
# When set, all /api/ routes require Authorization: Bearer <key>.
|
# When set, all /api/ routes require Authorization: Bearer <key>.
|
||||||
# Unset (default) means no authentication — suitable for local-only deployments.
|
# Unset (default) means no authentication — suitable for local-only deployments.
|
||||||
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
|
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
|
||||||
|
# Admin key for The Orchard graft/deactivate endpoints on the harvest receiver.
|
||||||
|
# If unset, the orchard management endpoints return 501.
|
||||||
|
_ORCHARD_ADMIN_KEY: str | None = os.environ.get("TURNSTONE_ORCHARD_ADMIN_KEY") or None
|
||||||
|
|
||||||
# GPU inference server URL.
|
# GPU inference server URL.
|
||||||
# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → orch.circuitforge.tech (Paid+).
|
# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → orch.circuitforge.tech (Paid+).
|
||||||
|
|
@ -910,12 +914,24 @@ def setup_interpret(body: NLInterpretBody) -> dict:
|
||||||
|
|
||||||
|
|
||||||
@router.post("/api/glean/batch")
|
@router.post("/api/glean/batch")
|
||||||
def glean_batch(payload: BatchGleanRequest, background_tasks: BackgroundTasks) -> dict:
|
def glean_batch(
|
||||||
|
payload: BatchGleanRequest,
|
||||||
|
background_tasks: BackgroundTasks,
|
||||||
|
authorization: str | None = Header(default=None),
|
||||||
|
) -> dict:
|
||||||
"""Accept pre-parsed log entries from a remote Turnstone instance (submission protocol).
|
"""Accept pre-parsed log entries from a remote Turnstone instance (submission protocol).
|
||||||
|
|
||||||
Used by nodes with TURNSTONE_SUBMIT_ENDPOINT configured to push their
|
Used by nodes with TURNSTONE_SUBMIT_ENDPOINT configured to push their
|
||||||
pattern-matched entries to a central receiving instance.
|
pattern-matched entries to a central receiving instance.
|
||||||
|
|
||||||
|
When TURNSTONE_ORCHARD_ADMIN_KEY is set on the receiver, requests must
|
||||||
|
include Authorization: Bearer <api_key> where the key was issued at graft time.
|
||||||
"""
|
"""
|
||||||
|
branch_key_env = os.environ.get("TURNSTONE_BRANCH_KEY", "")
|
||||||
|
if branch_key_env:
|
||||||
|
provided = (authorization or "").removeprefix("Bearer ").strip()
|
||||||
|
if not provided or provided != branch_key_env:
|
||||||
|
raise HTTPException(status_code=401, detail="Invalid branch API key")
|
||||||
if not payload.entries:
|
if not payload.entries:
|
||||||
return {"gleaned": 0}
|
return {"gleaned": 0}
|
||||||
conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
|
conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
|
||||||
|
|
@ -951,6 +967,86 @@ def glean_batch(payload: BatchGleanRequest, background_tasks: BackgroundTasks) -
|
||||||
return {"gleaned": len(payload.entries), "source_host": payload.source_host}
|
return {"gleaned": len(payload.entries), "source_host": payload.source_host}
|
||||||
|
|
||||||
|
|
||||||
|
def _require_orchard_admin(authorization: str | None) -> None:
|
||||||
|
"""Raise 401/501 if the Orchard admin key check fails."""
|
||||||
|
if _ORCHARD_ADMIN_KEY is None:
|
||||||
|
raise HTTPException(status_code=501, detail="Orchard management not enabled on this instance — set TURNSTONE_ORCHARD_ADMIN_KEY")
|
||||||
|
provided = (authorization or "").removeprefix("Bearer ").strip()
|
||||||
|
if not hmac.compare_digest(_ORCHARD_ADMIN_KEY, provided):
|
||||||
|
raise HTTPException(status_code=401, detail="Invalid Orchard admin key")
|
||||||
|
|
||||||
|
|
||||||
|
class GraftRequest(BaseModel):
|
||||||
|
slug: str
|
||||||
|
contact_email: str
|
||||||
|
agreed_to_terms: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/orchard/graft")
|
||||||
|
def orchard_graft(
|
||||||
|
body: GraftRequest,
|
||||||
|
authorization: str | None = Header(default=None),
|
||||||
|
) -> dict:
|
||||||
|
"""Provision a new Orchard branch node.
|
||||||
|
|
||||||
|
Admin-only: requires Authorization: Bearer <TURNSTONE_ORCHARD_ADMIN_KEY>.
|
||||||
|
Returns the submit endpoint and a one-time API key.
|
||||||
|
"""
|
||||||
|
_require_orchard_admin(authorization)
|
||||||
|
try:
|
||||||
|
result = _orchard.graft(body.slug, body.contact_email, body.agreed_to_terms)
|
||||||
|
except ValueError as exc:
|
||||||
|
raise HTTPException(status_code=422, detail=str(exc))
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Orchard graft failed: %s", exc)
|
||||||
|
raise HTTPException(status_code=500, detail=str(exc))
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/api/orchard/branches")
|
||||||
|
def orchard_list_branches(
|
||||||
|
authorization: str | None = Header(default=None),
|
||||||
|
) -> dict:
|
||||||
|
"""List all Orchard branches. Admin-only."""
|
||||||
|
_require_orchard_admin(authorization)
|
||||||
|
branches = _orchard.list_branches()
|
||||||
|
# Strip api_key_hash from public response
|
||||||
|
safe = [{k: v for k, v in b.items() if k != "api_key_hash"} for b in branches]
|
||||||
|
return {"branches": safe}
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete("/api/orchard/branches/{slug}")
|
||||||
|
def orchard_deactivate(
|
||||||
|
slug: str,
|
||||||
|
authorization: str | None = Header(default=None),
|
||||||
|
) -> dict:
|
||||||
|
"""Deactivate a branch: stop its container and remove its Caddy route. Admin-only."""
|
||||||
|
_require_orchard_admin(authorization)
|
||||||
|
try:
|
||||||
|
return _orchard.deactivate(slug)
|
||||||
|
except KeyError as exc:
|
||||||
|
raise HTTPException(status_code=404, detail=str(exc))
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Orchard deactivate failed: %s", exc)
|
||||||
|
raise HTTPException(status_code=500, detail=str(exc))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/api/orchard/branches/{slug}/anonymize")
|
||||||
|
def orchard_anonymize(
|
||||||
|
slug: str,
|
||||||
|
authorization: str | None = Header(default=None),
|
||||||
|
) -> dict:
|
||||||
|
"""Run the anonymization worker over a branch DB. Admin-only."""
|
||||||
|
_require_orchard_admin(authorization)
|
||||||
|
try:
|
||||||
|
return _orchard.run_anonymization(slug)
|
||||||
|
except KeyError as exc:
|
||||||
|
raise HTTPException(status_code=404, detail=str(exc))
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Orchard anonymize failed: %s", exc)
|
||||||
|
raise HTTPException(status_code=500, detail=str(exc))
|
||||||
|
|
||||||
|
|
||||||
@router.get("/api/tasks/glean/status")
|
@router.get("/api/tasks/glean/status")
|
||||||
def glean_task_status() -> dict:
|
def glean_task_status() -> dict:
|
||||||
"""Return the current state of the periodic glean scheduler."""
|
"""Return the current state of the periodic glean scheduler."""
|
||||||
|
|
|
||||||
327
app/services/orchard.py
Normal file
327
app/services/orchard.py
Normal file
|
|
@ -0,0 +1,327 @@
|
||||||
|
"""The Orchard — auto-enrollment of new Turnstone branch nodes.
|
||||||
|
|
||||||
|
A "branch" is an external Turnstone instance that submits pattern-matched log
|
||||||
|
entries to a central harvest receiver (harvest.circuitforge.tech). Grafting
|
||||||
|
provisions the receiving infrastructure for a new branch:
|
||||||
|
|
||||||
|
1. Creates a data dir at ORCHARD_DATA_ROOT/<slug>/
|
||||||
|
2. Starts a new turnstone-submissions-<slug> Docker container
|
||||||
|
3. Injects a handle_path block into the Caddyfile marker section
|
||||||
|
4. Restarts caddy-proxy to activate the route
|
||||||
|
5. Persists the branch registry to orchard-branches.yaml
|
||||||
|
|
||||||
|
Admin auth: the graft/deactivate endpoints require
|
||||||
|
Authorization: Bearer <TURNSTONE_ORCHARD_ADMIN_KEY>
|
||||||
|
|
||||||
|
Set TURNSTONE_ORCHARD_ADMIN_KEY in the environment on the harvest instance.
|
||||||
|
If unset, the endpoints return 501 Not Implemented (feature is off).
|
||||||
|
|
||||||
|
Anonymization: a separate pass (run_anonymization) replaces IPs, hostnames,
|
||||||
|
and usernames in branch DBs with stable pseudonyms before Avocet reads them.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import ipaddress
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import secrets
|
||||||
|
import sqlite3
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Config (read from env on the harvest instance)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
ORCHARD_DATA_ROOT = Path(os.environ.get("TURNSTONE_ORCHARD_DATA_ROOT", "/devl/docker/turnstone-submissions"))
|
||||||
|
ORCHARD_CADDYFILE = Path(os.environ.get("TURNSTONE_ORCHARD_CADDYFILE", "/devl/caddy-proxy/Caddyfile"))
|
||||||
|
ORCHARD_CADDY_CONTAINER = os.environ.get("TURNSTONE_ORCHARD_CADDY_CONTAINER", "caddy-proxy")
|
||||||
|
ORCHARD_HARVEST_HOST = os.environ.get("TURNSTONE_ORCHARD_HARVEST_HOST", "https://harvest.circuitforge.tech")
|
||||||
|
ORCHARD_IMAGE = os.environ.get("TURNSTONE_ORCHARD_IMAGE", "localhost/turnstone:latest")
|
||||||
|
|
||||||
|
# Ports for submission containers start here and scan upward.
|
||||||
|
ORCHARD_PORT_BASE = int(os.environ.get("TURNSTONE_ORCHARD_PORT_BASE", "8538"))
|
||||||
|
|
||||||
|
_REGISTRY_FILE = ORCHARD_DATA_ROOT / "orchard-branches.yaml"
|
||||||
|
|
||||||
|
_CADDY_BRANCH_START = "# --- ORCHARD BRANCHES: auto-managed by POST /api/orchard/graft, do not edit manually ---"
|
||||||
|
_CADDY_BRANCH_END = "# --- END ORCHARD BRANCHES ---"
|
||||||
|
|
||||||
|
_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9-]{1,30}[a-z0-9]$")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Branch registry
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_registry() -> list[dict[str, Any]]:
|
||||||
|
if not _REGISTRY_FILE.exists():
|
||||||
|
return []
|
||||||
|
import yaml as _yaml
|
||||||
|
try:
|
||||||
|
data = _yaml.safe_load(_REGISTRY_FILE.read_text()) or {}
|
||||||
|
return data.get("branches", [])
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _save_registry(branches: list[dict[str, Any]]) -> None:
|
||||||
|
import yaml as _yaml
|
||||||
|
_REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
_REGISTRY_FILE.write_text(_yaml.dump({"branches": branches}, default_flow_style=False))
|
||||||
|
|
||||||
|
|
||||||
|
def list_branches() -> list[dict[str, Any]]:
|
||||||
|
return _load_registry()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Port allocation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _next_free_port() -> int:
|
||||||
|
used = {b["port"] for b in _load_registry() if "port" in b}
|
||||||
|
port = ORCHARD_PORT_BASE
|
||||||
|
while port in used:
|
||||||
|
port += 1
|
||||||
|
return port
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Caddy route injection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _build_branch_block(slug: str, port: int) -> str:
|
||||||
|
return (
|
||||||
|
f" handle_path /{slug}/* {{\n"
|
||||||
|
f" reverse_proxy http://host.docker.internal:{port} {{\n"
|
||||||
|
f" header_up X-Real-IP {{remote_host}}\n"
|
||||||
|
f" header_up X-Forwarded-Proto {{scheme}}\n"
|
||||||
|
f" flush_interval -1\n"
|
||||||
|
f" transport http {{\n"
|
||||||
|
f" response_header_timeout 0\n"
|
||||||
|
f" read_timeout 0\n"
|
||||||
|
f" }}\n"
|
||||||
|
f" }}\n"
|
||||||
|
f" }}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_caddy_branches(branches: list[dict[str, Any]]) -> None:
|
||||||
|
"""Replace the auto-managed section in the Caddyfile with current branches."""
|
||||||
|
if not ORCHARD_CADDYFILE.exists():
|
||||||
|
raise RuntimeError(f"Caddyfile not found at {ORCHARD_CADDYFILE}")
|
||||||
|
|
||||||
|
text = ORCHARD_CADDYFILE.read_text()
|
||||||
|
start_idx = text.find(_CADDY_BRANCH_START)
|
||||||
|
end_idx = text.find(_CADDY_BRANCH_END)
|
||||||
|
if start_idx == -1 or end_idx == -1:
|
||||||
|
raise RuntimeError("Caddyfile is missing the ORCHARD BRANCHES marker section")
|
||||||
|
|
||||||
|
active = [b for b in branches if b.get("active", True)]
|
||||||
|
blocks = "\n".join(_build_branch_block(b["slug"], b["port"]) for b in active)
|
||||||
|
replacement = f"{_CADDY_BRANCH_START}\n{blocks}\n {_CADDY_BRANCH_END}"
|
||||||
|
|
||||||
|
new_text = text[:start_idx] + replacement + text[end_idx + len(_CADDY_BRANCH_END):]
|
||||||
|
ORCHARD_CADDYFILE.write_text(new_text)
|
||||||
|
logger.info("Caddyfile updated with %d active branch routes", len(active))
|
||||||
|
|
||||||
|
|
||||||
|
def _reload_caddy() -> None:
|
||||||
|
result = subprocess.run(
|
||||||
|
["docker", "restart", ORCHARD_CADDY_CONTAINER],
|
||||||
|
capture_output=True, text=True, timeout=30,
|
||||||
|
)
|
||||||
|
if result.returncode != 0:
|
||||||
|
raise RuntimeError(f"docker restart {ORCHARD_CADDY_CONTAINER} failed: {result.stderr}")
|
||||||
|
logger.info("Restarted %s", ORCHARD_CADDY_CONTAINER)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Container provisioning
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _start_branch_container(slug: str, port: int, data_dir: Path) -> None:
|
||||||
|
patterns_dir = data_dir / "patterns"
|
||||||
|
patterns_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
data_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Seed default patterns if not already present
|
||||||
|
repo_patterns = Path(__file__).parent.parent.parent / "patterns"
|
||||||
|
for yaml_file in ("default.yaml", "sources-example.yaml"):
|
||||||
|
src = repo_patterns / yaml_file
|
||||||
|
dst = patterns_dir / yaml_file
|
||||||
|
if src.exists() and not dst.exists():
|
||||||
|
dst.write_text(src.read_text())
|
||||||
|
|
||||||
|
container_name = f"turnstone-submissions-{slug}"
|
||||||
|
cmd = [
|
||||||
|
"docker", "run", "-d",
|
||||||
|
"--name", container_name,
|
||||||
|
"--restart", "unless-stopped",
|
||||||
|
"-p", f"{port}:8534",
|
||||||
|
"-v", f"{data_dir}:/data",
|
||||||
|
"-v", f"{patterns_dir}:/patterns",
|
||||||
|
"-e", f"TURNSTONE_DB=/data/turnstone.db",
|
||||||
|
"-e", f"TURNSTONE_SOURCE_HOST={slug}",
|
||||||
|
"-e", "PYTHONUNBUFFERED=1",
|
||||||
|
"-e", "TZ=America/Los_Angeles",
|
||||||
|
ORCHARD_IMAGE,
|
||||||
|
]
|
||||||
|
# Remove any stale container with the same name first
|
||||||
|
subprocess.run(["docker", "rm", "-f", container_name], capture_output=True)
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||||
|
if result.returncode != 0:
|
||||||
|
raise RuntimeError(f"docker run for {container_name} failed: {result.stderr}")
|
||||||
|
logger.info("Started container %s on port %d", container_name, port)
|
||||||
|
|
||||||
|
|
||||||
|
def _stop_branch_container(slug: str) -> None:
|
||||||
|
container_name = f"turnstone-submissions-{slug}"
|
||||||
|
subprocess.run(["docker", "rm", "-f", container_name], capture_output=True, timeout=30)
|
||||||
|
logger.info("Removed container %s", container_name)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def graft(slug: str, contact_email: str, agreed_to_terms: bool) -> dict[str, Any]:
|
||||||
|
"""Provision a new Orchard branch and return connection details."""
|
||||||
|
if not agreed_to_terms:
|
||||||
|
raise ValueError("agreed_to_terms must be true")
|
||||||
|
if not _SLUG_RE.match(slug):
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid slug {slug!r}: must be 2-32 lowercase alphanumeric/hyphen, "
|
||||||
|
"cannot start or end with a hyphen"
|
||||||
|
)
|
||||||
|
|
||||||
|
branches = _load_registry()
|
||||||
|
if any(b["slug"] == slug for b in branches):
|
||||||
|
raise ValueError(f"Branch {slug!r} already exists")
|
||||||
|
|
||||||
|
port = _next_free_port()
|
||||||
|
data_dir = ORCHARD_DATA_ROOT / slug
|
||||||
|
api_key = secrets.token_urlsafe(32)
|
||||||
|
|
||||||
|
branch: dict[str, Any] = {
|
||||||
|
"slug": slug,
|
||||||
|
"port": port,
|
||||||
|
"contact_email": contact_email,
|
||||||
|
"api_key_hash": hashlib.sha256(api_key.encode()).hexdigest(),
|
||||||
|
"grafted_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||||
|
"active": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
_start_branch_container(slug, port, data_dir)
|
||||||
|
branches.append(branch)
|
||||||
|
_save_registry(branches)
|
||||||
|
|
||||||
|
_rewrite_caddy_branches(branches)
|
||||||
|
_reload_caddy()
|
||||||
|
|
||||||
|
submit_endpoint = f"{ORCHARD_HARVEST_HOST}/{slug}"
|
||||||
|
logger.info("Grafted branch %r at %s", slug, submit_endpoint)
|
||||||
|
return {
|
||||||
|
"slug": slug,
|
||||||
|
"submit_endpoint": submit_endpoint,
|
||||||
|
"api_key": api_key,
|
||||||
|
"port": port,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def deactivate(slug: str) -> dict[str, Any]:
|
||||||
|
"""Deactivate a branch: stop its container and remove its Caddy route."""
|
||||||
|
branches = _load_registry()
|
||||||
|
branch = next((b for b in branches if b["slug"] == slug), None)
|
||||||
|
if branch is None:
|
||||||
|
raise KeyError(f"Branch {slug!r} not found")
|
||||||
|
|
||||||
|
_stop_branch_container(slug)
|
||||||
|
branch["active"] = False
|
||||||
|
_save_registry(branches)
|
||||||
|
_rewrite_caddy_branches(branches)
|
||||||
|
_reload_caddy()
|
||||||
|
return {"slug": slug, "deactivated": True}
|
||||||
|
|
||||||
|
|
||||||
|
def verify_api_key(slug: str, key: str) -> bool:
|
||||||
|
"""Check whether *key* is valid for the given branch slug."""
|
||||||
|
branches = _load_registry()
|
||||||
|
branch = next((b for b in branches if b["slug"] == slug and b.get("active")), None)
|
||||||
|
if branch is None:
|
||||||
|
return False
|
||||||
|
expected = branch.get("api_key_hash", "")
|
||||||
|
provided = hashlib.sha256(key.encode()).hexdigest()
|
||||||
|
return hmac.compare_digest(expected, provided)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Anonymization worker
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_IP_RE = re.compile(
|
||||||
|
r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b"
|
||||||
|
)
|
||||||
|
_USERNAME_RE = re.compile(r"\bfor\s+(\w+)\b|\buser\s+(\w+)\b|\bsession\s+opened\s+for\s+(\w+)\b", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def _pseudonym(value: str, salt: bytes, prefix: str) -> str:
|
||||||
|
digest = hmac.new(salt, value.encode(), "sha256").hexdigest()[:10]
|
||||||
|
return f"{prefix}-{digest}"
|
||||||
|
|
||||||
|
|
||||||
|
def _anonymize_text(text: str, salt: bytes) -> str:
|
||||||
|
def replace_ip(m: re.Match) -> str:
|
||||||
|
return _pseudonym(m.group(), salt, "ip")
|
||||||
|
|
||||||
|
def replace_user(m: re.Match) -> str:
|
||||||
|
user = next(g for g in m.groups() if g)
|
||||||
|
return m.group().replace(user, _pseudonym(user, salt, "user"))
|
||||||
|
|
||||||
|
text = _IP_RE.sub(replace_ip, text)
|
||||||
|
text = _USERNAME_RE.sub(replace_user, text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def run_anonymization(slug: str) -> dict[str, Any]:
|
||||||
|
"""Anonymize IPs and usernames in a branch DB in-place.
|
||||||
|
|
||||||
|
Uses a stable per-branch salt so pseudonyms are consistent across runs
|
||||||
|
but not reversible without the salt.
|
||||||
|
"""
|
||||||
|
branch = next((b for b in _load_registry() if b["slug"] == slug), None)
|
||||||
|
if branch is None:
|
||||||
|
raise KeyError(f"Branch {slug!r} not found")
|
||||||
|
|
||||||
|
db_path = ORCHARD_DATA_ROOT / slug / "turnstone.db"
|
||||||
|
if not db_path.exists():
|
||||||
|
return {"slug": slug, "anonymized": 0}
|
||||||
|
|
||||||
|
# Per-branch salt derived from api_key_hash for stability
|
||||||
|
salt = branch["api_key_hash"].encode()[:32].ljust(32, b"0")
|
||||||
|
|
||||||
|
conn = sqlite3.connect(str(db_path), timeout=30)
|
||||||
|
conn.execute("PRAGMA journal_mode=WAL")
|
||||||
|
rows = conn.execute("SELECT id, text FROM log_entries WHERE anonymized IS NULL OR anonymized = 0").fetchall()
|
||||||
|
|
||||||
|
updated = 0
|
||||||
|
for row_id, text in rows:
|
||||||
|
clean = _anonymize_text(text or "", salt)
|
||||||
|
if clean != text:
|
||||||
|
conn.execute("UPDATE log_entries SET text = ?, anonymized = 1 WHERE id = ?", (clean, row_id))
|
||||||
|
updated += 1
|
||||||
|
else:
|
||||||
|
conn.execute("UPDATE log_entries SET anonymized = 1 WHERE id = ?", (row_id,))
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
logger.info("Anonymized %d/%d entries in branch %r", updated, len(rows), slug)
|
||||||
|
return {"slug": slug, "anonymized": updated, "total_processed": len(rows)}
|
||||||
Loading…
Reference in a new issue