From 16fe5f70a5634c33ef17b649a714e1600f142314 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 19 May 2026 07:45:58 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20Alpha=20milestone=20=E2=80=94=20corpus?= =?UTF-8?q?=20management,=20upload=20ingest,=20harvester=20agent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1 (incident tagging — already implemented), #2, #3, #5. - feat(api): DELETE /api/sources/{id} — purge entries + FTS rows for a source - feat(api): POST /api/sources/{id}/ingest — re-ingest from sources.yaml - feat(api): POST /api/ingest/upload — multipart log file upload with auto-detect - feat(ui): SourcesView reingest + delete buttons and upload file input (#2) - feat(harvester): harvester.py push + incident subcommands (#5) - feat(harvester): Dockerfile, docker-compose.yml, harvester.sh (containerless) - feat(config): GPU_SERVER_URL → CF_ORCH_URL resolution + write-back (#20) - docs: .env.example, README Configuration table, version bump to 0.5.0 --- .env.example | 24 ++++ README.md | 17 ++- app/rest.py | 83 +++++++++++++- harvester/Dockerfile | 18 +++ harvester/docker-compose.yml | 23 ++++ harvester/harvester.py | 201 +++++++++++++++++++++++++++++++++ harvester/harvester.sh | 26 +++++ harvester/sources.example.yaml | 44 ++++++++ web/package.json | 2 +- web/src/views/SourcesView.vue | 167 +++++++++++++++++++++------ 10 files changed, 567 insertions(+), 38 deletions(-) create mode 100644 .env.example create mode 100644 harvester/Dockerfile create mode 100644 harvester/docker-compose.yml create mode 100644 harvester/harvester.py create mode 100755 harvester/harvester.sh create mode 100644 harvester/sources.example.yaml diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..5f9f889 --- /dev/null +++ b/.env.example @@ -0,0 +1,24 @@ +# Turnstone environment variables +# Copy to .env and adjust for your setup. All variables are optional unless noted. 
+ +# --- Database & paths --- +# TURNSTONE_DB=/data/turnstone.db +# TURNSTONE_PATTERNS=/patterns +# TURNSTONE_SOURCE_HOST=my-server + +# --- GPU / LLM inference --- +# GPU_SERVER_URL — URL of your GPU inference server (Ollama, vLLM, or cf-orch coordinator). +# Paid+ users: leave unset to auto-default to https://orch.circuitforge.tech via CF_LICENSE_KEY. +# Local Ollama (default if unset): http://localhost:11434 +# Local cf-orch coordinator: http://10.1.10.71:7700 +# CF_ORCH_URL is also accepted as a backward-compatible alias. +# GPU_SERVER_URL=http://localhost:11434 + +# --- CircuitForge license (Paid+) --- +# Enables cloud GPU inference and premium features. +# When set, GPU_SERVER_URL defaults to https://orch.circuitforge.tech automatically. +# CF_LICENSE_KEY=CFG-TRSN-XXXX-XXXX-XXXX + +# --- Bundle endpoint (optional) --- +# Remote endpoint to push diagnostic bundles for escalation. +# TURNSTONE_BUNDLE_ENDPOINT=https://example.com/api/bundles diff --git a/README.md b/README.md index 5932dd8..db7ef2c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ > **Diagnostic log intelligence for self-hosted infrastructure.** [![Status](https://img.shields.io/badge/status-beta-blue)](https://git.opensourcesolarpunk.com/Circuit-Forge/turnstone) -[![Version](https://img.shields.io/badge/version-0.4.0-green)](https://git.opensourcesolarpunk.com/Circuit-Forge/turnstone/releases) +[![Version](https://img.shields.io/badge/version-0.5.0-green)](https://git.opensourcesolarpunk.com/Circuit-Forge/turnstone/releases) [![License](https://img.shields.io/badge/license-private-red)](LICENSE) [![Python](https://img.shields.io/badge/python-3.11%2B-blue)](requirements.txt) @@ -145,6 +145,21 @@ bash manage.sh logs # tail API log --- +## Configuration + +Copy `.env.example` to `.env` (or pass as `-e` flags to Docker/Podman). All variables are optional. 
+ +| Variable | Default | Description | +|----------|---------|-------------| +| `GPU_SERVER_URL` | `http://localhost:11434` | GPU inference server (Ollama, vLLM, or cf-orch). `CF_ORCH_URL` is accepted as a backward-compat alias. Paid+ users: leave unset — auto-defaults to `https://orch.circuitforge.tech` when `CF_LICENSE_KEY` is present. | +| `CF_LICENSE_KEY` | — | CircuitForge Paid+ license key. Enables cloud GPU inference and premium features. | +| `TURNSTONE_DB` | `/data/turnstone.db` | Path to the SQLite database. | +| `TURNSTONE_PATTERNS` | `./patterns` | Pattern directory (default.yaml, sources.yaml, watch.yaml). | +| `TURNSTONE_SOURCE_HOST` | `unknown` | Host identifier stamped on ingested entries. | +| `TURNSTONE_BUNDLE_ENDPOINT` | — | Remote URL to push diagnostic bundles for escalation. | + +--- + ## Ports | Service | Port | Notes | diff --git a/app/rest.py b/app/rest.py index c749a22..a60ae95 100644 --- a/app/rest.py +++ b/app/rest.py @@ -12,19 +12,22 @@ import hmac import json import os import sqlite3 +import tempfile import urllib.error import urllib.request from contextlib import asynccontextmanager from pathlib import Path from typing import Annotated +import yaml + from fastapi import APIRouter, BackgroundTasks, FastAPI, HTTPException, Query, Request, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse from fastapi.staticfiles import StaticFiles from pydantic import BaseModel -from app.ingest.pipeline import ensure_schema +from app.ingest.pipeline import ensure_schema, ingest_file as _ingest_file from app.ingest.base import load_compiled_patterns from app.ingest.tautulli import parse_webhook as _parse_tautulli from app.services.blocklist import ( @@ -79,6 +82,21 @@ BUNDLE_ENDPOINT = os.environ.get("TURNSTONE_BUNDLE_ENDPOINT", "") PATTERN_DIR = Path(os.environ.get("TURNSTONE_PATTERNS", Path(__file__).parent.parent / "patterns")) PATTERN_FILE = PATTERN_DIR / 
@router.delete("/api/sources/{source_id}")
def delete_source(source_id: str) -> dict:
    """Purge every stored row that belongs to *source_id*.

    Rows are removed from both ``log_fts`` and ``log_entries`` inside one
    transaction on the database at ``DB_PATH``.

    Returns:
        ``{"deleted": <rows removed from log_entries>, "source_id": source_id}``.
    """
    bind = (source_id,)
    db = sqlite3.connect(str(DB_PATH))
    db.execute("PRAGMA journal_mode=WAL")
    try:
        # FTS rows go first, then the entries themselves; the reported count
        # is the number of log_entries rows removed.
        db.execute("DELETE FROM log_fts WHERE source_id = ?", bind)
        removed = db.execute(
            "DELETE FROM log_entries WHERE source_id = ?", bind
        ).rowcount
        db.commit()
    finally:
        # Always release the connection, even when a DELETE fails.
        db.close()
    return {"deleted": removed, "source_id": source_id}
@router.post("/api/ingest/upload")
async def ingest_upload(
    file: UploadFile,
    source_id: Annotated[str | None, Query(description="Override source ID (defaults to filename)")] = None,
    background_tasks: BackgroundTasks = None,
) -> dict:
    """Accept a multipart log upload, spool it to a temp file and ingest it.

    Args:
        file: The uploaded log file.
        source_id: Optional source ID reported in the response; defaults to
            the stem of the uploaded filename (``"upload"`` when unnamed).
        background_tasks: When provided, an FTS index rebuild is scheduled
            after the ingest.

    Returns:
        ``{"source_id": ..., "ingested": <sum of stats values>, "stats": ...}``
        where ``stats`` is whatever ``_ingest_file`` returned.
    """
    import asyncio  # local import so this block does not depend on file-level edits

    upload_name = file.filename or ""
    # NOTE(review): `sid` is only echoed back in the response; it is not passed
    # to `_ingest_file`, so stored entries may not carry this ID. Confirm
    # against the pipeline before relying on the override.
    sid = source_id or Path(upload_name or "upload").stem
    suffix = Path(upload_name or "log.txt").suffix or ".log"

    content = await file.read()
    tmp_path: Path | None = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            # Record the path before writing so a failed write is still cleaned up.
            tmp_path = Path(tmp.name)
            tmp.write(content)
        # The ingest is synchronous; run it in a worker thread so this
        # coroutine does not stall the event loop while it parses and writes.
        stats = await asyncio.to_thread(_ingest_file, tmp_path, DB_PATH, PATTERN_FILE)
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)

    if background_tasks is not None:
        background_tasks.add_task(build_fts_index, DB_PATH)
    total = sum(stats.values())
    return {"source_id": sid, "ingested": total, "stats": stats}
pyyaml + +COPY harvester.py . + +# Default volume mounts expected at runtime: +# /var/log → host /var/log (read-only) +# /run/log/journal → host /run/log/journal (read-only) +# /patterns → sources.yaml directory (read-only) + +ENV TURNSTONE_URL=http://turnstone:8534 +ENV TURNSTONE_SOURCES=/patterns/sources.yaml + +ENTRYPOINT ["python", "harvester.py"] +CMD ["push"] diff --git a/harvester/docker-compose.yml b/harvester/docker-compose.yml new file mode 100644 index 0000000..c5253d7 --- /dev/null +++ b/harvester/docker-compose.yml @@ -0,0 +1,23 @@ +services: + harvester: + build: . + image: turnstone-harvester:latest + environment: + TURNSTONE_URL: http://turnstone:8534 # or http://host.docker.internal:8534 for host-network Turnstone + TURNSTONE_SOURCES: /patterns/sources.yaml + volumes: + - /var/log:/var/log:ro + - /run/log/journal:/run/log/journal:ro + - ../patterns:/patterns:ro # sources.yaml lives here + networks: + - turnstone-net + restart: "no" # run on demand; use cron or systemd timer to repeat + + # To run on a schedule, replace restart: "no" with a cron timer via: + # docker run --rm turnstone-harvester:latest push + # or add a systemd timer that calls: + # docker compose -f docker-compose.yml run --rm harvester + +networks: + turnstone-net: + external: true # join the same network as the main Turnstone container diff --git a/harvester/harvester.py b/harvester/harvester.py new file mode 100644 index 0000000..4f8370a --- /dev/null +++ b/harvester/harvester.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +"""Turnstone Harvester — collect logs and ship them to a Turnstone instance. 
+ +Subcommands: + push Read sources.yaml, POST each log file to Turnstone /api/ingest/upload + incident Tag an incident on the remote Turnstone instance + +Usage: + # Push all configured sources + python harvester.py push --url http://turnstone:8534 --sources /patterns/sources.yaml + + # Tag an incident + python harvester.py incident "jellyseerr went down" \\ + --url http://turnstone:8534 \\ + --started "2026-05-19 10:00" --ended "2026-05-19 10:30" \\ + --type crash --severity HIGH + +Environment variables (override flags): + TURNSTONE_URL Base URL of the Turnstone instance + TURNSTONE_SOURCES Path to sources.yaml +""" +from __future__ import annotations + +import argparse +import json +import logging +import sys +import urllib.error +import urllib.parse +import urllib.request +from pathlib import Path + +import yaml + +logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") +logger = logging.getLogger("harvester") + + +# --------------------------------------------------------------------------- +# HTTP helpers +# --------------------------------------------------------------------------- + +def _post_json(url: str, payload: dict) -> dict: + data = json.dumps(payload).encode() + req = urllib.request.Request( + url, + data=data, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + + +def _post_file(url: str, path: Path, source_id: str) -> dict: + """POST a log file as multipart/form-data.""" + boundary = "----TurnstoneHarvesterBoundary" + body_parts: list[bytes] = [] + + content = path.read_bytes() + body_parts.append( + f"--{boundary}\r\n" + f'Content-Disposition: form-data; name="file"; filename="{path.name}"\r\n' + f"Content-Type: text/plain\r\n\r\n".encode() + ) + body_parts.append(content) + body_parts.append(b"\r\n") + body_parts.append(f"--{boundary}--\r\n".encode()) + + body = b"".join(body_parts) + params = 
def cmd_push(args: argparse.Namespace) -> int:
    """Upload every log file listed in the sources file to Turnstone.

    Args:
        args: Parsed CLI namespace; reads ``args.sources`` (path to the YAML
            sources file) and ``args.url`` (Turnstone base URL).

    Returns:
        ``0`` when nothing failed (including when no sources are defined),
        ``1`` when the sources file is missing or malformed, or when at least
        one entry was malformed or failed to upload. Entries whose log file
        does not exist on disk are skipped with a warning and not counted as
        errors.
    """
    sources_path = Path(args.sources)
    if not sources_path.exists():
        logger.error("sources file not found: %s", sources_path)
        return 1

    with open(sources_path, encoding="utf-8") as f:
        config = yaml.safe_load(f) or {}

    # A YAML document whose root is a list or scalar would otherwise crash on .get().
    if not isinstance(config, dict):
        logger.error("%s must contain a mapping with a 'sources' list", sources_path)
        return 1

    sources = config.get("sources") or []
    if not sources:
        logger.warning("No sources defined in %s", sources_path)
        return 0
    if not isinstance(sources, list):
        logger.error("'sources' in %s must be a list", sources_path)
        return 1

    upload_url = args.url.rstrip("/") + "/turnstone/api/ingest/upload"
    total_ingested = 0
    errors = 0

    for index, src in enumerate(sources):
        # One malformed entry must not abort the whole run: report it, count
        # it as an error, and carry on with the remaining sources.
        if not isinstance(src, dict) or not src.get("path"):
            logger.error("Source entry #%d has no 'path', skipping: %r", index, src)
            errors += 1
            continue

        src_id = src.get("id", "unknown")
        src_path = Path(str(src["path"]))
        if not src_path.exists():
            logger.warning("Source %r not found, skipping: %s", src_id, src_path)
            continue

        logger.info("Pushing %s (%s) ...", src_id, src_path)
        try:
            result = _post_file(upload_url, src_path, src_id)
            count = result.get("ingested", 0)
            total_ingested += count
            logger.info(" %s: %d entries ingested", src_id, count)
        except urllib.error.HTTPError as exc:
            logger.error(" %s: HTTP %d — %s", src_id, exc.code, exc.read().decode(errors="replace"))
            errors += 1
        except Exception as exc:
            # Deliberate best-effort boundary: any other failure for this
            # source is logged and counted, then the loop continues.
            logger.error(" %s: %s", src_id, exc)
            errors += 1

    logger.info("Done. Total ingested: %d entries, errors: %d", total_ingested, errors)
    return 1 if errors else 0
def main() -> int:
    """Parse the command line and run the selected subcommand.

    Returns:
        The exit status of the subcommand handler, or ``1`` (after printing
        help) when ``args.cmd`` matches no known subcommand.
    """
    parser = build_parser()
    args = parser.parse_args()

    handlers = {
        "push": cmd_push,
        "incident": cmd_incident,
    }
    handler = handlers.get(args.cmd)
    if handler is None:
        parser.print_help()
        return 1
    return handler(args)
+ pip3 install --quiet pyyaml +fi + +exec python3 "$SCRIPT_DIR/harvester.py" "$@" diff --git a/harvester/sources.example.yaml b/harvester/sources.example.yaml new file mode 100644 index 0000000..4780e3b --- /dev/null +++ b/harvester/sources.example.yaml @@ -0,0 +1,44 @@ +# Turnstone Harvester — sources.example.yaml +# Copy to sources.yaml and adjust paths for your system. +# The harvester reads this file and POSTs each log file to Turnstone. +# +# Each source needs: +# id: Short identifier (used as source_id in Turnstone) +# path: Absolute path to the log file on the host + +sources: + # System journal (export with: journalctl -o json > /var/log/journal-export.jsonl) + # - id: system-journal + # path: /var/log/journal-export.jsonl + + # Syslog + - id: syslog + path: /var/log/syslog + + # Docker daemon log + # - id: docker + # path: /var/log/docker.log + + # Podman events (rootful) + # - id: podman + # path: /var/log/podman-events.log + + # Caddy access log + # - id: caddy + # path: /var/log/caddy/access.log + + # Arr stack — adjust container paths to match your setup + # - id: sonarr + # path: /opt/sonarr/config/logs/sonarr.0.txt + # - id: radarr + # path: /opt/radarr/config/logs/radarr.0.txt + # - id: prowlarr + # path: /opt/prowlarr/config/logs/prowlarr.0.txt + + # qBittorrent + # - id: qbittorrent + # path: /opt/qbittorrent/config/data/logs/qbittorrent.log + + # Jellyfin + # - id: jellyfin + # path: /opt/jellyfin/log/jellyfin.log diff --git a/web/package.json b/web/package.json index 450dac2..7567dd9 100644 --- a/web/package.json +++ b/web/package.json @@ -1,7 +1,7 @@ { "name": "turnstone-web", "private": true, - "version": "0.1.0", + "version": "0.5.0", "type": "module", "scripts": { "dev": "vite", diff --git a/web/src/views/SourcesView.vue b/web/src/views/SourcesView.vue index 190eeea..81029ef 100644 --- a/web/src/views/SourcesView.vue +++ b/web/src/views/SourcesView.vue @@ -1,8 +1,20 @@