turnstone/manage.sh
pyr0ball 828b69768a refactor: rename ingest → glean throughout codebase
Renames the app/ingest/ package to app/glean/ and updates all
references across Python modules, shell scripts, Vue components,
tests, and documentation.

Intentionally preserved:
- SQLite column name ingest_time (avoids schema migration)
- RetrievedEntry.ingest_time field (maps to the column above)
- Any public-facing JSON keys that reference ingest_time

Changes by category:
- app/ingest/ → app/glean/ (full package move, all parsers)
- app/tasks/ingest_scheduler.py → app/tasks/glean_scheduler.py
- scripts/ingest_corpus.py → scripts/glean_corpus.py
- tests/test_ingest_*.py → tests/test_glean_*.py
- Docstrings, log messages, comments: ingest → glean
- Env var: TURNSTONE_INGEST_INTERVAL → TURNSTONE_GLEAN_INTERVAL
- Shell scripts: glean.log, glean_corpus.py references
- README.md: multi-source ingest → multi-source glean
- .env.example: updated env var name
- patterns/: new diagnostic patterns from 2026-05-20 SSH incident
  (service_crash_loop, pkg_daemon_restart, ssh_forward_conflict)
- SourcesView.vue: pipeline label updated
- All test import paths updated to app.glean.*

285 tests passing.
2026-05-20 23:02:55 -07:00

347 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
# manage.sh — command-line entry point for the Turnstone diagnostic
# intelligence layer.
# Usage: ./manage.sh <command> [args]

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Colour palette used by the logging helpers. The variables start out empty
# and are only filled with ANSI escape sequences when stdout is a terminal,
# so redirected or piped output stays free of control codes.
RED='' GREEN='' YELLOW='' BLUE='' NC=''
if [[ -t 1 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  BLUE='\033[0;34m'
  NC='\033[0m'
fi
# Logging helpers. Each prints "[turnstone] <message>" with a coloured tag.
# Arguments: $* - message text (joined with spaces)
# Only the colour variables are passed through %b so their escape sequences
# are rendered; the message itself is printed verbatim with %s, which keeps
# backslashes in paths or other caller-supplied text from being interpreted.
info()    { printf '%b[turnstone]%b %s\n' "${BLUE}" "${NC}" "$*"; }
success() { printf '%b[turnstone]%b %s\n' "${GREEN}" "${NC}" "$*"; }
warn()    { printf '%b[turnstone]%b %s\n' "${YELLOW}" "${NC}" "$*"; }
# error writes to stderr and terminates the script with status 1.
error()   { printf '%b[turnstone]%b %s\n' "${RED}" "${NC}" "$*" >&2; exit 1; }
# Run from the directory that holds this script so that every relative path
# below (log/, data/, web/, scripts/, tests/) resolves consistently.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

API_PORT=8534 # FastAPI: serves REST API + built Vue SPA
VITE_PORT=5174 # Vite HMR port in dev mode (proxies /api → 8534)
LOG_DIR="log"
API_PID_FILE=".turnstone-api.pid"

# Database selection, in priority order:
#   1. a non-empty TURNSTONE_DB from the environment,
#   2. the live cluster DB when the cluster checkout directory exists,
#   3. the dev DB next to this script.
_CLUSTER_DB="/devl/turnstone-cluster/data/turnstone.db"
_DEV_DB="${SCRIPT_DIR}/data/turnstone.db"
if [[ -n "${TURNSTONE_DB:-}" ]]; then
  DB="${TURNSTONE_DB}"
elif [[ -d /devl/turnstone-cluster ]]; then
  DB="${_CLUSTER_DB}"
else
  DB="${_DEV_DB}"
fi

# Pattern directory, resolved the same way: environment override first, then
# the cluster patterns (watch.yaml, default.yaml) when that directory exists,
# otherwise the patterns shipped alongside this script.
if [[ -n "${TURNSTONE_PATTERNS:-}" ]]; then
  PATTERN_DIR="${TURNSTONE_PATTERNS}"
elif [[ -d /devl/turnstone-cluster/patterns ]]; then
  PATTERN_DIR="/devl/turnstone-cluster/patterns"
else
  PATTERN_DIR="${SCRIPT_DIR}/patterns"
fi

# Python interpreter of the "cf" conda environment; CONDA_BASE may be
# overridden from the environment.
: "${CONDA_BASE:=/devl/miniconda3}"
PYTHON="${CONDA_BASE}/envs/cf/bin/python"
# ── Helpers ───────────────────────────────────────────────────────────────────
# Report whether the process recorded in a PID file is currently running.
# Arguments: $1 - path to the PID file
# Returns:   0 when the file exists and its PID can be signalled,
#            non-zero otherwise.
_is_alive() {
  local pid_file="$1"
  local recorded_pid

  if [[ ! -f "$pid_file" ]]; then
    return 1
  fi

  # An unreadable file yields an empty PID, which the probe below rejects.
  recorded_pid="$(<"$pid_file")" || recorded_pid=""

  # Signal 0 performs the existence/permission check without delivering
  # anything to the process.
  kill -0 "$recorded_pid" 2>/dev/null
}
# Signal every process currently listening on a TCP port.
# Arguments: $1 - TCP port number
#            $2 - signal name to send (optional, default TERM)
# Returns:   0 always; this is a best-effort helper.
_kill_port() {
  local port="$1" signal="${2:-TERM}"
  local entry pid
  local -a found=()

  # The listing runs inside a process substitution so that "no listener"
  # (grep exiting 1) or a failing ss cannot abort the script under
  # `set -e -o pipefail`.
  mapfile -t found < <(
    ss -tlnp "sport = :${port}" 2>/dev/null \
      | grep -oE 'pid=[0-9]+' \
      | sort -u
  )
  if (( ${#found[@]} == 0 )); then
    return 0
  fi

  for entry in "${found[@]}"; do
    pid="${entry#pid=}"
    warn "Killing stray PID ${pid} on port ${port}"
    # The process may already be gone or belong to another user; ignore that.
    kill -s "$signal" "$pid" 2>/dev/null || true
  done
}

# Wait for a port to stop accepting connections (i.e. fully released).
# Polls 30 times at 0.3 s intervals; if the port is still accepting
# connections afterwards, the listeners are sent SIGKILL and the port is
# checked once more.
# Arguments: $1 - TCP port number
# Returns:   0 always; a warning is printed when the port stays occupied.
_wait_for_port_free() {
  local port="$1"
  local attempt

  for (( attempt = 0; attempt < 30; attempt++ )); do
    sleep 0.3
    # A failed connect means nothing is listening any more.
    if ! (echo "" >/dev/tcp/127.0.0.1/"$port") 2>/dev/null; then
      return 0
    fi
  done

  warn "Port ${port} still occupied after 9 s — trying SIGKILL"
  _kill_port "$port" KILL
  sleep 1
  if (echo "" >/dev/tcp/127.0.0.1/"$port") 2>/dev/null; then
    warn "Port ${port} still in use!"
  fi
}
# Stop the process recorded in a PID file and remove the file.
# Arguments: $1 - path to the PID file
#            $2 - human-readable service label used in messages
# Outputs:   one status line via success/warn
# Returns:   0 in every handled case, so callers can continue with their
#            own fallback even when the process could not be signalled.
_kill_pid_file() {
  local pid_file="$1" label="$2"
  local pid

  if [[ ! -f "$pid_file" ]]; then
    warn "No PID file for $label."
    return 0
  fi

  pid=$(<"$pid_file")

  # Signal 0 only probes for existence; a failure means the file is stale.
  if ! kill -0 "$pid" 2>/dev/null; then
    warn "Stale PID file for $label (PID $pid not running). Cleaning up."
    rm -f "$pid_file"
    return 0
  fi

  # Only report success (and drop the PID file) when the signal was really
  # delivered; otherwise keep the file so the process can still be tracked.
  if kill "$pid"; then
    rm -f "$pid_file"
    success "$label stopped (PID $pid)."
  else
    warn "Could not signal $label (PID $pid); keeping $pid_file."
  fi
}
# Block until a freshly started service accepts TCP connections on localhost.
# Arguments: $1 - TCP port to probe
#            $2 - human-readable service label used in error messages
#            $3 - PID file of the service being started
# Returns:   0 as soon as a connection succeeds. Calls `error` (which exits)
#            when the recorded process is no longer alive or when 20 probes
#            at 0.5 s intervals have all failed.
_wait_for_port() {
  local port="$1" label="$2" pid_file="$3"
  local probes_left=20

  while (( probes_left > 0 )); do
    sleep 0.5

    # Opening /dev/tcp succeeds only when something is listening.
    if (echo "" >/dev/tcp/127.0.0.1/"$port") 2>/dev/null; then
      return 0
    fi

    # No listener yet: give up early if the process has already exited.
    if ! _is_alive "$pid_file"; then
      rm -f "$pid_file"
      error "$label died during startup. Check ${LOG_DIR}/api.log"
    fi

    probes_left=$(( probes_left - 1 ))
  done

  error "$label did not bind to port $port within 10 s."
}
# ── Usage ─────────────────────────────────────────────────────────────────────
# Print the command overview to stdout.
# Globals: BLUE, GREEN, NC (colour codes), API_PORT, VITE_PORT, DB (read)
# Rows that carry colour variables are emitted with %b so the escape
# sequences are rendered; all other rows are printed verbatim with %s.
usage() {
  printf '\n'
  printf '%b\n' " ${BLUE}Turnstone — Diagnostic Log Intelligence${NC}"
  printf '%s\n' \
    "" \
    " Usage: ./manage.sh <command> [args]" \
    "" \
    " Production-like (built SPA + uvicorn):"
  printf '%b\n' \
    " ${GREEN}start${NC} Build Vue SPA, start FastAPI + SPA on :${API_PORT}" \
    " ${GREEN}stop${NC} Stop the server" \
    " ${GREEN}restart${NC} Stop then start" \
    " ${GREEN}status${NC} Show running process" \
    " ${GREEN}logs${NC} Tail server log" \
    " ${GREEN}open${NC} Open UI in browser"
  printf '%s\n' \
    "" \
    " Development (hot-reload):"
  printf '%b\n' \
    " ${GREEN}dev${NC} uvicorn --reload (:${API_PORT}) + Vite HMR (:${VITE_PORT})"
  printf '%s\n' \
    "" \
    " Data:"
  printf '%b\n' \
    " ${GREEN}glean PATH [DB]${NC} Glean a log file or corpus directory" \
    " ${GREEN}glean-plex [HOST]${NC} Pull Plex log from Cass (or HOST) and glean" \
    " ${GREEN}glean-qbit [HOST]${NC} Pull qBittorrent log locally or from HOST via SSH" \
    " ${GREEN}build-fts${NC} Rebuild the FTS search index"
  printf '%s\n' \
    "" \
    " Tests:"
  printf '%b\n' \
    " ${GREEN}test [args]${NC} Run pytest suite"
  printf '%s\n' \
    "" \
    " DB: ${DB}" \
    " Conda env: cf" \
    "" \
    " Examples:" \
    " ./manage.sh start" \
    " ./manage.sh dev" \
    " ./manage.sh glean corpus/raw/" \
    " ./manage.sh glean corpus/raw/ data/custom.db" \
    ""
}
# ── Commands ──────────────────────────────────────────────────────────────────
# Dispatch on the first CLI argument; the remaining arguments are forwarded to
# the selected command. With no argument the usage text is shown.
CMD="${1:-help}"
shift || true # shift fails when no argument was given; that is expected

# Absolute path used whenever the script re-invokes itself. The working
# directory is SCRIPT_DIR by this point, so a relative "$0" (script launched
# from some other directory) would no longer resolve.
SELF="${SCRIPT_DIR}/$(basename "${BASH_SOURCE[0]}")"

case "$CMD" in
  # Build the SPA, then launch uvicorn in the background and wait for the port.
  start)
    if _is_alive "$API_PID_FILE"; then
      warn "Already running (PID $(<"$API_PID_FILE")) — use 'restart' to rebuild."
      exit 0
    fi
    mkdir -p "$LOG_DIR" data
    info "Building Vue SPA…"
    # Capture the build's own exit status instead of hiding it behind the
    # log filter; the full output is kept in build.log.
    build_rc=0
    (cd web && npm run build) > "${LOG_DIR}/build.log" 2>&1 || build_rc=$?
    grep -E "built in|error" "${LOG_DIR}/build.log" || true
    if (( build_rc == 0 )); then
      success "SPA built → web/dist/"
    else
      # Startup stays best-effort: continue, but do not claim success.
      warn "SPA build failed (exit ${build_rc}) — see ${LOG_DIR}/build.log; continuing with whatever is in web/dist/"
    fi
    info "Starting on port ${API_PORT}"
    info " DB: ${DB}"
    info " Patterns: ${PATTERN_DIR}"
    TURNSTONE_DB="$DB" TURNSTONE_PATTERNS="$PATTERN_DIR" nohup "$PYTHON" -m uvicorn app.rest:app \
      --host 0.0.0.0 --port "$API_PORT" \
      >> "${LOG_DIR}/api.log" 2>&1 &
    echo $! > "$API_PID_FILE"
    _wait_for_port "$API_PORT" "Turnstone" "$API_PID_FILE"
    success "Running → http://localhost:${API_PORT} (PID $(<"$API_PID_FILE"))"
    ;;

  # Stop via PID file, then sweep anything still bound to the API port.
  stop)
    _kill_pid_file "$API_PID_FILE" "Turnstone"
    _kill_port "$API_PORT"
    _wait_for_port_free "$API_PORT"
    ;;

  restart)
    # A failing stop must not prevent the subsequent start.
    bash "$SELF" stop || warn "stop reported an error — continuing with start"
    exec bash "$SELF" start
    ;;

  status)
    echo ""
    if _is_alive "$API_PID_FILE"; then
      success "Turnstone RUNNING PID $(<"$API_PID_FILE") → http://localhost:${API_PORT}"
    else
      echo -e " Turnstone ${RED}STOPPED${NC}"
    fi
    echo ""
    ;;

  logs)
    tail -f "${LOG_DIR}/api.log"
    ;;

  # Open the UI with the platform opener, or just print the URL.
  open)
    URL="http://localhost:${API_PORT}"
    info "Opening ${URL}"
    if command -v xdg-open &>/dev/null; then
      xdg-open "$URL"
    elif command -v open &>/dev/null; then
      open "$URL"
    else
      echo "$URL"
    fi
    ;;

  # Hot-reload mode: uvicorn --reload in the background, Vite in the foreground.
  dev)
    DEV_API_PID=".turnstone-dev-api.pid"
    mkdir -p "$LOG_DIR" data
    if _is_alive "$DEV_API_PID"; then
      warn "Dev API already running (PID $(<"$DEV_API_PID"))"
    else
      info "Starting uvicorn --reload on port ${API_PORT}"
      # Pass the resolved pattern directory as well, matching `start`.
      TURNSTONE_DB="$DB" TURNSTONE_PATTERNS="$PATTERN_DIR" nohup "$PYTHON" -m uvicorn app.rest:app \
        --host 0.0.0.0 --port "$API_PORT" --reload \
        >> "${LOG_DIR}/api.log" 2>&1 &
      echo $! > "$DEV_API_PID"
      _wait_for_port "$API_PORT" "FastAPI (dev)" "$DEV_API_PID"
      success "API (hot-reload) → http://localhost:${API_PORT}"
    fi
    # Stop the background API when the foreground Vite process ends.
    _cleanup_dev() {
      local pid=""
      # Bash treats $(<file) as a file read only when that is the sole
      # redirection, so test for the file instead of adding 2>/dev/null.
      if [[ -f "$DEV_API_PID" ]]; then
        pid="$(<"$DEV_API_PID")"
      fi
      if [[ -n "$pid" ]]; then
        kill "$pid" 2>/dev/null || true
        rm -f "$DEV_API_PID"
      fi
      info "Dev servers stopped."
    }
    # Cleanup runs exactly once, from the EXIT trap; INT/TERM merely turn
    # into a normal exit with the conventional 128+signal status.
    trap _cleanup_dev EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM
    info "Starting Vite HMR on port ${VITE_PORT}"
    success "Frontend (HMR) → http://localhost:${VITE_PORT}"
    (cd web && npm run dev -- --port "$VITE_PORT")
    ;;

  # Glean a file or directory into the given (or default) database.
  glean)
    if [[ $# -lt 1 ]]; then
      error "Usage: ./manage.sh glean <file_or_dir> [DB_PATH]"
    fi
    GLEAN_DB="${2:-$DB}"
    info "Gleaning $1 → ${GLEAN_DB}"
    "$PYTHON" scripts/glean_corpus.py "$1" "$GLEAN_DB"
    ;;

  # Copy all Plex server logs from a remote host over SSH, glean them, then
  # restart the server.
  glean-plex)
    PLEX_HOST="${1:-cass}"
    PLEX_LOG_DIR="/var/lib/plexmediaserver/Library/Application Support/Plex Media Server/Logs"
    # Unpredictable staging directory, removed on every exit path.
    TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/turnstone-plex.XXXXXX")" \
      || error "Could not create a temporary directory."
    trap 'rm -rf -- "$TMP_DIR"' EXIT
    info "Listing Plex logs on ${PLEX_HOST}"
    # Get list of all rotated + active Plex logs. ssh exits 255 when the
    # connection itself fails; any other non-zero status comes from the
    # remote ls and just means nothing matched.
    ssh_rc=0
    remote_listing="$(ssh "$PLEX_HOST" \
      "ls '${PLEX_LOG_DIR}'/Plex\ Media\ Server*.log 2>/dev/null")" || ssh_rc=$?
    if (( ssh_rc == 255 )); then
      error "SSH to ${PLEX_HOST} failed."
    fi
    REMOTE_LOGS=()
    if [[ -n "$remote_listing" ]]; then
      mapfile -t REMOTE_LOGS <<< "$remote_listing"
    fi
    if [[ ${#REMOTE_LOGS[@]} -eq 0 ]]; then
      error "No Plex logs found on ${PLEX_HOST} at ${PLEX_LOG_DIR}"
    fi
    for remote_path in "${REMOTE_LOGS[@]}"; do
      # Plex Media Server.1.log → cass-plex_media_server.1.log
      local_name="${PLEX_HOST}-$(basename "$remote_path" | tr ' ' '_' | tr '[:upper:]' '[:lower:]')"
      local_path="${TMP_DIR}/${local_name}"
      info "$(basename "$remote_path")"
      ssh "$PLEX_HOST" "cat '${remote_path}'" > "$local_path" \
        || error "Failed to fetch ${remote_path} from ${PLEX_HOST}."
    done
    info "Gleaning ${#REMOTE_LOGS[@]} log file(s) into ${DB}"
    for f in "$TMP_DIR"/*.log; do
      "$PYTHON" scripts/glean_corpus.py "$f" "$DB"
    done
    # exec replaces this shell, so the EXIT trap would not run: clean up now.
    rm -rf -- "$TMP_DIR"
    trap - EXIT
    info "Done. Restarting server…"
    exec bash "$SELF" restart
    ;;

  # Glean the qBittorrent log, taken locally or from HOST over SSH, then
  # restart the server.
  glean-qbit)
    QBIT_HOST="${1:-}"
    # Default log locations in priority order.
    # NOTE(review): $HOME is expanded locally, so for a remote HOST these
    # paths assume the same home directory as on this machine — confirm.
    QBIT_LOG_PATHS=(
      "$HOME/.local/share/qBittorrent/logs/qbittorrent.log"
      "$HOME/.config/qBittorrent/logs/qbittorrent.log"
      "/var/log/qbittorrent/qbittorrent.log"
    )
    # Unpredictable staging directory, removed on every exit path.
    TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/turnstone-qbit.XXXXXX")" \
      || error "Could not create a temporary directory."
    trap 'rm -rf -- "$TMP_DIR"' EXIT
    if [[ -n "$QBIT_HOST" ]]; then
      info "Fetching qBittorrent log from ${QBIT_HOST}"
      REMOTE_LOG=""
      for p in "${QBIT_LOG_PATHS[@]}"; do
        probe_rc=0
        ssh "$QBIT_HOST" "test -f '$p'" 2>/dev/null || probe_rc=$?
        # 255 is ssh's own failure status, as opposed to `test` returning 1.
        if (( probe_rc == 255 )); then
          error "SSH to ${QBIT_HOST} failed."
        fi
        if (( probe_rc == 0 )); then
          REMOTE_LOG="$p"
          break
        fi
      done
      if [[ -z "$REMOTE_LOG" ]]; then
        error "No qBittorrent log found on ${QBIT_HOST}. Tried: ${QBIT_LOG_PATHS[*]}"
      fi
      STAGED_LOG="${TMP_DIR}/${QBIT_HOST}-qbittorrent.log"
      ssh "$QBIT_HOST" "cat '$REMOTE_LOG'" > "$STAGED_LOG" \
        || error "Failed to fetch ${REMOTE_LOG} from ${QBIT_HOST}."
      info "${REMOTE_LOG} (${QBIT_HOST})"
    else
      LOCAL_LOG=""
      for p in "${QBIT_LOG_PATHS[@]}"; do
        if [[ -f "$p" ]]; then
          LOCAL_LOG="$p"
          break
        fi
      done
      if [[ -z "$LOCAL_LOG" ]]; then
        error "No qBittorrent log found locally. Tried: ${QBIT_LOG_PATHS[*]}"
      fi
      STAGED_LOG="${TMP_DIR}/qbittorrent.log"
      cp "$LOCAL_LOG" "$STAGED_LOG"
      info "${LOCAL_LOG}"
    fi
    info "Gleaning into ${DB}"
    "$PYTHON" scripts/glean_corpus.py "$STAGED_LOG" "$DB"
    # exec replaces this shell, so the EXIT trap would not run: clean up now.
    rm -rf -- "$TMP_DIR"
    trap - EXIT
    info "Done. Restarting server…"
    exec bash "$SELF" restart
    ;;

  build-fts)
    info "Rebuilding FTS index for ${DB}"
    TURNSTONE_DB="$DB" "$PYTHON" scripts/build_fts_index.py "$DB"
    success "FTS index rebuilt."
    ;;

  # Run pytest from the cf env against an in-memory DB; extra args pass through.
  test)
    info "Running test suite…"
    PYTEST="${CONDA_BASE}/envs/cf/bin/pytest"
    [[ -x "$PYTEST" ]] || error "pytest not found in cf env at ${PYTEST}"
    TURNSTONE_DB=":memory:" "$PYTEST" tests/ -v "$@"
    ;;

  help|--help|-h)
    usage
    ;;

  *)
    error "Unknown command: ${CMD}. Run './manage.sh help' for usage."
    ;;
esac