turnstone/app/rest.py

"""Turnstone REST API — serves REST API and Vue SPA under the /turnstone prefix.

All routes (API + static files) are mounted at /turnstone so the app works
identically whether accessed directly (http://host:8534/turnstone/) or through
Caddy (menagerie.circuitforge.tech/turnstone) without prefix stripping.
"""
from __future__ import annotations

import asyncio
import dataclasses
import hmac
import json
import logging
import os
import re
import time

# Offline mode: must be set before any HuggingFace library is imported.
# Both flags must agree — HF hub and transformers each check independently.
# setdefault() is used so values the operator already exported are left untouched.
if os.environ.get("TURNSTONE_OFFLINE_MODE", "").lower() in ("1", "true", "yes"):
    os.environ.setdefault("HF_HUB_OFFLINE", "1")
    os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")
import sqlite3
import tempfile
import urllib.error
import urllib.request
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Annotated

import yaml

from fastapi import APIRouter, BackgroundTasks, Depends, FastAPI, Header, HTTPException, Query, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, RedirectResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

from app.db import close_pool, ensure_schema, ensure_context_schema, ensure_incidents_schema, migrate_incidents_to_dedicated_db
from app.glean.pipeline import glean_file as _glean_file, glean_ssh_source as _glean_ssh_source
from app.glean.base import load_compiled_patterns, now_iso
from app.glean.tautulli import parse_webhook as _parse_tautulli
from app.glean.wazuh import is_wazuh_alert as _is_wazuh_alert, parse as _parse_wazuh
from app.services.blocklist import (
    BlocklistCandidate,
    get_candidate,
    list_candidates,
    load_telemetry_rules,
    mark_pushed,
    mark_unblocked,
    run_scan,
    update_candidate_status,
)
from app.services.pihole import PiholeClient
from app.services.discover import discover_all, build_sources_yaml, validate_source, scan_log_directories
from app.services.nl_source import interpret as _nl_interpret
from app.services import orchard as _orchard
from app.services import ssh_targets as _ssh_targets
from app.services.incidents import (
    build_bundle,
    create_incident,
    delete_incident,
    get_bundle,
    get_incident,
    get_incident_entries,
    list_bundles,
    list_incidents,
    list_sent_bundles,
    record_sent_bundle,
    store_bundle,
)
from app.services.search import (
    search as _search,
    list_sources as _list_sources,
    recent_source_errors as _source_errors,
    stats_summary as _stats,
    format_results,
    build_fts_index,
)
from app.services.diagnose import diagnose as _diagnose, diagnose_stream as _diagnose_stream
from app.watch.watcher import Watcher, load_watch_config
from app.context.store import (
    add_fact as _add_fact,
    list_facts as _list_facts,
    delete_fact as _delete_fact,
    list_documents as _list_documents,
    delete_document as _delete_document,
)
from app.context.retriever import retrieve_context as _retrieve_context, format_context_block
from app.glean.doc_upload import glean_upload as _glean_upload
from app.context.wizard import get_schema as _wizard_schema, advance_step, is_complete, apply_session
from app.context.chunker import UnsupportedDocType, FileTooLarge
from app.tasks.glean_scheduler import get_state as _glean_state, run_once as _run_glean, scheduler_loop as _scheduler_loop, submit_matched as _submit_matched
from app.tasks.anomaly_scorer import get_state as _scorer_state, run_once as _run_scorer
from app.tasks.cybersec_scorer import get_state as _cybersec_state, run_once as _run_cybersec
from app.services.anomaly import list_detections as _list_detections, acknowledge_detection as _ack_detection
from app.services.cybersec import list_cybersec_detections as _list_cybersec, CYBERSEC_LABELS
from app.glean.mqtt_subscriber import run_mqtt_subscribers as _run_mqtt_subscribers

# ── Module configuration ────────────────────────────────────────────────────
# Everything below is resolved once, at import time, from environment variables.
# The int()/float() conversions raise ValueError during import if a variable is
# set to a non-numeric value.

# Main log database; defaults to <repo>/data/turnstone.db.
DB_PATH = Path(os.environ.get("TURNSTONE_DB", Path(__file__).parent.parent / "data" / "turnstone.db"))
# Context KB gets its own file so context fact writes never contend with the
# high-throughput glean scheduler.  Defaults to a sibling file next to the main DB.
CONTEXT_DB_PATH = Path(
    os.environ.get("TURNSTONE_CONTEXT_DB", DB_PATH.parent / "turnstone-context.db")
)
# Incidents get their own file so incident writes never block behind the FTS5
# bulk-insert write lock held by the glean scheduler during log bursts.
INCIDENTS_DB_PATH = Path(
    os.environ.get("TURNSTONE_INCIDENTS_DB", DB_PATH.parent / "turnstone-incidents.db")
)
# User preferences are stored as JSON next to the main DB.
PREFS_PATH = DB_PATH.parent / "preferences.json"
# Output directory of the built Vue SPA.
DIST_DIR = Path(__file__).parent.parent / "web" / "dist"
SOURCE_HOST = os.environ.get("TURNSTONE_SOURCE_HOST", "unknown")
BUNDLE_ENDPOINT = os.environ.get("TURNSTONE_BUNDLE_ENDPOINT", "")
# Directory holding default.yaml (patterns) plus watch.yaml and sources.yaml.
PATTERN_DIR = Path(os.environ.get("TURNSTONE_PATTERNS", Path(__file__).parent.parent / "patterns"))
PATTERN_FILE = PATTERN_DIR / "default.yaml"
# Passed to the glean scheduler; a value <= 0 disables the scheduler (see _lifespan).
# NOTE(review): unit is presumably seconds — confirm against scheduler_loop.
GLEAN_INTERVAL = int(os.environ.get("TURNSTONE_GLEAN_INTERVAL", "900"))
# Trailing slashes are stripped; an empty value is passed to the scheduler as None.
SUBMIT_ENDPOINT = os.environ.get("TURNSTONE_SUBMIT_ENDPOINT", "").rstrip("/")
ANOMALY_MODEL = os.environ.get("TURNSTONE_ANOMALY_MODEL", "")
ANOMALY_DEVICE = os.environ.get("TURNSTONE_ANOMALY_DEVICE", "cpu")
ANOMALY_THRESHOLD = float(os.environ.get("TURNSTONE_ANOMALY_THRESHOLD", "0.75"))
CYBERSEC_MODEL = os.environ.get("TURNSTONE_CYBERSEC_MODEL", "")
CYBERSEC_DEVICE = os.environ.get("TURNSTONE_CYBERSEC_DEVICE", "cpu")
CYBERSEC_THRESHOLD = float(os.environ.get("TURNSTONE_CYBERSEC_THRESHOLD", "0.60"))
# Enabled unless the variable is explicitly "0", "false" or "no" (case-insensitive).
AUTO_INCIDENT = os.environ.get("TURNSTONE_AUTO_INCIDENT", "true").lower() not in ("0", "false", "no")
# When set, all /api/ routes require Authorization: Bearer <key>.
# Unset (default) means no authentication — suitable for local-only deployments.
# An empty string is normalised to None.
_API_KEY: str | None = os.environ.get("TURNSTONE_API_KEY") or None
# Admin key for The Orchard graft/deactivate endpoints on the harvest receiver.
# If unset, the orchard management endpoints return 501.
_ORCHARD_ADMIN_KEY: str | None = os.environ.get("TURNSTONE_ORCHARD_ADMIN_KEY") or None

# GPU inference server URL.
# Priority: GPU_SERVER_URL → CF_ORCH_URL (backward compat) → orch.circuitforge.tech (Paid+).
# Resolved value is written back to CF_ORCH_URL so cf-core callers see it automatically.
GPU_SERVER_URL: str | None = (
    os.environ.get("GPU_SERVER_URL")
    or os.environ.get("CF_ORCH_URL")
    or (
        "https://orch.circuitforge.tech"
        if os.environ.get("CF_LICENSE_KEY")
        else None
    )
)
if GPU_SERVER_URL:
    os.environ["CF_ORCH_URL"] = GPU_SERVER_URL

# Watcher instance; it is configured and started in _lifespan only when
# watch.yaml yields at least one config.
_watcher = Watcher(DB_PATH, PATTERN_FILE)
# Filled in by _lifespan from PATTERN_FILE; iterated there as (pattern, _) pairs.
_compiled_patterns: list = []
# pattern name → domain; populated at startup from _compiled_patterns.
_pattern_domain: dict[str, str] = {}


@asynccontextmanager
async def _lifespan(app: FastAPI):
    """Application lifespan: prepare databases and background work, then tear down.

    Startup (before ``yield``):
      1. Attach uvicorn's error-log handlers to the audit logger.
      2. Ensure the main, context and incidents schemas exist and migrate any
         incident rows from the main DB to the dedicated incidents DB.
      3. Load compiled patterns and build the pattern-name → domain lookup.
      4. Start the file watcher when ``watch.yaml`` yields any config.
      5. Start the glean scheduler (only if ``GLEAN_INTERVAL > 0``) and the MQTT
         subscribers, both only when ``sources.yaml`` exists.

    Shutdown (after ``yield``): stop the watcher, cancel both background tasks
    and close the DB pool.  A task that had already died with an exception is
    logged rather than re-raised, so the remaining cleanup (including
    ``close_pool()``) always runs.
    """
    global _compiled_patterns, _pattern_domain
    log = logging.getLogger(__name__)

    # Route turnstone.audit through uvicorn's own handler so it appears in api.log.
    _audit_log.setLevel(logging.INFO)
    for h in logging.getLogger("uvicorn.error").handlers:
        _audit_log.addHandler(h)
    ensure_schema(DB_PATH)
    ensure_context_schema(CONTEXT_DB_PATH)
    ensure_incidents_schema(INCIDENTS_DB_PATH)
    migrated = migrate_incidents_to_dedicated_db(DB_PATH, INCIDENTS_DB_PATH)
    if migrated:
        log.info(
            "Migrated %d incident/bundle rows from main DB to incidents DB", migrated
        )
    _compiled_patterns = load_compiled_patterns(PATTERN_FILE)
    # Patterns without a domain are left out of the lookup.
    _pattern_domain = {p.name: p.domain for p, _ in _compiled_patterns if p.domain}
    watch_cfg_path = PATTERN_DIR / "watch.yaml"
    configs = load_watch_config(watch_cfg_path)
    if configs:
        _watcher.configure(configs)
        _watcher.start()

    sources_file = PATTERN_DIR / "sources.yaml"
    _scheduler_task: asyncio.Task | None = None
    if GLEAN_INTERVAL > 0 and sources_file.exists():
        _scheduler_task = asyncio.create_task(
            _scheduler_loop(
                sources_file, DB_PATH, PATTERN_FILE, GLEAN_INTERVAL,
                submit_endpoint=SUBMIT_ENDPOINT or None,
                source_host=SOURCE_HOST,
                anomaly_model=ANOMALY_MODEL,
                anomaly_device=ANOMALY_DEVICE,
                anomaly_threshold=ANOMALY_THRESHOLD,
                cybersec_model=CYBERSEC_MODEL,
                cybersec_device=CYBERSEC_DEVICE,
                cybersec_threshold=CYBERSEC_THRESHOLD,
                incidents_db_path=INCIDENTS_DB_PATH,
                auto_incident=AUTO_INCIDENT,
            ),
            name="glean-scheduler",
        )

    _mqtt_task: asyncio.Task | None = None
    if sources_file.exists():
        _mqtt_task = asyncio.create_task(
            _run_mqtt_subscribers(sources_file, DB_PATH),
            name="mqtt-subscribers",
        )

    yield

    _watcher.stop()
    for task in (_scheduler_task, _mqtt_task):
        if task is None:
            continue
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        except Exception:
            # The task had already finished with an error; awaiting it re-raises
            # that error.  Log it and carry on so the remaining cleanup runs.
            log.exception("Background task %r had failed before shutdown", task.get_name())
    close_pool()  # no-op if SQLite backend


# The FastAPI application.  Interactive docs are served under the /turnstone
# prefix (docs_url) and ReDoc is disabled (redoc_url=None).
app = FastAPI(title="Turnstone API", version="0.6.2", docs_url="/turnstone/docs", redoc_url=None, lifespan=_lifespan)

# CORS: every origin and every request header is accepted for the listed methods.
# NOTE(review): allow_origins=["*"] is fully open — confirm this is intended for
# deployments that do not set TURNSTONE_API_KEY.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET", "POST", "DELETE", "PATCH"],
    allow_headers=["*"],
)

# Dedicated audit logger; _lifespan sets its level and attaches uvicorn's handlers.
_audit_log = logging.getLogger("turnstone.audit")


@app.middleware("http")
async def _audit_middleware(request: Request, call_next):
    """Emit one audit line for every request under ``/turnstone/api``.

    The line carries the HTTP method, path, query string (if any), response
    status code and elapsed wall time in milliseconds.  Request and response
    bodies are never logged.  Requests outside the API prefix pass through
    without being logged.
    """
    started = time.monotonic()
    response = await call_next(request)

    path = request.url.path
    if not path.startswith("/turnstone/api"):
        return response

    elapsed_ms = int((time.monotonic() - started) * 1000)
    query = request.url.query
    query_suffix = f"?{query}" if query else ""
    _audit_log.info(
        "%s %s%s %d %dms",
        request.method,
        path,
        query_suffix,
        response.status_code,
        elapsed_ms,
    )
    return response


# Baseline preferences.  _load_prefs() overlays the saved preferences.json on
# top of these, so a key missing from the file falls back to the value here.
_PREFS_DEFAULTS: dict = {
    "entry_point_style": "topbar",
    # Falls back to a local endpoint when no GPU server URL was resolved.
    "llm_url": GPU_SERVER_URL or "http://localhost:11434",
    "llm_model": "llama3.1:8b",
    "llm_api_key": "",
    "tech_level": "sysadmin",
    # Each entry has the same fields as the SeverityOverride model.
    "severity_overrides": [
        {
            "name": "PAM auth noise",
            "pattern": r"pam_unix.*auth(?:entication)?\s+fail|auth could not identify",
            "override_severity": "WARN",
            "enabled": True,
        }
    ],
}


def _load_prefs() -> dict:
    """Return the effective preferences: defaults overlaid with the saved file.

    Reads ``PREFS_PATH`` as JSON and merges it over ``_PREFS_DEFAULTS`` (saved
    values win).  Falls back to a copy of the defaults when the file is
    missing, unreadable, not valid JSON/UTF-8, or does not contain a JSON
    object.  Unusable files are logged instead of being ignored silently.

    Values are not all strings (``severity_overrides`` is a list of dicts),
    hence the plain ``dict`` return type.
    """
    try:
        saved = json.loads(PREFS_PATH.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No preferences saved yet — the normal first-run case.
        return dict(_PREFS_DEFAULTS)
    except (ValueError, OSError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        logging.getLogger(__name__).warning(
            "Ignoring unreadable preferences file %s: %s", PREFS_PATH, exc
        )
        return dict(_PREFS_DEFAULTS)
    if not isinstance(saved, dict):
        # Valid JSON but not an object (e.g. a list) cannot be merged.
        logging.getLogger(__name__).warning(
            "Ignoring preferences file %s: expected a JSON object, got %s",
            PREFS_PATH,
            type(saved).__name__,
        )
        return dict(_PREFS_DEFAULTS)
    return {**_PREFS_DEFAULTS, **saved}


def _save_prefs(data: dict) -> None:
    """Persist *data* to ``PREFS_PATH`` as JSON.

    The JSON is written to a sibling ``.tmp`` file and then moved over the
    real file with ``os.replace``, so a crash or full disk mid-write cannot
    leave a truncated ``preferences.json`` behind.  The parent directory is
    created if it does not exist yet.
    """
    PREFS_PATH.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = PREFS_PATH.with_name(PREFS_PATH.name + ".tmp")
    tmp_path.write_text(json.dumps(data), encoding="utf-8")
    os.replace(tmp_path, PREFS_PATH)


class DiagnoseRequest(BaseModel):
    """Request body for ``POST /api/diagnose`` and ``POST /api/diagnose/stream``."""

    # Problem description; POST /api/diagnose returns an empty result when it is blank.
    query: str
    # Optional time-window bounds, passed through to the diagnose service.
    since: str | None = None
    until: str | None = None
    # Optional source filter; an empty string is treated as "no filter".
    source: str | None = None


class SourceSuggestRequest(BaseModel):
    """Request body for ``POST /api/sources/suggest``."""

    # Natural-language problem description that gets tokenised and matched
    # against source IDs.
    query: str


class SeverityOverride(BaseModel):
    """One severity-override rule, as carried in ``SettingsBody.severity_overrides``."""

    name: str
    # NOTE(review): presumably a regular expression matched against log text —
    # the default entry in _PREFS_DEFAULTS is a regex; confirm where it is applied.
    pattern: str
    override_severity: str
    enabled: bool = True


class SettingsBody(BaseModel):
    """Request body for ``PATCH /api/settings``.

    Every field is optional; a field left as ``None`` keeps the stored
    preference unchanged.
    """

    # Validated by patch_settings: must be "topbar" or "fab".
    entry_point_style: str | None = None
    llm_url: str | None = None
    llm_model: str | None = None
    llm_api_key: str | None = None
    # Validated by patch_settings: must be "homelab", "sysadmin" or "executive".
    tech_level: str | None = None
    tautulli_token: str | None = None
    # Replaces the whole stored list when provided.
    severity_overrides: list[SeverityOverride] | None = None
    pihole_url: str | None = None
    pihole_version: str | None = None
    pihole_api_key: str | None = None
    router_source_ids: str | None = None
    device_names: str | None = None


class IncidentCreate(BaseModel):
    """Payload describing a new incident.

    NOTE(review): the endpoint that consumes this model is outside the section
    reviewed here — presumably the incident-creation route; confirm.
    """

    label: str
    issue_type: str = ""
    started_at: str | None = None
    ended_at: str | None = None
    notes: str = ""
    severity: str = "medium"


class FactBody(BaseModel):
    """Payload describing a single context fact (category / key / value).

    NOTE(review): the consuming endpoint is outside the section reviewed here —
    presumably the context-KB "add fact" route; confirm.
    """

    category: str
    key: str
    value: str
    source: str | None = None


class WizardStepBody(BaseModel):
    """Payload for one wizard step: the session so far, the step ID and its answer.

    NOTE(review): the consuming endpoint is outside the section reviewed here;
    the session dict's schema is defined by the context wizard module.
    """

    session: dict
    step_id: str
    # A single answer, a list of answers, or None.
    answer: str | list[str] | None = None


class WizardApplyBody(BaseModel):
    """Payload carrying a wizard session to be applied.

    NOTE(review): the consuming endpoint is outside the section reviewed here.
    """

    session: dict

# Serve built Vue assets at the path Vite embeds in index.html.
# The mount is skipped when the assets directory does not exist (for example
# when the SPA has not been built), so the API still starts without it.
if (DIST_DIR / "assets").exists():
    app.mount("/turnstone/assets", StaticFiles(directory=str(DIST_DIR / "assets")), name="assets")

def _check_api_key(request: Request) -> None:
    """Dependency: enforce a bearer token when TURNSTONE_API_KEY is configured.

    No check is performed when:
      * no API key is configured (``_API_KEY is None``);
      * the path is ``/turnstone/health`` or ``/turnstone`` (with or without a
        trailing slash), so monitoring tools work without credentials;
      * the path is not under ``/turnstone/api``.

    Every other request must send ``Authorization: Bearer <key>``.

    Raises:
        HTTPException: 401 when the header is missing or not a Bearer
            credential, 403 when the token does not match the configured key.
    """
    if _API_KEY is None:
        return
    if request.url.path.rstrip("/") in ("/turnstone/health", "/turnstone"):
        return
    if not request.url.path.startswith("/turnstone/api"):
        return
    auth = request.headers.get("Authorization", "")
    if not auth.startswith("Bearer "):
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization: Bearer <token>",
            headers={"WWW-Authenticate": "Bearer"},
        )
    token = auth[len("Bearer "):]
    # hmac.compare_digest only accepts ASCII-only str arguments and raises
    # TypeError otherwise.  Comparing encoded bytes keeps the check
    # constant-time while turning a non-ASCII token into a clean 403
    # instead of an unhandled error.
    if not hmac.compare_digest(token.encode("utf-8"), _API_KEY.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid API key")


def _domain_counts(results: list) -> dict[str, int]:
    """Tally how many results touch each domain.

    Each result contributes at most one hit per domain, however many of its
    matched patterns map to that domain.  Pattern names are resolved through
    the module-level ``_pattern_domain`` lookup; names with no known domain
    are ignored, and a result whose ``matched_patterns`` is empty or ``None``
    contributes nothing.
    """
    tally: dict[str, int] = {}
    for result in results:
        tags = result.matched_patterns or []
        # dict.fromkeys de-duplicates while keeping first-seen order.
        domains = dict.fromkeys(_pattern_domain.get(tag, "") for tag in tags)
        for name in domains:
            if name:
                tally[name] = tally.get(name, 0) + 1
    return tally


# API router — all routes accessible at /turnstone/api/* and /turnstone/health.
# _check_api_key runs as a dependency on every route registered here; it
# decides per request path whether a bearer token is required.
router = APIRouter(prefix="/turnstone", dependencies=[Depends(_check_api_key)])


@router.get("/health")
def health() -> dict:
    """Liveness probe: report an ``ok`` status together with the main DB path."""
    payload = {"status": "ok", "db": str(DB_PATH)}
    return payload


@router.get("/api/search")
def search_logs(
    q: Annotated[str, Query(description="Search query")] = "",
    source: Annotated[str | None, Query(description="Filter by log source ID (partial match)")] = None,
    severity: Annotated[str | None, Query(description="Filter by severity (DEBUG/INFO/WARN/ERROR/CRITICAL)")] = None,
    domain: Annotated[str | None, Query(description="Filter by service health domain (networking, storage, auth, etc.)")] = None,
    since: Annotated[str | None, Query(description="ISO timestamp lower bound")] = None,
    until: Annotated[str | None, Query(description="ISO timestamp upper bound")] = None,
    limit: Annotated[int, Query(ge=1, le=500)] = 50,
    semantic: Annotated[bool, Query(description="Hybrid BM25+vector re-ranking (requires embedding backend)")] = False,
) -> dict:
    """Search the log database and return matching entries.

    An empty ``q`` short-circuits to an empty result without touching the DB.
    The ``domain`` filter is applied after the search: a result is kept when
    at least one of its matched patterns maps to that domain in
    ``_pattern_domain``.  Because it runs after ``limit`` has been applied,
    fewer than ``limit`` results may come back.

    Returns:
        ``{"count": int, "results": [<result as dict>, ...]}``.
    """
    if not q:
        return {"count": 0, "results": []}
    results = _search(
        DB_PATH,
        query=q,
        source_filter=source,
        severity=severity,
        since=since,
        until=until,
        limit=limit,
        semantic=semantic,
    )
    if domain:
        # matched_patterns may be empty/None (as _domain_counts also allows).
        results = [
            r for r in results
            if any(_pattern_domain.get(tag) == domain for tag in (r.matched_patterns or []))
        ]
    return {"count": len(results), "results": [dataclasses.asdict(r) for r in results]}


@router.get("/api/diagnose")
def diagnose(
    q: Annotated[str, Query(description="Service name or problem description")] = "",
    source: Annotated[str | None, Query(description="Limit to a specific source ID (partial match)")] = None,
    since: Annotated[str | None, Query(description="ISO timestamp lower bound")] = None,
    until: Annotated[str | None, Query(description="ISO timestamp upper bound")] = None,
) -> dict:
    """Collect log evidence for a problem description without calling the LLM.

    Runs three searches (broad OR-mode, CRITICAL only, ERROR only), optionally
    adds a plain-SQL scan of recent errors for an auto-detected source, removes
    duplicates, sorts chronologically and returns at most 20 entries.

    Returns:
        ``{"count": int, "results": [...], "formatted": str}``; all empty when
        ``q`` is empty.
    """
    if not q:
        return {"count": 0, "results": [], "formatted": ""}

    # Auto-detect source hints: if a query token matches part of a known source_id,
    # use that token as the source_filter so all matching sources (e.g. all
    # rotated plex logs) are included — not just the first matched rotation.
    detected_source = source
    if not detected_source:
        q_lower = q.lower()
        for row in _list_sources(DB_PATH):
            tokens = (
                p
                for seg in row["source_id"].split(":")
                for p in seg.replace("-", " ").replace("_", " ").split()
            )
            # The query is lower-cased, so the token must be too; otherwise a
            # mixed-case source ID (e.g. "Plex") could never match.  The
            # original token is kept as the filter value.
            hit = next((p for p in tokens if len(p) > 3 and p.lower() in q_lower), None)
            if hit:
                detected_source = hit
                break

    common: dict = dict(source_filter=detected_source, since=since, until=until, include_repeats=False)
    # Broad pass uses OR so any symptom keyword surfaces evidence
    broad = _search(DB_PATH, query=q, limit=15, or_mode=True, **common)
    critical = _search(DB_PATH, query=q, severity="CRITICAL", limit=5, **common)
    errors = _search(DB_PATH, query=q, severity="ERROR", limit=10, **common)

    # When a source was auto-detected, also pull its most recent errors via plain SQL —
    # FTS ranking can bury real errors from the named service if their text doesn't
    # match the symptom keywords. Plain-SQL scan returns actual recent errors regardless.
    source_errors: list = []
    if detected_source and not source and not errors:
        source_errors = _source_errors(
            DB_PATH, source_filter=detected_source, severity="ERROR",
            limit=10, since=since, until=until,
        )
        if not source_errors:
            source_errors = _source_errors(
                DB_PATH, source_filter=detected_source, severity="CRITICAL",
                limit=5, since=since, until=until,
            )

    # De-duplicate by entry_id, keeping the first occurrence.
    seen: set[str] = set()
    combined = []
    for r in broad + critical + errors + source_errors:
        if r.entry_id not in seen:
            seen.add(r.entry_id)
            combined.append(r)

    # Chronological order; entries without a timestamp sort last ("\xff").
    combined.sort(key=lambda r: (r.timestamp_iso or "\xff", r.sequence))
    combined = combined[:20]
    return {
        "count": len(combined),
        "results": [dataclasses.asdict(r) for r in combined],
        "formatted": format_results(combined),
    }


@router.post("/api/diagnose")
def diagnose_post(body: DiagnoseRequest) -> dict:
    """Run a full diagnosis for the query in *body* and return it in one response.

    A blank query yields an empty summary skeleton and no entries.  Otherwise
    the diagnose service is called with the LLM settings from the saved
    preferences, and its summary is extended with per-domain hit counts.
    """
    if body.query.strip():
        prefs = _load_prefs()
        outcome = _diagnose(
            DB_PATH,
            query=body.query,
            since=body.since,
            until=body.until,
            source_filter=body.source or None,
            llm_url=prefs.get("llm_url") or None,
            llm_model=prefs.get("llm_model") or None,
            llm_api_key=prefs.get("llm_api_key") or None,
        )
        entries = outcome["entries"]
        summary = dict(outcome["summary"])
        summary["by_domain"] = _domain_counts(entries)
        return {
            "summary": summary,
            "reasoning": outcome.get("reasoning"),
            "entries": [dataclasses.asdict(entry) for entry in entries],
        }

    # Blank query: same summary keys as a real result, all empty.
    empty_summary = {
        "total": 0, "window_start": None, "window_end": None,
        "time_detected": False, "by_severity": {}, "by_source": {},
        "by_domain": {},
    }
    return {"summary": empty_summary, "entries": []}


@router.post("/api/diagnose/stream")
async def diagnose_post_stream(body: DiagnoseRequest) -> StreamingResponse:
    """Stream a diagnosis as Server-Sent Events.

    Each event yielded by the diagnose stream is JSON-encoded and sent as one
    ``data:`` frame.  The response headers disable caching and proxy buffering
    so frames reach the client as they are produced.
    """
    prefs = _load_prefs()

    async def _event_frames():
        # Arguments are assembled when streaming starts, not when the handler returns.
        stream_args = {
            "query": body.query,
            "since": body.since,
            "until": body.until,
            "source_filter": body.source or None,
            "llm_url": prefs.get("llm_url") or None,
            "llm_model": prefs.get("llm_model") or None,
            "llm_api_key": prefs.get("llm_api_key") or None,
            "context_db_path": CONTEXT_DB_PATH,
            "incidents_db_path": INCIDENTS_DB_PATH,
            "tech_level": prefs.get("tech_level", "sysadmin"),
            "pattern_domain": _pattern_domain or None,
        }
        async for evt in _diagnose_stream(DB_PATH, **stream_args):
            yield "data: " + json.dumps(evt) + "\n\n"

    sse_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
    return StreamingResponse(
        _event_frames(),
        media_type="text/event-stream",
        headers=sse_headers,
    )


# Words dropped from a problem description before it is matched against source
# IDs in suggest_sources().  Entries are lower-case because query tokens are
# lower-cased before the lookup.  ("some" appears twice below; harmless in a set.)
_SUGGEST_STOPWORDS = frozenset({
    "the", "and", "that", "this", "with", "have", "from", "they",
    "been", "their", "what", "when", "there", "some", "would", "make",
    "like", "into", "time", "look", "just", "know", "take", "year",
    "your", "good", "some", "could", "them", "then", "very", "also",
    "back", "after", "work", "need", "even", "much", "most", "tell",
    "does", "more", "once", "help", "seem", "here", "about", "issue",
    "thing", "logs", "error", "again", "still", "these", "those",
    "getting", "having", "trying", "going", "where", "which", "cant",
    "now", "set", "kind", "weird", "stable", "huge", "real", "nice",
})


@router.post("/api/sources/suggest")
def suggest_sources(body: SourceSuggestRequest) -> dict:
    """Rank known source IDs by how well they match a problem description.

    The description is reduced to lower-cased alphabetic words longer than two
    characters, minus stopwords.  Each source ID is split on colons, dashes,
    underscores and digits; a source is suggested when the two token sets
    overlap.  The score is the matched fraction of the source's own tokens.

    Returns:
        ``{"suggested": [...best first...], "all_source_ids": [...]}``.
    """
    sources = _list_sources(DB_PATH)

    wanted: set[str] = set()
    for word in re.findall(r"[a-zA-Z]+", body.query):
        lowered = word.lower()
        if len(word) > 2 and lowered not in _SUGGEST_STOPWORDS:
            wanted.add(lowered)

    ranked = []
    for row in sources:
        src_id: str = row["source_id"]
        # Tokenise source ID: split on colon, dash, underscore, digits
        id_tokens: set[str] = set()
        for piece in re.split(r"[:\-_\d]+", src_id):
            piece = piece.strip()
            if len(piece) > 2:
                id_tokens.add(piece.lower())

        overlap = wanted & id_tokens
        if not overlap:
            continue
        ranked.append({
            "source_id": src_id,
            "score": round(len(overlap) / max(len(id_tokens), 1), 3),
            "matched_tokens": sorted(overlap),
        })

    # Stable sort: sources with equal scores keep their listing order.
    ranked = sorted(ranked, key=lambda item: item["score"], reverse=True)
    return {
        "suggested": ranked,
        "all_source_ids": [row["source_id"] for row in sources],
    }


@router.get("/api/settings")
def get_settings() -> dict:
    """Return the effective preferences (defaults merged with the saved file).

    NOTE(review): the payload includes stored secrets such as ``llm_api_key``
    as-is — confirm the UI needs them echoed back.
    """
    current = _load_prefs()
    return current


@router.patch("/api/settings")
def patch_settings(body: SettingsBody) -> dict:
    """Apply a partial settings update and return the resulting preferences.

    Only fields that are not ``None`` in *body* are written.  The two
    enumerated fields are validated first; nothing is saved if either is
    invalid.

    Raises:
        HTTPException: 422 for an unknown ``entry_point_style`` or ``tech_level``.
    """
    prefs = _load_prefs()

    # Validate before touching anything; entry_point_style is reported first.
    if body.entry_point_style is not None and body.entry_point_style not in ("topbar", "fab"):
        raise HTTPException(status_code=422, detail="entry_point_style must be 'topbar' or 'fab'")
    if body.tech_level is not None and body.tech_level not in ("homelab", "sysadmin", "executive"):
        raise HTTPException(status_code=422, detail="tech_level must be 'homelab', 'sysadmin', or 'executive'")

    # Applied in this fixed order so newly added keys land in a stable position.
    updatable = (
        "entry_point_style",
        "llm_url",
        "llm_model",
        "llm_api_key",
        "tech_level",
        "tautulli_token",
        "severity_overrides",
        "pihole_url",
        "pihole_version",
        "pihole_api_key",
        "router_source_ids",
        "device_names",
    )
    for field in updatable:
        value = getattr(body, field)
        if value is None:
            continue
        if field == "severity_overrides":
            # Stored as plain dicts so the preferences stay JSON-serialisable.
            value = [override.model_dump() for override in value]
        prefs[field] = value

    _save_prefs(prefs)
    return prefs


@router.get("/api/sources")
def list_sources() -> dict:
    """Return every source known to the log database (DB-only view)."""
    rows = _list_sources(DB_PATH)
    return {"sources": rows}


@router.get("/api/sources/configured")
def list_configured_sources() -> dict:
    """Return every source in sources.yaml, enriched with DB stats.

    Unlike ``/api/sources`` (which is DB-only), this endpoint reads sources.yaml
    so SSH sources appear even before their first successful glean.  DB entry
    counts, error counts, and timestamps are aggregated and merged in.

    For SSH sources, sub-source IDs (e.g. ``rack01/journald``) are summed to
    produce a single aggregate stat row for the top-level host entry.

    A missing file, an empty file, a ``sources:`` key with no value, or a
    document that is not a mapping all yield an empty list.  Source entries
    that are not mappings are skipped.  If the DB stats cannot be read the
    failure is logged and sources are returned with zero counts.
    """
    sources_file = PATTERN_DIR / "sources.yaml"
    if not sources_file.exists():
        return {"sources": []}

    with open(sources_file) as f:
        config = yaml.safe_load(f) or {}
    # "sources:" with no value parses to None, and the document itself may not
    # be a mapping; treat both as "nothing configured".
    configured = (config.get("sources") if isinstance(config, dict) else None) or []

    # Fetch all DB source stats once; key by source_id for O(1) lookup.
    db_stats: dict[str, dict] = {}
    try:
        for row in _list_sources(DB_PATH):
            db_stats[row["source_id"]] = row
    except Exception:
        # Best effort (DB may not exist on first run) — but leave a trace.
        logging.getLogger(__name__).warning(
            "Could not read source stats from %s", DB_PATH, exc_info=True
        )

    result = []
    for src in configured:
        if not isinstance(src, dict):
            continue
        transport = src.get("transport", "local")
        src_id = src.get("id", "")

        entry: dict = {"id": src_id, "transport": transport}

        if transport != "ssh":
            entry["path"] = src.get("path", "")
            db = db_stats.get(src_id, {})
            entry["entry_count"] = db.get("entry_count", 0)
            entry["error_count"] = db.get("error_count", 0)
            entry["earliest"] = db.get("earliest")
            entry["latest"] = db.get("latest")
        else:
            entry["host"] = src.get("host", "")
            entry["user"] = src.get("user", "")
            # "glean:" with no value parses to None.
            glean_items: list[dict] = src.get("glean") or []
            entry["glean_types"] = sorted({item.get("type", "plaintext") for item in glean_items})
            entry["glean_items"] = glean_items

            # Aggregate sub-source DB rows that belong to this SSH host.
            # Sub-sources use IDs like "{host_id}/{type}" or "{host_id}/{type}/{container}".
            prefix = src_id + "/"
            matching_rows = [
                v for k, v in db_stats.items()
                if k.startswith(prefix) or k == src_id
            ]
            entry["entry_count"] = sum(r.get("entry_count", 0) for r in matching_rows)
            entry["error_count"] = sum(r.get("error_count", 0) for r in matching_rows)
            earliests = [r["earliest"] for r in matching_rows if r.get("earliest")]
            latests = [r["latest"] for r in matching_rows if r.get("latest")]
            entry["earliest"] = min(earliests) if earliests else None
            entry["latest"] = max(latests) if latests else None

        result.append(entry)

    return {"sources": result}


@router.delete("/api/sources/{source_id}")
def delete_source(source_id: str) -> dict:
    """Delete all log entries (and FTS index rows) for a given source.

    Rows are removed when their ``source_id`` equals *source_id* exactly or
    starts with ``<source_id>:`` (grouped units).  ``%`` and ``_`` inside
    *source_id* are escaped so they match literally; unescaped, they are LIKE
    wildcards and an ID such as ``app_log`` would also delete ``appXlog:...``.

    Returns:
        ``{"deleted": <rows removed from log_entries>, "source_id": source_id}``.
    """
    # Escape the escape character first, then the two LIKE wildcards.
    escaped = source_id.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    group_pattern = escaped + ":%"

    conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        # Exact match covers ungrouped IDs; LIKE match covers grouped stems
        # (e.g. "muninn-journal:Muninn" deletes all "muninn-journal:Muninn:*" units).
        conn.execute(
            "DELETE FROM log_fts WHERE source_id = ? OR source_id LIKE ? ESCAPE '\\'",
            (source_id, group_pattern),
        )
        cur = conn.execute(
            "DELETE FROM log_entries WHERE source_id = ? OR source_id LIKE ? ESCAPE '\\'",
            (source_id, group_pattern),
        )
        deleted = cur.rowcount
        conn.commit()
    finally:
        # Closing without commit discards a partially applied delete.
        conn.close()
    return {"deleted": deleted, "source_id": source_id}


@router.post("/api/sources/{source_id}/glean")
def reglean_source(
    source_id: str,
    background_tasks: BackgroundTasks,
    force: Annotated[bool, Query(description="Bypass fingerprint check and re-glean even if file is unchanged")] = False,
) -> dict:
    """Trigger a re-glean for a configured source from sources.yaml.

    Handles both local file sources and SSH remote sources.  In both cases the
    glean itself runs in the foreground, so the returned count is final when
    the response arrives.  For local sources the FTS index rebuild is scheduled
    as a background task and may still be running at that point.

    Use ``?force=true`` to bypass the fingerprint cache and re-glean the file
    even if mtime and size appear unchanged since the last run.  ``force`` is
    only applied to local file sources.

    Raises:
        HTTPException: 404 when sources.yaml or the source is missing; 422 when
            a local source has no ``path`` or the path does not exist.
    """
    sources_file = PATTERN_DIR / "sources.yaml"
    if not sources_file.exists():
        raise HTTPException(status_code=404, detail="sources.yaml not found")
    with open(sources_file) as f:
        config = yaml.safe_load(f) or {}
    # Tolerate "sources:" with no value and documents that are not a mapping.
    configured = (config.get("sources") if isinstance(config, dict) else None) or []
    matching = [s for s in configured if isinstance(s, dict) and s.get("id") == source_id]
    if not matching:
        raise HTTPException(status_code=404, detail=f"Source {source_id!r} not in sources.yaml")

    src = matching[0]

    if src.get("transport") == "ssh":
        # Fingerprint skipping applies only to local file sources.
        stats = _glean_ssh_source(src, DB_PATH, PATTERN_FILE)
        return {"source_id": source_id, "gleaned": sum(stats.values())}

    # Local file source.
    raw_path = src.get("path")
    if not raw_path:
        # A misconfigured entry is a client-visible config error, not a 500.
        raise HTTPException(status_code=422, detail=f"Source {source_id!r} has no 'path' configured")
    src_path = Path(raw_path)
    if not src_path.exists():
        raise HTTPException(status_code=422, detail=f"Path does not exist: {src_path}")
    stats = _glean_file(src_path, DB_PATH, PATTERN_FILE, force=force)
    background_tasks.add_task(build_fts_index, DB_PATH)
    # Prefer the count recorded under this source's ID; otherwise report the total.
    return {"source_id": source_id, "gleaned": stats.get(source_id, sum(stats.values()))}


@router.post("/api/glean/upload")
async def glean_upload(
    file: UploadFile,
    source_id: Annotated[str | None, Query(description="Override source ID (defaults to filename)")] = None,
    background_tasks: BackgroundTasks = None,
) -> dict:
    """Accept a multipart log file, auto-detect format, glean into DB.

    The upload is copied to a temporary file in chunks (so large uploads are
    not held in memory), gleaned in a worker thread (so the event loop is not
    blocked while parsing and writing to the DB), and the temporary file is
    always removed afterwards.  An FTS index rebuild is scheduled as a
    background task.

    Returns:
        ``{"source_id": str, "gleaned": <total entries>, "stats": <per-source counts>}``.
    """
    # NOTE(review): sid is only echoed in the response; the gleaner is handed
    # the temp path — confirm how the stored source ID is derived.
    sid = source_id or Path(file.filename or "upload").stem
    suffix = Path(file.filename or "log.txt").suffix or ".log"

    tmp_path: Path | None = None
    try:
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp_path = Path(tmp.name)
            while chunk := await file.read(1024 * 1024):
                tmp.write(chunk)
        stats = await asyncio.to_thread(_glean_file, tmp_path, DB_PATH, PATTERN_FILE)
    finally:
        # Covers failures while copying as well as while gleaning.
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
    if background_tasks is not None:
        background_tasks.add_task(build_fts_index, DB_PATH)
    total = sum(stats.values())
    return {"source_id": sid, "gleaned": total, "stats": stats}


class BatchEntry(BaseModel):
    """One pre-parsed log entry inside a ``BatchGleanRequest``.

    NOTE(review): the field set presumably mirrors the log_entries row layout —
    confirm against the schema; the consuming endpoint is outside this section.
    """

    id: str
    source_id: str
    sequence: int
    timestamp_raw: str | None = None
    timestamp_iso: str | None = None
    ingest_time: str
    severity: str | None = None
    repeat_count: int = 1
    # Integer flag rather than bool.
    out_of_order: int = 0
    # A mutable default is safe here: pydantic copies field defaults per instance.
    matched_patterns: list[str] = []
    text: str


class BatchGleanRequest(BaseModel):
    """A batch of already-parsed entries submitted by a host.

    NOTE(review): the consuming endpoint is outside the section reviewed here.
    """

    source_host: str = "unknown"
    entries: list[BatchEntry]


# ── SSH target manager ─────────────────────────────────────────────────────

class SshTargetCreate(BaseModel):
    """Request body for ``POST /api/ssh-targets``."""

    label: str
    host: str
    port: int = 22
    user: str
    # Path to the private key file; the key contents are never part of the API.
    key_path: str


class SshTargetUpdate(BaseModel):
    """Request body for ``PATCH /api/ssh-targets/{target_id}``.

    All fields are optional and are passed through to ``update_target`` as-is.
    NOTE(review): presumably ``None`` means "leave unchanged" — confirm in
    ``ssh_targets.update_target``.
    """

    label: str | None = None
    host: str | None = None
    port: int | None = None
    user: str | None = None
    key_path: str | None = None


@router.get("/api/ssh-targets")
def list_ssh_targets() -> dict:
    """Return all configured SSH targets; key contents are never included."""
    serialised = []
    for target in _ssh_targets.list_targets(DB_PATH):
        serialised.append(_ssh_targets.target_to_dict(target, include_warning=True))
    return {"targets": serialised}


@router.post("/api/ssh-targets")
def create_ssh_target(body: SshTargetCreate) -> dict:
    """Create an SSH target and return its serialised form.

    Raises:
        HTTPException: 422 when the target service rejects the values (ValueError).
    """
    fields = {
        "label": body.label,
        "host": body.host,
        "port": body.port,
        "user": body.user,
        "key_path": body.key_path,
    }
    try:
        created = _ssh_targets.create_target(DB_PATH, **fields)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc))
    return _ssh_targets.target_to_dict(created, include_warning=True)


@router.patch("/api/ssh-targets/{target_id}")
def update_ssh_target(target_id: str, body: SshTargetUpdate) -> dict:
    """Update an SSH target and return its serialised form.

    Raises:
        HTTPException: 422 when the target service rejects the values
            (ValueError); 404 when no target has this ID.
    """
    changes = {
        "label": body.label,
        "host": body.host,
        "port": body.port,
        "user": body.user,
        "key_path": body.key_path,
    }
    try:
        updated = _ssh_targets.update_target(DB_PATH, target_id, **changes)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc))
    if updated is not None:
        return _ssh_targets.target_to_dict(updated, include_warning=True)
    raise HTTPException(status_code=404, detail=f"SSH target {target_id!r} not found")


@router.delete("/api/ssh-targets/{target_id}")
def delete_ssh_target(target_id: str) -> dict:
    """Delete the SSH target identified by ``target_id``.

    Responds 404 when ``_ssh_targets.delete_target`` reports nothing removed.
    """
    removed = _ssh_targets.delete_target(DB_PATH, target_id)
    if removed:
        return {"deleted": target_id}
    raise HTTPException(status_code=404, detail=f"SSH target {target_id!r} not found")


@router.post("/api/ssh-targets/{target_id}/test")
def test_ssh_target(target_id: str) -> dict:
    """Test an SSH connection and return the result from ``_ssh_targets.test_connection``.

    The helper receives ``DB_PATH``; per the original documentation it records
    the outcome there so the UI can show a persistent status badge.

    Raises:
        HTTPException: 404 when the helper raises ``KeyError``.
    """
    try:
        return _ssh_targets.test_connection(DB_PATH, target_id)
    except KeyError as exc:
        # Chain the cause so the lookup failure stays visible in tracebacks.
        raise HTTPException(status_code=404, detail=str(exc)) from exc


# ── Setup / Onboarding wizard ──────────────────────────────────────────────

class SetupWriteBody(BaseModel):
    """Request body for the setup write endpoint."""

    sources: list[dict]  # each dict is checked with validate_source before writing


class NLInterpretBody(BaseModel):
    """Request body for the setup interpret endpoint."""

    description: str  # free-text description handed to _nl_interpret


@router.get("/api/setup/status")
def setup_status() -> dict:
    """Report whether ``sources.yaml`` is present in ``PATTERN_DIR``."""
    return {"configured": (PATTERN_DIR / "sources.yaml").exists()}


@router.get("/api/setup/discover")
def setup_discover() -> dict:
    """Return the result of ``discover_all()`` unchanged."""
    discovered = discover_all()
    return discovered


@router.get("/api/setup/scan")
def setup_scan(
    query: str = "",
    dirs: str = "",
    max_results: int = 25,
) -> dict:
    """Scan the filesystem for candidate log files via ``scan_log_directories``.

    Args:
        query: Optional problem context (e.g. 'nginx 502', 'docker timeout');
            an empty string is passed to the scanner as ``None``.
        dirs: Optional comma-separated directory list; blank items are
            dropped, and an empty list is passed as ``None`` so the scanner
            uses its own defaults.
        max_results: Requested result count, clamped to the range 1..100.

    Returns:
        ``{"candidates": ..., "query": ...}`` where ``query`` is ``None`` when
        no query was given.
    """
    scan_dirs = [d.strip() for d in dirs.split(",") if d.strip()] or None
    # Only the upper bound used to be enforced, so zero or negative values
    # reached the scanner unchecked; clamp both ends.
    bounded_results = max(1, min(max_results, 100))
    candidates = scan_log_directories(
        query=query or None,
        dirs=scan_dirs,
        max_results=bounded_results,
    )
    return {"candidates": candidates, "query": query or None}


@router.post("/api/setup/write")
def setup_write(body: SetupWriteBody, background_tasks: BackgroundTasks) -> dict:
    """Validate and write sources.yaml from a list of selected source definitions.

    Every source is validated first; any failure aborts with a 422 listing all
    errors. When sources.yaml already exists, only sources whose ``id`` is not
    yet present are appended, so post-setup additions are non-destructive.

    Returns:
        ``{"written": n, "skipped": m}``, plus a ``message`` when every
        submitted source was already configured.

    Raises:
        HTTPException: 422 when one or more sources fail validation.
    """
    errors = [err for err in map(validate_source, body.sources) if err]
    if errors:
        raise HTTPException(status_code=422, detail="; ".join(errors))

    sources_file = PATTERN_DIR / "sources.yaml"
    if sources_file.exists():
        # Append to existing file: read current sources, merge, rewrite.
        with open(sources_file, encoding="utf-8") as f:
            current = yaml.safe_load(f) or {}
        # ``sources: null`` (or a missing key) means "no sources yet"; without
        # the ``or []`` a null value would crash the iteration below.
        existing_sources = current.get("sources") or []
        existing_ids = {s.get("id") for s in existing_sources}
        new_sources = [s for s in body.sources if s.get("id") not in existing_ids]
        if not new_sources:
            return {"written": 0, "skipped": len(body.sources), "message": "All sources already configured"}
        content = build_sources_yaml(existing_sources + new_sources)
    else:
        content = build_sources_yaml(body.sources)
        new_sources = body.sources

    PATTERN_DIR.mkdir(parents=True, exist_ok=True)
    sources_file.write_text(content, encoding="utf-8")

    # Trigger a background glean of new sources
    # NOTE(review): this hands sources.yaml itself to _glean_file, which the
    # upload endpoint uses for log files; confirm this is the intended entry
    # point (and what the trailing positional ``1`` means).
    if GLEAN_INTERVAL > 0:
        background_tasks.add_task(
            _glean_file,
            sources_file, DB_PATH, PATTERN_FILE, 1,
        )

    return {"written": len(new_sources), "skipped": len(body.sources) - len(new_sources)}


@router.post("/api/setup/interpret")
def setup_interpret(body: NLInterpretBody) -> dict:
    """Turn a plain-English source description into a source definition.

    Delegates to ``_nl_interpret`` with the LLM settings from the stored
    preferences (blank values are passed as ``None``). If no result comes
    back, or the result fails ``validate_source``, the response carries
    ``source: None`` and ``fallback: True`` so the UI can show the manual form.
    """
    prefs = _load_prefs()
    interpreted = _nl_interpret(
        description=body.description,
        llm_url=prefs.get("llm_url") or None,
        llm_model=prefs.get("llm_model") or None,
        api_key=prefs.get("llm_api_key") or None,
    )
    if interpreted is not None:
        problem = validate_source(interpreted)
        if not problem:
            return {"source": interpreted, "fallback": False}
        return {"source": None, "fallback": True, "validation_error": problem}
    return {"source": None, "fallback": True}


@router.post("/api/glean/batch")
def glean_batch(
    payload: BatchGleanRequest,
    background_tasks: BackgroundTasks,
    authorization: str | None = Header(default=None),
) -> dict:
    """Accept pre-parsed log entries from a remote Turnstone instance (submission protocol).

    Used by nodes with TURNSTONE_SUBMIT_ENDPOINT configured to push their
    pattern-matched entries to a central receiving instance.

    When TURNSTONE_BRANCH_KEY is set in the receiver's environment, requests
    must include ``Authorization: Bearer <api_key>`` matching that value.

    Returns:
        ``{"gleaned": 0}`` for an empty batch; otherwise the number of entries
        submitted (rows skipped by ``INSERT OR IGNORE`` are still counted)
        plus the ``source_host``.

    Raises:
        HTTPException: 401 when a branch key is configured and the bearer
            token is missing or wrong.
    """
    branch_key_env = os.environ.get("TURNSTONE_BRANCH_KEY", "")
    if branch_key_env:
        provided = (authorization or "").removeprefix("Bearer ").strip()
        # Constant-time comparison on bytes: avoids a timing side channel and
        # the TypeError compare_digest raises for non-ASCII str arguments.
        if not provided or not hmac.compare_digest(provided.encode(), branch_key_env.encode()):
            raise HTTPException(status_code=401, detail="Invalid branch API key")
    if not payload.entries:
        return {"gleaned": 0}
    rows = [
        (
            e.id,
            f"{payload.source_host}/{e.source_id}",
            e.sequence,
            e.timestamp_raw,
            e.timestamp_iso,
            e.ingest_time,
            e.severity,
            e.repeat_count,
            e.out_of_order,
            json.dumps(e.matched_patterns),
            e.text,
        )
        for e in payload.entries
    ]
    conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.executemany(
            """
            INSERT OR IGNORE INTO log_entries
              (id, source_id, sequence, timestamp_raw, timestamp_iso,
               ingest_time, severity, repeat_count, out_of_order,
               matched_patterns, text)
            VALUES (?,?,?,?,?,?,?,?,?,?,?)
            """,
            rows,
        )
        conn.commit()
    finally:
        # Close even when the insert fails so the connection is not leaked.
        conn.close()
    background_tasks.add_task(build_fts_index, DB_PATH)
    return {"gleaned": len(payload.entries), "source_host": payload.source_host}


def _require_orchard_admin(authorization: str | None) -> None:
    """Validate the Orchard admin bearer token from an Authorization header.

    Args:
        authorization: Raw ``Authorization`` header value, or ``None``.

    Raises:
        HTTPException: 501 when ``_ORCHARD_ADMIN_KEY`` is ``None``; 401 when
            the supplied token does not match it.
    """
    if _ORCHARD_ADMIN_KEY is None:
        raise HTTPException(status_code=501, detail="Orchard management not enabled on this instance — set TURNSTONE_ORCHARD_ADMIN_KEY")
    provided = (authorization or "").removeprefix("Bearer ").strip()
    # Compare as bytes: compare_digest raises TypeError for non-ASCII str
    # input, which would turn a bad header into a 500 instead of a 401.
    if not hmac.compare_digest(_ORCHARD_ADMIN_KEY.encode(), provided.encode()):
        raise HTTPException(status_code=401, detail="Invalid Orchard admin key")


class GraftRequest(BaseModel):
    """Request body for provisioning an Orchard branch.

    The three fields are passed positionally, in this order, to ``_orchard.graft``.
    """

    slug: str
    contact_email: str
    agreed_to_terms: bool = False


@router.post("/api/orchard/graft")
def orchard_graft(
    body: GraftRequest,
    authorization: str | None = Header(default=None),
) -> dict:
    """Provision a new Orchard branch node.

    Admin-only: requires Authorization: Bearer <TURNSTONE_ORCHARD_ADMIN_KEY>.
    Returns whatever ``_orchard.graft`` returns (per the original docs, the
    submit endpoint and a one-time API key).

    Raises:
        HTTPException: 401/501 from the admin check; 422 when the graft
            raises ``ValueError``; 500 for any other failure.
    """
    _require_orchard_admin(authorization)
    try:
        result = _orchard.graft(body.slug, body.contact_email, body.agreed_to_terms)
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Orchard graft failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return result


@router.get("/api/orchard/branches")
def orchard_list_branches(
    authorization: str | None = Header(default=None),
) -> dict:
    """Return all Orchard branches with ``api_key_hash`` removed. Admin-only."""
    _require_orchard_admin(authorization)
    redacted = []
    for branch in _orchard.list_branches():
        # Copy every field except the stored key hash.
        redacted.append(
            {field: value for field, value in branch.items() if field != "api_key_hash"}
        )
    return {"branches": redacted}


@router.delete("/api/orchard/branches/{slug}")
def orchard_deactivate(
    slug: str,
    authorization: str | None = Header(default=None),
) -> dict:
    """Deactivate a branch via ``_orchard.deactivate``. Admin-only.

    Per the original documentation this stops the branch container and
    removes its Caddy route; that work happens inside the helper.

    Raises:
        HTTPException: 401/501 from the admin check; 404 when the helper
            raises ``KeyError``; 500 for any other failure.
    """
    _require_orchard_admin(authorization)
    try:
        return _orchard.deactivate(slug)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Orchard deactivate failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc


@router.post("/api/orchard/branches/{slug}/anonymize")
def orchard_anonymize(
    slug: str,
    authorization: str | None = Header(default=None),
) -> dict:
    """Run the anonymization worker over a branch DB. Admin-only.

    Returns the result of ``_orchard.run_anonymization(slug)``.

    Raises:
        HTTPException: 401/501 from the admin check; 404 when the helper
            raises ``KeyError``; 500 for any other failure.
    """
    _require_orchard_admin(authorization)
    try:
        return _orchard.run_anonymization(slug)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Orchard anonymize failed: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc


@router.get("/api/tasks/glean/status")
def glean_task_status() -> dict:
    """Snapshot the periodic glean scheduler state as a plain dict.

    Fields come from ``_glean_state()`` plus the module-level interval and
    submit-endpoint settings; ``scheduler_active`` is true only when the
    interval is positive and sources.yaml exists.
    """
    state = _glean_state()
    status = {
        name: getattr(state, name)
        for name in (
            "running",
            "run_count",
            "last_run_at",
            "last_duration_s",
            "last_stats",
            "last_error",
            "next_run_at",
        )
    }
    status["interval_s"] = GLEAN_INTERVAL
    status["scheduler_active"] = GLEAN_INTERVAL > 0 and (PATTERN_DIR / "sources.yaml").exists()
    status["submit_endpoint"] = SUBMIT_ENDPOINT or None
    for name in ("last_submitted_at", "last_submit_count", "last_submit_error"):
        status[name] = getattr(state, name)
    return status


@router.post("/api/tasks/glean")
async def trigger_glean(
    force: Annotated[bool, Query(description="Bypass fingerprint check and re-glean all sources")] = False,
) -> dict:
    """Manually run a glean of all configured sources via ``_run_glean``.

    ``?force=true`` is forwarded as ``force`` so the run can bypass its
    fingerprint cache. Responds 404 when sources.yaml does not exist.
    The original docs state this no-ops if a run is already in progress;
    that logic is not visible in this handler.
    """
    sources_file = PATTERN_DIR / "sources.yaml"
    if sources_file.exists():
        outcome = await _run_glean(
            sources_file,
            DB_PATH,
            PATTERN_FILE,
            submit_endpoint=SUBMIT_ENDPOINT or None,
            source_host=SOURCE_HOST,
            force=force,
        )
        return outcome
    raise HTTPException(status_code=404, detail="sources.yaml not found — configure log sources first")


@router.post("/api/glean/wazuh/alert")
async def glean_wazuh_alert(
    alert: dict,
    source_id: Annotated[str | None, Query(description="Source label (defaults to 'wazuh')")] = None,
    background_tasks: BackgroundTasks = None,
) -> dict:
    """Accept a single Wazuh alert JSON object pushed by a Wazuh custom integration.

    Configure in Wazuh: ossec.conf → <integration><name>custom-turnstone</name>
    pointing to a script that POSTs the alert JSON to this endpoint.

    Returns:
        ``{"ingested": n, "source_id": sid}`` where ``n`` is the number of
        entries parsed from the alert.

    Raises:
        HTTPException: 422 when the payload is not recognised as a Wazuh alert.
    """
    if not _is_wazuh_alert(alert):
        raise HTTPException(status_code=422, detail="Not a valid Wazuh alert object")

    sid = source_id or "wazuh"
    ingest_time = now_iso()
    compiled = load_compiled_patterns(PATTERN_FILE)
    entries = list(_parse_wazuh(iter([json.dumps(alert)]), sid, compiled, ingest_time))
    if entries:
        rows = [
            (
                e.entry_id, e.source_id, e.sequence,
                e.timestamp_raw, e.timestamp_iso, e.ingest_time,
                e.severity, e.repeat_count, int(e.out_of_order),
                json.dumps(list(e.matched_patterns)), e.text,
            )
            for e in entries
        ]
        conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.executemany(
                """
                INSERT OR IGNORE INTO log_entries
                  (id, source_id, sequence, timestamp_raw, timestamp_iso,
                   ingest_time, severity, repeat_count, out_of_order,
                   matched_patterns, text)
                VALUES (?,?,?,?,?,?,?,?,?,?,?)
                """,
                rows,
            )
            conn.commit()
        finally:
            # Close even when the insert fails so the connection is not leaked.
            conn.close()
    if background_tasks is not None:
        background_tasks.add_task(build_fts_index, DB_PATH)
    return {"ingested": len(entries), "source_id": sid}


@router.get("/api/watch/status")
def watch_status() -> dict:
    """Report whether the watcher is active, along with its ``status`` attribute."""
    active = _watcher.is_active()
    return {"active": active, "sources": _watcher.status}


@router.post("/api/watch/reload")
def watch_reload() -> dict:
    """Stop the watcher, reload patterns and watch.yaml, then restart if configured.

    The module-level ``_compiled_patterns`` is replaced with a fresh load of
    ``PATTERN_FILE``. The watcher is only reconfigured and started when
    watch.yaml yields at least one source config.
    """
    global _compiled_patterns
    _watcher.stop()
    _compiled_patterns = load_compiled_patterns(PATTERN_FILE)
    watch_configs = load_watch_config(PATTERN_DIR / "watch.yaml")
    if not watch_configs:
        return {"reloaded": True, "source_count": len(watch_configs)}
    _watcher.configure(watch_configs)
    _watcher.start()
    return {"reloaded": True, "source_count": len(watch_configs)}


@router.get("/api/stats")
def get_stats(
    window: Annotated[int, Query(ge=1, le=168, description="Hours to look back")] = 24,
) -> dict:
    """Return ``_stats`` for the last ``window`` hours, applying stored severity overrides."""
    overrides = _load_prefs().get("severity_overrides", [])
    return _stats(DB_PATH, window_hours=window, severity_overrides=overrides)


@router.post("/api/incidents")
def create_incident_endpoint(body: IncidentCreate) -> dict:
    """Create an incident in the incidents DB and return it as a dict."""
    field_names = ("label", "issue_type", "started_at", "ended_at", "notes", "severity")
    incident_kwargs = {name: getattr(body, name) for name in field_names}
    created = create_incident(INCIDENTS_DB_PATH, **incident_kwargs)
    return dataclasses.asdict(created)


@router.get("/api/incidents")
def list_incidents_endpoint() -> dict:
    """Return every incident from the incidents DB as a list of dicts."""
    serialized = []
    for incident in list_incidents(INCIDENTS_DB_PATH):
        serialized.append(dataclasses.asdict(incident))
    return {"incidents": serialized}


@router.get("/api/incidents/{incident_id}")
def get_incident_endpoint(incident_id: str) -> dict:
    """Return one incident plus its log entries, or 404 if it is unknown.

    The incident comes from the incidents DB; its entries are looked up in
    the main log DB.
    """
    incident = get_incident(INCIDENTS_DB_PATH, incident_id)
    if not incident:
        raise HTTPException(status_code=404, detail="Incident not found")
    related = get_incident_entries(DB_PATH, incident)
    response = dataclasses.asdict(incident)
    response["entries"] = [dataclasses.asdict(entry) for entry in related]
    return response


@router.delete("/api/incidents/{incident_id}")
def delete_incident_endpoint(incident_id: str) -> dict:
    """Delete an incident; respond 404 when ``delete_incident`` reports nothing removed."""
    removed = delete_incident(INCIDENTS_DB_PATH, incident_id)
    if removed:
        return {"deleted": incident_id}
    raise HTTPException(status_code=404, detail="Incident not found")


@router.get("/api/incidents/{incident_id}/bundle")
def get_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
    """Build and return the bundle for an incident, or 404 if it is unknown.

    Every successful call also records the bundle through
    ``record_sent_bundle``, even though this is a GET.
    """
    incident = get_incident(INCIDENTS_DB_PATH, incident_id)
    if incident:
        built = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
        record_sent_bundle(INCIDENTS_DB_PATH, incident_id, built, sanitized=sanitize)
        return built
    raise HTTPException(status_code=404, detail="Incident not found")


@router.get("/api/sent-bundles")
def list_sent_bundles_endpoint() -> dict:
    """Return every recorded sent bundle as a list of dicts."""
    serialized = []
    for sent in list_sent_bundles(INCIDENTS_DB_PATH):
        serialized.append(dataclasses.asdict(sent))
    return {"bundles": serialized}


@router.post("/api/incidents/{incident_id}/send")
def send_incident_bundle(incident_id: str, sanitize: bool = False) -> dict:
    """Build an incident bundle and POST it as JSON to ``BUNDLE_ENDPOINT``.

    Responds 503 when no endpoint is configured, 404 for an unknown incident,
    and 502 when the receiver returns an HTTP error or the send fails.

    NOTE(review): the bundle is recorded via ``record_sent_bundle`` before the
    HTTP request is attempted, so failed sends are recorded too — confirm
    that is intended.
    """
    if not BUNDLE_ENDPOINT:
        raise HTTPException(status_code=503, detail="TURNSTONE_BUNDLE_ENDPOINT not configured")
    incident = get_incident(INCIDENTS_DB_PATH, incident_id)
    if not incident:
        raise HTTPException(status_code=404, detail="Incident not found")
    bundle = build_bundle(DB_PATH, incident, source_host=SOURCE_HOST, sanitize=sanitize)
    record_sent_bundle(INCIDENTS_DB_PATH, incident_id, bundle, sanitized=sanitize)
    outbound = urllib.request.Request(
        BUNDLE_ENDPOINT,
        data=json.dumps(bundle).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(outbound, timeout=15) as reply:
            return {"sent": True, "status": reply.status, "entry_count": len(bundle["log_entries"])}
    except urllib.error.HTTPError as exc:
        # HTTPError is handled first because it is a subclass of OSError.
        raise HTTPException(status_code=502, detail=f"Receiver returned {exc.code}") from exc
    except OSError as exc:
        raise HTTPException(status_code=502, detail=f"Send failed: {exc}") from exc


@router.post("/api/bundles")
def receive_bundle(bundle: dict) -> dict:
    """Store an incoming bundle and return its id and entry count."""
    stored = store_bundle(INCIDENTS_DB_PATH, bundle)
    return {"id": stored.id, "entry_count": stored.entry_count}


@router.get("/api/bundles")
def list_bundles_endpoint() -> dict:
    """Return every stored bundle as a list of dicts."""
    serialized = []
    for stored in list_bundles(INCIDENTS_DB_PATH):
        serialized.append(dataclasses.asdict(stored))
    return {"bundles": serialized}


@router.get("/api/bundles/{bundle_id}")
def get_bundle_endpoint(bundle_id: str) -> dict:
    """Return one stored bundle as a dict, or 404 if it is unknown."""
    found = get_bundle(INCIDENTS_DB_PATH, bundle_id)
    if found:
        return dataclasses.asdict(found)
    raise HTTPException(status_code=404, detail="Bundle not found")


def _tautulli_write_entry(conn: sqlite3.Connection, entry) -> None:
    """Insert one parsed entry into ``log_entries`` using ``INSERT OR IGNORE``.

    Nothing is committed here; the caller owns the transaction.
    ``out_of_order`` is stored as an int and ``matched_patterns`` as a JSON list.
    """
    insert_sql = """
        INSERT OR IGNORE INTO log_entries
          (id, source_id, sequence, timestamp_raw, timestamp_iso,
           ingest_time, severity, repeat_count, out_of_order,
           matched_patterns, text)
        VALUES (?,?,?,?,?,?,?,?,?,?,?)
        """
    patterns_json = json.dumps(list(entry.matched_patterns))
    row = (
        entry.entry_id,
        entry.source_id,
        entry.sequence,
        entry.timestamp_raw,
        entry.timestamp_iso,
        entry.ingest_time,
        entry.severity,
        entry.repeat_count,
        int(entry.out_of_order),
        patterns_json,
        entry.text,
    )
    conn.execute(insert_sql, row)


@router.post("/api/glean/tautulli")
def glean_tautulli(
    payload: dict,
    request: Request,
    background_tasks: BackgroundTasks,
) -> dict:
    """Accept a Tautulli webhook POST and store the event as a log entry.

    When ``tautulli_token`` is set in the preferences, the request must carry
    a matching ``X-Tautulli-Token`` header.

    Raises:
        HTTPException: 403 on a token mismatch; 400 when ``action`` is
            missing from the payload.
    """
    prefs = _load_prefs()
    token = prefs.get("tautulli_token", "")
    if token:
        header_token = request.headers.get("X-Tautulli-Token", "")
        # Compare as bytes: compare_digest raises TypeError for non-ASCII str
        # input, which would turn a bad header into a 500 instead of a 403.
        if not hmac.compare_digest(header_token.encode(), str(token).encode()):
            raise HTTPException(status_code=403, detail="Invalid Tautulli token")

    if "action" not in payload:
        raise HTTPException(status_code=400, detail="Missing required field: action")

    compiled = _compiled_patterns
    entry = _parse_tautulli(payload, compiled)

    conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        _tautulli_write_entry(conn, entry)
        conn.commit()
    finally:
        conn.close()

    background_tasks.add_task(build_fts_index, DB_PATH)
    return {"stored": 1, "entry_id": entry.entry_id, "action": payload.get("action")}


class BlocklistStatusBody(BaseModel):
    """Request body for changing a blocklist candidate's status."""

    status: str  # forwarded unchanged to update_candidate_status


def _make_pihole_client() -> PiholeClient:
    """Construct a ``PiholeClient`` from the stored preferences.

    Reads ``pihole_url``, ``pihole_api_key`` and ``pihole_version`` (default
    ``"v6"``). A ``ValueError`` from the constructor is translated into a 503.
    Per the original note, that error comes from the client's own validation
    of empty url/api_key, so a mocked client never triggers the 503.
    """
    prefs = _load_prefs()
    settings = {
        "url": prefs.get("pihole_url", ""),
        "api_key": prefs.get("pihole_api_key", ""),
        "version": prefs.get("pihole_version", "v6"),
    }
    try:
        client = PiholeClient(**settings)
    except ValueError as exc:
        raise HTTPException(
            status_code=503,
            detail="Pi-hole not configured — set pihole_url and pihole_api_key in Settings",
        ) from exc
    return client


@router.get("/api/blocklist/candidates")
def list_blocklist_candidates(
    status: Annotated[str | None, Query()] = None,
    device_ip: Annotated[str | None, Query()] = None,
) -> dict:
    """List blocklist candidates, optionally filtered by status and device IP."""
    matches = list_candidates(DB_PATH, status=status, device_ip=device_ip)
    serialized = [dataclasses.asdict(match) for match in matches]
    return {"candidates": serialized, "total": len(matches)}


@router.post("/api/blocklist/scan")
def scan_blocklist(background_tasks: BackgroundTasks) -> dict:
    """Queue a blocklist scan as a background task.

    Source ids come from the comma-separated ``router_source_ids`` preference
    and the device map from the JSON ``device_names`` preference. Telemetry
    rules are loaded from telemetry.yaml when that file exists.

    Raises:
        HTTPException: 400 when ``device_names`` is not valid JSON or does
            not decode to a JSON object.
    """
    prefs = _load_prefs()
    source_ids = [s.strip() for s in prefs.get("router_source_ids", "").split(",") if s.strip()]
    device_map: dict[str, str] = {}
    raw_devices = prefs.get("device_names", "")
    if raw_devices:
        try:
            device_map = json.loads(raw_devices)
        except (ValueError, TypeError) as exc:
            raise HTTPException(
                status_code=400,
                detail="device_names is not valid JSON — update it in Settings",
            ) from exc
        # Valid JSON that is not an object (a list, a number, ...) would only
        # fail later, inside the background scan, where nobody sees the error.
        if not isinstance(device_map, dict):
            raise HTTPException(
                status_code=400,
                detail="device_names must be a JSON object — update it in Settings",
            )
    telemetry_path = PATTERN_DIR / "telemetry.yaml"
    telemetry_rules = load_telemetry_rules(telemetry_path) if telemetry_path.exists() else []
    background_tasks.add_task(run_scan, DB_PATH, source_ids, device_map, telemetry_rules)
    return {"started": True}


@router.patch("/api/blocklist/candidates/{candidate_id}")
def update_blocklist_status(candidate_id: str, body: BlocklistStatusBody) -> dict:
    """Change a candidate's status and return the updated candidate as a dict.

    Raises:
        HTTPException: 400 when ``update_candidate_status`` raises
            ``ValueError``; 404 when it raises ``KeyError``.
    """
    try:
        candidate = update_candidate_status(DB_PATH, candidate_id, body.status)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except KeyError as exc:
        raise HTTPException(status_code=404, detail="Candidate not found") from exc
    # Serialized outside the try so only the status update is guarded.
    return dataclasses.asdict(candidate)


@router.post("/api/blocklist/push/{candidate_id}")
def push_to_pihole(candidate_id: str) -> dict:
    """Block an approved candidate on Pi-hole and mark it as pushed.

    Raises:
        HTTPException: 404 for an unknown candidate; 400 when its status is
            not ``"approved"``; 503 (from ``_make_pihole_client``) when
            Pi-hole is not configured.
    """
    try:
        candidate = get_candidate(DB_PATH, candidate_id)
    except KeyError as exc:
        # Chain the cause so the lookup failure stays visible in tracebacks.
        raise HTTPException(status_code=404, detail="Candidate not found") from exc
    if candidate.status != "approved":
        raise HTTPException(
            status_code=400,
            detail=f"Candidate must be approved before pushing (current status: {candidate.status!r})",
        )
    pihole = _make_pihole_client()
    pihole.block(candidate.domain_or_ip)
    mark_pushed(DB_PATH, candidate_id)
    return {"pushed": True, "domain": candidate.domain_or_ip}


@router.delete("/api/blocklist/push/{candidate_id}")
def unblock_from_pihole(candidate_id: str) -> dict:
    """Unblock a pushed candidate on Pi-hole and mark it as unblocked.

    Raises:
        HTTPException: 404 for an unknown candidate; 400 when its status is
            not ``"pushed"``; 503 (from ``_make_pihole_client``) when
            Pi-hole is not configured.
    """
    try:
        candidate = get_candidate(DB_PATH, candidate_id)
    except KeyError as exc:
        # Chain the cause so the lookup failure stays visible in tracebacks.
        raise HTTPException(status_code=404, detail="Candidate not found") from exc
    if candidate.status != "pushed":
        raise HTTPException(
            status_code=400,
            detail=f"Candidate is not currently pushed (status: {candidate.status!r})",
        )
    pihole = _make_pihole_client()
    pihole.unblock(candidate.domain_or_ip)
    mark_unblocked(DB_PATH, candidate_id)
    return {"unblocked": True, "domain": candidate.domain_or_ip}


@router.post("/api/blocklist/test")
def test_pihole_connection() -> dict:
    """Return the result of ``test_connection()`` on a freshly built Pi-hole client."""
    return _make_pihole_client().test_connection()


# Mount the main API router on the application.
app.include_router(router)

# Router for the context endpoints; every route on it depends on _check_api_key.
_ctx = APIRouter(prefix="/turnstone/api/context", dependencies=[Depends(_check_api_key)])


@_ctx.post("/docs")
async def upload_doc(file: UploadFile):
    """Ingest an uploaded document into the context DB.

    The upload is read fully, then processed by ``_glean_upload`` in a worker
    thread. A missing filename is replaced with ``"upload"``.

    Raises:
        HTTPException: 415 for an unsupported document type; 413 when the
            file is too large.
    """
    # NOTE(review): the whole upload is read into memory before _glean_upload
    # can reject it as too large.
    content = await file.read()
    try:
        result = await asyncio.to_thread(
            _glean_upload, CONTEXT_DB_PATH, file.filename or "upload", content
        )
    except UnsupportedDocType as e:
        raise HTTPException(status_code=415, detail=str(e)) from e
    except FileTooLarge as e:
        raise HTTPException(status_code=413, detail=str(e)) from e
    return result


@_ctx.get("/docs")
async def list_docs():
    """Return id, filename, type, size and upload time for each context document."""
    documents = await asyncio.to_thread(_list_documents, CONTEXT_DB_PATH)
    exposed = ("id", "filename", "doc_type", "file_size", "uploaded_at")
    listing = []
    for document in documents:
        listing.append({name: getattr(document, name) for name in exposed})
    return listing


@_ctx.delete("/docs/{doc_id}")
async def delete_doc(doc_id: str):
    """Delete a context document; respond 404 when nothing was removed."""
    removed = await asyncio.to_thread(_delete_document, CONTEXT_DB_PATH, doc_id)
    if removed:
        return {"deleted": doc_id}
    raise HTTPException(status_code=404, detail="Document not found")


@_ctx.post("/facts")
async def create_fact(body: FactBody):
    """Store a fact in the context DB and return its fields as a dict."""
    stored = await asyncio.to_thread(
        _add_fact, CONTEXT_DB_PATH, body.category, body.key, body.value, body.source
    )
    exposed = ("id", "category", "key", "value", "source", "created_at")
    return {name: getattr(stored, name) for name in exposed}


@_ctx.get("/facts")
async def list_facts_endpoint(category: str | None = None):
    """List stored facts, optionally restricted to one category."""
    stored_facts = await asyncio.to_thread(_list_facts, CONTEXT_DB_PATH, category)
    exposed = ("id", "category", "key", "value", "source", "created_at")
    listing = []
    for fact in stored_facts:
        listing.append({name: getattr(fact, name) for name in exposed})
    return listing


@_ctx.delete("/facts/{fact_id}")
async def delete_fact_endpoint(fact_id: str):
    """Delete a fact; respond 404 when nothing was removed."""
    removed = await asyncio.to_thread(_delete_fact, CONTEXT_DB_PATH, fact_id)
    if removed:
        return {"deleted": fact_id}
    raise HTTPException(status_code=404, detail="Fact not found")


@_ctx.get("/wizard/schema")
async def wizard_schema():
    """Return the result of ``_wizard_schema()`` unchanged."""
    schema = _wizard_schema()
    return schema


@_ctx.post("/wizard/step")
async def wizard_step(body: WizardStepBody):
    """Apply one wizard answer and return the updated session plus its completion flag."""
    next_session = advance_step(body.session, body.step_id, body.answer)
    finished = is_complete(next_session)
    return {"session": next_session, "complete": finished}


@_ctx.post("/wizard/apply")
async def wizard_apply(body: WizardApplyBody):
    """Apply a completed wizard session to the context DB; 400 if it is incomplete."""
    if is_complete(body.session):
        return await asyncio.to_thread(apply_session, CONTEXT_DB_PATH, body.session)
    raise HTTPException(status_code=400, detail="Wizard session is not complete")


@_ctx.get("/debug/search")
async def debug_search(q: str):
    """Run context retrieval for ``q`` and return its facts, chunks and formatted block."""
    retrieved = await asyncio.to_thread(_retrieve_context, CONTEXT_DB_PATH, q)
    return {
        "facts": retrieved.facts,
        "chunks": retrieved.chunks,
        "block": format_context_block(retrieved),
    }


# Mount the context router on the application.
app.include_router(_ctx)


# ---------------------------------------------------------------------------
# Anomaly scoring endpoints
# ---------------------------------------------------------------------------

# Router for the anomaly endpoints; every route on it depends on _check_api_key.
_anomaly = APIRouter(prefix="/turnstone/api/anomaly", dependencies=[Depends(_check_api_key)])


@_anomaly.get("/status")
async def anomaly_status():
    """Report the anomaly scorer configuration merged with its current state.

    Attributes of the state object are merged in last, so they override a
    configuration key of the same name.
    """
    state = _scorer_state()
    report = {
        "model": ANOMALY_MODEL or None,
        "threshold": ANOMALY_THRESHOLD,
        "device": ANOMALY_DEVICE,
        "enabled": bool(ANOMALY_MODEL),
    }
    report.update(vars(state))
    return report


@_anomaly.post("/run")
async def anomaly_run(background_tasks: BackgroundTasks):
    """Queue a manual anomaly scoring pass; 400 when no model is configured."""
    if ANOMALY_MODEL:
        scorer_args = (DB_PATH, ANOMALY_MODEL, ANOMALY_DEVICE, 256, ANOMALY_THRESHOLD)
        background_tasks.add_task(_run_scorer, *scorer_args)
        return {"ok": True, "message": "scorer triggered"}
    raise HTTPException(status_code=400, detail="TURNSTONE_ANOMALY_MODEL not configured")


@_anomaly.get("/detections")
async def anomaly_detections(
    limit: int = Query(100, ge=1, le=1000),
    unacked_only: bool = Query(False),
    label: str | None = Query(None),
    scorer: str | None = Query(None),
):
    """Return detections plus their count, optionally filtered by label and scorer.

    The query runs in the event loop's default executor.
    """

    def _fetch():
        return _list_detections(
            DB_PATH, limit=limit, unacked_only=unacked_only, label=label, scorer=scorer
        )

    found = await asyncio.get_running_loop().run_in_executor(None, _fetch)
    return {"detections": found, "total": len(found)}


@_anomaly.post("/detections/{detection_id}/acknowledge")
async def acknowledge_detection(detection_id: str, notes: str = ""):
    """Mark a detection as reviewed; respond 404 when nothing was updated."""

    def _acknowledge():
        return _ack_detection(DB_PATH, detection_id, notes)

    changed = await asyncio.get_running_loop().run_in_executor(None, _acknowledge)
    if changed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail="Detection not found")


# Mount the anomaly router on the application.
app.include_router(_anomaly)


# ---------------------------------------------------------------------------
# Cybersec scoring endpoints
# ---------------------------------------------------------------------------

# Router for the cybersec endpoints; every route on it depends on _check_api_key.
_cybersec_router = APIRouter(prefix="/turnstone/api/cybersec", dependencies=[Depends(_check_api_key)])


@_cybersec_router.get("/status")
async def cybersec_status():
    """Report the cybersec scorer configuration merged with its current state.

    Keys from ``_cybersec_state()`` are merged in last, so they override a
    configuration key of the same name.
    """
    report = {
        "model": CYBERSEC_MODEL or None,
        "threshold": CYBERSEC_THRESHOLD,
        "device": CYBERSEC_DEVICE,
        "enabled": bool(CYBERSEC_MODEL),
        "candidate_labels": CYBERSEC_LABELS,
    }
    report.update(_cybersec_state())
    return report


@_cybersec_router.post("/run")
async def cybersec_run(background_tasks: BackgroundTasks):
    """Queue a manual cybersec scoring pass; 400 when no model is configured."""
    if CYBERSEC_MODEL:
        scorer_args = (DB_PATH, CYBERSEC_MODEL, CYBERSEC_DEVICE, 32, CYBERSEC_THRESHOLD)
        background_tasks.add_task(_run_cybersec, *scorer_args)
        return {"ok": True, "message": "cybersec scorer triggered"}
    raise HTTPException(status_code=400, detail="TURNSTONE_CYBERSEC_MODEL not configured")


@_cybersec_router.get("/detections")
async def cybersec_detections(
    limit: int = Query(100, ge=1, le=1000),
    unacked_only: bool = Query(False),
    label: str | None = Query(None),
):
    """Return cybersec detections plus their count, optionally filtered by label.

    The query runs in the event loop's default executor.
    """

    def _fetch():
        return _list_cybersec(DB_PATH, limit=limit, unacked_only=unacked_only, label=label)

    found = await asyncio.get_running_loop().run_in_executor(None, _fetch)
    return {"detections": found, "total": len(found)}


# Mount the cybersec router on the application.
app.include_router(_cybersec_router)


# Root redirect → /turnstone/
@app.get("/")
def root_redirect() -> RedirectResponse:
    """Redirect the bare root URL to the application's /turnstone/ prefix."""
    destination = "/turnstone/"
    return RedirectResponse(url=destination)


# SPA catch-all — serves index.html for any /turnstone/* path that isn't a
# static asset or API route. Must be registered after include_router.
@app.get("/turnstone/{path:path}")
def spa_fallback(path: str) -> FileResponse:
    """Serve a built SPA asset, falling back to index.html for client-side routes.

    Args:
        path: Remainder of the URL after ``/turnstone/``; untrusted input.

    Returns:
        The matching file inside ``DIST_DIR`` when one exists, otherwise
        ``index.html``; an empty 503 response when ``DIST_DIR`` is missing.
    """
    if not DIST_DIR.exists():
        return FileResponse("/dev/null", status_code=503)
    dist_root = DIST_DIR.resolve()
    # Resolve before checking containment: joining the raw path allowed
    # ``..`` segments or an absolute path (``/turnstone//etc/passwd``) to
    # escape DIST_DIR and serve arbitrary files.
    candidate = (dist_root / path).resolve()
    if candidate.is_relative_to(dist_root) and candidate.is_file():
        return FileResponse(str(candidate))
    return FileResponse(str(DIST_DIR / "index.html"))