Ingest pipeline (journald / Caddy / Docker-wrapped formats) with per-source state tracking (repeat dedup, out-of-order detection), named pattern tagging at ingest time, and idempotent SHA1-keyed writes. FTS5 search layer with porter stemmer, severity/source/pattern/time filters, and BM25 ranking. MCP server (FastMCP stdio) with three tools: search_logs, diagnose, list_log_sources — compatible with both Claude Code and Copilot CLI. WAL mode enabled on all connections. FTS index auto-built after ingest. MCP configs included for Claude Code (.mcp.json) and Copilot CLI (.github/copilot/mcp.json).
33 lines
1.4 KiB
Python
33 lines
1.4 KiB
Python
"""Core data models for Turnstone log retrieval."""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
@dataclass(frozen=True)
class RetrievedEntry:
    """A log entry returned by the retriever, with source metadata and scores.

    Instances are immutable (``frozen=True``). The first nine fields are
    required; ``matched_patterns``, ``text`` and the two score fields have
    defaults.
    """

    # --- identity and provenance -------------------------------------------
    entry_id: str
    source_id: str  # log file path or service name
    sequence: int  # original line number — ingest order, not wall-clock order

    # --- timing ------------------------------------------------------------
    timestamp_raw: str | None  # timestamp as it appeared in the log
    timestamp_iso: str | None  # parsed to ISO 8601 for sorting; None if unparseable
    ingest_time: str  # when Turnstone indexed this entry (wall clock)

    # --- ingest-time classification ----------------------------------------
    severity: str | None  # ERROR / WARN / INFO / DEBUG / None if not detected
    repeat_count: int  # collapsed duplicate count (1 = unique)
    out_of_order: bool  # True when timestamp precedes predecessor's timestamp
    matched_patterns: tuple[str, ...] = field(default_factory=tuple)  # named pattern hits

    # --- payload and ranking -----------------------------------------------
    text: str = ""
    bm25_score: float = 0.0
    vector_score: float | None = None
|
|
|
|
|
|
@dataclass(frozen=True)
class LogPattern:
    """A named regex pattern for tagging entries at ingest time.

    Instances are immutable (``frozen=True``) and all four fields are
    required. ``pattern`` is stored as a plain string; this class does not
    compile or validate it.
    """

    name: str  # e.g. "device_disconnect", "auth_failure"
    pattern: str  # regex string
    severity: str  # suggested severity if not present in log line
    description: str  # human-readable explanation for the UI
|