feat: qBittorrent log ingestor with 8 diagnostic patterns
Adds app/ingest/qbittorrent.py — auto-detected by the pipeline on the (YYYY/MM/DD HH:MM:SS) timestamp fingerprint. Handles both slash and dash date separators, optional [Warning|Critical] bracket levels, and multi-line continuations (Qt stack traces). patterns/default.yaml: 8 new qbit_ patterns covering tracker errors, port bind failures, disk errors, hash check failures, peer bans, download completion, ratio limits, and session errors. manage.sh: ingest-qbit [HOST] command mirrors ingest-plex — probes known default log paths locally or via SSH, ingests, restarts server. 14 tests covering format detection, severity mapping, multiline handling, and timestamp normalization.
This commit is contained in:
parent
19d3827e2d
commit
a3c0962277
5 changed files with 286 additions and 1 deletion
|
|
@ -8,7 +8,7 @@ import sqlite3
|
|||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
from app.ingest import caddy, docker_log, journald, plaintext, plex
|
||||
from app.ingest import caddy, docker_log, journald, plaintext, plex, qbittorrent
|
||||
from app.ingest.base import _compile, load_patterns, now_iso
|
||||
from app.services.models import LogPattern, RetrievedEntry
|
||||
from app.services.search import build_fts_index
|
||||
|
|
@ -69,6 +69,8 @@ def _detect_format(first_line: str) -> str:
|
|||
pass
|
||||
if plex.is_plex_log(first_line):
|
||||
return "plex"
|
||||
if qbittorrent.is_qbit_log(first_line):
|
||||
return "qbittorrent"
|
||||
return "plaintext"
|
||||
|
||||
|
||||
|
|
@ -101,6 +103,8 @@ def _parse_file(
|
|||
yield from caddy.parse(all_lines(), source_id, compiled, ingest_time)
|
||||
elif fmt == "plex":
|
||||
yield from plex.parse(all_lines(), source_id, compiled, ingest_time)
|
||||
elif fmt == "qbittorrent":
|
||||
yield from qbittorrent.parse(all_lines(), source_id, compiled, ingest_time)
|
||||
else:
|
||||
yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time)
|
||||
|
||||
|
|
|
|||
100
app/ingest/qbittorrent.py
Normal file
100
app/ingest/qbittorrent.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""qBittorrent log parser.
|
||||
|
||||
Handles the standard qBittorrent log format:
|
||||
(YYYY/MM/DD HH:MM:SS) [Level] Message text
|
||||
(YYYY/MM/DD HH:MM:SS) Message text (no explicit level)
|
||||
|
||||
The level field is optional — qBittorrent omits it for Normal/Info messages
|
||||
in some versions. Parenthesised timestamp is the format fingerprint.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from typing import Iterator
|
||||
|
||||
from app.ingest.base import (
|
||||
SourceState, apply_patterns, detect_severity, make_entry_id, now_iso,
|
||||
)
|
||||
from app.services.models import LogPattern, RetrievedEntry
|
||||
|
||||
# (2026/05/09 14:23:01) [Warning] Tracker 'http://...' is not working.
|
||||
# (2026/05/09 14:23:01) qBittorrent v5.0.3 started
|
||||
_LINE_RE = re.compile(
|
||||
r"^\((?P<ts>\d{4}[/-]\d{2}[/-]\d{2}\s+\d{2}:\d{2}:\d{2})\)"
|
||||
r"(?:\s+\[(?P<level>[^\]]+)\])?"
|
||||
r"\s+(?P<msg>.*)$"
|
||||
)
|
||||
|
||||
_LEVEL_MAP = {
|
||||
"normal": "INFO",
|
||||
"info": "INFO",
|
||||
"warning": "WARN",
|
||||
"critical": "CRITICAL",
|
||||
}
|
||||
|
||||
|
||||
def _parse_ts(ts_str: str) -> tuple[str, str]:
|
||||
"""Return (raw, iso). Handles both YYYY/MM/DD and YYYY-MM-DD."""
|
||||
normalized = ts_str.replace("/", "-")
|
||||
try:
|
||||
dt = datetime.strptime(normalized, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
|
||||
return ts_str, dt.isoformat()
|
||||
except ValueError:
|
||||
return ts_str, ""
|
||||
|
||||
|
||||
def is_qbit_log(first_line: str) -> bool:
    """Report whether ``first_line`` has the qBittorrent log-line shape.

    Surrounding whitespace is stripped before the line is tested against
    ``_LINE_RE``.
    """
    candidate = first_line.strip()
    return _LINE_RE.match(candidate) is not None
|
||||
|
||||
|
||||
def parse(
    lines: Iterator[str],
    source_id: str,
    compiled_patterns: list[tuple[LogPattern, object]],
    ingest_time: str | None = None,
) -> Iterator[RetrievedEntry]:
    """Yield one ``RetrievedEntry`` per qBittorrent log record.

    A line matching ``_LINE_RE`` starts a new record.  Non-matching lines
    that follow are treated as continuations (Qt stack traces, wrapped
    messages): each is stripped and appended to the record's text on its own
    line.  Blank continuation lines are skipped, and non-matching lines seen
    before the first record are discarded.

    Severity comes from the bracketed level via ``_LEVEL_MAP`` when it is a
    known name; otherwise ``detect_severity`` is applied to the header
    line's message text.

    Args:
        lines: Raw log lines, with or without their line terminator.
        source_id: Identifier stored on every emitted entry.
        compiled_patterns: Pattern pairs handed unchanged to
            ``apply_patterns``.
        ingest_time: Ingest timestamp for every entry; ``now_iso()`` is used
            when this is ``None`` or empty.

    Yields:
        Entries in input order; a record is emitted once the next record
        header (or the end of input) is reached.
    """
    ingest_time = ingest_time or now_iso()
    state = SourceState()
    pending_text: str | None = None
    pending_meta: dict = {}

    def _emit(text: str, meta: dict) -> RetrievedEntry:
        # observe() must run first: the entry id and sequence below are read
        # from ``state`` after it has seen this record.
        repeat, out_of_order = state.observe(text, meta.get("ts_iso"))
        matched = apply_patterns(text, compiled_patterns)
        return RetrievedEntry(
            entry_id=make_entry_id(source_id, state.sequence, text),
            source_id=source_id,
            sequence=state.sequence,
            timestamp_raw=meta.get("ts_raw", ""),
            timestamp_iso=meta.get("ts_iso", ""),
            ingest_time=ingest_time,
            severity=meta.get("severity"),
            repeat_count=repeat,
            out_of_order=out_of_order,
            matched_patterns=matched,
            text=text,
        )

    for raw_line in lines:
        # Drop the whole terminator.  Stripping only "\n" leaves a "\r" from
        # CRLF input glued to the end of the message, since "." matches it.
        line = raw_line.rstrip("\r\n")
        m = _LINE_RE.match(line)
        if m:
            # A new header closes whatever record was being accumulated.
            if pending_text is not None:
                yield _emit(pending_text, pending_meta)

            ts_raw, ts_iso = _parse_ts(m.group("ts"))
            level_raw = (m.group("level") or "").lower()
            severity = _LEVEL_MAP.get(level_raw) or detect_severity(m.group("msg"))
            pending_meta = {
                "ts_raw": ts_raw,
                "ts_iso": ts_iso,
                "severity": severity,
            }
            pending_text = m.group("msg")
        elif pending_text is not None:
            # Continuation line (Qt stack trace or wrapped message).  Blank
            # lines carry no content; appending them would only add bare
            # newlines to the text that feeds the entry id and repeat check.
            continuation = line.strip()
            if continuation:
                pending_text += "\n" + continuation

    # Flush the final record, which has no following header to trigger it.
    if pending_text is not None:
        yield _emit(pending_text, pending_meta)
|
||||
51
manage.sh
51
manage.sh
|
|
@ -87,6 +87,7 @@ usage() {
|
|||
echo " Data:"
|
||||
echo -e " ${GREEN}ingest PATH [DB]${NC} Ingest a log file or corpus directory"
|
||||
echo -e " ${GREEN}ingest-plex [HOST]${NC} Pull Plex log from Cass (or HOST) and ingest"
|
||||
echo -e " ${GREEN}ingest-qbit [HOST]${NC} Pull qBittorrent log locally or from HOST via SSH"
|
||||
echo -e " ${GREEN}build-fts${NC} Rebuild the FTS search index"
|
||||
echo ""
|
||||
echo " Tests:"
|
||||
|
|
@ -233,6 +234,56 @@ case "$CMD" in
|
|||
exec bash "$0" restart
|
||||
;;
|
||||
|
||||
ingest-qbit)
    # Pull the qBittorrent log (from this machine, or from HOST over SSH),
    # ingest it into $DB, then restart the server.
    #
    # NOTE(review): `error` is assumed to terminate the script, as the
    # original flow already relied on -- confirm against its definition.
    QBIT_HOST="${1:-}"

    # The per-user candidates live under the home directory of the machine
    # that owns the log.  For a remote HOST that is the *remote* $HOME, which
    # need not match the local one, so resolve it on the remote side.
    if [[ -n "$QBIT_HOST" ]]; then
        QBIT_HOME="$(ssh "$QBIT_HOST" 'printf %s "$HOME"' 2>/dev/null)" || QBIT_HOME=""
        if [[ -z "$QBIT_HOME" ]]; then
            error "Could not resolve the home directory on ${QBIT_HOST} via SSH."
        fi
    else
        QBIT_HOME="$HOME"
    fi

    # Default log locations in priority order
    QBIT_LOG_PATHS=(
        "${QBIT_HOME}/.local/share/qBittorrent/logs/qbittorrent.log"
        "${QBIT_HOME}/.config/qBittorrent/logs/qbittorrent.log"
        "/var/log/qbittorrent/qbittorrent.log"
    )

    # mktemp gives a private, unpredictable staging directory; a fixed
    # /tmp/<name>-$$ path can be pre-created by another local user.
    TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/turnstone-qbit-XXXXXX")" \
        || error "Could not create a temporary directory."

    if [[ -n "$QBIT_HOST" ]]; then
        info "Fetching qBittorrent log from ${QBIT_HOST}…"
        REMOTE_LOG=""
        for p in "${QBIT_LOG_PATHS[@]}"; do
            if ssh "$QBIT_HOST" "test -f '$p'" 2>/dev/null; then
                REMOTE_LOG="$p"
                break
            fi
        done
        if [[ -z "$REMOTE_LOG" ]]; then
            rm -rf "$TMP_DIR"
            error "No qBittorrent log found on ${QBIT_HOST}. Tried: ${QBIT_LOG_PATHS[*]}"
        fi
        local_name="${QBIT_HOST}-qbittorrent.log"
        # A failed transfer would otherwise leave an empty or truncated file
        # behind to be ingested as if it were the real log.
        if ! ssh "$QBIT_HOST" "cat '$REMOTE_LOG'" > "${TMP_DIR}/${local_name}"; then
            rm -rf "$TMP_DIR"
            error "Failed to copy ${REMOTE_LOG} from ${QBIT_HOST}."
        fi
        info "  ← ${REMOTE_LOG} (${QBIT_HOST})"
    else
        LOCAL_LOG=""
        for p in "${QBIT_LOG_PATHS[@]}"; do
            if [[ -f "$p" ]]; then
                LOCAL_LOG="$p"
                break
            fi
        done
        if [[ -z "$LOCAL_LOG" ]]; then
            rm -rf "$TMP_DIR"
            error "No qBittorrent log found locally. Tried: ${QBIT_LOG_PATHS[*]}"
        fi
        cp "$LOCAL_LOG" "${TMP_DIR}/qbittorrent.log"
        info "  ← ${LOCAL_LOG}"
    fi

    info "Ingesting into ${DB}…"
    "$PYTHON" scripts/ingest_corpus.py "${TMP_DIR}"/*.log "$DB"
    rm -rf "$TMP_DIR"
    info "Done. Restarting server…"
    exec bash "$0" restart
    ;;
|
||||
|
||||
build-fts)
|
||||
info "Rebuilding FTS index for ${DB}…"
|
||||
TURNSTONE_DB="$DB" "$PYTHON" scripts/build_fts_index.py "$DB"
|
||||
|
|
|
|||
|
|
@ -83,6 +83,46 @@ patterns:
|
|||
|
||||
# Add device/service-specific patterns below this line:
|
||||
|
||||
- name: qbit_tracker_error
|
||||
pattern: "(tracker|announce).*(not working|error|fail|unreachable|timeout|refused|invalid)"
|
||||
severity: WARN
|
||||
description: qBittorrent tracker connection or announce failure
|
||||
|
||||
- name: qbit_port_bind
|
||||
pattern: "(couldn't? listen|bind.*fail|port.*in use|listening.*fail)"
|
||||
severity: CRITICAL
|
||||
description: qBittorrent failed to bind listen port — firewall or port conflict
|
||||
|
||||
- name: qbit_disk_error
|
||||
pattern: "(cannot (write|open|create)|disk.*error|i/o error|file.*fail|write.*fail)"
|
||||
severity: ERROR
|
||||
description: qBittorrent disk write or file access failure
|
||||
|
||||
- name: qbit_hash_fail
|
||||
pattern: "(hash.*(check|fail|mismatch)|recheck|piece.*fail)"
|
||||
severity: WARN
|
||||
description: qBittorrent torrent hash verification failure — possible corrupt data
|
||||
|
||||
- name: qbit_peer_ban
|
||||
pattern: "(peer.*ban|banned.*peer|blocked.*peer)"
|
||||
severity: INFO
|
||||
description: qBittorrent peer banned (encryption enforcement or bad actor)
|
||||
|
||||
- name: qbit_download_complete
|
||||
pattern: "(download.*complet|torrent.*finish|has finished downloading)"
|
||||
severity: INFO
|
||||
description: qBittorrent torrent download completed
|
||||
|
||||
- name: qbit_ratio_limit
|
||||
pattern: "(ratio.*reach|seeding.*limit|stop.*seeding|upload.*limit)"
|
||||
severity: INFO
|
||||
description: qBittorrent seeding ratio or time limit reached
|
||||
|
||||
- name: qbit_session_error
|
||||
pattern: "(session.*error|couldn't? resume|resume.*fail|torrent.*error)"
|
||||
severity: ERROR
|
||||
description: qBittorrent session or resume data error
|
||||
|
||||
- name: plex_eae_failure
|
||||
pattern: "(EAE timeout|EAE not running|eac3_eae.*error reading output|Error submitting packet to decoder.*I/O error)"
|
||||
severity: ERROR
|
||||
|
|
|
|||
90
tests/test_ingest_qbittorrent.py
Normal file
90
tests/test_ingest_qbittorrent.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
"""Tests for the qBittorrent log ingestor."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.ingest.qbittorrent import is_qbit_log, parse
|
||||
|
||||
SAMPLE_LOG = """\
|
||||
(2026/05/09 14:10:01) qBittorrent v5.0.3 started
|
||||
(2026/05/09 14:10:02) [Warning] Tracker 'http://tracker.example.com/announce' is not working. Reason: Connection timed out
|
||||
(2026/05/09 14:10:03) [Critical] Couldn't listen on any of the network interfaces. Aborting!
|
||||
(2026/05/09 14:10:04) Download of 'ubuntu-24.04.iso' has finished.
|
||||
(2026/05/09 14:10:05) [Warning] Hash check failed for piece 42 of 'ubuntu-24.04.iso'
|
||||
(2026/05/09 14:10:06) Some long message
|
||||
that continues on the next line
|
||||
and a third line
|
||||
(2026/05/09 14:10:07) Normal message without bracket level
|
||||
"""
|
||||
|
||||
DASH_FORMAT = "(2026-05-09 14:10:01) qBittorrent v4.6.2 started\n"
|
||||
|
||||
|
||||
class TestDetector:
    """Format-fingerprint checks for ``is_qbit_log``."""

    def test_detects_slash_format(self):
        """A slash-separated parenthesised timestamp is recognised."""
        line = "(2026/05/09 14:10:01) qBittorrent started"
        assert is_qbit_log(line)

    def test_detects_dash_format(self):
        """A dash-separated parenthesised timestamp is recognised."""
        line = DASH_FORMAT.strip()
        assert is_qbit_log(line)

    def test_rejects_plex_format(self):
        """A Plex-style line is not claimed."""
        line = "Jan 01, 2026 12:00:00.000 [12345] DEBUG - message"
        assert not is_qbit_log(line)

    def test_rejects_journald_json(self):
        """A journald JSON record is not claimed."""
        line = '{"__REALTIME_TIMESTAMP": "12345", "MESSAGE": "hi"}'
        assert not is_qbit_log(line)

    def test_rejects_plaintext(self):
        """A bare timestamp without parentheses is not claimed."""
        line = "2026-05-09 14:10:01 some syslog line"
        assert not is_qbit_log(line)
|
||||
|
||||
|
||||
class TestParser:
    """Behavioural checks for ``parse`` run against the sample logs above."""

    def _parse(self, text: str) -> list:
        """Parse ``text`` line by line as source ``qbit_test`` with no patterns."""
        line_iter = iter(text.splitlines(keepends=True))
        return list(parse(line_iter, "qbit_test", []))

    def test_entry_count(self):
        """The sample log yields seven records."""
        assert len(self._parse(SAMPLE_LOG)) == 7

    def test_startup_entry(self):
        """The first record keeps its text and gets a UTC ISO timestamp."""
        first = self._parse(SAMPLE_LOG)[0]
        assert "qBittorrent v5.0.3 started" in first.text
        # Without a bracket level or a severity keyword in the text the
        # severity stays None (consistent with other ingestors).
        assert first.severity is None
        assert first.timestamp_iso == "2026-05-09T14:10:01+00:00"

    def test_warning_severity(self):
        """A ``[Warning]`` bracket maps to WARN."""
        tracker_entry = self._parse(SAMPLE_LOG)[1]
        assert tracker_entry.severity == "WARN"
        assert "not working" in tracker_entry.text

    def test_critical_severity(self):
        """A ``[Critical]`` bracket maps to CRITICAL."""
        port_entry = self._parse(SAMPLE_LOG)[2]
        assert port_entry.severity == "CRITICAL"

    def test_multiline_continuation(self):
        """Continuation lines are folded into the preceding record."""
        multiline_text = self._parse(SAMPLE_LOG)[5].text
        assert "continues on the next line" in multiline_text
        assert "third line" in multiline_text

    def test_no_level_bracket_falls_back_to_detect(self):
        """A line without a level bracket keeps its full message text."""
        last = self._parse(SAMPLE_LOG)[6]
        assert last.text == "Normal message without bracket level"

    def test_source_id_propagated(self):
        """Every record carries the source id passed to ``parse``."""
        mismatched = [
            entry for entry in self._parse(SAMPLE_LOG)
            if entry.source_id != "qbit_test"
        ]
        assert not mismatched

    def test_sequence_is_monotonic(self):
        """Sequence numbers are ordered and unique."""
        seqs = [entry.sequence for entry in self._parse(SAMPLE_LOG)]
        assert seqs == sorted(seqs)
        assert len(seqs) == len(set(seqs))

    def test_dash_format_timestamp(self):
        """Dash-separated dates normalise to the same ISO form."""
        dash_lines = iter(DASH_FORMAT.splitlines(keepends=True))
        entries = list(parse(dash_lines, "qbit", []))
        assert len(entries) == 1
        assert entries[0].timestamp_iso == "2026-05-09T14:10:01+00:00"
|
||||
Loading…
Reference in a new issue