feat: qBittorrent log ingestor with 8 diagnostic patterns

Adds app/ingest/qbittorrent.py — auto-detected by the pipeline on the
(YYYY/MM/DD HH:MM:SS) timestamp fingerprint. Handles both slash and dash
date separators, optional [Warning|Critical] bracket levels, and
multi-line continuations (Qt stack traces).

patterns/default.yaml: 8 new qbit_ patterns covering tracker errors,
port bind failures, disk errors, hash check failures, peer bans, download
completion, ratio limits, and session errors.

manage.sh: ingest-qbit [HOST] command mirrors ingest-plex — probes known
default log paths locally or via SSH, ingests, restarts server.

14 tests covering format detection, severity mapping, multiline handling,
and timestamp normalization.
This commit is contained in:
pyr0ball 2026-05-10 08:21:16 -07:00
parent 19d3827e2d
commit a3c0962277
5 changed files with 286 additions and 1 deletions

View file

@ -8,7 +8,7 @@ import sqlite3
from pathlib import Path from pathlib import Path
from typing import Iterator from typing import Iterator
from app.ingest import caddy, docker_log, journald, plaintext, plex from app.ingest import caddy, docker_log, journald, plaintext, plex, qbittorrent
from app.ingest.base import _compile, load_patterns, now_iso from app.ingest.base import _compile, load_patterns, now_iso
from app.services.models import LogPattern, RetrievedEntry from app.services.models import LogPattern, RetrievedEntry
from app.services.search import build_fts_index from app.services.search import build_fts_index
@ -69,6 +69,8 @@ def _detect_format(first_line: str) -> str:
pass pass
if plex.is_plex_log(first_line): if plex.is_plex_log(first_line):
return "plex" return "plex"
if qbittorrent.is_qbit_log(first_line):
return "qbittorrent"
return "plaintext" return "plaintext"
@ -101,6 +103,8 @@ def _parse_file(
yield from caddy.parse(all_lines(), source_id, compiled, ingest_time) yield from caddy.parse(all_lines(), source_id, compiled, ingest_time)
elif fmt == "plex": elif fmt == "plex":
yield from plex.parse(all_lines(), source_id, compiled, ingest_time) yield from plex.parse(all_lines(), source_id, compiled, ingest_time)
elif fmt == "qbittorrent":
yield from qbittorrent.parse(all_lines(), source_id, compiled, ingest_time)
else: else:
yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time) yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time)

100
app/ingest/qbittorrent.py Normal file
View file

@ -0,0 +1,100 @@
"""qBittorrent log parser.
Handles the standard qBittorrent log format:
(YYYY/MM/DD HH:MM:SS) [Level] Message text
(YYYY/MM/DD HH:MM:SS) Message text (no explicit level)
The level field is optional: qBittorrent omits it for Normal/Info messages
in some versions. The parenthesised timestamp is the format fingerprint.
"""
from __future__ import annotations
import re
from datetime import datetime, timezone
from typing import Iterator
from app.ingest.base import (
SourceState, apply_patterns, detect_severity, make_entry_id, now_iso,
)
from app.services.models import LogPattern, RetrievedEntry
# Bracket levels recognised in log lines, mapped to normalised severities.
# Keys are lowercase; the captured level is lowercased before lookup.
_LEVEL_MAP = {
    "normal": "INFO",
    "info": "INFO",
    "warning": "WARN",
    "critical": "CRITICAL",
}

# Only a bracket naming a known level counts as the level field. Any other
# leading "[...]" (a torrent name, a tag, ...) belongs to the message and must
# stay in ``msg`` instead of being silently swallowed as an unknown "level".
# Built from _LEVEL_MAP so the regex and the mapping cannot drift apart.
_LEVEL_ALTERNATION = "|".join(re.escape(name) for name in _LEVEL_MAP)

# (2026/05/09 14:23:01) [Warning] Tracker 'http://...' is not working.
# (2026/05/09 14:23:01) qBittorrent v5.0.3 started
_LINE_RE = re.compile(
    # Parenthesised timestamp; the date separator may be "/" or "-".
    r"^\((?P<ts>\d{4}[/-]\d{2}[/-]\d{2}\s+\d{2}:\d{2}:\d{2})\)"
    # Optional bracketed level, matched case-insensitively.
    r"(?:\s+\[(?P<level>(?i:" + _LEVEL_ALTERNATION + r"))\])?"
    # Everything after the separating whitespace is the message.
    r"\s+(?P<msg>.*)$"
)
def _parse_ts(ts_str: str) -> tuple[str, str]:
    """Split a log timestamp into its raw and ISO-8601 forms.

    Both ``YYYY/MM/DD HH:MM:SS`` and ``YYYY-MM-DD HH:MM:SS`` are accepted.
    The parsed value is tagged as UTC before formatting.

    Returns:
        ``(ts_str, iso)`` where ``iso`` is the empty string when the
        timestamp cannot be parsed (for example an impossible month or hour).
    """
    # Collapse the two accepted separators into one so a single format
    # string covers both variants.
    canonical = "-".join(ts_str.split("/"))
    try:
        parsed = datetime.strptime(canonical, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return ts_str, ""
    return ts_str, parsed.replace(tzinfo=timezone.utc).isoformat()
def is_qbit_log(first_line: str) -> bool:
    """Report whether *first_line* has the qBittorrent log-line shape.

    Surrounding whitespace (including the trailing newline) is ignored
    before the line is matched against ``_LINE_RE``.
    """
    candidate = first_line.strip()
    return _LINE_RE.match(candidate) is not None
def parse(
    lines: Iterator[str],
    source_id: str,
    compiled_patterns: list[tuple[LogPattern, object]],
    ingest_time: str | None = None,
) -> Iterator[RetrievedEntry]:
    """Turn qBittorrent log lines into ``RetrievedEntry`` objects.

    A line matching ``_LINE_RE`` starts a new entry. Following lines that do
    not match are treated as continuations (Qt stack traces, wrapped
    messages) and appended to the current entry, separated by newlines.
    Lines seen before the first timestamped line are dropped.

    Args:
        lines: Raw log lines, with or without trailing line terminators.
        source_id: Identifier stamped onto every emitted entry.
        compiled_patterns: Patterns handed to ``apply_patterns`` per entry.
        ingest_time: Ingest timestamp for all entries; defaults to
            ``now_iso()`` when omitted or empty.

    Yields:
        One entry per timestamped line, in input order.
    """
    ingest_time = ingest_time or now_iso()
    state = SourceState()
    # Message fragments of the entry being assembled; None until the first
    # timestamped line has been seen.
    pending_parts: list[str] | None = None
    pending_meta: dict = {}

    def _emit(text: str, meta: dict) -> RetrievedEntry:
        # observe() is called before state.sequence is read below;
        # presumably it advances the sequence counter — confirm in SourceState.
        repeat, out_of_order = state.observe(text, meta.get("ts_iso"))
        matched = apply_patterns(text, compiled_patterns)
        return RetrievedEntry(
            entry_id=make_entry_id(source_id, state.sequence, text),
            source_id=source_id,
            sequence=state.sequence,
            timestamp_raw=meta.get("ts_raw", ""),
            timestamp_iso=meta.get("ts_iso", ""),
            ingest_time=ingest_time,
            severity=meta.get("severity"),
            repeat_count=repeat,
            out_of_order=out_of_order,
            matched_patterns=matched,
            text=text,
        )

    for raw_line in lines:
        # Strip CR as well as LF: "." in the regex matches "\r", so a CRLF
        # log would otherwise leave a stray carriage return on every message.
        line = raw_line.rstrip("\r\n")
        m = _LINE_RE.match(line)
        if m:
            # A new header closes whatever entry was being assembled.
            if pending_parts is not None:
                yield _emit("\n".join(pending_parts), pending_meta)
            ts_raw, ts_iso = _parse_ts(m.group("ts"))
            level_raw = (m.group("level") or "").lower()
            # An explicit bracket level wins; otherwise infer from the text.
            severity = _LEVEL_MAP.get(level_raw) or detect_severity(m.group("msg"))
            pending_meta = {
                "ts_raw": ts_raw,
                "ts_iso": ts_iso,
                "severity": severity,
            }
            pending_parts = [m.group("msg")]
        elif pending_parts is not None:
            # Continuation line (Qt stack trace or wrapped message).
            # Whitespace-only lines carry no content and would only add
            # dangling newlines to the entry text, so they are skipped.
            continuation = line.strip()
            if continuation:
                pending_parts.append(continuation)

    # Flush the final entry, which has no following header to close it.
    if pending_parts is not None:
        yield _emit("\n".join(pending_parts), pending_meta)

View file

@ -87,6 +87,7 @@ usage() {
echo " Data:" echo " Data:"
echo -e " ${GREEN}ingest PATH [DB]${NC} Ingest a log file or corpus directory" echo -e " ${GREEN}ingest PATH [DB]${NC} Ingest a log file or corpus directory"
echo -e " ${GREEN}ingest-plex [HOST]${NC} Pull Plex log from Cass (or HOST) and ingest" echo -e " ${GREEN}ingest-plex [HOST]${NC} Pull Plex log from Cass (or HOST) and ingest"
echo -e " ${GREEN}ingest-qbit [HOST]${NC} Pull qBittorrent log locally or from HOST via SSH"
echo -e " ${GREEN}build-fts${NC} Rebuild the FTS search index" echo -e " ${GREEN}build-fts${NC} Rebuild the FTS search index"
echo "" echo ""
echo " Tests:" echo " Tests:"
@ -233,6 +234,56 @@ case "$CMD" in
exec bash "$0" restart exec bash "$0" restart
;; ;;
ingest-qbit)
    # Pull the qBittorrent log (from this machine, or from HOST over SSH),
    # ingest it into $DB, then restart the server.
    QBIT_HOST="${1:-}"

    # Per-user log locations are kept home-relative so they can be resolved
    # against the home directory of whichever machine holds the log. The
    # local $HOME is not necessarily the remote user's home.
    QBIT_HOME_LOG_SUBPATHS=(
        ".local/share/qBittorrent/logs/qbittorrent.log"
        ".config/qBittorrent/logs/qbittorrent.log"
    )
    QBIT_SYSTEM_LOG="/var/log/qbittorrent/qbittorrent.log"

    if [[ -n "$QBIT_HOST" ]]; then
        # Literal "$HOME": expanded by the remote shell, not this one.
        qbit_home_prefix='$HOME'
    else
        qbit_home_prefix="$HOME"
    fi

    # Default log locations in priority order
    QBIT_LOG_PATHS=()
    for sub in "${QBIT_HOME_LOG_SUBPATHS[@]}"; do
        QBIT_LOG_PATHS+=("${qbit_home_prefix}/${sub}")
    done
    QBIT_LOG_PATHS+=("$QBIT_SYSTEM_LOG")

    # mktemp -d creates a fresh, private directory with an unpredictable
    # name instead of reusing a guessable /tmp path derived from the PID.
    TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/turnstone-qbit-XXXXXX")" \
        || error "Could not create a temporary directory"

    if [[ -n "$QBIT_HOST" ]]; then
        info "Fetching qBittorrent log from ${QBIT_HOST}"
        REMOTE_LOG=""
        for p in "${QBIT_LOG_PATHS[@]}"; do
            # Double quotes on the remote side let the remote shell expand
            # $HOME while still protecting spaces in the path.
            if ssh "$QBIT_HOST" "test -f \"$p\"" 2>/dev/null; then
                REMOTE_LOG="$p"
                break
            fi
        done
        if [[ -z "$REMOTE_LOG" ]]; then
            rm -rf "$TMP_DIR"
            error "No qBittorrent log found on ${QBIT_HOST}. Tried: ${QBIT_LOG_PATHS[*]}"
        fi
        local_name="${QBIT_HOST}-qbittorrent.log"
        # Do not ingest a truncated or empty file if the transfer fails.
        if ! ssh "$QBIT_HOST" "cat \"$REMOTE_LOG\"" > "${TMP_DIR}/${local_name}"; then
            rm -rf "$TMP_DIR"
            error "Failed to fetch ${REMOTE_LOG} from ${QBIT_HOST}"
        fi
        info "${REMOTE_LOG} (${QBIT_HOST})"
    else
        LOCAL_LOG=""
        for p in "${QBIT_LOG_PATHS[@]}"; do
            if [[ -f "$p" ]]; then
                LOCAL_LOG="$p"
                break
            fi
        done
        if [[ -z "$LOCAL_LOG" ]]; then
            rm -rf "$TMP_DIR"
            error "No qBittorrent log found locally. Tried: ${QBIT_LOG_PATHS[*]}"
        fi
        cp "$LOCAL_LOG" "${TMP_DIR}/qbittorrent.log"
        info "${LOCAL_LOG}"
    fi

    info "Ingesting into ${DB}"
    "$PYTHON" scripts/ingest_corpus.py "${TMP_DIR}"/*.log "$DB"
    rm -rf "$TMP_DIR"
    info "Done. Restarting server…"
    exec bash "$0" restart
    ;;
build-fts) build-fts)
info "Rebuilding FTS index for ${DB}" info "Rebuilding FTS index for ${DB}"
TURNSTONE_DB="$DB" "$PYTHON" scripts/build_fts_index.py "$DB" TURNSTONE_DB="$DB" "$PYTHON" scripts/build_fts_index.py "$DB"

View file

@ -83,6 +83,46 @@ patterns:
# Add device/service-specific patterns below this line: # Add device/service-specific patterns below this line:
- name: qbit_tracker_error
pattern: "(tracker|announce).*(not working|error|fail|unreachable|timeout|refused|invalid)"
severity: WARN
description: qBittorrent tracker connection or announce failure
- name: qbit_port_bind
pattern: "(couldn't? listen|bind.*fail|port.*in use|listening.*fail)"
severity: CRITICAL
description: qBittorrent failed to bind listen port — firewall or port conflict
- name: qbit_disk_error
pattern: "(cannot (write|open|create)|disk.*error|i/o error|file.*fail|write.*fail)"
severity: ERROR
description: qBittorrent disk write or file access failure
- name: qbit_hash_fail
pattern: "(hash.*(check|fail|mismatch)|recheck|piece.*fail)"
severity: WARN
description: qBittorrent torrent hash verification failure — possible corrupt data
- name: qbit_peer_ban
pattern: "(peer.*ban|banned.*peer|blocked.*peer)"
severity: INFO
description: qBittorrent peer banned (encryption enforcement or bad actor)
- name: qbit_download_complete
pattern: "(download.*complet|torrent.*finish|has finished downloading)"
severity: INFO
description: qBittorrent torrent download completed
- name: qbit_ratio_limit
pattern: "(ratio.*reach|seeding.*limit|stop.*seeding|upload.*limit)"
severity: INFO
description: qBittorrent seeding ratio or time limit reached
- name: qbit_session_error
pattern: "(session.*error|couldn't? resume|resume.*fail|torrent.*error)"
severity: ERROR
description: qBittorrent session or resume data error
- name: plex_eae_failure - name: plex_eae_failure
pattern: "(EAE timeout|EAE not running|eac3_eae.*error reading output|Error submitting packet to decoder.*I/O error)" pattern: "(EAE timeout|EAE not running|eac3_eae.*error reading output|Error submitting packet to decoder.*I/O error)"
severity: ERROR severity: ERROR

View file

@ -0,0 +1,90 @@
"""Tests for the qBittorrent log ingestor."""
from __future__ import annotations
import pytest
from app.ingest.qbittorrent import is_qbit_log, parse
# Fixture with seven timestamped entries: lines with and without a bracket
# level, plus one message (14:10:06) followed by two continuation lines that
# carry no timestamp of their own.
SAMPLE_LOG = """\
(2026/05/09 14:10:01) qBittorrent v5.0.3 started
(2026/05/09 14:10:02) [Warning] Tracker 'http://tracker.example.com/announce' is not working. Reason: Connection timed out
(2026/05/09 14:10:03) [Critical] Couldn't listen on any of the network interfaces. Aborting!
(2026/05/09 14:10:04) Download of 'ubuntu-24.04.iso' has finished.
(2026/05/09 14:10:05) [Warning] Hash check failed for piece 42 of 'ubuntu-24.04.iso'
(2026/05/09 14:10:06) Some long message
that continues on the next line
and a third line
(2026/05/09 14:10:07) Normal message without bracket level
"""
# Single-entry fixture that uses dash date separators instead of slashes.
DASH_FORMAT = "(2026-05-09 14:10:01) qBittorrent v4.6.2 started\n"
class TestDetector:
    """Format-fingerprint checks for ``is_qbit_log``."""

    def test_detects_slash_format(self):
        """A slash-separated date inside parentheses is recognised."""
        slash_line = "(2026/05/09 14:10:01) qBittorrent started"
        assert is_qbit_log(slash_line)

    def test_detects_dash_format(self):
        """A dash-separated date inside parentheses is recognised."""
        dash_line = DASH_FORMAT.strip()
        assert is_qbit_log(dash_line)

    def test_rejects_plex_format(self):
        """Plex-style lines must not be claimed by this detector."""
        plex_line = "Jan 01, 2026 12:00:00.000 [12345] DEBUG - message"
        assert not is_qbit_log(plex_line)

    def test_rejects_journald_json(self):
        """journald JSON records must not be claimed by this detector."""
        journald_line = '{"__REALTIME_TIMESTAMP": "12345", "MESSAGE": "hi"}'
        assert not is_qbit_log(journald_line)

    def test_rejects_plaintext(self):
        """A timestamp without the surrounding parentheses is not qBittorrent."""
        syslog_line = "2026-05-09 14:10:01 some syslog line"
        assert not is_qbit_log(syslog_line)
class TestParser:
    """Parser checks: entry splitting, severity, timestamps, continuations."""

    def _parse(self, text: str) -> list:
        """Run the parser over *text* and collect every emitted entry."""
        line_iter = iter(text.splitlines(keepends=True))
        return list(parse(line_iter, "qbit_test", []))

    def test_entry_count(self):
        """Each timestamped line yields exactly one entry."""
        assert len(self._parse(SAMPLE_LOG)) == 7

    def test_startup_entry(self):
        """The first entry keeps its text, timestamp and absent severity."""
        startup = self._parse(SAMPLE_LOG)[0]
        assert "qBittorrent v5.0.3 started" in startup.text
        # No bracket level + no severity keyword in text → None (consistent with other ingestors)
        assert startup.severity is None
        assert startup.timestamp_iso == "2026-05-09T14:10:01+00:00"

    def test_warning_severity(self):
        """A [Warning] bracket maps to WARN."""
        tracker_entry = self._parse(SAMPLE_LOG)[1]
        assert tracker_entry.severity == "WARN"
        assert "not working" in tracker_entry.text

    def test_critical_severity(self):
        """A [Critical] bracket maps to CRITICAL."""
        port_entry = self._parse(SAMPLE_LOG)[2]
        assert port_entry.severity == "CRITICAL"

    def test_multiline_continuation(self):
        """Untimestamped lines are folded into the preceding entry."""
        folded_text = self._parse(SAMPLE_LOG)[5].text
        assert "continues on the next line" in folded_text
        assert "third line" in folded_text

    def test_no_level_bracket_falls_back_to_detect(self):
        """A line without a bracket level keeps its full message text."""
        final_entry = self._parse(SAMPLE_LOG)[6]
        assert final_entry.text == "Normal message without bracket level"

    def test_source_id_propagated(self):
        """Every entry carries the source id handed to the parser."""
        for entry in self._parse(SAMPLE_LOG):
            assert entry.source_id == "qbit_test"

    def test_sequence_is_monotonic(self):
        """Sequence numbers are sorted and never repeat."""
        sequences = [entry.sequence for entry in self._parse(SAMPLE_LOG)]
        assert sequences == sorted(sequences)
        assert len(sequences) == len(set(sequences))

    def test_dash_format_timestamp(self):
        """Dash-separated dates normalise to the same ISO form."""
        dash_lines = iter(DASH_FORMAT.splitlines(keepends=True))
        entries = list(parse(dash_lines, "qbit", []))
        assert len(entries) == 1
        assert entries[0].timestamp_iso == "2026-05-09T14:10:01+00:00"