From d05430ef859862540f611a26206e176bbe0cb4b4 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Sun, 10 May 2026 08:21:16 -0700
Subject: [PATCH] feat: qBittorrent log ingestor with 8 diagnostic patterns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds app/ingest/qbittorrent.py — auto-detected by the pipeline on the
(YYYY/MM/DD HH:MM:SS) timestamp fingerprint. Handles both slash and dash
date separators, optional [Warning|Critical] bracket levels, and multi-line
continuations (Qt stack traces).

patterns/default.yaml: 8 new qbit_ patterns covering tracker errors, port
bind failures, disk errors, hash check failures, peer bans, download
completion, ratio limits, and session errors.

manage.sh: ingest-qbit [HOST] command mirrors ingest-plex — probes known
default log paths locally or via SSH, ingests, restarts server.

14 tests covering format detection, severity mapping, multiline handling,
and timestamp normalization.
---
Notes (ignored by git am; not part of the commit message):
- Line breaks and indentation were reconstructed; if a hunk is rejected on
  whitespace, retry with --ignore-whitespace.
- The blob id on the qbittorrent.py "index" line predates the parser fixes
  in this revision (named groups, known-level matching, CRLF stripping).

 app/ingest/pipeline.py           |   6 +-
 app/ingest/qbittorrent.py        | 116 +++++++++++++++++++++++++++++++
 manage.sh                        |  51 ++++++++++++++
 patterns/default.yaml            |  40 +++++++++++
 tests/test_ingest_qbittorrent.py |  90 ++++++++++++++++++++++++
 5 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 app/ingest/qbittorrent.py
 create mode 100644 tests/test_ingest_qbittorrent.py

diff --git a/app/ingest/pipeline.py b/app/ingest/pipeline.py
index 6972265..1b8f440 100644
--- a/app/ingest/pipeline.py
+++ b/app/ingest/pipeline.py
@@ -8,7 +8,7 @@ import sqlite3
 from pathlib import Path
 from typing import Iterator
 
-from app.ingest import caddy, docker_log, journald, plaintext, plex
+from app.ingest import caddy, docker_log, journald, plaintext, plex, qbittorrent
 from app.ingest.base import _compile, load_patterns, now_iso
 from app.services.models import LogPattern, RetrievedEntry
 from app.services.search import build_fts_index
@@ -69,6 +69,8 @@ def _detect_format(first_line: str) -> str:
         pass
     if plex.is_plex_log(first_line):
         return "plex"
+    if qbittorrent.is_qbit_log(first_line):
+        return "qbittorrent"
     return "plaintext"
 
 
@@ -101,6 +103,8 @@ def _parse_file(
         yield from caddy.parse(all_lines(), source_id, compiled, ingest_time)
     elif fmt == "plex":
         yield from plex.parse(all_lines(), source_id, compiled, ingest_time)
+    elif fmt == "qbittorrent":
+        yield from qbittorrent.parse(all_lines(), source_id, compiled, ingest_time)
     else:
         yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time)
 
diff --git a/app/ingest/qbittorrent.py b/app/ingest/qbittorrent.py
new file mode 100644
index 0000000..9d233cc
--- /dev/null
+++ b/app/ingest/qbittorrent.py
@@ -0,0 +1,116 @@
+"""qBittorrent log parser.
+
+Handles the standard qBittorrent log format:
+    (YYYY/MM/DD HH:MM:SS) [Level] Message text
+    (YYYY/MM/DD HH:MM:SS) Message text (no explicit level)
+
+The level field is optional — qBittorrent omits it for Normal/Info messages
+in some versions. Parenthesised timestamp is the format fingerprint.
+"""
+from __future__ import annotations
+
+import re
+from datetime import datetime, timezone
+from typing import Iterator
+
+from app.ingest.base import (
+    SourceState, apply_patterns, detect_severity, make_entry_id, now_iso,
+)
+from app.services.models import LogPattern, RetrievedEntry
+
+# (2026/05/09 14:23:01) [Warning] Tracker 'http://...' is not working.
+# (2026/05/09 14:23:01) qBittorrent v5.0.3 started
+# Only known level names are consumed as the level; any other leading
+# "[...]" stays in the message so no log text is dropped.
+_LINE_RE = re.compile(
+    r"^\((?P<ts>\d{4}[/-]\d{2}[/-]\d{2}\s+\d{2}:\d{2}:\d{2})\)"
+    r"(?:\s+\[(?P<level>(?i:normal|info|warning|critical))\])?"
+    r"\s+(?P<msg>.*)$"
+)
+
+_LEVEL_MAP = {
+    "normal": "INFO",
+    "info": "INFO",
+    "warning": "WARN",
+    "critical": "CRITICAL",
+}
+
+
+def _parse_ts(ts_str: str) -> tuple[str, str]:
+    """Return (raw, iso). Handles both YYYY/MM/DD and YYYY-MM-DD.
+
+    The log carries no zone, so the value is tagged as UTC; iso is "" when
+    the captured text is not a valid calendar date/time.
+    """
+    normalized = ts_str.replace("/", "-")
+    try:
+        dt = datetime.strptime(normalized, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
+        return ts_str, dt.isoformat()
+    except ValueError:
+        return ts_str, ""
+
+
+def is_qbit_log(first_line: str) -> bool:
+    """Return True when first_line starts with a parenthesised timestamp."""
+    return bool(_LINE_RE.match(first_line.strip()))
+
+
+def parse(
+    lines: Iterator[str],
+    source_id: str,
+    compiled_patterns: list[tuple[LogPattern, object]],
+    ingest_time: str | None = None,
+) -> Iterator[RetrievedEntry]:
+    """Yield one RetrievedEntry per timestamped line of a qBittorrent log.
+
+    Lines without a leading timestamp are appended to the entry in progress
+    as continuations; any that appear before the first timestamped line are
+    skipped. An entry is emitted only once the next timestamped line (or
+    end of input) shows it is complete.
+    """
+    ingest_time = ingest_time or now_iso()
+    state = SourceState()
+    pending_text: str | None = None
+    pending_meta: dict = {}
+
+    def _emit(text: str, meta: dict) -> RetrievedEntry:
+        repeat, out_of_order = state.observe(text, meta.get("ts_iso"))
+        matched = apply_patterns(text, compiled_patterns)
+        return RetrievedEntry(
+            entry_id=make_entry_id(source_id, state.sequence, text),
+            source_id=source_id,
+            sequence=state.sequence,
+            timestamp_raw=meta.get("ts_raw", ""),
+            timestamp_iso=meta.get("ts_iso", ""),
+            ingest_time=ingest_time,
+            severity=meta.get("severity"),
+            repeat_count=repeat,
+            out_of_order=out_of_order,
+            matched_patterns=matched,
+            text=text,
+        )
+
+    for raw_line in lines:
+        # Strip CR as well as LF so CRLF logs do not leak "\r" into entries.
+        line = raw_line.rstrip("\r\n")
+        m = _LINE_RE.match(line)
+        if m:
+            if pending_text is not None:
+                yield _emit(pending_text, pending_meta)
+
+            ts_raw, ts_iso = _parse_ts(m.group("ts"))
+            level_raw = (m.group("level") or "").lower()
+            # Explicit bracket level wins; otherwise infer from the message.
+            severity = _LEVEL_MAP.get(level_raw) or detect_severity(m.group("msg"))
+            pending_meta = {
+                "ts_raw": ts_raw,
+                "ts_iso": ts_iso,
+                "severity": severity,
+            }
+            pending_text = m.group("msg")
+        elif pending_text is not None:
+            # Continuation line (Qt stack trace or wrapped message)
+            pending_text += "\n" + line.strip()
+
+    if pending_text is not None:
+        yield _emit(pending_text, pending_meta)
diff --git a/manage.sh b/manage.sh
index 4dea383..a491754 100755
--- a/manage.sh
+++ b/manage.sh
@@ -87,6 +87,7 @@ usage() {
     echo " Data:"
     echo -e " ${GREEN}ingest PATH [DB]${NC} Ingest a log file or corpus directory"
     echo -e " ${GREEN}ingest-plex [HOST]${NC} Pull Plex log from Cass (or HOST) and ingest"
+    echo -e " ${GREEN}ingest-qbit [HOST]${NC} Pull qBittorrent log locally or from HOST via SSH"
     echo -e " ${GREEN}build-fts${NC} Rebuild the FTS search index"
     echo ""
     echo " Tests:"
@@ -233,6 +234,56 @@ case "$CMD" in
         exec bash "$0" restart
         ;;
 
+    ingest-qbit)
+        QBIT_HOST="${1:-}"
+        # Default log locations in priority order
+        QBIT_LOG_PATHS=(
+            "$HOME/.local/share/qBittorrent/logs/qbittorrent.log"
+            "$HOME/.config/qBittorrent/logs/qbittorrent.log"
+            "/var/log/qbittorrent/qbittorrent.log"
+        )
+        TMP_DIR="/tmp/turnstone-qbit-$$"
+        mkdir -p "$TMP_DIR"
+
+        if [[ -n "$QBIT_HOST" ]]; then
+            info "Fetching qBittorrent log from ${QBIT_HOST}…"
+            REMOTE_LOG=""
+            for p in "${QBIT_LOG_PATHS[@]}"; do
+                if ssh "$QBIT_HOST" "test -f '$p'" 2>/dev/null; then
+                    REMOTE_LOG="$p"
+                    break
+                fi
+            done
+            if [[ -z "$REMOTE_LOG" ]]; then
+                rm -rf "$TMP_DIR"
+                error "No qBittorrent log found on ${QBIT_HOST}. Tried: ${QBIT_LOG_PATHS[*]}"
+            fi
+            local_name="${QBIT_HOST}-qbittorrent.log"
+            ssh "$QBIT_HOST" "cat '$REMOTE_LOG'" > "${TMP_DIR}/${local_name}"
+            info " ← ${REMOTE_LOG} (${QBIT_HOST})"
+        else
+            LOCAL_LOG=""
+            for p in "${QBIT_LOG_PATHS[@]}"; do
+                if [[ -f "$p" ]]; then
+                    LOCAL_LOG="$p"
+                    break
+                fi
+            done
+            if [[ -z "$LOCAL_LOG" ]]; then
+                rm -rf "$TMP_DIR"
+                error "No qBittorrent log found locally. Tried: ${QBIT_LOG_PATHS[*]}"
+            fi
+            cp "$LOCAL_LOG" "${TMP_DIR}/qbittorrent.log"
+            info " ← ${LOCAL_LOG}"
+        fi
+
+        info "Ingesting into ${DB}…"
+        "$PYTHON" scripts/ingest_corpus.py "${TMP_DIR}"/*.log "$DB"
+        rm -rf "$TMP_DIR"
+        info "Done. Restarting server…"
+        exec bash "$0" restart
+        ;;
+
     build-fts)
         info "Rebuilding FTS index for ${DB}…"
         TURNSTONE_DB="$DB" "$PYTHON" scripts/build_fts_index.py "$DB"
diff --git a/patterns/default.yaml b/patterns/default.yaml
index 5550bcb..b78a101 100644
--- a/patterns/default.yaml
+++ b/patterns/default.yaml
@@ -83,6 +83,46 @@ patterns:
 
   # Add device/service-specific patterns below this line:
 
+  - name: qbit_tracker_error
+    pattern: "(tracker|announce).*(not working|error|fail|unreachable|timeout|refused|invalid)"
+    severity: WARN
+    description: qBittorrent tracker connection or announce failure
+
+  - name: qbit_port_bind
+    pattern: "(couldn't? listen|bind.*fail|port.*in use|listening.*fail)"
+    severity: CRITICAL
+    description: qBittorrent failed to bind listen port — firewall or port conflict
+
+  - name: qbit_disk_error
+    pattern: "(cannot (write|open|create)|disk.*error|i/o error|file.*fail|write.*fail)"
+    severity: ERROR
+    description: qBittorrent disk write or file access failure
+
+  - name: qbit_hash_fail
+    pattern: "(hash.*(check|fail|mismatch)|recheck|piece.*fail)"
+    severity: WARN
+    description: qBittorrent torrent hash verification failure — possible corrupt data
+
+  - name: qbit_peer_ban
+    pattern: "(peer.*ban|banned.*peer|blocked.*peer)"
+    severity: INFO
+    description: qBittorrent peer banned (encryption enforcement or bad actor)
+
+  - name: qbit_download_complete
+    pattern: "(download.*complet|torrent.*finish|has finished downloading)"
+    severity: INFO
+    description: qBittorrent torrent download completed
+
+  - name: qbit_ratio_limit
+    pattern: "(ratio.*reach|seeding.*limit|stop.*seeding|upload.*limit)"
+    severity: INFO
+    description: qBittorrent seeding ratio or time limit reached
+
+  - name: qbit_session_error
+    pattern: "(session.*error|couldn't? resume|resume.*fail|torrent.*error)"
+    severity: ERROR
+    description: qBittorrent session or resume data error
+
   - name: plex_eae_failure
     pattern: "(EAE timeout|EAE not running|eac3_eae.*error reading output|Error submitting packet to decoder.*I/O error)"
     severity: ERROR
diff --git a/tests/test_ingest_qbittorrent.py b/tests/test_ingest_qbittorrent.py
new file mode 100644
index 0000000..e508a65
--- /dev/null
+++ b/tests/test_ingest_qbittorrent.py
@@ -0,0 +1,90 @@
+"""Tests for the qBittorrent log ingestor."""
+from __future__ import annotations
+
+import pytest
+
+from app.ingest.qbittorrent import is_qbit_log, parse
+
+SAMPLE_LOG = """\
+(2026/05/09 14:10:01) qBittorrent v5.0.3 started
+(2026/05/09 14:10:02) [Warning] Tracker 'http://tracker.example.com/announce' is not working. Reason: Connection timed out
+(2026/05/09 14:10:03) [Critical] Couldn't listen on any of the network interfaces. Aborting!
+(2026/05/09 14:10:04) Download of 'ubuntu-24.04.iso' has finished.
+(2026/05/09 14:10:05) [Warning] Hash check failed for piece 42 of 'ubuntu-24.04.iso'
+(2026/05/09 14:10:06) Some long message
+    that continues on the next line
+    and a third line
+(2026/05/09 14:10:07) Normal message without bracket level
+"""
+
+DASH_FORMAT = "(2026-05-09 14:10:01) qBittorrent v4.6.2 started\n"
+
+
+class TestDetector:
+    def test_detects_slash_format(self):
+        assert is_qbit_log("(2026/05/09 14:10:01) qBittorrent started")
+
+    def test_detects_dash_format(self):
+        assert is_qbit_log(DASH_FORMAT.strip())
+
+    def test_rejects_plex_format(self):
+        assert not is_qbit_log("Jan 01, 2026 12:00:00.000 [12345] DEBUG - message")
+
+    def test_rejects_journald_json(self):
+        assert not is_qbit_log('{"__REALTIME_TIMESTAMP": "12345", "MESSAGE": "hi"}')
+
+    def test_rejects_plaintext(self):
+        assert not is_qbit_log("2026-05-09 14:10:01 some syslog line")
+
+
+class TestParser:
+    def _parse(self, text: str) -> list:
+        return list(parse(iter(text.splitlines(keepends=True)), "qbit_test", []))
+
+    def test_entry_count(self):
+        entries = self._parse(SAMPLE_LOG)
+        assert len(entries) == 7
+
+    def test_startup_entry(self):
+        e = self._parse(SAMPLE_LOG)[0]
+        assert "qBittorrent v5.0.3 started" in e.text
+        # No bracket level + no severity keyword in text → None (consistent with other ingestors)
+        assert e.severity is None
+        assert e.timestamp_iso == "2026-05-09T14:10:01+00:00"
+
+    def test_warning_severity(self):
+        entries = self._parse(SAMPLE_LOG)
+        tracker_entry = entries[1]
+        assert tracker_entry.severity == "WARN"
+        assert "not working" in tracker_entry.text
+
+    def test_critical_severity(self):
+        entries = self._parse(SAMPLE_LOG)
+        port_entry = entries[2]
+        assert port_entry.severity == "CRITICAL"
+
+    def test_multiline_continuation(self):
+        entries = self._parse(SAMPLE_LOG)
+        multiline = entries[5]
+        assert "continues on the next line" in multiline.text
+        assert "third line" in multiline.text
+
+    def test_no_level_bracket_falls_back_to_detect(self):
+        entries = self._parse(SAMPLE_LOG)
+        last = entries[6]
+        assert last.text == "Normal message without bracket level"
+
+    def test_source_id_propagated(self):
+        entries = self._parse(SAMPLE_LOG)
+        assert all(e.source_id == "qbit_test" for e in entries)
+
+    def test_sequence_is_monotonic(self):
+        entries = self._parse(SAMPLE_LOG)
+        sequences = [e.sequence for e in entries]
+        assert sequences == sorted(sequences)
+        assert len(set(sequences)) == len(sequences)
+
+    def test_dash_format_timestamp(self):
+        entries = list(parse(iter(DASH_FORMAT.splitlines(keepends=True)), "qbit", []))
+        assert len(entries) == 1
+        assert entries[0].timestamp_iso == "2026-05-09T14:10:01+00:00"