feat: qBittorrent log ingestor with 8 diagnostic patterns
Adds app/ingest/qbittorrent.py — auto-detected by the pipeline on the (YYYY/MM/DD HH:MM:SS) timestamp fingerprint. Handles both slash and dash date separators, optional [Warning|Critical] bracket levels, and multi-line continuations (Qt stack traces). patterns/default.yaml: 8 new qbit_ patterns covering tracker errors, port bind failures, disk errors, hash check failures, peer bans, download completion, ratio limits, and session errors. manage.sh: ingest-qbit [HOST] command mirrors ingest-plex — probes known default log paths locally or via SSH, ingests, restarts server. 14 tests covering format detection, severity mapping, multiline handling, and timestamp normalization.
This commit is contained in:
parent
5a8dc731b8
commit
2d148e4f9e
5 changed files with 286 additions and 1 deletion
|
|
@ -8,7 +8,7 @@ import sqlite3
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
from app.ingest import caddy, docker_log, journald, plaintext, plex
|
from app.ingest import caddy, docker_log, journald, plaintext, plex, qbittorrent
|
||||||
from app.ingest.base import _compile, load_patterns, now_iso
|
from app.ingest.base import _compile, load_patterns, now_iso
|
||||||
from app.services.models import LogPattern, RetrievedEntry
|
from app.services.models import LogPattern, RetrievedEntry
|
||||||
from app.services.search import build_fts_index
|
from app.services.search import build_fts_index
|
||||||
|
|
@ -69,6 +69,8 @@ def _detect_format(first_line: str) -> str:
|
||||||
pass
|
pass
|
||||||
if plex.is_plex_log(first_line):
|
if plex.is_plex_log(first_line):
|
||||||
return "plex"
|
return "plex"
|
||||||
|
if qbittorrent.is_qbit_log(first_line):
|
||||||
|
return "qbittorrent"
|
||||||
return "plaintext"
|
return "plaintext"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -101,6 +103,8 @@ def _parse_file(
|
||||||
yield from caddy.parse(all_lines(), source_id, compiled, ingest_time)
|
yield from caddy.parse(all_lines(), source_id, compiled, ingest_time)
|
||||||
elif fmt == "plex":
|
elif fmt == "plex":
|
||||||
yield from plex.parse(all_lines(), source_id, compiled, ingest_time)
|
yield from plex.parse(all_lines(), source_id, compiled, ingest_time)
|
||||||
|
elif fmt == "qbittorrent":
|
||||||
|
yield from qbittorrent.parse(all_lines(), source_id, compiled, ingest_time)
|
||||||
else:
|
else:
|
||||||
yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time)
|
yield from plaintext.parse(all_lines(), source_id, compiled, ingest_time)
|
||||||
|
|
||||||
|
|
|
||||||
100
app/ingest/qbittorrent.py
Normal file
100
app/ingest/qbittorrent.py
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
"""qBittorrent log parser.
|
||||||
|
|
||||||
|
Handles the standard qBittorrent log format:
|
||||||
|
(YYYY/MM/DD HH:MM:SS) [Level] Message text
|
||||||
|
(YYYY/MM/DD HH:MM:SS) Message text (no explicit level)
|
||||||
|
|
||||||
|
The level field is optional — qBittorrent omits it for Normal/Info messages
|
||||||
|
in some versions. Parenthesised timestamp is the format fingerprint.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from app.ingest.base import (
|
||||||
|
SourceState, apply_patterns, detect_severity, make_entry_id, now_iso,
|
||||||
|
)
|
||||||
|
from app.services.models import LogPattern, RetrievedEntry
|
||||||
|
|
||||||
|
# (2026/05/09 14:23:01) [Warning] Tracker 'http://...' is not working.
|
||||||
|
# (2026/05/09 14:23:01) qBittorrent v5.0.3 started
|
||||||
|
_LINE_RE = re.compile(
|
||||||
|
r"^\((?P<ts>\d{4}[/-]\d{2}[/-]\d{2}\s+\d{2}:\d{2}:\d{2})\)"
|
||||||
|
r"(?:\s+\[(?P<level>[^\]]+)\])?"
|
||||||
|
r"\s+(?P<msg>.*)$"
|
||||||
|
)
|
||||||
|
|
||||||
|
_LEVEL_MAP = {
|
||||||
|
"normal": "INFO",
|
||||||
|
"info": "INFO",
|
||||||
|
"warning": "WARN",
|
||||||
|
"critical": "CRITICAL",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_ts(ts_str: str) -> tuple[str, str]:
|
||||||
|
"""Return (raw, iso). Handles both YYYY/MM/DD and YYYY-MM-DD."""
|
||||||
|
normalized = ts_str.replace("/", "-")
|
||||||
|
try:
|
||||||
|
dt = datetime.strptime(normalized, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
|
||||||
|
return ts_str, dt.isoformat()
|
||||||
|
except ValueError:
|
||||||
|
return ts_str, ""
|
||||||
|
|
||||||
|
|
||||||
|
def is_qbit_log(first_line: str) -> bool:
    """Return True when *first_line* has the qBittorrent line shape.

    Surrounding whitespace is ignored; the remainder must match
    ``_LINE_RE`` starting at its first character.
    """
    candidate = first_line.strip()
    return _LINE_RE.match(candidate) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def parse(
    lines: Iterator[str],
    source_id: str,
    compiled_patterns: list[tuple[LogPattern, object]],
    ingest_time: str | None = None,
) -> Iterator[RetrievedEntry]:
    """Parse qBittorrent log lines into ``RetrievedEntry`` records.

    A line matching ``_LINE_RE`` starts a new record.  Any other line is
    treated as a continuation (Qt stack trace or wrapped message) and is
    appended, stripped and newline-joined, to the record in progress.
    Non-matching lines seen before the first record are discarded.

    Args:
        lines: Raw log lines, with or without their line terminator.
        source_id: Identifier stamped on every emitted entry.
        compiled_patterns: Pattern pairs handed to ``apply_patterns``.
        ingest_time: Ingest timestamp stamped on every entry; defaults to
            ``now_iso()`` when omitted or empty.

    Yields:
        One ``RetrievedEntry`` per record, in input order.
    """
    ingest_time = ingest_time or now_iso()
    state = SourceState()
    pending_text: str | None = None
    pending_meta: dict = {}

    def _emit(text: str, meta: dict) -> RetrievedEntry:
        # observe() reports repeat / ordering information for this record;
        # state.sequence is read afterwards for the entry id and position.
        repeat, out_of_order = state.observe(text, meta.get("ts_iso"))
        matched = apply_patterns(text, compiled_patterns)
        return RetrievedEntry(
            entry_id=make_entry_id(source_id, state.sequence, text),
            source_id=source_id,
            sequence=state.sequence,
            timestamp_raw=meta.get("ts_raw", ""),
            timestamp_iso=meta.get("ts_iso", ""),
            ingest_time=ingest_time,
            severity=meta.get("severity"),
            repeat_count=repeat,
            out_of_order=out_of_order,
            matched_patterns=matched,
            text=text,
        )

    for raw_line in lines:
        # Strip CR as well as LF: with CRLF-terminated input the regex's
        # ``.*`` would otherwise capture the trailing "\r" into the message.
        line = raw_line.rstrip("\r\n")
        m = _LINE_RE.match(line)
        if m:
            # A new timestamped line closes the record being accumulated.
            if pending_text is not None:
                yield _emit(pending_text, pending_meta)

            ts_raw, ts_iso = _parse_ts(m.group("ts"))
            level_raw = (m.group("level") or "").lower()
            # An explicit bracket level wins; otherwise fall back to
            # keyword-based detection on the message text.
            severity = _LEVEL_MAP.get(level_raw) or detect_severity(m.group("msg"))
            pending_meta = {
                "ts_raw": ts_raw,
                "ts_iso": ts_iso,
                "severity": severity,
            }
            pending_text = m.group("msg")
        elif pending_text is not None:
            # Continuation line (Qt stack trace or wrapped message)
            pending_text += "\n" + line.strip()

    # Flush the final record, which has no following line to close it.
    if pending_text is not None:
        yield _emit(pending_text, pending_meta)
|
||||||
51
manage.sh
51
manage.sh
|
|
@ -87,6 +87,7 @@ usage() {
|
||||||
echo " Data:"
|
echo " Data:"
|
||||||
echo -e " ${GREEN}ingest PATH [DB]${NC} Ingest a log file or corpus directory"
|
echo -e " ${GREEN}ingest PATH [DB]${NC} Ingest a log file or corpus directory"
|
||||||
echo -e " ${GREEN}ingest-plex [HOST]${NC} Pull Plex log from Cass (or HOST) and ingest"
|
echo -e " ${GREEN}ingest-plex [HOST]${NC} Pull Plex log from Cass (or HOST) and ingest"
|
||||||
|
echo -e " ${GREEN}ingest-qbit [HOST]${NC} Pull qBittorrent log locally or from HOST via SSH"
|
||||||
echo -e " ${GREEN}build-fts${NC} Rebuild the FTS search index"
|
echo -e " ${GREEN}build-fts${NC} Rebuild the FTS search index"
|
||||||
echo ""
|
echo ""
|
||||||
echo " Tests:"
|
echo " Tests:"
|
||||||
|
|
@ -233,6 +234,56 @@ case "$CMD" in
|
||||||
exec bash "$0" restart
|
exec bash "$0" restart
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
ingest-qbit)
    # Pull the qBittorrent log from this machine (no argument) or from
    # HOST over SSH, ingest it into $DB, then restart the server.
    QBIT_HOST="${1:-}"
    # Default log locations in priority order
    QBIT_LOG_PATHS=(
        "$HOME/.local/share/qBittorrent/logs/qbittorrent.log"
        "$HOME/.config/qBittorrent/logs/qbittorrent.log"
        "/var/log/qbittorrent/qbittorrent.log"
    )
    # mktemp creates a fresh directory with an unpredictable name, unlike a
    # PID-based path that another user could pre-create in /tmp.
    TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/turnstone-qbit-XXXXXX")" \
        || error "Could not create a temporary directory"

    if [[ -n "$QBIT_HOST" ]]; then
        info "Fetching qBittorrent log from ${QBIT_HOST}…"
        # The default paths were expanded with the *local* $HOME. Ask the
        # remote side for its own home directory so the probe still works
        # when the remote user or home location differs; fall back to the
        # local value if the lookup yields nothing.
        REMOTE_HOME="$(ssh "$QBIT_HOST" 'printf %s "$HOME"' 2>/dev/null)" || REMOTE_HOME=""
        [[ -n "$REMOTE_HOME" ]] || REMOTE_HOME="$HOME"
        REMOTE_LOG=""
        REMOTE_TRIED=()
        for p in "${QBIT_LOG_PATHS[@]}"; do
            # Re-root home-relative candidates onto the remote home.
            if [[ "$p" == "$HOME"/* ]]; then
                p="${REMOTE_HOME}${p#"$HOME"}"
            fi
            REMOTE_TRIED+=("$p")
            if ssh "$QBIT_HOST" "test -f '$p'" 2>/dev/null; then
                REMOTE_LOG="$p"
                break
            fi
        done
        if [[ -z "$REMOTE_LOG" ]]; then
            rm -rf "$TMP_DIR"
            error "No qBittorrent log found on ${QBIT_HOST}. Tried: ${REMOTE_TRIED[*]}"
        fi
        local_name="${QBIT_HOST}-qbittorrent.log"
        # Do not ingest a truncated or empty file if the transfer fails.
        if ! ssh "$QBIT_HOST" "cat '$REMOTE_LOG'" > "${TMP_DIR}/${local_name}"; then
            rm -rf "$TMP_DIR"
            error "Failed to fetch ${REMOTE_LOG} from ${QBIT_HOST}"
        fi
        info "  ← ${REMOTE_LOG} (${QBIT_HOST})"
    else
        LOCAL_LOG=""
        for p in "${QBIT_LOG_PATHS[@]}"; do
            if [[ -f "$p" ]]; then
                LOCAL_LOG="$p"
                break
            fi
        done
        if [[ -z "$LOCAL_LOG" ]]; then
            rm -rf "$TMP_DIR"
            error "No qBittorrent log found locally. Tried: ${QBIT_LOG_PATHS[*]}"
        fi
        if ! cp "$LOCAL_LOG" "${TMP_DIR}/qbittorrent.log"; then
            rm -rf "$TMP_DIR"
            error "Failed to copy ${LOCAL_LOG}"
        fi
        info "  ← ${LOCAL_LOG}"
    fi

    info "Ingesting into ${DB}…"
    # Clean up and stop on a failed ingest instead of restarting the server.
    if ! "$PYTHON" scripts/ingest_corpus.py "${TMP_DIR}"/*.log "$DB"; then
        rm -rf "$TMP_DIR"
        error "Ingest of qBittorrent log into ${DB} failed"
    fi
    # Remove the temp dir explicitly: exec replaces this shell, so nothing
    # after the next line would run.
    rm -rf "$TMP_DIR"
    info "Done. Restarting server…"
    exec bash "$0" restart
    ;;
|
||||||
|
|
||||||
build-fts)
|
build-fts)
|
||||||
info "Rebuilding FTS index for ${DB}…"
|
info "Rebuilding FTS index for ${DB}…"
|
||||||
TURNSTONE_DB="$DB" "$PYTHON" scripts/build_fts_index.py "$DB"
|
TURNSTONE_DB="$DB" "$PYTHON" scripts/build_fts_index.py "$DB"
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,46 @@ patterns:
|
||||||
|
|
||||||
# Add device/service-specific patterns below this line:
|
# Add device/service-specific patterns below this line:
|
||||||
|
|
||||||
|
- name: qbit_tracker_error
|
||||||
|
pattern: "(tracker|announce).*(not working|error|fail|unreachable|timeout|refused|invalid)"
|
||||||
|
severity: WARN
|
||||||
|
description: qBittorrent tracker connection or announce failure
|
||||||
|
|
||||||
|
- name: qbit_port_bind
|
||||||
|
pattern: "(couldn't? listen|bind.*fail|port.*in use|listening.*fail)"
|
||||||
|
severity: CRITICAL
|
||||||
|
description: qBittorrent failed to bind listen port — firewall or port conflict
|
||||||
|
|
||||||
|
- name: qbit_disk_error
|
||||||
|
pattern: "(cannot (write|open|create)|disk.*error|i/o error|file.*fail|write.*fail)"
|
||||||
|
severity: ERROR
|
||||||
|
description: qBittorrent disk write or file access failure
|
||||||
|
|
||||||
|
- name: qbit_hash_fail
|
||||||
|
pattern: "(hash.*(check|fail|mismatch)|recheck|piece.*fail)"
|
||||||
|
severity: WARN
|
||||||
|
description: qBittorrent torrent hash verification failure — possible corrupt data
|
||||||
|
|
||||||
|
- name: qbit_peer_ban
|
||||||
|
pattern: "(peer.*ban|banned.*peer|blocked.*peer)"
|
||||||
|
severity: INFO
|
||||||
|
description: qBittorrent peer banned (encryption enforcement or bad actor)
|
||||||
|
|
||||||
|
- name: qbit_download_complete
|
||||||
|
pattern: "(download.*complet|torrent.*finish|has finished downloading)"
|
||||||
|
severity: INFO
|
||||||
|
description: qBittorrent torrent download completed
|
||||||
|
|
||||||
|
- name: qbit_ratio_limit
|
||||||
|
pattern: "(ratio.*reach|seeding.*limit|stop.*seeding|upload.*limit)"
|
||||||
|
severity: INFO
|
||||||
|
description: qBittorrent seeding ratio or time limit reached
|
||||||
|
|
||||||
|
- name: qbit_session_error
|
||||||
|
pattern: "(session.*error|couldn't? resume|resume.*fail|torrent.*error)"
|
||||||
|
severity: ERROR
|
||||||
|
description: qBittorrent session or resume data error
|
||||||
|
|
||||||
- name: plex_eae_failure
|
- name: plex_eae_failure
|
||||||
pattern: "(EAE timeout|EAE not running|eac3_eae.*error reading output|Error submitting packet to decoder.*I/O error)"
|
pattern: "(EAE timeout|EAE not running|eac3_eae.*error reading output|Error submitting packet to decoder.*I/O error)"
|
||||||
severity: ERROR
|
severity: ERROR
|
||||||
|
|
|
||||||
90
tests/test_ingest_qbittorrent.py
Normal file
90
tests/test_ingest_qbittorrent.py
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
"""Tests for the qBittorrent log ingestor."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.ingest.qbittorrent import is_qbit_log, parse
|
||||||
|
|
||||||
|
SAMPLE_LOG = """\
|
||||||
|
(2026/05/09 14:10:01) qBittorrent v5.0.3 started
|
||||||
|
(2026/05/09 14:10:02) [Warning] Tracker 'http://tracker.example.com/announce' is not working. Reason: Connection timed out
|
||||||
|
(2026/05/09 14:10:03) [Critical] Couldn't listen on any of the network interfaces. Aborting!
|
||||||
|
(2026/05/09 14:10:04) Download of 'ubuntu-24.04.iso' has finished.
|
||||||
|
(2026/05/09 14:10:05) [Warning] Hash check failed for piece 42 of 'ubuntu-24.04.iso'
|
||||||
|
(2026/05/09 14:10:06) Some long message
|
||||||
|
that continues on the next line
|
||||||
|
and a third line
|
||||||
|
(2026/05/09 14:10:07) Normal message without bracket level
|
||||||
|
"""
|
||||||
|
|
||||||
|
DASH_FORMAT = "(2026-05-09 14:10:01) qBittorrent v4.6.2 started\n"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDetector:
    """Fingerprint checks for ``is_qbit_log``."""

    def test_detects_slash_format(self):
        # Slash-separated date inside the parenthesised timestamp.
        sample = "(2026/05/09 14:10:01) qBittorrent started"
        assert is_qbit_log(sample)

    def test_detects_dash_format(self):
        # Dash-separated date variant.
        sample = DASH_FORMAT.strip()
        assert is_qbit_log(sample)

    def test_rejects_plex_format(self):
        sample = "Jan 01, 2026 12:00:00.000 [12345] DEBUG - message"
        assert not is_qbit_log(sample)

    def test_rejects_journald_json(self):
        sample = '{"__REALTIME_TIMESTAMP": "12345", "MESSAGE": "hi"}'
        assert not is_qbit_log(sample)

    def test_rejects_plaintext(self):
        # Same timestamp shape but without the enclosing parentheses.
        sample = "2026-05-09 14:10:01 some syslog line"
        assert not is_qbit_log(sample)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParser:
    """Record-level checks for ``parse`` against the sample logs."""

    def _parse(self, text: str) -> list:
        # Feed the parser newline-terminated lines, as a file would.
        raw_lines = iter(text.splitlines(keepends=True))
        return list(parse(raw_lines, "qbit_test", []))

    def test_entry_count(self):
        parsed = self._parse(SAMPLE_LOG)
        assert len(parsed) == 7

    def test_startup_entry(self):
        first = self._parse(SAMPLE_LOG)[0]
        assert "qBittorrent v5.0.3 started" in first.text
        # No bracket level + no severity keyword in text → None (consistent with other ingestors)
        assert first.severity is None
        assert first.timestamp_iso == "2026-05-09T14:10:01+00:00"

    def test_warning_severity(self):
        parsed = self._parse(SAMPLE_LOG)
        tracker = parsed[1]
        assert tracker.severity == "WARN"
        assert "not working" in tracker.text

    def test_critical_severity(self):
        parsed = self._parse(SAMPLE_LOG)
        port_bind = parsed[2]
        assert port_bind.severity == "CRITICAL"

    def test_multiline_continuation(self):
        parsed = self._parse(SAMPLE_LOG)
        wrapped = parsed[5]
        assert "continues on the next line" in wrapped.text
        assert "third line" in wrapped.text

    def test_no_level_bracket_falls_back_to_detect(self):
        parsed = self._parse(SAMPLE_LOG)
        final = parsed[6]
        assert final.text == "Normal message without bracket level"

    def test_source_id_propagated(self):
        parsed = self._parse(SAMPLE_LOG)
        assert all(entry.source_id == "qbit_test" for entry in parsed)

    def test_sequence_is_monotonic(self):
        parsed = self._parse(SAMPLE_LOG)
        seqs = [entry.sequence for entry in parsed]
        # Sorted and free of duplicates together mean strictly increasing.
        assert seqs == sorted(seqs)
        assert len(set(seqs)) == len(seqs)

    def test_dash_format_timestamp(self):
        raw_lines = iter(DASH_FORMAT.splitlines(keepends=True))
        parsed = list(parse(raw_lines, "qbit", []))
        assert len(parsed) == 1
        assert parsed[0].timestamp_iso == "2026-05-09T14:10:01+00:00"
|
||||||
Loading…
Reference in a new issue