"""qBittorrent log parser. Handles two formats produced by different qBittorrent builds: Classic (pre-5.x GUI builds): (YYYY/MM/DD HH:MM:SS) [Level] Message text (YYYY/MM/DD HH:MM:SS) Message text (no explicit level) Hotio/headless (5.x container builds, e.g. ghcr.io/hotio/qbittorrent): (N) 2026-04-26T03:32:59 - Message text (W) 2026-04-26T03:33:00 - Warning message Level codes for hotio format: N=Normal/Info, I=Info, W=Warning, C=Critical. """ from __future__ import annotations import re from datetime import datetime, timezone from typing import Iterator from app.ingest.base import ( SourceState, apply_patterns, detect_severity, make_entry_id, now_iso, ) from app.services.models import LogPattern, RetrievedEntry # Classic: (2026/05/09 14:23:01) [Warning] Tracker '...' is not working. _CLASSIC_RE = re.compile( r"^\((?P\d{4}[/-]\d{2}[/-]\d{2}\s+\d{2}:\d{2}:\d{2})\)" r"(?:\s+\[(?P[^\]]+)\])?" r"\s+(?P.*)$" ) # Hotio/headless: (N) 2026-04-26T03:33:11 - Successfully listening on IP... _HOTIO_RE = re.compile( r"^\((?P[NIWC])\)\s+" r"(?P\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})" r"\s+-\s+(?P.*)$" ) _LEVEL_MAP: dict[str, str] = { # Classic bracket labels (lowercased for lookup) "normal": "INFO", "info": "INFO", "warning": "WARN", "critical": "CRITICAL", # Hotio single-char codes (lowercased for lookup) "n": "INFO", "i": "INFO", "w": "WARN", "c": "CRITICAL", } def _parse_ts(ts_str: str) -> tuple[str, str]: """Return (raw, iso). Handles classic (space sep) and hotio (T sep) timestamps.""" for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S"): try: dt = datetime.strptime(ts_str, fmt).replace(tzinfo=timezone.utc) return ts_str, dt.isoformat() except ValueError: continue return ts_str, "" def _match_line(line: str) -> dict | None: """Try both log formats. Returns {ts, level, msg} or None.""" m = _CLASSIC_RE.match(line) if m: return {"ts": m.group("ts"), "level": m.group("level") or "", "msg": m.group("msg")} m = _HOTIO_RE.match(line) if m: return {"ts": m.group("ts"), "level": m.group("level"), "msg": m.group("msg")} return None def is_qbit_log(first_line: str) -> bool: return _match_line(first_line.strip()) is not None def parse( lines: Iterator[str], source_id: str, compiled_patterns: list[tuple[LogPattern, object]], ingest_time: str | None = None, ) -> Iterator[RetrievedEntry]: ingest_time = ingest_time or now_iso() state = SourceState() pending_text: str | None = None pending_meta: dict = {} def _emit(text: str, meta: dict) -> RetrievedEntry: repeat, out_of_order = state.observe(text, meta.get("ts_iso")) matched = apply_patterns(text, compiled_patterns) return RetrievedEntry( entry_id=make_entry_id(source_id, state.sequence, text), source_id=source_id, sequence=state.sequence, timestamp_raw=meta.get("ts_raw", ""), timestamp_iso=meta.get("ts_iso", ""), ingest_time=ingest_time, severity=meta.get("severity"), repeat_count=repeat, out_of_order=out_of_order, matched_patterns=matched, text=text, ) for raw_line in lines: line = raw_line.rstrip("\n") m = _match_line(line) if m: if pending_text is not None: yield _emit(pending_text, pending_meta) ts_raw, ts_iso = _parse_ts(m["ts"]) severity = _LEVEL_MAP.get(m["level"].lower()) or detect_severity(m["msg"]) pending_meta = {"ts_raw": ts_raw, "ts_iso": ts_iso, "severity": severity} pending_text = m["msg"] elif pending_text is not None: pending_text += "\n" + line.strip() if pending_text is not None: yield _emit(pending_text, pending_meta)