From 8db8810667d285186f6f19cc04e69d7cd56cee78 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Fri, 8 May 2026 13:41:34 -0700
Subject: [PATCH] feat: plex EAE watchdog and plex_eae_failure pattern

Add plex_eae_failure pattern to default.yaml targeting the
EasyAudioEncoder crash signature (EAE timeout + I/O error pair, 5s
cadence). Pattern fires when EAE's WAV handoff files stop appearing in
the pms temp directory.

Add watch_plex.py: tail-based watchdog that counts EAE timeout events
and auto-restarts plexmediaserver after N consecutive hits (default 3,
~15s of failure). Includes cooldown, dry-run mode, and a systemd unit
template. The tail follows log rotation, and timeouts more than
--window seconds apart (default 30) do not count as consecutive.
---
 patterns/default.yaml |   6 ++
 scripts/watch_plex.py | 224 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 scripts/watch_plex.py

diff --git a/patterns/default.yaml b/patterns/default.yaml
index 20ff693..5550bcb 100644
--- a/patterns/default.yaml
+++ b/patterns/default.yaml
@@ -82,6 +82,12 @@ patterns:
     description: IP address change or DHCP event
 
   # Add device/service-specific patterns below this line:
+
+  - name: plex_eae_failure
+    pattern: "(EAE timeout|EAE not running|eac3_eae.*error reading output|Error submitting packet to decoder.*I/O error)"
+    severity: ERROR
+    description: Plex EasyAudioEncoder (EAC3 Dolby audio transcoder) crashed — service restart required
+
   # - name: avcx_device_error
   #   pattern: "ERR-\d{4}"
   #   severity: ERROR
diff --git a/scripts/watch_plex.py b/scripts/watch_plex.py
new file mode 100644
index 0000000..0c479e4
--- /dev/null
+++ b/scripts/watch_plex.py
@@ -0,0 +1,224 @@
+"""Watchdog: detect Plex EAE (EasyAudioEncoder) failures and auto-restart.
+
+The EAE daemon handles Dolby EAC3/DDP audio transcoding. When it crashes,
+Plex logs 'EAE timeout!' at 5-second intervals until the service is restarted.
+
+Run as root or a user with `systemctl restart plexmediaserver` permission.
+
+Usage:
+    python scripts/watch_plex.py [--log PATH] [--threshold N] [--cooldown SECS]
+                                 [--window SECS] [--dry-run]
+
+Systemd unit (deploy to /etc/systemd/system/turnstone-plex-watchdog.service):
+    [Unit]
+    Description=Turnstone Plex EAE watchdog
+    After=plexmediaserver.service
+
+    [Service]
+    ExecStart=/usr/bin/python3 /opt/turnstone/scripts/watch_plex.py
+    Restart=always
+    RestartSec=10
+
+    [Install]
+    WantedBy=multi-user.target
+"""
+from __future__ import annotations
+
+import argparse
+import logging
+import os
+import subprocess
+import sys
+import time
+from collections.abc import Iterator
+from pathlib import Path
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+logger = logging.getLogger("plex-watchdog")
+
+DEFAULT_LOG = Path(
+    "/var/lib/plexmediaserver/Library/Application Support"
+    "/Plex Media Server/Logs/Plex Media Server.log"
+)
+EAE_TRIGGER = "EAE timeout!"
+SERVICE = "plexmediaserver"
+POLL_INTERVAL = 0.25  # seconds to sleep when the log has no new data
+RESTART_TIMEOUT = 120  # seconds to wait for `systemctl restart` to return
+DEFAULT_WINDOW = 30.0  # max gap (s) between timeouts of one failure episode
+
+
+def _restart(dry_run: bool) -> bool:
+    """Restart the Plex service via systemctl.
+
+    Returns True when the restart succeeded (or was only simulated because
+    *dry_run* is set) and False when systemctl cannot be run, times out, or
+    exits non-zero. Failures are logged, never raised.
+    """
+    cmd = ["systemctl", "restart", SERVICE]
+    if dry_run:
+        logger.info("[dry-run] would run: %s", " ".join(cmd))
+        return True
+    try:
+        result = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=RESTART_TIMEOUT
+        )
+    except (OSError, subprocess.TimeoutExpired) as exc:
+        # A missing systemctl or a hung restart must not kill the watchdog.
+        logger.error("Restart failed: %s", exc)
+        return False
+    if result.returncode == 0:
+        logger.info("Restart successful")
+        return True
+    logger.error("Restart failed (exit %d): %s", result.returncode, result.stderr.strip())
+    return False
+
+
+def _was_rotated(handle, path: Path) -> bool:
+    """Return True if *path* no longer refers to the file open as *handle*.
+
+    Covers rename-and-recreate rotation (different file, or path missing) and
+    in-place truncation (file is now shorter than our read position).
+    """
+    try:
+        current = os.stat(path)
+    except OSError:
+        return True
+    if not os.path.samestat(current, os.fstat(handle.fileno())):
+        return True
+    return current.st_size < handle.tell()
+
+
+def _tail_f(path: Path) -> Iterator[str]:
+    """Yield complete new lines from a growing log file, like `tail -F`.
+
+    Reading starts at the end of the file so history is not replayed on
+    startup. If the file is rotated or truncated (e.g. when the Plex service
+    restarts), the old file is drained and *path* is reopened from its start;
+    otherwise the watchdog would keep reading the stale, renamed file.
+    """
+    f = open(path, "rb")  # raises if the log is missing at startup
+    try:
+        f.seek(0, os.SEEK_END)  # start at end — don't replay history on startup
+        pending = b""  # partial line whose newline has not been written yet
+        rotated = False
+        while True:
+            chunk = f.readline()
+            if chunk:
+                pending += chunk
+                if pending.endswith(b"\n"):
+                    yield pending.decode("utf-8", errors="replace")
+                    pending = b""
+                continue
+            if not rotated:
+                time.sleep(POLL_INTERVAL)
+                rotated = _was_rotated(f, path)
+                continue
+            # Old file is fully drained; switch to whatever now lives at path.
+            try:
+                replacement = open(path, "rb")
+            except OSError:
+                time.sleep(POLL_INTERVAL)  # new log not created yet — retry
+                continue
+            f.close()
+            f = replacement
+            pending = b""
+            rotated = False
+    finally:
+        f.close()
+
+
+def watch(
+    log_path: Path,
+    threshold: int,
+    cooldown: int,
+    dry_run: bool,
+    window: float = DEFAULT_WINDOW,
+) -> None:
+    """Tail *log_path* and restart Plex after *threshold* consecutive EAE timeouts.
+
+    Timeouts count as consecutive only while each one arrives within *window*
+    seconds of the previous one; isolated timeouts that Plex recovered from
+    must not add up to a restart days later. After a successful restart no
+    further restart is attempted for *cooldown* seconds. With *dry_run* set
+    the restart is only logged. Runs until interrupted.
+    """
+    logger.info(
+        "Watching %s | threshold=%d EAE timeouts | cooldown=%ds%s",
+        log_path, threshold, cooldown, " | DRY RUN" if dry_run else "",
+    )
+
+    eae_count = 0
+    last_hit: float | None = None
+    last_restart: float | None = None
+
+    for line in _tail_f(log_path):
+        line = line.strip()
+
+        if "Started plexmediaserver" in line or "Starting plexmediaserver" in line:
+            if eae_count > 0:
+                logger.info("Service (re)started — resetting EAE counter")
+            eae_count = 0
+            continue
+
+        if EAE_TRIGGER not in line:
+            continue
+
+        # Monotonic read time stands in for the log timestamp: immune to
+        # wall-clock jumps and close enough given the sub-second poll interval.
+        now = time.monotonic()
+        if last_hit is not None and now - last_hit > window:
+            eae_count = 0  # earlier timeouts belong to an episode that recovered
+        last_hit = now
+
+        eae_count += 1
+        logger.warning("EAE timeout #%d detected", eae_count)
+
+        if eae_count < threshold:
+            continue
+
+        if last_restart is not None:
+            remaining = cooldown - (now - last_restart)
+            if remaining > 0:
+                logger.warning(
+                    "Threshold reached but cooldown active (%.0fs remaining) — skipping restart",
+                    remaining,
+                )
+                continue
+
+        logger.error(
+            "EAE failure confirmed (%d timeouts) — restarting %s", eae_count, SERVICE
+        )
+        if _restart(dry_run):
+            last_restart = time.monotonic()
+            eae_count = 0
+
+
+def main() -> None:
+    """Parse command-line options and run the watchdog until interrupted."""
+    parser = argparse.ArgumentParser(description="Plex EAE watchdog")
+    parser.add_argument("--log", type=Path, default=DEFAULT_LOG, help="Plex server log path")
+    parser.add_argument("--threshold", type=int, default=3, help="EAE timeouts before restart (default 3)")
+    parser.add_argument("--cooldown", type=int, default=300, help="Seconds between restarts (default 300)")
+    parser.add_argument("--window", type=float, default=DEFAULT_WINDOW, help="Max seconds between consecutive EAE timeouts (default 30)")
+    parser.add_argument("--dry-run", action="store_true", help="Log what would happen without restarting")
+    args = parser.parse_args()
+
+    if args.threshold < 1:
+        parser.error("--threshold must be at least 1")
+
+    if not args.log.exists():
+        logger.error("Log file not found: %s", args.log)
+        sys.exit(1)
+
+    try:
+        watch(args.log, args.threshold, args.cooldown, args.dry_run, args.window)
+    except KeyboardInterrupt:
+        logger.info("Watchdog stopped")
+
+
+if __name__ == "__main__":
+    main()