Add plex_eae_failure pattern to default.yaml targeting the EasyAudioEncoder crash signature (EAE timeout + I/O error pair, 5s cadence). Pattern fires when EAE's WAV handoff files stop appearing in the pms temp directory. Add watch_plex.py: tail-based watchdog that counts EAE timeout events and auto-restarts plexmediaserver after N consecutive hits (default 3, ~15s of failure). Includes cooldown, dry-run mode, and a systemd unit template.
136 lines
4.1 KiB
Python
136 lines
4.1 KiB
Python
"""Watchdog: detect Plex EAE (EasyAudioEncoder) failures and auto-restart.
|
|
|
|
The EAE daemon handles Dolby EAC3/DDP audio transcoding. When it crashes,
|
|
Plex logs 'EAE timeout!' at 5-second intervals until the service is restarted.
|
|
|
|
Run as root or a user with `systemctl restart plexmediaserver` permission.
|
|
|
|
Usage:
|
|
python scripts/watch_plex.py [--log PATH] [--threshold N] [--cooldown SECS] [--dry-run]
|
|
|
|
Systemd unit (deploy to /etc/systemd/system/turnstone-plex-watchdog.service):
|
|
[Unit]
|
|
Description=Turnstone Plex EAE watchdog
|
|
After=plexmediaserver.service
|
|
|
|
[Service]
|
|
ExecStart=/usr/bin/python3 /opt/turnstone/scripts/watch_plex.py
|
|
Restart=always
|
|
RestartSec=10
|
|
|
|
[Install]
|
|
WantedBy=multi-user.target
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import logging
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
# Root logging: INFO and above, one timestamped record per line.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger("plex-watchdog")

# systemd unit that gets restarted when the failure threshold is reached.
SERVICE = "plexmediaserver"

# Substring searched for in each new log line to count an EAE failure.
EAE_TRIGGER = "EAE timeout!"

# Log file followed when --log is not given on the command line.
DEFAULT_LOG = Path(
    "/var/lib/plexmediaserver/Library/Application Support"
    "/Plex Media Server/Logs/Plex Media Server.log"
)
|
|
|
|
|
|
def _restart(dry_run: bool, *, timeout: float = 120.0) -> bool:
    """Restart the Plex service via ``systemctl restart``.

    Args:
        dry_run: When true, only log the command that would be run and
            report success without executing anything.
        timeout: Maximum number of seconds to wait for ``systemctl`` to
            finish before giving up on this attempt.

    Returns:
        True if the restart succeeded (or was skipped by dry-run), False if
        ``systemctl`` could not be started, timed out, or exited non-zero.
    """
    cmd = ["systemctl", "restart", SERVICE]
    if dry_run:
        logger.info("[dry-run] would run: %s", " ".join(cmd))
        return True

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        # A hung systemctl must not block the watchdog loop forever.
        logger.error("Restart timed out after %.0fs", timeout)
        return False
    except OSError as exc:
        # systemctl missing or not executable: report failure instead of
        # crashing the watchdog with a traceback.
        logger.error("Restart failed: could not run %s: %s", cmd[0], exc)
        return False

    if result.returncode == 0:
        logger.info("Restart successful")
        return True

    logger.error("Restart failed (exit %d): %s", result.returncode, result.stderr.strip())
    return False
|
|
|
|
|
|
def _tail_f(path: Path):
|
|
"""Yield new lines from a growing log file, blocking between reads."""
|
|
with open(path, errors="replace") as f:
|
|
f.seek(0, 2) # start at end — don't replay history on startup
|
|
while True:
|
|
line = f.readline()
|
|
if line:
|
|
yield line
|
|
else:
|
|
time.sleep(0.25)
|
|
|
|
|
|
def watch(log_path: Path, threshold: int, cooldown: int, dry_run: bool) -> None:
    """Follow the Plex log and restart the service on repeated EAE timeouts.

    Every new log line containing ``EAE_TRIGGER`` increments a counter. Once
    the counter reaches ``threshold`` a restart is attempted, unless the
    previous successful restart happened less than ``cooldown`` seconds ago.
    The counter is reset after a successful restart and when a line
    announcing a service start is seen.

    The cooldown is measured with a monotonic clock so that wall-clock
    adjustments (NTP steps, manual changes) can neither bypass nor extend it.

    Args:
        log_path: Plex server log file to follow.
        threshold: Number of EAE timeout lines required before restarting.
        cooldown: Minimum number of seconds between two restarts.
        dry_run: Log the restart command instead of executing it.

    This function loops for as long as the log tail keeps producing lines
    and does not return in normal operation.
    """
    logger.info(
        "Watching %s | threshold=%d EAE timeouts | cooldown=%ds%s",
        log_path, threshold, cooldown, " | DRY RUN" if dry_run else "",
    )

    eae_count = 0
    # Monotonic timestamp of the last successful restart; None until one happens.
    last_restart: float | None = None

    for line in _tail_f(log_path):
        line = line.strip()

        if "Started plexmediaserver" in line or "Starting plexmediaserver" in line:
            if eae_count > 0:
                logger.info("Service (re)started — resetting EAE counter")
            eae_count = 0
            continue

        if EAE_TRIGGER not in line:
            continue

        eae_count += 1
        logger.warning("EAE timeout #%d detected", eae_count)

        if eae_count < threshold:
            continue

        now = time.monotonic()
        if last_restart is not None:
            remaining = cooldown - (now - last_restart)
            if remaining > 0:
                logger.warning(
                    "Threshold reached but cooldown active (%.0fs remaining) — skipping restart",
                    remaining,
                )
                continue

        logger.error(
            "EAE failure confirmed (%d timeouts) — restarting %s", eae_count, SERVICE
        )
        # On failure nothing is reset, so the next timeout line retries the restart.
        if _restart(dry_run):
            last_restart = now
            eae_count = 0
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point for the watchdog.

    Parses the flags, exits with status 1 if the log file does not exist,
    and otherwise runs the watch loop until interrupted with Ctrl-C.
    """
    cli = argparse.ArgumentParser(description="Plex EAE watchdog")
    # Flags are registered in this order so the generated --help is unchanged.
    flag_table = (
        ("--log", {"type": Path, "default": DEFAULT_LOG, "help": "Plex server log path"}),
        ("--threshold", {"type": int, "default": 3, "help": "EAE timeouts before restart (default 3)"}),
        ("--cooldown", {"type": int, "default": 300, "help": "Seconds between restarts (default 300)"}),
        ("--dry-run", {"action": "store_true", "help": "Log what would happen without restarting"}),
    )
    for flag, options in flag_table:
        cli.add_argument(flag, **options)
    opts = cli.parse_args()

    log_file = opts.log
    if not log_file.exists():
        logger.error("Log file not found: %s", log_file)
        sys.exit(1)

    try:
        watch(log_file, opts.threshold, opts.cooldown, opts.dry_run)
    except KeyboardInterrupt:
        logger.info("Watchdog stopped")
|
|
|
|
|
|
# Start the watchdog only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|