""" Manual integration test for speaker diarization via pyannote. Requires: - HF_TOKEN env var (or set below) - CF_VOICE_DIARIZE=1 - ffmpeg on PATH - A local audio/video file (edit MEDIA_FILE below) - pip install cf-voice[inference] Run: HF_TOKEN=hf_... CF_VOICE_DIARIZE=1 python scripts/test_diarize_real.py """ from __future__ import annotations import asyncio import os import subprocess import numpy as np # Override if not in env if not os.environ.get("HF_TOKEN"): raise SystemExit("Set HF_TOKEN in env before running this script.") os.environ.setdefault("CF_VOICE_DIARIZE", "1") MEDIA_FILE = "/Library/Series/Hogan's Heroes/Season 3/Hogan's Heroes - S03E19 - Hogan, Go Home.mkv" START_S = 120 DURATION_S = 2 SAMPLE_RATE = 16_000 from cf_voice.diarize import Diarizer, SpeakerTracker # noqa: E402 async def main() -> None: d = Diarizer.from_env() tracker = SpeakerTracker() proc = subprocess.run( [ "ffmpeg", "-i", MEDIA_FILE, "-ss", str(START_S), "-t", str(DURATION_S), "-ar", str(SAMPLE_RATE), "-ac", "1", "-f", "s16le", "-", ], capture_output=True, check=True, ) audio = np.frombuffer(proc.stdout, dtype=np.int16).astype(np.float32) / 32768.0 rms = float(np.sqrt(np.mean(audio**2))) print(f"audio: {len(audio)} samples, {len(audio) / SAMPLE_RATE:.2f}s, rms={rms:.4f}") segs = await d.diarize_async(audio) print(f"segments ({len(segs)}): {segs}") mid = len(audio) / 2.0 / SAMPLE_RATE label = d.speaker_at(segs, mid, tracker) print(f"speaker_at({mid:.2f}s): {label}") if __name__ == "__main__": asyncio.run(main())