cf-voice/cf_voice/io.py
pyr0ball 35fc0a088c feat: initial cf-voice stub — VoiceFrame API, mock IO, context classifier
- VoiceFrame dataclass: label, confidence, speaker_id, shift_magnitude, timestamp
- MockVoiceIO: async generator of synthetic frames on a timer (CF_VOICE_MOCK=1)
- ContextClassifier: passthrough stub wrapping VoiceIO; _enrich() hook for real classifiers
- make_io() factory: mock mode auto-detected from env, raises NotImplementedError for real audio
- cf-voice-demo CLI entry point for quick smoke-testing
- 12 tests passing; editable install via pip install -e ../cf-voice
2026-04-06 16:03:07 -07:00

122 lines
3.8 KiB
Python

# cf_voice/io.py — audio capture and VoiceFrame generation
#
# MIT licensed. This layer handles audio I/O only — no inference.
#
# In mock mode (CF_VOICE_MOCK=1 or MockVoiceIO), synthetic VoiceFrames are
# emitted on a timer. Real audio capture will be added in Notation v0.1.x
# once pyannote.audio and faster-whisper are integrated.
from __future__ import annotations
import asyncio
import os
import random
import time
from abc import ABC, abstractmethod
from typing import AsyncIterator
from cf_voice.models import VoiceFrame
# Generic tone labels for the annotation stream.
# These are the underlying classifier outputs — the Elcor-style prefix format
# ("With barely concealed frustration:") is applied by the UI layer, not here.
# Pool of tone labels the mock stream samples from (one per emitted frame).
_MOCK_LABELS = [
    "Calm and focused",
    "Warmly impatient",
    "Deflecting",
    "Genuinely curious",
    "Politely dismissive",
    "Nervous but cooperative",
    "Frustrated but contained",
    "Enthusiastic",
    "Tired and compliant",
    "Guardedly optimistic",
    "Apologetically firm",
    "Confused but engaged",
]

# Default speaker ids the mock stream picks between.
_MOCK_SPEAKERS = [
    "speaker_a",
    "speaker_b",
]
class VoiceIO(ABC):
    """
    Base class for all audio capture sources.

    Subclasses implement ``stream()`` as an async generator that yields
    VoiceFrame objects.
    Consumers should use: `async for frame in io_instance.stream(): ...`
    """

    @abstractmethod
    def stream(self) -> AsyncIterator[VoiceFrame]:
        """
        Yield VoiceFrames continuously until stopped.

        Declared with a plain ``def`` on purpose: an ``async def`` whose body
        contains no ``yield`` is a coroutine function, so type checkers would
        treat ``stream()`` as returning a coroutine instead of something that
        can be used directly in ``async for``. Subclasses override this with
        an ``async def`` generator (a body that contains ``yield``).
        """
        ...

    async def stop(self) -> None:
        """Signal the stream to stop. Override if cleanup is needed."""
class MockVoiceIO(VoiceIO):
    """
    Synthetic VoiceFrame generator for development and CI.

    Emits one frame every `interval_s` seconds with randomised labels,
    confidence, and simulated speaker transitions.
    Activated automatically when CF_VOICE_MOCK=1 is set, or instantiated
    directly in tests.
    """

    def __init__(
        self,
        interval_s: float = 2.5,
        speakers: list[str] | None = None,
        labels: list[str] | None = None,
        seed: int | None = None,
    ) -> None:
        """
        Configure the mock stream.

        Args:
            interval_s: Seconds to sleep before each emitted frame.
            speakers: Speaker ids to choose from. ``None`` or an empty list
                falls back to the module default speakers.
            labels: Tone labels to choose from. ``None`` or an empty list
                falls back to the module default labels.
            seed: Seed for this instance's private ``random.Random``; the
                same seed reproduces the same random draws.
        """
        self._interval_s = interval_s
        self._speakers = speakers or _MOCK_SPEAKERS
        self._labels = labels or _MOCK_LABELS
        self._rng = random.Random(seed)
        self._running = False

    async def stream(self) -> AsyncIterator[VoiceFrame]:  # type: ignore[override]
        """
        Yield synthetic VoiceFrames until ``stop()`` is called.

        Each frame's ``timestamp`` is the number of seconds elapsed since this
        call started iterating, measured with the monotonic clock.
        """
        self._running = True
        start = time.monotonic()
        # Initial draw is never emitted; it only gives the first frame
        # something to compare against when computing shift_magnitude.
        prev_label = self._rng.choice(self._labels)
        while self._running:
            await asyncio.sleep(self._interval_s)
            # stop() may have been called from another task while we slept;
            # do not emit a frame after the stream was asked to stop.
            if not self._running:
                break
            label = self._rng.choice(self._labels)
            # shift_magnitude is 0 when the label repeats, otherwise a random
            # value in the 0.1-0.9 range.
            shift = 0.0 if label == prev_label else self._rng.uniform(0.1, 0.9)
            prev_label = label
            yield VoiceFrame(
                label=label,
                confidence=self._rng.uniform(0.55, 0.98),
                speaker_id=self._rng.choice(self._speakers),
                shift_magnitude=round(shift, 3),
                timestamp=round(time.monotonic() - start, 2),
            )

    async def stop(self) -> None:
        """Clear the running flag so ``stream()`` ends without emitting further frames."""
        self._running = False
def make_io(
    mock: bool | None = None,
    interval_s: float = 2.5,
) -> VoiceIO:
    """
    Factory: build the VoiceIO implementation suited to the current environment.

    mock=True or CF_VOICE_MOCK=1 → MockVoiceIO (no audio hardware needed)
    Otherwise → real audio capture (not yet implemented)

    Args:
        mock: Explicit choice. When ``None``, the CF_VOICE_MOCK environment
            variable decides; only the exact value ``"1"`` enables mock mode.
        interval_s: Passed through to MockVoiceIO as its frame interval.

    Raises:
        NotImplementedError: When mock mode is not selected.
    """
    if mock is None:
        # No explicit choice from the caller: defer to the environment.
        mock_enabled = os.environ.get("CF_VOICE_MOCK", "") == "1"
    else:
        mock_enabled = mock

    if not mock_enabled:
        raise NotImplementedError(
            "Real audio capture is not yet implemented. "
            "Set CF_VOICE_MOCK=1 or pass mock=True to use synthetic frames."
        )
    return MockVoiceIO(interval_s=interval_s)