# cf_voice/context.py — tone classification and context enrichment
#
# BSL 1.1 when real inference models are integrated.
# Currently a passthrough stub: wraps a VoiceIO source and forwards frames.
#
# Real implementation (Notation v0.1.x) will:
#   - Run YAMNet acoustic event detection on the audio buffer
#   - Run wav2vec2-based SER (speech emotion recognition)
#   - Run librosa prosody extraction (pitch, energy, rate)
#   - Combine into enriched VoiceFrame label + confidence
#   - Support pyannote.audio speaker diarization (Navigation v0.2.x)
from __future__ import annotations

import os
from typing import AsyncIterator

from cf_voice.io import VoiceIO, make_io
from cf_voice.models import VoiceFrame


class ContextClassifier:
    """
    Voice context classifier sitting on top of a VoiceIO source.

    Every VoiceFrame produced by the wrapped source is routed through
    ``_enrich`` before it is handed to the consumer. At present ``_enrich``
    is an identity stub, so frames come out exactly as the source produced
    them; the planned enrichment pipeline (YAMNet + wav2vec2 + librosa) is
    not implemented here yet.

    Usage
    -----
        classifier = ContextClassifier.from_env()
        async for frame in classifier.stream():
            print(frame.label, frame.confidence)
    """

    def __init__(self, io: VoiceIO) -> None:
        """Store the VoiceIO source that frames will be pulled from."""
        self._io = io

    @classmethod
    def from_env(cls, interval_s: float = 2.5) -> "ContextClassifier":
        """
        Build a classifier whose source is chosen by ``make_io``.

        ``interval_s`` is forwarded to ``make_io`` unchanged. Source
        selection is delegated entirely to ``make_io``; CF_VOICE_MOCK=1 is
        the documented switch for mock mode (no GPU, no audio hardware
        needed).
        """
        return cls(io=make_io(interval_s=interval_s))

    @classmethod
    def mock(cls, interval_s: float = 2.5, seed: int | None = None) -> "ContextClassifier":
        """Build a classifier backed by MockVoiceIO. Handy for tests."""
        # Imported lazily so MockVoiceIO is only resolved when mock mode is
        # explicitly requested.
        from cf_voice.io import MockVoiceIO

        mock_source = MockVoiceIO(interval_s=interval_s, seed=seed)
        return cls(io=mock_source)

    async def stream(self) -> AsyncIterator[VoiceFrame]:
        """
        Asynchronously yield one enriched VoiceFrame per source frame.

        Iterates the wrapped source's ``stream()`` and passes each frame
        through ``_enrich`` before yielding it.

        Stub: ``_enrich`` is the identity, so frames pass through unchanged.
        Real: the enrichment pipeline will run here before each yield.
        """
        async for raw_frame in self._io.stream():
            enriched_frame = self._enrich(raw_frame)
            yield enriched_frame

    async def stop(self) -> None:
        """Await the wrapped source's ``stop()``."""
        await self._io.stop()

    def _enrich(self, frame: VoiceFrame) -> VoiceFrame:
        """
        Apply tone classification to a single raw frame.

        Stub: identity transform — the very same frame object is returned.
        Real: label + confidence will be replaced with classifier output.
        """
        return frame