- VoiceFrame dataclass: label, confidence, speaker_id, shift_magnitude, timestamp
- MockVoiceIO: async generator of synthetic frames on a timer (CF_VOICE_MOCK=1)
- ContextClassifier: passthrough stub wrapping VoiceIO; _enrich() hook for real classifiers
- make_io() factory: mock mode auto-detected from env, raises NotImplementedError for real audio
- cf-voice-demo CLI entry point for quick smoke-testing
- 12 tests passing; editable install via pip install -e ../cf-voice
74 lines
2.5 KiB
Python
# cf_voice/context.py — tone classification and context enrichment
#
# BSL 1.1 when real inference models are integrated.
#
# Currently a passthrough stub: wraps a VoiceIO source and forwards frames.
#
# Real implementation (Notation v0.1.x) will:
# - Run YAMNet acoustic event detection on the audio buffer
# - Run wav2vec2-based SER (speech emotion recognition)
# - Run librosa prosody extraction (pitch, energy, rate)
# - Combine into enriched VoiceFrame label + confidence
# - Support pyannote.audio speaker diarization (Navigation v0.2.x)
|
from __future__ import annotations

import os
from typing import AsyncIterator

from cf_voice.io import VoiceIO, make_io
from cf_voice.models import VoiceFrame

|
class ContextClassifier:
    """High-level voice context classifier.

    Wraps a VoiceIO source and annotates every VoiceFrame with tone
    information. While running as a stub, each frame is forwarded
    untouched — the real enrichment pipeline (YAMNet + wav2vec2 +
    librosa) lands incrementally.

    Usage
    -----
    classifier = ContextClassifier.from_env()
    async for frame in classifier.stream():
        print(frame.label, frame.confidence)
    """

    def __init__(self, io: VoiceIO) -> None:
        # All streaming and shutdown are delegated to this source.
        self._io = io

    @classmethod
    def from_env(cls, interval_s: float = 2.5) -> "ContextClassifier":
        """Build a ContextClassifier from environment configuration.

        Setting CF_VOICE_MOCK=1 selects mock mode (no GPU and no audio
        hardware required).
        """
        return cls(io=make_io(interval_s=interval_s))

    @classmethod
    def mock(cls, interval_s: float = 2.5, seed: int | None = None) -> "ContextClassifier":
        """Return a ContextClassifier driven by MockVoiceIO — handy in tests."""
        # Imported lazily so the mock backend is only loaded on demand.
        from cf_voice.io import MockVoiceIO

        mock_source = MockVoiceIO(interval_s=interval_s, seed=seed)
        return cls(io=mock_source)

    async def stream(self) -> AsyncIterator[VoiceFrame]:
        """Continuously yield enriched VoiceFrames.

        Stub behavior: frames arrive from the IO layer and pass through
        as-is. The real pipeline will enrich each frame before yielding.
        """
        async for raw in self._io.stream():
            yield self._enrich(raw)

    async def stop(self) -> None:
        """Shut down the underlying IO source."""
        await self._io.stop()

    def _enrich(self, frame: VoiceFrame) -> VoiceFrame:
        """Apply tone classification to a raw frame.

        Stub behavior: identity transform — the frame is returned
        unmodified. The real classifier will overwrite label and
        confidence with its own output.
        """
        return frame
|