New modules shipped (from Linnet integration): - acoustic.py: AST (MIT/ast-finetuned-audioset-10-10-0.4593) replaces YAMNet stub; 527 AudioSet classes mapped to queue/speaker/environ/scene labels; _LABEL_MAP includes hold_music, ringback, DTMF, background_shift, AMD signal chain - accent.py: facebook/mms-lid-126 language ID → regional accent labels (en_gb, en_us, en_au, fr, es, de, zh, …); lazy-loaded, gated by CF_VOICE_ACCENT - privacy.py: compound privacy risk scorer — public_env, background_voices, nature scene, accent signals; returns 0–3 score without storing any audio - prosody.py: openSMILE-backed prosody extractor (sarcasm_risk, flat_f0_score, speech_rate, pitch_range); mock mode returns neutral values - dimensional.py: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim valence/arousal/dominance scorer; gated by CF_VOICE_DIMENSIONAL - trajectory.py: rolling buffer for arousal/valence deltas, trend detection (escalating/suppressed/stable), coherence scoring, suppression/reframe flags - telephony.py: TelephonyBackend Protocol + MockTelephonyBackend + SignalWireBackend + FreeSWITCHBackend; CallSession dataclass; make_telephony() factory - app.py: FastAPI service (port 8007) — /health + /classify; accepts base64 PCM chunks, returns full AudioEventOut including dimensional/prosody/accent fields - prefs.py: voice preference helpers (elcor_mode, confidence_threshold, whisper_model, elcor_prior_frames); cf-core and env-var fallback Tests: fix stale tests (YAMNetAcousticBackend → ASTAcousticBackend, scene field added to AcousticResult, speaker_at gap now resolves dominant speaker not UNKNOWN, make_io real path returns MicVoiceIO when sounddevice installed). 78 tests passing. Closes #2, #3.
154 lines · 5.6 KiB · Python
import asyncio

import pytest

from cf_voice.context import ContextClassifier
from cf_voice.io import MockVoiceIO, make_io
from cf_voice.models import VoiceFrame


def make_frame(**kwargs) -> VoiceFrame:
    """Build a VoiceFrame from sensible defaults, overridden by *kwargs*."""
    params = {
        "label": "Calm and focused",
        "confidence": 0.8,
        "speaker_id": "speaker_a",
        "shift_magnitude": 0.0,
        "timestamp": 1.0,
    }
    params.update(kwargs)
    return VoiceFrame(**params)


class TestVoiceFrame:
    """Threshold semantics of VoiceFrame.is_reliable() and is_shift()."""

    def test_is_reliable_above_threshold(self):
        frame = make_frame(confidence=0.7)
        assert frame.is_reliable(threshold=0.6)

    def test_is_reliable_below_threshold(self):
        frame = make_frame(confidence=0.4)
        assert not frame.is_reliable(threshold=0.6)

    def test_is_shift_above_threshold(self):
        frame = make_frame(shift_magnitude=0.5)
        assert frame.is_shift(threshold=0.3)

    def test_is_shift_below_threshold(self):
        frame = make_frame(shift_magnitude=0.1)
        assert not frame.is_shift(threshold=0.3)

    def test_default_reliable_threshold(self):
        # Default cutoff is 0.6 and inclusive at the boundary.
        assert make_frame(confidence=0.6).is_reliable()
        assert not make_frame(confidence=0.59).is_reliable()


class TestMockVoiceIO:
    """Behavior of the mock frame source and the make_io() factory."""

    @staticmethod
    async def _collect(io, limit: int) -> list:
        """Drain up to *limit* frames from *io*, then stop the stream."""
        frames = []
        async for frame in io.stream():
            frames.append(frame)
            if len(frames) >= limit:
                await io.stop()
                break
        return frames

    @pytest.mark.asyncio
    async def test_emits_frames(self):
        io = MockVoiceIO(interval_s=0.05, seed=42)
        frames = await self._collect(io, 3)
        assert len(frames) == 3
        assert all(isinstance(f, VoiceFrame) for f in frames)

    @pytest.mark.asyncio
    async def test_confidence_in_range(self):
        io = MockVoiceIO(interval_s=0.05, seed=1)
        frames = await self._collect(io, 5)
        # Guard against a vacuous pass if the stream ends before 5 frames.
        assert len(frames) == 5
        for frame in frames:
            assert 0.0 <= frame.confidence <= 1.0
            assert 0.0 <= frame.shift_magnitude <= 1.0

    @pytest.mark.asyncio
    async def test_timestamps_increase(self):
        io = MockVoiceIO(interval_s=0.05, seed=0)
        frames = await self._collect(io, 3)
        timestamps = [f.timestamp for f in frames]
        assert len(timestamps) == 3
        # Strictly increasing: frames emitted 50 ms apart must never share
        # a timestamp (the old `== sorted(...)` check allowed duplicates).
        assert all(a < b for a, b in zip(timestamps, timestamps[1:]))

    def test_make_io_mock_env(self, monkeypatch):
        monkeypatch.setenv("CF_VOICE_MOCK", "1")
        io = make_io()
        assert isinstance(io, MockVoiceIO)

    def test_make_io_real_returns_mic_io(self, monkeypatch):
        """make_io(mock=False) returns MicVoiceIO when sounddevice/numpy are installed."""
        from cf_voice.capture import MicVoiceIO

        monkeypatch.delenv("CF_VOICE_MOCK", raising=False)
        io = make_io(mock=False)
        assert isinstance(io, MicVoiceIO)


class TestContextClassifierChunk:
    """Tests for classify_chunk() — multi-class event output."""

    def _tone_event(self, events):
        """Return the single tone event, failing clearly when it is absent.

        Indexing `[0]` / calling `next()` without this guard turned a missing
        tone event into IndexError/StopIteration instead of a test failure.
        """
        tone_events = [e for e in events if e.event_type == "tone"]
        assert len(tone_events) == 1, "expected exactly one tone event"
        return tone_events[0]

    def test_mock_returns_four_event_types(self):
        classifier = ContextClassifier.mock(interval_s=0.05, seed=10)
        events = classifier.classify_chunk(timestamp=1.0)
        types = {e.event_type for e in events}
        # In mock mode all four event types should be present
        assert {"tone", "queue", "speaker", "environ"} <= types

    def test_mock_tone_event_has_subtext(self):
        classifier = ContextClassifier.mock(interval_s=0.05, seed=11)
        events = classifier.classify_chunk(timestamp=0.0)
        assert self._tone_event(events).subtext is not None

    def test_elcor_override_flag(self):
        classifier = ContextClassifier.mock(interval_s=0.05, seed=12)
        events_generic = classifier.classify_chunk(timestamp=0.0, elcor=False)
        events_elcor = classifier.classify_chunk(timestamp=0.0, elcor=True)

        generic_sub = self._tone_event(events_generic).subtext
        elcor_sub = self._tone_event(events_elcor).subtext
        # Generic format: "Tone: X". Elcor format: "With X:" or "Warmly:" etc.
        assert generic_sub.startswith("Tone:") or not generic_sub.endswith(":")
        # Elcor format ends with ":"
        assert elcor_sub.endswith(":")

    def test_session_id_propagates(self):
        classifier = ContextClassifier.mock(interval_s=0.05, seed=13)
        events = classifier.classify_chunk(timestamp=0.0, session_id="ses_test")
        assert self._tone_event(events).session_id == "ses_test"

    def test_prior_frames_zero_means_no_shift(self):
        classifier = ContextClassifier.mock(interval_s=0.05, seed=14)
        events = classifier.classify_chunk(timestamp=0.0, prior_frames=0)
        assert self._tone_event(events).shift_magnitude == 0.0


class TestContextClassifier:
    """Streaming behavior of ContextClassifier in mock mode."""

    @pytest.mark.asyncio
    async def test_mock_passthrough(self):
        classifier = ContextClassifier.mock(interval_s=0.05, seed=7)
        collected = []
        async for frame in classifier.stream():
            collected.append(frame)
            if len(collected) == 3:
                await classifier.stop()
                break
        assert len(collected) == 3
        for frame in collected:
            assert isinstance(frame, VoiceFrame)

    @pytest.mark.asyncio
    async def test_from_env_mock(self, monkeypatch):
        monkeypatch.setenv("CF_VOICE_MOCK", "1")
        classifier = ContextClassifier.from_env(interval_s=0.05)
        async for frame in classifier.stream():
            # A single frame is enough to prove the env-driven mock path works.
            assert isinstance(frame, VoiceFrame)
            await classifier.stop()
            break