cf-voice/tests/test_models.py
pyr0ball 24f04b67db feat: full voice pipeline — AST acoustic, accent, privacy, prosody, dimensional, trajectory, telephony, FastAPI app
New modules shipped (from Linnet integration):
- acoustic.py: AST (MIT/ast-finetuned-audioset-10-10-0.4593) replaces YAMNet stub;
  527 AudioSet classes mapped to queue/speaker/environ/scene labels; _LABEL_MAP
  includes hold_music, ringback, DTMF, background_shift, AMD signal chain
- accent.py: facebook/mms-lid-126 language ID → regional accent labels
  (en_gb, en_us, en_au, fr, es, de, zh, …); lazy-loaded, gated by CF_VOICE_ACCENT
- privacy.py: compound privacy risk scorer — public_env, background_voices,
  nature scene, accent signals; returns 0–3 score without storing any audio
- prosody.py: openSMILE-backed prosody extractor (sarcasm_risk, flat_f0_score,
  speech_rate, pitch_range); mock mode returns neutral values
- dimensional.py: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim
  valence/arousal/dominance scorer; gated by CF_VOICE_DIMENSIONAL
- trajectory.py: rolling buffer for arousal/valence deltas, trend detection
  (escalating/suppressed/stable), coherence scoring, suppression/reframe flags
- telephony.py: TelephonyBackend Protocol + MockTelephonyBackend + SignalWireBackend
  + FreeSWITCHBackend; CallSession dataclass; make_telephony() factory
- app.py: FastAPI service (port 8007) — /health + /classify; accepts base64 PCM
  chunks, returns full AudioEventOut including dimensional/prosody/accent fields
- prefs.py: voice preference helpers (elcor_mode, confidence_threshold,
  whisper_model, elcor_prior_frames); cf-core and env-var fallback

Tests: fix stale tests (YAMNetAcousticBackend → ASTAcousticBackend, scene field
added to AcousticResult, speaker_at gap now resolves dominant speaker not UNKNOWN,
make_io real path returns MicVoiceIO when sounddevice installed). 78 tests passing.

Closes #2, #3.
2026-04-18 22:36:58 -07:00

154 lines
5.6 KiB
Python

import asyncio
import pytest
from cf_voice.models import VoiceFrame
from cf_voice.io import MockVoiceIO, make_io
from cf_voice.context import ContextClassifier
def make_frame(**kwargs) -> VoiceFrame:
    """Build a VoiceFrame for tests, with keyword arguments overriding a baseline.

    The baseline frame is labelled "Calm and focused" with confidence 0.8,
    speaker ``speaker_a``, no shift, and timestamp 1.0. Any keyword passed by
    the caller replaces the baseline value of the same name.
    """
    fields = {
        "label": "Calm and focused",
        "confidence": 0.8,
        "speaker_id": "speaker_a",
        "shift_magnitude": 0.0,
        "timestamp": 1.0,
    }
    # Caller-supplied values win over the baseline.
    fields.update(kwargs)
    return VoiceFrame(**fields)
class TestVoiceFrame:
    """Threshold checks for VoiceFrame.is_reliable() and VoiceFrame.is_shift()."""

    def test_is_reliable_above_threshold(self):
        """Confidence above the given threshold counts as reliable."""
        frame = make_frame(confidence=0.7)
        assert frame.is_reliable(threshold=0.6)

    def test_is_reliable_below_threshold(self):
        """Confidence below the given threshold is not reliable."""
        frame = make_frame(confidence=0.4)
        assert not frame.is_reliable(threshold=0.6)

    def test_is_shift_above_threshold(self):
        """Shift magnitude above the given threshold counts as a shift."""
        frame = make_frame(shift_magnitude=0.5)
        assert frame.is_shift(threshold=0.3)

    def test_is_shift_below_threshold(self):
        """Shift magnitude below the given threshold is not a shift."""
        frame = make_frame(shift_magnitude=0.1)
        assert not frame.is_shift(threshold=0.3)

    def test_default_reliable_threshold(self):
        """With no explicit threshold, 0.6 is reliable and 0.59 is not."""
        at_cutoff = make_frame(confidence=0.6)
        assert at_cutoff.is_reliable()
        just_under = make_frame(confidence=0.59)
        assert not just_under.is_reliable()
class TestMockVoiceIO:
    """Tests for MockVoiceIO streaming and the make_io() factory."""

    @pytest.mark.asyncio
    async def test_emits_frames(self):
        """The mock stream yields VoiceFrame objects until it is stopped."""
        io = MockVoiceIO(interval_s=0.05, seed=42)
        frames = []
        async for frame in io.stream():
            frames.append(frame)
            if len(frames) >= 3:
                await io.stop()
                break
        assert len(frames) == 3
        assert all(isinstance(f, VoiceFrame) for f in frames)

    @pytest.mark.asyncio
    async def test_confidence_in_range(self):
        """confidence and shift_magnitude stay within [0.0, 1.0] on every frame."""
        io = MockVoiceIO(interval_s=0.05, seed=1)
        count = 0
        async for frame in io.stream():
            assert 0.0 <= frame.confidence <= 1.0
            assert 0.0 <= frame.shift_magnitude <= 1.0
            count += 1
            if count >= 5:
                await io.stop()
                break
        # Without this, a stream that yields nothing (or ends early) would
        # skip the range checks and the test would pass vacuously.
        assert count == 5

    @pytest.mark.asyncio
    async def test_timestamps_increase(self):
        """Frame timestamps arrive in non-decreasing order."""
        io = MockVoiceIO(interval_s=0.05, seed=0)
        timestamps = []
        async for frame in io.stream():
            timestamps.append(frame.timestamp)
            if len(timestamps) >= 3:
                await io.stop()
                break
        # An empty or short list is trivially sorted, so pin the length first.
        assert len(timestamps) == 3
        assert timestamps == sorted(timestamps)

    def test_make_io_mock_env(self, monkeypatch):
        """With CF_VOICE_MOCK=1 in the environment, make_io() returns a MockVoiceIO."""
        monkeypatch.setenv("CF_VOICE_MOCK", "1")
        io = make_io()
        assert isinstance(io, MockVoiceIO)

    def test_make_io_real_returns_mic_io(self, monkeypatch):
        """make_io(mock=False) returns MicVoiceIO when sounddevice/numpy are installed."""
        # The expectation only holds when the optional audio dependencies are
        # present; skip instead of erroring on machines that lack them.
        pytest.importorskip("numpy")
        pytest.importorskip("sounddevice")
        from cf_voice.capture import MicVoiceIO

        monkeypatch.delenv("CF_VOICE_MOCK", raising=False)
        io = make_io(mock=False)
        assert isinstance(io, MicVoiceIO)
class TestContextClassifierChunk:
    """Exercise ContextClassifier.classify_chunk() on a mock classifier.

    Each test builds its own mock classifier with a distinct seed and
    inspects the events returned for a single chunk.
    """

    @staticmethod
    def _tone_events(events):
        """Return the events whose event_type is "tone", in their original order."""
        return list(filter(lambda ev: ev.event_type == "tone", events))

    def test_mock_returns_four_event_types(self):
        """A mock chunk contains tone, queue, speaker and environ events."""
        mock_classifier = ContextClassifier.mock(interval_s=0.05, seed=10)
        seen = {ev.event_type for ev in mock_classifier.classify_chunk(timestamp=1.0)}
        # Mock mode is expected to produce at least one event of each category.
        for expected in ("tone", "queue", "speaker", "environ"):
            assert expected in seen

    def test_mock_tone_event_has_subtext(self):
        """Exactly one tone event is emitted and it carries a subtext."""
        mock_classifier = ContextClassifier.mock(interval_s=0.05, seed=11)
        tones = self._tone_events(mock_classifier.classify_chunk(timestamp=0.0))
        assert len(tones) == 1
        assert tones[0].subtext is not None

    def test_elcor_override_flag(self):
        """The elcor flag switches the tone subtext to the colon-terminated form."""
        mock_classifier = ContextClassifier.mock(interval_s=0.05, seed=12)
        plain_events = mock_classifier.classify_chunk(timestamp=0.0, elcor=False)
        elcor_events = mock_classifier.classify_chunk(timestamp=0.0, elcor=True)

        # Take the subtext of the first tone event in each result.
        plain_text = next(ev.subtext for ev in plain_events if ev.event_type == "tone")
        elcor_text = next(ev.subtext for ev in elcor_events if ev.event_type == "tone")

        # Generic subtext looks like "Tone: X"; Elcor subtext looks like
        # "With X:" or "Warmly:". A generic subtext may only end in ":" if it
        # also starts with "Tone:".
        assert not plain_text.endswith(":") or plain_text.startswith("Tone:")
        # The Elcor form always ends with ":".
        assert elcor_text.endswith(":")

    def test_session_id_propagates(self):
        """The session_id passed to classify_chunk() appears on the tone event."""
        mock_classifier = ContextClassifier.mock(interval_s=0.05, seed=13)
        chunk_events = mock_classifier.classify_chunk(timestamp=0.0, session_id="ses_test")
        first_tone = self._tone_events(chunk_events)[0]
        assert first_tone.session_id == "ses_test"

    def test_prior_frames_zero_means_no_shift(self):
        """With prior_frames=0 the tone event reports a shift magnitude of 0.0."""
        mock_classifier = ContextClassifier.mock(interval_s=0.05, seed=14)
        chunk_events = mock_classifier.classify_chunk(timestamp=0.0, prior_frames=0)
        first_tone = self._tone_events(chunk_events)[0]
        assert first_tone.shift_magnitude == 0.0
class TestContextClassifier:
    """Tests for ContextClassifier.stream() on mock-backed classifiers."""

    @pytest.mark.asyncio
    async def test_mock_passthrough(self):
        """A mock classifier streams VoiceFrame objects until it is stopped."""
        classifier = ContextClassifier.mock(interval_s=0.05, seed=7)
        frames = []
        async for frame in classifier.stream():
            frames.append(frame)
            if len(frames) >= 3:
                await classifier.stop()
                break
        assert len(frames) == 3
        assert all(isinstance(f, VoiceFrame) for f in frames)

    @pytest.mark.asyncio
    async def test_from_env_mock(self, monkeypatch):
        """With CF_VOICE_MOCK=1 set, from_env() yields a classifier that streams VoiceFrames."""
        monkeypatch.setenv("CF_VOICE_MOCK", "1")
        classifier = ContextClassifier.from_env(interval_s=0.05)
        received = 0
        async for frame in classifier.stream():
            assert isinstance(frame, VoiceFrame)
            received += 1
            await classifier.stop()
            break
        # The only type check lives inside the loop, so a stream that yields
        # nothing would otherwise let this test pass without checking anything.
        assert received == 1