New modules shipped (from Linnet integration):

- acoustic.py: AST (MIT/ast-finetuned-audioset-10-10-0.4593) replaces YAMNet stub; 527 AudioSet classes mapped to queue/speaker/environ/scene labels; _LABEL_MAP includes hold_music, ringback, DTMF, background_shift, AMD signal chain
- accent.py: facebook/mms-lid-126 language ID → regional accent labels (en_gb, en_us, en_au, fr, es, de, zh, …); lazy-loaded, gated by CF_VOICE_ACCENT
- privacy.py: compound privacy risk scorer — public_env, background_voices, nature scene, accent signals; returns 0–3 score without storing any audio
- prosody.py: openSMILE-backed prosody extractor (sarcasm_risk, flat_f0_score, speech_rate, pitch_range); mock mode returns neutral values
- dimensional.py: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim valence/arousal/dominance scorer; gated by CF_VOICE_DIMENSIONAL
- trajectory.py: rolling buffer for arousal/valence deltas, trend detection (escalating/suppressed/stable), coherence scoring, suppression/reframe flags
- telephony.py: TelephonyBackend Protocol + MockTelephonyBackend + SignalWireBackend + FreeSWITCHBackend; CallSession dataclass; make_telephony() factory
- app.py: FastAPI service (port 8007) — /health + /classify; accepts base64 PCM chunks, returns full AudioEventOut including dimensional/prosody/accent fields
- prefs.py: voice preference helpers (elcor_mode, confidence_threshold, whisper_model, elcor_prior_frames); cf-core and env-var fallback

Tests: fix stale tests (YAMNetAcousticBackend → ASTAcousticBackend, scene field added to AcousticResult, speaker_at gap now resolves dominant speaker not UNKNOWN, make_io real path returns MicVoiceIO when sounddevice installed). 78 tests passing.

Closes #2, #3.
55 lines
1.3 KiB
TOML
55 lines
1.3 KiB
TOML
[build-system]
|
|
requires = ["setuptools>=68"]
|
|
build-backend = "setuptools.build_meta"
|
|
|
|
[project]
|
|
name = "cf-voice"
|
|
version = "0.1.0"
|
|
description = "CircuitForge voice annotation pipeline — VoiceFrame API, tone classifiers, speaker diarization"
|
|
readme = "README.md"
|
|
requires-python = ">=3.11"
|
|
license = {text = "MIT"}
|
|
dependencies = [
|
|
"pydantic>=2.0",
|
|
"fastapi>=0.111",
|
|
"uvicorn[standard]>=0.29",
|
|
]
|
|
|
|
[project.optional-dependencies]
|
|
# Real inference backends — not required for stub/mock mode
|
|
inference = [
|
|
"torch>=2.0",
|
|
"torchaudio>=2.0",
|
|
"numpy>=1.24",
|
|
"faster-whisper>=1.0",
|
|
"sounddevice>=0.4",
|
|
"transformers>=4.40",
|
|
"librosa>=0.10",
|
|
"pyannote.audio>=3.1",
|
|
"python-dotenv>=1.0",
|
|
]
|
|
signalwire = [
|
|
"signalwire>=2.0",
|
|
]
|
|
freeswitch = [
|
|
# ESL Python bindings are compiled from FreeSWITCH source.
|
|
# See: https://developer.signalwire.com/freeswitch/FreeSWITCH-Explained/Client-and-Developer-Interfaces/Event-Socket-Library/
|
|
"python-ESL",
|
|
]
|
|
dev = [
|
|
"pytest>=8.0",
|
|
"pytest-asyncio>=0.23",
|
|
"numpy>=1.24",
|
|
]
|
|
|
|
[project.scripts]
|
|
# Quick smoke-test: stream mock frames to stdout
|
|
cf-voice-demo = "cf_voice.cli:demo"
|
|
|
|
[tool.setuptools.packages.find]
|
|
where = ["."]
|
|
include = ["cf_voice*"]
|
|
|
|
[tool.pytest.ini_options]
|
|
testpaths = ["tests"]
|
|
asyncio_mode = "auto"
|