- cf_voice/stt.py: WhisperSTT async wrapper (faster-whisper, thread-pool executor, rolling 50-word session prompt for cross-chunk context continuity)
- cf_voice/classify.py: ToneClassifier — wav2vec2 SER + librosa prosody flags (energy, ZCR speech rate, YIN pitch contour) mapped to AFFECT_LABELS
- cf_voice/diarize.py: Diarizer async wrapper around pyannote/speaker-diarization-3.1; speaker_at() helper for Navigation v0.2.x wiring
- cf_voice/capture.py: MicVoiceIO — sounddevice 16kHz mono capture, 2s window accumulation, parallel STT+classify tasks, shift_magnitude from confidence delta
- cf_voice/io.py: make_io() now returns MicVoiceIO when CF_VOICE_MOCK is unset
- cf_voice/context.py: classify_chunk() split into mock/real paths; real path decodes base64 PCM and runs ToneClassifier synchronously (cf-orch endpoint)
- pyproject.toml: inference extras expanded (faster-whisper, sounddevice, librosa, python-dotenv)
- .env.example: HF_TOKEN, CF_VOICE_WHISPER_MODEL, CF_VOICE_DEVICE, CF_VOICE_MOCK, CF_VOICE_CONFIDENCE_THRESHOLD

Prior art ported from: Plex-Scripts/transcription/diarization.py (pyannote setup), devl/ogma/backend/speech/transcription_engine.py (faster-whisper preprocessing and session prompt pattern).
45 lines
1,002 B
TOML
45 lines
1,002 B
TOML
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

[project]
name = "cf-voice"
version = "0.1.0"
description = "CircuitForge voice annotation pipeline — VoiceFrame API, tone classifiers, speaker diarization"
readme = "README.md"
requires-python = ">=3.11"
license = {text = "MIT"}
dependencies = [
    "pydantic>=2.0",
]

[project.optional-dependencies]
# Real inference backends — not required for stub/mock mode
inference = [
    "torch>=2.0",
    "torchaudio>=2.0",
    "numpy>=1.24",
    "faster-whisper>=1.0",
    "sounddevice>=0.4",
    "transformers>=4.40",
    "librosa>=0.10",
    "pyannote.audio>=3.1",
    "python-dotenv>=1.0",
]
dev = [
    "pytest>=8.0",
    "pytest-asyncio>=0.23",
    "numpy>=1.24",
]

[project.scripts]
# Quick smoke-test: stream mock frames to stdout
cf-voice-demo = "cf_voice.cli:demo"

[tool.setuptools.packages.find]
where = ["."]
include = ["cf_voice*"]

[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"