- cf_voice/stt.py: WhisperSTT async wrapper (faster-whisper, thread-pool executor, rolling 50-word session prompt for cross-chunk context continuity)
- cf_voice/classify.py: ToneClassifier — wav2vec2 SER + librosa prosody flags (energy, ZCR speech rate, YIN pitch contour) mapped to AFFECT_LABELS
- cf_voice/diarize.py: Diarizer async wrapper around pyannote/speaker-diarization-3.1; speaker_at() helper for Navigation v0.2.x wiring
- cf_voice/capture.py: MicVoiceIO — sounddevice 16 kHz mono capture, 2 s window accumulation, parallel STT+classify tasks, shift_magnitude from confidence delta
- cf_voice/io.py: make_io() now returns MicVoiceIO when CF_VOICE_MOCK is unset
- cf_voice/context.py: classify_chunk() split into mock/real paths; real path decodes base64 PCM and runs ToneClassifier synchronously (cf-orch endpoint)
- pyproject.toml: inference extras expanded (faster-whisper, sounddevice, librosa, python-dotenv)
- .env.example: HF_TOKEN, CF_VOICE_WHISPER_MODEL, CF_VOICE_DEVICE, CF_VOICE_MOCK, CF_VOICE_CONFIDENCE_THRESHOLD

Prior art ported from: Plex-Scripts/transcription/diarization.py (pyannote setup), devl/ogma/backend/speech/transcription_engine.py (faster-whisper preprocessing and session prompt pattern).
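The rolling 50-word session prompt mentioned for cf_voice/stt.py can be sketched as follows. This is a minimal illustration of the pattern, not the actual cf_voice API: the `RollingPrompt` class and its method names are hypothetical, assuming the tail of recent transcript words is passed as faster-whisper's `initial_prompt` on the next chunk.

```python
from collections import deque


class RollingPrompt:
    """Keep a bounded tail of transcribed words for cross-chunk context.

    Hypothetical sketch — cf_voice's real WhisperSTT internals may differ.
    """

    def __init__(self, max_words: int = 50) -> None:
        self._words: deque = deque(maxlen=max_words)

    def extend(self, transcript: str) -> None:
        # Append the latest chunk's words; the deque drops the oldest
        # automatically once max_words is exceeded.
        self._words.extend(transcript.split())

    @property
    def text(self) -> str:
        # Intended to be passed as `initial_prompt=` to
        # WhisperModel.transcribe() so the decoder sees recent context.
        return " ".join(self._words)


prompt = RollingPrompt(max_words=50)
prompt.extend("hello there " * 30)  # 60 words in, only the last 50 kept
assert len(prompt.text.split()) == 50
```

Biasing the decoder with a short rolling prompt keeps terminology and casing consistent across 2 s windows without growing context unboundedly.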
# cf-voice environment — copy to .env and fill in values
# cf-voice itself does not auto-load .env; consumers (Linnet, Osprey, etc.)
# load it via python-dotenv in their own startup. For standalone cf-voice
# dev/testing, source this file manually or install python-dotenv.

# ── HuggingFace ───────────────────────────────────────────────────────────────
# Required for pyannote.audio speaker diarization model download.
# Get a free token at https://huggingface.co/settings/tokens
# Also accept the gated model terms at:
#   https://huggingface.co/pyannote/speaker-diarization-3.1
#   https://huggingface.co/pyannote/segmentation-3.0
HF_TOKEN=

# ── Whisper STT ───────────────────────────────────────────────────────────────
# Model size: tiny | base | small | medium | large-v2 | large-v3
# Smaller = faster / less VRAM; larger = more accurate.
# Recommended: small (~500 MB VRAM) for real-time use.
CF_VOICE_WHISPER_MODEL=small

# ── Compute ───────────────────────────────────────────────────────────────────
# auto (detect GPU), cuda, cpu
CF_VOICE_DEVICE=auto

# ── Mock mode ─────────────────────────────────────────────────────────────────
# Set to 1 to use synthetic VoiceFrames — no GPU, mic, or HF token required.
# Unset or 0 for real audio capture.
CF_VOICE_MOCK=

# ── Tone classifier ───────────────────────────────────────────────────────────
# Minimum confidence to emit a VoiceFrame (below this = frame skipped).
CF_VOICE_CONFIDENCE_THRESHOLD=0.55
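Since cf-voice does not auto-load .env, a consumer might load it at startup (e.g. with python-dotenv's `load_dotenv()`) and then parse these variables along the following lines. This is a hedged sketch: the `voice_settings` helper is illustrative and not part of cf_voice, though the defaults shown match this file.

```python
import os


def voice_settings(env=None) -> dict:
    """Parse cf-voice env vars into typed settings (illustrative helper)."""
    env = os.environ if env is None else env
    return {
        # CF_VOICE_MOCK: unset or "0" means real capture, "1" means mock frames.
        "mock": env.get("CF_VOICE_MOCK", "") not in ("", "0"),
        "model": env.get("CF_VOICE_WHISPER_MODEL", "small"),
        "device": env.get("CF_VOICE_DEVICE", "auto"),
        # VoiceFrames scoring below this confidence are skipped entirely.
        "confidence_threshold": float(env.get("CF_VOICE_CONFIDENCE_THRESHOLD", "0.55")),
    }


settings = voice_settings({"CF_VOICE_MOCK": "1", "CF_VOICE_CONFIDENCE_THRESHOLD": "0.6"})
assert settings["mock"] is True
assert settings["confidence_threshold"] == 0.6
```

Parsing once at startup keeps the "unset or 0" mock-mode convention in one place instead of scattering `os.environ` checks across modules.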