# cf_voice/accent.py — accent / language identification classifier
#
# MIT licensed (AccentResult dataclass + mock). BSL 1.1 (real inference).
# Gated by CF_VOICE_ACCENT=1 — off by default (GPU cost + privacy sensitivity).
#
# Accent alone is not high-risk, but combined with birdsong or a quiet rural
# background it becomes location-identifying. The privacy scorer accounts for
# this compound signal.
#
# Real backend: facebook/mms-lid-126 for language detection, wav2vec2 accent
# fine-tune for region. Lazy-loaded to keep startup fast.
from __future__ import annotations

import logging
import os
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class AccentResult:
    """
    Language + regional accent classification for the primary speaker.

    language:   BCP-47 language tag (e.g. "en", "fr", "zh"). The real
                classifier passes through the model's own label unchanged,
                which for MMS is an ISO 639-3 code (e.g. "eng").
    region:     cf-voice ACCENT_LABEL string (e.g. "en_gb", "en_us", "other")
    confidence: float in [0, 1]
    """

    language: str
    region: str
    confidence: float


class MockAccentClassifier:
    """
    Synthetic accent classifier for development and CI.

    Returns a fixed result so the privacy scorer can exercise all code paths
    without loading a real model.
    """

    def classify(self, audio: list[float] | bytes) -> AccentResult | None:
        """Ignore *audio* and return the same canned British-English result."""
        return AccentResult(language="en", region="en_gb", confidence=0.72)


class AccentClassifier:
    """
    Real accent / language classifier. BSL 1.1 — requires [inference] extras.

    Language detection: facebook/mms-lid-126 (126 languages, MIT licensed).
    Accent region: maps language tag to a regional ACCENT_LABEL.

    VRAM: ~500 MB on CUDA.
    """

    _LANG_MODEL_ID = "facebook/mms-lid-126"
    # Sampling rate declared to the feature extractor; input is not resampled
    # here, so callers are expected to supply audio at this rate.
    _SAMPLE_RATE_HZ = 16_000
    # Shortest clip worth classifying: 100 ms at 16 kHz.
    _MIN_SAMPLES = 1_600

    def __init__(self) -> None:
        """
        Load the feature extractor and language-ID model onto CUDA if
        available, otherwise CPU.

        Raises:
            ImportError: if ``transformers`` (or ``torch``) is not installed.
        """
        # Heavy dependencies are imported here, not at module level, so that
        # importing this module stays cheap when the classifier is disabled.
        try:
            from transformers import (
                AutoFeatureExtractor,
                Wav2Vec2ForSequenceClassification,
            )
        except ImportError as exc:
            raise ImportError(
                "transformers is required for accent classification. "
                "Install with: pip install cf-voice[inference]"
            ) from exc

        import torch

        self._device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(
            "Loading language ID model %s on %s", self._LANG_MODEL_ID, self._device
        )
        self._extractor = AutoFeatureExtractor.from_pretrained(self._LANG_MODEL_ID)
        self._model = Wav2Vec2ForSequenceClassification.from_pretrained(
            self._LANG_MODEL_ID
        ).to(self._device)
        self._model.eval()

    def classify(self, audio: list[float] | bytes) -> AccentResult | None:
        """
        Classify the language of *audio* and derive its accent region.

        Args:
            audio: either a sequence of float samples, or raw bytes that are
                reinterpreted as native-endian float32 samples.

        Returns:
            An AccentResult carrying the top-scoring label, its mapped region
            and its softmax probability, or None when the clip is shorter
            than ``_MIN_SAMPLES``.

        Raises:
            ValueError: from ``numpy.frombuffer`` when *audio* is bytes whose
                length is not a multiple of 4 (the float32 item size).
        """
        import numpy as np
        import torch

        if isinstance(audio, bytes):
            audio_np = np.frombuffer(audio, dtype=np.float32)
        else:
            audio_np = np.asarray(audio, dtype=np.float32)

        if len(audio_np) < self._MIN_SAMPLES:
            return None

        inputs = self._extractor(
            audio_np,
            sampling_rate=self._SAMPLE_RATE_HZ,
            return_tensors="pt",
            padding=True,
        )
        # Tensors must live on the same device as the model.
        inputs = {k: v.to(self._device) for k, v in inputs.items()}

        with torch.no_grad():
            logits = self._model(**inputs).logits

        # A single clip is submitted, so row 0 is the only row of scores.
        probs = torch.softmax(logits, dim=-1)[0]
        top_idx = int(probs.argmax())
        confidence = float(probs[top_idx])

        language = self._model.config.id2label.get(top_idx, "other")
        region = _lang_to_region(language)

        return AccentResult(language=language, region=region, confidence=confidence)


# Language tag -> cf-voice ACCENT_LABEL. Keys are stored lower-case with "-"
# as the subtag separator; _lang_to_region() normalises its input to match.
_REGION_BY_LANG: dict[str, str] = {
    # MMS uses ISO 639-3; sub-regional accent needs fine-tune
    "eng": "en_us",
    "fra": "fr",
    "spa": "es",
    "deu": "de",
    "zho": "zh",
    # ISO 639-3 code for Mandarin Chinese.
    # NOTE(review): MMS checkpoints appear to emit this rather than the "zho"
    # macrolanguage code — confirm against the model's id2label table.
    "cmn": "zh",
    "jpn": "ja",
    # BCP-47 tags
    "en": "en_us",
    "en-gb": "en_gb",
    "en-au": "en_au",
    "en-ca": "en_ca",
    "en-in": "en_in",
    "fr": "fr",
    "de": "de",
    "es": "es",
    "zh": "zh",
    "ja": "ja",
}


def _lang_to_region(lang: str) -> str:
    """
    Map a BCP-47 / ISO 639-3 language tag to a cf-voice ACCENT_LABEL.

    Matching ignores case and accepts "_" as well as "-" between subtags, so
    "en-GB", "en-gb" and "en_GB" all resolve to "en_gb". Anything that is not
    in the table (including non-string input) yields "other".
    """
    if not isinstance(lang, str):
        return "other"
    key = lang.strip().replace("_", "-").lower()
    return _REGION_BY_LANG.get(key, "other")


def make_accent_classifier(
    mock: bool | None = None,
) -> MockAccentClassifier | AccentClassifier | None:
    """
    Factory: return a classifier if CF_VOICE_ACCENT=1, else None.

    Args:
        mock: True forces the mock, False forces an attempt at the real
            classifier, None defers to the CF_VOICE_MOCK environment variable
            (mock when it equals "1"). Ignored when CF_VOICE_ACCENT is not "1".

    Returns:
        None when the feature is disabled; otherwise a MockAccentClassifier
        or an AccentClassifier. If the real classifier cannot be constructed
        for any reason, a warning is logged and the mock is returned instead.

    Callers must check for None before invoking classify().
    """
    if os.environ.get("CF_VOICE_ACCENT", "") != "1":
        return None

    if mock is not None:
        use_mock = mock
    else:
        use_mock = os.environ.get("CF_VOICE_MOCK", "") == "1"
    if use_mock:
        return MockAccentClassifier()

    try:
        return AccentClassifier()
    except Exception as exc:
        # Deliberately broad: a missing dependency, a failed model download or
        # a device error must all degrade to the mock rather than crash the
        # caller. ImportError is already covered by Exception.
        logger.warning("AccentClassifier unavailable (%s) — using mock", exc)
        return MockAccentClassifier()