# cf_voice/privacy.py — local acoustic privacy risk scoring
#
# MIT licensed. Never transmitted to cloud. Never logged server-side.
#
# Derives a privacy_risk level (low / moderate / high) from the combined
# acoustic fingerprint: scene + environ labels + speaker type + accent.
#
# Design rationale (#20):
# - "outdoor_urban" + "crowd_chatter" + "traffic" → low: clearly public
# - "indoor_quiet" + "background_voices" → moderate: conversation overheard
# - "outdoor_nature" + "birdsong" + regional accent → moderate-high: location-identifying compound
# - "indoor_quiet" + no background voices → low
#
# Risk gates (Linnet):
#   high:     warn before sending audio chunk to cloud STT; offer local-only fallback
#   moderate: attach privacy_flags to session state, no blocking action
#   low:      proceed normally
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Literal

PrivacyLevel = Literal["low", "moderate", "high"]


@dataclass
class PrivacyRisk:
    """
    Locally-computed privacy risk for a single audio window.

    level:  aggregate risk level
    flags:  ordered list of contributing signal descriptions
    """

    level: PrivacyLevel
    flags: list[str] = field(default_factory=list)


# ── Signal sets ───────────────────────────────────────────────────────────────

_PUBLIC_SCENES = {"outdoor_urban", "public_transit"}
_NATURE_SCENES = {"outdoor_nature"}
_QUIET_SCENES = {"indoor_quiet"}
_LOCATION_ENVIRON = {"birdsong", "wind", "rain", "water"}
_URBAN_ENVIRON = {"traffic", "crowd_chatter", "street_signal", "construction"}


def score_privacy_risk(
    scene: str | None,
    environ_labels: list[str],
    speaker: str | None,
    accent: str | None,
) -> PrivacyRisk:
    """
    Derive a PrivacyRisk from the current acoustic fingerprint.

    All inputs are nullable — this function handles partial signals gracefully
    (a None or empty environ_labels is treated as "no environment labels").
    The function only reads its arguments and module-level label sets and
    returns a fresh PrivacyRisk.

    Each matching signal appends a flag and adjusts an integer score, which is
    then mapped to a level:
        score <= 0  → "low"
        score 1–2   → "moderate"
        score >= 3  → "high"

    Args:
        scene:          SCENE_LABEL string or None
        environ_labels: list of ENVIRON_LABEL strings active in this window
        speaker:        SPEAKER_LABEL string or None
        accent:         ACCENT_LABEL string or None (None when CF_VOICE_ACCENT disabled)

    Returns:
        PrivacyRisk with the aggregate level and the contributing flags, in
        the order the checks below are evaluated.
    """
    flags: list[str] = []
    score = 0  # internal accumulator; maps to level at the end

    # `or ()` keeps the documented "all inputs are nullable" promise:
    # set(None) would raise TypeError.
    environ_set = set(environ_labels or ())

    # ── Clearly public environments → reduce risk ─────────────────────────────
    if scene in _PUBLIC_SCENES or environ_set & _URBAN_ENVIRON:
        flags.append("public_environment")
        score -= 1

    # ── Background voices: conversation may be overheard ─────────────────────
    if speaker == "background_voices":
        flags.append("background_voices_detected")
        score += 2

    # ── Quiet indoor: no background noise reduces identifiability ────────────
    if scene in _QUIET_SCENES and speaker not in ("background_voices", "human_multi"):
        flags.append("controlled_environment")
        # No score change — neutral

    # ── Nature sounds: alone they suggest a quiet, potentially identifiable location
    nature_match = environ_set & _LOCATION_ENVIRON
    if nature_match:
        # sorted() keeps the flag text deterministic regardless of set ordering.
        flags.append(f"location_signal: {', '.join(sorted(nature_match))}")
        score += 1

    # ── Nature scene + nature sounds: compound location-identifying signal ────
    if scene in _NATURE_SCENES and nature_match:
        flags.append("compound_location_signal")
        score += 1

    # ── Regional accent + nature: narrows location to region + environment ────
    if accent and accent not in ("en_us", "other") and nature_match:
        flags.append(f"accent_plus_location: {accent}")
        score += 1

    # ── Quiet indoor + background voices: overheard conversation ─────────────
    # NOTE(review): together with the +2 for background voices above, this
    # combination totals 3 → "high", while the design rationale in the file
    # header lists it as "moderate" — confirm which is intended.
    if scene in _QUIET_SCENES and speaker == "background_voices":
        flags.append("overheard_conversation")
        score += 1

    # ── Map score to level ────────────────────────────────────────────────────
    level: PrivacyLevel
    if score <= 0:
        level = "low"
    elif score <= 2:
        level = "moderate"
    else:
        level = "high"

    return PrivacyRisk(level=level, flags=flags)