# cf_voice/trajectory.py — affect trajectory and SER/VAD coherence signals
#
# MIT licensed — derived computation only, no inference models.
#
# Two signal families:
#
# 1. TrajectorySignal — rolling arousal/valence trend across the last N windows.
#    Detects escalation, de-escalation, suppression, worsening, improving.
#
# 2. CoherenceSignal — cross-model comparison between SER (categorical affect)
#    and VAD (continuous dimensional valence). Disagreement indicates affect
#    suppression, controlled presentation, or surface-only semantic reframe.
#
# Both signals activate only after BASELINE_MIN windows per speaker are buffered.
# All thresholds are relative to the per-speaker rolling mean, not absolute —
# this is required for ND/neurodivergent speaker safety (see design doc).
#
# NOTE(review): compute_trajectory() still returns a CoherenceSignal while the
# trajectory is calibrating, and its suppression check compares the raw
# arousal/valence of the current frame against fixed constants rather than the
# rolling mean — confirm this matches the design doc.
#
# Safety note: these signals must never be labelled "deception" in any
# user-facing context. Use: "affect divergence", "controlled presentation",
# "framing shift". The user interprets; the system observes.
from __future__ import annotations

from collections import deque
from dataclasses import dataclass

from cf_voice.dimensional import DimensionalResult

# Rolling window depth per speaker
BUFFER_WINDOW = 5

# Minimum frames before signals activate (relative baseline requirement)
BASELINE_MIN = 3

# Minimum arousal/valence delta per window to count as directional movement
_DELTA_THRESHOLD = 0.05

# Arousal threshold above which "neutral SER + high arousal" = suppression candidate
_SUPPRESSION_AROUSAL_MIN = 0.65

# SER affects that imply low arousal presentation (used for suppression detection)
_LOW_PRESENTATION_AFFECTS = frozenset({"neutral", "scripted", "tired", "apologetic"})

# Expected valence ranges derived from MSP-Podcast emotion distribution.
# Used to determine whether SER affect label and dimensional valence agree.
_AFFECT_VALENCE_PRIOR: dict[str, tuple[float, float]] = {
    "warm": (0.60, 1.00),
    "genuine": (0.55, 1.00),
    "optimistic": (0.55, 0.90),
    "neutral": (0.35, 0.65),
    "confused": (0.30, 0.60),
    "scripted": (0.30, 0.65),
    "apologetic": (0.20, 0.55),
    "tired": (0.10, 0.50),
    "frustrated": (0.10, 0.45),
    "dismissive": (0.15, 0.50),
    "condescending": (0.10, 0.45),
    "urgent": (0.15, 0.55),
}

# Ordinal positivity for reframe direction detection.
# Higher = more positive presentation.
_AFFECT_POSITIVITY: dict[str, int] = {
    "urgent": 1,
    "frustrated": 1,
    "condescending": 1,
    "dismissive": 2,
    "tired": 2,
    "apologetic": 3,
    "confused": 3,
    "scripted": 4,
    "neutral": 4,
    "optimistic": 5,
    "genuine": 5,
    "warm": 6,
}

# Fallbacks used when an affect label is missing from the tables above.
_DEFAULT_VALENCE_RANGE: tuple[float, float] = (0.30, 0.70)
_DEFAULT_POSITIVITY = 4

# Distance outside the expected valence range at which coherence reaches 0.0.
_COHERENCE_GAP_SPAN = 0.40

# The suppression flag additionally requires valence below this value.
_SUPPRESSION_VALENCE_MAX = 0.50

# Minimum frame-over-frame step for a movement to count as "consecutive".
_CONSECUTIVE_STEP = 0.03

# Buffer mean arousal above which falling arousal is labelled "de-escalating".
_ELEVATED_AROUSAL_MEAN = 0.55

# Buffer mean valence below which rising valence is labelled "improving".
_DEPRESSED_VALENCE_MEAN = 0.45


@dataclass
class TrajectorySignal:
    """
    Rolling trend across recent dimensional frames for one speaker.

    All delta values: current_frame_value - mean(buffer_values), rounded to
    three decimals. They are 0.0 while the signal is calibrating.
    Positive arousal_delta = current frame is more activated than baseline.
    Negative valence_delta = current frame is more negative than baseline.

    arousal_trend / valence_trend are "rising" | "falling" | "flat", decided
    by comparing the matching delta against +/- _DELTA_THRESHOLD.

    trend values (checked in this priority order once the baseline exists):
      "calibrating"   not enough frames yet (< BASELINE_MIN)
      "suppressed"    the suppression flag is set for this window
      "escalating"    arousal rising and also above the previous frame by
                      more than _CONSECUTIVE_STEP
      "de-escalating" arousal falling while the buffer mean arousal is above
                      _ELEVATED_AROUSAL_MEAN
      "worsening"     valence falling and also below the previous frame by
                      more than _CONSECUTIVE_STEP
      "improving"     valence rising while the buffer mean valence is below
                      _DEPRESSED_VALENCE_MEAN
      "stable"        none of the above

    frames_in_buffer is the buffer length at call time (current frame
    excluded); baseline_established is False only while calibrating.
    """

    arousal_delta: float
    valence_delta: float
    dominance_delta: float
    arousal_trend: str  # "rising" | "falling" | "flat"
    valence_trend: str  # "rising" | "falling" | "flat"
    trend: str
    frames_in_buffer: int
    baseline_established: bool


@dataclass
class CoherenceSignal:
    """
    Cross-signal comparison: SER categorical affect vs. VAD dimensional valence.

    coherence_score:
      1.0 = SER label and VAD valence are fully consistent.
      0.0 = maximum disagreement.

    suppression_flag:
      True when the speaker is presenting as calm/neutral (SER label in
      _LOW_PRESENTATION_AFFECTS), VAD arousal is above
      _SUPPRESSION_AROUSAL_MIN and VAD valence is below
      _SUPPRESSION_VALENCE_MAX.
      Indicates controlled presentation with activation underneath.
      This is relative to a per-session threshold — not a universal claim.

    reframe_type:
      "none"     no SER category shift this window, or the shift was not
                 toward a more positive label
      "genuine"  SER shifted toward more positive AND dimensional valence
                 also improved (>= _DELTA_THRESHOLD vs. the previous frame)
      "surface"  SER shifted toward more positive BUT dimensional valence
                 continued its prior trajectory unchanged or worsening
                 (also reported when there is no previous frame to compare)

    affect_divergence:
      Signed: VAD valence minus the midpoint of the SER label's expected range.
      Negative = VAD more negative than SER label implies (masking candidate).
      Positive = VAD more positive than SER label implies (unusual).
    """

    coherence_score: float
    suppression_flag: bool
    reframe_type: str  # "none" | "genuine" | "surface"
    affect_divergence: float


# ── Public helpers ─────────────────────────────────────────────────────────────


def affect_coherence(affect: str, valence: float) -> float:
    """
    Compute coherence between a SER affect category and a VAD valence score.

    Returns 1.0 when valence falls inside the expected range for the affect
    (unknown labels use _DEFAULT_VALENCE_RANGE). Outside the range the score
    falls linearly with the distance to the nearest range boundary and is
    clamped to 0.0 once that distance reaches _COHERENCE_GAP_SPAN. The result
    is rounded to three decimals.
    """
    lo, hi = _AFFECT_VALENCE_PRIOR.get(affect, _DEFAULT_VALENCE_RANGE)
    if lo <= valence <= hi:
        return 1.0
    gap = min(abs(valence - lo), abs(valence - hi))
    return round(max(0.0, 1.0 - (gap / _COHERENCE_GAP_SPAN)), 3)


def affect_divergence_score(affect: str, valence: float) -> float:
    """
    Signed divergence: actual VAD valence minus the midpoint of the expected range.

    Negative = VAD more negative than SER label implies.
    Positive = VAD more positive than SER label implies.
    Unknown labels use _DEFAULT_VALENCE_RANGE. Rounded to three decimals.
    """
    lo, hi = _AFFECT_VALENCE_PRIOR.get(affect, _DEFAULT_VALENCE_RANGE)
    midpoint = (lo + hi) / 2.0
    return round(valence - midpoint, 3)


def compute_trajectory(
    buffer: deque,
    current: DimensionalResult,
    ser_affect: str,
    prior_ser_affect: str | None,
) -> tuple[TrajectorySignal, CoherenceSignal]:
    """
    Compute trajectory and coherence signals for one speaker at one window.

    buffer
        Rolling deque of prior DimensionalResult for this speaker.
        Must be updated AFTER this call (append current to buffer).
    current
        DimensionalResult for the window being classified.
    ser_affect
        SER affect label for this window (from ToneClassifier).
    prior_ser_affect
        SER affect label from the previous window, for reframe detection.
        Pass None on the first window or when not tracking.

    Returns (TrajectorySignal, CoherenceSignal).
    The CoherenceSignal is always computed from the current frame. The
    TrajectorySignal has baseline_established=False, trend="calibrating" and
    zero deltas when buffer has fewer than BASELINE_MIN entries.
    """
    n = len(buffer)

    # Coherence can be computed without a buffer
    suppression = (
        ser_affect in _LOW_PRESENTATION_AFFECTS
        and current.arousal > _SUPPRESSION_AROUSAL_MIN
        and current.valence < _SUPPRESSION_VALENCE_MAX
    )
    coher = CoherenceSignal(
        coherence_score=affect_coherence(ser_affect, current.valence),
        suppression_flag=suppression,
        reframe_type=_reframe_type(buffer, current, ser_affect, prior_ser_affect),
        affect_divergence=affect_divergence_score(ser_affect, current.valence),
    )

    # max(..., 1) keeps the mean and buffer[-1] below safe on an empty buffer
    # even if BASELINE_MIN is ever configured as 0 or less.
    if n < max(BASELINE_MIN, 1):
        calibrating = TrajectorySignal(
            arousal_delta=0.0,
            valence_delta=0.0,
            dominance_delta=0.0,
            arousal_trend="flat",
            valence_trend="flat",
            trend="calibrating",
            frames_in_buffer=n,
            baseline_established=False,
        )
        return calibrating, coher

    mean_arousal = sum(f.arousal for f in buffer) / n
    mean_valence = sum(f.valence for f in buffer) / n
    mean_dominance = sum(f.dominance for f in buffer) / n

    a_delta = current.arousal - mean_arousal
    v_delta = current.valence - mean_valence
    d_delta = current.dominance - mean_dominance

    a_trend = _direction(a_delta)
    v_trend = _direction(v_delta)

    # Consecutive movement: the current frame must also have moved past the
    # most recent buffered frame, not just past the rolling mean.
    prev = buffer[-1]
    arousal_stepping_up = (
        a_trend == "rising"
        and (current.arousal - prev.arousal) > _CONSECUTIVE_STEP
    )
    valence_stepping_down = (
        v_trend == "falling"
        and (current.valence - prev.valence) < -_CONSECUTIVE_STEP
    )

    # Composite trend label — first matching rule wins.
    if suppression:
        trend = "suppressed"
    elif arousal_stepping_up:
        trend = "escalating"
    elif a_trend == "falling" and mean_arousal > _ELEVATED_AROUSAL_MEAN:
        trend = "de-escalating"
    elif valence_stepping_down:
        trend = "worsening"
    elif v_trend == "rising" and mean_valence < _DEPRESSED_VALENCE_MEAN:
        trend = "improving"
    else:
        trend = "stable"

    traj = TrajectorySignal(
        arousal_delta=round(a_delta, 3),
        valence_delta=round(v_delta, 3),
        dominance_delta=round(d_delta, 3),
        arousal_trend=a_trend,
        valence_trend=v_trend,
        trend=trend,
        frames_in_buffer=n,
        baseline_established=True,
    )
    return traj, coher


# ── Internal helpers ───────────────────────────────────────────────────────────


def _direction(delta: float) -> str:
    """Classify a delta as "rising", "falling" or "flat" against _DELTA_THRESHOLD."""
    if delta > _DELTA_THRESHOLD:
        return "rising"
    if delta < -_DELTA_THRESHOLD:
        return "falling"
    return "flat"


def _reframe_type(
    buffer: deque,
    current: DimensionalResult,
    ser_affect: str,
    prior_ser_affect: str | None,
) -> str:
    """Return "none", "genuine" or "surface" for the SER label change this window."""
    if not prior_ser_affect or prior_ser_affect == ser_affect:
        return "none"
    if not _is_more_positive(ser_affect, prior_ser_affect):
        return "none"
    if not buffer:
        # No previous frame to compare against: valence improvement is unproven.
        return "surface"
    # Valence actually improved in this window vs. the single prior frame.
    improved = (current.valence - buffer[-1].valence) >= _DELTA_THRESHOLD
    return "genuine" if improved else "surface"


def _is_more_positive(current: str, prior: str) -> bool:
    """True when the current SER affect is ranked more positive than prior."""
    current_rank = _AFFECT_POSITIVITY.get(current, _DEFAULT_POSITIVITY)
    prior_rank = _AFFECT_POSITIVITY.get(prior, _DEFAULT_POSITIVITY)
    return current_rank > prior_rank