cf-voice/cf_voice/trajectory.py
pyr0ball 24f04b67db feat: full voice pipeline — AST acoustic, accent, privacy, prosody, dimensional, trajectory, telephony, FastAPI app
New modules shipped (from Linnet integration):
- acoustic.py: AST (MIT/ast-finetuned-audioset-10-10-0.4593) replaces YAMNet stub;
  527 AudioSet classes mapped to queue/speaker/environ/scene labels; _LABEL_MAP
  includes hold_music, ringback, DTMF, background_shift, AMD signal chain
- accent.py: facebook/mms-lid-126 language ID → regional accent labels
  (en_gb, en_us, en_au, fr, es, de, zh, …); lazy-loaded, gated by CF_VOICE_ACCENT
- privacy.py: compound privacy risk scorer — public_env, background_voices,
  nature scene, accent signals; returns 0–3 score without storing any audio
- prosody.py: openSMILE-backed prosody extractor (sarcasm_risk, flat_f0_score,
  speech_rate, pitch_range); mock mode returns neutral values
- dimensional.py: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim
  valence/arousal/dominance scorer; gated by CF_VOICE_DIMENSIONAL
- trajectory.py: rolling buffer for arousal/valence deltas, trend detection
  (escalating/suppressed/stable), coherence scoring, suppression/reframe flags
- telephony.py: TelephonyBackend Protocol + MockTelephonyBackend + SignalWireBackend
  + FreeSWITCHBackend; CallSession dataclass; make_telephony() factory
- app.py: FastAPI service (port 8007) — /health + /classify; accepts base64 PCM
  chunks, returns full AudioEventOut including dimensional/prosody/accent fields
- prefs.py: voice preference helpers (elcor_mode, confidence_threshold,
  whisper_model, elcor_prior_frames); cf-core and env-var fallback

Tests: fix stale tests (YAMNetAcousticBackend → ASTAcousticBackend, scene field
added to AcousticResult, speaker_at gap now resolves dominant speaker not UNKNOWN,
make_io real path returns MicVoiceIO when sounddevice installed). 78 tests passing.

Closes #2, #3.
2026-04-18 22:36:58 -07:00

288 lines
11 KiB
Python

# cf_voice/trajectory.py — affect trajectory and SER/VAD coherence signals
#
# MIT licensed — derived computation only, no inference models.
#
# Two signal families:
#
# 1. TrajectorySignal — rolling arousal/valence trend across the last N windows.
# Detects escalation, de-escalation, suppression, worsening, improving.
#
# 2. CoherenceSignal — cross-model comparison between SER (categorical affect)
# and VAD (continuous dimensional valence). Disagreement indicates affect
# suppression, controlled presentation, or surface-only semantic reframe.
#
# Both signals activate only after BASELINE_MIN windows per speaker are buffered.
# All thresholds are relative to the per-speaker rolling mean, not absolute —
# this is required for ND/neurodivergent speaker safety (see design doc).
#
# Safety note: these signals must never be labelled "deception" in any
# user-facing context. Use: "affect divergence", "controlled presentation",
# "framing shift". The user interprets; the system observes.
from __future__ import annotations
from collections import deque
from dataclasses import dataclass
from cf_voice.dimensional import DimensionalResult
# ── Tuning constants ───────────────────────────────────────────────────────────

# Depth of the per-speaker rolling window of dimensional frames.
BUFFER_WINDOW = 5

# Frames that must be buffered before trend/coherence signals activate
# (the relative-baseline requirement).
BASELINE_MIN = 3

# Smallest per-window arousal/valence change counted as directional movement.
_DELTA_THRESHOLD = 0.05

# Arousal above this, paired with a calm/neutral SER label, marks a
# suppression candidate.
_SUPPRESSION_AROUSAL_MIN = 0.65

# SER affect labels that present as low-arousal; input to suppression detection.
_LOW_PRESENTATION_AFFECTS = frozenset({"neutral", "scripted", "tired", "apologetic"})

# Expected valence range per SER affect label, derived from the MSP-Podcast
# emotion distribution. Used to judge whether the SER label and the
# dimensional valence agree.
_AFFECT_VALENCE_PRIOR: dict[str, tuple[float, float]] = {
    "warm": (0.60, 1.00),
    "genuine": (0.55, 1.00),
    "optimistic": (0.55, 0.90),
    "neutral": (0.35, 0.65),
    "confused": (0.30, 0.60),
    "scripted": (0.30, 0.65),
    "apologetic": (0.20, 0.55),
    "tired": (0.10, 0.50),
    "frustrated": (0.10, 0.45),
    "dismissive": (0.15, 0.50),
    "condescending": (0.10, 0.45),
    "urgent": (0.15, 0.55),
}

# Ordinal positivity per affect label (higher = more positive presentation);
# drives reframe-direction detection.
_AFFECT_POSITIVITY: dict[str, int] = {
    "urgent": 1,
    "frustrated": 1,
    "condescending": 1,
    "dismissive": 2,
    "tired": 2,
    "apologetic": 3,
    "confused": 3,
    "scripted": 4,
    "neutral": 4,
    "optimistic": 5,
    "genuine": 5,
    "warm": 6,
}
@dataclass
class TrajectorySignal:
    """
    Rolling trend over recent dimensional frames for a single speaker.

    Every delta is ``current_frame_value - mean(buffer_values)``: a positive
    arousal_delta means the current frame is more activated than baseline; a
    negative valence_delta means it is more negative than baseline.

    ``trend`` takes one of:
      "calibrating"    fewer than BASELINE_MIN frames buffered
      "stable"         no significant directional movement
      "escalating"     arousal above mean by the delta threshold, consecutively
      "de-escalating"  arousal falling after an elevated period
      "worsening"      valence below mean, consecutively
      "improving"      valence rising after a depressed period
      "suppressed"     calm/neutral SER affect with elevated VAD arousal
    """

    # Current-frame deltas against the per-speaker rolling mean.
    arousal_delta: float
    valence_delta: float
    dominance_delta: float
    # Per-dimension direction labels: "rising" | "falling" | "flat".
    arousal_trend: str
    valence_trend: str
    # Composite label — see class docstring for the value set.
    trend: str
    # Number of frames currently buffered for this speaker.
    frames_in_buffer: int
    # True once at least BASELINE_MIN frames have been buffered.
    baseline_established: bool
@dataclass
class CoherenceSignal:
    """
    Cross-model comparison: SER categorical affect vs. VAD dimensional valence.

    coherence_score   1.0 when the SER label and VAD valence fully agree,
                      0.0 at maximum disagreement.
    suppression_flag  True when the speaker presents as calm/neutral (SER)
                      while VAD arousal is elevated — controlled presentation
                      with activation underneath. Relative to a per-session
                      threshold, never a universal claim.
    reframe_type      "none"    no SER category shift this window
                      "genuine" SER shifted more positive AND dimensional
                                valence also improved this window
                      "surface" SER shifted more positive BUT dimensional
                                valence stayed on its prior trajectory or worsened
    affect_divergence Signed: VAD-implied valence minus the midpoint of the
                      SER label's expected range. Negative = VAD more negative
                      than the label implies (masking candidate); positive =
                      VAD more positive than the label implies (unusual).
    """

    coherence_score: float
    suppression_flag: bool
    # One of "none" | "genuine" | "surface".
    reframe_type: str
    affect_divergence: float
# ── Public helpers ─────────────────────────────────────────────────────────────
def affect_coherence(affect: str, valence: float) -> float:
    """
    Score agreement between a SER affect label and a VAD valence value.

    Returns 1.0 when *valence* lies inside the expected range for *affect*
    (unknown affects fall back to a (0.30, 0.70) prior). Outside the range
    the score decays linearly with distance from the nearest boundary,
    reaching 0.0 at a gap of 0.40. Rounded to three decimals.
    """
    low, high = _AFFECT_VALENCE_PRIOR.get(affect, (0.30, 0.70))
    if low <= valence <= high:
        return 1.0
    distance = min(abs(valence - low), abs(valence - high))
    score = 1.0 - (distance / 0.40)
    return round(max(0.0, score), 3)
def affect_divergence_score(affect: str, valence: float) -> float:
    """
    Signed gap between observed VAD valence and the SER label's expectation.

    Computed as *valence* minus the midpoint of the label's expected range
    (unknown affects fall back to the (0.30, 0.70) prior). Negative = VAD
    more negative than the SER label implies; positive = more positive.
    Rounded to three decimals.
    """
    expected = _AFFECT_VALENCE_PRIOR.get(affect, (0.30, 0.70))
    center = sum(expected) / 2.0
    return round(valence - center, 3)
def compute_trajectory(
    buffer: deque,
    current: DimensionalResult,
    ser_affect: str,
    prior_ser_affect: str | None,
) -> tuple[TrajectorySignal, CoherenceSignal]:
    """
    Compute trajectory and coherence signals for one speaker at one window.

    buffer            Rolling deque of prior DimensionalResult for this speaker.
                      Must be updated AFTER this call (append current to buffer).
    current           DimensionalResult for the window being classified.
    ser_affect        SER affect label for this window (from ToneClassifier).
    prior_ser_affect  SER affect label from the previous window, for reframe
                      detection. Pass None on the first window or when not
                      tracking.

    Returns (TrajectorySignal, CoherenceSignal). The TrajectorySignal has
    baseline_established=False and trend="calibrating" while the buffer holds
    fewer than BASELINE_MIN entries; the CoherenceSignal is always computed.
    """
    n = len(buffer)

    # Coherence is frame-local: it needs no buffered history.
    coh_score = affect_coherence(ser_affect, current.valence)
    div_score = affect_divergence_score(ser_affect, current.valence)

    # Suppression candidate: calm/neutral presentation with elevated arousal
    # and depressed valence.
    # NOTE(review): _SUPPRESSION_AROUSAL_MIN and the 0.50 valence cutoff are
    # absolute, while the module header says thresholds are relative to the
    # per-speaker rolling mean — confirm this is intentional.
    suppression = (
        ser_affect in _LOW_PRESENTATION_AFFECTS
        and current.arousal > _SUPPRESSION_AROUSAL_MIN
        and current.valence < 0.50
    )

    # Reframe detection: only a shift toward a more positive SER label counts.
    reframe = "none"
    if prior_ser_affect and prior_ser_affect != ser_affect:
        if _is_more_positive(ser_affect, prior_ser_affect):
            # "genuine" requires that dimensional valence also improved vs. the
            # single most recent buffered frame; otherwise the shift is surface.
            if n >= 1:
                # deque indexing at the ends is O(1) — no list() copy needed.
                prev_valence = buffer[-1].valence
                dim_improved = (current.valence - prev_valence) >= _DELTA_THRESHOLD
            else:
                dim_improved = False
            reframe = "genuine" if dim_improved else "surface"

    coher = CoherenceSignal(
        coherence_score=coh_score,
        suppression_flag=suppression,
        reframe_type=reframe,
        affect_divergence=div_score,
    )

    # Not enough history yet: emit a neutral "calibrating" trajectory.
    if n < BASELINE_MIN:
        traj = TrajectorySignal(
            arousal_delta=0.0,
            valence_delta=0.0,
            dominance_delta=0.0,
            arousal_trend="flat",
            valence_trend="flat",
            trend="calibrating",
            frames_in_buffer=n,
            baseline_established=False,
        )
        return traj, coher

    # Relative baseline: deltas are measured against the buffered means.
    mean_arousal = sum(f.arousal for f in buffer) / n
    mean_valence = sum(f.valence for f in buffer) / n
    mean_dominance = sum(f.dominance for f in buffer) / n
    a_delta = current.arousal - mean_arousal
    v_delta = current.valence - mean_valence
    d_delta = current.dominance - mean_dominance
    a_trend = (
        "rising" if a_delta > _DELTA_THRESHOLD else
        "falling" if a_delta < -_DELTA_THRESHOLD else
        "flat"
    )
    v_trend = (
        "rising" if v_delta > _DELTA_THRESHOLD else
        "falling" if v_delta < -_DELTA_THRESHOLD else
        "flat"
    )

    # Consecutive movement: was the most recent buffered frame already moving
    # in the same direction as the current one?
    prev = buffer[-1]
    a_consecutive = a_trend == "rising" and (current.arousal - prev.arousal) > 0.03
    v_consecutive = v_trend == "falling" and (current.valence - prev.valence) < -0.03

    # Composite trend label; suppression takes precedence over movement labels.
    if suppression:
        trend = "suppressed"
    elif a_trend == "rising" and a_consecutive:
        trend = "escalating"
    elif a_trend == "falling" and mean_arousal > 0.55:
        trend = "de-escalating"
    elif v_trend == "falling" and v_consecutive:
        trend = "worsening"
    elif v_trend == "rising" and mean_valence < 0.45:
        trend = "improving"
    else:
        trend = "stable"

    traj = TrajectorySignal(
        arousal_delta=round(a_delta, 3),
        valence_delta=round(v_delta, 3),
        dominance_delta=round(d_delta, 3),
        arousal_trend=a_trend,
        valence_trend=v_trend,
        trend=trend,
        frames_in_buffer=n,
        baseline_established=True,
    )
    return traj, coher
# ── Internal helpers ───────────────────────────────────────────────────────────
def _is_more_positive(current: str, prior: str) -> bool:
    """
    True when *current* ranks strictly more positive than *prior* on the
    ordinal positivity scale. Unknown affects default to the neutral rank (4).
    """
    rank = _AFFECT_POSITIVITY.get
    return rank(current, 4) > rank(prior, 4)