raven/merlin/features/gaze.py
pyr0ball 0dcc25164d feat(features): implement BlinkDetector, GazeEstimator, HeadPoseEstimator, HandGestureDetector
- BlinkDetector: EAR-based blink detection (left/right/both), 6 tests
- GazeEstimator: iris-to-eye-corner ratio gaze direction, frozen GazeDirection dataclass, 4 tests
- HeadPoseEstimator: velocity-based nod/shake/tilt detection (stateful, no tests — daemon smoke test)
- HandGestureDetector: normalize_hand + tip-distance open/pinch/fist classifier (no tests — daemon smoke test)
- TDD: blink and gaze followed RED→GREEN cycle; Black applied to all 6 files
2026-04-26 21:13:59 -07:00

60 lines
1.8 KiB
Python

"""
Gaze direction estimation from MediaPipe Face Mesh iris landmarks.
Requires mediapipe to be run with refine_landmarks=True (enables iris tracking,
landmark indices 468-477).
"""
from __future__ import annotations
from dataclasses import dataclass
import numpy as np
# Row indices into the face-landmark array read by GazeEstimator.estimate.
# The two iris indices lie in the 468-477 range that is only present when
# iris refinement is enabled (see the module docstring).
# NOTE(review): "left"/"right" follow the constant names only; whether that
# means the subject's side or the viewer's side is not established in this
# file — confirm against the MediaPipe Face Mesh landmark map.
_LEFT_IRIS_CENTER = 468
_RIGHT_IRIS_CENTER = 473
_LEFT_EYE_INNER = 133
_RIGHT_EYE_OUTER = 263
@dataclass(frozen=True)
class GazeDirection:
    """Immutable normalized gaze offset with a coarse direction label."""

    dx: float  # nominally [-1, 1] — negative = left, positive = right
    dy: float  # nominally [-1, 1] — negative = up, positive = down

    @property
    def label(self) -> str:
        """Return one of "center", "left", "right", "up" or "down".

        When both offsets are inside the 0.15 dead zone the gaze counts as
        centered. Otherwise the axis with the strictly larger magnitude
        decides; equal magnitudes resolve to the vertical axis.
        """
        horizontal = abs(self.dx)
        vertical = abs(self.dy)

        if horizontal < 0.15 and vertical < 0.15:
            return "center"

        if horizontal > vertical:
            if self.dx < 0:
                return "left"
            return "right"

        if self.dy < 0:
            return "up"
        return "down"
class GazeEstimator:
    """Estimate gaze direction from iris center relative to eye corners."""

    def estimate(self, face_landmarks: np.ndarray) -> GazeDirection:
        """Return the mean iris offset from the eye-corner midpoint.

        The two iris centers are averaged and compared with the midpoint of
        the two reference eye corners. The x/y components of that offset are
        divided by the corner-to-corner distance. The result is not clamped.

        Args:
            face_landmarks: (478, 3) float32 — MediaPipe Face Mesh with iris
                refinement. Any array-like accepted by ``np.asarray`` works.

        Returns:
            GazeDirection with normalized (dx, dy). A neutral (0.0, 0.0)
            direction is returned when the reference corners (nearly)
            coincide or when any landmark involved is NaN/inf.

        Raises:
            IndexError: if the array has fewer rows than the highest
                landmark index used.
        """
        points = np.asarray(face_landmarks)

        # Mean of both iris centers: a single "where are the pupils" point.
        iris_center = (points[_LEFT_IRIS_CENTER] + points[_RIGHT_IRIS_CENTER]) / 2.0

        left_inner = points[_LEFT_EYE_INNER]
        right_outer = points[_RIGHT_EYE_OUTER]
        eye_width = np.linalg.norm(right_outer - left_inner)

        # Comparisons against NaN are always False, so a bare `< 1e-6` check
        # would let non-finite widths through; test finiteness explicitly.
        if not np.isfinite(eye_width) or eye_width < 1e-6:
            return GazeDirection(dx=0.0, dy=0.0)

        # NOTE(review): the reference corners are the inner corner of one eye
        # and the outer corner of the other (per the constant names), so this
        # midpoint may not sit midway between the eyes — confirm intended.
        eye_center = (left_inner + right_outer) / 2.0
        delta = iris_center - eye_center

        dx = float(delta[0] / eye_width)
        dy = float(delta[1] / eye_width)

        # Non-finite iris coordinates would otherwise yield a NaN direction,
        # which GazeDirection.label reports as "down".
        if not (np.isfinite(dx) and np.isfinite(dy)):
            return GazeDirection(dx=0.0, dy=0.0)

        return GazeDirection(dx=dx, dy=dy)