diff --git a/merlin/features/blink.py b/merlin/features/blink.py
new file mode 100644
index 0000000..479ec49
--- /dev/null
+++ b/merlin/features/blink.py
@@ -0,0 +1,70 @@
+"""
+Blink detection from MediaPipe Face Mesh landmarks.
+
+Uses the Eye Aspect Ratio (EAR) method: when the eye closes, the vertical
+landmark distances shrink relative to the horizontal width, driving EAR toward 0.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Optional
+
+import numpy as np
+
+# Landmark order is p1..p6 as consumed by eye_aspect_ratio(): p1/p4 span the
+# eye width, (p2, p6) and (p3, p5) are the two vertical pairs.
+_LEFT_EYE = [33, 160, 158, 133, 153, 144]
+_RIGHT_EYE = [362, 385, 387, 263, 373, 380]
+
+
+class BlinkEvent(str, Enum):
+    LEFT = "left_blink"
+    RIGHT = "right_blink"
+    BOTH = "both_blink"
+
+
+def eye_aspect_ratio(landmarks: np.ndarray, indices: list[int]) -> float:
+    """
+    EAR = (||p2-p6|| + ||p3-p5||) / (2 * ||p1-p4||)
+
+    ~0.3 for open eye, ~0.0 for closed.
+    """
+    p1, p2, p3, p4, p5, p6 = [landmarks[i] for i in indices]
+    vert_a = np.linalg.norm(p2 - p6)
+    vert_b = np.linalg.norm(p3 - p5)
+    horiz = np.linalg.norm(p1 - p4)
+    # Degenerate eye (corners coincide): report "closed" instead of dividing by ~0.
+    if horiz < 1e-6:
+        return 0.0
+    return float((vert_a + vert_b) / (2.0 * horiz))
+
+
+class BlinkDetector:
+    """Detect left, right, or both-eye blinks from face mesh landmarks."""
+
+    def __init__(self, threshold: float = 0.20) -> None:
+        self._threshold = threshold
+
+    def detect(self, face_landmarks: np.ndarray) -> Optional[BlinkEvent]:
+        """
+        Classify which eyes are closed in a single frame.
+
+        Stateless: only the given frame is inspected, so an eye that stays
+        closed is reported again on every call.
+
+        Args:
+            face_landmarks: (478, 3) float32 — MediaPipe Face Mesh with iris refinement.
+
+        Returns:
+            BlinkEvent if a blink is detected, else None.
+        """
+        left_closed = eye_aspect_ratio(face_landmarks, _LEFT_EYE) < self._threshold
+        right_closed = eye_aspect_ratio(face_landmarks, _RIGHT_EYE) < self._threshold
+        if left_closed and right_closed:
+            return BlinkEvent.BOTH
+        if left_closed:
+            return BlinkEvent.LEFT
+        if right_closed:
+            return BlinkEvent.RIGHT
+        return None
diff --git a/merlin/features/gaze.py b/merlin/features/gaze.py
new file mode 100644
index 0000000..d90677e
--- /dev/null
+++ b/merlin/features/gaze.py
@@ -0,0 +1,67 @@
+"""
+Gaze direction estimation from MediaPipe Face Mesh iris landmarks.
+
+Requires mediapipe to be run with refine_landmarks=True (enables iris tracking,
+landmark indices 468-477).
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+_LEFT_IRIS_CENTER = 468
+_RIGHT_IRIS_CENTER = 473
+# Use the *outer* corner of each eye so the reference midpoint lies between the
+# eyes, matching the averaged iris position. 133 is the other corner of the same
+# eye as 33 (see the EAR index lists in blink.py); pairing 133 with 263 would
+# span one and a bit eyes and bias dx even when looking straight ahead.
+_LEFT_EYE_OUTER = 33
+_RIGHT_EYE_OUTER = 263
+
+
+@dataclass(frozen=True)
+class GazeDirection:
+    """Normalized gaze offset; ``label`` buckets it into a coarse direction."""
+
+    dx: float  # [-1, 1] — negative = left, positive = right
+    dy: float  # [-1, 1] — negative = up, positive = down
+
+    @property
+    def label(self) -> str:
+        """Coarse direction: center, left, right, up or down (dominant axis wins)."""
+        if abs(self.dx) < 0.15 and abs(self.dy) < 0.15:
+            return "center"
+        if abs(self.dx) > abs(self.dy):
+            return "left" if self.dx < 0 else "right"
+        return "up" if self.dy < 0 else "down"
+
+
+class GazeEstimator:
+    """Estimate gaze direction from iris center relative to eye corners."""
+
+    def estimate(self, face_landmarks: np.ndarray) -> GazeDirection:
+        """
+        Args:
+            face_landmarks: (478, 3) float32 — MediaPipe Face Mesh with iris refinement.
+
+        Returns:
+            GazeDirection with normalized (dx, dy), each clamped to [-1, 1];
+            (0, 0) when the two eye corners coincide.
+        """
+        left_iris = face_landmarks[_LEFT_IRIS_CENTER]
+        right_iris = face_landmarks[_RIGHT_IRIS_CENTER]
+        iris_center = (left_iris + right_iris) / 2.0
+
+        left_outer = face_landmarks[_LEFT_EYE_OUTER]
+        right_outer = face_landmarks[_RIGHT_EYE_OUTER]
+        eye_width = np.linalg.norm(right_outer - left_outer)
+        if eye_width < 1e-6:
+            return GazeDirection(dx=0.0, dy=0.0)
+
+        eye_center = (left_outer + right_outer) / 2.0
+        # Offsets are in units of the corner-to-corner span; clamp so the range
+        # documented on GazeDirection holds for noisy or mis-tracked landmarks.
+        delta = np.clip((iris_center - eye_center) / eye_width, -1.0, 1.0)
+        return GazeDirection(dx=float(delta[0]), dy=float(delta[1]))
diff --git a/merlin/features/hand_gesture.py b/merlin/features/hand_gesture.py
new file mode 100644
index 0000000..a42759b
--- /dev/null
+++ b/merlin/features/hand_gesture.py
@@ -0,0 +1,57 @@
+"""
+Simple hand gesture detection — open palm, closed fist, pinch.
+
+Operates on raw (21, 3) landmark arrays; calls normalize_hand() internally.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Optional
+
+import numpy as np
+
+from circuitforge_core.input.gestures.normalizer import normalize_hand
+
+_FINGERTIP_IDX = [4, 8, 12, 16, 20]
+_THUMB_TIP = 4
+_INDEX_TIP = 8
+
+# Thresholds apply to distances measured in normalize_hand() output space.
+OPEN_PALM_THRESHOLD = 1.5
+FIST_THRESHOLD = 0.6
+PINCH_THRESHOLD = 0.15
+
+
+class HandGesture(str, Enum):
+    OPEN_PALM = "open_palm"
+    PINCH = "pinch"
+    FIST = "fist"
+
+
+class HandGestureDetector:
+    """Classify a hand pose from MediaPipe landmarks."""
+
+    def detect(self, raw_points: np.ndarray) -> Optional[HandGesture]:
+        """
+        Args:
+            raw_points: (21, 3) float32 — raw MediaPipe hand landmarks.
+
+        Returns:
+            HandGesture or None if pose is ambiguous.
+        """
+        vec = normalize_hand(raw_points).reshape(21, 3)
+
+        # Pinch is tested first: it takes priority over the palm/fist split.
+        pinch_dist = float(np.linalg.norm(vec[_THUMB_TIP] - vec[_INDEX_TIP]))
+        if pinch_dist < PINCH_THRESHOLD:
+            return HandGesture.PINCH
+
+        # Summed fingertip distance from the origin of the normalized frame:
+        # above the palm threshold reads as open, below the fist threshold as closed.
+        tip_sum = sum(float(np.linalg.norm(vec[i])) for i in _FINGERTIP_IDX)
+        if tip_sum > OPEN_PALM_THRESHOLD:
+            return HandGesture.OPEN_PALM
+        if tip_sum < FIST_THRESHOLD:
+            return HandGesture.FIST
+        return None
diff --git a/merlin/features/head_pose.py b/merlin/features/head_pose.py
new file mode 100644
index 0000000..098ee7e
--- /dev/null
+++ b/merlin/features/head_pose.py
@@ -0,0 +1,73 @@
+"""
+Head pose estimation — detect nod, shake, and tilt from Face Mesh landmarks.
+
+Velocity-based: compares nose tip position across consecutive frames.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Optional
+
+import numpy as np
+
+_NOSE_TIP = 1
+_L_CHEEK = 234
+_R_CHEEK = 454
+
+# Nod/shake: minimum per-frame nose displacement. Tilt: minimum cheek height gap.
+NOD_THRESHOLD = 0.015
+SHAKE_THRESHOLD = 0.015
+TILT_THRESHOLD = 0.012
+
+
+class HeadGesture(str, Enum):
+    NOD = "head_nod"
+    SHAKE = "head_shake"
+    TILT_LEFT = "head_tilt_left"
+    TILT_RIGHT = "head_tilt_right"
+
+
+class HeadPoseEstimator:
+    """Detect head gestures by comparing nose tip position across frames."""
+
+    def __init__(self) -> None:
+        self._prev: Optional[np.ndarray] = None
+
+    def update(self, face_landmarks: np.ndarray) -> Optional[HeadGesture]:
+        """
+        Feed one frame and return the gesture detected for it, if any.
+
+        The first call only records the nose position and returns None.
+        Nod and shake take priority over tilt.
+
+        Args:
+            face_landmarks: (478, 3) float32 — current frame.
+
+        Returns:
+            HeadGesture if detected, else None.
+        """
+        nose = face_landmarks[_NOSE_TIP]
+        if self._prev is None:
+            self._prev = nose.copy()
+            return None
+
+        delta = nose - self._prev
+        self._prev = nose.copy()
+
+        dy = float(delta[1])
+        dx = float(delta[0])
+
+        l_cheek = face_landmarks[_L_CHEEK]
+        r_cheek = face_landmarks[_R_CHEEK]
+        # Roll is read from the current frame alone (cheek height difference),
+        # unlike nod/shake which depend on motion since the previous frame.
+        roll = float(l_cheek[1] - r_cheek[1])
+
+        if abs(dy) > NOD_THRESHOLD and abs(dy) > abs(dx):
+            return HeadGesture.NOD
+        if abs(dx) > SHAKE_THRESHOLD and abs(dx) > abs(dy):
+            return HeadGesture.SHAKE
+        if abs(roll) > TILT_THRESHOLD:
+            return HeadGesture.TILT_LEFT if roll > 0 else HeadGesture.TILT_RIGHT
+        return None
diff --git a/tests/test_features/test_blink.py b/tests/test_features/test_blink.py
new file mode 100644
index 0000000..127df87
--- /dev/null
+++ b/tests/test_features/test_blink.py
@@ -0,0 +1,73 @@
+import numpy as np
+import pytest
+from merlin.features.blink import BlinkDetector, BlinkEvent, eye_aspect_ratio
+
+LEFT_EYE = [33, 160, 158, 133, 153, 144]
+RIGHT_EYE = [362, 385, 387, 263, 373, 380]
+
+
+def _face(n: int = 478) -> np.ndarray:
+    return np.zeros((n, 3), dtype=np.float32)
+
+
+def _set_eye_open(face: np.ndarray, indices: list[int]) -> None:
+    """Set 6 EAR landmarks so EAR = 0.4 (open eye)."""
+    p1, p2, p3, p4, p5, p6 = indices
+    face[p1] = [0.0, 0.0, 0.0]
+    face[p4] = [1.0, 0.0, 0.0]
+    face[p2] = [0.25, 0.2, 0.0]
+    face[p6] = [0.25, -0.2, 0.0]
+    face[p3] = [0.75, 0.2, 0.0]
+    face[p5] = [0.75, -0.2, 0.0]
+
+
+def _set_eye_closed(face: np.ndarray, indices: list[int]) -> None:
+    """Set 6 EAR landmarks so EAR ≈ 0 (closed)."""
+    for i in indices:
+        face[i] = [0.0, 0.0, 0.0]
+
+
+def test_ear_open_eye():
+    face = _face()
+    _set_eye_open(face, LEFT_EYE)
+    ear = eye_aspect_ratio(face, LEFT_EYE)
+    assert ear > 0.20
+
+
+def test_ear_closed_eye():
+    face = _face()
+    _set_eye_closed(face, LEFT_EYE)
+    ear = eye_aspect_ratio(face, LEFT_EYE)
+    assert ear < 0.05
+
+
+def test_no_blink_when_both_open():
+    detector = BlinkDetector(threshold=0.20)
+    face = _face()
+    _set_eye_open(face, LEFT_EYE)
+    _set_eye_open(face, RIGHT_EYE)
+    assert detector.detect(face) is None
+
+
+def test_left_blink_detected():
+    detector = BlinkDetector(threshold=0.20)
+    face = _face()
+    _set_eye_closed(face, LEFT_EYE)
+    _set_eye_open(face, RIGHT_EYE)
+    assert detector.detect(face) == BlinkEvent.LEFT
+
+
+def test_right_blink_detected():
+    detector = BlinkDetector(threshold=0.20)
+    face = _face()
+    _set_eye_open(face, LEFT_EYE)
+    _set_eye_closed(face, RIGHT_EYE)
+    assert detector.detect(face) == BlinkEvent.RIGHT
+
+
+def test_both_blink_detected():
+    detector = BlinkDetector(threshold=0.20)
+    face = _face()
+    _set_eye_closed(face, LEFT_EYE)
+    _set_eye_closed(face, RIGHT_EYE)
+    assert detector.detect(face) == BlinkEvent.BOTH
diff --git a/tests/test_features/test_gaze.py b/tests/test_features/test_gaze.py
new file mode 100644
index 0000000..1c7195d
--- /dev/null
+++ b/tests/test_features/test_gaze.py
@@ -0,0 +1,66 @@
+import numpy as np
+import pytest
+from merlin.features.gaze import GazeEstimator, GazeDirection
+
+_LEFT_IRIS = 468
+_RIGHT_IRIS = 473
+_LEFT_OUTER = 33
+_RIGHT_OUTER = 263
+
+
+def _face(n: int = 478) -> np.ndarray:
+    return np.zeros((n, 3), dtype=np.float32)
+
+
+def _set_gaze_center(face: np.ndarray) -> None:
+    """Iris centers at midpoint of eye span → center gaze."""
+    face[_LEFT_OUTER] = [0.3, 0.5, 0.0]
+    face[_RIGHT_OUTER] = [0.7, 0.5, 0.0]
+    mid = (face[_LEFT_OUTER] + face[_RIGHT_OUTER]) / 2.0
+    face[_LEFT_IRIS] = mid.copy()
+    face[_RIGHT_IRIS] = mid.copy()
+
+
+def _set_gaze_left(face: np.ndarray) -> None:
+    """Iris centers shifted left relative to eye span."""
+    face[_LEFT_OUTER] = [0.3, 0.5, 0.0]
+    face[_RIGHT_OUTER] = [0.7, 0.5, 0.0]
+    face[_LEFT_IRIS] = [0.35, 0.5, 0.0]
+    face[_RIGHT_IRIS] = [0.35, 0.5, 0.0]
+
+
+def test_center_gaze_label():
+    face = _face()
+    _set_gaze_center(face)
+    g = GazeEstimator().estimate(face)
+    assert g.label == "center"
+
+
+def test_left_gaze_label():
+    face = _face()
+    _set_gaze_left(face)
+    g = GazeEstimator().estimate(face)
+    assert g.label == "left"
+
+
+def test_offsets_are_clamped():
+    """An iris far outside the eye span must stay within [-1, 1]."""
+    face = _face()
+    _set_gaze_center(face)
+    face[_LEFT_IRIS] = [5.0, 0.5, 0.0]
+    face[_RIGHT_IRIS] = [5.0, 0.5, 0.0]
+    g = GazeEstimator().estimate(face)
+    assert g.dx == 1.0 and g.dy == 0.0
+
+
+def test_zero_eye_width_returns_center():
+    """Degenerate case: all landmarks at same point → center."""
+    face = _face()
+    g = GazeEstimator().estimate(face)
+    assert g.dx == 0.0 and g.dy == 0.0
+
+
+def test_gazeresult_is_frozen():
+    g = GazeDirection(dx=0.1, dy=0.2)
+    with pytest.raises((AttributeError, TypeError)):
+        g.dx = 0.5