raven/merlin/features/hand_gesture.py
pyr0ball 0dcc25164d feat(features): implement BlinkDetector, GazeEstimator, HeadPoseEstimator, HandGestureDetector
- BlinkDetector: EAR-based blink detection (left/right/both), 6 tests
- GazeEstimator: iris-to-eye-corner ratio gaze direction, frozen GazeDirection dataclass, 4 tests
- HeadPoseEstimator: velocity-based nod/shake/tilt detection (stateful, no tests — daemon smoke test)
- HandGestureDetector: normalize_hand + tip-distance open/pinch/fist classifier (no tests — daemon smoke test)
- TDD: blink and gaze followed RED→GREEN cycle; Black applied to all 6 files
2026-04-26 21:13:59 -07:00

52 lines
1.3 KiB
Python

"""
Simple hand gesture detection — open palm, closed fist, pinch.
Operates on raw (21, 3) landmark arrays; calls normalize_hand() internally.
"""
from __future__ import annotations
from enum import Enum
from typing import Optional
import numpy as np
from circuitforge_core.input.gestures.normalizer import normalize_hand
# Rows of the (21, 3) landmark array used by the classifier. In MediaPipe's
# hand-landmark numbering 4/8/12/16/20 are the thumb, index, middle, ring and
# pinky fingertips respectively.
_THUMB_TIP, _INDEX_TIP = 4, 8
_FINGERTIP_IDX = [_THUMB_TIP, _INDEX_TIP, 12, 16, 20]

# Classification cut-offs. Both are compared against lengths measured on the
# output of normalize_hand(), so they are expressed in that function's
# coordinate scale (NOTE(review): scale is defined by normalize_hand — confirm
# there before retuning).
PINCH_THRESHOLD = 0.15  # max thumb-tip <-> index-tip distance for a pinch
OPEN_PALM_THRESHOLD = 1.5  # min summed fingertip vector length for open palm
class HandGesture(str, Enum):
    """Closed set of hand poses the detector can report.

    The ``str`` mixin makes every member an actual string equal to its value
    (``HandGesture.PINCH == "pinch"``), so members can be compared with, or
    used in place of, their plain-string names.
    """

    OPEN_PALM = "open_palm"
    PINCH = "pinch"
    FIST = "fist"
class HandGestureDetector:
    """Classify a hand pose from MediaPipe landmarks.

    The detector keeps no per-frame state: every call to :meth:`detect`
    classifies the single set of landmarks it is given.
    """

    # Upper bound on the summed fingertip vector lengths for a closed fist.
    # Named (rather than an inline literal) so it sits alongside the module's
    # OPEN_PALM_THRESHOLD / PINCH_THRESHOLD and can be overridden on a
    # subclass or an instance when tuning.
    FIST_THRESHOLD: float = 0.6

    def detect(self, raw_points: np.ndarray) -> Optional[HandGesture]:
        """Classify one frame of hand landmarks.

        Args:
            raw_points: (21, 3) float32 — raw MediaPipe hand landmarks.

        Returns:
            HandGesture or None if pose is ambiguous, i.e. the thumb and
            index tips are not close enough for a pinch and the summed
            fingertip length falls between the fist and open-palm cut-offs.
        """
        # normalize_hand() output is viewed as one row per landmark.
        vec = normalize_hand(raw_points).reshape(21, 3)

        # Length of each fingertip's position vector in the normalized frame
        # (presumably distance from the wrist — confirm in normalize_hand),
        # summed in float64 so the threshold comparison does not depend on
        # the input dtype.
        tip_lengths = np.linalg.norm(vec[_FINGERTIP_IDX], axis=1)
        tip_sum = float(tip_lengths.sum(dtype=np.float64))

        pinch_dist = float(np.linalg.norm(vec[_THUMB_TIP] - vec[_INDEX_TIP]))

        # Pinch is tested first on purpose: it takes precedence over the
        # spread-based palm/fist decision whenever thumb and index touch.
        if pinch_dist < PINCH_THRESHOLD:
            return HandGesture.PINCH
        if tip_sum > OPEN_PALM_THRESHOLD:
            return HandGesture.OPEN_PALM
        if tip_sum < self.FIST_THRESHOLD:
            return HandGesture.FIST
        return None