# kiwi/app/services/recipe/time_effort.py

"""
Runtime parser for active/passive time split and equipment detection.

Operates over a list of direction strings. No I/O — pure Python functions.
Sub-millisecond for up to 20 recipes (20 × ~10 steps each = 200 regex calls).
"""
from __future__ import annotations

import math
import re
from dataclasses import dataclass
from typing import Final

# ── Passive step keywords (whole-word, case-insensitive) ──────────────────

# Each entry is a regex fragment; multi-word phrases use \s+ so any run of
# whitespace between the words is accepted.
_PASSIVE_PATTERNS: Final[list[str]] = [
    "simmer",
    "bake",
    "roast",
    "broil",
    "refrigerate",
    "marinate",
    "chill",
    "cool",
    "freeze",
    "rest",
    "stand",
    "set",
    "soak",
    "steep",
    "proof",
    "rise",
    "let",
    "wait",
    "overnight",
    "braise",
    r"slow\s+cook",
    r"pressure\s+cook",
]

# All keywords are folded into one word-bounded alternation and compiled once
# at import time, so no call has to compile it again.
_PASSIVE_RE: re.Pattern[str] = re.compile(
    r"\b(?:{})\b".format("|".join(_PASSIVE_PATTERNS)),
    re.IGNORECASE,
)

# ── Time extraction regex ─────────────────────────────────────────────────

# Two-branch pattern:
#   Branch A (groups 1-3): range  "15-20 minutes", "15–20 min", "15-to-20 min"
#   Branch B (groups 4-5): single "10 minutes", "2 hours", "30 sec"
#
# Range separators: plain hyphen (-), en-dash (–), or literal "-to-".
#
# The trailing \b forces the unit to end at a word boundary. Without it the
# unit only had to be a prefix of the following word, so "2 minced cloves"
# was read as "2 min" and "4 sections" as "4 sec".
#
# NOTE: only whole numbers are captured. In "1.5 hours" or "1/2 hour" the
# pattern sees just the trailing integer ("5 hours", "2 hour").
_TIME_RE: re.Pattern[str] = re.compile(
    r"(\d+)\s*(?:[-\u2013]|-to-)\s*(\d+)\s*(hour|hr|minute|min|second|sec)s?\b"
    r"|"
    r"(\d+)\s*(hour|hr|minute|min|second|sec)s?\b",
    re.IGNORECASE,
)

_MAX_MINUTES_PER_STEP: Final[int] = 480  # 8 hours sanity cap

# ── Equipment detection (keyword → label, in detection priority order) ────

# Built from a (label, trigger pattern) table. Table order is the order in
# which labels are checked and therefore reported. Every trigger is a
# whole-word, case-insensitive alternation.
_EQUIPMENT_RULES: Final[list[tuple[re.Pattern[str], str]]] = [
    (re.compile(trigger, re.IGNORECASE), label)
    for label, trigger in (
        ("Knife", r"\b(?:chop|dice|mince|slice|julienne)\b"),
        ("Skillet", r"\b(?:skillet|sauté|saute|fry|sear|pan-fry|pan fry)\b"),
        ("Spoon", r"\b(?:wooden spoon|spatula|stir|fold)\b"),
        ("Pot", r"\b(?:pot|boil|simmer|blanch|stock)\b"),
        ("Oven", r"\b(?:oven|bake|roast|preheat|broil)\b"),
        ("Blender", r"\b(?:blender|blend|purée|puree|food processor)\b"),
        ("Mixer", r"\b(?:stand mixer|hand mixer|whip|beat)\b"),
        ("Grill", r"\b(?:grill|barbecue|char|griddle)\b"),
        ("Slow cooker", r"\b(?:slow cooker|crockpot|low and slow)\b"),
        ("Pressure cooker", r"\b(?:pressure cooker|instant pot)\b"),
        ("Colander", r"\b(?:drain|strain|colander|rinse pasta)\b"),
    )
]

# ── Dataclasses ───────────────────────────────────────────────────────────


@dataclass(frozen=True)
class StepAnalysis:
    """Immutable outcome of analysing one direction step.

    Attributes:
        is_passive: Whether the step was classified as passive.
        detected_minutes: Minutes parsed from the step text, or None when
            the text contains no time mention.
    """

    is_passive: bool
    detected_minutes: int | None


@dataclass(frozen=True)
class TimeEffortProfile:
    """Immutable time-and-effort summary for a whole recipe.

    Attributes:
        active_min: Total minutes requiring active attention.
        passive_min: Total minutes during which the cook can step away.
        total_min: Sum of active_min and passive_min.
        step_analyses: One StepAnalysis per direction step.
        equipment: Equipment labels, ordered and without duplicates.
        effort_label: One of "quick", "moderate" or "involved".
    """

    active_min: int
    passive_min: int
    total_min: int
    step_analyses: list[StepAnalysis]
    equipment: list[str]
    effort_label: str


# ── Core parsing logic ────────────────────────────────────────────────────


def _trailing_minutes(text: str, pos: int) -> int:
    """Return the minute part that directly follows an hour mention, or 0.

    *pos* is the index in *text* just past the hour mention. Only whitespace
    and an optional "and" may sit between the two parts, and the minute part
    must be a single value (not a range) with a minute unit.
    """
    rest = text[pos:].lstrip()
    if rest[:4].lower() == "and ":
        rest = rest[4:].lstrip()

    tail = _TIME_RE.match(rest)
    if tail is None or tail.group(4) is None:
        # Nothing time-like follows, or what follows is a range.
        return 0
    if tail.group(5).lower() not in ("minute", "min"):
        return 0
    return int(tail.group(4))


def _extract_minutes(text: str) -> int | None:
    """Return the duration mentioned in text as whole minutes, or None.

    Only the first time mention in the text is used, with one exception: a
    single hour value immediately followed by a single minute value
    ("1 hour 30 minutes", "2 hrs and 15 min") is read as one compound
    duration and both parts are added. Previously the minute part of such a
    duration was dropped.

    Range values (e.g. "15-20 minutes") use the midpoint. Hours are converted
    to minutes. Seconds are rounded up to whole minutes, with a minimum of 1.
    The result is truncated to an int and capped at _MAX_MINUTES_PER_STEP.
    """
    m = _TIME_RE.search(text)
    if m is None:
        return None

    is_range = m.group(1) is not None
    if is_range:
        # Branch A: range match (e.g. "15-20 minutes") -> midpoint
        unit = m.group(3).lower()
        amount: float = (int(m.group(1)) + int(m.group(2))) / 2
    else:
        # Branch B: single value match (e.g. "10 minutes")
        unit = m.group(5).lower()
        amount = float(int(m.group(4)))

    if unit in ("hour", "hr"):
        minutes: float = amount * 60
        if not is_range:
            # Compound duration: pick up "30 minutes" in "1 hour 30 minutes".
            minutes += _trailing_minutes(text, m.end())
    elif unit in ("second", "sec"):
        minutes = max(1.0, math.ceil(amount / 60))
    else:
        minutes = amount

    return min(int(minutes), _MAX_MINUTES_PER_STEP)


def _classify_passive(text: str) -> bool:
    """Tell whether a step counts as passive.

    A step is passive when the passive-keyword regex finds a whole-word
    match anywhere in text.
    """
    hit = _PASSIVE_RE.search(text)
    return hit is not None


def _detect_equipment(all_text: str, has_passive: bool) -> list[str]:
    """Collect the equipment labels whose trigger words occur in all_text.

    all_text is expected to be every direction step joined with spaces.
    Labels come back in rule order with duplicates removed. When has_passive
    is true, "Timer" is appended last unless it is already present.
    """
    # dict.fromkeys keeps first-seen order while dropping repeated labels.
    matched = dict.fromkeys(
        label for pattern, label in _EQUIPMENT_RULES if pattern.search(all_text)
    )
    labels = list(matched)
    if has_passive and "Timer" not in matched:
        labels.append("Timer")
    return labels


def _effort_label(step_count: int) -> str:
    """Map a recipe's step count to an effort label.

    Up to 3 steps is "quick", up to 7 is "moderate", anything more is
    "involved".
    """
    for ceiling, label in ((3, "quick"), (7, "moderate")):
        if step_count <= ceiling:
            return label
    return "involved"


def parse_time_effort(directions: list[str]) -> TimeEffortProfile:
    """Parse a list of direction strings into a TimeEffortProfile.

    Each step is classified as passive or active and scanned for a time
    mention. Detected minutes are added to the passive or active total
    according to that classification. Equipment is detected over all steps
    joined with spaces, and the effort label is derived from the number of
    entries in directions.

    Returns a zero-value profile with empty lists when directions is empty.
    Never raises on malformed entries: a non-string step (e.g. None) is
    analysed as empty text, so it yields an active step with no detected
    time while still counting towards the effort label.
    """
    if not directions:
        return TimeEffortProfile(
            active_min=0,
            passive_min=0,
            total_min=0,
            step_analyses=[],
            equipment=[],
            effort_label="quick",
        )

    # Coerce non-string entries to "" up front. Otherwise the regex searches
    # and the join below raise TypeError, breaking the "never raises" promise.
    texts = [step if isinstance(step, str) else "" for step in directions]

    step_analyses = [
        StepAnalysis(
            is_passive=_classify_passive(text),
            detected_minutes=_extract_minutes(text),
        )
        for text in texts
    ]

    # Steps without a time mention (detected_minutes is None) contribute 0.
    passive_min = sum(
        s.detected_minutes or 0 for s in step_analyses if s.is_passive
    )
    active_min = sum(
        s.detected_minutes or 0 for s in step_analyses if not s.is_passive
    )
    has_any_passive = any(s.is_passive for s in step_analyses)

    equipment = _detect_equipment(" ".join(texts), has_any_passive)

    return TimeEffortProfile(
        active_min=active_min,
        passive_min=passive_min,
        total_min=active_min + passive_min,
        step_analyses=step_analyses,
        equipment=equipment,
        effort_label=_effort_label(len(directions)),
    )