"""
Estimate macro nutrition for recipes that have no direct data.

For each recipe where sugar_g, carbs_g and fiber_g are all NULL (calories is
not checked, and is overwritten together with the other three columns),
look up the matched ingredient_profiles and average their per-100g values,
then scale by a rough 150g-per-ingredient portion assumption.

Mark such rows with nutrition_estimated=1 so the UI can display a disclaimer.
Recipes with food.com direct data (nutrition_estimated=0 and values set) are untouched.

Usage:
    conda run -n job-seeker python scripts/pipeline/estimate_recipe_nutrition.py \
        --db /path/to/kiwi.db
"""
from __future__ import annotations
import argparse
import json
import sqlite3
from pathlib import Path

# Rough grams per ingredient when no quantity data is available.
_GRAMS_PER_INGREDIENT = 150.0

# Number of pending UPDATE rows buffered before they are written and committed.
_BATCH_SIZE = 5000

# Parameter order: calories, carbs_g, fiber_g, sugar_g, id.
_UPDATE_SQL = (
    "UPDATE recipes SET calories=?, carbs_g=?, fiber_g=?, sugar_g=?, "
    "nutrition_estimated=1 WHERE id=?"
)


def _parse_ingredient_names(raw: object) -> list[str]:
    """Decode a recipe's ``ingredient_names`` JSON into a list of strings.

    Anything that is not a JSON array yields an empty list, and non-string
    array items are dropped, so one malformed row cannot abort the whole run.
    """
    try:
        parsed = json.loads(raw or "[]")
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers column
        # values that are neither str nor bytes.
        return []
    if not isinstance(parsed, list):
        return []
    # Only strings can be profile names; unhashable items (nested lists or
    # dicts) would otherwise raise on the dict membership test.
    return [item for item in parsed if isinstance(item, str)]


def _flush(conn: sqlite3.Connection, batch: list) -> int:
    """Write the pending updates, commit, and return how many were written."""
    conn.executemany(_UPDATE_SQL, batch)
    conn.commit()
    return len(batch)


def estimate(db_path: Path) -> None:
    """Fill in estimated macros for recipes that have none.

    Recipes whose ``sugar_g``, ``carbs_g`` and ``fiber_g`` are all NULL are
    selected.  For each one, the per-100g values of every ingredient name that
    has an ``ingredient_profiles`` row are summed and scaled by
    ``_GRAMS_PER_INGREDIENT / 100``.  The result is written to ``calories``,
    ``carbs_g``, ``fiber_g`` and ``sugar_g`` and the row is flagged with
    ``nutrition_estimated=1``.  Recipes without a single matched ingredient
    are left untouched.

    Args:
        db_path: Path to the SQLite database holding the ``recipes`` and
            ``ingredient_profiles`` tables.

    Raises:
        sqlite3.Error: If the database cannot be read or updated.  The
            connection is closed in that case as well.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")

        # Load ingredient_profiles macro data into memory for fast lookup.
        # NULL macro values are treated as 0.
        profile_macros: dict[str, dict[str, float]] = {}
        for name, cal, carbs, fiber, sugar in conn.execute(
            "SELECT name, calories_per_100g, carbs_g_per_100g, fiber_g_per_100g, sugar_g_per_100g "
            "FROM ingredient_profiles"
        ):
            if name:
                profile_macros[name] = {
                    "calories": float(cal or 0),
                    "carbs": float(carbs or 0),
                    "fiber": float(fiber or 0),
                    "sugar": float(sugar or 0),
                }

        # Select recipes with no direct nutrition data.  The result is
        # materialised up front because the same table is updated below.
        # NOTE(review): calories is not part of this filter, so an existing
        # calories value on a selected row is overwritten -- confirm intended.
        rows = conn.execute(
            "SELECT id, ingredient_names FROM recipes "
            "WHERE sugar_g IS NULL AND carbs_g IS NULL AND fiber_g IS NULL"
        ).fetchall()

        portion_factor = _GRAMS_PER_INGREDIENT / 100.0
        updated = 0
        batch: list[tuple] = []

        for recipe_id, ingredient_names_json in rows:
            matched = [
                profile_macros[name]
                for name in _parse_ingredient_names(ingredient_names_json)
                if name in profile_macros
            ]
            if not matched:
                continue

            # "Average per-100g value x assumed portion x ingredient count"
            # reduces to the plain sum times the portion factor.
            total_cal = sum(m["calories"] for m in matched) * portion_factor
            total_carbs = sum(m["carbs"] for m in matched) * portion_factor
            total_fiber = sum(m["fiber"] for m in matched) * portion_factor
            total_sugar = sum(m["sugar"] for m in matched) * portion_factor

            # A total that rounds to zero is stored as NULL rather than 0
            # (presumably because missing profile values are loaded as 0, so
            # a zero total carries no information).
            batch.append((
                round(total_cal, 1) or None,
                round(total_carbs, 2) or None,
                round(total_fiber, 2) or None,
                round(total_sugar, 2) or None,
                recipe_id,
            ))

            if len(batch) >= _BATCH_SIZE:
                updated += _flush(conn, batch)
                print(f"  {updated} recipes estimated...")
                batch = []

        if batch:
            updated += _flush(conn, batch)
    finally:
        # Release the database handle even when a query fails part-way.
        conn.close()

    print(f"Total: {updated} recipes received estimated nutrition")


if __name__ == "__main__":
    # Command-line entry point: --db is mandatory and is converted to a Path.
    cli = argparse.ArgumentParser()
    cli.add_argument("--db", required=True, type=Path)
    estimate(cli.parse_args().db)