From e377bd85aa69f523c7b9f6b85e4623684ed341d1 Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Mon, 30 Mar 2026 22:59:46 -0700
Subject: [PATCH] feat: ElementClassifier -- ingredient element tagging with heuristic fallback

---
 app/services/recipe/__init__.py               |   0
 app/services/recipe/element_classifier.py     | 120 ++++++++++++++++++
 app/services/recipe/staple_library.py         |  56 ++++++++
 tests/services/__init__.py                    |   0
 tests/services/recipe/__init__.py             |   0
 .../recipe/test_element_classifier.py         |  68 ++++++++++
 tests/services/recipe/test_staple_library.py  |  24 ++++
 7 files changed, 268 insertions(+)
 create mode 100644 app/services/recipe/__init__.py
 create mode 100644 app/services/recipe/element_classifier.py
 create mode 100644 app/services/recipe/staple_library.py
 create mode 100644 tests/services/__init__.py
 create mode 100644 tests/services/recipe/__init__.py
 create mode 100644 tests/services/recipe/test_element_classifier.py
 create mode 100644 tests/services/recipe/test_staple_library.py

diff --git a/app/services/recipe/__init__.py b/app/services/recipe/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/services/recipe/element_classifier.py b/app/services/recipe/element_classifier.py
new file mode 100644
index 0000000..ace1f74
--- /dev/null
+++ b/app/services/recipe/element_classifier.py
@@ -0,0 +1,120 @@
+"""
+ElementClassifier -- classify pantry items into culinary element tags.
+
+Lookup order:
+  1. ingredient_profiles table (pre-computed from USDA FDC)
+  2. Keyword heuristic fallback (for unlisted ingredients)
+"""
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from app.db.store import Store
+
+# All valid ingredient-level element labels (Method is recipe-level, not ingredient-level)
+ELEMENTS = frozenset({
+    "Seasoning", "Richness", "Brightness", "Depth",
+    "Aroma", "Structure", "Texture",
+})
+
+_HEURISTIC: list[tuple[list[str], str]] = [
+    (["vinegar", "lemon", "lime", "citrus", "wine", "yogurt", "kefir",
+      "buttermilk", "tomato", "tamarind"], "Brightness"),
+    (["oil", "butter", "cream", "lard", "fat", "avocado", "coconut milk",
+      "ghee", "shortening", "crisco"], "Richness"),
+    (["salt", "soy", "miso", "tamari", "fish sauce", "worcestershire",
+      "anchov", "capers", "olive", "brine"], "Seasoning"),
+    (["mushroom", "parmesan", "miso", "nutritional yeast", "bouillon",
+      "broth", "umami", "anchov", "dried tomato", "soy"], "Depth"),
+    (["garlic", "onion", "shallot", "herb", "basil", "oregano", "thyme",
+      "rosemary", "spice", "cumin", "coriander", "paprika", "chili",
+      "ginger", "cinnamon", "pepper", "cilantro", "dill", "fennel",
+      "cardamom", "turmeric", "smoke"], "Aroma"),
+    (["flour", "starch", "cornstarch", "arrowroot", "egg", "gelatin",
+      "agar", "breadcrumb", "panko", "roux"], "Structure"),
+    (["nut", "seed", "cracker", "crisp", "wafer", "chip", "crouton",
+      "granola", "tofu", "tempeh"], "Texture"),
+]
+
+
+@dataclass(frozen=True)
+class IngredientProfile:
+    name: str
+    elements: list[str]
+    fat_pct: float = 0.0
+    fat_saturated_pct: float = 0.0
+    moisture_pct: float = 0.0
+    protein_pct: float = 0.0
+    starch_pct: float = 0.0
+    binding_score: int = 0
+    glutamate_mg: float = 0.0
+    ph_estimate: float | None = None
+    flavor_molecule_ids: list[str] = field(default_factory=list)
+    heat_stable: bool = True
+    add_timing: str = "any"
+    acid_type: str | None = None
+    sodium_mg_per_100g: float = 0.0
+    is_fermented: bool = False
+    texture_profile: str = "neutral"
+    smoke_point_c: float | None = None
+    is_emulsifier: bool = False
+    source: str = "heuristic"
+
+
+class ElementClassifier:
+    def __init__(self, store: "Store") -> None:
+        self._store = store
+
+    def classify(self, ingredient_name: str) -> IngredientProfile:
+        """Return element profile for a single ingredient name."""
+        name = ingredient_name.lower().strip()
+        row = self._store._fetch_one(
+            "SELECT * FROM ingredient_profiles WHERE name = ?", (name,)
+        )
+        if row:
+            return self._row_to_profile(row)
+        return self._heuristic_profile(name)
+
+    def classify_batch(self, names: list[str]) -> list[IngredientProfile]:
+        return [self.classify(n) for n in names]
+
+    def identify_gaps(self, profiles: list[IngredientProfile]) -> list[str]:
+        """Return element names that have no coverage in the given profile list."""
+        covered = set()
+        for p in profiles:
+            covered.update(p.elements)
+        return sorted(ELEMENTS - covered)
+
+    def _row_to_profile(self, row: dict) -> IngredientProfile:
+        return IngredientProfile(
+            name=row["name"],
+            elements=json.loads(row.get("elements") or "[]"),
+            fat_pct=row.get("fat_pct") or 0.0,
+            fat_saturated_pct=row.get("fat_saturated_pct") or 0.0,
+            moisture_pct=row.get("moisture_pct") or 0.0,
+            protein_pct=row.get("protein_pct") or 0.0,
+            starch_pct=row.get("starch_pct") or 0.0,
+            binding_score=row.get("binding_score") or 0,
+            glutamate_mg=row.get("glutamate_mg") or 0.0,
+            ph_estimate=row.get("ph_estimate"),
+            flavor_molecule_ids=json.loads(row.get("flavor_molecule_ids") or "[]"),
+            heat_stable=row.get("heat_stable") is None or bool(row["heat_stable"]),  # NULL -> True
+            add_timing=row.get("add_timing") or "any",
+            acid_type=row.get("acid_type"),
+            sodium_mg_per_100g=row.get("sodium_mg_per_100g") or 0.0,
+            is_fermented=bool(row.get("is_fermented", 0)),
+            texture_profile=row.get("texture_profile") or "neutral",
+            smoke_point_c=row.get("smoke_point_c"),
+            is_emulsifier=bool(row.get("is_emulsifier", 0)),
+            source="db",
+        )
+
+    def _heuristic_profile(self, name: str) -> IngredientProfile:
+        elements = []
+        for keywords, element in _HEURISTIC:
+            if any(kw in name for kw in keywords):
+                elements.append(element)
+        return IngredientProfile(name=name, elements=elements, source="heuristic")
diff --git a/app/services/recipe/staple_library.py b/app/services/recipe/staple_library.py
new file mode 100644
index 0000000..46474e9
--- /dev/null
+++ b/app/services/recipe/staple_library.py
@@ -0,0 +1,56 @@
+"""
+StapleLibrary -- bulk-preparable base component reference data.
+Loaded from YAML files in app/staples/.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+import yaml
+
+_STAPLES_DIR = Path(__file__).parents[2] / "staples"
+
+
+@dataclass(frozen=True)
+class StapleEntry:
+    slug: str
+    name: str
+    description: str
+    dietary_labels: list[str]
+    base_ingredients: list[str]
+    base_method: str
+    base_time_minutes: int
+    yield_formats: dict[str, dict]
+    compatible_styles: list[str]
+
+
+class StapleLibrary:
+    def __init__(self, staples_dir: Path = _STAPLES_DIR) -> None:
+        self._staples: dict[str, StapleEntry] = {}
+        for yaml_path in sorted(staples_dir.glob("*.yaml")):
+            entry = self._load(yaml_path)
+            self._staples[entry.slug] = entry
+
+    def get(self, slug: str) -> StapleEntry | None:
+        return self._staples.get(slug)
+
+    def list_all(self) -> list[StapleEntry]:
+        return list(self._staples.values())
+
+    def filter_by_dietary(self, label: str) -> list[StapleEntry]:
+        return [s for s in self._staples.values() if label in s.dietary_labels]
+
+    def _load(self, path: Path) -> StapleEntry:
+        data = yaml.safe_load(path.read_text(encoding="utf-8"))  # not locale-dependent
+        return StapleEntry(
+            slug=data["slug"],
+            name=data["name"],
+            description=data.get("description", ""),
+            dietary_labels=data.get("dietary_labels", []),
+            base_ingredients=data.get("base_ingredients", []),
+            base_method=data.get("base_method", ""),
+            base_time_minutes=int(data.get("base_time_minutes", 0)),
+            yield_formats=data.get("yield_formats", {}),
+            compatible_styles=data.get("compatible_styles", []),
+        )
diff --git a/tests/services/__init__.py b/tests/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/services/recipe/__init__.py b/tests/services/recipe/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/services/recipe/test_element_classifier.py b/tests/services/recipe/test_element_classifier.py
new file mode 100644
index 0000000..679aecb
--- /dev/null
+++ b/tests/services/recipe/test_element_classifier.py
@@ -0,0 +1,68 @@
+import pytest
+import sqlite3
+import json
+import tempfile
+from pathlib import Path
+
+from app.db.store import Store
+
+
+@pytest.fixture
+def store_with_profiles(tmp_path):
+    db_path = tmp_path / "test.db"
+    store = Store(db_path)
+    # Seed ingredient_profiles
+    store.conn.execute("""
+        INSERT INTO ingredient_profiles
+            (name, elements, fat_pct, moisture_pct, glutamate_mg, binding_score,
+             sodium_mg_per_100g, is_fermented, texture_profile)
+        VALUES (?,?,?,?,?,?,?,?,?)
+    """, ("butter", json.dumps(["Richness"]), 81.0, 16.0, 0.1, 0, 11.0, 0, "creamy"))
+    store.conn.execute("""
+        INSERT INTO ingredient_profiles
+            (name, elements, fat_pct, moisture_pct, glutamate_mg, binding_score,
+             sodium_mg_per_100g, is_fermented, texture_profile)
+        VALUES (?,?,?,?,?,?,?,?,?)
+    """, ("parmesan", json.dumps(["Depth", "Seasoning"]), 29.0, 29.0, 1.2, 1, 1600.0, 0, "neutral"))
+    store.conn.commit()
+    return store
+
+
+def test_classify_known_ingredient(store_with_profiles):
+    from app.services.recipe.element_classifier import ElementClassifier
+    clf = ElementClassifier(store_with_profiles)
+    profile = clf.classify("butter")
+    assert "Richness" in profile.elements
+    assert profile.fat_pct == pytest.approx(81.0)
+    assert profile.name == "butter"
+
+
+def test_classify_unknown_ingredient_uses_heuristic(store_with_profiles):
+    from app.services.recipe.element_classifier import ElementClassifier
+    clf = ElementClassifier(store_with_profiles)
+    profile = clf.classify("ghost pepper hot sauce")
+    # Heuristic should detect Aroma (via the "pepper" keyword)
+    assert len(profile.elements) > 0
+    assert profile.name == "ghost pepper hot sauce"
+
+
+def test_classify_batch(store_with_profiles):
+    from app.services.recipe.element_classifier import ElementClassifier
+    clf = ElementClassifier(store_with_profiles)
+    results = clf.classify_batch(["butter", "parmesan", "unknown herb"])
+    assert len(results) == 3
+    assert results[0].name == "butter"
+    assert results[1].name == "parmesan"
+
+
+def test_identify_gaps(store_with_profiles):
+    from app.services.recipe.element_classifier import ElementClassifier
+    clf = ElementClassifier(store_with_profiles)
+    profiles = [
+        clf.classify("butter"),
+        clf.classify("parmesan"),
+    ]
+    gaps = clf.identify_gaps(profiles)
+    # We have Richness + Depth + Seasoning; should flag Brightness, Aroma, Structure, Texture
+    assert "Brightness" in gaps
+    assert "Richness" not in gaps
diff --git a/tests/services/recipe/test_staple_library.py b/tests/services/recipe/test_staple_library.py
new file mode 100644
index 0000000..777d5e8
--- /dev/null
+++ b/tests/services/recipe/test_staple_library.py
@@ -0,0 +1,24 @@
+def test_seitan_staple_has_yield_formats():
+    from app.services.recipe.staple_library import StapleLibrary
+    lib = StapleLibrary()
+    seitan = lib.get("seitan")
+    assert seitan is not None
+    assert "fresh" in seitan.yield_formats
+    assert "frozen" in seitan.yield_formats
+
+
+def test_staple_yield_format_has_elements():
+    from app.services.recipe.staple_library import StapleLibrary
+    lib = StapleLibrary()
+    seitan = lib.get("seitan")
+    fresh = seitan.yield_formats["fresh"]
+    assert "Structure" in fresh["elements"]
+
+
+def test_list_all_staples():
+    from app.services.recipe.staple_library import StapleLibrary
+    lib = StapleLibrary()
+    all_staples = lib.list_all()
+    slugs = [s.slug for s in all_staples]
+    assert "seitan" in slugs
+    assert "tempeh" in slugs