feat: ElementClassifier -- ingredient element tagging with heuristic fallback
This commit is contained in:
parent
bad6dd175c
commit
e377bd85aa
7 changed files with 268 additions and 0 deletions
0
app/services/recipe/__init__.py
Normal file
0
app/services/recipe/__init__.py
Normal file
120
app/services/recipe/element_classifier.py
Normal file
120
app/services/recipe/element_classifier.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
"""
|
||||
ElementClassifier -- classify pantry items into culinary element tags.
|
||||
|
||||
Lookup order:
|
||||
1. ingredient_profiles table (pre-computed from USDA FDC)
|
||||
2. Keyword heuristic fallback (for unlisted ingredients)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.db.store import Store
|
||||
|
||||
# Every element label that may be attached to an ingredient.
# "Method" is deliberately absent: it is a recipe-level label, not an
# ingredient-level one.
ELEMENTS = frozenset((
    "Seasoning",
    "Richness",
    "Brightness",
    "Depth",
    "Aroma",
    "Structure",
    "Texture",
))
|
||||
|
||||
# Keyword fallback table.  Each entry pairs a list of lowercase keyword
# fragments with the element label they imply.  A fragment may be listed under
# more than one label ("miso", "soy" and "anchov" appear for both Seasoning
# and Depth), so one ingredient name can pick up several elements.
_HEURISTIC: list[tuple[list[str], str]] = [
    (
        [
            "vinegar", "lemon", "lime", "citrus", "wine",
            "yogurt", "kefir", "buttermilk", "tomato", "tamarind",
        ],
        "Brightness",
    ),
    (
        [
            "oil", "butter", "cream", "lard", "fat",
            "avocado", "coconut milk", "ghee", "shortening", "crisco",
        ],
        "Richness",
    ),
    (
        [
            "salt", "soy", "miso", "tamari", "fish sauce",
            "worcestershire", "anchov", "capers", "olive", "brine",
        ],
        "Seasoning",
    ),
    (
        [
            "mushroom", "parmesan", "miso", "nutritional yeast", "bouillon",
            "broth", "umami", "anchov", "dried tomato", "soy",
        ],
        "Depth",
    ),
    (
        [
            "garlic", "onion", "shallot", "herb", "basil", "oregano",
            "thyme", "rosemary", "spice", "cumin", "coriander", "paprika",
            "chili", "ginger", "cinnamon", "pepper", "cilantro", "dill",
            "fennel", "cardamom", "turmeric", "smoke",
        ],
        "Aroma",
    ),
    (
        [
            "flour", "starch", "cornstarch", "arrowroot", "egg",
            "gelatin", "agar", "breadcrumb", "panko", "roux",
        ],
        "Structure",
    ),
    (
        [
            "nut", "seed", "cracker", "crisp", "wafer",
            "chip", "crouton", "granola", "tofu", "tempeh",
        ],
        "Texture",
    ),
]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IngredientProfile:
    """Frozen record describing one ingredient and its element tags.

    Only ``name`` and ``elements`` are required.  Every other field carries a
    default, so a profile can be built from the two required values alone.
    """

    # Identity and element tags.
    name: str
    elements: list[str]

    # Numeric composition fields.
    fat_pct: float = 0.0
    fat_saturated_pct: float = 0.0
    moisture_pct: float = 0.0
    protein_pct: float = 0.0
    starch_pct: float = 0.0
    binding_score: int = 0
    glutamate_mg: float = 0.0
    ph_estimate: float | None = None

    # Each instance gets its own empty list via default_factory.
    flavor_molecule_ids: list[str] = field(default_factory=list)

    # Cooking-behaviour fields.
    heat_stable: bool = True
    add_timing: str = "any"
    acid_type: str | None = None
    sodium_mg_per_100g: float = 0.0
    is_fermented: bool = False
    texture_profile: str = "neutral"
    smoke_point_c: float | None = None
    is_emulsifier: bool = False

    # Where the profile came from; "heuristic" unless the creator overrides it.
    source: str = "heuristic"
|
||||
|
||||
|
||||
class ElementClassifier:
    """Tag ingredient names with culinary elements.

    Each lookup first queries the ``ingredient_profiles`` table through the
    store and falls back to the ``_HEURISTIC`` keyword table when no row
    matches.
    """

    def __init__(self, store: "Store") -> None:
        """Keep a reference to the store used for profile lookups."""
        self._store = store

    def classify(self, ingredient_name: str) -> IngredientProfile:
        """Return the element profile for a single ingredient name.

        The name is lower-cased and stripped, then matched exactly against
        ``ingredient_profiles.name``.  If no row is returned the keyword
        heuristic builds the profile instead.
        """
        name = ingredient_name.lower().strip()
        row = self._store._fetch_one(
            "SELECT * FROM ingredient_profiles WHERE name = ?", (name,)
        )
        if row:
            return self._row_to_profile(row)
        return self._heuristic_profile(name)

    def classify_batch(self, names: list[str]) -> list[IngredientProfile]:
        """Classify every name in ``names``, preserving input order."""
        return [self.classify(n) for n in names]

    def identify_gaps(self, profiles: list[IngredientProfile]) -> list[str]:
        """Return element names that have no coverage in the given profile list.

        The result is sorted alphabetically.  An empty ``profiles`` list
        yields every label in ``ELEMENTS``.
        """
        covered: set[str] = set().union(*(p.elements for p in profiles))
        return sorted(ELEMENTS - covered)

    def _row_to_profile(self, row: dict) -> IngredientProfile:
        """Build a profile (``source="db"``) from an ``ingredient_profiles`` row.

        ``row`` must contain ``name``.  Every other column is optional: a
        missing or None value falls back to the same default the dataclass
        declares.  ``elements`` and ``flavor_molecule_ids`` are decoded from
        JSON text.
        """
        # A key that is present but None (e.g. a NULL column) must not be
        # coerced to False: the default for heat_stable is True, and
        # ``row.get("heat_stable", 1)`` only covers the key being absent.
        heat_stable = row.get("heat_stable")
        return IngredientProfile(
            name=row["name"],
            elements=json.loads(row.get("elements") or "[]"),
            fat_pct=row.get("fat_pct") or 0.0,
            fat_saturated_pct=row.get("fat_saturated_pct") or 0.0,
            moisture_pct=row.get("moisture_pct") or 0.0,
            protein_pct=row.get("protein_pct") or 0.0,
            starch_pct=row.get("starch_pct") or 0.0,
            binding_score=row.get("binding_score") or 0,
            glutamate_mg=row.get("glutamate_mg") or 0.0,
            ph_estimate=row.get("ph_estimate"),
            flavor_molecule_ids=json.loads(row.get("flavor_molecule_ids") or "[]"),
            heat_stable=True if heat_stable is None else bool(heat_stable),
            add_timing=row.get("add_timing") or "any",
            acid_type=row.get("acid_type"),
            sodium_mg_per_100g=row.get("sodium_mg_per_100g") or 0.0,
            is_fermented=bool(row.get("is_fermented", 0)),
            texture_profile=row.get("texture_profile") or "neutral",
            smoke_point_c=row.get("smoke_point_c"),
            is_emulsifier=bool(row.get("is_emulsifier", 0)),
            source="db",
        )

    def _heuristic_profile(self, name: str) -> IngredientProfile:
        """Build a profile (``source="heuristic"``) from keyword matches.

        Matching is a plain substring test, so a short keyword also fires
        inside longer words (for example "nut" matches "coconut milk").
        Elements are reported in ``_HEURISTIC`` table order; the list is empty
        when nothing matches.
        """
        elements = [
            element
            for keywords, element in _HEURISTIC
            if any(kw in name for kw in keywords)
        ]
        return IngredientProfile(name=name, elements=elements, source="heuristic")
|
||||
56
app/services/recipe/staple_library.py
Normal file
56
app/services/recipe/staple_library.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""
|
||||
StapleLibrary -- bulk-preparable base component reference data.
|
||||
Loaded from YAML files in app/staples/.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
# Default location of the staple YAML files: the "staples" folder two levels
# above the directory that contains this module.
_STAPLES_DIR = Path(__file__).parents[2].joinpath("staples")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class StapleEntry:
    """Frozen record for one staple definition.

    All nine fields are required and positional order matches the
    declaration order below.
    """

    # Identity and free-text description.
    slug: str
    name: str
    description: str

    # Labels checked by membership when filtering the library.
    dietary_labels: list[str]

    # Base preparation.
    base_ingredients: list[str]
    base_method: str
    base_time_minutes: int

    # Per-format details, keyed by format name.
    yield_formats: dict[str, dict]

    compatible_styles: list[str]
|
||||
|
||||
|
||||
class StapleLibrary:
    """In-memory index of staple entries loaded from YAML files.

    Every ``*.yaml`` file directly inside ``staples_dir`` is parsed once at
    construction time and stored under its ``slug``.
    """

    def __init__(self, staples_dir: Path = _STAPLES_DIR) -> None:
        """Load all staple definitions found in ``staples_dir``.

        Files are visited in sorted path order, so if two files declare the
        same slug the one that sorts last replaces the earlier entry.
        """
        self._staples: dict[str, StapleEntry] = {}
        for yaml_path in sorted(staples_dir.glob("*.yaml")):
            entry = self._load(yaml_path)
            self._staples[entry.slug] = entry

    def get(self, slug: str) -> StapleEntry | None:
        """Return the entry registered under ``slug``, or None if unknown."""
        return self._staples.get(slug)

    def list_all(self) -> list[StapleEntry]:
        """Return a new list containing every loaded entry."""
        return list(self._staples.values())

    def filter_by_dietary(self, label: str) -> list[StapleEntry]:
        """Return the entries whose ``dietary_labels`` contain ``label``."""
        return [s for s in self._staples.values() if label in s.dietary_labels]

    def _load(self, path: Path) -> StapleEntry:
        """Parse one YAML file into a ``StapleEntry``.

        ``slug`` and ``name`` are required (a missing key raises KeyError).
        The remaining keys are optional; a key that is absent *or* present
        with a null value falls back to an empty default, so later membership
        tests and the ``int()`` conversion never see None.
        """
        # Decode explicitly as UTF-8 instead of the platform default encoding.
        data = yaml.safe_load(path.read_text(encoding="utf-8"))
        return StapleEntry(
            slug=data["slug"],
            name=data["name"],
            description=data.get("description") or "",
            dietary_labels=data.get("dietary_labels") or [],
            base_ingredients=data.get("base_ingredients") or [],
            base_method=data.get("base_method") or "",
            base_time_minutes=int(data.get("base_time_minutes") or 0),
            yield_formats=data.get("yield_formats") or {},
            compatible_styles=data.get("compatible_styles") or [],
        )
|
||||
0
tests/services/__init__.py
Normal file
0
tests/services/__init__.py
Normal file
0
tests/services/recipe/__init__.py
Normal file
0
tests/services/recipe/__init__.py
Normal file
68
tests/services/recipe/test_element_classifier.py
Normal file
68
tests/services/recipe/test_element_classifier.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
import pytest
|
||||
import sqlite3
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from app.db.store import Store
|
||||
|
||||
|
||||
@pytest.fixture
def store_with_profiles(tmp_path):
    """Provide a Store on a temporary database seeded with two profiles.

    The seeded rows are "butter" (Richness) and "parmesan" (Depth, Seasoning).
    """
    store = Store(tmp_path / "test.db")

    # One statement shared by both seed rows.
    insert_sql = """
        INSERT INTO ingredient_profiles
        (name, elements, fat_pct, moisture_pct, glutamate_mg, binding_score,
         sodium_mg_per_100g, is_fermented, texture_profile)
        VALUES (?,?,?,?,?,?,?,?,?)
    """
    seed_rows = (
        ("butter", json.dumps(["Richness"]), 81.0, 16.0, 0.1, 0, 11.0, 0, "creamy"),
        ("parmesan", json.dumps(["Depth", "Seasoning"]), 29.0, 29.0, 1.2, 1, 1600.0, 0, "neutral"),
    )
    for seed in seed_rows:
        store.conn.execute(insert_sql, seed)

    store.conn.commit()
    return store
|
||||
|
||||
|
||||
def test_classify_known_ingredient(store_with_profiles):
    """A name seeded in ingredient_profiles is classified from its row."""
    from app.services.recipe.element_classifier import ElementClassifier

    butter = ElementClassifier(store_with_profiles).classify("butter")

    assert butter.name == "butter"
    assert "Richness" in butter.elements
    assert butter.fat_pct == pytest.approx(81.0)
|
||||
|
||||
|
||||
def test_classify_unknown_ingredient_uses_heuristic(store_with_profiles):
    """A name with no seeded row is classified by the keyword heuristic."""
    from app.services.recipe.element_classifier import ElementClassifier

    clf = ElementClassifier(store_with_profiles)
    profile = clf.classify("ghost pepper hot sauce")

    assert profile.name == "ghost pepper hot sauce"
    # The fixture seeds no such row, so the profile must come from the fallback.
    assert profile.source == "heuristic"
    # "pepper" is listed under Aroma in the heuristic keyword table.
    assert "Aroma" in profile.elements
|
||||
|
||||
|
||||
def test_classify_batch(store_with_profiles):
    """classify_batch returns one profile per name, in input order."""
    from app.services.recipe.element_classifier import ElementClassifier

    clf = ElementClassifier(store_with_profiles)
    results = clf.classify_batch(["butter", "parmesan", "unknown herb"])

    assert len(results) == 3
    assert results[0].name == "butter"
    assert results[1].name == "parmesan"
    # The third name has no seeded row, so it exercises the heuristic
    # fallback inside the same batch ("herb" is an Aroma keyword).
    assert results[2].name == "unknown herb"
    assert results[2].source == "heuristic"
    assert "Aroma" in results[2].elements
|
||||
|
||||
|
||||
def test_identify_gaps(store_with_profiles):
    """identify_gaps reports every element the given profiles do not cover."""
    from app.services.recipe.element_classifier import ElementClassifier

    clf = ElementClassifier(store_with_profiles)
    profiles = [
        clf.classify("butter"),
        clf.classify("parmesan"),
    ]
    gaps = clf.identify_gaps(profiles)

    # The seeded rows cover Richness + Depth + Seasoning, so the remaining
    # four labels are returned, sorted alphabetically.
    assert gaps == ["Aroma", "Brightness", "Structure", "Texture"]
|
||||
24
tests/services/recipe/test_staple_library.py
Normal file
24
tests/services/recipe/test_staple_library.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
def test_seitan_staple_has_yield_formats():
    """The seitan staple is loaded and declares fresh and frozen formats."""
    from app.services.recipe.staple_library import StapleLibrary

    seitan = StapleLibrary().get("seitan")

    assert seitan is not None
    for format_name in ("fresh", "frozen"):
        assert format_name in seitan.yield_formats
|
||||
|
||||
|
||||
def test_staple_yield_format_has_elements():
    """The fresh seitan format lists Structure among its elements."""
    from app.services.recipe.staple_library import StapleLibrary

    lib = StapleLibrary()
    seitan = lib.get("seitan")
    # Fail with a clear assertion, not an AttributeError, if seitan is missing.
    assert seitan is not None

    fresh = seitan.yield_formats["fresh"]
    assert "Structure" in fresh["elements"]
|
||||
|
||||
|
||||
def test_list_all_staples():
    """list_all includes both the seitan and tempeh staples."""
    from app.services.recipe.staple_library import StapleLibrary

    loaded_slugs = [entry.slug for entry in StapleLibrary().list_all()]

    for expected in ("seitan", "tempeh"):
        assert expected in loaded_slugs
|
||||
Loading…
Reference in a new issue