- Assembly template system (13 templates: burrito, fried rice, omelette, stir fry, pasta, sandwich, grain bowl, soup/stew, casserole, pancakes, porridge, pie, pudding) with role-based matching, whole-word single-keyword guard, deterministic titles via MD5 pantry hash - Prep-state stripping: strips 'melted butter' → 'butter' for coverage checks; reconstructs actionable states as 'Before you start:' cooking instructions (NutritionPanel prep_notes field + RecipesView.vue display block) - FTS5 fixes: always double-quote all terms; strip apostrophes to prevent syntax errors on brands like "Stouffer's"; 'plant-based' → bare 'based' crash - Bidirectional synonym expansion: alt-meat, alt-chicken, alt-beef, alt-pork mapped to canonical texture class; pantry expansion covers 'hamburger' from 'burger patties' etc. - Texture profile backfill script (378K ingredient_profiles rows) with macro-derived classification in priority order (fatty → creamy → starchy → firm → fibrous → tender → liquid → neutral); oats/legumes starchy-first fix - LLM prompt: ban flavoured/sweetened ingredients (vanilla yoghurt) from savoury - Migrations 014 (nutrition macros) + 015 (recipe FTS index) - Nutrition estimation pipeline script - gitignore MagicMock sqlite test artifacts
134 lines
4.3 KiB
Python
134 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Backfill texture_profile in ingredient_profiles from existing macro data.
|
|
|
|
Texture categories and their macro signatures (all values g/100g):
|
|
fatty - fat > 60 (oils, lard, pure butter)
|
|
creamy - fat 15-60 (cream, cheese, fatty meats, nut butter)
|
|
firm - protein > 7, fat < 12, carbs < 20 (lean meats, fish, firm tofu)
|
|
starchy - carbs > 40, fat < 10 (flour, oats, rice, bread, potatoes)
|
|
fibrous - fiber > 4, carbs < 40 (brassicas, leafy greens, whole grains)
|
|
tender - protein 2-15, fat < 10, (soft veg, eggs, soft tofu, cooked beans)
|
|
carbs < 40
|
|
liquid - calories < 25, fat < 1, (broth, juice, dilute sauces)
|
|
protein < 3
|
|
neutral - fallthrough default
|
|
|
|
Rules are applied in priority order: fatty → creamy → starchy → firm →
|
|
fibrous → tender → liquid → neutral.
|
|
|
|
Run:
|
|
python scripts/backfill_texture_profiles.py [path/to/kiwi.db]
|
|
|
|
Or inside the container:
|
|
docker exec kiwi-cloud-api-1 python /app/kiwi/scripts/backfill_texture_profiles.py
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Default DB paths to try
|
|
_DEFAULT_PATHS = [
|
|
"/devl/kiwi-cloud-data/local-dev/kiwi.db",
|
|
"/devl/kiwi-data/kiwi.db",
|
|
]
|
|
|
|
BATCH_SIZE = 5_000
|
|
|
|
|
|
def _classify(fat: float, protein: float, carbs: float,
|
|
fiber: float, calories: float) -> str:
|
|
# Cap runaway values — data quality issue in some branded entries
|
|
fat = min(fat or 0.0, 100.0)
|
|
protein = min(protein or 0.0, 100.0)
|
|
carbs = min(carbs or 0.0, 100.0)
|
|
fiber = min(fiber or 0.0, 50.0)
|
|
calories = min(calories or 0.0, 900.0)
|
|
|
|
if fat > 60:
|
|
return "fatty"
|
|
if fat > 15:
|
|
return "creamy"
|
|
# Starchy before firm: oats/legumes have high protein AND high carbs — carbs win
|
|
if carbs > 40 and fat < 10:
|
|
return "starchy"
|
|
# Firm: lean proteins with low carbs (meats, fish, hard tofu)
|
|
# Lower protein threshold (>7) catches tofu (9%) and similar plant proteins
|
|
if protein > 7 and fat < 12 and carbs < 20:
|
|
return "firm"
|
|
if fiber > 4 and carbs < 40:
|
|
return "fibrous"
|
|
if 2 < protein <= 15 and fat < 10 and carbs < 40:
|
|
return "tender"
|
|
if calories < 25 and fat < 1 and protein < 3:
|
|
return "liquid"
|
|
return "neutral"
|
|
|
|
|
|
def backfill(db_path: str) -> None:
    """Recompute ``texture_profile`` for every row of ``ingredient_profiles``.

    Rows are read in pages of ``BATCH_SIZE``, classified with ``_classify``
    from their macro columns, and written back with one commit per page.
    Progress and a final per-texture distribution are printed to stdout.

    Args:
        db_path: Path of the SQLite database to update.

    Raises:
        sqlite3.Error: Propagated from any failing statement. Pages that
            were already committed stay committed; the connection is closed
            before the exception leaves this function.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row

        total = conn.execute("SELECT COUNT(*) FROM ingredient_profiles").fetchone()[0]
        print(f"Total rows: {total:,}")

        updated = 0
        offset = 0
        counts: dict[str, int] = {}

        while True:
            # ORDER BY id keeps LIMIT/OFFSET paging stable. Without it SQLite
            # makes no promise about row order, so pages read between UPDATEs
            # could skip or repeat rows.
            rows = conn.execute(
                """SELECT id, fat_pct, protein_pct, carbs_g_per_100g,
                          fiber_g_per_100g, calories_per_100g
                   FROM ingredient_profiles
                   ORDER BY id
                   LIMIT ? OFFSET ?""",
                (BATCH_SIZE, offset),
            ).fetchall()

            if not rows:
                break

            batch: list[tuple[str, int]] = []
            for row in rows:
                texture = _classify(
                    row["fat_pct"],
                    row["protein_pct"],
                    row["carbs_g_per_100g"],
                    row["fiber_g_per_100g"],
                    row["calories_per_100g"],
                )
                counts[texture] = counts.get(texture, 0) + 1
                batch.append((texture, row["id"]))

            conn.executemany(
                "UPDATE ingredient_profiles SET texture_profile = ? WHERE id = ?",
                batch,
            )
            # Commit per page so an interrupted run keeps the work done so far.
            conn.commit()

            updated += len(batch)
            offset += BATCH_SIZE
            print(f" {updated:,} / {total:,} updated...", end="\r")

        print(f"\nDone. {updated:,} rows updated.\n")
        print("Texture distribution:")
        # Most frequent texture first. `counts` is empty when nothing was
        # updated, so the division below never sees updated == 0.
        for texture, count in sorted(counts.items(), key=lambda x: -x[1]):
            pct = count / updated * 100
            print(f" {texture:10s} {count:8,} ({pct:.1f}%)")
    finally:
        # Release the database handle even when a statement raises.
        conn.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Resolve the database path: an explicit CLI argument wins; otherwise use
    # the first default location that exists on disk.
    if len(sys.argv) > 1:
        path = sys.argv[1]
        # sqlite3.connect() implicitly creates a missing database file, so a
        # mistyped path would leave an empty DB behind and then fail on the
        # first query. Reject it up front instead.
        if not Path(path).is_file():
            print(f"DB not found: {path}")
            sys.exit(1)
    else:
        path = next((p for p in _DEFAULT_PATHS if Path(p).exists()), None)
        if not path:
            print(f"No DB found. Pass path as argument or create one of: {_DEFAULT_PATHS}")
            sys.exit(1)

    print(f"Backfilling texture profiles in: {path}")
    backfill(path)
|