chore: commit in-progress work -- tag inferrer, imitate endpoint, hall-of-chaos easter egg, migration files, Dockerfile .env defense

- app/services/recipe/tag_inferrer.py: infer tags from recipe ingredient text
- app/db/migrations/022_recipe_generic_flag.sql, 029_inferred_tags.sql: schema migrations
- app/api/endpoints/imitate.py: recipe imitation endpoint stub
- app/api/endpoints/community.py: hall-of-chaos easter egg endpoint
- scripts/pipeline/infer_recipe_tags.py, backfill_keywords.py: pipeline scripts
- scripts/pipeline/build_recipe_index.py: extended index builder
- Dockerfile: explicit .env removal as defense-in-depth
- frontend/src/components/FeedbackButton.vue: feedback UX improvements
- frontend/src/style.css: minor style tweaks
- app/cloud_session.py: cloud session improvements
- tests/api/test_community_endpoints.py: additional test coverage
This commit is contained in:
pyr0ball 2026-04-14 13:23:15 -07:00
parent fe18fb48c0
commit 144d1dc6c4
13 changed files with 1108 additions and 7 deletions

View file

@ -16,6 +16,12 @@ COPY kiwi/environment.yml .
RUN conda env create -f environment.yml
COPY kiwi/ ./kiwi/
# Remove gitignored config files that may exist locally — defense-in-depth.
# The parent .dockerignore should exclude these, but an explicit rm guarantees
# they never end up in the cloud image regardless of .dockerignore placement.
# NOTE(review): an rm in a later RUN layer does NOT purge the file from the
# earlier COPY layer's history — anyone with the image can still extract it
# from that layer. .dockerignore is the real guarantee; confirm it covers .env.
RUN rm -f /app/kiwi/.env
# Install cf-core into the kiwi env BEFORE installing kiwi (kiwi lists it as a dep)
RUN conda run -n kiwi pip install --no-cache-dir -e /app/circuitforge-core
WORKDIR /app/kiwi

View file

@ -120,6 +120,21 @@ async def local_feed():
return [_post_to_dict(p) for p in posts]
@router.get("/hall-of-chaos")
async def hall_of_chaos():
    """Hidden easter egg endpoint -- returns the 10 most chaotic bloopers."""
    store = _get_community_store()
    if store is None:
        # No community store configured — an empty hall, zero chaos.
        return {"posts": [], "chaos_level": 0}
    # The store API is synchronous; run the query off the event loop.
    bloopers = await asyncio.to_thread(
        store.list_posts, limit=10, post_type="recipe_blooper"
    )
    serialized = [_post_to_dict(blooper) for blooper in bloopers]
    return {"posts": serialized, "chaos_level": len(serialized)}
# Allowed values for a community post's type field (presumably enforced by the
# create-post validation elsewhere in this module — confirm at call sites).
_VALID_POST_TYPES = {"plan", "recipe_success", "recipe_blooper"}
# Server-side length caps for submitted post title and body text.
_MAX_TITLE_LEN = 200
_MAX_TEXT_LEN = 2000

View file

@ -0,0 +1,185 @@
"""Kiwi — /api/v1/imitate/samples endpoint for Avocet Imitate tab.
Returns the actual assembled prompt Kiwi sends to its LLM for recipe generation,
including the full pantry context (expiry-first ordering), dietary constraints
(from user_settings if present), and the Level 3 format instructions.
"""
from __future__ import annotations
from fastapi import APIRouter, Depends
from app.cloud_session import get_session, CloudUser
from app.db.store import Store
router = APIRouter()
# Plain-text response-format instructions appended verbatim to every Level 3
# prompt (see _build_recipe_prompt). The leading "" yields a blank separator
# line when the list is joined with "\n".
_LEVEL3_FORMAT = [
    "",
    "Reply using EXACTLY this plain-text format — no markdown, no bold, no extra commentary:",
    "Title: <name of the dish>",
    "Ingredients: <comma-separated list>",
    "Directions:",
    "1. <first step>",
    "2. <second step>",
    "3. <continue for each step>",
    "Notes: <optional tips>",
]
# Level 4 variant: same layout minus the "3. <continue for each step>" hint
# and the "no extra commentary" clause.
_LEVEL4_FORMAT = [
    "",
    "Reply using EXACTLY this plain-text format — no markdown, no bold:",
    "Title: <name of the dish>",
    "Ingredients: <comma-separated list>",
    "Directions:",
    "1. <first step>",
    "2. <second step>",
    "Notes: <optional tips>",
]
def _read_user_settings(store: Store) -> dict:
"""Read all key/value pairs from user_settings table."""
try:
rows = store.conn.execute("SELECT key, value FROM user_settings").fetchall()
return {r["key"]: r["value"] for r in rows}
except Exception:
return {}
def _build_recipe_prompt(
    pantry_names: list[str],
    expiring_names: list[str],
    constraints: list[str],
    allergies: list[str],
    level: int = 3,
) -> str:
    """Assemble the recipe-generation prompt in Kiwi's Level 3/4 wording.

    Expiring items are listed first so the model prioritizes them; the rest of
    the pantry follows, deduplicated. level == 4 produces the wildcard creative
    prompt; any other level produces the structured Level 3 prompt.
    """
    # Expiry-first ordering, deduplicated against the rest of the pantry.
    urgent = set(expiring_names)
    ordered = list(expiring_names)
    ordered += [name for name in pantry_names if name not in urgent]
    if not ordered:
        ordered = pantry_names
    ingredient_csv = ", ".join(ordered)

    if level == 4:
        prompt_lines = [
            "Surprise me with a creative, unexpected recipe.",
            "Only use ingredients that make culinary sense together. "
            "Do not force flavoured/sweetened items (vanilla yoghurt, flavoured syrups, jam) into savoury dishes.",
            f"Ingredients available: {ingredient_csv}",
        ]
        if constraints:
            prompt_lines.append(f"Constraints: {', '.join(constraints)}")
        if allergies:
            prompt_lines.append(f"Must NOT contain: {', '.join(allergies)}")
        prompt_lines.append("Treat any mystery ingredient as a wildcard — use your imagination.")
        prompt_lines.extend(_LEVEL4_FORMAT)
        return "\n".join(prompt_lines)

    # Level 3 (default): exact-name rule + culinary-sense guardrails.
    prompt_lines = [
        "You are a creative chef. Generate a recipe using the ingredients below.",
        "IMPORTANT: When you use a pantry item, list it in Ingredients using its exact name "
        "from the pantry list. Do not add adjectives, quantities, or cooking states "
        "(e.g. use 'butter', not 'unsalted butter' or '2 tbsp butter').",
        "IMPORTANT: Only use pantry items that make culinary sense for the dish. "
        "Do NOT force flavoured/sweetened items (vanilla yoghurt, fruit yoghurt, jam, "
        "dessert sauces, flavoured syrups) into savoury dishes.",
        "IMPORTANT: Do not default to the same ingredient repeatedly across dishes. "
        "If a pantry item does not genuinely improve this specific dish, leave it out.",
        "",
        f"Pantry items: {ingredient_csv}",
    ]
    if expiring_names:
        prompt_lines.append(
            f"Priority — use these soon (expiring): {', '.join(expiring_names)}"
        )
    if constraints:
        prompt_lines.append(f"Dietary constraints: {', '.join(constraints)}")
    if allergies:
        prompt_lines.append(f"IMPORTANT — must NOT contain: {', '.join(allergies)}")
    prompt_lines.extend(_LEVEL3_FORMAT)
    return "\n".join(prompt_lines)
@router.get("/samples")
async def imitate_samples(
    limit: int = 5,
    level: int = 3,
    session: CloudUser = Depends(get_session),
):
    """Return assembled recipe generation prompts for Avocet's Imitate tab.

    Each sample includes:
      system_prompt -- empty (Kiwi uses no system context)
      input_text    -- full Level 3/4 prompt with pantry items, expiring items,
                       dietary constraints, and format instructions
      output_text   -- empty (no prior LLM output stored per-request)

    level: 3 (structured with element biasing context) or 4 (wildcard creative)
    limit: max number of distinct prompt variants to return (clamped to 1..10;
           the actual count varies by pantry state)

    Dietary data is read from the user_settings table under the keys
    ``dietary_constraints`` and ``dietary_allergies`` (JSON-array values).
    """
    # Clamp so a caller cannot request an unbounded payload.
    limit = max(1, min(limit, 10))
    store = Store(session.db)
    # Full pantry for context
    all_items = store.list_inventory()
    pantry_names = [r["product_name"] for r in all_items if r.get("product_name")]
    # Expiring items as priority ingredients
    expiring = store.expiring_soon(days=14)
    expiring_names = [r["product_name"] for r in expiring if r.get("product_name")]
    # Dietary data from user_settings (keys: dietary_constraints, dietary_allergies).
    # Malformed or missing JSON degrades to "no restrictions" rather than a 500.
    settings = _read_user_settings(store)
    import json as _json
    try:
        constraints = _json.loads(settings.get("dietary_constraints", "[]")) or []
    except Exception:
        constraints = []
    try:
        allergies = _json.loads(settings.get("dietary_allergies", "[]")) or []
    except Exception:
        allergies = []
    if not pantry_names:
        # Empty pantry: nothing to build a prompt from.
        return {"samples": [], "total": 0, "type": f"recipe_level{level}"}
    # Build prompt variants: one per expiring item as the "anchor" ingredient,
    # plus one general pantry prompt. Cap at limit.
    samples = []
    seen_anchors: set[str] = set()
    # limit - 1 leaves room for the general full-pantry prompt appended below.
    for item in (expiring[:limit - 1] if expiring else []):
        anchor = item.get("product_name", "")
        if not anchor or anchor in seen_anchors:
            continue
        seen_anchors.add(anchor)
        # Put this item first in the list for the prompt
        ordered_expiring = [anchor] + [n for n in expiring_names if n != anchor]
        prompt = _build_recipe_prompt(pantry_names, ordered_expiring, constraints, allergies, level)
        samples.append({
            "id": item.get("id", 0),
            "anchor_item": anchor,
            "expiring_count": len(expiring_names),
            "pantry_count": len(pantry_names),
            "system_prompt": "",
            "input_text": prompt,
            "output_text": "",
        })
    # One general prompt using all expiring as priority
    if len(samples) < limit:
        prompt = _build_recipe_prompt(pantry_names, expiring_names, constraints, allergies, level)
        samples.append({
            "id": 0,
            "anchor_item": "full pantry",
            "expiring_count": len(expiring_names),
            "pantry_count": len(pantry_names),
            "system_prompt": "",
            "input_text": prompt,
            "output_text": "",
        })
    return {"samples": samples, "total": len(samples), "type": f"recipe_level{level}"}

View file

@ -170,6 +170,13 @@ def _user_db_path(user_id: str, household_id: str | None = None) -> Path:
return path
def _anon_db_path() -> Path:
    """Ephemeral DB for unauthenticated guest visitors (Free tier, no persistence)."""
    anon_dir = CLOUD_DATA_ROOT / "anonymous"
    # Create the directory tree lazily; repeated calls are a no-op.
    anon_dir.mkdir(parents=True, exist_ok=True)
    return anon_dir / "kiwi.db"
# ── BYOK detection ────────────────────────────────────────────────────────────
# Location of the user-supplied LLM config used for bring-your-own-key detection.
# NOTE(review): resolved against the service user's home directory — confirm
# this path exists/is mounted inside the container environment.
_LLM_CONFIG_PATH = Path.home() / ".config" / "circuitforge" / "llm.yaml"
@ -225,11 +232,21 @@ def get_session(request: Request) -> CloudUser:
or request.headers.get("cookie", "")
)
if not raw_header:
raise HTTPException(status_code=401, detail="Not authenticated")
return CloudUser(
user_id="anonymous",
tier="free",
db=_anon_db_path(),
has_byok=has_byok,
)
token = _extract_session_token(raw_header) # gitleaks:allow — function name, not a secret
if not token:
raise HTTPException(status_code=401, detail="Not authenticated")
return CloudUser(
user_id="anonymous",
tier="free",
db=_anon_db_path(),
has_byok=has_byok,
)
user_id = validate_session_jwt(token)
_ensure_provisioned(user_id)

View file

@ -0,0 +1,5 @@
-- Migration 022: Add is_generic flag to recipes
-- Generic recipes are catch-all/dump recipes with loose ingredient lists
-- that should not appear in Level 1 (deterministic "use what I have") results.
-- Admins can mark recipes via the recipe editor or a bulk backfill script.
-- Semantics: 0 = normal recipe, 1 = generic. The NOT NULL DEFAULT 0 keeps all
-- existing rows visible in Level 1 until explicitly flagged.
ALTER TABLE recipes ADD COLUMN is_generic INTEGER NOT NULL DEFAULT 0;

View file

@ -0,0 +1,49 @@
-- Migration 029: Add inferred_tags column and update FTS index to include it.
--
-- inferred_tags holds a JSON array of normalized tag strings derived by
-- scripts/pipeline/infer_recipe_tags.py (e.g. ["cuisine:Italian",
-- "dietary:Low-Carb", "flavor:Umami", "can_be:Gluten-Free"]).
--
-- The FTS5 browser table is rebuilt to index inferred_tags alongside
-- category and keywords so browse domain queries match against all signals.

-- 1. Add inferred_tags column (empty array default; populated by pipeline run)
ALTER TABLE recipes ADD COLUMN inferred_tags TEXT NOT NULL DEFAULT '[]';

-- 2. Drop old FTS table and triggers that only covered category + keywords
DROP TRIGGER IF EXISTS recipes_ai;
DROP TRIGGER IF EXISTS recipes_ad;
DROP TRIGGER IF EXISTS recipes_au;
DROP TABLE IF EXISTS recipe_browser_fts;

-- 3. Recreate FTS5 table: now indexes category, keywords, AND inferred_tags.
--    content=recipes makes this an *external content* table: the index stores
--    no copy of the text and reads row content from recipes via rowid (= id).
CREATE VIRTUAL TABLE recipe_browser_fts USING fts5(
    category,
    keywords,
    inferred_tags,
    content=recipes,
    content_rowid=id
);

-- 4. Triggers to keep FTS in sync with recipes table changes.
--    External-content FTS5 requires the special 'delete' command form for
--    removals; the OLD column values must be supplied so the index entries
--    can be located and removed (an UPDATE is delete-then-reinsert).
CREATE TRIGGER recipes_ai AFTER INSERT ON recipes BEGIN
    INSERT INTO recipe_browser_fts(rowid, category, keywords, inferred_tags)
    VALUES (new.id, new.category, new.keywords, new.inferred_tags);
END;
CREATE TRIGGER recipes_ad AFTER DELETE ON recipes BEGIN
    INSERT INTO recipe_browser_fts(recipe_browser_fts, rowid, category, keywords, inferred_tags)
    VALUES ('delete', old.id, old.category, old.keywords, old.inferred_tags);
END;
CREATE TRIGGER recipes_au AFTER UPDATE ON recipes BEGIN
    INSERT INTO recipe_browser_fts(recipe_browser_fts, rowid, category, keywords, inferred_tags)
    VALUES ('delete', old.id, old.category, old.keywords, old.inferred_tags);
    INSERT INTO recipe_browser_fts(rowid, category, keywords, inferred_tags)
    VALUES (new.id, new.category, new.keywords, new.inferred_tags);
END;

-- 5. Populate FTS from current table state
-- (inferred_tags is '[]' for all rows at this point; run infer_recipe_tags.py
-- to populate, then the FTS will be rebuilt as part of that script.)
INSERT INTO recipe_browser_fts(recipe_browser_fts) VALUES('rebuild');

View file

@ -0,0 +1,300 @@
"""
Recipe tag inference engine.
Derives normalized tags from a recipe's title, ingredient names, existing corpus
tags (category + keywords), enriched ingredient profile data, and optional
nutrition data.
Tags are organized into five namespaces:
cuisine:* -- cuisine/region classification
dietary:* -- dietary restriction / nutrition profile
flavor:* -- flavor profile (spicy, smoky, sweet, etc.)
time:* -- effort / time signals
meal:* -- meal type
can_be:* -- achievable with substitutions (e.g. can_be:Gluten-Free)
Output is a flat sorted list of strings, e.g.:
["can_be:Gluten-Free", "cuisine:Italian", "dietary:Low-Carb",
"flavor:Savory", "flavor:Umami", "time:Quick"]
These populate recipes.inferred_tags and are FTS5-indexed so browse domain
queries find recipes the food.com corpus tags alone would miss.
"""
from __future__ import annotations
# ---------------------------------------------------------------------------
# Text-signal tables
# (tag, [case-insensitive substrings to search in combined title+ingredient text])
# ---------------------------------------------------------------------------
# NOTE(review): these tables feed plain substring matching (_match_signals),
# so short patterns can over-trigger inside longer words — e.g. "pho" also
# matches "photo". Acceptable for coarse browse tags; tighten to word-boundary
# matching if precision ever matters.
_CUISINE_SIGNALS: list[tuple[str, list[str]]] = [
    ("cuisine:Japanese", ["miso", "dashi", "ramen", "sushi", "teriyaki", "sake", "mirin",
                          "wasabi", "panko", "edamame", "tonkatsu", "yakitori", "ponzu"]),
    ("cuisine:Korean", ["gochujang", "kimchi", "doenjang", "gochugaru",
                        "bulgogi", "bibimbap", "japchae"]),
    ("cuisine:Thai", ["fish sauce", "lemongrass", "galangal", "pad thai", "thai basil",
                      "kaffir lime", "tom yum", "green curry", "red curry", "nam pla"]),
    ("cuisine:Chinese", ["hoisin", "oyster sauce", "five spice", "bok choy", "chow mein",
                         "dumpling", "wonton", "mapo", "char siu", "sichuan"]),
    ("cuisine:Vietnamese", ["pho", "banh mi", "nuoc cham", "rice paper", "vietnamese"]),
    ("cuisine:Indian", ["garam masala", "turmeric", "cardamom", "fenugreek", "paneer",
                        "tikka", "masala", "biryani", "dal", "naan", "tandoori",
                        "curry leaf", "tamarind", "chutney"]),
    ("cuisine:Middle Eastern", ["tahini", "harissa", "za'atar", "sumac", "baharat", "rose water",
                                "pomegranate molasses", "freekeh", "fattoush", "shakshuka"]),
    ("cuisine:Greek", ["feta", "tzatziki", "moussaka", "spanakopita", "orzo",
                       "kalamata", "gyro", "souvlaki", "dolma"]),
    ("cuisine:Mediterranean", ["hummus", "pita", "couscous", "preserved lemon"]),
    ("cuisine:Italian", ["pasta", "pizza", "risotto", "lasagna", "carbonara", "gnocchi",
                         "parmesan", "mozzarella", "ricotta", "prosciutto", "pancetta",
                         "arancini", "osso buco", "tiramisu", "pesto", "bolognese",
                         "cannoli", "polenta", "bruschetta", "focaccia"]),
    ("cuisine:French", ["croissant", "quiche", "crepe", "coq au vin",
                        "ratatouille", "bearnaise", "hollandaise", "bouillabaisse",
                        "herbes de provence", "dijon", "gruyere", "brie", "cassoulet"]),
    ("cuisine:Spanish", ["paella", "chorizo", "gazpacho", "tapas", "patatas bravas",
                         "sofrito", "manchego", "albondigas"]),
    ("cuisine:German", ["sauerkraut", "bratwurst", "schnitzel", "pretzel", "strudel",
                        "spaetzle", "sauerbraten"]),
    ("cuisine:Mexican", ["taco", "burrito", "enchilada", "salsa", "guacamole", "chipotle",
                         "queso", "tamale", "mole", "jalapeno", "tortilla", "carnitas",
                         "chile verde", "posole", "tostada", "quesadilla"]),
    ("cuisine:Latin American", ["plantain", "yuca", "chimichurri", "ceviche", "adobo", "empanada"]),
    ("cuisine:American", ["bbq sauce", "buffalo sauce", "ranch dressing", "coleslaw",
                          "cornbread", "mac and cheese", "brisket", "cheeseburger"]),
    ("cuisine:Southern", ["collard greens", "black-eyed peas", "okra", "grits", "catfish",
                          "hush puppies", "pecan pie"]),
    ("cuisine:Cajun", ["cajun", "creole", "gumbo", "jambalaya", "andouille", "etouffee"]),
    ("cuisine:African", ["injera", "berbere", "jollof", "suya", "egusi", "fufu", "tagine"]),
    ("cuisine:Caribbean", ["jerk", "scotch bonnet", "callaloo", "ackee"]),
]

# Explicit dietary claims found in the recipe text itself (the macro-derived
# dietary tags are computed separately in infer_tags from nutrition data).
_DIETARY_SIGNALS: list[tuple[str, list[str]]] = [
    ("dietary:Vegan", ["vegan", "plant-based", "plant based"]),
    ("dietary:Vegetarian", ["vegetarian", "meatless"]),
    ("dietary:Gluten-Free", ["gluten-free", "gluten free", "celiac"]),
    ("dietary:Dairy-Free", ["dairy-free", "dairy free", "lactose free", "non-dairy"]),
    ("dietary:Low-Carb", ["low-carb", "low carb", "keto", "ketogenic", "very low carbs"]),
    ("dietary:High-Protein", ["high protein", "high-protein"]),
    ("dietary:Low-Fat", ["low-fat", "low fat", "fat-free", "reduced fat"]),
    ("dietary:Paleo", ["paleo", "whole30"]),
    ("dietary:Nut-Free", ["nut-free", "nut free", "peanut free"]),
    ("dietary:Egg-Free", ["egg-free", "egg free"]),
    ("dietary:Low-Sodium", ["low sodium", "no salt"]),
    ("dietary:Healthy", ["healthy", "low cholesterol", "heart healthy", "wholesome"]),
]

# Flavor-profile signals keyed off characteristic ingredients/terms.
_FLAVOR_SIGNALS: list[tuple[str, list[str]]] = [
    ("flavor:Spicy", ["jalapeno", "habanero", "ghost pepper", "sriracha",
                      "chili flake", "red pepper flake", "cayenne", "hot sauce",
                      "gochujang", "harissa", "scotch bonnet", "szechuan pepper", "spicy"]),
    ("flavor:Smoky", ["smoked", "liquid smoke", "smoked paprika",
                      "bbq sauce", "barbecue", "hickory", "mesquite"]),
    ("flavor:Sweet", ["honey", "maple syrup", "brown sugar", "caramel", "chocolate",
                      "vanilla", "condensed milk", "molasses", "agave"]),
    ("flavor:Savory", ["soy sauce", "fish sauce", "miso", "worcestershire", "anchovy",
                       "parmesan", "blue cheese", "bone broth"]),
    ("flavor:Tangy", ["lemon juice", "lime juice", "vinegar", "balsamic", "buttermilk",
                      "sour cream", "fermented", "pickled", "tamarind", "sumac"]),
    ("flavor:Herby", ["fresh basil", "fresh cilantro", "fresh dill", "fresh mint",
                      "fresh tarragon", "fresh thyme", "herbes de provence"]),
    ("flavor:Rich", ["heavy cream", "creme fraiche", "mascarpone", "double cream",
                     "ghee", "coconut cream", "cream cheese"]),
    ("flavor:Umami", ["mushroom", "nutritional yeast", "tomato paste",
                      "parmesan rind", "bonito", "kombu"]),
]

# Effort/time signals. The "< N mins" strings are food.com corpus tags, which
# is why infer_tags matches this table against the corpus keywords as well as
# the free text.
_TIME_SIGNALS: list[tuple[str, list[str]]] = [
    ("time:Quick", ["< 15 mins", "< 30 mins", "weeknight", "easy"]),
    ("time:Under 1 Hour", ["< 60 mins"]),
    ("time:Make-Ahead", ["freezer", "overnight", "refrigerator", "make-ahead", "make ahead"]),
    ("time:Slow Cook", ["slow cooker", "crockpot", "< 4 hours", "braise"]),
]

# food.com corpus tag -> normalized tags. Keys must stay lowercase: infer_tags
# looks them up with kw.lower().
_CORPUS_TAG_MAP: dict[str, list[str]] = {
    # NOTE(review): broad region tags deliberately fan out to several specific
    # cuisines (a recipe tagged "european" gets four cuisine:* tags) — confirm
    # this over-tagging is intended for browse recall.
    "european": ["cuisine:Italian", "cuisine:French", "cuisine:German",
                 "cuisine:Spanish"],
    "asian": ["cuisine:Chinese", "cuisine:Japanese", "cuisine:Thai",
              "cuisine:Korean", "cuisine:Vietnamese"],
    "chinese": ["cuisine:Chinese"],
    "japanese": ["cuisine:Japanese"],
    "thai": ["cuisine:Thai"],
    "vietnamese": ["cuisine:Vietnamese"],
    "indian": ["cuisine:Indian"],
    "greek": ["cuisine:Greek"],
    "mexican": ["cuisine:Mexican"],
    "african": ["cuisine:African"],
    "caribbean": ["cuisine:Caribbean"],
    "vegan": ["dietary:Vegan", "dietary:Vegetarian"],
    "vegetarian": ["dietary:Vegetarian"],
    "healthy": ["dietary:Healthy"],
    "low cholesterol": ["dietary:Healthy"],
    "very low carbs": ["dietary:Low-Carb"],
    "high in...": ["dietary:High-Protein"],
    "lactose free": ["dietary:Dairy-Free"],
    "egg free": ["dietary:Egg-Free"],
    "< 15 mins": ["time:Quick"],
    "< 30 mins": ["time:Quick"],
    "< 60 mins": ["time:Under 1 Hour"],
    "< 4 hours": ["time:Slow Cook"],
    "weeknight": ["time:Quick"],
    "freezer": ["time:Make-Ahead"],
    "dessert": ["meal:Dessert"],
    "breakfast": ["meal:Breakfast"],
    "lunch/snacks": ["meal:Lunch", "meal:Snack"],
    "beverages": ["meal:Beverage"],
    "cookie & brownie": ["meal:Dessert"],
    "breads": ["meal:Bread"],
}

# ingredient_profiles.elements value -> flavor tag. An empty string means the
# element carries no flavor implication; infer_tags filters those out.
_ELEMENT_TO_FLAVOR: dict[str, str] = {
    "Aroma": "flavor:Herby",
    "Richness": "flavor:Rich",
    "Structure": "",  # no flavor tag
    "Binding": "",
    "Crust": "flavor:Smoky",
    "Lift": "",
    "Emulsion": "flavor:Rich",
    "Acid": "flavor:Tangy",
}
def _build_text(title: str, ingredient_names: list[str]) -> str:
parts = [title.lower()]
parts.extend(i.lower() for i in ingredient_names)
return " ".join(parts)
def _match_signals(text: str, table: list[tuple[str, list[str]]]) -> list[str]:
return [tag for tag, pats in table if any(p in text for p in pats)]
def infer_tags(
    title: str,
    ingredient_names: list[str],
    corpus_keywords: list[str],
    corpus_category: str = "",
    # Enriched ingredient profile signals (from ingredient_profiles cross-ref)
    element_coverage: dict[str, float] | None = None,
    fermented_count: int = 0,
    glutamate_total: float = 0.0,
    ph_min: float | None = None,
    available_sub_constraints: list[str] | None = None,
    # Nutrition data for macro-based tags
    calories: float | None = None,
    protein_g: float | None = None,
    fat_g: float | None = None,
    carbs_g: float | None = None,
    servings: float | None = None,
) -> list[str]:
    """
    Derive normalized tags for a recipe.

    Signal sources, accumulated into one tag set:
      1. corpus keywords/category mapped through _CORPUS_TAG_MAP;
      2. substring matches over title + ingredient text (_CUISINE_SIGNALS,
         _DIETARY_SIGNALS, _FLAVOR_SIGNALS, _TIME_SIGNALS);
      3. enriched profile signals — element_coverage (ratio > 0.2 maps through
         _ELEMENT_TO_FLAVOR), glutamate_total (> 50 -> umami), fermented_count
         and ph_min (< 4.5 -> tangy) — plus available_sub_constraints labels
         (e.g. "gluten_free") which become can_be:* tags;
      4. per-serving macro thresholds when servings is known, otherwise a
         total-protein fallback.

    Returns a sorted list of unique normalized tag strings, e.g.
    ["cuisine:Italian", "dietary:Low-Carb", "flavor:Umami", "time:Quick"].
    """
    found: set[str] = set()

    # 1. Corpus vocabulary: keywords and the category share one lookup table.
    corpus_terms = [kw.lower() for kw in corpus_keywords]
    lookup_terms = list(corpus_terms)
    if corpus_category:
        lookup_terms.append(corpus_category.lower())
    for term in lookup_terms:
        found.update(_CORPUS_TAG_MAP.get(term, []))

    # 2. Substring signals over the combined title + ingredient text.
    haystack = _build_text(title, ingredient_names)
    for table in (_CUISINE_SIGNALS, _DIETARY_SIGNALS, _FLAVOR_SIGNALS):
        found.update(_match_signals(haystack, table))

    # 3. Time signals come from both the corpus keywords and the free text.
    found.update(_match_signals(" ".join(corpus_terms), _TIME_SIGNALS))
    found.update(_match_signals(haystack, _TIME_SIGNALS))

    # 4. Enriched ingredient-profile signals.
    for element, ratio in (element_coverage or {}).items():
        # Require >20% of ingredients to carry the element before it implies
        # a flavor; elements mapped to "" carry no flavor tag.
        if ratio > 0.2 and _ELEMENT_TO_FLAVOR.get(element):
            found.add(_ELEMENT_TO_FLAVOR[element])
    if glutamate_total > 50:
        found.add("flavor:Umami")
    if fermented_count > 0 or (ph_min is not None and ph_min < 4.5):
        found.add("flavor:Tangy")

    # 5. Diets reachable via substitutions become can_be:* tags.
    if available_sub_constraints:
        reachable = {
            "gluten_free": "can_be:Gluten-Free",
            "low_calorie": "can_be:Low-Calorie",
            "low_carb": "can_be:Low-Carb",
            "vegan": "can_be:Vegan",
            "dairy_free": "can_be:Dairy-Free",
            "low_sodium": "can_be:Low-Sodium",
        }
        found.update(
            reachable[label]
            for label in available_sub_constraints
            if label in reachable
        )

    # 6. Macro-based dietary tags: per-serving when servings is known and any
    #    macro is present, otherwise fall back to total protein only.
    macros = (protein_g, fat_g, carbs_g, calories)
    if servings and servings > 0 and any(v is not None for v in macros):
        prot_s, fat_s, carb_s, cal_s = (
            None if v is None else v / servings for v in macros
        )
        if prot_s is not None and prot_s >= 20:
            found.add("dietary:High-Protein")
        if fat_s is not None and fat_s <= 5:
            found.add("dietary:Low-Fat")
        if carb_s is not None and carb_s <= 10:
            found.add("dietary:Low-Carb")
        if cal_s is not None and cal_s <= 250:
            found.add("dietary:Light")
    elif protein_g is not None and protein_g >= 20:
        found.add("dietary:High-Protein")

    # 7. Vegan is a strict subset of vegetarian.
    if "dietary:Vegan" in found:
        found.add("dietary:Vegetarian")
    return sorted(found)

View file

@ -140,11 +140,13 @@ import { ref, computed, onMounted } from 'vue'
const props = defineProps<{ currentTab?: string }>()
const apiBase = (import.meta.env.VITE_API_BASE as string) ?? ''
// Probe once on mount hidden until confirmed enabled so button never flashes
const enabled = ref(false)
onMounted(async () => {
try {
const res = await fetch('/api/v1/feedback/status')
const res = await fetch(`${apiBase}/api/v1/feedback/status`)
if (res.ok) {
const data = await res.json()
enabled.value = data.enabled === true
@ -205,7 +207,7 @@ async function submit() {
loading.value = true
submitError.value = ''
try {
const res = await fetch('/api/v1/feedback', {
const res = await fetch(`${apiBase}/api/v1/feedback`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
@ -407,6 +409,114 @@ async function submit() {
.mt-md { margin-top: var(--spacing-md); }
.mt-xs { margin-top: var(--spacing-xs); }
/* ── Form elements ────────────────────────────────────────────────────── */
.form-group {
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
.form-label {
font-size: var(--font-size-sm);
font-weight: 600;
color: var(--color-text-muted);
text-transform: uppercase;
letter-spacing: 0.06em;
}
.form-input {
width: 100%;
padding: var(--spacing-xs) var(--spacing-sm);
background: var(--color-bg-secondary);
border: 1px solid var(--color-border);
border-radius: var(--radius-md);
color: var(--color-text-primary);
font-family: var(--font-body);
font-size: var(--font-size-sm);
line-height: 1.5;
transition: border-color 0.15s;
box-sizing: border-box;
}
.form-input:focus {
outline: none;
border-color: var(--color-border-focus);
}
.form-input::placeholder { color: var(--color-text-muted); opacity: 0.7; }
/* ── Buttons ──────────────────────────────────────────────────────────── */
.btn {
display: inline-flex;
align-items: center;
justify-content: center;
gap: var(--spacing-xs);
padding: var(--spacing-xs) var(--spacing-md);
border-radius: var(--radius-md);
font-family: var(--font-body);
font-size: var(--font-size-sm);
font-weight: 500;
cursor: pointer;
transition: background 0.15s, color 0.15s, border-color 0.15s;
white-space: nowrap;
}
.btn:disabled { opacity: 0.5; cursor: not-allowed; }
.btn-primary {
background: var(--color-primary);
color: #fff;
border: 1px solid var(--color-primary);
}
.btn-primary:hover:not(:disabled) { filter: brightness(1.1); }
.btn-ghost {
background: transparent;
color: var(--color-text-secondary);
border: 1px solid var(--color-border);
}
.btn-ghost:hover:not(:disabled) {
background: var(--color-bg-secondary);
color: var(--color-text-primary);
border-color: var(--color-border-focus);
}
/* ── Filter chips ─────────────────────────────────────────────────────── */
.filter-chip-row {
display: flex;
flex-wrap: wrap;
gap: var(--spacing-xs);
}
.btn-chip {
padding: 5px var(--spacing-sm);
background: var(--color-bg-secondary);
border: 1px solid var(--color-border);
border-radius: 999px;
font-family: var(--font-body);
font-size: var(--font-size-sm);
font-weight: 500;
color: var(--color-text-secondary);
cursor: pointer;
transition: background 0.15s, color 0.15s, border-color 0.15s;
}
.btn-chip.active,
.btn-chip:hover {
background: color-mix(in srgb, var(--color-primary) 15%, transparent);
border-color: var(--color-primary);
color: var(--color-primary);
}
/* ── Card ─────────────────────────────────────────────────────────────── */
.card {
background: var(--color-bg-card);
border: 1px solid var(--color-border);
border-radius: var(--radius-md);
}
/* ── Text utilities ───────────────────────────────────────────────────── */
.text-muted { color: var(--color-text-muted); }
.text-sm { font-size: var(--font-size-sm); line-height: 1.5; }
.text-xs { font-size: 0.75rem; line-height: 1.5; }
.font-semibold { font-weight: 600; }
/* Transition */
.modal-fade-enter-active, .modal-fade-leave-active { transition: opacity 0.2s ease; }
.modal-fade-enter-from, .modal-fade-leave-to { opacity: 0; }

View file

@ -18,7 +18,8 @@
/* Theme Colors - Dark Mode (Default) */
--color-text-primary: rgba(255, 248, 235, 0.92);
--color-text-secondary: rgba(255, 248, 235, 0.60);
--color-text-muted: rgba(255, 248, 235, 0.38);
/* Raised from 0.38 → 0.52 for WCAG 1.4.3 AA compliance (~5.5:1 against card bg) */
--color-text-muted: rgba(255, 248, 235, 0.52);
--color-bg-primary: #1e1c1a;
--color-bg-secondary: #161412;
@ -40,7 +41,8 @@
/* Status Colors */
--color-success: #4a8c40;
--color-success-dark: #3a7030;
--color-success-light: #6aac60;
/* Lightened from #6aac60 → #7fc073 for WCAG 1.4.3 AA compliance on dark backgrounds */
--color-success-light: #7fc073;
--color-success-bg: rgba(74, 140, 64, 0.12);
--color-success-border: rgba(74, 140, 64, 0.30);

View file

@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
Backfill keywords column: repair character-split R-vector data.
The food.com corpus was imported with Keywords stored as a JSON array of
individual characters (e.g. ["c","(","\"","I","t","a","l","i","a","n",...])
instead of the intended keyword list (e.g. ["Italian","Low-Fat","Easy"]).
This script detects the broken pattern (all array elements have length 1),
rejoins them into the original R-vector string, parses quoted tokens, and
writes the corrected JSON back.
Rows that are already correct (empty array, or multi-char strings) are skipped.
FTS5 index is rebuilt after the update so searches reflect the fix.
Usage:
conda run -n cf python scripts/backfill_keywords.py [path/to/kiwi.db]
# default: data/kiwi.db
Estimated time on 3.1M rows: 3-8 minutes (mostly the FTS rebuild at the end).
"""
from __future__ import annotations
import json
import re
import sqlite3
import sys
from pathlib import Path
_QUOTED = re.compile(r'"([^"]*)"')
def _parse_r_vector(s: str) -> list[str]:
return _QUOTED.findall(s)
def _repair(raw_json: str) -> str | None:
"""Return corrected JSON string, or None if the row is already clean."""
try:
val = json.loads(raw_json)
except (json.JSONDecodeError, TypeError):
return None
if not isinstance(val, list) or not val:
return None # empty or non-list — leave as-is
# Already correct: contains multi-character strings
if any(isinstance(e, str) and len(e) > 1 for e in val):
return None
# Broken: all single characters — rejoin and re-parse
if all(isinstance(e, str) and len(e) == 1 for e in val):
rejoined = "".join(val)
keywords = _parse_r_vector(rejoined)
return json.dumps(keywords)
return None
def backfill(db_path: Path, batch_size: int = 5000) -> None:
conn = sqlite3.connect(db_path)
conn.execute("PRAGMA journal_mode=WAL")
total = conn.execute("SELECT count(*) FROM recipes").fetchone()[0]
print(f"Total recipes: {total:,}")
fixed = 0
skipped = 0
offset = 0
while True:
rows = conn.execute(
"SELECT id, keywords FROM recipes LIMIT ? OFFSET ?",
(batch_size, offset),
).fetchall()
if not rows:
break
updates: list[tuple[str, int]] = []
for row_id, raw_json in rows:
corrected = _repair(raw_json)
if corrected is not None:
updates.append((corrected, row_id))
else:
skipped += 1
if updates:
conn.executemany(
"UPDATE recipes SET keywords = ? WHERE id = ?", updates
)
conn.commit()
fixed += len(updates)
offset += batch_size
done = offset + len(rows) - (batch_size - len(rows))
pct = min(100, int((offset / total) * 100))
print(f" {pct:>3}% processed {offset:,} fixed {fixed:,} skipped {skipped:,}", end="\r")
print(f"\nDone. Fixed {fixed:,} rows, skipped {skipped:,} (already correct or empty).")
if fixed > 0:
print("Rebuilding FTS5 browser index (recipe_browser_fts)…")
try:
conn.execute("INSERT INTO recipe_browser_fts(recipe_browser_fts) VALUES('rebuild')")
conn.commit()
print("FTS rebuild complete.")
except Exception as e:
print(f"FTS rebuild skipped (table may not exist yet): {e}")
conn.close()
if __name__ == "__main__":
    # CLI entry point: an optional positional argument overrides the default
    # database path (data/kiwi.db). Exits non-zero when the DB is missing so
    # pipeline wrappers can detect the failure.
    db_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("data/kiwi.db")
    if not db_path.exists():
        print(f"DB not found: {db_path}")
        sys.exit(1)
    backfill(db_path)

View file

@ -57,6 +57,34 @@ def _parse_r_vector(s: str) -> list[str]:
return _QUOTED.findall(s)
def _parse_keywords(val: object) -> list[str]:
"""Parse the food.com Keywords column into a proper list of keyword strings.
The raw parquet value can arrive in three forms:
- None / NaN []
- str: c("Italian", ...) parse quoted tokens via _parse_r_vector
- list of single chars the R-vector was character-split during dataset
export; rejoin then re-parse
- list of strings already correct, use as-is
"""
import math
if val is None:
return []
if isinstance(val, float) and math.isnan(val):
return []
if isinstance(val, str):
return _parse_r_vector(val)
if isinstance(val, list):
if not val:
return []
# Detect character-split R-vector: every element is a single character
if all(isinstance(e, str) and len(e) == 1 for e in val):
return _parse_r_vector("".join(val))
# Already a proper list of keyword strings
return [str(e) for e in val if e]
return []
def extract_ingredient_names(raw_list: list[str]) -> list[str]:
"""Strip quantities and units from ingredient strings -> normalized names."""
names = []
@ -168,7 +196,7 @@ def build(db_path: Path, recipes_path: Path, batch_size: int = 10000) -> None:
json.dumps(ingredient_names),
json.dumps(directions),
str(row.get("RecipeCategory", "") or ""),
json.dumps(_safe_list(row.get("Keywords"))),
json.dumps(_parse_keywords(row.get("Keywords"))),
_float_or_none(row.get("Calories")),
_float_or_none(row.get("FatContent")),
_float_or_none(row.get("ProteinContent")),

View file

@ -0,0 +1,255 @@
"""
Infer and backfill normalized tags for all recipes.
Reads recipes in batches, cross-references ingredient_profiles and
substitution_pairs, runs tag_inferrer on each recipe, and writes the result
to recipes.inferred_tags. Also rebuilds recipe_browser_fts after the run.
This script is idempotent: pass --force to re-derive tags even if
inferred_tags is already non-empty.
Usage:
conda run -n cf python scripts/pipeline/infer_recipe_tags.py \\
[path/to/kiwi.db] [--batch-size 2000] [--force]
Estimated time on 3.1M rows: 10-20 minutes (CPU-bound text matching).
"""
from __future__ import annotations
import argparse
import json
import sqlite3
import sys
from pathlib import Path
# Allow importing from the app package when run from the repo root
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from app.services.recipe.tag_inferrer import infer_tags
# ---------------------------------------------------------------------------
# Substitution constraint label mapping
# Keys are what we store in substitution_pairs.constraint_label.
# ---------------------------------------------------------------------------
_INTERESTING_CONSTRAINTS = {"gluten_free", "low_calorie", "low_carb", "vegan", "dairy_free", "low_sodium"}
def _load_profiles(conn: sqlite3.Connection) -> dict[str, dict]:
"""
Load ingredient_profiles into a dict keyed by name.
Values hold only the fields we need for tag inference.
"""
profiles: dict[str, dict] = {}
rows = conn.execute("""
SELECT name, elements, glutamate_mg, is_fermented, ph_estimate
FROM ingredient_profiles
""").fetchall()
for name, elements_json, glutamate_mg, is_fermented, ph_estimate in rows:
try:
elements: list[str] = json.loads(elements_json) if elements_json else []
except Exception:
elements = []
profiles[name] = {
"elements": elements,
"glutamate": float(glutamate_mg or 0),
"fermented": bool(is_fermented),
"ph": float(ph_estimate) if ph_estimate is not None else None,
}
return profiles
def _load_sub_index(conn: sqlite3.Connection) -> dict[str, set[str]]:
"""
Build a dict of ingredient_name -> set of available constraint labels.
Only loads constraints we care about.
"""
index: dict[str, set[str]] = {}
placeholders = ",".join("?" * len(_INTERESTING_CONSTRAINTS))
rows = conn.execute(
f"SELECT original_name, constraint_label FROM substitution_pairs "
f"WHERE constraint_label IN ({placeholders})",
list(_INTERESTING_CONSTRAINTS),
).fetchall()
for name, label in rows:
index.setdefault(name, set()).add(label)
return index
def _enrich(
ingredient_names: list[str],
profile_index: dict[str, dict],
sub_index: dict[str, set[str]],
) -> dict:
"""
Cross-reference ingredient_names against our enrichment indices.
Returns a dict of enriched signals ready for infer_tags().
"""
fermented_count = 0
glutamate_total = 0.0
ph_values: list[float] = []
element_totals: dict[str, float] = {}
profiled = 0
constraint_sets: list[set[str]] = []
for name in ingredient_names:
profile = profile_index.get(name)
if profile:
profiled += 1
glutamate_total += profile["glutamate"]
if profile["fermented"]:
fermented_count += 1
if profile["ph"] is not None:
ph_values.append(profile["ph"])
for elem in profile["elements"]:
element_totals[elem] = element_totals.get(elem, 0.0) + 1.0
subs = sub_index.get(name)
if subs:
constraint_sets.append(subs)
# Element coverage: fraction of profiled ingredients that carry each element
element_coverage: dict[str, float] = {}
if profiled > 0:
element_coverage = {e: round(c / profiled, 3) for e, c in element_totals.items()}
# Only emit a can_be:* tag if ALL relevant ingredients have the substitution available.
# (A recipe is gluten_free-achievable only if every gluten source can be swapped.)
# We use a simpler heuristic: if at least one ingredient has the constraint, flag it.
# Future improvement: require coverage of all gluten-bearing ingredients.
available_constraints: list[str] = []
if constraint_sets:
union_constraints: set[str] = set()
for cs in constraint_sets:
union_constraints.update(cs)
available_constraints = sorted(union_constraints & _INTERESTING_CONSTRAINTS)
return {
"element_coverage": element_coverage,
"fermented_count": fermented_count,
"glutamate_total": glutamate_total,
"ph_min": min(ph_values) if ph_values else None,
"available_sub_constraints": available_constraints,
}
def run(db_path: Path, batch_size: int = 2000, force: bool = False) -> None:
conn = sqlite3.connect(db_path)
conn.execute("PRAGMA journal_mode=WAL")
total = conn.execute("SELECT count(*) FROM recipes").fetchone()[0]
print(f"Total recipes: {total:,}")
print("Loading ingredient profiles...")
profile_index = _load_profiles(conn)
print(f" {len(profile_index):,} profiles loaded")
print("Loading substitution index...")
sub_index = _load_sub_index(conn)
print(f" {len(sub_index):,} substitutable ingredients indexed")
updated = 0
skipped = 0
offset = 0
where_clause = "" if force else "WHERE inferred_tags = '[]' OR inferred_tags IS NULL"
eligible = conn.execute(
f"SELECT count(*) FROM recipes {where_clause}"
).fetchone()[0]
print(f"Recipes to process: {eligible:,} ({'all' if force else 'untagged only'})")
while True:
rows = conn.execute(
f"""
SELECT id, title, ingredient_names, category, keywords,
element_coverage,
calories, fat_g, protein_g, carbs_g, servings
FROM recipes {where_clause}
ORDER BY id
LIMIT ? OFFSET ?
""",
(batch_size, offset),
).fetchall()
if not rows:
break
updates: list[tuple[str, int]] = []
for (row_id, title, ingr_json, category, kw_json,
elem_cov_json, calories, fat_g, protein_g, carbs_g, servings) in rows:
try:
ingredient_names: list[str] = json.loads(ingr_json) if ingr_json else []
corpus_keywords: list[str] = json.loads(kw_json) if kw_json else []
element_coverage: dict[str, float] = (
json.loads(elem_cov_json) if elem_cov_json else {}
)
except Exception:
ingredient_names = []
corpus_keywords = []
element_coverage = {}
enriched = _enrich(ingredient_names, profile_index, sub_index)
# Prefer the pre-computed element_coverage from the recipes table
# (it was computed over all ingredients at import time, not just the
# profiled subset). Fall back to what _enrich computed.
effective_coverage = element_coverage or enriched["element_coverage"]
tags = infer_tags(
title=title or "",
ingredient_names=ingredient_names,
corpus_keywords=corpus_keywords,
corpus_category=category or "",
element_coverage=effective_coverage,
fermented_count=enriched["fermented_count"],
glutamate_total=enriched["glutamate_total"],
ph_min=enriched["ph_min"],
available_sub_constraints=enriched["available_sub_constraints"],
calories=calories,
protein_g=protein_g,
fat_g=fat_g,
carbs_g=carbs_g,
servings=servings,
)
updates.append((json.dumps(tags), row_id))
if updates:
conn.executemany(
"UPDATE recipes SET inferred_tags = ? WHERE id = ?", updates
)
conn.commit()
updated += len(updates)
else:
skipped += len(rows)
offset += len(rows)
pct = min(100, int((offset / eligible) * 100)) if eligible else 100
print(
f" {pct:>3}% offset {offset:,} tagged {updated:,}",
end="\r",
)
print(f"\nDone. Tagged {updated:,} recipes, skipped {skipped:,}.")
if updated > 0:
print("Rebuilding FTS5 browser index (recipe_browser_fts)...")
try:
conn.execute(
"INSERT INTO recipe_browser_fts(recipe_browser_fts) VALUES('rebuild')"
)
conn.commit()
print("FTS rebuild complete.")
except Exception as e:
print(f"FTS rebuild skipped: {e}")
conn.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("db", nargs="?", default="data/kiwi.db", type=Path)
parser.add_argument("--batch-size", type=int, default=2000)
parser.add_argument("--force", action="store_true",
help="Re-derive tags even if inferred_tags is already set.")
args = parser.parse_args()
if not args.db.exists():
print(f"DB not found: {args.db}")
sys.exit(1)
run(args.db, args.batch_size, args.force)

View file

@ -70,3 +70,14 @@ def test_local_feed_returns_json():
response = client.get("/api/v1/community/local-feed")
assert response.status_code == 200
assert isinstance(response.json(), list)
def test_hall_of_chaos_route_exists():
"""GET /community/hall-of-chaos returns 200 and includes chaos_level key."""
mock_store = MagicMock()
mock_store.list_posts.return_value = []
with patch("app.api.endpoints.community._community_store", mock_store):
response = client.get("/api/v1/community/hall-of-chaos")
assert response.status_code == 200
data = response.json()
assert "chaos_level" in data