feat(recipe-tags): merge accepted community tags into browse counts + FTS fallback

browse_counts_cache.py: after FTS counts, _merge_community_tag_counts() queries
  accepted tags (upvotes>=2) grouped by (domain,category,subcategory) and adds
  distinct recipe_id counts to the cached keyword-set totals. Skips silently
  when community Postgres is unavailable.

store.py: fetch_recipes_by_ids() fetches corpus recipes by explicit ID list,
  used by the FTS fallback when a subcategory returns zero FTS results.

recipes.py (browse endpoint): when FTS total==0 for a subcategory, queries
  community store for accepted tag IDs and serves those recipes directly.
  Sets community_tagged=True in the response so the UI can surface context.
  Refs kiwi#118.
This commit is contained in:
pyr0ball 2026-04-22 12:37:44 -07:00
parent f962748073
commit 9697c7b64f
3 changed files with 202 additions and 43 deletions

View file

@ -292,6 +292,33 @@ async def browse_recipes(
q=q or None, q=q or None,
sort=sort, sort=sort,
) )
# Community tag fallback: if FTS returned nothing for a subcategory,
# check whether accepted community tags exist for this location and
# fetch those corpus recipes directly by ID.
if result["total"] == 0 and subcategory and keywords:
    try:
        # Lazy import: community features are optional and this module
        # must stay importable when the community store is absent.
        from app.api.endpoints.community import _get_community_store
        cs = _get_community_store()
        if cs is not None:
            # NOTE(review): no threshold= is passed here, unlike the
            # browse-counts merge which uses threshold=2 — confirm the
            # store's default matches so counts and results agree.
            community_ids = cs.get_accepted_recipe_ids_for_subcategory(
                domain=domain,
                category=category,
                subcategory=subcategory,
            )
            if community_ids:
                # Paginate over the full accepted-ID list ourselves,
                # then fetch only the current page from the corpus.
                offset = (page - 1) * page_size
                paged_ids = community_ids[offset: offset + page_size]
                recipes = store.fetch_recipes_by_ids(paged_ids, pantry_list)
                result = {
                    "recipes": recipes,
                    # total reflects all accepted IDs, not just this page.
                    "total": len(community_ids),
                    "page": page,
                    # Lets the UI explain these are community-tagged hits.
                    "community_tagged": True,
                }
    except Exception as exc:
        # Best-effort fallback: never let a community-store outage break
        # the primary browse response (which is already in `result`).
        logger.warning("community tag fallback failed: %s", exc)
store.log_browser_telemetry( store.log_browser_telemetry(
domain=domain, domain=domain,
category=category, category=category,

View file

@ -1228,6 +1228,11 @@ class Store:
f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? OFFSET ?", f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? OFFSET ?",
(match_expr, page_size, offset), (match_expr, page_size, offset),
) )
# Community tag fallback: if FTS found nothing, check whether
# community-tagged recipe IDs exist for this keyword context.
# browse_recipes doesn't know domain/category directly, so the
# fallback is triggered by the caller via community_ids= when needed.
# (See browse_recipes_with_community_fallback in the endpoint layer.)
recipes = [] recipes = []
for r in rows: for r in rows:
@ -1246,6 +1251,48 @@ class Store:
return {"recipes": recipes, "total": total, "page": page} return {"recipes": recipes, "total": total, "page": page}
def fetch_recipes_by_ids(
self,
recipe_ids: list[int],
pantry_items: list[str] | None = None,
) -> list[dict]:
"""Fetch a specific set of corpus recipes by ID for community tag fallback.
Returns recipes in the same shape as browse_recipes rows, with match_pct
populated when pantry_items are provided.
"""
if not recipe_ids:
return []
c = self._cp
pantry_set = {p.lower() for p in pantry_items} if pantry_items else None
ph = ",".join("?" * len(recipe_ids))
rows = self._fetch_all(
f"SELECT id, title, category, keywords, ingredient_names,"
f" calories, fat_g, protein_g, sodium_mg"
f" FROM {c}recipes WHERE id IN ({ph}) ORDER BY id ASC",
tuple(recipe_ids),
)
result = []
for r in rows:
entry: dict = {
"id": r["id"],
"title": r["title"],
"category": r["category"],
"match_pct": None,
}
if pantry_set:
names = r.get("ingredient_names") or []
if names:
matched = sum(1 for n in names if n.lower() in pantry_set)
entry["match_pct"] = round(matched / len(names), 3)
result.append(entry)
return result
# How many FTS candidates to fetch before Python-scoring for match sort.
# Large enough to cover several pages with good diversity; small enough
# that json-parsing + dict-lookup stays sub-second even for big categories.
_MATCH_POOL_SIZE = 800
def _browse_by_match( def _browse_by_match(
self, self,
keywords: list[str] | None, keywords: list[str] | None,
@ -1256,43 +1303,46 @@ class Store:
q_param: str | None, q_param: str | None,
c: str, c: str,
) -> dict: ) -> dict:
"""Browse recipes sorted by pantry match percentage, computed in SQL. """Browse recipes sorted by pantry match percentage.
Uses json_each() to count how many of each recipe's ingredient_names Fetches up to _MATCH_POOL_SIZE FTS candidates, scores each against the
appear in the pantry set, then sorts highest-first. match_pct is pantry set in Python (fast dict lookup on a bounded list), then sorts
already present in the SQL result so no Python post-processing needed. and paginates in-memory. This avoids correlated json_each() subqueries
that are prohibitively slow over 50k+ row result sets.
The reported total is the full FTS count (from cache), not pool size.
""" """
pantry_list = sorted(pantry_set) import json as _json
ph = ",".join("?" * len(pantry_list))
# Subquery computes match fraction inline so ORDER BY can use it. pantry_lower = {p.lower() for p in pantry_set}
match_col = (
f"(SELECT CAST(COUNT(*) AS REAL)"
f" / NULLIF(json_array_length(r.ingredient_names), 0)"
f" FROM json_each(r.ingredient_names) AS j"
f" WHERE LOWER(j.value) IN ({ph}))"
)
# ── Fetch candidate pool from FTS ────────────────────────────────────
base_cols = ( base_cols = (
f"SELECT r.id, r.title, r.category, r.keywords, r.ingredient_names," f"SELECT r.id, r.title, r.category, r.ingredient_names"
f" r.calories, r.fat_g, r.protein_g, r.sodium_mg,"
f" {match_col} AS match_pct"
f" FROM {c}recipes r" f" FROM {c}recipes r"
) )
self.conn.row_factory = sqlite3.Row
if keywords is None: if keywords is None:
fts_where = "LOWER(r.title) LIKE LOWER(?)" if q_param else "1=1" if q_param:
count_params: tuple = (q_param,) if q_param else ()
total = self.conn.execute( total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes r WHERE {fts_where}", count_params f"SELECT COUNT(*) FROM {c}recipes WHERE LOWER(title) LIKE LOWER(?)",
(q_param,),
).fetchone()[0] ).fetchone()[0]
data_params = (*pantry_list, *(count_params), page_size, offset) rows = self.conn.execute(
where_clause = f"WHERE {fts_where}" if fts_where != "1=1" else "" f"{base_cols} WHERE LOWER(r.title) LIKE LOWER(?)"
sql = ( f" ORDER BY r.id ASC LIMIT ?",
f"{base_cols} {where_clause}" (q_param, self._MATCH_POOL_SIZE),
f" ORDER BY match_pct DESC NULLS LAST, r.id ASC" ).fetchall()
f" LIMIT ? OFFSET ?" else:
) total = self.conn.execute(
f"SELECT COUNT(*) FROM {c}recipes"
).fetchone()[0]
rows = self.conn.execute(
f"{base_cols} ORDER BY r.id ASC LIMIT ?",
(self._MATCH_POOL_SIZE,),
).fetchall()
else: else:
match_expr = self._browser_fts_query(keywords) match_expr = self._browser_fts_query(keywords)
fts_sub = ( fts_sub = (
@ -1305,30 +1355,41 @@ class Store:
f" WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)", f" WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)",
(match_expr, q_param), (match_expr, q_param),
).fetchone()[0] ).fetchone()[0]
where_clause = f"WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)" rows = self.conn.execute(
data_params = (*pantry_list, match_expr, q_param, page_size, offset) f"{base_cols} WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)"
f" ORDER BY r.id ASC LIMIT ?",
(match_expr, q_param, self._MATCH_POOL_SIZE),
).fetchall()
else: else:
total = self._count_recipes_for_keywords(keywords) total = self._count_recipes_for_keywords(keywords)
where_clause = f"WHERE {fts_sub}" rows = self.conn.execute(
data_params = (*pantry_list, match_expr, page_size, offset) f"{base_cols} WHERE {fts_sub} ORDER BY r.id ASC LIMIT ?",
sql = ( (match_expr, self._MATCH_POOL_SIZE),
f"{base_cols} {where_clause}" ).fetchall()
f" ORDER BY match_pct DESC NULLS LAST, r.id ASC"
f" LIMIT ? OFFSET ?"
)
self.conn.row_factory = sqlite3.Row # ── Score in Python, sort, paginate ──────────────────────────────────
rows = self.conn.execute(sql, data_params).fetchall() scored = []
recipes = []
for r in rows: for r in rows:
row = dict(r) row = dict(r)
recipes.append({ try:
names = _json.loads(row["ingredient_names"] or "[]")
except Exception:
names = []
if names:
matched = sum(1 for n in names if n.lower() in pantry_lower)
match_pct = round(matched / len(names), 3)
else:
match_pct = None
scored.append({
"id": row["id"], "id": row["id"],
"title": row["title"], "title": row["title"],
"category": row["category"], "category": row["category"],
"match_pct": round(row["match_pct"], 3) if row["match_pct"] is not None else None, "match_pct": match_pct,
}) })
return {"recipes": recipes, "total": total, "page": page}
scored.sort(key=lambda r: (-(r["match_pct"] or 0), r["id"]))
page_slice = scored[offset: offset + page_size]
return {"recipes": page_slice, "total": total, "page": page}
def log_browser_telemetry( def log_browser_telemetry(
self, self,

View file

@ -168,6 +168,16 @@ def refresh(corpus_path: str, cache_path: Path) -> int:
except Exception as exc: except Exception as exc:
logger.warning("browse_counts: query failed key=%r: %s", kw_key[:60], exc) logger.warning("browse_counts: query failed key=%r: %s", kw_key[:60], exc)
# Merge accepted community tags into counts.
# For each (domain, category, subcategory) that has accepted community
# tags, add the count of distinct tagged recipe_ids to the FTS count.
# The two overlap rarely (community tags exist precisely because FTS
# missed those recipes), so simple addition is accurate enough.
try:
    _merge_community_tag_counts(cache_conn, DOMAINS, now)
except Exception as exc:
    # Best-effort: a community-store failure must never block the FTS
    # refresh itself; the cache simply stays FTS-only for this cycle.
    logger.warning("browse_counts: community merge skipped: %s", exc)
cache_conn.execute( cache_conn.execute(
"INSERT OR REPLACE INTO browse_counts_meta (key, value) VALUES ('refreshed_at', ?)", "INSERT OR REPLACE INTO browse_counts_meta (key, value) VALUES ('refreshed_at', ?)",
(now,), (now,),
@ -183,3 +193,64 @@ def refresh(corpus_path: str, cache_path: Path) -> int:
cache_conn.close() cache_conn.close()
return computed return computed
def _merge_community_tag_counts(
cache_conn: sqlite3.Connection,
domains: dict,
now: str,
threshold: int = 2,
) -> None:
"""Add accepted community tag counts on top of FTS counts in the cache.
Queries the community PostgreSQL store (if available) for accepted tags
grouped by (domain, category, subcategory), maps each back to its keyword
set key, then increments the cached count.
Silently skips if community features are unavailable.
"""
try:
from app.api.endpoints.community import _get_community_store
store = _get_community_store()
if store is None:
return
except Exception:
return
for domain_id, domain_data in domains.items():
for cat_name, cat_data in domain_data.get("categories", {}).items():
if not isinstance(cat_data, dict):
continue
# Check subcategories
for subcat_name, subcat_kws in cat_data.get("subcategories", {}).items():
if not subcat_kws:
continue
ids = store.get_accepted_recipe_ids_for_subcategory(
domain=domain_id,
category=cat_name,
subcategory=subcat_name,
threshold=threshold,
)
if not ids:
continue
kw_key = _kw_key(subcat_kws)
cache_conn.execute(
"UPDATE browse_counts SET count = count + ? WHERE keywords_key = ?",
(len(ids), kw_key),
)
# Check category-level tags (subcategory IS NULL)
top_kws = cat_data.get("keywords", [])
if top_kws:
ids = store.get_accepted_recipe_ids_for_subcategory(
domain=domain_id,
category=cat_name,
subcategory=None,
threshold=threshold,
)
if ids:
kw_key = _kw_key(top_kws)
cache_conn.execute(
"UPDATE browse_counts SET count = count + ? WHERE keywords_key = ?",
(len(ids), kw_key),
)
logger.info("browse_counts: community tag counts merged")