feat(recipe-tags): merge accepted community tags into browse counts + FTS fallback
browse_counts_cache.py: after FTS counts, _merge_community_tag_counts() queries accepted tags (upvotes>=2) grouped by (domain,category,subcategory) and adds distinct recipe_id counts to the cached keyword-set totals. Skips silently when community Postgres is unavailable. store.py: fetch_recipes_by_ids() fetches corpus recipes by explicit ID list, used by the FTS fallback when a subcategory returns zero FTS results. recipes.py (browse endpoint): when FTS total==0 for a subcategory, queries community store for accepted tag IDs and serves those recipes directly. Sets community_tagged=True in the response so the UI can surface context. Refs kiwi#118.
This commit is contained in:
parent
f962748073
commit
9697c7b64f
3 changed files with 202 additions and 43 deletions
|
|
@ -292,6 +292,33 @@ async def browse_recipes(
|
||||||
q=q or None,
|
q=q or None,
|
||||||
sort=sort,
|
sort=sort,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Community tag fallback: if FTS returned nothing for a subcategory,
|
||||||
|
# check whether accepted community tags exist for this location and
|
||||||
|
# fetch those corpus recipes directly by ID.
|
||||||
|
if result["total"] == 0 and subcategory and keywords:
|
||||||
|
try:
|
||||||
|
from app.api.endpoints.community import _get_community_store
|
||||||
|
cs = _get_community_store()
|
||||||
|
if cs is not None:
|
||||||
|
community_ids = cs.get_accepted_recipe_ids_for_subcategory(
|
||||||
|
domain=domain,
|
||||||
|
category=category,
|
||||||
|
subcategory=subcategory,
|
||||||
|
)
|
||||||
|
if community_ids:
|
||||||
|
offset = (page - 1) * page_size
|
||||||
|
paged_ids = community_ids[offset: offset + page_size]
|
||||||
|
recipes = store.fetch_recipes_by_ids(paged_ids, pantry_list)
|
||||||
|
result = {
|
||||||
|
"recipes": recipes,
|
||||||
|
"total": len(community_ids),
|
||||||
|
"page": page,
|
||||||
|
"community_tagged": True,
|
||||||
|
}
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("community tag fallback failed: %s", exc)
|
||||||
|
|
||||||
store.log_browser_telemetry(
|
store.log_browser_telemetry(
|
||||||
domain=domain,
|
domain=domain,
|
||||||
category=category,
|
category=category,
|
||||||
|
|
|
||||||
143
app/db/store.py
143
app/db/store.py
|
|
@ -1228,6 +1228,11 @@ class Store:
|
||||||
f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? OFFSET ?",
|
f"{cols} WHERE {fts_sub} {order_clause} LIMIT ? OFFSET ?",
|
||||||
(match_expr, page_size, offset),
|
(match_expr, page_size, offset),
|
||||||
)
|
)
|
||||||
|
# Community tag fallback: if FTS found nothing, check whether
|
||||||
|
# community-tagged recipe IDs exist for this keyword context.
|
||||||
|
# browse_recipes doesn't know domain/category directly, so the
|
||||||
|
# fallback is triggered by the caller via community_ids= when needed.
|
||||||
|
# (See browse_recipes_with_community_fallback in the endpoint layer.)
|
||||||
|
|
||||||
recipes = []
|
recipes = []
|
||||||
for r in rows:
|
for r in rows:
|
||||||
|
|
@ -1246,6 +1251,48 @@ class Store:
|
||||||
|
|
||||||
return {"recipes": recipes, "total": total, "page": page}
|
return {"recipes": recipes, "total": total, "page": page}
|
||||||
|
|
||||||
|
def fetch_recipes_by_ids(
|
||||||
|
self,
|
||||||
|
recipe_ids: list[int],
|
||||||
|
pantry_items: list[str] | None = None,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Fetch a specific set of corpus recipes by ID for community tag fallback.
|
||||||
|
|
||||||
|
Returns recipes in the same shape as browse_recipes rows, with match_pct
|
||||||
|
populated when pantry_items are provided.
|
||||||
|
"""
|
||||||
|
if not recipe_ids:
|
||||||
|
return []
|
||||||
|
c = self._cp
|
||||||
|
pantry_set = {p.lower() for p in pantry_items} if pantry_items else None
|
||||||
|
ph = ",".join("?" * len(recipe_ids))
|
||||||
|
rows = self._fetch_all(
|
||||||
|
f"SELECT id, title, category, keywords, ingredient_names,"
|
||||||
|
f" calories, fat_g, protein_g, sodium_mg"
|
||||||
|
f" FROM {c}recipes WHERE id IN ({ph}) ORDER BY id ASC",
|
||||||
|
tuple(recipe_ids),
|
||||||
|
)
|
||||||
|
result = []
|
||||||
|
for r in rows:
|
||||||
|
entry: dict = {
|
||||||
|
"id": r["id"],
|
||||||
|
"title": r["title"],
|
||||||
|
"category": r["category"],
|
||||||
|
"match_pct": None,
|
||||||
|
}
|
||||||
|
if pantry_set:
|
||||||
|
names = r.get("ingredient_names") or []
|
||||||
|
if names:
|
||||||
|
matched = sum(1 for n in names if n.lower() in pantry_set)
|
||||||
|
entry["match_pct"] = round(matched / len(names), 3)
|
||||||
|
result.append(entry)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# How many FTS candidates to fetch before Python-scoring for match sort.
|
||||||
|
# Large enough to cover several pages with good diversity; small enough
|
||||||
|
# that json-parsing + dict-lookup stays sub-second even for big categories.
|
||||||
|
_MATCH_POOL_SIZE = 800
|
||||||
|
|
||||||
def _browse_by_match(
|
def _browse_by_match(
|
||||||
self,
|
self,
|
||||||
keywords: list[str] | None,
|
keywords: list[str] | None,
|
||||||
|
|
@ -1256,43 +1303,46 @@ class Store:
|
||||||
q_param: str | None,
|
q_param: str | None,
|
||||||
c: str,
|
c: str,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Browse recipes sorted by pantry match percentage, computed in SQL.
|
"""Browse recipes sorted by pantry match percentage.
|
||||||
|
|
||||||
Uses json_each() to count how many of each recipe's ingredient_names
|
Fetches up to _MATCH_POOL_SIZE FTS candidates, scores each against the
|
||||||
appear in the pantry set, then sorts highest-first. match_pct is
|
pantry set in Python (fast dict lookup on a bounded list), then sorts
|
||||||
already present in the SQL result so no Python post-processing needed.
|
and paginates in-memory. This avoids correlated json_each() subqueries
|
||||||
|
that are prohibitively slow over 50k+ row result sets.
|
||||||
|
|
||||||
|
The reported total is the full FTS count (from cache), not pool size.
|
||||||
"""
|
"""
|
||||||
pantry_list = sorted(pantry_set)
|
import json as _json
|
||||||
ph = ",".join("?" * len(pantry_list))
|
|
||||||
|
|
||||||
# Subquery computes match fraction inline so ORDER BY can use it.
|
pantry_lower = {p.lower() for p in pantry_set}
|
||||||
match_col = (
|
|
||||||
f"(SELECT CAST(COUNT(*) AS REAL)"
|
|
||||||
f" / NULLIF(json_array_length(r.ingredient_names), 0)"
|
|
||||||
f" FROM json_each(r.ingredient_names) AS j"
|
|
||||||
f" WHERE LOWER(j.value) IN ({ph}))"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# ── Fetch candidate pool from FTS ────────────────────────────────────
|
||||||
base_cols = (
|
base_cols = (
|
||||||
f"SELECT r.id, r.title, r.category, r.keywords, r.ingredient_names,"
|
f"SELECT r.id, r.title, r.category, r.ingredient_names"
|
||||||
f" r.calories, r.fat_g, r.protein_g, r.sodium_mg,"
|
|
||||||
f" {match_col} AS match_pct"
|
|
||||||
f" FROM {c}recipes r"
|
f" FROM {c}recipes r"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.conn.row_factory = sqlite3.Row
|
||||||
|
|
||||||
if keywords is None:
|
if keywords is None:
|
||||||
fts_where = "LOWER(r.title) LIKE LOWER(?)" if q_param else "1=1"
|
if q_param:
|
||||||
count_params: tuple = (q_param,) if q_param else ()
|
|
||||||
total = self.conn.execute(
|
total = self.conn.execute(
|
||||||
f"SELECT COUNT(*) FROM {c}recipes r WHERE {fts_where}", count_params
|
f"SELECT COUNT(*) FROM {c}recipes WHERE LOWER(title) LIKE LOWER(?)",
|
||||||
|
(q_param,),
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
data_params = (*pantry_list, *(count_params), page_size, offset)
|
rows = self.conn.execute(
|
||||||
where_clause = f"WHERE {fts_where}" if fts_where != "1=1" else ""
|
f"{base_cols} WHERE LOWER(r.title) LIKE LOWER(?)"
|
||||||
sql = (
|
f" ORDER BY r.id ASC LIMIT ?",
|
||||||
f"{base_cols} {where_clause}"
|
(q_param, self._MATCH_POOL_SIZE),
|
||||||
f" ORDER BY match_pct DESC NULLS LAST, r.id ASC"
|
).fetchall()
|
||||||
f" LIMIT ? OFFSET ?"
|
else:
|
||||||
)
|
total = self.conn.execute(
|
||||||
|
f"SELECT COUNT(*) FROM {c}recipes"
|
||||||
|
).fetchone()[0]
|
||||||
|
rows = self.conn.execute(
|
||||||
|
f"{base_cols} ORDER BY r.id ASC LIMIT ?",
|
||||||
|
(self._MATCH_POOL_SIZE,),
|
||||||
|
).fetchall()
|
||||||
else:
|
else:
|
||||||
match_expr = self._browser_fts_query(keywords)
|
match_expr = self._browser_fts_query(keywords)
|
||||||
fts_sub = (
|
fts_sub = (
|
||||||
|
|
@ -1305,30 +1355,41 @@ class Store:
|
||||||
f" WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)",
|
f" WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)",
|
||||||
(match_expr, q_param),
|
(match_expr, q_param),
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
where_clause = f"WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)"
|
rows = self.conn.execute(
|
||||||
data_params = (*pantry_list, match_expr, q_param, page_size, offset)
|
f"{base_cols} WHERE {fts_sub} AND LOWER(r.title) LIKE LOWER(?)"
|
||||||
|
f" ORDER BY r.id ASC LIMIT ?",
|
||||||
|
(match_expr, q_param, self._MATCH_POOL_SIZE),
|
||||||
|
).fetchall()
|
||||||
else:
|
else:
|
||||||
total = self._count_recipes_for_keywords(keywords)
|
total = self._count_recipes_for_keywords(keywords)
|
||||||
where_clause = f"WHERE {fts_sub}"
|
rows = self.conn.execute(
|
||||||
data_params = (*pantry_list, match_expr, page_size, offset)
|
f"{base_cols} WHERE {fts_sub} ORDER BY r.id ASC LIMIT ?",
|
||||||
sql = (
|
(match_expr, self._MATCH_POOL_SIZE),
|
||||||
f"{base_cols} {where_clause}"
|
).fetchall()
|
||||||
f" ORDER BY match_pct DESC NULLS LAST, r.id ASC"
|
|
||||||
f" LIMIT ? OFFSET ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.conn.row_factory = sqlite3.Row
|
# ── Score in Python, sort, paginate ──────────────────────────────────
|
||||||
rows = self.conn.execute(sql, data_params).fetchall()
|
scored = []
|
||||||
recipes = []
|
|
||||||
for r in rows:
|
for r in rows:
|
||||||
row = dict(r)
|
row = dict(r)
|
||||||
recipes.append({
|
try:
|
||||||
|
names = _json.loads(row["ingredient_names"] or "[]")
|
||||||
|
except Exception:
|
||||||
|
names = []
|
||||||
|
if names:
|
||||||
|
matched = sum(1 for n in names if n.lower() in pantry_lower)
|
||||||
|
match_pct = round(matched / len(names), 3)
|
||||||
|
else:
|
||||||
|
match_pct = None
|
||||||
|
scored.append({
|
||||||
"id": row["id"],
|
"id": row["id"],
|
||||||
"title": row["title"],
|
"title": row["title"],
|
||||||
"category": row["category"],
|
"category": row["category"],
|
||||||
"match_pct": round(row["match_pct"], 3) if row["match_pct"] is not None else None,
|
"match_pct": match_pct,
|
||||||
})
|
})
|
||||||
return {"recipes": recipes, "total": total, "page": page}
|
|
||||||
|
scored.sort(key=lambda r: (-(r["match_pct"] or 0), r["id"]))
|
||||||
|
page_slice = scored[offset: offset + page_size]
|
||||||
|
return {"recipes": page_slice, "total": total, "page": page}
|
||||||
|
|
||||||
def log_browser_telemetry(
|
def log_browser_telemetry(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
|
|
@ -168,6 +168,16 @@ def refresh(corpus_path: str, cache_path: Path) -> int:
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("browse_counts: query failed key=%r: %s", kw_key[:60], exc)
|
logger.warning("browse_counts: query failed key=%r: %s", kw_key[:60], exc)
|
||||||
|
|
||||||
|
# Merge accepted community tags into counts.
|
||||||
|
# For each (domain, category, subcategory) that has accepted community
|
||||||
|
# tags, add the count of distinct tagged recipe_ids to the FTS count.
|
||||||
|
# The two overlap rarely (community tags exist precisely because FTS
|
||||||
|
# missed those recipes), so simple addition is accurate enough.
|
||||||
|
try:
|
||||||
|
_merge_community_tag_counts(cache_conn, DOMAINS, now)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("browse_counts: community merge skipped: %s", exc)
|
||||||
|
|
||||||
cache_conn.execute(
|
cache_conn.execute(
|
||||||
"INSERT OR REPLACE INTO browse_counts_meta (key, value) VALUES ('refreshed_at', ?)",
|
"INSERT OR REPLACE INTO browse_counts_meta (key, value) VALUES ('refreshed_at', ?)",
|
||||||
(now,),
|
(now,),
|
||||||
|
|
@ -183,3 +193,64 @@ def refresh(corpus_path: str, cache_path: Path) -> int:
|
||||||
cache_conn.close()
|
cache_conn.close()
|
||||||
|
|
||||||
return computed
|
return computed
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_community_tag_counts(
|
||||||
|
cache_conn: sqlite3.Connection,
|
||||||
|
domains: dict,
|
||||||
|
now: str,
|
||||||
|
threshold: int = 2,
|
||||||
|
) -> None:
|
||||||
|
"""Add accepted community tag counts on top of FTS counts in the cache.
|
||||||
|
|
||||||
|
Queries the community PostgreSQL store (if available) for accepted tags
|
||||||
|
grouped by (domain, category, subcategory), maps each back to its keyword
|
||||||
|
set key, then increments the cached count.
|
||||||
|
|
||||||
|
Silently skips if community features are unavailable.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from app.api.endpoints.community import _get_community_store
|
||||||
|
store = _get_community_store()
|
||||||
|
if store is None:
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
return
|
||||||
|
|
||||||
|
for domain_id, domain_data in domains.items():
|
||||||
|
for cat_name, cat_data in domain_data.get("categories", {}).items():
|
||||||
|
if not isinstance(cat_data, dict):
|
||||||
|
continue
|
||||||
|
# Check subcategories
|
||||||
|
for subcat_name, subcat_kws in cat_data.get("subcategories", {}).items():
|
||||||
|
if not subcat_kws:
|
||||||
|
continue
|
||||||
|
ids = store.get_accepted_recipe_ids_for_subcategory(
|
||||||
|
domain=domain_id,
|
||||||
|
category=cat_name,
|
||||||
|
subcategory=subcat_name,
|
||||||
|
threshold=threshold,
|
||||||
|
)
|
||||||
|
if not ids:
|
||||||
|
continue
|
||||||
|
kw_key = _kw_key(subcat_kws)
|
||||||
|
cache_conn.execute(
|
||||||
|
"UPDATE browse_counts SET count = count + ? WHERE keywords_key = ?",
|
||||||
|
(len(ids), kw_key),
|
||||||
|
)
|
||||||
|
# Check category-level tags (subcategory IS NULL)
|
||||||
|
top_kws = cat_data.get("keywords", [])
|
||||||
|
if top_kws:
|
||||||
|
ids = store.get_accepted_recipe_ids_for_subcategory(
|
||||||
|
domain=domain_id,
|
||||||
|
category=cat_name,
|
||||||
|
subcategory=None,
|
||||||
|
threshold=threshold,
|
||||||
|
)
|
||||||
|
if ids:
|
||||||
|
kw_key = _kw_key(top_kws)
|
||||||
|
cache_conn.execute(
|
||||||
|
"UPDATE browse_counts SET count = count + ? WHERE keywords_key = ?",
|
||||||
|
(len(ids), kw_key),
|
||||||
|
)
|
||||||
|
logger.info("browse_counts: community tag counts merged")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue