feat(community): recipe dedup support — similar_to_ref FK, search_similar_posts, migration 006
Adds three-layer dedup infrastructure for community recipe posts: (1) Migration 006: similar_to_ref self-FK, lower(title) index, recipe_id index; (2) CommunityPost.similar_to_ref optional field (frozen dataclass, defaults to None); (3) SharedStore.search_similar_posts(): case-insensitive title substring match (lower(title) LIKE) plus recipe_id match, with recipe_id matches ordered first and then newest first. In addition, insert_post() wires similar_to_ref into the INSERT.
This commit is contained in:
parent
41c9830281
commit
af66877b51
3 changed files with 85 additions and 2 deletions
|
|
@ -0,0 +1,22 @@
|
||||||
|
-- 006_community_dedup.sql
|
||||||
|
-- Adds variation-linking and title search support for community recipe dedup.
|
||||||
|
-- Applies to: cf_community PostgreSQL database.
|
||||||
|
-- BSL boundary: MIT (data layer, no inference).
|
||||||
|
|
||||||
|
-- Nullable self-referential FK: user-declared "this is a variation of X"
|
||||||
|
ALTER TABLE community_posts
|
||||||
|
ADD COLUMN IF NOT EXISTS similar_to_ref TEXT REFERENCES community_posts(slug) ON DELETE SET NULL;
|
||||||
|
|
||||||
|
-- Index for variation lookup (find all variations of a parent post)
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_community_posts_similar_ref
|
||||||
|
ON community_posts (similar_to_ref)
|
||||||
|
WHERE similar_to_ref IS NOT NULL;
|
||||||
|
|
||||||
|
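-- Expression index on lower(title): serves case-insensitive equality and left-anchored lookups.
-- NOTE: a B-tree index cannot accelerate LIKE patterns with a leading wildcard ('%term%');
-- substring search needs a pg_trgm GIN index, and prefix LIKE needs text_pattern_ops outside the C locale.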
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_community_posts_title_lower
|
||||||
|
ON community_posts (lower(title));
|
||||||
|
|
||||||
|
-- Index on recipe_id for exact-recipe duplicate detection
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_community_posts_recipe_id
|
||||||
|
ON community_posts (recipe_id)
|
||||||
|
WHERE recipe_id IS NOT NULL;
|
||||||
|
|
@ -66,6 +66,9 @@ class CommunityPost:
|
||||||
protein_pct: float | None
|
protein_pct: float | None
|
||||||
moisture_pct: float | None
|
moisture_pct: float | None
|
||||||
|
|
||||||
|
# Variation link: slug of the parent post this is explicitly a variation of
|
||||||
|
similar_to_ref: str | None = None
|
||||||
|
|
||||||
def __post_init__(self) -> None:
|
def __post_init__(self) -> None:
|
||||||
# Coerce list fields to tuples (frozen dataclass: use object.__setattr__)
|
# Coerce list fields to tuples (frozen dataclass: use object.__setattr__)
|
||||||
for key in ("slots", "dietary_tags", "allergen_flags", "flavor_molecules"):
|
for key in ("slots", "dietary_tags", "allergen_flags", "flavor_molecules"):
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,7 @@ def _row_to_post(row: dict) -> CommunityPost:
|
||||||
fat_pct=row.get("fat_pct"),
|
fat_pct=row.get("fat_pct"),
|
||||||
protein_pct=row.get("protein_pct"),
|
protein_pct=row.get("protein_pct"),
|
||||||
moisture_pct=row.get("moisture_pct"),
|
moisture_pct=row.get("moisture_pct"),
|
||||||
|
similar_to_ref=row.get("similar_to_ref"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -137,6 +138,61 @@ class SharedStore:
|
||||||
finally:
|
finally:
|
||||||
self._db.putconn(conn)
|
self._db.putconn(conn)
|
||||||
|
|
||||||
|
def search_similar_posts(
    self,
    title: str,
    recipe_id: int | None = None,
    post_type: str | None = None,
    limit: int = 8,
) -> list[CommunityPost]:
    """Return posts that look like duplicates of a post about to be submitted.

    A post matches when either of these holds:
      - its ``recipe_id`` equals ``recipe_id`` (only checked when one is given)
      - its lower-cased title contains the lower-cased ``title`` as a literal
        substring (only the first 80 characters of the stripped title are used)

    Args:
        title: Candidate title. ``%``, ``_`` and ``!`` are escaped so they are
            matched literally rather than as LIKE wildcards. A blank title
            contributes no title condition.
        recipe_id: Optional recipe id; matching rows are ordered first.
        post_type: Optional filter; when truthy only posts of this type match.
        limit: Maximum number of rows returned.

    Returns:
        Matching posts, recipe_id matches first, then by ``published``
        descending. Returns an empty list without querying when there is
        neither a usable title nor a recipe_id.
    """
    needle = (title or "").strip().lower()[:80]
    if not needle and recipe_id is None:
        # Nothing to match on: an empty pattern would be '%%' and flag every
        # post as a potential duplicate.
        return []

    match_terms: list[str] = []
    params: list = []

    if recipe_id is not None:
        match_terms.append("recipe_id = %s")
        params.append(recipe_id)

    if needle:
        # Escape the escape character first, then the LIKE wildcards, so user
        # text such as "100% rye" or "pb_j" is matched literally.
        escaped = (
            needle.replace("!", "!!").replace("%", "!%").replace("_", "!_")
        )
        match_terms.append("lower(title) LIKE %s ESCAPE '!'")
        params.append(f"%{escaped}%")

    conditions = ["(" + " OR ".join(match_terms) + ")"]
    if post_type:
        conditions.append("post_type = %s")
        params.append(post_type)
    where = "WHERE " + " AND ".join(conditions)

    if recipe_id is not None:
        # COALESCE is required: (recipe_id = x) is NULL for rows without a
        # recipe_id, and DESC sorts NULLs first by default, which would put
        # those rows ahead of the real recipe_id matches.
        order_clause = (
            "ORDER BY COALESCE(recipe_id = %s, FALSE) DESC, published DESC"
        )
        params.append(recipe_id)
    else:
        order_clause = "ORDER BY published DESC"

    # Parameters are appended in placeholder order: WHERE, ORDER BY, LIMIT.
    params.append(limit)

    conn = self._db.getconn()
    try:
        with conn.cursor() as cur:
            cur.execute(
                f"SELECT * FROM community_posts {where} {order_clause} LIMIT %s",
                params,
            )
            rows = cur.fetchall()
            return [_row_to_post(_cursor_to_dict(cur, r)) for r in rows]
    finally:
        # Always hand the connection back to the pool, even on query failure.
        self._db.putconn(conn)
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Writes
|
# Writes
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
@ -156,13 +212,14 @@ class SharedStore:
|
||||||
seasoning_score, richness_score, brightness_score,
|
seasoning_score, richness_score, brightness_score,
|
||||||
depth_score, aroma_score, structure_score, texture_profile,
|
depth_score, aroma_score, structure_score, texture_profile,
|
||||||
dietary_tags, allergen_flags, flavor_molecules,
|
dietary_tags, allergen_flags, flavor_molecules,
|
||||||
fat_pct, protein_pct, moisture_pct, source_product
|
fat_pct, protein_pct, moisture_pct, source_product,
|
||||||
|
similar_to_ref
|
||||||
) VALUES (
|
) VALUES (
|
||||||
%s, %s, %s, %s, %s, %s, %s,
|
%s, %s, %s, %s, %s, %s, %s,
|
||||||
%s::jsonb, %s, %s, %s, %s,
|
%s::jsonb, %s, %s, %s, %s,
|
||||||
%s, %s, %s, %s, %s, %s, %s,
|
%s, %s, %s, %s, %s, %s, %s,
|
||||||
%s::jsonb, %s::jsonb, %s::jsonb,
|
%s::jsonb, %s::jsonb, %s::jsonb,
|
||||||
%s, %s, %s, %s
|
%s, %s, %s, %s, %s
|
||||||
)
|
)
|
||||||
""",
|
""",
|
||||||
(
|
(
|
||||||
|
|
@ -178,6 +235,7 @@ class SharedStore:
|
||||||
json.dumps(list(post.flavor_molecules)),
|
json.dumps(list(post.flavor_molecules)),
|
||||||
post.fat_pct, post.protein_pct, post.moisture_pct,
|
post.fat_pct, post.protein_pct, post.moisture_pct,
|
||||||
self._source_product,
|
self._source_product,
|
||||||
|
post.similar_to_ref,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue