From af66877b5178b8ef23d5b028968aa438389b930c Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 11 May 2026 17:09:18 -0700 Subject: [PATCH] =?UTF-8?q?feat(community):=20recipe=20dedup=20support=20?= =?UTF-8?q?=E2=80=94=20similar=5Fto=5Fref=20FK,=20search=5Fsimilar=5Fposts?= =?UTF-8?q?,=20migration=20006?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds three-layer dedup infrastructure for community recipe posts: - Migration 006: similar_to_ref self-FK, title lower() index, recipe_id index - CommunityPost.similar_to_ref optional field (frozen dataclass, defaults None) - SharedStore.search_similar_posts(): title ILIKE + recipe_id match, ordered by relevance - insert_post() wires similar_to_ref into the INSERT --- .../migrations/006_community_dedup.sql | 22 +++++++ circuitforge_core/community/models.py | 3 + circuitforge_core/community/store.py | 62 ++++++++++++++++++- 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 circuitforge_core/community/migrations/006_community_dedup.sql diff --git a/circuitforge_core/community/migrations/006_community_dedup.sql b/circuitforge_core/community/migrations/006_community_dedup.sql new file mode 100644 index 0000000..a16f773 --- /dev/null +++ b/circuitforge_core/community/migrations/006_community_dedup.sql @@ -0,0 +1,22 @@ +-- 006_community_dedup.sql +-- Adds variation-linking and title search support for community recipe dedup. +-- Applies to: cf_community PostgreSQL database. +-- BSL boundary: MIT (data layer, no inference). 
+ +-- Nullable self-referential FK: user-declared "this is a variation of X" +ALTER TABLE community_posts + ADD COLUMN IF NOT EXISTS similar_to_ref TEXT REFERENCES community_posts(slug) ON DELETE SET NULL; + +-- Index for variation lookup (find all variations of a parent post) +CREATE INDEX IF NOT EXISTS idx_community_posts_similar_ref + ON community_posts (similar_to_ref) + WHERE similar_to_ref IS NOT NULL; + +-- Expression index on lower(title) for exact case-insensitive title lookups. NOTE: a B-tree cannot serve '%substring%' LIKE/ILIKE scans; those need a pg_trgm GIN index. +CREATE INDEX IF NOT EXISTS idx_community_posts_title_lower + ON community_posts (lower(title)); + +-- Index on recipe_id for exact-recipe duplicate detection +CREATE INDEX IF NOT EXISTS idx_community_posts_recipe_id + ON community_posts (recipe_id) + WHERE recipe_id IS NOT NULL; diff --git a/circuitforge_core/community/models.py b/circuitforge_core/community/models.py index 63b681b..acf1cdb 100644 --- a/circuitforge_core/community/models.py +++ b/circuitforge_core/community/models.py @@ -66,6 +66,9 @@ class CommunityPost: protein_pct: float | None moisture_pct: float | None + # Variation link: slug of the parent post this is explicitly a variation of + similar_to_ref: str | None = None + def __post_init__(self) -> None: # Coerce list fields to tuples (frozen dataclass: use object.__setattr__) for key in ("slots", "dietary_tags", "allergen_flags", "flavor_molecules"): diff --git a/circuitforge_core/community/store.py b/circuitforge_core/community/store.py index 4ffcb4f..dad6361 100644 --- a/circuitforge_core/community/store.py +++ b/circuitforge_core/community/store.py @@ -46,6 +46,7 @@ def _row_to_post(row: dict) -> CommunityPost: fat_pct=row.get("fat_pct"), protein_pct=row.get("protein_pct"), moisture_pct=row.get("moisture_pct"), + similar_to_ref=row.get("similar_to_ref"), ) @@ -137,6 +138,61 @@ class SharedStore: finally: self._db.putconn(conn) + def search_similar_posts( + self, + title: str, + recipe_id: int | None = None, + post_type: str | None = None, + limit: int = 8, + ) -> 
list[CommunityPost]: + """Return posts similar to the given title or with the same recipe_id. + + Used by the dedup check before a new post is submitted. Matches on: + - exact recipe_id (strongest signal) + - case-insensitive title substring match (first 80 chars, LIKE wildcards escaped) + + Results are ordered: recipe_id matches first, then by published desc. + """ + conn = self._db.getconn() + try: + conditions: list[str] = [] + params: list = [] + + title_condition = "lower(title) LIKE lower(%s)" + title_param = "%" + title.lower()[:80].replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + "%" + + if recipe_id is not None: + conditions.append(f"(recipe_id = %s OR {title_condition})") + params.extend([recipe_id, title_param]) + else: + conditions.append(title_condition) + params.append(title_param) + + if post_type: + conditions.append("post_type = %s") + params.append(post_type) + + where = "WHERE " + " AND ".join(conditions) + params.append(limit) + + order_clause = ( + "ORDER BY (recipe_id IS NOT DISTINCT FROM %s) DESC, published DESC" + if recipe_id is not None + else "ORDER BY published DESC" + ) + if recipe_id is not None: + params.insert(-1, recipe_id) + + with conn.cursor() as cur: + cur.execute( + f"SELECT * FROM community_posts {where} {order_clause} LIMIT %s", + params, + ) + rows = cur.fetchall() + return [_row_to_post(_cursor_to_dict(cur, r)) for r in rows] + finally: + self._db.putconn(conn) + # ------------------------------------------------------------------ # Writes # ------------------------------------------------------------------ @@ -156,13 +212,14 @@ class SharedStore: seasoning_score, richness_score, brightness_score, depth_score, aroma_score, structure_score, texture_profile, dietary_tags, allergen_flags, flavor_molecules, - fat_pct, protein_pct, moisture_pct, source_product + fat_pct, protein_pct, moisture_pct, source_product, + similar_to_ref ) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s::jsonb, %s::jsonb, %s::jsonb, - %s, %s, %s, %s + %s, %s, %s, %s, %s ) """, ( @@ -178,6 +235,7 @@ class SharedStore: 
json.dumps(list(post.flavor_molecules)), post.fat_pct, post.protein_pct, post.moisture_pct, self._source_product, + post.similar_to_ref, ), ) conn.commit()