@router.post("/check-similar")
async def check_similar(body: dict, session: CloudUser = Depends(get_session)):
    """Pre-submission dedup check: return similar existing posts for the given title/recipe_id.

    Safe to call with no community store configured — returns empty list rather than 503.

    Reads ``title``, ``recipe_id`` and ``post_type`` from ``body``. A missing,
    blank or non-string title yields an empty result instead of an error.

    Returns:
        ``{"similar_posts": [...]}`` holding at most five result dicts built by
        ``build_similar_post_result``; candidates whose tier is ``"different"``
        are left out.
    """
    # NOTE(review): the trailing positional argument to search_similar_posts is
    # presumably the candidate limit — confirm against the store implementation.
    candidate_limit = 8
    max_results = 5

    store = _get_community_store()
    if store is None:
        return {"similar_posts": []}

    raw_title = body.get("title")
    # A non-string title (number, list, ...) is treated like a missing one so
    # the endpoint stays "safe to call" instead of failing on .strip().
    title = raw_title.strip() if isinstance(raw_title, str) else ""
    if not title:
        return {"similar_posts": []}

    recipe_id = body.get("recipe_id")
    post_type = body.get("post_type")

    candidates = await asyncio.to_thread(
        store.search_similar_posts,
        title,
        recipe_id,
        post_type,
        candidate_limit,
    )
    if not candidates:
        return {"similar_posts": []}

    from app.services.community.dedup import build_similar_post_result, fetch_recipe_ingredients

    def _score_candidates() -> list:
        """Score every candidate in one worker call; drop the "different" tier."""
        incoming_ingredients = fetch_recipe_ingredients(session.db, recipe_id)
        scored = (
            build_similar_post_result(post, recipe_id, incoming_ingredients, session.db)
            for post in candidates
        )
        return [result for result in scored if result["similarity_tier"] != "different"]

    # One thread hop for the whole batch rather than one per candidate.
    results = await asyncio.to_thread(_score_candidates)
    return {"similar_posts": results[:max_results]}
# app/services/community/dedup.py
# MIT License
#
# Ingredient-overlap scoring for the community pre-submission dedup check.

from __future__ import annotations

import json
import logging
from pathlib import Path

logger = logging.getLogger(__name__)

# Jaccard cut-offs for the non-exact tiers. The percentages quoted in
# _SIMILARITY_TIERS below must be kept in step with these.
_VERY_SIMILAR_MIN = 0.70
_SOMEWHAT_SIMILAR_MIN = 0.35

_SIMILARITY_TIERS = {
    "exact_recipe": "This exact recipe is already in the community feed.",
    "very_similar": "Very similar recipes already exist (70%+ ingredient overlap).",
    "somewhat_similar": "Somewhat similar recipes exist (35-70% ingredient overlap).",
    "different": "No close matches found.",
}


def _normalise_name(value) -> str:
    """Return *value* lower-cased and stripped, or "" when it is not a string."""
    if not isinstance(value, str):
        return ""
    return value.lower().strip()


def _parse_ingredient_names(raw) -> set[str]:
    """Return a normalised set of ingredient name tokens from various stored formats.

    Accepts ``None``, a JSON-encoded string, or an iterable whose items are
    plain strings or dicts carrying a ``"name"`` / ``"ingredient"`` key.
    Anything that cannot be interpreted (invalid JSON, a JSON number/null/bool,
    non-string names, blank names) contributes nothing rather than raising.
    """
    if raw is None:
        return set()
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except (ValueError, TypeError):
            return set()
        if isinstance(raw, str):
            # A JSON string scalar is one ingredient, not a sequence of characters.
            raw = [raw]
    try:
        items = iter(raw)
    except TypeError:
        # Decoded JSON scalars (numbers, null, booleans) are not iterable.
        return set()

    names: set[str] = set()
    for item in items:
        if isinstance(item, dict):
            candidate = item.get("name") or item.get("ingredient") or ""
        else:
            candidate = item
        cleaned = _normalise_name(candidate)
        if cleaned:
            names.add(cleaned)
    return names


def jaccard(a: set[str], b: set[str]) -> float:
    """Return the Jaccard index ``|a & b| / |a | b|`` of two sets.

    Two empty sets score 1.0; exactly one empty set scores 0.0.
    """
    if not a and not b:
        return 1.0
    if not a or not b:
        return 0.0
    return len(a & b) / len(a | b)


def similarity_tier(jaccard_score: float, exact_recipe: bool) -> str:
    """Map a Jaccard score (or an exact recipe match) to a ``_SIMILARITY_TIERS`` key.

    ``exact_recipe`` wins regardless of the score; otherwise the score is
    compared against the 0.70 and 0.35 cut-offs (both inclusive).
    """
    if exact_recipe:
        return "exact_recipe"
    if jaccard_score >= _VERY_SIMILAR_MIN:
        return "very_similar"
    if jaccard_score >= _SOMEWHAT_SIMILAR_MIN:
        return "somewhat_similar"
    return "different"


def fetch_recipe_ingredients(db_path: Path, recipe_id: int | None) -> set[str]:
    """Look up ingredient names for a recipe from the local corpus. Returns empty set on miss.

    Best-effort by design: any failure while importing, opening or querying the
    store is logged at debug level (with traceback) and reported as an empty set.
    """
    if recipe_id is None:
        return set()
    try:
        # Imported lazily so the pure scoring helpers above stay usable
        # without the database layer.
        from app.db.store import Store

        store = Store(db_path)
        try:
            row = store.get_recipe(recipe_id)
            if row is None:
                return set()
            return _parse_ingredient_names(row.get("ingredient_names"))
        finally:
            store.close()
    except Exception:
        logger.debug(
            "ingredient lookup failed for recipe_id=%s", recipe_id, exc_info=True
        )
        return set()


def build_similar_post_result(
    post,
    incoming_recipe_id: int | None,
    incoming_ingredients: set[str],
    db_path: Path,
) -> dict:
    """Build a similarity result dict for one existing community post.

    Args:
        post: Object exposing ``recipe_id``, ``slug``, ``title``,
            ``recipe_name``, ``pseudonym`` and ``published``.
        incoming_recipe_id: Recipe id of the post being submitted, if any.
        incoming_ingredients: Normalised ingredient names of the incoming recipe.
        db_path: Database handed to ``fetch_recipe_ingredients`` for the
            existing post's recipe.

    Returns:
        A dict with the post's identifying fields plus ``similarity_tier``,
        ``jaccard_score`` (``None`` for an exact recipe match, otherwise
        rounded to three decimals) and ``tier_description``.
    """
    exact = (
        incoming_recipe_id is not None
        and post.recipe_id is not None
        and post.recipe_id == incoming_recipe_id
    )

    j_score = 0.0
    # Ingredient overlap is only worth computing when both sides have data;
    # otherwise the score stays 0.0 and the tier resolves to "different".
    if not exact and incoming_ingredients:
        existing_ingredients = fetch_recipe_ingredients(db_path, post.recipe_id)
        if existing_ingredients:
            j_score = jaccard(incoming_ingredients, existing_ingredients)

    tier = similarity_tier(j_score, exact)

    published = post.published
    if hasattr(published, "isoformat"):
        published_text = published.isoformat()
    else:
        published_text = str(published)

    return {
        "slug": post.slug,
        "title": post.title,
        "recipe_name": post.recipe_name,
        "pseudonym": post.pseudonym,
        "published": published_text,
        "similarity_tier": tier,
        "jaccard_score": round(j_score, 3) if not exact else None,
        "tier_description": _SIMILARITY_TIERS.get(tier, ""),
    }
b/frontend/src/components/PublishOutcomeModal.vue @@ -106,6 +106,39 @@ How you appear on posts -- not your real name or email. + +
+ Similar stories already exist. + You can publish as-is, mark yours as a variation, or cancel. +
+