feat(community): issue #119 — recipe dedup + variation clustering on submit
Some checks are pending
CI / Backend (Python) (push) Waiting to run
CI / Frontend (Vue) (push) Waiting to run
Mirror / mirror (push) Waiting to run

Three-layer dedup check before community post submission:
- L1: title ILIKE search against existing posts in community DB
- L2: Jaccard ingredient overlap using local corpus (≥0.70 very_similar, ≥0.35 somewhat_similar)
- L3: similar_to_ref FK — user can explicitly mark post as variation of existing

New endpoint: POST /api/v1/community/check-similar (gracefully no-ops if community DB absent)
New service: app/services/community/dedup.py — jaccard(), similarity_tier(), build_similar_post_result()
Both publish modals (plan + outcome) now check similarity before submit; user can proceed as-is,
mark as variation, or cancel. similar_to_ref passed in final publish payload.
This commit is contained in:
pyr0ball 2026-05-11 17:25:06 -07:00
parent 59b183a898
commit ef04064728
5 changed files with 497 additions and 4 deletions

View file

@ -167,6 +167,54 @@ def _validate_publish_body(body: dict) -> None:
raise HTTPException(status_code=422, detail="photo_url must be an https:// URL.") raise HTTPException(status_code=422, detail="photo_url must be an https:// URL.")
@router.post("/check-similar")
async def check_similar(body: dict, session: CloudUser = Depends(get_session)):
    """Pre-submission dedup check: return similar existing posts for a title/recipe_id.

    Safe to call with no community store configured: returns an empty list
    rather than a 503.

    Args:
        body: JSON request body. Reads ``title``, ``recipe_id`` and ``post_type``.
        session: Current user session; ``session.db`` is handed to the local
            ingredient lookup.

    Returns:
        ``{"similar_posts": [...]}`` with at most five result dicts, none of
        which carries the ``"different"`` similarity tier.
    """
    store = _get_community_store()
    if store is None:
        return {"similar_posts": []}

    # A non-string title (number, list, ...) would fail on .strip() and surface
    # as a 500; treat it exactly like a missing title instead.
    raw_title = body.get("title")
    title = raw_title.strip() if isinstance(raw_title, str) else ""
    if not title:
        return {"similar_posts": []}

    recipe_id = body.get("recipe_id")
    post_type = body.get("post_type")

    # The store call is synchronous, so run it off the event loop.
    candidates = await asyncio.to_thread(
        store.search_similar_posts,
        title,
        recipe_id,
        post_type,
        8,
    )
    if not candidates:
        return {"similar_posts": []}

    from app.services.community.dedup import build_similar_post_result, fetch_recipe_ingredients

    incoming_ingredients = await asyncio.to_thread(
        fetch_recipe_ingredients, session.db, recipe_id
    )

    # NOTE(review): a candidate with no usable ingredient data scores 0.0 and is
    # tiered "different", so title-only matches are dropped below -- confirm
    # that this is the intended behaviour.
    results = []
    for post in candidates:
        result = await asyncio.to_thread(
            build_similar_post_result,
            post,
            recipe_id,
            incoming_ingredients,
            session.db,
        )
        if result["similarity_tier"] != "different":
            results.append(result)
    return {"similar_posts": results[:5]}
@router.post("/posts", status_code=201) @router.post("/posts", status_code=201)
async def publish_post(body: dict, session: CloudUser = Depends(get_session)): async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
from app.tiers import can_use from app.tiers import can_use
@ -214,6 +262,8 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
today = datetime.now(timezone.utc).strftime("%Y-%m-%d") today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
slug = f"kiwi-{_post_type_prefix(post_type)}-{pseudonym.lower().replace(' ', '')}-{today}-{slug_title}"[:120] slug = f"kiwi-{_post_type_prefix(post_type)}-{pseudonym.lower().replace(' ', '')}-{today}-{slug_title}"[:120]
similar_to_ref = body.get("similar_to_ref") or None
from circuitforge_core.community.models import CommunityPost from circuitforge_core.community.models import CommunityPost
post = CommunityPost( post = CommunityPost(
slug=slug, slug=slug,
@ -241,6 +291,7 @@ async def publish_post(body: dict, session: CloudUser = Depends(get_session)):
fat_pct=snapshot.fat_pct, fat_pct=snapshot.fat_pct,
protein_pct=snapshot.protein_pct, protein_pct=snapshot.protein_pct,
moisture_pct=snapshot.moisture_pct, moisture_pct=snapshot.moisture_pct,
similar_to_ref=similar_to_ref,
) )
try: try:
@ -351,6 +402,7 @@ def _post_to_dict(post) -> dict:
"fat_pct": post.fat_pct, "fat_pct": post.fat_pct,
"protein_pct": post.protein_pct, "protein_pct": post.protein_pct,
"moisture_pct": post.moisture_pct, "moisture_pct": post.moisture_pct,
"similar_to_ref": getattr(post, "similar_to_ref", None),
} }

View file

@ -0,0 +1,111 @@
# app/services/community/dedup.py
# MIT License
from __future__ import annotations
import json
import logging
from pathlib import Path
logger = logging.getLogger(__name__)
# Human-readable description for each tier name produced by similarity_tier().
# build_similar_post_result() looks the tier up here to fill "tier_description".
_SIMILARITY_TIERS = {
"exact_recipe": "This exact recipe is already in the community feed.",
"very_similar": "Very similar recipes already exist (70%+ ingredient overlap).",
"somewhat_similar": "Somewhat similar recipes exist (35-70% ingredient overlap).",
"different": "No close matches found.",
}
def _parse_ingredient_names(raw) -> set[str]:
"""Return a normalised set of ingredient name tokens from various stored formats."""
if raw is None:
return set()
if isinstance(raw, str):
try:
raw = json.loads(raw)
except (ValueError, TypeError):
return set()
names: set[str] = set()
for item in raw:
if isinstance(item, str):
names.add(item.lower().strip())
elif isinstance(item, dict):
name = item.get("name") or item.get("ingredient") or ""
if name:
names.add(name.lower().strip())
return names
def jaccard(a: set[str], b: set[str]) -> float:
    """Return the Jaccard index (intersection size over union size) of two sets.

    Two empty sets score 1.0; exactly one empty set scores 0.0.
    """
    if a and b:
        return len(a & b) / len(a | b)
    # At least one side is empty here: both empty -> 1.0, otherwise 0.0.
    return 0.0 if (a or b) else 1.0
def similarity_tier(jaccard_score: float, exact_recipe: bool) -> str:
    """Classify a comparison into a tier name.

    An exact recipe match always wins; otherwise the first threshold the score
    reaches decides the tier, falling back to ``"different"``.
    """
    if exact_recipe:
        return "exact_recipe"
    # Ordered from strictest to loosest so the first hit is the right tier.
    for floor, label in ((0.70, "very_similar"), (0.35, "somewhat_similar")):
        if jaccard_score >= floor:
            return label
    return "different"
def fetch_recipe_ingredients(db_path: Path, recipe_id: int | None) -> set[str]:
    """Look up ingredient names for a recipe from the local corpus.

    Best-effort: returns an empty set when ``recipe_id`` is ``None``, when the
    recipe is not found, or when the lookup fails for any reason.

    Args:
        db_path: Path passed to ``Store`` to open the local database.
        recipe_id: Recipe to look up, or ``None``.

    Returns:
        Normalised ingredient names parsed from the row's ``ingredient_names``.
    """
    if recipe_id is None:
        return set()
    try:
        from app.db.store import Store

        store = Store(db_path)
        try:
            row = store.get_recipe(recipe_id)
            if row is None:
                return set()
            return _parse_ingredient_names(row.get("ingredient_names"))
        finally:
            store.close()
    except Exception:
        # Deliberately swallowed so a dedup check never blocks publishing, but
        # keep the traceback so the failure is diagnosable at debug level.
        logger.debug(
            "ingredient lookup failed for recipe_id=%s", recipe_id, exc_info=True
        )
        return set()
def build_similar_post_result(
    post,
    incoming_recipe_id: int | None,
    incoming_ingredients: set[str],
    db_path: Path,
) -> dict:
    """Describe how one existing community post compares to the incoming one.

    The post is an exact match when both recipe ids are present and equal.
    Otherwise, if the incoming recipe has ingredients, the existing post's
    ingredients are looked up locally and compared with Jaccard overlap.

    Returns:
        A dict with the post's identifying fields plus ``similarity_tier``,
        ``jaccard_score`` (``None`` for exact matches) and ``tier_description``.
    """
    is_exact = (
        incoming_recipe_id is not None
        and post.recipe_id is not None
        and post.recipe_id == incoming_recipe_id
    )

    overlap = 0.0
    if incoming_ingredients and not is_exact:
        theirs = fetch_recipe_ingredients(db_path, post.recipe_id)
        if theirs:
            overlap = jaccard(incoming_ingredients, theirs)

    tier = similarity_tier(overlap, is_exact)

    # Prefer an ISO timestamp when the value supports it; fall back to str().
    published = post.published
    if hasattr(published, "isoformat"):
        published_text = published.isoformat()
    else:
        published_text = str(published)

    reported_score = None if is_exact else round(overlap, 3)

    return {
        "slug": post.slug,
        "title": post.title,
        "recipe_name": post.recipe_name,
        "pseudonym": post.pseudonym,
        "published": published_text,
        "similarity_tier": tier,
        "jaccard_score": reported_score,
        "tier_description": _SIMILARITY_TIERS.get(tier, ""),
    }

View file

@ -106,6 +106,39 @@
<span class="form-hint">How you appear on posts -- not your real name or email.</span> <span class="form-hint">How you appear on posts -- not your real name or email.</span>
</div> </div>
<!-- Similarity check results: rendered only while similarPosts is non-empty -->
<div
v-if="similarPosts.length > 0"
class="similar-panel"
role="region"
aria-label="Similar stories found"
>
<p class="similar-heading text-sm">
<strong>Similar stories already exist.</strong>
You can publish as-is, mark yours as a variation, or cancel.
</p>
<!-- One row per similar post: tier badge, title, author, variation toggle -->
<ul class="similar-list" aria-label="Existing similar posts">
<li
v-for="hit in similarPosts"
:key="hit.slug"
class="similar-item"
>
<span class="similar-tier-badge" :class="`tier-${hit.similarity_tier}`">
{{ tierLabel(hit.similarity_tier) }}
</span>
<span class="similar-title">{{ hit.title }}</span>
<span class="similar-by text-muted text-xs">by {{ hit.pseudonym }}</span>
<!-- Toggles this hit's slug in selectedRef; only one hit can be selected at a time -->
<button
class="btn-link text-xs"
:class="{ 'selected-ref': selectedRef === hit.slug }"
@click="toggleRef(hit.slug)"
>
{{ selectedRef === hit.slug ? 'Unmark variation' : 'Mark as variation' }}
</button>
</li>
</ul>
</div>
<!-- Submission feedback (aria-live region, always rendered) --> <!-- Submission feedback (aria-live region, always rendered) -->
<div <div
class="feedback-region" class="feedback-region"
@ -119,13 +152,24 @@
<!-- Footer actions --> <!-- Footer actions -->
<div class="modal-footer flex gap-sm"> <div class="modal-footer flex gap-sm">
<button <button
v-if="similarChecked"
class="btn btn-primary" class="btn btn-primary"
:disabled="submitting || !title.trim()" :disabled="submitting || !title.trim()"
:aria-busy="submitting" :aria-busy="submitting"
@click="onSubmit" @click="onSubmit"
> >
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span> <span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
{{ submitting ? 'Publishing...' : 'Publish' }} {{ submitting ? 'Publishing...' : (selectedRef ? 'Publish as variation' : 'Publish') }}
</button>
<button
v-else
class="btn btn-primary"
:disabled="checking || !title.trim()"
:aria-busy="checking"
@click="onCheckThenSubmit"
>
<span v-if="checking" class="spinner spinner-sm" aria-hidden="true"></span>
{{ checking ? 'Checking...' : 'Publish' }}
</button> </button>
<button class="btn btn-secondary" @click="$emit('close')"> <button class="btn btn-secondary" @click="$emit('close')">
Cancel Cancel
@ -139,7 +183,7 @@
<script setup lang="ts"> <script setup lang="ts">
import { ref, onMounted, onUnmounted, nextTick } from 'vue' import { ref, onMounted, onUnmounted, nextTick } from 'vue'
import { useCommunityStore } from '../stores/community' import { useCommunityStore } from '../stores/community'
import type { PublishPayload } from '../stores/community' import type { PublishPayload, SimilarPost, SimilarityTier } from '../stores/community'
const props = defineProps<{ const props = defineProps<{
recipeId: number | null recipeId: number | null
@ -162,6 +206,21 @@ const submitting = ref(false)
const submitError = ref<string | null>(null) const submitError = ref<string | null>(null)
const submitSuccess = ref<string | null>(null) const submitSuccess = ref<string | null>(null)
// True while the check-similar request started by onCheckThenSubmit() is in flight.
const checking = ref(false)
// Set once onCheckThenSubmit() has received a result from the similarity check.
const similarChecked = ref(false)
// Hits returned by the similarity check; drives the similar-panel.
const similarPosts = ref<SimilarPost[]>([])
// Slug of the hit this post is marked as a variation of, or null when none is selected.
const selectedRef = ref<string | null>(null)
/** Short badge text for a similarity tier; anything below "very similar" reads as "Similar". */
function tierLabel(tier: SimilarityTier): string {
  switch (tier) {
    case 'exact_recipe':
      return 'Same recipe'
    case 'very_similar':
      return 'Very similar'
    default:
      return 'Similar'
  }
}
/** Select `slug` as the variation target; choosing the already-selected slug clears it. */
function toggleRef(slug: string) {
  if (selectedRef.value === slug) {
    selectedRef.value = null
  } else {
    selectedRef.value = slug
  }
}
const dialogRef = ref<HTMLElement | null>(null) const dialogRef = ref<HTMLElement | null>(null)
const firstFocusRef = ref<HTMLButtonElement | null>(null) const firstFocusRef = ref<HTMLButtonElement | null>(null)
let previousFocus: HTMLElement | null = null let previousFocus: HTMLElement | null = null
@ -215,6 +274,17 @@ onUnmounted(() => {
previousFocus?.focus() previousFocus?.focus()
}) })
/**
 * Run the similarity check for the current title, then publish straight away
 * when nothing similar comes back. Otherwise the hits are left in
 * `similarPosts` for the user to review.
 */
async function onCheckThenSubmit() {
  const trimmedTitle = title.value.trim()
  if (trimmedTitle === '') return

  checking.value = true
  const hits = await store.checkSimilar(trimmedTitle, props.recipeId, postType.value)
  similarPosts.value = hits
  similarChecked.value = true
  checking.value = false

  if (hits.length === 0) {
    await onSubmit()
  }
}
async function onSubmit() { async function onSubmit() {
submitError.value = null submitError.value = null
submitSuccess.value = null submitSuccess.value = null
@ -228,6 +298,7 @@ async function onSubmit() {
if (outcomeNotes.value.trim()) payload.outcome_notes = outcomeNotes.value.trim() if (outcomeNotes.value.trim()) payload.outcome_notes = outcomeNotes.value.trim()
if (pseudonymName.value.trim()) payload.pseudonym_name = pseudonymName.value.trim() if (pseudonymName.value.trim()) payload.pseudonym_name = pseudonymName.value.trim()
if (props.recipeId != null) payload.recipe_id = props.recipeId if (props.recipeId != null) payload.recipe_id = props.recipeId
if (selectedRef.value) payload.similar_to_ref = selectedRef.value
submitting.value = true submitting.value = true
try { try {
@ -349,6 +420,82 @@ async function onSubmit() {
flex-wrap: wrap; flex-wrap: wrap;
} }
/* Container for the similarity results; the border uses the warning colour. */
.similar-panel {
background: var(--color-surface-alt, var(--color-surface));
border: 1px solid var(--color-warning, #f59e0b);
border-radius: var(--radius-md);
padding: var(--spacing-sm) var(--spacing-md);
margin-bottom: var(--spacing-md);
}
.similar-heading {
margin: 0 0 var(--spacing-sm);
}
/* Unstyled list laid out as a vertical stack of rows. */
.similar-list {
list-style: none;
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
/* One row per hit; wraps when it does not fit on one line. */
.similar-item {
display: flex;
align-items: baseline;
gap: var(--spacing-xs);
flex-wrap: wrap;
}
.similar-tier-badge {
font-size: var(--font-size-xs);
font-weight: 700;
padding: 1px 6px;
border-radius: var(--radius-sm);
flex-shrink: 0;
}
/* Badge colours; the class name is built from the tier as `tier-<similarity_tier>`. */
.tier-exact_recipe {
background: var(--color-error-bg, #fee2e2);
color: var(--color-error, #dc2626);
}
.tier-very_similar {
background: var(--color-warning-bg, #fef3c7);
color: var(--color-warning-text, #92400e);
}
.tier-somewhat_similar {
background: var(--color-surface-alt, #f3f4f6);
color: var(--color-text-secondary);
}
.similar-title {
font-weight: 600;
font-size: var(--font-size-sm);
}
.similar-by {
flex-shrink: 0;
}
/* Link-styled button; margin-left: auto pushes it to the end of the flex row. */
.btn-link {
background: none;
border: none;
color: var(--color-primary);
cursor: pointer;
padding: 0;
text-decoration: underline;
font-size: var(--font-size-xs);
margin-left: auto;
}
/* Applied while this row's slug is the selected variation target. */
.btn-link.selected-ref {
color: var(--color-success);
font-weight: 700;
}
@media (max-width: 480px) { @media (max-width: 480px) {
.modal-panel { .modal-panel {
max-height: 95vh; max-height: 95vh;

View file

@ -78,6 +78,39 @@
<span class="form-hint">How you appear on posts -- not your real name or email.</span> <span class="form-hint">How you appear on posts -- not your real name or email.</span>
</div> </div>
<!-- Similarity check results: rendered only while similarPosts is non-empty -->
<div
v-if="similarPosts.length > 0"
class="similar-panel"
role="region"
aria-label="Similar posts found"
>
<p class="similar-heading text-sm">
<strong>Similar plans already exist.</strong>
You can publish as-is, mark yours as a variation, or cancel.
</p>
<!-- One row per similar post: tier badge, title, author, variation toggle -->
<ul class="similar-list" aria-label="Existing similar posts">
<li
v-for="hit in similarPosts"
:key="hit.slug"
class="similar-item"
>
<span class="similar-tier-badge" :class="`tier-${hit.similarity_tier}`">
{{ tierLabel(hit.similarity_tier) }}
</span>
<span class="similar-title">{{ hit.title }}</span>
<span class="similar-by text-muted text-xs">by {{ hit.pseudonym }}</span>
<!-- Toggles this hit's slug in selectedRef; only one hit can be selected at a time -->
<button
class="btn-link text-xs"
:class="{ 'selected-ref': selectedRef === hit.slug }"
@click="toggleRef(hit.slug)"
>
{{ selectedRef === hit.slug ? 'Unmark variation' : 'Mark as variation' }}
</button>
</li>
</ul>
</div>
<!-- Submission feedback (aria-live region, always rendered) --> <!-- Submission feedback (aria-live region, always rendered) -->
<div <div
class="feedback-region" class="feedback-region"
@ -91,13 +124,24 @@
<!-- Footer actions --> <!-- Footer actions -->
<div class="modal-footer flex gap-sm"> <div class="modal-footer flex gap-sm">
<button <button
v-if="similarChecked"
class="btn btn-primary" class="btn btn-primary"
:disabled="submitting || !title.trim()" :disabled="submitting || !title.trim()"
:aria-busy="submitting" :aria-busy="submitting"
@click="onSubmit" @click="onSubmit"
> >
<span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span> <span v-if="submitting" class="spinner spinner-sm" aria-hidden="true"></span>
{{ submitting ? 'Publishing...' : 'Publish' }} {{ submitting ? 'Publishing...' : (selectedRef ? 'Publish as variation' : 'Publish') }}
</button>
<button
v-else
class="btn btn-primary"
:disabled="checking || !title.trim()"
:aria-busy="checking"
@click="onCheckThenSubmit"
>
<span v-if="checking" class="spinner spinner-sm" aria-hidden="true"></span>
{{ checking ? 'Checking...' : 'Publish' }}
</button> </button>
<button class="btn btn-secondary" @click="$emit('close')"> <button class="btn btn-secondary" @click="$emit('close')">
Cancel Cancel
@ -111,7 +155,7 @@
<script setup lang="ts"> <script setup lang="ts">
import { ref, onMounted, onUnmounted, nextTick } from 'vue' import { ref, onMounted, onUnmounted, nextTick } from 'vue'
import { useCommunityStore } from '../stores/community' import { useCommunityStore } from '../stores/community'
import type { PublishPayload } from '../stores/community' import type { PublishPayload, SimilarPost, SimilarityTier } from '../stores/community'
const props = defineProps<{ const props = defineProps<{
plan?: { plan?: {
@ -136,6 +180,21 @@ const submitting = ref(false)
const submitError = ref<string | null>(null) const submitError = ref<string | null>(null)
const submitSuccess = ref<string | null>(null) const submitSuccess = ref<string | null>(null)
// True while the check-similar request started by onCheckThenSubmit() is in flight.
const checking = ref(false)
// Set once onCheckThenSubmit() has received a result from the similarity check.
const similarChecked = ref(false)
// Hits returned by the similarity check; drives the similar-panel.
const similarPosts = ref<SimilarPost[]>([])
// Slug of the hit this post is marked as a variation of, or null when none is selected.
const selectedRef = ref<string | null>(null)
/** Short badge text for a similarity tier; anything below "very similar" reads as "Similar". */
function tierLabel(tier: SimilarityTier): string {
  switch (tier) {
    case 'exact_recipe':
      return 'Same recipe'
    case 'very_similar':
      return 'Very similar'
    default:
      return 'Similar'
  }
}
/** Select `slug` as the variation target; choosing the already-selected slug clears it. */
function toggleRef(slug: string) {
  if (selectedRef.value === slug) {
    selectedRef.value = null
  } else {
    selectedRef.value = slug
  }
}
const dialogRef = ref<HTMLElement | null>(null) const dialogRef = ref<HTMLElement | null>(null)
const firstFocusRef = ref<HTMLInputElement | null>(null) const firstFocusRef = ref<HTMLInputElement | null>(null)
let previousFocus: HTMLElement | null = null let previousFocus: HTMLElement | null = null
@ -189,6 +248,19 @@ onUnmounted(() => {
previousFocus?.focus() previousFocus?.focus()
}) })
/**
 * Run the similarity check for this plan, then publish straight away when
 * nothing similar comes back. Otherwise the hits are left in `similarPosts`
 * for the user to review.
 */
async function onCheckThenSubmit() {
  if (!title.value.trim()) return
  checking.value = true
  // Only the first slot's recipe is sent with the check, so read it directly
  // rather than mapping every slot into a throwaway array first.
  const firstRecipeId = props.plan?.slots?.[0]?.recipe_id ?? null
  similarPosts.value = await store.checkSimilar(title.value.trim(), firstRecipeId, 'plan')
  similarChecked.value = true
  checking.value = false
  if (!similarPosts.value.length) {
    await onSubmit()
  }
}
async function onSubmit() { async function onSubmit() {
submitError.value = null submitError.value = null
submitSuccess.value = null submitSuccess.value = null
@ -205,6 +277,7 @@ async function onSubmit() {
if (props.plan?.slots?.length) { if (props.plan?.slots?.length) {
payload.slots = props.plan.slots.map(({ day, meal_type, recipe_id }) => ({ day, meal_type, recipe_id })) payload.slots = props.plan.slots.map(({ day, meal_type, recipe_id }) => ({ day, meal_type, recipe_id }))
} }
if (selectedRef.value) payload.similar_to_ref = selectedRef.value
submitting.value = true submitting.value = true
try { try {
@ -295,6 +368,82 @@ async function onSubmit() {
flex-wrap: wrap; flex-wrap: wrap;
} }
/* Container for the similarity results; the border uses the warning colour. */
.similar-panel {
background: var(--color-surface-alt, var(--color-surface));
border: 1px solid var(--color-warning, #f59e0b);
border-radius: var(--radius-md);
padding: var(--spacing-sm) var(--spacing-md);
margin-bottom: var(--spacing-md);
}
.similar-heading {
margin: 0 0 var(--spacing-sm);
}
/* Unstyled list laid out as a vertical stack of rows. */
.similar-list {
list-style: none;
margin: 0;
padding: 0;
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
}
/* One row per hit; wraps when it does not fit on one line. */
.similar-item {
display: flex;
align-items: baseline;
gap: var(--spacing-xs);
flex-wrap: wrap;
}
.similar-tier-badge {
font-size: var(--font-size-xs);
font-weight: 700;
padding: 1px 6px;
border-radius: var(--radius-sm);
flex-shrink: 0;
}
/* Badge colours; the class name is built from the tier as `tier-<similarity_tier>`. */
.tier-exact_recipe {
background: var(--color-error-bg, #fee2e2);
color: var(--color-error, #dc2626);
}
.tier-very_similar {
background: var(--color-warning-bg, #fef3c7);
color: var(--color-warning-text, #92400e);
}
.tier-somewhat_similar {
background: var(--color-surface-alt, #f3f4f6);
color: var(--color-text-secondary);
}
.similar-title {
font-weight: 600;
font-size: var(--font-size-sm);
}
.similar-by {
flex-shrink: 0;
}
/* Link-styled button; margin-left: auto pushes it to the end of the flex row. */
.btn-link {
background: none;
border: none;
color: var(--color-primary);
cursor: pointer;
padding: 0;
text-decoration: underline;
font-size: var(--font-size-xs);
margin-left: auto;
}
/* Applied while this row's slug is the selected variation target. */
.btn-link.selected-ref {
color: var(--color-success);
font-weight: 700;
}
@media (max-width: 480px) { @media (max-width: 480px) {
.modal-panel { .modal-panel {
max-height: 95vh; max-height: 95vh;

View file

@ -64,6 +64,20 @@ export interface PublishPayload {
recipe_id?: number recipe_id?: number
outcome_notes?: string outcome_notes?: string
slots?: CommunityPostSlot[] slots?: CommunityPostSlot[]
similar_to_ref?: string
}
// Tiers a similarity hit can carry. The backend also has a "different" tier,
// but the check-similar endpoint filters those results out before responding.
export type SimilarityTier = 'exact_recipe' | 'very_similar' | 'somewhat_similar'
// One existing community post reported as similar by POST /community/check-similar.
export interface SimilarPost {
slug: string
title: string
recipe_name: string | null
pseudonym: string
// Publication timestamp as serialised by the backend (isoformat() when available, else str()).
published: string
similarity_tier: SimilarityTier
// Ingredient overlap rounded to 3 decimals; null when the hit is an exact recipe match.
jaccard_score: number | null
tier_description: string
} }
export interface PublishResult { export interface PublishResult {
@ -107,6 +121,25 @@ export const useCommunityStore = defineStore('community', () => {
return response.data return response.data
} }
/**
 * Ask the backend for existing posts similar to the one about to be published.
 *
 * Best-effort: a failed request or an unexpected response shape resolves to an
 * empty list, so callers can always read `.length` on the result.
 *
 * @param title    Post title to search for.
 * @param recipeId Optional recipe id; omitted from the request when null/undefined.
 * @param postType Optional post type; omitted from the request when empty.
 */
async function checkSimilar(
  title: string,
  recipeId?: number | null,
  postType?: string,
): Promise<SimilarPost[]> {
  try {
    const body: Record<string, unknown> = { title }
    if (recipeId != null) body.recipe_id = recipeId
    if (postType) body.post_type = postType
    const response = await api.post<{ similar_posts: SimilarPost[] }>(
      '/community/check-similar',
      body,
    )
    // A 2xx reply without a `similar_posts` array (e.g. a proxy error page)
    // must not leak `undefined` to callers that index into the result.
    const hits = response.data?.similar_posts
    return Array.isArray(hits) ? hits : []
  } catch {
    return []
  }
}
return { return {
posts, posts,
loading, loading,
@ -115,5 +148,6 @@ export const useCommunityStore = defineStore('community', () => {
fetchPosts, fetchPosts,
forkPost, forkPost,
publishPost, publishPost,
checkSimilar,
} }
}) })