snipe/app/trust/photo.py
pyr0ball f8dd1d261d
Some checks are pending
CI / Python tests (push) Waiting to run
CI / Frontend typecheck + tests (push) Waiting to run
Mirror / mirror (push) Waiting to run
feat: preferences store, community signals, a11y + API fixes
- Store: add save_community_signal, get/set_user_preference, get_all_preferences
- App.vue: move skip link before nav (correct a11y order); bootstrap preferences store after session
- useTrustFeedback: prefix fetch URL with VITE_API_BASE (fixes menagerie routing)
- search.ts: guard v-model.number empty-string from cleared price inputs
- Python: isort import ordering pass across adapters, trust, tasks, tests
2026-04-14 16:15:09 -07:00

84 lines
2.8 KiB
Python

"""Perceptual hash deduplication within a result set (free tier, v0.1)."""
from __future__ import annotations
import io
from typing import Optional
import requests
try:
import imagehash
from PIL import Image
_IMAGEHASH_AVAILABLE = True
except ImportError:
_IMAGEHASH_AVAILABLE = False
# Module-level phash cache: url → hash string (or None on failure).
# Avoids re-downloading the same eBay CDN image on repeated searches.
# Failures are stored as None as well, so PhotoScorer._fetch_hash does not
# retry a URL that has already failed once in this process.
# NOTE(review): nothing visible in this module evicts entries — confirm that
# unbounded growth is acceptable for long-running processes.
_phash_cache: dict[str, Optional[str]] = {}
class PhotoScorer:
    """
    Flag listings whose primary photo duplicates another listing's photo.

    check_duplicates: compare images within a single result set.
    Cross-session dedup (PhotoHash table) is v0.2.
    Vision analysis (real/marketing/EM bag) is v0.2 paid tier.
    """

    def check_duplicates(self, photo_urls_per_listing: list[list[str]]) -> list[bool]:
        """
        Returns a list of booleans parallel to photo_urls_per_listing.
        True = this listing's primary photo is a duplicate of another listing in the set.
        Falls back to URL-equality check if imagehash is unavailable or fetch fails.

        Only the first URL of each listing (its primary photo) is inspected;
        a listing with no URLs is never flagged.

        Note the two signals flag differently: a URL-equality match marks only
        the later occurrence(s), whereas a perceptual-hash match marks both the
        first listing carrying that hash and every later one.
        """
        if not _IMAGEHASH_AVAILABLE:
            return self._url_dedup(photo_urls_per_listing)

        primary_urls = [urls[0] if urls else "" for urls in photo_urls_per_listing]

        # Fast path: URL equality is a trivial duplicate signal (no fetch needed).
        # _url_dedup returns a fresh list, so it can be updated in place below.
        results = self._url_dedup([[url] for url in primary_urls])

        # None means "no hash available" (empty URL, download or decode failure).
        hashes = [self._fetch_hash(url) for url in primary_urls]

        first_index_by_hash: dict[str, int] = {}
        for index, phash in enumerate(hashes):
            if phash is None:
                continue
            first_index = first_index_by_hash.setdefault(phash, index)
            if first_index != index:
                # Identical hash seen earlier: flag both sides of the match.
                results[index] = True
                results[first_index] = True
        return results

    def _fetch_hash(self, url: str) -> Optional[str]:
        """
        Download the image at url and return its perceptual hash as a string.

        Returns None for an empty url or when the download / decode / hashing
        fails for any reason (best-effort: errors are swallowed on purpose so
        that one bad image never breaks a whole search). Results, including
        failures, are memoised in the module-level _phash_cache.
        """
        if not url:
            return None
        if url in _phash_cache:
            return _phash_cache[url]

        result: Optional[str]
        try:
            # The context manager guarantees the streamed connection is released
            # even when raise_for_status() raises before the body is consumed.
            with requests.get(url, timeout=5, stream=True) as resp:
                resp.raise_for_status()
                payload = resp.content
            img = Image.open(io.BytesIO(payload))
            result = str(imagehash.phash(img))
        except Exception:
            result = None

        _phash_cache[url] = result
        return result

    def _url_dedup(self, photo_urls_per_listing: list[list[str]]) -> list[bool]:
        """
        Flag listings whose primary URL already appeared earlier in the list.

        The first occurrence of a URL is reported as False; only subsequent
        occurrences are True. Listings with no URLs, or an empty primary URL,
        are always False and are never recorded as seen.
        """
        seen: set[str] = set()
        results: list[bool] = []
        for urls in photo_urls_per_listing:
            primary = urls[0] if urls else ""
            results.append(bool(primary) and primary in seen)
            if primary:
                seen.add(primary)
        return results