Core trust scoring: - Five metadata signals (account age, feedback count/ratio, price vs market, category history), composited 0–100 - CV-based price signal suppression for heterogeneous search results (e.g. mixed laptop generations won't false-positive suspicious_price) - Expanded scratch/dent title detection: evasive redirects, functional problem phrases, DIY/repair indicators - Hard filters: new_account, established_bad_actor - Soft flags: low_feedback, suspicious_price, duplicate_photo, scratch_dent, long_on_market, significant_price_drop Search & filtering: - Browse API adapter (up to 200 items/page) + Playwright scraper fallback - OR-group query expansion for comprehensive variant coverage - Must-include (AND/ANY/groups), must-exclude, category, price range filters - Saved searches with full filter round-trip via URL params Seller enrichment: - Background BTF /itm/ scraping for account age (Kasada-safe headed Chromium) - On-demand enrichment: POST /api/enrich + ListingCard ↻ button - Category history derived from Browse API categories field (free, no extra calls) - Shopping API GetUserProfile inline enrichment for API adapter Market comps: - eBay Marketplace Insights API with Browse API fallback (catches 403 + 404) - Comps prioritised in ThreadPoolExecutor (submitted first) Infrastructure: - Staging DB fields: times_seen, first_seen_at, price_at_first_seen, category_name - Migrations 004 (staging tracking) + 005 (listing category) - eBay webhook handler stub - Cloud compose stack (compose.cloud.yml) - Vue frontend: search store, saved searches store, ListingCard, filter sidebar Docs: - README fully rewritten to reflect MVP status + full feature documentation - Roadmap table linked to all 13 Forgejo issues
77 lines
3.1 KiB
Python
77 lines
3.1 KiB
Python
"""Five metadata trust signals, each scored 0–20."""
|
||
from __future__ import annotations
|
||
import json
|
||
from typing import Optional
|
||
from app.db.models import Seller
|
||
|
||
# Category keys whose sales counts are summed as "electronics" when scoring a
# seller's category history.
ELECTRONICS_CATEGORIES = {
    "ELECTRONICS",
    "COMPUTERS_TABLETS",
    "VIDEO_GAMES",
    "CELL_PHONES",
}

# Cut-off for the coefficient of variation (stddev / mean) of a search's price
# distribution. Above this value the results are treated as too mixed for the
# market median to be a trustworthy scam-detection baseline.
# Example: "Lenovo RTX intel" returns both $200 old ThinkPads and $2000 Legions,
# which yields a CV of roughly 1.0 or more.
_HETEROGENEOUS_CV_THRESHOLD = 0.6
|
||
|
||
|
||
class MetadataScorer:
    """Score a listing on five metadata trust signals, each on a 0–20 scale.

    A signal is reported as ``None`` instead of a number when the data needed
    to compute it is unavailable or unusable.
    """

    def score(
        self,
        seller: Seller,
        market_median: Optional[float],
        listing_price: float,
        price_cv: Optional[float] = None,
    ) -> dict[str, Optional[int]]:
        """Compute all five signals for one listing.

        Args:
            seller: Seller record; reads ``account_age_days``, ``feedback_count``,
                ``feedback_ratio`` and ``category_history_json``.
            market_median: Median market price for comparable listings, or
                ``None`` when no market data is available.
            listing_price: Price of the listing being scored.
            price_cv: Coefficient of variation of the market prices, if known.

        Returns:
            Mapping with the keys ``account_age``, ``feedback_count``,
            ``feedback_ratio``, ``price_vs_market`` and ``category_history``.
            Each value is a 0–20 score, or ``None`` when that signal could not
            be computed.
        """
        account_age_days = seller.account_age_days
        return {
            # Account age may be missing; report it as unavailable, not as 0.
            "account_age": (
                None if account_age_days is None else self._account_age(account_age_days)
            ),
            "feedback_count": self._feedback_count(seller.feedback_count),
            "feedback_ratio": self._feedback_ratio(seller.feedback_ratio, seller.feedback_count),
            "price_vs_market": self._price_vs_market(listing_price, market_median, price_cv),
            "category_history": self._category_history(seller.category_history_json),
        }

    def _account_age(self, days: int) -> int:
        """Map account age in days to a score: <7 → 0, <30 → 5, <90 → 10, <365 → 15, else 20."""
        if days < 7:
            return 0
        if days < 30:
            return 5
        if days < 90:
            return 10
        if days < 365:
            return 15
        return 20

    def _feedback_count(self, count: int) -> int:
        """Map feedback count to a score: <3 → 0, <10 → 5, <50 → 10, <200 → 15, else 20."""
        if count < 3:
            return 0
        if count < 10:
            return 5
        if count < 50:
            return 10
        if count < 200:
            return 15
        return 20

    def _feedback_ratio(self, ratio: float, count: int) -> int:
        """Map the positive-feedback ratio (compared against 0.80–0.98) to a score.

        A ratio below 0.80 only scores 0 when the seller has more than 20
        feedback entries; with 20 or fewer it falls through to the ``< 0.90``
        tier and scores 5.
        """
        if ratio < 0.80 and count > 20:
            return 0
        if ratio < 0.90:
            return 5
        if ratio < 0.95:
            return 10
        if ratio < 0.98:
            return 15
        return 20

    def _price_vs_market(
        self,
        price: float,
        median: Optional[float],
        price_cv: Optional[float] = None,
    ) -> Optional[int]:
        """Score the listing price relative to the market median.

        Returns ``None`` when the median is missing or non-positive, or when
        ``price_cv`` exceeds ``_HETEROGENEOUS_CV_THRESHOLD``.
        """
        if median is None:
            return None  # data unavailable → aggregator sets score_is_partial
        if median <= 0:
            return None  # cannot form a meaningful ratio
        if price_cv is not None and price_cv > _HETEROGENEOUS_CV_THRESHOLD:
            return None  # mixed model/generation search — median is unreliable

        ratio = price / median
        if ratio < 0.50:
            return 0  # more than 50% below market = scam
        if ratio < 0.70:
            return 5  # more than 30% below market = suspicious
        if ratio < 0.85:
            return 10
        if ratio <= 1.20:
            return 20
        return 15  # above market = still ok, just expensive

    def _category_history(self, category_history_json: str) -> Optional[int]:
        """Score the seller's electronics sales from a JSON category history.

        The JSON is expected to be an object mapping category keys to sales
        counts. Counts whose key is in ``ELECTRONICS_CATEGORIES`` are summed:
        0 → 0, <5 → 8, <20 → 14, otherwise 20.

        Returns ``None`` (data unavailable) when the input cannot be parsed, is
        not a JSON object, is empty, or holds non-numeric counts.
        """
        try:
            history = json.loads(category_history_json)
        except (ValueError, TypeError):
            return None  # unparseable → data unavailable

        # Valid JSON that is not an object (list, number, string) has no
        # .items(); treat it like unparseable input instead of crashing.
        if not isinstance(history, dict) or not history:
            return None  # wrong shape or empty dict → no category data

        try:
            electronics_sales = sum(
                sold for category, sold in history.items() if category in ELECTRONICS_CATEGORIES
            )
        except TypeError:
            return None  # non-numeric counts (e.g. null) → data unavailable

        if electronics_sales == 0:
            return 0
        if electronics_sales < 5:
            return 8
        if electronics_sales < 20:
            return 14
        return 20
|