snipe/app/trust/metadata.py
pyr0ball bccedb1fe5 fix(trust): treat feedback_ratio=0.0 as missing data for buyer-only/returning sellers (#52)
eBay omits the 12-month positive percentage for returning sellers and
buyer-only accounts with no recent sales. Previously ratio=0.0 with
count>0 triggered established_bad_actor; now it returns None from the
scorer (score_is_partial=True) and emits a soft no_recent_seller_data
flag instead. ratio=0.0 with count=0 is still treated as no-history.
2026-05-04 09:24:27 -07:00

85 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Five metadata trust signals, each scored 020."""
from __future__ import annotations
import json
from typing import Optional
from app.db.models import Seller
ELECTRONICS_CATEGORIES = {"ELECTRONICS", "COMPUTERS_TABLETS", "VIDEO_GAMES", "CELL_PHONES"}
# Coefficient of variation (stddev/mean) above which the price distribution is
# considered too heterogeneous to trust the market median for scam detection.
# e.g. "Lenovo RTX intel" mixes $200 old ThinkPads with $2000 Legions → CV ~1.0+
_HETEROGENEOUS_CV_THRESHOLD = 0.6
class MetadataScorer:
def score(
self,
seller: Seller,
market_median: Optional[float],
listing_price: float,
price_cv: Optional[float] = None,
) -> dict[str, Optional[int]]:
return {
"account_age": self._account_age(seller.account_age_days) if seller.account_age_days is not None else None,
"feedback_count": self._feedback_count(seller.feedback_count),
"feedback_ratio": self._feedback_ratio(seller.feedback_ratio, seller.feedback_count),
"price_vs_market": self._price_vs_market(listing_price, market_median, price_cv),
"category_history": self._category_history(seller.category_history_json),
}
def _account_age(self, days: int) -> int:
if days < 7: return 0
if days < 30: return 5
if days < 90: return 10
if days < 365: return 15
return 20
def _feedback_count(self, count: int) -> int:
if count < 3: return 0
if count < 10: return 5
if count < 50: return 10
if count < 200: return 15
return 20
def _feedback_ratio(self, ratio: float, count: int) -> Optional[int]:
# ratio=0.0 with count>0 means the 12-month percentage wasn't on the page —
# eBay omits the ratio for returning/buyer-only sellers with no recent sales.
# Treat as missing rather than "literally 0% positive" (which eBay doesn't allow
# on active accounts — those get suspended long before reaching 0%).
if ratio == 0.0 and count > 0:
return None
if ratio < 0.80 and count > 20: return 0
if ratio < 0.90: return 5
if ratio < 0.95: return 10
if ratio < 0.98: return 15
return 20
def _price_vs_market(self, price: float, median: Optional[float], price_cv: Optional[float] = None) -> Optional[int]:
if median is None: return None # data unavailable → aggregator sets score_is_partial
if median <= 0: return None
if price_cv is not None and price_cv > _HETEROGENEOUS_CV_THRESHOLD:
return None # mixed model/generation search — median is unreliable
ratio = price / median
if ratio < 0.50: return 0 # >50% below = scam
if ratio < 0.70: return 5 # >30% below = suspicious
if ratio < 0.85: return 10
if ratio <= 1.20: return 20
return 15 # above market = still ok, just expensive
def _category_history(self, category_history_json: str) -> Optional[int]:
try:
history = json.loads(category_history_json)
except (ValueError, TypeError):
return None # unparseable → data unavailable
if not history:
return None # empty dict → no category data from this source
electronics_sales = sum(
v for k, v in history.items() if k in ELECTRONICS_CATEGORIES
)
if electronics_sales == 0: return 0
if electronics_sales < 5: return 8
if electronics_sales < 20: return 14
return 20