snipe/app/trust/aggregator.py
pyr0ball 05f845962f
Some checks failed
CI / Python tests (push) Has been cancelled
CI / Frontend typecheck + tests (push) Has been cancelled
Mirror / mirror (push) Has been cancelled
Release / release (push) Has been cancelled
fix(trust): soften established_bad_actor for high-volume sellers; add declining_ratio flag
Fixes a false-positive edge case (snipe#52) where sellers with 500+
lifetime feedback were hard-flagged as established_bad_actor when the
12-month ratio dipped below 80% — even though the 12-month window may
cover only a small recent sample relative to lifetime history.

Changes:
- established_bad_actor hard filter now only fires for accounts with
  20–500 lifetime feedback (unchanged behavior for moderate accounts)
- Accounts >500 feedback with ratio 60–80%: new declining_ratio soft flag
  (composite score penalised but not zeroed, no hard block)
- Accounts >500 feedback with ratio <60%: still established_bad_actor
  (catastrophically bad even for high-volume sellers)
- Two new constants: HARD_FILTER_BAD_RATIO_MAX_COUNT=500,
  HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD=0.60

Note: buyer-feedback-only accounts (lifetime buyer history inflating
feedback_count for new sellers) requires profile-page scraping to detect
properly — tracked in snipe#52 as medium-term work.

Tests: 22 passed
2026-04-27 12:54:51 -07:00

186 lines
8.5 KiB
Python

"""Composite score and red flag extraction."""
from __future__ import annotations
import json
from datetime import datetime, timezone
from typing import Optional
from app.db.models import Seller, TrustScore
HARD_FILTER_AGE_DAYS = 7
HARD_FILTER_BAD_RATIO_MIN_COUNT = 20
HARD_FILTER_BAD_RATIO_THRESHOLD = 0.80
# Above this lifetime count the 12-month ratio may cover only a tiny recent sample,
# making a hard bad-actor flag disproportionate. Instead we emit the softer
# "declining_ratio" flag and let the composite score carry the penalty.
# Note: buyer-feedback-only accounts (e.g. longtime buyers who start selling) are a
# related edge case that requires profile-page scraping to detect properly — tracked
# in snipe#52 as a medium-term fix.
HARD_FILTER_BAD_RATIO_MAX_COUNT = 500
HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD = 0.60 # catastrophically bad even for high-volume
# Sellers above this feedback count are treated as established retailers.
# Stock photo reuse (duplicate_photo) is suppressed for them — large retailers
# legitimately share manufacturer images across many listings.
_ESTABLISHED_RETAILER_FEEDBACK_THRESHOLD = 1000
# Title keywords that suggest cosmetic damage or wear (free-tier title scan).
# Description-body scan (paid BSL feature) runs via BTF enrichment — not implemented yet.
_SCRATCH_DENT_KEYWORDS = frozenset([
# Explicit cosmetic damage
"scratch", "scratched", "scratches", "scuff", "scuffed",
"dent", "dented", "ding", "dinged",
"crack", "cracked", "chip", "chipped",
"damage", "damaged", "cosmetic damage",
"blemish", "wear", "worn", "worn in",
# Parts / condition catch-alls (also matches eBay condition field strings verbatim)
"as is", "for parts", "parts only", "spares or repair", "parts or repair",
"parts/repair", "parts or not working", "not working",
# Evasive redirects — seller hiding damage detail in listing body
"see description", "read description", "read listing", "see listing",
"see photos for", "see pics for", "see images for",
# Functional problem phrases (phrases > single words to avoid false positives)
"issue with", "issues with", "problem with", "problems with",
"not working", "stopped working", "doesn't work", "does not work",
"no power", "dead on arrival", "powers on but", "turns on but", "boots but",
"faulty", "broken screen", "broken hinge", "broken port",
# DIY / project / repair listings
"needs repair", "needs work", "needs tlc",
"project unit", "project item", "project laptop", "project phone",
"for repair", "sold as is",
])
def _has_damage_keywords(title: str) -> bool:
lower = title.lower()
return any(kw in lower for kw in _SCRATCH_DENT_KEYWORDS)
_LONG_ON_MARKET_MIN_SIGHTINGS = 5
_LONG_ON_MARKET_MIN_DAYS = 14
_PRICE_DROP_THRESHOLD = 0.20 # 20% below first-seen price
def _days_since(iso: Optional[str]) -> Optional[int]:
if not iso:
return None
try:
dt = datetime.fromisoformat(iso.replace("Z", "+00:00"))
# Normalize to naive UTC so both paths (timezone-aware ISO and SQLite
# CURRENT_TIMESTAMP naive strings) compare correctly.
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return (datetime.now(timezone.utc) - dt).days
except ValueError:
return None
class Aggregator:
def aggregate(
self,
signal_scores: dict[str, Optional[int]],
photo_hash_duplicate: bool,
seller: Optional[Seller],
listing_id: int = 0,
listing_title: str = "",
listing_condition: str = "",
times_seen: int = 1,
first_seen_at: Optional[str] = None,
price: float = 0.0,
price_at_first_seen: Optional[float] = None,
is_blocklisted: bool = False,
) -> TrustScore:
is_partial = any(v is None for v in signal_scores.values())
clean = {k: (v if v is not None else 0) for k, v in signal_scores.items()}
# Score only against signals that returned real data — treating "no data"
# as 0 conflates "bad signal" with "missing signal" and drags scores down
# unfairly when the API doesn't expose a field (e.g. registrationDate).
available = [v for v in signal_scores.values() if v is not None]
available_max = len(available) * 20
if available_max > 0:
composite = round((sum(available) / available_max) * 100)
else:
composite = 0
red_flags: list[str] = []
# Blocklist: force established_bad_actor and zero the score regardless of other signals.
if is_blocklisted:
red_flags.append("established_bad_actor")
composite = 0
return TrustScore(
listing_id=listing_id,
composite_score=composite,
account_age_score=clean["account_age"],
feedback_count_score=clean["feedback_count"],
feedback_ratio_score=clean["feedback_ratio"],
price_vs_market_score=clean["price_vs_market"],
category_history_score=clean["category_history"],
photo_hash_duplicate=photo_hash_duplicate,
red_flags_json=json.dumps(red_flags),
score_is_partial=is_partial,
)
# Hard filters
if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS:
red_flags.append("new_account")
if seller and seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD:
if HARD_FILTER_BAD_RATIO_MIN_COUNT < seller.feedback_count <= HARD_FILTER_BAD_RATIO_MAX_COUNT:
# Moderate-volume account with consistently bad ratio → hard flag.
red_flags.append("established_bad_actor")
elif seller.feedback_count > HARD_FILTER_BAD_RATIO_MAX_COUNT:
if seller.feedback_ratio < HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD:
# High-volume seller with catastrophic ratio → still hard flag.
red_flags.append("established_bad_actor")
else:
# High-volume seller with declining but not catastrophic ratio.
# 12-month window may cover only a small recent sample — soft flag only.
red_flags.append("declining_ratio")
if seller and seller.feedback_count == 0:
red_flags.append("zero_feedback")
# Zero feedback is a deliberate signal, not missing data — cap composite score
# so a 0-feedback seller can never appear trustworthy on other signals alone.
composite = min(composite, 35)
# Soft flags
if seller and seller.account_age_days is not None and seller.account_age_days < 30:
red_flags.append("account_under_30_days")
if seller and seller.feedback_count < 10:
red_flags.append("low_feedback_count")
if signal_scores.get("price_vs_market") == 0: # only flag when data exists and price is genuinely <50% of market
red_flags.append("suspicious_price")
is_established_retailer = (
seller is not None
and seller.feedback_count >= _ESTABLISHED_RETAILER_FEEDBACK_THRESHOLD
)
if photo_hash_duplicate and not is_established_retailer:
red_flags.append("duplicate_photo")
if (listing_title and _has_damage_keywords(listing_title)) or (
listing_condition and _has_damage_keywords(listing_condition)
):
red_flags.append("scratch_dent_mentioned")
# Staging DB signals
days_in_index = _days_since(first_seen_at)
if (times_seen >= _LONG_ON_MARKET_MIN_SIGHTINGS
and days_in_index is not None
and days_in_index >= _LONG_ON_MARKET_MIN_DAYS):
red_flags.append("long_on_market")
if (price_at_first_seen and price_at_first_seen > 0
and price < price_at_first_seen * (1 - _PRICE_DROP_THRESHOLD)):
red_flags.append("significant_price_drop")
return TrustScore(
listing_id=listing_id,
composite_score=composite,
account_age_score=clean["account_age"],
feedback_count_score=clean["feedback_count"],
feedback_ratio_score=clean["feedback_ratio"],
price_vs_market_score=clean["price_vs_market"],
category_history_score=clean["category_history"],
photo_hash_duplicate=photo_hash_duplicate,
red_flags_json=json.dumps(red_flags),
score_is_partial=is_partial,
)