Core trust scoring: - Five metadata signals (account age, feedback count/ratio, price vs market, category history), composited 0–100 - CV-based price signal suppression for heterogeneous search results (e.g. mixed laptop generations won't false-positive suspicious_price) - Expanded scratch/dent title detection: evasive redirects, functional problem phrases, DIY/repair indicators - Hard filters: new_account, established_bad_actor - Soft flags: low_feedback, suspicious_price, duplicate_photo, scratch_dent, long_on_market, significant_price_drop Search & filtering: - Browse API adapter (up to 200 items/page) + Playwright scraper fallback - OR-group query expansion for comprehensive variant coverage - Must-include (AND/ANY/groups), must-exclude, category, price range filters - Saved searches with full filter round-trip via URL params Seller enrichment: - Background BTF /itm/ scraping for account age (Kasada-safe headed Chromium) - On-demand enrichment: POST /api/enrich + ListingCard ↻ button - Category history derived from Browse API categories field (free, no extra calls) - Shopping API GetUserProfile inline enrichment for API adapter Market comps: - eBay Marketplace Insights API with Browse API fallback (catches 403 + 404) - Comps prioritised in ThreadPoolExecutor (submitted first) Infrastructure: - Staging DB fields: times_seen, first_seen_at, price_at_first_seen, category_name - Migrations 004 (staging tracking) + 005 (listing category) - eBay webhook handler stub - Cloud compose stack (compose.cloud.yml) - Vue frontend: search store, saved searches store, ListingCard, filter sidebar Docs: - README fully rewritten to reflect MVP status + full feature documentation - Roadmap table linked to all 13 Forgejo issues
132 lines
5.4 KiB
Python
132 lines
5.4 KiB
Python
"""Composite score and red flag extraction."""
|
|
from __future__ import annotations
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from typing import Optional
|
|
from app.db.models import Seller, TrustScore
|
|
|
|
# Hard-filter thresholds — listings whose seller trips these get hard red
# flags ("new_account" / "established_bad_actor") in the aggregator below.
HARD_FILTER_AGE_DAYS = 7  # account younger than this many days → "new_account"
HARD_FILTER_BAD_RATIO_MIN_COUNT = 20  # require this much feedback before trusting the ratio
HARD_FILTER_BAD_RATIO_THRESHOLD = 0.80  # feedback ratio below this → "established_bad_actor"


# Title keywords that suggest cosmetic damage or wear (free-tier title scan).
# Description-body scan (paid BSL feature) runs via BTF enrichment — not implemented yet.
|
|
_SCRATCH_DENT_KEYWORDS = frozenset([
|
|
# Explicit cosmetic damage
|
|
"scratch", "scratched", "scratches", "scuff", "scuffed",
|
|
"dent", "dented", "ding", "dinged",
|
|
"crack", "cracked", "chip", "chipped",
|
|
"damage", "damaged", "cosmetic damage",
|
|
"blemish", "wear", "worn", "worn in",
|
|
# Parts / condition catch-alls
|
|
"as is", "for parts", "parts only", "spares or repair", "parts or repair",
|
|
# Evasive redirects — seller hiding damage detail in listing body
|
|
"see description", "read description", "read listing", "see listing",
|
|
"see photos for", "see pics for", "see images for",
|
|
# Functional problem phrases (phrases > single words to avoid false positives)
|
|
"issue with", "issues with", "problem with", "problems with",
|
|
"not working", "stopped working", "doesn't work", "does not work",
|
|
"no power", "dead on arrival", "powers on but", "turns on but", "boots but",
|
|
"faulty", "broken screen", "broken hinge", "broken port",
|
|
# DIY / project / repair listings
|
|
"needs repair", "needs work", "needs tlc",
|
|
"project unit", "project item", "project laptop", "project phone",
|
|
"for repair", "sold as is",
|
|
])
|
|
|
|
|
|
def _has_damage_keywords(title: str) -> bool:
|
|
lower = title.lower()
|
|
return any(kw in lower for kw in _SCRATCH_DENT_KEYWORDS)
|
|
|
|
|
|
# "long_on_market" soft flag requires BOTH: seen this many times by the
# staging index AND first sighted at least this many days ago.
_LONG_ON_MARKET_MIN_SIGHTINGS = 5
_LONG_ON_MARKET_MIN_DAYS = 14
# "significant_price_drop" soft flag threshold.
_PRICE_DROP_THRESHOLD = 0.20  # 20% below first-seen price
|
|
|
|
|
|
def _days_since(iso: Optional[str]) -> Optional[int]:
|
|
if not iso:
|
|
return None
|
|
try:
|
|
dt = datetime.fromisoformat(iso.replace("Z", "+00:00"))
|
|
# Normalize to naive UTC so both paths (timezone-aware ISO and SQLite
|
|
# CURRENT_TIMESTAMP naive strings) compare correctly.
|
|
if dt.tzinfo is not None:
|
|
dt = dt.replace(tzinfo=None)
|
|
return (datetime.utcnow() - dt).days
|
|
except ValueError:
|
|
return None
|
|
|
|
|
|
class Aggregator:
    """Combines per-signal trust scores into a composite TrustScore record.

    The composite is scaled 0-100 over whichever signals actually returned
    data; hard and soft red flags are collected in order and serialized as a
    JSON list on the resulting row.
    """

    def aggregate(
        self,
        signal_scores: dict[str, Optional[int]],
        photo_hash_duplicate: bool,
        seller: Optional[Seller],
        listing_id: int = 0,
        listing_title: str = "",
        times_seen: int = 1,
        first_seen_at: Optional[str] = None,
        price: float = 0.0,
        price_at_first_seen: Optional[float] = None,
    ) -> TrustScore:
        """Build a TrustScore from signal scores, seller data and staging stats.

        Args:
            signal_scores: 0-20 score per signal keyed by signal name; a None
                value means "no data" (the result is then marked partial and
                that signal is excluded from the composite). Keys accessed:
                "account_age", "feedback_count", "feedback_ratio",
                "price_vs_market", "category_history".
            photo_hash_duplicate: True when the listing photo's hash matched
                another listing; adds the "duplicate_photo" soft flag.
            seller: enriched seller record, or None when unknown (all
                seller-based flags are skipped).
            listing_id: DB id stored on the resulting TrustScore row.
            listing_title: scanned for scratch/dent keywords when non-empty.
            times_seen: staging-DB sighting count for the listing.
            first_seen_at: ISO timestamp of the first sighting, if any.
            price: current listing price.
            price_at_first_seen: price recorded at first sighting, if any.

        Returns:
            TrustScore with the composite score, per-signal scores (missing
            signals stored as 0), and red flags as a JSON-encoded list.
        """
        is_partial = any(v is None for v in signal_scores.values())
        # Missing signals are stored as 0 in the per-signal columns, but are
        # NOT counted against the composite (see below).
        clean = {k: (v if v is not None else 0) for k, v in signal_scores.items()}

        # Score only against signals that returned real data — treating "no data"
        # as 0 conflates "bad signal" with "missing signal" and drags scores down
        # unfairly when the API doesn't expose a field (e.g. registrationDate).
        available = [v for v in signal_scores.values() if v is not None]
        available_max = len(available) * 20  # each signal contributes 0-20
        if available_max > 0:
            composite = round((sum(available) / available_max) * 100)
        else:
            composite = 0  # no usable signals at all

        red_flags: list[str] = []

        # Hard filters
        if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS:
            red_flags.append("new_account")
        # NOTE(review): unlike account_age_days above, feedback_ratio and
        # feedback_count are not None-guarded here — assumes a present Seller
        # always carries both fields; confirm against the Seller model.
        if seller and (
            seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
            and seller.feedback_count > HARD_FILTER_BAD_RATIO_MIN_COUNT
        ):
            red_flags.append("established_bad_actor")

        # Soft flags
        if seller and seller.account_age_days is not None and seller.account_age_days < 30:
            red_flags.append("account_under_30_days")
        if seller and seller.feedback_count < 10:
            red_flags.append("low_feedback_count")
        if signal_scores.get("price_vs_market") == 0:  # only flag when data exists and price is genuinely <50% of market
            red_flags.append("suspicious_price")
        if photo_hash_duplicate:
            red_flags.append("duplicate_photo")
        if listing_title and _has_damage_keywords(listing_title):
            red_flags.append("scratch_dent_mentioned")

        # Staging DB signals
        days_in_index = _days_since(first_seen_at)
        if (times_seen >= _LONG_ON_MARKET_MIN_SIGHTINGS
            and days_in_index is not None
            and days_in_index >= _LONG_ON_MARKET_MIN_DAYS):
            red_flags.append("long_on_market")
        # Truthiness check doubles as a None/0.0 guard for price_at_first_seen.
        if (price_at_first_seen and price_at_first_seen > 0
            and price < price_at_first_seen * (1 - _PRICE_DROP_THRESHOLD)):
            red_flags.append("significant_price_drop")

        return TrustScore(
            listing_id=listing_id,
            composite_score=composite,
            account_age_score=clean["account_age"],
            feedback_count_score=clean["feedback_count"],
            feedback_ratio_score=clean["feedback_ratio"],
            price_vs_market_score=clean["price_vs_market"],
            category_history_score=clean["category_history"],
            photo_hash_duplicate=photo_hash_duplicate,
            red_flags_json=json.dumps(red_flags),
            score_is_partial=is_partial,
        )
|