- Parallel execution: search() and get_completed_sales() now run
concurrently via ThreadPoolExecutor — each gets its own Store/SQLite
connection for thread safety. First cold search time ~halved.
- Pagination: SearchFilters.pages (default 1) controls how many eBay
result pages are fetched. Both search and sold-comps support up to 3
parallel Playwright sessions per call (capped to avoid Xvfb overload).
UI: segmented 1/2/3/5 pages selector in filter sidebar with cost hint.
- True median: get_completed_sales() now averages the two middle values
for even-length price lists instead of always taking the lower bound.
- Fix suspicious_price false positive: aggregator now checks
signal_scores.get("price_vs_market") == 0 (pre-None-substitution)
so listings without market data are never flagged as suspicious.
- Fix title pollution: scraper strips eBay's hidden screen-reader span
("Opens in a new window or tab") from listing titles via regex.
Lazy-imports playwright/playwright_stealth inside _get() so pure
parsing functions are importable without the full browser stack.
- Tests: 48 pass on host (scraper tests now runnable without Docker),
new regression guards for all three bug fixes.
56 lines
2.1 KiB
Python
"""Composite score and red flag extraction."""
|
|
from __future__ import annotations
|
|
import json
|
|
from typing import Optional
|
|
from app.db.models import Seller, TrustScore
|
|
|
|
# Hard-filter thresholds: values that trip the "hard" red flags in
# Aggregator.aggregate() (as opposed to the softer advisory flags).
HARD_FILTER_AGE_DAYS = 7  # accounts younger than this (days) are flagged "new_account"

HARD_FILTER_BAD_RATIO_MIN_COUNT = 20  # feedback count must exceed this before the ratio is judged

HARD_FILTER_BAD_RATIO_THRESHOLD = 0.80  # feedback ratio below this (with enough feedback) flags "established_bad_actor"
|
|
|
|
|
|
class Aggregator:
    """Combine per-signal integer scores into a composite TrustScore.

    Also derives a list of red flags (hard filters and soft advisories)
    from the seller record and the signal scores.
    """

    def aggregate(
        self,
        signal_scores: dict[str, Optional[int]],
        photo_hash_duplicate: bool,
        seller: Optional[Seller],
        listing_id: int = 0,
    ) -> TrustScore:
        """Build a TrustScore from signal scores, photo-duplicate status, and seller data.

        Args:
            signal_scores: per-signal integer scores; a value of ``None`` means
                the signal had no data (the score is then marked partial and the
                signal contributes 0 to the composite).
            photo_hash_duplicate: True if the listing's photo hash matched
                another listing's.
            seller: seller record, or ``None`` when unknown. Individual seller
                fields may themselves be ``None`` when unscraped.
            listing_id: id of the listing being scored (0 when not yet persisted).

        Returns:
            A ``TrustScore`` row with the composite, per-signal scores,
            JSON-encoded red flags, and a partial-data marker.
        """
        # Mark the score partial if any signal lacked data, then substitute 0
        # so the composite sum is always defined.
        is_partial = any(v is None for v in signal_scores.values())
        clean = {k: (v if v is not None else 0) for k, v in signal_scores.items()}
        composite = sum(clean.values())

        red_flags: list[str] = []

        # Hard filters.
        # Every seller field is guarded against None before comparison:
        # the original only guarded account_age_days, so a seller row with a
        # missing feedback_ratio/feedback_count raised TypeError here.
        if (
            seller
            and seller.account_age_days is not None
            and seller.account_age_days < HARD_FILTER_AGE_DAYS
        ):
            red_flags.append("new_account")
        if (
            seller
            and seller.feedback_ratio is not None
            and seller.feedback_count is not None
            and seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
            and seller.feedback_count > HARD_FILTER_BAD_RATIO_MIN_COUNT
        ):
            red_flags.append("established_bad_actor")

        # Soft flags (advisory, weaker than the hard filters above).
        if seller and seller.account_age_days is not None and seller.account_age_days < 30:
            red_flags.append("account_under_30_days")
        if seller and seller.feedback_count is not None and seller.feedback_count < 10:
            red_flags.append("low_feedback_count")
        # Deliberately checks the PRE-substitution dict: None (no market data)
        # is not == 0, so listings without comps are never flagged. A genuine
        # 0 means the price scored at the bottom of the market-comparison band.
        if signal_scores.get("price_vs_market") == 0:  # only flag when data exists and price is genuinely <50% of market
            red_flags.append("suspicious_price")
        if photo_hash_duplicate:
            red_flags.append("duplicate_photo")

        return TrustScore(
            listing_id=listing_id,
            composite_score=composite,
            account_age_score=clean["account_age"],
            feedback_count_score=clean["feedback_count"],
            feedback_ratio_score=clean["feedback_ratio"],
            price_vs_market_score=clean["price_vs_market"],
            category_history_score=clean["category_history"],
            photo_hash_duplicate=photo_hash_duplicate,
            red_flags_json=json.dumps(red_flags),
            score_is_partial=is_partial,
        )
|