diff --git a/api/main.py b/api/main.py index c7f738b..83781f6 100644 --- a/api/main.py +++ b/api/main.py @@ -5,6 +5,7 @@ import dataclasses import hashlib import logging import os +from concurrent.futures import ThreadPoolExecutor from pathlib import Path from fastapi import FastAPI, HTTPException @@ -38,33 +39,44 @@ def health(): @app.get("/api/search") -def search(q: str = "", max_price: float = 0, min_price: float = 0): +def search(q: str = "", max_price: float = 0, min_price: float = 0, pages: int = 1): if not q.strip(): return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None} - store = Store(_DB_PATH) - adapter = ScrapedEbayAdapter(store) - filters = SearchFilters( max_price=max_price if max_price > 0 else None, min_price=min_price if min_price > 0 else None, + pages=max(1, pages), ) + # Each adapter gets its own Store (SQLite connection) — required for thread safety. + # search() and get_completed_sales() run concurrently; they write to different tables + # so SQLite file-level locking is the only contention point. 
+ search_adapter = ScrapedEbayAdapter(Store(_DB_PATH)) + comps_adapter = ScrapedEbayAdapter(Store(_DB_PATH)) + try: - listings = adapter.search(q, filters) - adapter.get_completed_sales(q) # warm market comp cache + with ThreadPoolExecutor(max_workers=2) as ex: + listings_future = ex.submit(search_adapter.search, q, filters) + comps_future = ex.submit(comps_adapter.get_completed_sales, q, pages) + listings = listings_future.result() + comps_future.result() # wait; side-effect is saving market comp to DB except Exception as e: log.warning("eBay scrape failed: %s", e) raise HTTPException(status_code=502, detail=f"eBay search failed: {e}") + # Use search_adapter's store for post-processing — it has the sellers already written + store = search_adapter._store store.save_listings(listings) scorer = TrustScorer(store) trust_scores_list = scorer.score_batch(listings, q) - # Market comp + # Market comp written by comps_adapter — read from a fresh connection to avoid + # cross-thread connection reuse + comp_store = Store(_DB_PATH) query_hash = hashlib.md5(q.encode()).hexdigest() - comp = store.get_market_comp("ebay", query_hash) + comp = comp_store.get_market_comp("ebay", query_hash) market_price = comp.median_price if comp else None # Serialize — keyed by platform_listing_id for easy Vue lookup diff --git a/app/platforms/__init__.py b/app/platforms/__init__.py index da6c94c..f9162ee 100644 --- a/app/platforms/__init__.py +++ b/app/platforms/__init__.py @@ -12,6 +12,7 @@ class SearchFilters: min_price: Optional[float] = None condition: Optional[list[str]] = field(default_factory=list) location_radius_km: Optional[int] = None + pages: int = 1 # number of result pages to fetch (48 listings/page) class PlatformAdapter(ABC): diff --git a/app/platforms/ebay/scraper.py b/app/platforms/ebay/scraper.py index 64ecd5d..50e33ed 100644 --- a/app/platforms/ebay/scraper.py +++ b/app/platforms/ebay/scraper.py @@ -14,12 +14,11 @@ import hashlib import itertools import re import time +from 
concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta, timezone from typing import Optional from bs4 import BeautifulSoup -from playwright.sync_api import sync_playwright -from playwright_stealth import Stealth from app.db.models import Listing, MarketComp, Seller from app.db.store import Store @@ -164,10 +163,15 @@ def scrape_listings(html: str) -> list[Listing]: buying_format = "auction" if time_remaining is not None else "fixed_price" ends_at = (datetime.now(timezone.utc) + time_remaining).isoformat() if time_remaining else None + # Strip eBay's screen-reader accessibility text injected into title links. + # get_text() is CSS-blind and picks up visually-hidden spans. + raw_title = title_el.get_text(separator=" ", strip=True) + title = re.sub(r"\s*Opens in a new window or tab\s*", "", raw_title, flags=re.IGNORECASE).strip() + results.append(Listing( platform="ebay", platform_listing_id=platform_listing_id, - title=title_el.get_text(strip=True), + title=title, price=price, currency="USD", condition=condition, @@ -256,6 +260,9 @@ class ScrapedEbayAdapter(PlatformAdapter): env["DISPLAY"] = display try: + from playwright.sync_api import sync_playwright # noqa: PLC0415 — lazy: only needed in Docker + from playwright_stealth import Stealth # noqa: PLC0415 + with sync_playwright() as pw: browser = pw.chromium.launch( headless=False, @@ -280,12 +287,12 @@ class ScrapedEbayAdapter(PlatformAdapter): return html def search(self, query: str, filters: SearchFilters) -> list[Listing]: - params: dict = {"_nkw": query, "_sop": "15", "_ipg": "48"} + base_params: dict = {"_nkw": query, "_sop": "15", "_ipg": "48"} if filters.max_price: - params["_udhi"] = str(filters.max_price) + base_params["_udhi"] = str(filters.max_price) if filters.min_price: - params["_udlo"] = str(filters.min_price) + base_params["_udlo"] = str(filters.min_price) if filters.condition: cond_map = { "new": "1000", "used": "3000", @@ -293,38 +300,62 @@ class 
ScrapedEbayAdapter(PlatformAdapter): } codes = [cond_map[c] for c in filters.condition if c in cond_map] if codes: - params["LH_ItemCondition"] = "|".join(codes) + base_params["LH_ItemCondition"] = "|".join(codes) - html = self._get(params) - listings = scrape_listings(html) + pages = max(1, filters.pages) + page_params = [{**base_params, "_pgn": str(p)} for p in range(1, pages + 1)] - # Cache seller objects extracted from the same page - self._store.save_sellers(list(scrape_sellers(html).values())) + with ThreadPoolExecutor(max_workers=min(pages, 3)) as ex: + htmls = list(ex.map(self._get, page_params)) + seen_ids: set[str] = set() + listings: list[Listing] = [] + sellers: dict[str, "Seller"] = {} + for html in htmls: + for listing in scrape_listings(html): + if listing.platform_listing_id not in seen_ids: + seen_ids.add(listing.platform_listing_id) + listings.append(listing) + sellers.update(scrape_sellers(html)) + + self._store.save_sellers(list(sellers.values())) return listings def get_seller(self, seller_platform_id: str) -> Optional[Seller]: # Sellers are pre-populated during search(); no extra fetch needed return self._store.get_seller("ebay", seller_platform_id) - def get_completed_sales(self, query: str) -> list[Listing]: + def get_completed_sales(self, query: str, pages: int = 1) -> list[Listing]: query_hash = hashlib.md5(query.encode()).hexdigest() if self._store.get_market_comp("ebay", query_hash): return [] # cache hit — comp already stored - params = { + base_params = { "_nkw": query, "LH_Sold": "1", "LH_Complete": "1", - "_sop": "13", # price + shipping: lowest first + "_sop": "13", # sort by price+shipping, lowest first "_ipg": "48", } + pages = max(1, pages) + page_params = [{**base_params, "_pgn": str(p)} for p in range(1, pages + 1)] + try: - html = self._get(params) - listings = scrape_listings(html) - prices = sorted(l.price for l in listings if l.price > 0) + with ThreadPoolExecutor(max_workers=min(pages, 3)) as ex: + htmls = 
list(ex.map(self._get, page_params)) + + seen_ids: set[str] = set() + all_listings: list[Listing] = [] + for html in htmls: + for listing in scrape_listings(html): + if listing.platform_listing_id not in seen_ids: + seen_ids.add(listing.platform_listing_id) + all_listings.append(listing) + + prices = sorted(l.price for l in all_listings if l.price > 0) if prices: - median = prices[len(prices) // 2] + mid = len(prices) // 2 + median = (prices[mid - 1] + prices[mid]) / 2 if len(prices) % 2 == 0 else prices[mid] self._store.save_market_comp(MarketComp( platform="ebay", query_hash=query_hash, @@ -332,6 +363,6 @@ class ScrapedEbayAdapter(PlatformAdapter): sample_count=len(prices), expires_at=(datetime.now(timezone.utc) + timedelta(hours=6)).isoformat(), )) - return listings + return all_listings except Exception: return [] diff --git a/app/trust/aggregator.py b/app/trust/aggregator.py index 83d7d5d..27f51bc 100644 --- a/app/trust/aggregator.py +++ b/app/trust/aggregator.py @@ -37,7 +37,7 @@ class Aggregator: red_flags.append("account_under_30_days") if seller and seller.feedback_count < 10: red_flags.append("low_feedback_count") - if clean["price_vs_market"] == 0: + if signal_scores.get("price_vs_market") == 0: # only flag when data exists and price is genuinely <50% of market red_flags.append("suspicious_price") if photo_hash_duplicate: red_flags.append("duplicate_photo") diff --git a/tests/db/test_store.py b/tests/db/test_store.py index d6ca099..26e60ac 100644 --- a/tests/db/test_store.py +++ b/tests/db/test_store.py @@ -1,4 +1,5 @@ import pytest +from datetime import datetime, timedelta, timezone from pathlib import Path from app.db.store import Store from app.db.models import Listing, Seller, TrustScore, MarketComp @@ -57,7 +58,7 @@ def test_save_and_get_market_comp(store): query_hash="abc123", median_price=1050.0, sample_count=12, - expires_at="2026-03-26T00:00:00", + expires_at=(datetime.now(timezone.utc) + timedelta(hours=6)).isoformat(), ) 
store.save_market_comp(comp) result = store.get_market_comp("ebay", "abc123") diff --git a/tests/platforms/test_ebay_scraper.py b/tests/platforms/test_ebay_scraper.py index 8e9a6e5..a4c8519 100644 --- a/tests/platforms/test_ebay_scraper.py +++ b/tests/platforms/test_ebay_scraper.py @@ -27,8 +27,9 @@ _EBAY_HTML = """ +
  • -
    RTX 4090 Founders Edition GPU
    +
    RTX 4090 Founders Edition GPUOpens in a new window or tab
    $950.00
    Used · Free shipping
    @@ -179,6 +180,15 @@ class TestScrapeListings: titles = [l.title for l in listings] assert "Shop on eBay" not in titles + def test_strips_ebay_accessibility_text_from_title(self): + """eBay injects a hidden 'Opens in a new window or tab' span into title links + for screen readers. get_text() is CSS-blind so we must strip it explicitly.""" + listings = scrape_listings(_EBAY_HTML) + for listing in listings: + assert "Opens in a new window or tab" not in listing.title + # Verify the actual title content is preserved + assert listings[0].title == "RTX 4090 Founders Edition GPU" + def test_parses_three_real_listings(self): assert len(scrape_listings(_EBAY_HTML)) == 3 diff --git a/tests/trust/test_aggregator.py b/tests/trust/test_aggregator.py index 4b52b28..613fec8 100644 --- a/tests/trust/test_aggregator.py +++ b/tests/trust/test_aggregator.py @@ -50,3 +50,46 @@ def test_partial_score_flagged_when_signals_missing(): } result = agg.aggregate(scores, photo_hash_duplicate=False, seller=None) assert result.score_is_partial is True + + +def test_suspicious_price_not_flagged_when_market_data_absent(): + """None price_vs_market (no market comp) must NOT trigger suspicious_price. + + Regression guard: clean[] replaces None with 0, so naive `clean[...] == 0` + would fire even when the signal is simply unavailable. 
+ """ + agg = Aggregator() + scores = { + "account_age": 15, "feedback_count": 15, + "feedback_ratio": 20, "price_vs_market": None, # no market data + "category_history": 0, + } + result = agg.aggregate(scores, photo_hash_duplicate=False, seller=None) + assert "suspicious_price" not in result.red_flags_json + + +def test_suspicious_price_flagged_when_price_genuinely_low(): + """price_vs_market == 0 (explicitly, meaning >50% below median) → flag fires.""" + agg = Aggregator() + scores = { + "account_age": 15, "feedback_count": 15, + "feedback_ratio": 20, "price_vs_market": 0, # price is scam-level low + "category_history": 0, + } + result = agg.aggregate(scores, photo_hash_duplicate=False, seller=None) + assert "suspicious_price" in result.red_flags_json + + +def test_new_account_not_flagged_when_age_absent(): + """account_age_days=None (scraper tier) must NOT trigger new_account or account_under_30_days.""" + agg = Aggregator() + scores = {k: 10 for k in ["account_age", "feedback_count", + "feedback_ratio", "price_vs_market", "category_history"]} + scraper_seller = Seller( + platform="ebay", platform_seller_id="u", username="u", + account_age_days=None, # not fetched at scraper tier + feedback_count=50, feedback_ratio=0.99, category_history_json="{}", + ) + result = agg.aggregate(scores, photo_hash_duplicate=False, seller=scraper_seller) + assert "new_account" not in result.red_flags_json + assert "account_under_30_days" not in result.red_flags_json diff --git a/web/src/stores/search.ts b/web/src/stores/search.ts index 9e44ba2..3ab385a 100644 --- a/web/src/stores/search.ts +++ b/web/src/stores/search.ts @@ -60,6 +60,7 @@ export interface SearchFilters { hideNewAccounts?: boolean hideSuspiciousPrice?: boolean hideDuplicatePhotos?: boolean + pages?: number // number of eBay result pages to fetch (48 listings/page, default 1) } // ── Store ──────────────────────────────────────────────────────────────────── @@ -83,7 +84,8 @@ export const useSearchStore = 
defineStore('search', () => { // API does not exist yet — stub returns empty results const params = new URLSearchParams({ q }) if (filters.maxPrice != null) params.set('max_price', String(filters.maxPrice)) - if (filters.minTrustScore != null) params.set('min_trust', String(filters.minTrustScore)) + if (filters.minPrice != null) params.set('min_price', String(filters.minPrice)) + if (filters.pages != null && filters.pages > 1) params.set('pages', String(filters.pages)) const res = await fetch(`/api/search?${params}`) if (!res.ok) throw new Error(`Search failed: ${res.status} ${res.statusText}`) diff --git a/web/src/views/SearchView.vue b/web/src/views/SearchView.vue index b0da997..1910284 100644 --- a/web/src/views/SearchView.vue +++ b/web/src/views/SearchView.vue @@ -41,6 +41,21 @@ {{ filters.minTrustScore ?? 0 }} +
    + Pages to fetch +
    + +
    +

    {{ (filters.pages ?? 1) * 48 }} listings · {{ (filters.pages ?? 1) * 2 }} Playwright calls

    +
    +
    Price
    @@ -166,6 +181,7 @@ const filters = reactive({ hideNewAccounts: false, hideSuspiciousPrice: false, hideDuplicatePhotos: false, + pages: 1, }) const CONDITIONS = [ @@ -407,6 +423,43 @@ async function onSearch() { height: 14px; } +.filter-pages { + display: flex; + gap: var(--space-1); +} + +.filter-pages-btn { + flex: 1; + padding: var(--space-1) 0; + background: var(--color-surface-raised); + border: 1px solid var(--color-border); + border-radius: var(--radius-sm); + color: var(--color-text-muted); + font-family: var(--font-body); + font-size: 0.8125rem; + font-weight: 600; + cursor: pointer; + transition: background 120ms ease, color 120ms ease, border-color 120ms ease; +} + +.filter-pages-btn:hover:not(.filter-pages-btn--active) { + border-color: var(--app-primary); + color: var(--app-primary); +} + +.filter-pages-btn--active { + background: var(--app-primary); + border-color: var(--app-primary); + color: var(--color-text-inverse); +} + +.filter-pages-hint { + font-size: 0.6875rem; + color: var(--color-text-muted); + margin: 0; + opacity: 0.75; +} + /* Results area */ .results-area { flex: 1;