- FastAPI service (port 8510) wrapping scraper + trust scorer
- Playwright+Xvfb+stealth transport to bypass eBay Kasada bot protection
- li.s-card selector migration (eBay markup change from li.s-item)
- Three-layer caching: HTML (5min), phash (permanent), market comp (6h SQLite)
- Batch DB writes (executemany + single commit) — warm requests <1s
- Unique Xvfb display counter (:200–:299) prevents lock file collisions
- Vue 3 nginx web service (port 8509) proxying /api/ to FastAPI
- Auction card de-emphasis: opacity 0.72 for listings with >1h remaining
- 35 scraper unit tests updated for new li.s-card fixture markup
- tests/ volume-mounted in compose.override.yml for live test editing
282 lines
11 KiB
Python
282 lines
11 KiB
Python
"""Tests for the scraper-based eBay adapter.
|
|
|
|
Uses a minimal HTML fixture mirroring eBay's current s-card markup.
|
|
No HTTP requests are made — all tests operate on the pure parsing functions.
|
|
"""
|
|
import pytest
|
|
from datetime import timedelta
|
|
from app.platforms.ebay.scraper import (
|
|
scrape_listings,
|
|
scrape_sellers,
|
|
_parse_price,
|
|
_parse_time_left,
|
|
_extract_seller_from_card,
|
|
)
|
|
from bs4 import BeautifulSoup
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Minimal eBay search results HTML fixture (li.s-card schema)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_EBAY_HTML = """
|
|
<html><body>
|
|
<ul class="srp-results">
|
|
<!-- Promo item: no data-listingid — must be skipped -->
|
|
<li class="s-card">
|
|
<div class="s-card__title">Shop on eBay</div>
|
|
</li>
|
|
|
|
<!-- Real listing 1: established seller, used, fixed price -->
|
|
<li class="s-card" data-listingid="123456789">
|
|
<div class="s-card__title">RTX 4090 Founders Edition GPU</div>
|
|
<a class="s-card__link" href="https://www.ebay.com/itm/123456789?somequery=1"></a>
|
|
<span class="s-card__price">$950.00</span>
|
|
<div class="s-card__subtitle">Used · Free shipping</div>
|
|
<img class="s-card__image" src="https://i.ebayimg.com/thumbs/1.jpg"/>
|
|
<span class="su-styled-text">techguy</span>
|
|
<span class="su-styled-text">99.1% positive (1,234)</span>
|
|
</li>
|
|
|
|
<!-- Real listing 2: price range, new, data-src photo -->
|
|
<li class="s-card" data-listingid="987654321">
|
|
<div class="s-card__title">RTX 4090 Gaming OC 24GB</div>
|
|
<a class="s-card__link" href="https://www.ebay.com/itm/987654321"></a>
|
|
<span class="s-card__price">$1,100.00 to $1,200.00</span>
|
|
<div class="s-card__subtitle">New · Free shipping</div>
|
|
<img class="s-card__image" data-src="https://i.ebayimg.com/thumbs/2.jpg" src=""/>
|
|
<span class="su-styled-text">gpu_warehouse</span>
|
|
<span class="su-styled-text">98.7% positive (450)</span>
|
|
</li>
|
|
|
|
<!-- Real listing 3: new account, suspicious price -->
|
|
<li class="s-card" data-listingid="555000111">
|
|
<div class="s-card__title">RTX 4090 BNIB Sealed</div>
|
|
<a class="s-card__link" href="https://www.ebay.com/itm/555000111"></a>
|
|
<span class="s-card__price">$499.00</span>
|
|
<div class="s-card__subtitle">New</div>
|
|
<img class="s-card__image" src="https://i.ebayimg.com/thumbs/3.jpg"/>
|
|
<span class="su-styled-text">new_user_2024</span>
|
|
<span class="su-styled-text">100.0% positive (2)</span>
|
|
</li>
|
|
</ul>
|
|
</body></html>
|
|
"""
|
|
|
|
_AUCTION_HTML = """
|
|
<html><body>
|
|
<ul class="srp-results">
|
|
<li class="s-card" data-listingid="777000999">
|
|
<div class="s-card__title">Vintage Leica M6 Camera Body</div>
|
|
<a class="s-card__link" href="https://www.ebay.com/itm/777000999"></a>
|
|
<span class="s-card__price">$450.00</span>
|
|
<div class="s-card__subtitle">Used</div>
|
|
<img class="s-card__image" src="https://i.ebayimg.com/thumbs/cam.jpg"/>
|
|
<span class="su-styled-text">camera_dealer</span>
|
|
<span class="su-styled-text">97.5% positive (800)</span>
|
|
<span class="su-styled-text">2h 30m left</span>
|
|
</li>
|
|
</ul>
|
|
</body></html>
|
|
"""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _parse_price
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestParsePrice:
    """Checks for ``_parse_price`` on the price strings shown in result cards."""

    def test_simple_price(self):
        """A plain dollar amount parses to the matching float."""
        parsed = _parse_price("$950.00")
        assert parsed == 950.0

    def test_price_range_takes_lower_bound(self):
        """For a 'low to high' range, the low end is the parsed price."""
        parsed = _parse_price("$900.00 to $1,050.00")
        assert parsed == 900.0

    def test_price_with_commas(self):
        """Thousands separators do not disturb the parsed value."""
        parsed = _parse_price("$1,100.00")
        assert parsed == 1100.0

    def test_price_per_ea(self):
        """A trailing '/ea' unit suffix is ignored."""
        parsed = _parse_price("$1,234.56/ea")
        assert parsed == 1234.56

    def test_empty_returns_zero(self):
        """An empty string falls back to 0.0."""
        parsed = _parse_price("")
        assert parsed == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _extract_seller_from_card
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestExtractSellerFromCard:
    """Checks for ``_extract_seller_from_card`` on single-card snippets."""

    def _card(self, html: str):
        """Parse *html* with the lxml parser and return its first ``li.s-card``."""
        soup = BeautifulSoup(html, "lxml")
        return soup.select_one("li.s-card")

    def test_standard_card(self):
        """Username, feedback count and ratio are read from the two spans."""
        snippet = """
            <li class="s-card" data-listingid="1">
              <span class="su-styled-text">techguy</span>
              <span class="su-styled-text">99.1% positive (1,234)</span>
            </li>"""
        seller_name, feedback_count, feedback_ratio = _extract_seller_from_card(
            self._card(snippet)
        )
        assert seller_name == "techguy"
        assert feedback_count == 1234
        assert feedback_ratio == pytest.approx(0.991, abs=0.001)

    def test_new_account(self):
        """A 100.0% seller with two feedbacks yields count 2 and ratio ~1.0."""
        snippet = """
            <li class="s-card" data-listingid="2">
              <span class="su-styled-text">new_user_2024</span>
              <span class="su-styled-text">100.0% positive (2)</span>
            </li>"""
        seller_name, feedback_count, feedback_ratio = _extract_seller_from_card(
            self._card(snippet)
        )
        assert seller_name == "new_user_2024"
        assert feedback_count == 2
        assert feedback_ratio == pytest.approx(1.0, abs=0.001)

    def test_no_feedback_span_returns_empty(self):
        """Without a feedback span the result is ("", 0, 0.0)."""
        snippet = """
            <li class="s-card" data-listingid="3">
              <span class="su-styled-text">some_seller</span>
            </li>"""
        seller_name, feedback_count, feedback_ratio = _extract_seller_from_card(
            self._card(snippet)
        )
        assert seller_name == ""
        assert feedback_count == 0
        assert feedback_ratio == 0.0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _parse_time_left
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestParseTimeLeft:
    """Checks for ``_parse_time_left`` on auction countdown labels."""

    def test_days_and_hours(self):
        """'3d 14h left' becomes a timedelta of 3 days and 14 hours."""
        expected = timedelta(days=3, hours=14)
        assert _parse_time_left("3d 14h left") == expected

    def test_hours_and_minutes(self):
        """'14h 23m left' becomes 14 hours and 23 minutes."""
        expected = timedelta(hours=14, minutes=23)
        assert _parse_time_left("14h 23m left") == expected

    def test_minutes_and_seconds(self):
        """'23m 45s left' becomes 23 minutes and 45 seconds."""
        expected = timedelta(minutes=23, seconds=45)
        assert _parse_time_left("23m 45s left") == expected

    def test_days_only(self):
        """A single unit ('2d left') is enough to produce a timedelta."""
        expected = timedelta(days=2)
        assert _parse_time_left("2d left") == expected

    def test_no_match_returns_none(self):
        """Text without any countdown units yields None."""
        remaining = _parse_time_left("Buy It Now")
        assert remaining is None

    def test_empty_returns_none(self):
        """An empty string yields None."""
        remaining = _parse_time_left("")
        assert remaining is None

    def test_all_zeros_returns_none(self):
        """A countdown whose units are all zero is reported as None."""
        remaining = _parse_time_left("0d 0h 0m 0s left")
        assert remaining is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# scrape_listings
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestScrapeListings:
    """Checks for ``scrape_listings`` against the module-level HTML fixtures."""

    def test_skips_promo_without_listingid(self):
        """The promo card (no data-listingid) must not appear in the results."""
        for listing in scrape_listings(_EBAY_HTML):
            assert listing.title != "Shop on eBay"

    def test_parses_three_real_listings(self):
        """Exactly the three cards carrying data-listingid are returned."""
        listings = scrape_listings(_EBAY_HTML)
        assert len(listings) == 3

    def test_platform_listing_id_from_data_attribute(self):
        """Listing ids come from data-listingid, in document order."""
        listings = scrape_listings(_EBAY_HTML)
        first, second, third = listings[0], listings[1], listings[2]
        assert first.platform_listing_id == "123456789"
        assert second.platform_listing_id == "987654321"
        assert third.platform_listing_id == "555000111"

    def test_url_strips_query_string(self):
        """The listing URL is stored without its query string."""
        first = scrape_listings(_EBAY_HTML)[0]
        assert "?" not in first.url
        assert first.url == "https://www.ebay.com/itm/123456789"

    def test_price_range_takes_lower(self):
        """A ranged price on a card resolves to its lower bound."""
        second = scrape_listings(_EBAY_HTML)[1]
        assert second.price == 1100.0

    def test_condition_extracted_and_lowercased(self):
        """Condition is taken from the subtitle and lower-cased."""
        listings = scrape_listings(_EBAY_HTML)
        first, second = listings[0], listings[1]
        assert first.condition == "used"
        assert second.condition == "new"

    def test_photo_prefers_data_src_over_src(self):
        """The data-src URL is used for the photo of listing 2."""
        # Listing 2 has data-src set, src is empty
        second = scrape_listings(_EBAY_HTML)[1]
        assert second.photo_urls == ["https://i.ebayimg.com/thumbs/2.jpg"]

    def test_photo_falls_back_to_src(self):
        """With no data-src attribute, the plain src URL is used."""
        first = scrape_listings(_EBAY_HTML)[0]
        assert first.photo_urls == ["https://i.ebayimg.com/thumbs/1.jpg"]

    def test_seller_platform_id_from_card(self):
        """Each listing carries the seller username found on its card."""
        listings = scrape_listings(_EBAY_HTML)
        first, third = listings[0], listings[2]
        assert first.seller_platform_id == "techguy"
        assert third.seller_platform_id == "new_user_2024"

    def test_platform_is_ebay(self):
        """Every listing is tagged with the 'ebay' platform."""
        for listing in scrape_listings(_EBAY_HTML):
            assert listing.platform == "ebay"

    def test_currency_is_usd(self):
        """Every listing is priced in USD."""
        for listing in scrape_listings(_EBAY_HTML):
            assert listing.currency == "USD"

    def test_fixed_price_no_ends_at(self):
        """Cards without a countdown are fixed-price and have no end time."""
        listings = scrape_listings(_EBAY_HTML)
        for listing in listings:
            assert listing.ends_at is None
        for listing in listings:
            assert listing.buying_format == "fixed_price"

    def test_auction_sets_buying_format_and_ends_at(self):
        """A card with a 'time left' span is an auction with an end time."""
        listings = scrape_listings(_AUCTION_HTML)
        assert len(listings) == 1
        auction = listings[0]
        assert auction.buying_format == "auction"
        assert auction.ends_at is not None

    def test_empty_html_returns_empty_list(self):
        """A page with no cards yields an empty list."""
        listings = scrape_listings("<html><body></body></html>")
        assert listings == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# scrape_sellers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestScrapeSellers:
    """Checks for ``scrape_sellers`` against the module-level HTML fixture."""

    def test_extracts_three_sellers(self):
        """One seller entry is produced per distinct seller in the fixture."""
        sellers = scrape_sellers(_EBAY_HTML)
        assert len(sellers) == 3

    def test_feedback_count_and_ratio(self):
        """Feedback count and ratio are parsed from the card's feedback span."""
        techguy = scrape_sellers(_EBAY_HTML)["techguy"]
        assert techguy.feedback_count == 1234
        assert techguy.feedback_ratio == pytest.approx(0.991, abs=0.001)

    def test_deduplicates_sellers(self):
        """A seller present on two cards is returned only once."""
        # Same seller appearing in two cards should only produce one Seller object
        duplicated_markup = """<html><body><ul>
          <li class="s-card" data-listingid="1">
            <div class="s-card__title">Item A</div>
            <a class="s-card__link" href="https://www.ebay.com/itm/1"></a>
            <span class="su-styled-text">repeatguy</span>
            <span class="su-styled-text">99.0% positive (500)</span>
          </li>
          <li class="s-card" data-listingid="2">
            <div class="s-card__title">Item B</div>
            <a class="s-card__link" href="https://www.ebay.com/itm/2"></a>
            <span class="su-styled-text">repeatguy</span>
            <span class="su-styled-text">99.0% positive (500)</span>
          </li>
        </ul></body></html>"""
        sellers = scrape_sellers(duplicated_markup)
        assert len(sellers) == 1
        assert "repeatguy" in sellers

    def test_account_age_always_zero(self):
        """account_age_days is 0 from scraper — causes score_is_partial=True."""
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.account_age_days == 0

    def test_category_history_always_empty(self):
        """category_history_json is '{}' from scraper — causes score_is_partial=True."""
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.category_history_json == "{}"

    def test_platform_is_ebay(self):
        """Every scraped seller is tagged with the 'ebay' platform."""
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.platform == "ebay"
|