feat(snipe): FastAPI layer, Playwright+Xvfb scraper, caching, tests

- FastAPI service (port 8510) wrapping scraper + trust scorer
- Playwright+Xvfb+stealth transport to bypass eBay Kasada bot protection
- li.s-card selector migration (eBay markup change from li.s-item)
- Three-layer caching: HTML (5min), phash (permanent), market comp (6h SQLite)
- Batch DB writes (executemany + single commit) — warm requests <1s
- Unique Xvfb display counter (:200–:299) prevents lock file collisions
- Vue 3 nginx web service (port 8509) proxying /api/ to FastAPI
- Auction card de-emphasis: opacity 0.72 for listings with >1h remaining
- 35 scraper unit tests updated for new li.s-card fixture markup
- tests/ volume-mounted in compose.override.yml for live test editing
This commit is contained in:
pyr0ball 2026-03-25 20:09:30 -07:00
parent 720744f75e
commit 58263d814a
15 changed files with 521 additions and 202 deletions

View file

@ -2,6 +2,11 @@ FROM python:3.11-slim
WORKDIR /app
# System deps for Playwright/Chromium
RUN apt-get update && apt-get install -y --no-install-recommends \
xvfb \
&& rm -rf /var/lib/apt/lists/*
# Install circuitforge-core from sibling directory (compose sets context: ..)
COPY circuitforge-core/ ./circuitforge-core/
RUN pip install --no-cache-dir -e ./circuitforge-core
@ -11,5 +16,10 @@ COPY snipe/ ./snipe/
WORKDIR /app/snipe
RUN pip install --no-cache-dir -e .
# Install Playwright + Chromium (after snipe deps so layer is cached separately)
RUN pip install --no-cache-dir playwright playwright-stealth && \
playwright install chromium && \
playwright install-deps chromium
EXPOSE 8510
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8510"]

0
api/__init__.py Normal file
View file

90
api/main.py Normal file
View file

@ -0,0 +1,90 @@
"""Snipe FastAPI — search endpoint wired to ScrapedEbayAdapter + TrustScorer."""
from __future__ import annotations
import dataclasses
import hashlib
import logging
import os
from pathlib import Path
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from circuitforge_core.config import load_env
from app.db.store import Store
from app.platforms import SearchFilters
from app.platforms.ebay.scraper import ScrapedEbayAdapter
from app.trust import TrustScorer
load_env(Path(".env"))  # load environment overrides from ./.env (working directory)

log = logging.getLogger(__name__)

# SQLite database location; SNIPE_DB env var overrides the default ./data path.
_DB_PATH = Path(os.environ.get("SNIPE_DB", "data/snipe.db"))
_DB_PATH.parent.mkdir(exist_ok=True)

app = FastAPI(title="Snipe API", version="0.1.0")
# Wide-open CORS — NOTE(review): acceptable for a local tool; tighten
# allow_origins before exposing this service beyond localhost.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/api/health")
def health():
    """Liveness probe — static payload, touches no dependencies."""
    return dict(status="ok")
@app.get("/api/search")
def search(q: str = "", max_price: float = 0, min_price: float = 0):
    """Search eBay for *q* and return listings plus trust/seller/market data.

    Query params:
        q         — search phrase; blank/whitespace returns an empty payload
                    without touching the scraper.
        max_price — upper price bound; 0 (default) means no bound.
        min_price — lower price bound; 0 (default) means no bound.

    Raises:
        HTTPException(502) when the scrape itself fails (bot block, network).
    """
    if not q.strip():
        return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None}

    store = Store(_DB_PATH)
    adapter = ScrapedEbayAdapter(store)
    filters = SearchFilters(
        max_price=max_price if max_price > 0 else None,
        min_price=min_price if min_price > 0 else None,
    )
    try:
        listings = adapter.search(q, filters)
    except Exception as e:
        log.warning("eBay scrape failed: %s", e)
        raise HTTPException(status_code=502, detail=f"eBay search failed: {e}") from e

    # Market-comp warming is best-effort: a comp failure must not 502 a
    # search that already produced listings (the original wrapped both calls
    # in one try and failed the whole request).
    try:
        adapter.get_completed_sales(q)  # warm market comp cache
    except Exception as e:
        log.warning("market comp warm failed: %s", e)

    store.save_listings(listings)
    scorer = TrustScorer(store)
    trust_scores_list = scorer.score_batch(listings, q)

    # Market comp — md5 is used only as a cache key, not for security.
    query_hash = hashlib.md5(q.encode()).hexdigest()
    comp = store.get_market_comp("ebay", query_hash)
    market_price = comp.median_price if comp else None

    # Serialize — keyed by platform_listing_id for easy Vue lookup.
    trust_map = {
        listing.platform_listing_id: dataclasses.asdict(ts)
        for listing, ts in zip(listings, trust_scores_list)
        if ts is not None
    }
    # Fetch each distinct seller once. The original dict comprehension hit the
    # DB twice per listing (once in the filter, once in the value expression).
    seller_map: dict = {}
    for listing in listings:
        sid = listing.seller_platform_id
        if sid and sid not in seller_map:
            seller = store.get_seller("ebay", sid)
            if seller is not None:
                seller_map[sid] = dataclasses.asdict(seller)

    return {
        "listings": [dataclasses.asdict(l) for l in listings],
        "trust_scores": trust_map,
        "sellers": seller_map,
        "market_price": market_price,
    }

View file

@ -20,14 +20,19 @@ class Store:
# --- Seller ---
def save_seller(self, seller: Seller) -> None:
self._conn.execute(
self.save_sellers([seller])
def save_sellers(self, sellers: list[Seller]) -> None:
self._conn.executemany(
"INSERT OR REPLACE INTO sellers "
"(platform, platform_seller_id, username, account_age_days, "
"feedback_count, feedback_ratio, category_history_json) "
"VALUES (?,?,?,?,?,?,?)",
(seller.platform, seller.platform_seller_id, seller.username,
seller.account_age_days, seller.feedback_count, seller.feedback_ratio,
seller.category_history_json),
[
(s.platform, s.platform_seller_id, s.username, s.account_age_days,
s.feedback_count, s.feedback_ratio, s.category_history_json)
for s in sellers
],
)
self._conn.commit()
@ -45,16 +50,20 @@ class Store:
# --- Listing ---
def save_listing(self, listing: Listing) -> None:
self._conn.execute(
self.save_listings([listing])
def save_listings(self, listings: list[Listing]) -> None:
self._conn.executemany(
"INSERT OR REPLACE INTO listings "
"(platform, platform_listing_id, title, price, currency, condition, "
"seller_platform_id, url, photo_urls, listing_age_days, buying_format, ends_at) "
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
(listing.platform, listing.platform_listing_id, listing.title,
listing.price, listing.currency, listing.condition,
listing.seller_platform_id, listing.url,
json.dumps(listing.photo_urls), listing.listing_age_days,
listing.buying_format, listing.ends_at),
[
(l.platform, l.platform_listing_id, l.title, l.price, l.currency,
l.condition, l.seller_platform_id, l.url,
json.dumps(l.photo_urls), l.listing_age_days, l.buying_format, l.ends_at)
for l in listings
],
)
self._conn.commit()

View file

@ -11,19 +11,30 @@ This is the MIT discovery layer. EbayAdapter (paid/CF proxy) unlocks full trust
from __future__ import annotations
import hashlib
import itertools
import re
import time
from datetime import datetime, timedelta, timezone
from typing import Optional
import requests
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
from playwright_stealth import Stealth
from app.db.models import Listing, MarketComp, Seller
from app.db.store import Store
from app.platforms import PlatformAdapter, SearchFilters
EBAY_SEARCH_URL = "https://www.ebay.com/sch/i.html"
_HTML_CACHE_TTL = 300 # seconds — 5 minutes
# Module-level cache persists across per-request adapter instantiations.
# Keyed by URL; value is (html, expiry_timestamp).
_html_cache: dict[str, tuple[str, float]] = {}
# Cycle through display numbers :200–:299 so concurrent/sequential Playwright
# calls don't collide on the Xvfb lock file from the previous run.
_display_counter = itertools.cycle(range(200, 300))
_HEADERS = {
"User-Agent": (
@ -39,6 +50,7 @@ _HEADERS = {
}
_SELLER_RE = re.compile(r"^(.+?)\s+\(([0-9,]+)\)\s+([\d.]+)%")
_FEEDBACK_RE = re.compile(r"([\d.]+)%\s+positive\s+\(([0-9,]+)\)", re.I)
_PRICE_RE = re.compile(r"[\d,]+\.?\d*")
_ITEM_ID_RE = re.compile(r"/itm/(\d+)")
_TIME_LEFT_RE = re.compile(r"(?:(\d+)d\s*)?(?:(\d+)h\s*)?(?:(\d+)m\s*)?(?:(\d+)s\s*)?left", re.I)
@ -92,58 +104,77 @@ def _parse_time_left(text: str) -> Optional[timedelta]:
return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
def _extract_seller_from_card(card) -> tuple[str, int, float]:
    """Extract (username, feedback_count, feedback_ratio) from an s-card element.

    The current eBay layout renders the seller name and the feedback summary
    as separate su-styled-text spans. The feedback span is located by regex;
    the span immediately before it is taken as the username. Returns
    ("", 0, 0.0) when no feedback span is present.
    """
    spans = [
        text
        for el in card.select("span.su-styled-text")
        if (text := el.get_text(strip=True))
    ]
    for idx, candidate in enumerate(spans):
        match = _FEEDBACK_RE.search(candidate)
        if not match:
            continue
        pct, raw_count = match.groups()
        name = spans[idx - 1].strip() if idx > 0 else ""
        return name, int(raw_count.replace(",", "")), float(pct) / 100.0
    return "", 0, 0.0
def scrape_listings(html: str) -> list[Listing]:
"""Parse eBay search results HTML into Listing objects."""
soup = BeautifulSoup(html, "lxml")
results = []
for item in soup.select("li.s-item"):
# eBay injects a ghost "Shop on eBay" promo as the first item — skip it
title_el = item.select_one("h3.s-item__title span, div.s-item__title span")
if not title_el or "Shop on eBay" in title_el.text:
for item in soup.select("li.s-card"):
# Skip promos: no data-listingid or title is "Shop on eBay"
platform_listing_id = item.get("data-listingid", "")
if not platform_listing_id:
continue
link_el = item.select_one("a.s-item__link")
title_el = item.select_one("div.s-card__title")
if not title_el or "Shop on eBay" in title_el.get_text():
continue
link_el = item.select_one('a.s-card__link[href*="/itm/"]')
url = link_el["href"].split("?")[0] if link_el else ""
id_match = _ITEM_ID_RE.search(url)
platform_listing_id = (
id_match.group(1) if id_match else hashlib.md5(url.encode()).hexdigest()[:12]
)
price_el = item.select_one("span.s-item__price")
price = _parse_price(price_el.text) if price_el else 0.0
price_el = item.select_one("span.s-card__price")
price = _parse_price(price_el.get_text()) if price_el else 0.0
condition_el = item.select_one("span.SECONDARY_INFO")
condition = condition_el.text.strip().lower() if condition_el else ""
condition_el = item.select_one("div.s-card__subtitle")
condition = condition_el.get_text(strip=True).split("·")[0].strip().lower() if condition_el else ""
seller_el = item.select_one("span.s-item__seller-info-text")
seller_username = _parse_seller(seller_el.text)[0] if seller_el else ""
seller_username, _, _ = _extract_seller_from_card(item)
# Images are lazy-loaded — check data-src before src
img_el = item.select_one("div.s-item__image-wrapper img, .s-item__image img")
photo_url = ""
if img_el:
photo_url = img_el.get("data-src") or img_el.get("src") or ""
img_el = item.select_one("img.s-card__image")
photo_url = img_el.get("src") or img_el.get("data-src") or "" if img_el else ""
# Auction detection: presence of s-item__time-left means auction format
time_el = item.select_one("span.s-item__time-left")
time_remaining = _parse_time_left(time_el.text) if time_el else None
# Auction detection via time-left text patterns in card spans
time_remaining = None
for span in item.select("span.su-styled-text"):
t = span.get_text(strip=True)
td = _parse_time_left(t)
if td:
time_remaining = td
break
buying_format = "auction" if time_remaining is not None else "fixed_price"
ends_at = None
if time_remaining is not None:
ends_at = (datetime.now(timezone.utc) + time_remaining).isoformat()
ends_at = (datetime.now(timezone.utc) + time_remaining).isoformat() if time_remaining else None
results.append(Listing(
platform="ebay",
platform_listing_id=platform_listing_id,
title=title_el.text.strip(),
title=title_el.get_text(strip=True),
price=price,
currency="USD",
condition=condition,
seller_platform_id=seller_username,
url=url,
photo_urls=[photo_url] if photo_url else [],
listing_age_days=0, # not reliably in search HTML
listing_age_days=0,
buying_format=buying_format,
ends_at=ends_at,
))
@ -162,11 +193,10 @@ def scrape_sellers(html: str) -> dict[str, Seller]:
soup = BeautifulSoup(html, "lxml")
sellers: dict[str, Seller] = {}
for item in soup.select("li.s-item"):
seller_el = item.select_one("span.s-item__seller-info-text")
if not seller_el:
for item in soup.select("li.s-card"):
if not item.get("data-listingid"):
continue
username, count, ratio = _parse_seller(seller_el.text)
username, count, ratio = _extract_seller_from_card(item)
if username and username not in sellers:
sellers[username] = Seller(
platform="ebay",
@ -194,17 +224,60 @@ class ScrapedEbayAdapter(PlatformAdapter):
category_history) cause TrustScorer to set score_is_partial=True.
"""
def __init__(self, store: Store, delay: float = 0.5):
def __init__(self, store: Store, delay: float = 1.0):
self._store = store
self._delay = delay
self._session = requests.Session()
self._session.headers.update(_HEADERS)
def _get(self, params: dict) -> str:
"""Fetch eBay search HTML via a stealthed Playwright Chromium instance.
Uses Xvfb virtual display (headless=False) to avoid Kasada's headless
detection — same pattern as other CF scrapers that face JS challenges.
Results are cached for _HTML_CACHE_TTL seconds so repeated searches
for the same query return immediately without re-scraping.
"""
url = EBAY_SEARCH_URL + "?" + "&".join(f"{k}={v}" for k, v in params.items())
cached = _html_cache.get(url)
if cached and time.time() < cached[1]:
return cached[0]
time.sleep(self._delay)
resp = self._session.get(EBAY_SEARCH_URL, params=params, timeout=15)
resp.raise_for_status()
return resp.text
import subprocess, os
display_num = next(_display_counter)
display = f":{display_num}"
xvfb = subprocess.Popen(
["Xvfb", display, "-screen", "0", "1280x800x24"],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
)
env = os.environ.copy()
env["DISPLAY"] = display
try:
with sync_playwright() as pw:
browser = pw.chromium.launch(
headless=False,
env=env,
args=["--no-sandbox", "--disable-dev-shm-usage"],
)
ctx = browser.new_context(
user_agent=_HEADERS["User-Agent"],
viewport={"width": 1280, "height": 800},
)
page = ctx.new_page()
Stealth().apply_stealth_sync(page)
page.goto(url, wait_until="domcontentloaded", timeout=30_000)
page.wait_for_timeout(2000) # let any JS challenges resolve
html = page.content()
browser.close()
finally:
xvfb.terminate()
xvfb.wait()
_html_cache[url] = (html, time.time() + _HTML_CACHE_TTL)
return html
def search(self, query: str, filters: SearchFilters) -> list[Listing]:
params: dict = {"_nkw": query, "_sop": "15", "_ipg": "48"}
@ -226,8 +299,7 @@ class ScrapedEbayAdapter(PlatformAdapter):
listings = scrape_listings(html)
# Cache seller objects extracted from the same page
for seller in scrape_sellers(html).values():
self._store.save_seller(seller)
self._store.save_sellers(list(scrape_sellers(html).values()))
return listings

View file

@ -11,6 +11,10 @@ try:
except ImportError:
_IMAGEHASH_AVAILABLE = False
# Module-level phash cache: url → hash string (or None on failure).
# Avoids re-downloading the same eBay CDN image on repeated searches.
_phash_cache: dict[str, Optional[str]] = {}
class PhotoScorer:
"""
@ -52,13 +56,17 @@ class PhotoScorer:
def _fetch_hash(self, url: str) -> Optional[str]:
if not url:
return None
if url in _phash_cache:
return _phash_cache[url]
try:
resp = requests.get(url, timeout=5, stream=True)
resp.raise_for_status()
img = Image.open(io.BytesIO(resp.content))
return str(imagehash.phash(img))
result: Optional[str] = str(imagehash.phash(img))
except Exception:
return None
result = None
_phash_cache[url] = result
return result
def _url_dedup(self, photo_urls_per_listing: list[list[str]]) -> list[bool]:
seen: set[str] = set()

View file

@ -1,9 +1,21 @@
services:
snipe:
api:
build:
context: ..
dockerfile: snipe/Dockerfile
network_mode: host
volumes:
- ../circuitforge-core:/app/circuitforge-core
- ./streamlit_app.py:/app/snipe/streamlit_app.py
- ./api:/app/snipe/api
- ./app:/app/snipe/app
- ./data:/app/snipe/data
- ./tests:/app/snipe/tests
environment:
- STREAMLIT_SERVER_RUN_ON_SAVE=true
- RELOAD=true
web:
build:
context: .
dockerfile: docker/web/Dockerfile
volumes:
- ./web/src:/app/src # not used at runtime but keeps override valid

View file

@ -4,8 +4,6 @@ WORKDIR /app
COPY web/package*.json ./
RUN npm ci --prefer-offline
COPY web/ ./
ARG VITE_BASE_URL=/snipe/
ENV VITE_BASE_URL=${VITE_BASE_URL}
RUN npm run build
# Stage 2: serve

View file

@ -5,6 +5,13 @@ server {
root /usr/share/nginx/html;
index index.html;
# Proxy API requests to the FastAPI backend container
location /api/ {
proxy_pass http://172.17.0.1:8510; # Docker host bridge IP — the api service runs with network_mode: host
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
# index.html never cache; ensures clients always get the latest entry point
# after a deployment (chunks are content-hashed so they can be cached forever)
location = /index.html {

View file

@ -2,11 +2,22 @@
set -euo pipefail
SERVICE=snipe
PORT=8509 # Vue web UI (nginx)
PORT=8509 # Vue web UI (nginx)
API_PORT=8510 # FastAPI
COMPOSE_FILE="compose.yml"
usage() {
echo "Usage: $0 {start|stop|restart|status|logs|open|update}"
echo "Usage: $0 {start|stop|restart|status|logs|open|build|update|test}"
echo ""
echo " start Build (if needed) and start all services"
echo " stop Stop and remove containers"
echo " restart Stop then start"
echo " status Show running containers"
echo " logs Follow logs (logs api | logs web | logs — defaults to all)"
echo " open Open web UI in browser"
echo " build Rebuild Docker images without cache"
echo " update Pull latest images and rebuild"
echo " test Run pytest test suite in the api container"
exit 1
}
@ -16,28 +27,45 @@ shift || true
case "$cmd" in
start)
docker compose -f "$COMPOSE_FILE" up -d
echo "$SERVICE started on http://localhost:$PORT"
echo "$SERVICE started — web: http://localhost:$PORT api: http://localhost:$API_PORT"
;;
stop)
docker compose -f "$COMPOSE_FILE" down
docker compose -f "$COMPOSE_FILE" down --remove-orphans
;;
restart)
docker compose -f "$COMPOSE_FILE" down
docker compose -f "$COMPOSE_FILE" down --remove-orphans
docker compose -f "$COMPOSE_FILE" up -d
echo "$SERVICE restarted on http://localhost:$PORT"
echo "$SERVICE restarted http://localhost:$PORT"
;;
status)
docker compose -f "$COMPOSE_FILE" ps
;;
logs)
docker compose -f "$COMPOSE_FILE" logs -f "${@:-$SERVICE}"
# logs [api|web] — default: all services
target="${1:-}"
if [[ -n "$target" ]]; then
docker compose -f "$COMPOSE_FILE" logs -f "$target"
else
docker compose -f "$COMPOSE_FILE" logs -f
fi
;;
open)
xdg-open "http://localhost:$PORT" 2>/dev/null || open "http://localhost:$PORT"
xdg-open "http://localhost:$PORT" 2>/dev/null || open "http://localhost:$PORT" 2>/dev/null || \
echo "Open http://localhost:$PORT in your browser"
;;
build)
docker compose -f "$COMPOSE_FILE" build --no-cache
echo "Build complete."
;;
update)
docker compose -f "$COMPOSE_FILE" pull
docker compose -f "$COMPOSE_FILE" up -d --build
echo "$SERVICE updated — http://localhost:$PORT"
;;
test)
echo "Running test suite..."
docker compose -f "$COMPOSE_FILE" exec api \
conda run -n job-seeker python -m pytest /app/snipe/tests/ -v "${@}"
;;
*)
usage

View file

@ -16,11 +16,15 @@ dependencies = [
"python-dotenv>=1.0",
"beautifulsoup4>=4.12",
"lxml>=5.0",
"fastapi>=0.111",
"uvicorn[standard]>=0.29",
"playwright>=1.44",
"playwright-stealth>=1.0",
]
[tool.setuptools.packages.find]
where = ["."]
include = ["app*"]
include = ["app*", "api*"]
[tool.pytest.ini_options]
testpaths = ["tests"]

View file

@ -1,55 +1,79 @@
"""Tests for the scraper-based eBay adapter.
Uses a minimal HTML fixture that mirrors eBay's search results structure.
Uses a minimal HTML fixture mirroring eBay's current s-card markup.
No HTTP requests are made — all tests operate on the pure parsing functions.
"""
import pytest
from datetime import timedelta
from app.platforms.ebay.scraper import (
scrape_listings, scrape_sellers, _parse_price, _parse_seller, _parse_time_left,
scrape_listings,
scrape_sellers,
_parse_price,
_parse_time_left,
_extract_seller_from_card,
)
from bs4 import BeautifulSoup
# ---------------------------------------------------------------------------
# Minimal eBay search results HTML fixture
# Minimal eBay search results HTML fixture (li.s-card schema)
# ---------------------------------------------------------------------------
_EBAY_HTML = """
<html><body>
<ul class="srp-results">
<!-- eBay injects this ghost item first should be skipped -->
<li class="s-item">
<div class="s-item__title"><span>Shop on eBay</span></div>
<a class="s-item__link" href="https://ebay.com/shop"></a>
<!-- Promo item: no data-listingid must be skipped -->
<li class="s-card">
<div class="s-card__title">Shop on eBay</div>
</li>
<!-- Real listing 1: established seller, normal price -->
<li class="s-item">
<h3 class="s-item__title"><span>RTX 4090 Founders Edition GPU</span></h3>
<a class="s-item__link" href="https://www.ebay.com/itm/123456789"></a>
<span class="s-item__price">$950.00</span>
<span class="SECONDARY_INFO">Used</span>
<div class="s-item__image-wrapper"><img src="https://i.ebayimg.com/thumbs/1.jpg"/></div>
<span class="s-item__seller-info-text">techguy (1,234) 99.1% positive feedback</span>
<!-- Real listing 1: established seller, used, fixed price -->
<li class="s-card" data-listingid="123456789">
<div class="s-card__title">RTX 4090 Founders Edition GPU</div>
<a class="s-card__link" href="https://www.ebay.com/itm/123456789?somequery=1"></a>
<span class="s-card__price">$950.00</span>
<div class="s-card__subtitle">Used · Free shipping</div>
<img class="s-card__image" src="https://i.ebayimg.com/thumbs/1.jpg"/>
<span class="su-styled-text">techguy</span>
<span class="su-styled-text">99.1% positive (1,234)</span>
</li>
<!-- Real listing 2: price range, new condition -->
<li class="s-item">
<h3 class="s-item__title"><span>RTX 4090 Gaming OC 24GB</span></h3>
<a class="s-item__link" href="https://www.ebay.com/itm/987654321"></a>
<span class="s-item__price">$1,100.00 to $1,200.00</span>
<span class="SECONDARY_INFO">New</span>
<div class="s-item__image-wrapper"><img data-src="https://i.ebayimg.com/thumbs/2.jpg" src=""/></div>
<span class="s-item__seller-info-text">gpu_warehouse (450) 98.7% positive feedback</span>
<!-- Real listing 2: price range, new, data-src photo -->
<li class="s-card" data-listingid="987654321">
<div class="s-card__title">RTX 4090 Gaming OC 24GB</div>
<a class="s-card__link" href="https://www.ebay.com/itm/987654321"></a>
<span class="s-card__price">$1,100.00 to $1,200.00</span>
<div class="s-card__subtitle">New · Free shipping</div>
<img class="s-card__image" data-src="https://i.ebayimg.com/thumbs/2.jpg" src=""/>
<span class="su-styled-text">gpu_warehouse</span>
<span class="su-styled-text">98.7% positive (450)</span>
</li>
<!-- Real listing 3: low feedback seller, suspicious price -->
<li class="s-item">
<h3 class="s-item__title"><span>RTX 4090 BNIB Sealed</span></h3>
<a class="s-item__link" href="https://www.ebay.com/itm/555000111"></a>
<span class="s-item__price">$499.00</span>
<span class="SECONDARY_INFO">New</span>
<div class="s-item__image-wrapper"><img src="https://i.ebayimg.com/thumbs/3.jpg"/></div>
<span class="s-item__seller-info-text">new_user_2024 (2) 100.0% positive feedback</span>
<!-- Real listing 3: new account, suspicious price -->
<li class="s-card" data-listingid="555000111">
<div class="s-card__title">RTX 4090 BNIB Sealed</div>
<a class="s-card__link" href="https://www.ebay.com/itm/555000111"></a>
<span class="s-card__price">$499.00</span>
<div class="s-card__subtitle">New</div>
<img class="s-card__image" src="https://i.ebayimg.com/thumbs/3.jpg"/>
<span class="su-styled-text">new_user_2024</span>
<span class="su-styled-text">100.0% positive (2)</span>
</li>
</ul>
</body></html>
"""
_AUCTION_HTML = """
<html><body>
<ul class="srp-results">
<li class="s-card" data-listingid="777000999">
<div class="s-card__title">Vintage Leica M6 Camera Body</div>
<a class="s-card__link" href="https://www.ebay.com/itm/777000999"></a>
<span class="s-card__price">$450.00</span>
<div class="s-card__subtitle">Used</div>
<img class="s-card__image" src="https://i.ebayimg.com/thumbs/cam.jpg"/>
<span class="su-styled-text">camera_dealer</span>
<span class="su-styled-text">97.5% positive (800)</span>
<span class="su-styled-text">2h 30m left</span>
</li>
</ul>
</body></html>
@ -57,7 +81,7 @@ _EBAY_HTML = """
# ---------------------------------------------------------------------------
# Unit tests: pure parsing functions
# _parse_price
# ---------------------------------------------------------------------------
class TestParsePrice:
@ -70,141 +94,189 @@ class TestParsePrice:
def test_price_with_commas(self):
assert _parse_price("$1,100.00") == 1100.0
def test_price_per_ea(self):
assert _parse_price("$1,234.56/ea") == 1234.56
def test_empty_returns_zero(self):
assert _parse_price("") == 0.0
# ---------------------------------------------------------------------------
# _extract_seller_from_card
# ---------------------------------------------------------------------------
class TestExtractSellerFromCard:
    """Seller extraction from a single li.s-card element."""

    def _card(self, html: str):
        # Helper: parse a fragment and return its s-card element.
        return BeautifulSoup(html, "lxml").select_one("li.s-card")

    def test_standard_card(self):
        card = self._card("""
        <li class="s-card" data-listingid="1">
            <span class="su-styled-text">techguy</span>
            <span class="su-styled-text">99.1% positive (1,234)</span>
        </li>""")
        username, count, ratio = _extract_seller_from_card(card)
        assert username == "techguy"
        assert count == 1234
        assert ratio == pytest.approx(0.991, abs=0.001)

    def test_new_account(self):
        card = self._card("""
        <li class="s-card" data-listingid="2">
            <span class="su-styled-text">new_user_2024</span>
            <span class="su-styled-text">100.0% positive (2)</span>
        </li>""")
        username, count, ratio = _extract_seller_from_card(card)
        assert username == "new_user_2024"
        assert count == 2
        assert ratio == pytest.approx(1.0, abs=0.001)

    def test_no_feedback_span_returns_empty(self):
        # A card with no feedback span must yield the empty-seller triple.
        card = self._card("""
        <li class="s-card" data-listingid="3">
            <span class="su-styled-text">some_seller</span>
        </li>""")
        username, count, ratio = _extract_seller_from_card(card)
        assert username == ""
        assert count == 0
        assert ratio == 0.0
# ---------------------------------------------------------------------------
# Integration tests: HTML fixture → domain objects
# ---------------------------------------------------------------------------
class TestScrapeListings:
def test_skips_shop_on_ebay_ghost(self):
listings = scrape_listings(_EBAY_HTML)
titles = [l.title for l in listings]
assert all("Shop on eBay" not in t for t in titles)
def test_parses_three_real_listings(self):
listings = scrape_listings(_EBAY_HTML)
assert len(listings) == 3
def test_extracts_platform_listing_id_from_url(self):
listings = scrape_listings(_EBAY_HTML)
assert listings[0].platform_listing_id == "123456789"
assert listings[1].platform_listing_id == "987654321"
def test_price_range_takes_lower(self):
listings = scrape_listings(_EBAY_HTML)
assert listings[1].price == 1100.0
def test_condition_lowercased(self):
listings = scrape_listings(_EBAY_HTML)
assert listings[0].condition == "used"
assert listings[1].condition == "new"
def test_photo_prefers_data_src(self):
listings = scrape_listings(_EBAY_HTML)
# Listing 2 has data-src set, src empty
assert listings[1].photo_urls == ["https://i.ebayimg.com/thumbs/2.jpg"]
def test_seller_platform_id_set(self):
listings = scrape_listings(_EBAY_HTML)
assert listings[0].seller_platform_id == "techguy"
assert listings[2].seller_platform_id == "new_user_2024"
class TestScrapeSellers:
def test_extracts_three_sellers(self):
sellers = scrape_sellers(_EBAY_HTML)
assert len(sellers) == 3
def test_feedback_count_and_ratio(self):
sellers = scrape_sellers(_EBAY_HTML)
assert sellers["techguy"].feedback_count == 1234
assert sellers["techguy"].feedback_ratio == pytest.approx(0.991, abs=0.001)
def test_account_age_is_zero(self):
"""account_age_days is always 0 from scraper — signals partial score."""
sellers = scrape_sellers(_EBAY_HTML)
assert all(s.account_age_days == 0 for s in sellers.values())
def test_category_history_is_empty(self):
"""category_history_json is always '{}' from scraper — signals partial score."""
sellers = scrape_sellers(_EBAY_HTML)
assert all(s.category_history_json == "{}" for s in sellers.values())
# ---------------------------------------------------------------------------
# _parse_time_left
# ---------------------------------------------------------------------------
class TestParseTimeLeft:
    """_parse_time_left: eBay "time left" strings → timedelta (or None)."""

    def test_days_and_hours(self):
        assert _parse_time_left("3d 14h left") == timedelta(days=3, hours=14)

    def test_hours_and_minutes(self):
        assert _parse_time_left("14h 23m left") == timedelta(hours=14, minutes=23)

    def test_minutes_and_seconds(self):
        assert _parse_time_left("23m 45s left") == timedelta(minutes=23, seconds=45)

    def test_days_only(self):
        assert _parse_time_left("2d left") == timedelta(days=2)

    def test_no_match_returns_none(self):
        assert _parse_time_left("Buy It Now") is None

    def test_empty_returns_none(self):
        assert _parse_time_left("") is None

    def test_all_zeros_returns_none(self):
        # Regex can match "0d 0h 0m 0s left" — a zero duration must map to None.
        assert _parse_time_left("0d 0h 0m 0s left") is None
# ---------------------------------------------------------------------------
# scrape_listings
# ---------------------------------------------------------------------------
class TestScrapeListings:
    """scrape_listings: fixture HTML → Listing objects (li.s-card schema)."""

    def test_skips_promo_without_listingid(self):
        titles = [l.title for l in scrape_listings(_EBAY_HTML)]
        assert "Shop on eBay" not in titles

    def test_parses_three_real_listings(self):
        assert len(scrape_listings(_EBAY_HTML)) == 3

    def test_platform_listing_id_from_data_attribute(self):
        listings = scrape_listings(_EBAY_HTML)
        assert listings[0].platform_listing_id == "123456789"
        assert listings[1].platform_listing_id == "987654321"
        assert listings[2].platform_listing_id == "555000111"

    def test_url_strips_query_string(self):
        listings = scrape_listings(_EBAY_HTML)
        assert "?" not in listings[0].url
        assert listings[0].url == "https://www.ebay.com/itm/123456789"

    def test_price_range_takes_lower(self):
        assert scrape_listings(_EBAY_HTML)[1].price == 1100.0

    def test_condition_extracted_and_lowercased(self):
        listings = scrape_listings(_EBAY_HTML)
        assert listings[0].condition == "used"
        assert listings[1].condition == "new"

    def test_photo_prefers_data_src_over_src(self):
        # Listing 2 has data-src set, src is empty
        assert scrape_listings(_EBAY_HTML)[1].photo_urls == ["https://i.ebayimg.com/thumbs/2.jpg"]

    def test_photo_falls_back_to_src(self):
        assert scrape_listings(_EBAY_HTML)[0].photo_urls == ["https://i.ebayimg.com/thumbs/1.jpg"]

    def test_seller_platform_id_from_card(self):
        listings = scrape_listings(_EBAY_HTML)
        assert listings[0].seller_platform_id == "techguy"
        assert listings[2].seller_platform_id == "new_user_2024"

    def test_platform_is_ebay(self):
        assert all(l.platform == "ebay" for l in scrape_listings(_EBAY_HTML))

    def test_currency_is_usd(self):
        assert all(l.currency == "USD" for l in scrape_listings(_EBAY_HTML))

    def test_fixed_price_no_ends_at(self):
        # The main fixture contains only fixed-price items.
        listings = scrape_listings(_EBAY_HTML)
        assert all(l.ends_at is None for l in listings)
        assert all(l.buying_format == "fixed_price" for l in listings)

    def test_auction_sets_buying_format_and_ends_at(self):
        listings = scrape_listings(_AUCTION_HTML)
        assert len(listings) == 1
        assert listings[0].buying_format == "auction"
        assert listings[0].ends_at is not None

    def test_empty_html_returns_empty_list(self):
        assert scrape_listings("<html><body></body></html>") == []
# ---------------------------------------------------------------------------
# scrape_sellers
# ---------------------------------------------------------------------------
class TestScrapeSellers:
    """scrape_sellers: fixture HTML → dict of Seller objects keyed by username."""

    def test_extracts_three_sellers(self):
        assert len(scrape_sellers(_EBAY_HTML)) == 3

    def test_feedback_count_and_ratio(self):
        techguy = scrape_sellers(_EBAY_HTML)["techguy"]
        assert techguy.feedback_count == 1234
        assert techguy.feedback_ratio == pytest.approx(0.991, abs=0.001)

    def test_deduplicates_sellers(self):
        """The same seller appearing on two cards yields a single Seller entry."""
        doubled = """<html><body><ul>
        <li class="s-card" data-listingid="1">
            <div class="s-card__title">Item A</div>
            <a class="s-card__link" href="https://www.ebay.com/itm/1"></a>
            <span class="su-styled-text">repeatguy</span>
            <span class="su-styled-text">99.0% positive (500)</span>
        </li>
        <li class="s-card" data-listingid="2">
            <div class="s-card__title">Item B</div>
            <a class="s-card__link" href="https://www.ebay.com/itm/2"></a>
            <span class="su-styled-text">repeatguy</span>
            <span class="su-styled-text">99.0% positive (500)</span>
        </li>
        </ul></body></html>"""
        result = scrape_sellers(doubled)
        assert len(result) == 1
        assert "repeatguy" in result

    def test_account_age_always_zero(self):
        """account_age_days is 0 from scraper — causes score_is_partial=True."""
        assert all(s.account_age_days == 0 for s in scrape_sellers(_EBAY_HTML).values())

    def test_category_history_always_empty(self):
        """category_history_json is '{}' from scraper — causes score_is_partial=True."""
        assert all(s.category_history_json == "{}" for s in scrape_sellers(_EBAY_HTML).values())

    def test_platform_is_ebay(self):
        assert all(s.platform == "ebay" for s in scrape_sellers(_EBAY_HTML).values())

View file

@ -183,8 +183,17 @@ h1, h2, h3, h4, h5, h6 {
/* Auction de-emphasis
Auctions with >1h remaining have fluid prices — de-emphasise
the card and current price to avoid anchoring on a misleading figure.
*/
.listing-card--auction {
opacity: 0.72;
border-color: var(--color-border-light);
}
.listing-card--auction:hover {
opacity: 1;
}
.auction-price--live {
opacity: 0.55;
font-style: italic;

View file

@ -3,7 +3,7 @@
class="listing-card"
:class="{
'steal-card': isSteal,
'listing-card--auction': isAuction,
'listing-card--auction': isAuction && hoursRemaining !== null && hoursRemaining > 1,
}"
>
<!-- Thumbnail -->

View file

@ -4,7 +4,7 @@ import UnoCSS from 'unocss/vite'
export default defineConfig({
plugins: [vue(), UnoCSS()],
base: process.env.VITE_BASE_URL ?? '/snipe/',
base: process.env.VITE_BASE_URL ?? '/',
server: {
host: '0.0.0.0',
port: 5174,