feat(snipe): FastAPI layer, Playwright+Xvfb scraper, caching, tests

- FastAPI service (port 8510) wrapping scraper + trust scorer
- Playwright+Xvfb+stealth transport to bypass eBay Kasada bot protection
- li.s-card selector migration (eBay markup change from li.s-item)
- Three-layer caching: HTML (5min), phash (permanent), market comp (6h SQLite)
- Batch DB writes (executemany + single commit) — warm requests <1s
- Unique Xvfb display counter (:200–:299) prevents lock file collisions
- Vue 3 nginx web service (port 8509) proxying /api/ to FastAPI
- Auction card de-emphasis: opacity 0.72 for listings with >1h remaining
- 35 scraper unit tests updated for new li.s-card fixture markup
- tests/ volume-mounted in compose.override.yml for live test editing
This commit is contained in:
pyr0ball 2026-03-25 20:09:30 -07:00
parent 720744f75e
commit 58263d814a
15 changed files with 521 additions and 202 deletions

View file

@ -2,6 +2,11 @@ FROM python:3.11-slim
WORKDIR /app WORKDIR /app
# System deps for Playwright/Chromium
RUN apt-get update && apt-get install -y --no-install-recommends \
xvfb \
&& rm -rf /var/lib/apt/lists/*
# Install circuitforge-core from sibling directory (compose sets context: ..) # Install circuitforge-core from sibling directory (compose sets context: ..)
COPY circuitforge-core/ ./circuitforge-core/ COPY circuitforge-core/ ./circuitforge-core/
RUN pip install --no-cache-dir -e ./circuitforge-core RUN pip install --no-cache-dir -e ./circuitforge-core
@ -11,5 +16,10 @@ COPY snipe/ ./snipe/
WORKDIR /app/snipe WORKDIR /app/snipe
RUN pip install --no-cache-dir -e . RUN pip install --no-cache-dir -e .
# Install Playwright + Chromium (after snipe deps so layer is cached separately)
RUN pip install --no-cache-dir playwright playwright-stealth && \
playwright install chromium && \
playwright install-deps chromium
EXPOSE 8510 EXPOSE 8510
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8510"] CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8510"]

0
api/__init__.py Normal file
View file

90
api/main.py Normal file
View file

@ -0,0 +1,90 @@
"""Snipe FastAPI — search endpoint wired to ScrapedEbayAdapter + TrustScorer."""
from __future__ import annotations

import dataclasses
import hashlib
import logging
import os
from pathlib import Path

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware

from circuitforge_core.config import load_env
from app.db.store import Store
from app.platforms import SearchFilters
from app.platforms.ebay.scraper import ScrapedEbayAdapter
from app.trust import TrustScorer

# Import-time side effect: load environment variables from ./.env.
load_env(Path(".env"))

log = logging.getLogger(__name__)

# SQLite location is overridable via SNIPE_DB; create the parent directory
# up front so the first connection on a fresh checkout/volume doesn't fail.
_DB_PATH = Path(os.environ.get("SNIPE_DB", "data/snipe.db"))
_DB_PATH.parent.mkdir(exist_ok=True)

app = FastAPI(title="Snipe API", version="0.1.0")
# NOTE(review): CORS is wide open — presumably fine for a local/self-hosted
# UI, but tighten allow_origins before any public deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/api/health")
def health():
    """Liveness probe — always reports the service as up."""
    payload = {"status": "ok"}
    return payload
@app.get("/api/search")
def search(q: str = "", max_price: float = 0, min_price: float = 0):
    """Scrape eBay for *q*, persist the results, and return the payload
    the Vue UI renders.

    Returns a dict with:
        listings      — Listing dicts in scrape order
        trust_scores  — platform_listing_id → TrustScore dict
        sellers       — seller_platform_id → Seller dict
        market_price  — cached median completed-sale price, or None

    Raises HTTP 502 when the scrape itself fails.
    """
    if not q.strip():
        # Empty query: return an empty-but-well-formed payload so the UI
        # needs no special case.
        return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None}
    store = Store(_DB_PATH)
    adapter = ScrapedEbayAdapter(store)
    filters = SearchFilters(
        max_price=max_price if max_price > 0 else None,
        min_price=min_price if min_price > 0 else None,
    )
    try:
        listings = adapter.search(q, filters)
        adapter.get_completed_sales(q)  # warm market comp cache
    except Exception as e:
        log.warning("eBay scrape failed: %s", e)
        # Chain the original exception so server tracebacks show the root cause.
        raise HTTPException(status_code=502, detail=f"eBay search failed: {e}") from e
    store.save_listings(listings)
    scorer = TrustScorer(store)
    trust_scores_list = scorer.score_batch(listings, q)
    # Market comp — keyed by a hash of the raw query string (not
    # security-sensitive, so md5 is acceptable here).
    query_hash = hashlib.md5(q.encode()).hexdigest()
    comp = store.get_market_comp("ebay", query_hash)
    market_price = comp.median_price if comp else None
    # Serialize — keyed by platform_listing_id for easy Vue lookup.
    trust_map = {
        listing.platform_listing_id: dataclasses.asdict(ts)
        for listing, ts in zip(listings, trust_scores_list)
        if ts is not None
    }
    # One DB lookup per distinct seller. The previous dict comprehension
    # called store.get_seller twice per listing (once in the filter, once
    # in the value) and re-queried duplicate sellers.
    seller_map: dict = {}
    for listing in listings:
        sid = listing.seller_platform_id
        if not sid or sid in seller_map:
            continue
        seller = store.get_seller("ebay", sid)
        if seller:
            seller_map[sid] = dataclasses.asdict(seller)
    return {
        "listings": [dataclasses.asdict(l) for l in listings],
        "trust_scores": trust_map,
        "sellers": seller_map,
        "market_price": market_price,
    }

View file

@ -20,14 +20,19 @@ class Store:
# --- Seller --- # --- Seller ---
def save_seller(self, seller: "Seller") -> None:
    """Persist a single seller — thin wrapper over the batch path."""
    self.save_sellers([seller])

def save_sellers(self, sellers: "list[Seller]") -> None:
    """Upsert many sellers with one executemany and a single commit."""
    sql = (
        "INSERT OR REPLACE INTO sellers "
        "(platform, platform_seller_id, username, account_age_days, "
        "feedback_count, feedback_ratio, category_history_json) "
        "VALUES (?,?,?,?,?,?,?)"
    )
    rows = []
    for s in sellers:
        rows.append((
            s.platform, s.platform_seller_id, s.username,
            s.account_age_days, s.feedback_count, s.feedback_ratio,
            s.category_history_json,
        ))
    self._conn.executemany(sql, rows)
    self._conn.commit()
@ -45,16 +50,20 @@ class Store:
# --- Listing --- # --- Listing ---
def save_listing(self, listing: "Listing") -> None:
    """Persist a single listing — delegates to the batch writer."""
    self.save_listings([listing])

def save_listings(self, listings: "list[Listing]") -> None:
    """Upsert a batch of listings with one executemany and a single commit.

    photo_urls is serialized to a JSON string for storage.
    """
    sql = (
        "INSERT OR REPLACE INTO listings "
        "(platform, platform_listing_id, title, price, currency, condition, "
        "seller_platform_id, url, photo_urls, listing_age_days, buying_format, ends_at) "
        "VALUES (?,?,?,?,?,?,?,?,?,?,?,?)"
    )
    rows = []
    for item in listings:
        rows.append((
            item.platform, item.platform_listing_id, item.title, item.price,
            item.currency, item.condition, item.seller_platform_id, item.url,
            json.dumps(item.photo_urls), item.listing_age_days,
            item.buying_format, item.ends_at,
        ))
    self._conn.executemany(sql, rows)
    self._conn.commit()

View file

@ -11,19 +11,30 @@ This is the MIT discovery layer. EbayAdapter (paid/CF proxy) unlocks full trust
from __future__ import annotations from __future__ import annotations
import hashlib import hashlib
import itertools
import re import re
import time import time
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Optional from typing import Optional
import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
from playwright_stealth import Stealth
from app.db.models import Listing, MarketComp, Seller from app.db.models import Listing, MarketComp, Seller
from app.db.store import Store from app.db.store import Store
from app.platforms import PlatformAdapter, SearchFilters from app.platforms import PlatformAdapter, SearchFilters
EBAY_SEARCH_URL = "https://www.ebay.com/sch/i.html" EBAY_SEARCH_URL = "https://www.ebay.com/sch/i.html"
_HTML_CACHE_TTL = 300 # seconds — 5 minutes
# Module-level cache persists across per-request adapter instantiations.
# Keyed by URL; value is (html, expiry_timestamp).
_html_cache: dict[str, tuple[str, float]] = {}
# Cycle through display numbers :200–:299 so concurrent/sequential Playwright
# calls don't collide on the Xvfb lock file from the previous run.
_display_counter = itertools.cycle(range(200, 300))
_HEADERS = { _HEADERS = {
"User-Agent": ( "User-Agent": (
@ -39,6 +50,7 @@ _HEADERS = {
} }
_SELLER_RE = re.compile(r"^(.+?)\s+\(([0-9,]+)\)\s+([\d.]+)%") _SELLER_RE = re.compile(r"^(.+?)\s+\(([0-9,]+)\)\s+([\d.]+)%")
_FEEDBACK_RE = re.compile(r"([\d.]+)%\s+positive\s+\(([0-9,]+)\)", re.I)
_PRICE_RE = re.compile(r"[\d,]+\.?\d*") _PRICE_RE = re.compile(r"[\d,]+\.?\d*")
_ITEM_ID_RE = re.compile(r"/itm/(\d+)") _ITEM_ID_RE = re.compile(r"/itm/(\d+)")
_TIME_LEFT_RE = re.compile(r"(?:(\d+)d\s*)?(?:(\d+)h\s*)?(?:(\d+)m\s*)?(?:(\d+)s\s*)?left", re.I) _TIME_LEFT_RE = re.compile(r"(?:(\d+)d\s*)?(?:(\d+)h\s*)?(?:(\d+)m\s*)?(?:(\d+)s\s*)?left", re.I)
@ -92,58 +104,77 @@ def _parse_time_left(text: str) -> Optional[timedelta]:
return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds) return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
def _extract_seller_from_card(card) -> tuple[str, int, float]:
    """Pull (username, feedback_count, feedback_ratio) out of an s-card.

    eBay's current layout renders the seller name and the feedback summary
    as adjacent ``su-styled-text`` spans: we locate the feedback span via
    _FEEDBACK_RE and treat the span immediately before it as the username.
    Returns ("", 0, 0.0) when no feedback span is present.
    """
    spans: list[str] = []
    for node in card.select("span.su-styled-text"):
        text = node.get_text(strip=True)
        if text:
            spans.append(text)
    for idx, text in enumerate(spans):
        match = _FEEDBACK_RE.search(text)
        if match is None:
            continue
        pct, raw_count = match.groups()
        # Username is the span just before the feedback span (if any).
        username = spans[idx - 1].strip() if idx > 0 else ""
        return username, int(raw_count.replace(",", "")), float(pct) / 100.0
    return "", 0, 0.0
def scrape_listings(html: str) -> list[Listing]:
    """Parse eBay search-results HTML (li.s-card markup) into Listing objects.

    Promo cards (no data-listingid attribute, or a "Shop on eBay" title)
    are skipped. Auctions are detected by a "…left" time string in any of
    the card's styled-text spans; for those, ends_at is projected from
    now + time remaining.
    """
    soup = BeautifulSoup(html, "lxml")
    results = []
    for item in soup.select("li.s-card"):
        # Real listings carry data-listingid; promos don't.
        platform_listing_id = item.get("data-listingid", "")
        if not platform_listing_id:
            continue
        title_el = item.select_one("div.s-card__title")
        if not title_el or "Shop on eBay" in title_el.get_text():
            continue
        link_el = item.select_one('a.s-card__link[href*="/itm/"]')
        url = link_el["href"].split("?")[0] if link_el else ""
        price_el = item.select_one("span.s-card__price")
        price = _parse_price(price_el.get_text()) if price_el else 0.0
        # Subtitle looks like "Used · Free shipping" — condition is the
        # first "·"-separated segment.
        condition_el = item.select_one("div.s-card__subtitle")
        condition = (
            condition_el.get_text(strip=True).split("·")[0].strip().lower()
            if condition_el else ""
        )
        seller_username, _, _ = _extract_seller_from_card(item)
        # Images are lazy-loaded: the real URL lives in data-src and src may
        # hold a placeholder — prefer data-src. Parenthesize the whole
        # expression: the previous `a or b or "" if img_el else ""` bound
        # the conditional only to the last term, so a card without an <img>
        # raised AttributeError on img_el.get("src").
        img_el = item.select_one("img.s-card__image")
        photo_url = (img_el.get("data-src") or img_el.get("src") or "") if img_el else ""
        # Auction detection via "Xd Yh Zm left" text in any styled span.
        time_remaining = None
        for span in item.select("span.su-styled-text"):
            td = _parse_time_left(span.get_text(strip=True))
            if td:
                time_remaining = td
                break
        buying_format = "auction" if time_remaining is not None else "fixed_price"
        # Use `is not None` here too so buying_format and ends_at can never
        # disagree about whether the card is an auction.
        ends_at = (
            (datetime.now(timezone.utc) + time_remaining).isoformat()
            if time_remaining is not None else None
        )
        results.append(Listing(
            platform="ebay",
            platform_listing_id=platform_listing_id,
            title=title_el.get_text(strip=True),
            price=price,
            currency="USD",
            condition=condition,
            seller_platform_id=seller_username,
            url=url,
            photo_urls=[photo_url] if photo_url else [],
            listing_age_days=0,  # not reliably present in search HTML
            buying_format=buying_format,
            ends_at=ends_at,
        ))
    return results
@ -162,11 +193,10 @@ def scrape_sellers(html: str) -> dict[str, Seller]:
soup = BeautifulSoup(html, "lxml") soup = BeautifulSoup(html, "lxml")
sellers: dict[str, Seller] = {} sellers: dict[str, Seller] = {}
for item in soup.select("li.s-item"): for item in soup.select("li.s-card"):
seller_el = item.select_one("span.s-item__seller-info-text") if not item.get("data-listingid"):
if not seller_el:
continue continue
username, count, ratio = _parse_seller(seller_el.text) username, count, ratio = _extract_seller_from_card(item)
if username and username not in sellers: if username and username not in sellers:
sellers[username] = Seller( sellers[username] = Seller(
platform="ebay", platform="ebay",
@ -194,17 +224,60 @@ class ScrapedEbayAdapter(PlatformAdapter):
category_history) cause TrustScorer to set score_is_partial=True. category_history) cause TrustScorer to set score_is_partial=True.
""" """
def __init__(self, store: Store, delay: float = 0.5): def __init__(self, store: Store, delay: float = 1.0):
self._store = store self._store = store
self._delay = delay self._delay = delay
self._session = requests.Session()
self._session.headers.update(_HEADERS)
def _get(self, params: dict) -> str:
    """Fetch eBay search HTML via a stealthed Playwright Chromium instance.

    Uses an Xvfb virtual display (headless=False) to avoid Kasada's
    headless detection — the same pattern as other scrapers facing JS
    challenges. Results are cached for _HTML_CACHE_TTL seconds so repeated
    searches for the same query return immediately without re-scraping.
    """
    import os
    import subprocess
    from urllib.parse import urlencode

    # urlencode percent-escapes spaces and special characters; the previous
    # f-string join produced malformed URLs for multi-word queries (the
    # requests-based code this replaced got encoding for free via params=).
    url = EBAY_SEARCH_URL + "?" + urlencode(params)
    cached = _html_cache.get(url)
    if cached and time.time() < cached[1]:
        return cached[0]
    time.sleep(self._delay)  # politeness delay before hitting eBay

    # Fresh display number per call (:200–:299, cycling) so a stale Xvfb
    # lock file from a previous run can't collide.
    display = f":{next(_display_counter)}"
    xvfb = subprocess.Popen(
        ["Xvfb", display, "-screen", "0", "1280x800x24"],
        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
    )
    env = os.environ.copy()
    env["DISPLAY"] = display
    try:
        with sync_playwright() as pw:
            browser = pw.chromium.launch(
                headless=False,
                env=env,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
            try:
                ctx = browser.new_context(
                    user_agent=_HEADERS["User-Agent"],
                    viewport={"width": 1280, "height": 800},
                )
                page = ctx.new_page()
                Stealth().apply_stealth_sync(page)
                page.goto(url, wait_until="domcontentloaded", timeout=30_000)
                page.wait_for_timeout(2000)  # let any JS challenges resolve
                html = page.content()
            finally:
                # Close even if navigation raised — previously a goto error
                # leaked the browser until the playwright context exited.
                browser.close()
    finally:
        xvfb.terminate()
        xvfb.wait()
    _html_cache[url] = (html, time.time() + _HTML_CACHE_TTL)
    return html
def search(self, query: str, filters: SearchFilters) -> list[Listing]: def search(self, query: str, filters: SearchFilters) -> list[Listing]:
params: dict = {"_nkw": query, "_sop": "15", "_ipg": "48"} params: dict = {"_nkw": query, "_sop": "15", "_ipg": "48"}
@ -226,8 +299,7 @@ class ScrapedEbayAdapter(PlatformAdapter):
listings = scrape_listings(html) listings = scrape_listings(html)
# Cache seller objects extracted from the same page # Cache seller objects extracted from the same page
for seller in scrape_sellers(html).values(): self._store.save_sellers(list(scrape_sellers(html).values()))
self._store.save_seller(seller)
return listings return listings

View file

@ -11,6 +11,10 @@ try:
except ImportError: except ImportError:
_IMAGEHASH_AVAILABLE = False _IMAGEHASH_AVAILABLE = False
# Module-level phash cache: url → hash string (or None on failure).
# Avoids re-downloading the same eBay CDN image on repeated searches.
_phash_cache: dict[str, Optional[str]] = {}
class PhotoScorer: class PhotoScorer:
""" """
@ -52,13 +56,17 @@ class PhotoScorer:
def _fetch_hash(self, url: str) -> Optional[str]:
    """Download *url* and return its perceptual-hash string, or None.

    Results — including failures, cached as None — are memoized in the
    module-level _phash_cache so repeated searches never re-download the
    same eBay CDN image. Any error (network, image decode, hashing) is
    deliberately collapsed to "no hash": this is a best-effort signal.
    """
    if not url:
        return None
    if url in _phash_cache:
        return _phash_cache[url]
    try:
        # No stream=True: .content reads the full body anyway, so the old
        # streaming flag was a no-op that only delayed the read.
        resp = requests.get(url, timeout=5)
        resp.raise_for_status()
        img = Image.open(io.BytesIO(resp.content))
        result: Optional[str] = str(imagehash.phash(img))
    except Exception:
        result = None
    _phash_cache[url] = result
    return result
def _url_dedup(self, photo_urls_per_listing: list[list[str]]) -> list[bool]: def _url_dedup(self, photo_urls_per_listing: list[list[str]]) -> list[bool]:
seen: set[str] = set() seen: set[str] = set()

View file

@ -1,9 +1,21 @@
services: services:
snipe: api:
build:
context: ..
dockerfile: snipe/Dockerfile
network_mode: host
volumes: volumes:
- ../circuitforge-core:/app/circuitforge-core - ../circuitforge-core:/app/circuitforge-core
- ./streamlit_app.py:/app/snipe/streamlit_app.py - ./api:/app/snipe/api
- ./app:/app/snipe/app - ./app:/app/snipe/app
- ./data:/app/snipe/data - ./data:/app/snipe/data
- ./tests:/app/snipe/tests
environment: environment:
- STREAMLIT_SERVER_RUN_ON_SAVE=true - RELOAD=true
web:
build:
context: .
dockerfile: docker/web/Dockerfile
volumes:
- ./web/src:/app/src # not used at runtime but keeps override valid

View file

@ -4,8 +4,6 @@ WORKDIR /app
COPY web/package*.json ./ COPY web/package*.json ./
RUN npm ci --prefer-offline RUN npm ci --prefer-offline
COPY web/ ./ COPY web/ ./
ARG VITE_BASE_URL=/snipe/
ENV VITE_BASE_URL=${VITE_BASE_URL}
RUN npm run build RUN npm run build
# Stage 2: serve # Stage 2: serve

View file

@ -5,6 +5,13 @@ server {
root /usr/share/nginx/html; root /usr/share/nginx/html;
index index.html; index index.html;
# Proxy API requests to the FastAPI backend container
location /api/ {
proxy_pass http://172.17.0.1:8510; # Docker host bridge IP — the api service runs with network_mode: host
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
}
# index.html never cache; ensures clients always get the latest entry point # index.html never cache; ensures clients always get the latest entry point
# after a deployment (chunks are content-hashed so they can be cached forever) # after a deployment (chunks are content-hashed so they can be cached forever)
location = /index.html { location = /index.html {

View file

@ -2,11 +2,22 @@
set -euo pipefail set -euo pipefail
SERVICE=snipe SERVICE=snipe
PORT=8509 # Vue web UI (nginx) PORT=8509 # Vue web UI (nginx)
API_PORT=8510 # FastAPI
COMPOSE_FILE="compose.yml" COMPOSE_FILE="compose.yml"
usage() { usage() {
echo "Usage: $0 {start|stop|restart|status|logs|open|update}" echo "Usage: $0 {start|stop|restart|status|logs|open|build|update|test}"
echo ""
echo " start Build (if needed) and start all services"
echo " stop Stop and remove containers"
echo " restart Stop then start"
echo " status Show running containers"
echo " logs Follow logs (logs api | logs web | logs — defaults to all)"
echo " open Open web UI in browser"
echo " build Rebuild Docker images without cache"
echo " update Pull latest images and rebuild"
echo " test Run pytest test suite in the api container"
exit 1 exit 1
} }
@ -16,28 +27,45 @@ shift || true
case "$cmd" in case "$cmd" in
start) start)
docker compose -f "$COMPOSE_FILE" up -d docker compose -f "$COMPOSE_FILE" up -d
echo "$SERVICE started on http://localhost:$PORT" echo "$SERVICE started — web: http://localhost:$PORT api: http://localhost:$API_PORT"
;; ;;
stop) stop)
docker compose -f "$COMPOSE_FILE" down docker compose -f "$COMPOSE_FILE" down --remove-orphans
;; ;;
restart) restart)
docker compose -f "$COMPOSE_FILE" down docker compose -f "$COMPOSE_FILE" down --remove-orphans
docker compose -f "$COMPOSE_FILE" up -d docker compose -f "$COMPOSE_FILE" up -d
echo "$SERVICE restarted on http://localhost:$PORT" echo "$SERVICE restarted http://localhost:$PORT"
;; ;;
status) status)
docker compose -f "$COMPOSE_FILE" ps docker compose -f "$COMPOSE_FILE" ps
;; ;;
logs) logs)
docker compose -f "$COMPOSE_FILE" logs -f "${@:-$SERVICE}" # logs [api|web] — default: all services
target="${1:-}"
if [[ -n "$target" ]]; then
docker compose -f "$COMPOSE_FILE" logs -f "$target"
else
docker compose -f "$COMPOSE_FILE" logs -f
fi
;; ;;
open) open)
xdg-open "http://localhost:$PORT" 2>/dev/null || open "http://localhost:$PORT" xdg-open "http://localhost:$PORT" 2>/dev/null || open "http://localhost:$PORT" 2>/dev/null || \
echo "Open http://localhost:$PORT in your browser"
;;
build)
docker compose -f "$COMPOSE_FILE" build --no-cache
echo "Build complete."
;; ;;
update) update)
docker compose -f "$COMPOSE_FILE" pull docker compose -f "$COMPOSE_FILE" pull
docker compose -f "$COMPOSE_FILE" up -d --build docker compose -f "$COMPOSE_FILE" up -d --build
echo "$SERVICE updated — http://localhost:$PORT"
;;
test)
echo "Running test suite..."
docker compose -f "$COMPOSE_FILE" exec api \
conda run -n job-seeker python -m pytest /app/snipe/tests/ -v "${@}"
;; ;;
*) *)
usage usage

View file

@ -16,11 +16,15 @@ dependencies = [
"python-dotenv>=1.0", "python-dotenv>=1.0",
"beautifulsoup4>=4.12", "beautifulsoup4>=4.12",
"lxml>=5.0", "lxml>=5.0",
"fastapi>=0.111",
"uvicorn[standard]>=0.29",
"playwright>=1.44",
"playwright-stealth>=1.0",
] ]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
where = ["."] where = ["."]
include = ["app*"] include = ["app*", "api*"]
[tool.pytest.ini_options] [tool.pytest.ini_options]
testpaths = ["tests"] testpaths = ["tests"]

View file

@ -1,55 +1,79 @@
"""Tests for the scraper-based eBay adapter. """Tests for the scraper-based eBay adapter.
Uses a minimal HTML fixture mirroring eBay's current s-card markup.
No HTTP requests are made — all tests operate on the pure parsing functions.
""" """
import pytest import pytest
from datetime import timedelta from datetime import timedelta
from app.platforms.ebay.scraper import ( from app.platforms.ebay.scraper import (
scrape_listings, scrape_sellers, _parse_price, _parse_seller, _parse_time_left, scrape_listings,
scrape_sellers,
_parse_price,
_parse_time_left,
_extract_seller_from_card,
) )
from bs4 import BeautifulSoup
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Minimal eBay search results HTML fixture # Minimal eBay search results HTML fixture (li.s-card schema)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
_EBAY_HTML = """ _EBAY_HTML = """
<html><body> <html><body>
<ul class="srp-results"> <ul class="srp-results">
<!-- eBay injects this ghost item first should be skipped --> <!-- Promo item: no data-listingid must be skipped -->
<li class="s-item"> <li class="s-card">
<div class="s-item__title"><span>Shop on eBay</span></div> <div class="s-card__title">Shop on eBay</div>
<a class="s-item__link" href="https://ebay.com/shop"></a>
</li> </li>
<!-- Real listing 1: established seller, normal price --> <!-- Real listing 1: established seller, used, fixed price -->
<li class="s-item"> <li class="s-card" data-listingid="123456789">
<h3 class="s-item__title"><span>RTX 4090 Founders Edition GPU</span></h3> <div class="s-card__title">RTX 4090 Founders Edition GPU</div>
<a class="s-item__link" href="https://www.ebay.com/itm/123456789"></a> <a class="s-card__link" href="https://www.ebay.com/itm/123456789?somequery=1"></a>
<span class="s-item__price">$950.00</span> <span class="s-card__price">$950.00</span>
<span class="SECONDARY_INFO">Used</span> <div class="s-card__subtitle">Used · Free shipping</div>
<div class="s-item__image-wrapper"><img src="https://i.ebayimg.com/thumbs/1.jpg"/></div> <img class="s-card__image" src="https://i.ebayimg.com/thumbs/1.jpg"/>
<span class="s-item__seller-info-text">techguy (1,234) 99.1% positive feedback</span> <span class="su-styled-text">techguy</span>
<span class="su-styled-text">99.1% positive (1,234)</span>
</li> </li>
<!-- Real listing 2: price range, new condition --> <!-- Real listing 2: price range, new, data-src photo -->
<li class="s-item"> <li class="s-card" data-listingid="987654321">
<h3 class="s-item__title"><span>RTX 4090 Gaming OC 24GB</span></h3> <div class="s-card__title">RTX 4090 Gaming OC 24GB</div>
<a class="s-item__link" href="https://www.ebay.com/itm/987654321"></a> <a class="s-card__link" href="https://www.ebay.com/itm/987654321"></a>
<span class="s-item__price">$1,100.00 to $1,200.00</span> <span class="s-card__price">$1,100.00 to $1,200.00</span>
<span class="SECONDARY_INFO">New</span> <div class="s-card__subtitle">New · Free shipping</div>
<div class="s-item__image-wrapper"><img data-src="https://i.ebayimg.com/thumbs/2.jpg" src=""/></div> <img class="s-card__image" data-src="https://i.ebayimg.com/thumbs/2.jpg" src=""/>
<span class="s-item__seller-info-text">gpu_warehouse (450) 98.7% positive feedback</span> <span class="su-styled-text">gpu_warehouse</span>
<span class="su-styled-text">98.7% positive (450)</span>
</li> </li>
<!-- Real listing 3: low feedback seller, suspicious price --> <!-- Real listing 3: new account, suspicious price -->
<li class="s-item"> <li class="s-card" data-listingid="555000111">
<h3 class="s-item__title"><span>RTX 4090 BNIB Sealed</span></h3> <div class="s-card__title">RTX 4090 BNIB Sealed</div>
<a class="s-item__link" href="https://www.ebay.com/itm/555000111"></a> <a class="s-card__link" href="https://www.ebay.com/itm/555000111"></a>
<span class="s-item__price">$499.00</span> <span class="s-card__price">$499.00</span>
<span class="SECONDARY_INFO">New</span> <div class="s-card__subtitle">New</div>
<div class="s-item__image-wrapper"><img src="https://i.ebayimg.com/thumbs/3.jpg"/></div> <img class="s-card__image" src="https://i.ebayimg.com/thumbs/3.jpg"/>
<span class="s-item__seller-info-text">new_user_2024 (2) 100.0% positive feedback</span> <span class="su-styled-text">new_user_2024</span>
<span class="su-styled-text">100.0% positive (2)</span>
</li>
</ul>
</body></html>
"""
_AUCTION_HTML = """
<html><body>
<ul class="srp-results">
<li class="s-card" data-listingid="777000999">
<div class="s-card__title">Vintage Leica M6 Camera Body</div>
<a class="s-card__link" href="https://www.ebay.com/itm/777000999"></a>
<span class="s-card__price">$450.00</span>
<div class="s-card__subtitle">Used</div>
<img class="s-card__image" src="https://i.ebayimg.com/thumbs/cam.jpg"/>
<span class="su-styled-text">camera_dealer</span>
<span class="su-styled-text">97.5% positive (800)</span>
<span class="su-styled-text">2h 30m left</span>
</li> </li>
</ul> </ul>
</body></html> </body></html>
@ -57,7 +81,7 @@ _EBAY_HTML = """
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Unit tests: pure parsing functions # _parse_price
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class TestParsePrice: class TestParsePrice:
@ -70,141 +94,189 @@ class TestParsePrice:
def test_price_with_commas(self): def test_price_with_commas(self):
assert _parse_price("$1,100.00") == 1100.0 assert _parse_price("$1,100.00") == 1100.0
def test_price_per_ea(self):
assert _parse_price("$1,234.56/ea") == 1234.56
def test_empty_returns_zero(self): def test_empty_returns_zero(self):
assert _parse_price("") == 0.0 assert _parse_price("") == 0.0
class TestParseSeller: # ---------------------------------------------------------------------------
def test_standard_format(self): # _extract_seller_from_card
username, count, ratio = _parse_seller("techguy (1,234) 99.1% positive feedback") # ---------------------------------------------------------------------------
class TestExtractSellerFromCard:
def _card(self, html: str):
return BeautifulSoup(html, "lxml").select_one("li.s-card")
def test_standard_card(self):
card = self._card("""
<li class="s-card" data-listingid="1">
<span class="su-styled-text">techguy</span>
<span class="su-styled-text">99.1% positive (1,234)</span>
</li>""")
username, count, ratio = _extract_seller_from_card(card)
assert username == "techguy" assert username == "techguy"
assert count == 1234 assert count == 1234
assert ratio == pytest.approx(0.991, abs=0.001) assert ratio == pytest.approx(0.991, abs=0.001)
def test_low_count(self): def test_new_account(self):
username, count, ratio = _parse_seller("new_user_2024 (2) 100.0% positive feedback") card = self._card("""
<li class="s-card" data-listingid="2">
<span class="su-styled-text">new_user_2024</span>
<span class="su-styled-text">100.0% positive (2)</span>
</li>""")
username, count, ratio = _extract_seller_from_card(card)
assert username == "new_user_2024" assert username == "new_user_2024"
assert count == 2 assert count == 2
assert ratio == pytest.approx(1.0, abs=0.001) assert ratio == pytest.approx(1.0, abs=0.001)
def test_fallback_on_malformed(self): def test_no_feedback_span_returns_empty(self):
username, count, ratio = _parse_seller("weirdformat") card = self._card("""
assert username == "weirdformat" <li class="s-card" data-listingid="3">
<span class="su-styled-text">some_seller</span>
</li>""")
username, count, ratio = _extract_seller_from_card(card)
assert username == ""
assert count == 0 assert count == 0
assert ratio == 0.0 assert ratio == 0.0
# ---------------------------------------------------------------------------
# Integration tests: HTML fixture → domain objects
# ---------------------------------------------------------------------------
class TestScrapeListings:
    """Integration tests: fixture HTML through scrape_listings to domain objects."""

    def test_skips_shop_on_ebay_ghost(self):
        """Promo/ghost cards must not surface as real listings."""
        parsed = scrape_listings(_EBAY_HTML)
        assert not any("Shop on eBay" in item.title for item in parsed)

    def test_parses_three_real_listings(self):
        """The fixture contains exactly three genuine listing cards."""
        assert len(scrape_listings(_EBAY_HTML)) == 3

    def test_extracts_platform_listing_id_from_url(self):
        """The numeric item id is extracted from each listing's URL."""
        parsed = scrape_listings(_EBAY_HTML)
        for idx, expected in enumerate(["123456789", "987654321"]):
            assert parsed[idx].platform_listing_id == expected

    def test_price_range_takes_lower(self):
        """When a card shows a price range, the lower figure is kept."""
        parsed = scrape_listings(_EBAY_HTML)
        assert parsed[1].price == 1100.0

    def test_condition_lowercased(self):
        """Condition strings are normalised to lowercase."""
        parsed = scrape_listings(_EBAY_HTML)
        assert parsed[0].condition == "used"
        assert parsed[1].condition == "new"

    def test_photo_prefers_data_src(self):
        """Listing 2 has data-src set and an empty src; data-src must win."""
        parsed = scrape_listings(_EBAY_HTML)
        assert parsed[1].photo_urls == ["https://i.ebayimg.com/thumbs/2.jpg"]

    def test_seller_platform_id_set(self):
        """Each listing is linked to the username scraped from its card."""
        parsed = scrape_listings(_EBAY_HTML)
        assert parsed[0].seller_platform_id == "techguy"
        assert parsed[2].seller_platform_id == "new_user_2024"
class TestScrapeSellers:
    """Integration tests: fixture HTML through scrape_sellers."""

    def test_extracts_three_sellers(self):
        assert len(scrape_sellers(_EBAY_HTML)) == 3

    def test_feedback_count_and_ratio(self):
        techguy = scrape_sellers(_EBAY_HTML)["techguy"]
        assert techguy.feedback_count == 1234
        assert techguy.feedback_ratio == pytest.approx(0.991, abs=0.001)

    def test_account_age_is_zero(self):
        """account_age_days is always 0 from scraper — signals partial score."""
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.account_age_days == 0

    def test_category_history_is_empty(self):
        """category_history_json is always '{}' from scraper — signals partial score."""
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.category_history_json == "{}"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# _parse_time_left # _parse_time_left
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
class TestParseTimeLeft: class TestParseTimeLeft:
def test_days_hours(self): def test_days_and_hours(self):
td = _parse_time_left("3d 14h left") assert _parse_time_left("3d 14h left") == timedelta(days=3, hours=14)
assert td == timedelta(days=3, hours=14)
def test_hours_minutes(self): def test_hours_and_minutes(self):
td = _parse_time_left("14h 23m left") assert _parse_time_left("14h 23m left") == timedelta(hours=14, minutes=23)
assert td == timedelta(hours=14, minutes=23)
def test_minutes_seconds(self): def test_minutes_and_seconds(self):
td = _parse_time_left("23m 45s left") assert _parse_time_left("23m 45s left") == timedelta(minutes=23, seconds=45)
assert td == timedelta(minutes=23, seconds=45)
def test_days_only(self): def test_days_only(self):
td = _parse_time_left("2d left") assert _parse_time_left("2d left") == timedelta(days=2)
assert td == timedelta(days=2)
def test_no_match_returns_none(self): def test_no_match_returns_none(self):
assert _parse_time_left("Buy It Now") is None assert _parse_time_left("Buy It Now") is None
def test_empty_string_returns_none(self): def test_empty_returns_none(self):
assert _parse_time_left("") is None assert _parse_time_left("") is None
def test_all_zeros_returns_none(self): def test_all_zeros_returns_none(self):
# Regex can match "0d 0h 0m 0s left" — should treat as no time left = None
assert _parse_time_left("0d 0h 0m 0s left") is None assert _parse_time_left("0d 0h 0m 0s left") is None
def test_auction_listing_sets_ends_at(self):
"""scrape_listings should set ends_at for an auction item.""" # ---------------------------------------------------------------------------
auction_html = """ # scrape_listings
<html><body><ul class="srp-results"> # ---------------------------------------------------------------------------
<li class="s-item">
<h3 class="s-item__title"><span>Test Item</span></h3> class TestScrapeListings:
<a class="s-item__link" href="https://www.ebay.com/itm/999"></a> def test_skips_promo_without_listingid(self):
<span class="s-item__price">$100.00</span> listings = scrape_listings(_EBAY_HTML)
<span class="s-item__time-left">2h 30m left</span> titles = [l.title for l in listings]
</li> assert "Shop on eBay" not in titles
</ul></body></html>
""" def test_parses_three_real_listings(self):
listings = scrape_listings(auction_html) assert len(scrape_listings(_EBAY_HTML)) == 3
def test_platform_listing_id_from_data_attribute(self):
    """Listing ids come from each card's data-listingid attribute."""
    parsed = scrape_listings(_EBAY_HTML)
    for idx, expected in enumerate(["123456789", "987654321", "555000111"]):
        assert parsed[idx].platform_listing_id == expected
def test_url_strips_query_string(self):
    """Query parameters are removed from the scraped listing URL."""
    first = scrape_listings(_EBAY_HTML)[0]
    assert "?" not in first.url
    assert first.url == "https://www.ebay.com/itm/123456789"
def test_price_range_takes_lower(self):
    """When a card shows a price range, the lower figure is kept."""
    second = scrape_listings(_EBAY_HTML)[1]
    assert second.price == 1100.0
def test_condition_extracted_and_lowercased(self):
    """Condition text is extracted and normalised to lowercase."""
    parsed = scrape_listings(_EBAY_HTML)
    assert parsed[0].condition == "used"
    assert parsed[1].condition == "new"
def test_photo_prefers_data_src_over_src(self):
    """Listing 2 sets data-src with an empty src; data-src must win."""
    second = scrape_listings(_EBAY_HTML)[1]
    assert second.photo_urls == ["https://i.ebayimg.com/thumbs/2.jpg"]
def test_photo_falls_back_to_src(self):
    """Without a data-src value, the plain src attribute is used."""
    first = scrape_listings(_EBAY_HTML)[0]
    assert first.photo_urls == ["https://i.ebayimg.com/thumbs/1.jpg"]
def test_seller_platform_id_from_card(self):
    """Listings carry the seller username scraped from their card."""
    parsed = scrape_listings(_EBAY_HTML)
    assert parsed[0].seller_platform_id == "techguy"
    assert parsed[2].seller_platform_id == "new_user_2024"
def test_platform_is_ebay(self):
    """Every parsed listing is tagged with the ebay platform."""
    for item in scrape_listings(_EBAY_HTML):
        assert item.platform == "ebay"
def test_currency_is_usd(self):
    """Every parsed listing reports its price in USD."""
    for item in scrape_listings(_EBAY_HTML):
        assert item.currency == "USD"
def test_fixed_price_no_ends_at(self):
    """Fixed-price cards carry no auction deadline (ends_at stays None)."""
    parsed = scrape_listings(_EBAY_HTML)
    for item in parsed:
        assert item.ends_at is None
    for item in parsed:
        assert item.buying_format == "fixed_price"
def test_auction_sets_buying_format_and_ends_at(self):
listings = scrape_listings(_AUCTION_HTML)
assert len(listings) == 1 assert len(listings) == 1
assert listings[0].buying_format == "auction" assert listings[0].buying_format == "auction"
assert listings[0].ends_at is not None assert listings[0].ends_at is not None
def test_fixed_price_listing_no_ends_at(self): def test_empty_html_returns_empty_list(self):
"""scrape_listings should leave ends_at=None for fixed-price items.""" assert scrape_listings("<html><body></body></html>") == []
listings = scrape_listings(_EBAY_HTML)
fixed = [l for l in listings if l.buying_format == "fixed_price"]
assert len(fixed) > 0 # ---------------------------------------------------------------------------
assert all(l.ends_at is None for l in fixed) # scrape_sellers
# ---------------------------------------------------------------------------
class TestScrapeSellers:
    """Tests for scrape_sellers: shared fixture plus inline dedup markup."""

    def test_extracts_three_sellers(self):
        assert len(scrape_sellers(_EBAY_HTML)) == 3

    def test_feedback_count_and_ratio(self):
        techguy = scrape_sellers(_EBAY_HTML)["techguy"]
        assert techguy.feedback_count == 1234
        assert techguy.feedback_ratio == pytest.approx(0.991, abs=0.001)

    def test_deduplicates_sellers(self):
        """The same username on two cards yields a single Seller entry."""
        html = """<html><body><ul>
<li class="s-card" data-listingid="1">
<div class="s-card__title">Item A</div>
<a class="s-card__link" href="https://www.ebay.com/itm/1"></a>
<span class="su-styled-text">repeatguy</span>
<span class="su-styled-text">99.0% positive (500)</span>
</li>
<li class="s-card" data-listingid="2">
<div class="s-card__title">Item B</div>
<a class="s-card__link" href="https://www.ebay.com/itm/2"></a>
<span class="su-styled-text">repeatguy</span>
<span class="su-styled-text">99.0% positive (500)</span>
</li>
</ul></body></html>"""
        result = scrape_sellers(html)
        assert len(result) == 1
        assert "repeatguy" in result

    def test_account_age_always_zero(self):
        """account_age_days is 0 from scraper — causes score_is_partial=True."""
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.account_age_days == 0

    def test_category_history_always_empty(self):
        """category_history_json is '{}' from scraper — causes score_is_partial=True."""
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.category_history_json == "{}"

    def test_platform_is_ebay(self):
        for seller in scrape_sellers(_EBAY_HTML).values():
            assert seller.platform == "ebay"

View file

@ -183,8 +183,17 @@ h1, h2, h3, h4, h5, h6 {
/* Auction de-emphasis /* Auction de-emphasis
Auctions with >1h remaining have fluid prices de-emphasise Auctions with >1h remaining have fluid prices de-emphasise
the current price to avoid anchoring on a misleading figure. the card and current price to avoid anchoring on a misleading figure.
*/ */
.listing-card--auction {
opacity: 0.72;
border-color: var(--color-border-light);
}
.listing-card--auction:hover {
opacity: 1;
}
.auction-price--live { .auction-price--live {
opacity: 0.55; opacity: 0.55;
font-style: italic; font-style: italic;

View file

@ -3,7 +3,7 @@
class="listing-card" class="listing-card"
:class="{ :class="{
'steal-card': isSteal, 'steal-card': isSteal,
'listing-card--auction': isAuction, 'listing-card--auction': isAuction && hoursRemaining !== null && hoursRemaining > 1,
}" }"
> >
<!-- Thumbnail --> <!-- Thumbnail -->

View file

@ -4,7 +4,7 @@ import UnoCSS from 'unocss/vite'
export default defineConfig({ export default defineConfig({
plugins: [vue(), UnoCSS()], plugins: [vue(), UnoCSS()],
base: process.env.VITE_BASE_URL ?? '/snipe/', base: process.env.VITE_BASE_URL ?? '/',
server: { server: {
host: '0.0.0.0', host: '0.0.0.0',
port: 5174, port: 5174,