"""One-shot Mercari probe using the same headed Chromium + Xvfb + stealth stack as the eBay scraper. Run inside the snipe-api container: docker exec -it snipe-api-1 python /app/scripts/probe_mercari.py """ from __future__ import annotations import itertools import os import subprocess import sys import time _display_counter = itertools.count(200) _CHROMIUM_ARGS = ["--no-sandbox", "--disable-dev-shm-usage"] _USER_AGENT = ( "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" ) SEARCH_URL = "https://www.mercari.com/search/?keyword=rtx+4090" # Give Cloudflare challenge time to resolve (if it does) WAIT_MS = 8_000 def probe(url: str) -> str: from playwright.sync_api import sync_playwright from playwright_stealth import Stealth display_num = next(_display_counter) display = f":{display_num}" env = os.environ.copy() env["DISPLAY"] = display xvfb = subprocess.Popen( ["Xvfb", display, "-screen", "0", "1280x800x24"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) time.sleep(0.5) try: with sync_playwright() as pw: browser = pw.chromium.launch( headless=False, env=env, args=_CHROMIUM_ARGS, ) ctx = browser.new_context( user_agent=_USER_AGENT, viewport={"width": 1280, "height": 800}, ) page = ctx.new_page() Stealth().apply_stealth_sync(page) print(f"[probe] Navigating to {url} …", flush=True) response = page.goto(url, wait_until="domcontentloaded", timeout=40_000) print(f"[probe] HTTP status: {response.status if response else 'unknown'}", flush=True) print(f"[probe] Waiting {WAIT_MS}ms for JS / Turnstile …", flush=True) page.wait_for_timeout(WAIT_MS) html = page.content() title = page.title() print(f"[probe] Page title: {title!r}", flush=True) browser.close() finally: xvfb.terminate() xvfb.wait() return html def analyse(html: str) -> None: from bs4 import BeautifulSoup soup = BeautifulSoup(html, "html.parser") # Cloudflare challenge indicators if "Just a moment" in html or "cf-challenge" in html or "turnstile" in html.lower(): print("[result] BLOCKED — Cloudflare Turnstile still active") return print("[result] Cloudflare challenge NOT detected — page appears to have loaded") # Try to find listing cards # Mercari US uses data-testid or item cards in the DOM candidates = [ soup.select("[data-testid='ItemCell']"), soup.select("[data-testid='item-cell']"), soup.select("li[data-testid]"), soup.select(".merList .merListItem"), soup.select("[class*='ItemCell']"), soup.select("[class*='item-cell']"), ] for sel_result in candidates: if sel_result: print(f"[result] Found {len(sel_result)} listing card(s) via selector") card = sel_result[0] print(f"[result] First card snippet:\n{card.prettify()[:800]}") return # Fallback: show body text summary body = soup.find("body") text = body.get_text(separator=" ", strip=True)[:500] if body else html[:500] print(f"[result] No listing cards found. Body text preview:\n{text}") # Save full HTML for manual inspection out = "/tmp/mercari_probe.html" with open(out, "w") as fh: fh.write(html) print(f"[result] Full HTML saved to {out}") if __name__ == "__main__": html = probe(SEARCH_URL) analyse(html)