feat(mercari): Phase 2 — MercariAdapter with Xvfb stability fixes
Implements full Mercari scraping support for the trust-scoring pipeline:

- `app/platforms/mercari/` — new MercariAdapter (scraper-based), scraper (parse_search_html / parse_listing_html), and __init__
- `app/platforms/__init__.py` — adds "mercari" to SUPPORTED_PLATFORMS
- `api/main.py` — platform routing: _make_adapter, OR-group guard, seller lookup, and the BTF/Trading API guards are all parameterised by platform
- `web/src/views/SearchView.vue` — enables the Mercari tab in the platform picker

BrowserPool stability fixes (browser_pool.py):

- Add the -ac flag to Xvfb (disables the X11 auth requirement in Docker containers)
- Shift the display counter from :100-:199 to :200-:399 (avoids ghost kernel-socket conflicts with low-numbered displays)
- Add wait_for_selector / wait_for_timeout_ms params to fetch_html, _fetch_with_slot, and _fetch_fresh
- Add the missing time.sleep(0.3) in _fetch_fresh after Xvfb starts

Mercari scraper fix:

- Remove sortBy=SORT_SCORE from build_search_url — the param is deprecated on Mercari and causes an empty 85KB response instead of search results

Probe + debug scripts in scripts/:

- probe_mercari.py — standalone Cloudflare bypass test
- debug_fetch_fresh.py — pool simulation diagnostic

Trust signal coverage: feedback_count and feedback_ratio only; account_age_days and category_history are absent, so score_is_partial=True for all Mercari results. get_completed_sales is stubbed for Phase 3.

Tracks: snipe#53 (pool thread-safety fix, follow-up)
parent f48f8ef80f
commit 15996472b7
9 changed files with 677 additions and 100 deletions
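Orientation note: a minimal sketch of the new routing path exercised by this commit (MercariAdapter, Store, and the fetch parameters are taken from the diff below; the shared_db handle and the SearchFilters instance are assumed context, not shown here).

    from app.db.store import Store
    from app.platforms.mercari import MercariAdapter

    # Illustrative wiring only; api/main.py reaches this via _make_adapter(..., platform="mercari").
    store = Store(shared_db)                        # shared_db: assumed DB handle/path
    adapter = MercariAdapter(store)
    listings = adapter.search("rtx 4090", filters)  # filters: assumed SearchFilters instance
    sellers = [adapter.get_seller(l.seller_platform_id) for l in listings if l.seller_platform_id]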
api/main.py (100 changed lines)
@@ -664,22 +664,22 @@ def _try_trading_api_enrichment(
     return enriched


-def _make_adapter(shared_store: Store, force: str = "auto"):
-    """Return the appropriate adapter.
+def _make_adapter(shared_store: Store, force: str = "auto", platform: str = "ebay"):
+    """Return the appropriate adapter for the given platform.

-    force: "auto" | "api" | "scraper"
+    force: "auto" | "api" | "scraper" (ignored for non-eBay platforms)
       auto    — API if creds present, else scraper
       api     — Browse API (raises if no creds)
       scraper — Playwright scraper regardless of creds

     Adapters receive shared_store because they only read/write sellers and
     market_comps — never listings. Listings are returned and saved by the caller.

-    # Platform registry — add new adapters here as platforms are implemented.
-    # _make_adapter() currently handles eBay only. Phase 2 will add:
-    #   "mercari": MercariAdapter
-    #   "poshmark": PoshmarkAdapter
     """
+    if platform == "mercari":
+        from app.platforms.mercari import MercariAdapter
+        return MercariAdapter(shared_store)
+
+    # eBay
     client_id, client_secret, env = _ebay_creds()
     has_creds = bool(client_id and client_secret)
@@ -696,8 +696,10 @@ def _make_adapter(shared_store: Store, force: str = "auto"):
     return ScrapedEbayAdapter(shared_store)


-def _adapter_name(force: str = "auto") -> str:
+def _adapter_name(force: str = "auto", platform: str = "ebay") -> str:
     """Return the name of the adapter that would be used — without creating it."""
+    if platform != "ebay":
+        return platform
     client_id, client_secret, _ = _ebay_creds()
     if force == "scraper":
         return "scraper"
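Behaviour sketch of the updated helper (the eBay return values depend on creds resolution further down in the function, which this hunk does not show):

    _adapter_name("auto", platform="mercari")   # -> "mercari"; force is ignored for non-eBay platforms
    _adapter_name("scraper", platform="ebay")   # -> "scraper"
    _adapter_name("auto", platform="ebay")      # -> depends on whether Browse API creds are present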
@@ -735,7 +737,7 @@ def search(
         q = ebay_item_id

     if not q.strip():
-        return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)}
+        return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter, platform=platform)}

     # Cap pages to the tier's maximum — free cloud users get 1 page, local gets unlimited.
     features = compute_features(session.tier)
@@ -743,9 +745,8 @@ def search(

     must_exclude_terms = _parse_terms(must_exclude)

-    # In Groups mode, expand OR groups into multiple targeted eBay queries to
-    # guarantee comprehensive result coverage — eBay relevance won't silently drop variants.
-    if must_include_mode == "groups" and must_include.strip():
+    # OR-group expansion is eBay-specific; other platforms use the base query directly.
+    if platform == "ebay" and must_include_mode == "groups" and must_include.strip():
         or_groups = parse_groups(must_include)
         ebay_queries = expand_queries(q, or_groups)
     else:
@@ -772,7 +773,7 @@ def search(
         category_id=category_id.strip() or None,
     )

-    adapter_used = _adapter_name(adapter)
+    adapter_used = _adapter_name(adapter, platform=platform)

     shared_db = session.shared_db
     user_db = session.user_db
@@ -832,11 +833,11 @@ def search(
         }
         seller_map = {
             listing.seller_platform_id: dataclasses.asdict(
-                shared_store.get_seller("ebay", listing.seller_platform_id)
+                shared_store.get_seller(platform, listing.seller_platform_id)
             )
             for listing in listings
             if listing.seller_platform_id
-            and shared_store.get_seller("ebay", listing.seller_platform_id)
+            and shared_store.get_seller(platform, listing.seller_platform_id)
         }

         _is_unauthed = session.user_id == "anonymous" or session.user_id.startswith("guest:")
@@ -890,11 +891,11 @@ def search(

     # Each thread creates its own Store — sqlite3 check_same_thread=True.
     def _run_search(ebay_query: str) -> list:
-        return _make_adapter(Store(shared_db), adapter).search(ebay_query, base_filters)
+        return _make_adapter(Store(shared_db), adapter, platform=platform).search(ebay_query, base_filters)

     def _run_comps() -> None:
         try:
-            _make_adapter(Store(shared_db), adapter).get_completed_sales(comp_query, pages)
+            _make_adapter(Store(shared_db), adapter, platform=platform).get_completed_sales(comp_query, pages)
         except Exception:
             log.warning("comps: unhandled exception for %r", comp_query, exc_info=True)
@@ -943,25 +944,23 @@ def search(

     user_store.save_listings(listings)

-    # Derive category_history from accumulated listing data — free for API adapter
-    # (category_name comes from Browse API response), no-op for scraper listings (category_name=None).
-    # Reads listings from user_store, writes seller categories to shared_store.
+    # Derive category_history from accumulated listing data — eBay only
+    # (category_name comes from Browse API response; other platforms return None).
     seller_ids = list({l.seller_platform_id for l in listings if l.seller_platform_id})
+    if platform == "ebay":
         n_cat = shared_store.refresh_seller_categories("ebay", seller_ids, listing_store=user_store)
         if n_cat:
             log.info("Category history derived for %d sellers from listing data", n_cat)

     # Re-fetch to hydrate staging fields (times_seen, first_seen_at, id, price_at_first_seen)
     # that are only available from the DB after the upsert.
-    staged = user_store.get_listings_staged("ebay", [l.platform_listing_id for l in listings])
+    staged = user_store.get_listings_staged(platform, [l.platform_listing_id for l in listings])
     listings = [staged.get(l.platform_listing_id, l) for l in listings]

-    # Trading API enrichment: if the user has connected their eBay account, use
-    # Trading API GetUser to instantly fill account_age_days for sellers missing it.
-    # This is synchronous (~200ms per seller) but only runs for sellers that need
-    # enrichment — typically a small subset. Sellers resolved here are excluded from
-    # the slower BTF Playwright background pass.
-    _main_adapter = _make_adapter(shared_store, adapter)
+    # Trading API enrichment and BTF scraping are eBay-specific.
+    _main_adapter = _make_adapter(shared_store, adapter, platform=platform)
+    trading_api_enriched: set[str] = set()
+    if platform == "ebay":
         sellers_needing_age = [
             l.seller_platform_id for l in listings
             if l.seller_platform_id
@@ -975,9 +974,7 @@ def search(
             _main_adapter, sellers_needing_age, user_db
         )

-    # BTF enrichment: scrape /itm/ pages for sellers still missing account_age_days
-    # after the Trading API pass. Runs in the background so it doesn't delay the
-    # response. Live score updates are pushed to the pre-registered SSE queue.
+    # BTF enrichment: scrape /itm/ pages for sellers still missing account_age_days.
     _trigger_scraper_enrichment(
         listings, shared_store, shared_db,
         user_db=user_db, query=comp_query, session_id=session_id,
@@ -996,7 +993,7 @@ def search(
     _enqueue_vision_tasks(listings, trust_scores_list, session)

     query_hash = hashlib.md5(comp_query.encode()).hexdigest()
-    comp = shared_store.get_market_comp("ebay", query_hash)
+    comp = shared_store.get_market_comp(platform, query_hash)
     market_price = comp.median_price if comp else None

     # Store raw listings (as dicts) + market_price in cache.
@@ -1015,11 +1012,11 @@ def search(
     }
     seller_map = {
         listing.seller_platform_id: dataclasses.asdict(
-            shared_store.get_seller("ebay", listing.seller_platform_id)
+            shared_store.get_seller(platform, listing.seller_platform_id)
         )
         for listing in listings
         if listing.seller_platform_id
-        and shared_store.get_seller("ebay", listing.seller_platform_id)
+        and shared_store.get_seller(platform, listing.seller_platform_id)
     }

     # Build a preference reader for affiliate URL wrapping.
@@ -1123,7 +1120,7 @@ def search_async(
             "trust_scores": {},
             "sellers": {},
             "market_price": None,
-            "adapter_used": _adapter_name(adapter),
+            "adapter_used": _adapter_name(adapter, platform=platform),
             "affiliate_active": bool(os.environ.get("EBAY_AFFILIATE_CAMPAIGN_ID", "").strip()),
         })
         _update_queues[empty_id].put(None)
@@ -1152,7 +1149,8 @@ def search_async(
     q_norm = q  # captured from outer scope
     must_exclude_terms = _parse_terms(must_exclude)

-    if must_include_mode == "groups" and must_include.strip():
+    # OR-group expansion is eBay-specific; other platforms use the base query directly.
+    if platform == "ebay" and must_include_mode == "groups" and must_include.strip():
         or_groups = parse_groups(must_include)
         ebay_queries = expand_queries(q_norm, or_groups)
     else:
@@ -1174,7 +1172,7 @@ def search_async(
         category_id=category_id.strip() or None,
     )

-    adapter_used = _adapter_name(adapter)
+    adapter_used = _adapter_name(adapter, platform=platform)
     q_ref = _update_queues.get(session_id)
     if q_ref is None:
         return  # client disconnected before we even started
@@ -1281,11 +1279,11 @@ def search_async(

         try:
             def _run_search(ebay_query: str) -> list:
-                return _make_adapter(Store(_shared_db), adapter).search(ebay_query, base_filters)
+                return _make_adapter(Store(_shared_db), adapter, platform=platform).search(ebay_query, base_filters)

             def _run_comps() -> None:
                 try:
-                    _make_adapter(Store(_shared_db), adapter).get_completed_sales(comp_query, pages)
+                    _make_adapter(Store(_shared_db), adapter, platform=platform).get_completed_sales(comp_query, pages)
                 except Exception:
                     log.warning("async comps: unhandled exception for %r", comp_query, exc_info=True)
@@ -1314,14 +1312,17 @@ def search_async(
             user_store.save_listings(listings)

             seller_ids = list({l.seller_platform_id for l in listings if l.seller_platform_id})
+            if platform == "ebay":
                 n_cat = shared_store.refresh_seller_categories("ebay", seller_ids, listing_store=user_store)
                 if n_cat:
                     log.info("async_search: category history derived for %d sellers", n_cat)

-            staged = user_store.get_listings_staged("ebay", [l.platform_listing_id for l in listings])
+            staged = user_store.get_listings_staged(platform, [l.platform_listing_id for l in listings])
             listings = [staged.get(l.platform_listing_id, l) for l in listings]

-            _main_adapter = _make_adapter(shared_store, adapter)
+            _main_adapter = _make_adapter(shared_store, adapter, platform=platform)
+            sellers_needing_age: list[str] = []
+            if platform == "ebay":
                 sellers_needing_age = [
                     l.seller_platform_id for l in listings
                     if l.seller_platform_id
@@ -1331,7 +1332,7 @@ def search_async(
             seen_set: set[str] = set()
             sellers_needing_age = [s for s in sellers_needing_age if not (s in seen_set or seen_set.add(s))]  # type: ignore[func-returns-value]

-            # Use a temporary CloudUser-like object for Trading API enrichment
+            # Use a temporary CloudUser-like object for Trading API enrichment (eBay only)
             from api.cloud_session import CloudUser as _CloudUser
             _session_stub = _CloudUser(
                 user_id=_user_id,
@@ -1339,6 +1340,8 @@ def search_async(
                 shared_db=_shared_db,
                 user_db=_user_db,
             )
+            trading_api_enriched: set[str] = set()
+            if platform == "ebay":
                 trading_api_enriched = _try_trading_api_enrichment(
                     _main_adapter, sellers_needing_age, _user_db
                 )
@@ -1353,7 +1356,7 @@ def search_async(
             _enqueue_vision_tasks(listings, trust_scores_list, _session_stub)

             query_hash = _hashlib_local.md5(comp_query.encode()).hexdigest()
-            comp = shared_store.get_market_comp("ebay", query_hash)
+            comp = shared_store.get_market_comp(platform, query_hash)
             market_price = comp.median_price if comp else None

             # Store raw listings + market_price in cache (trust scores excluded).
@@ -1369,11 +1372,11 @@ def search_async(
             }
             seller_map = {
                 listing.seller_platform_id: dataclasses.asdict(
-                    shared_store.get_seller("ebay", listing.seller_platform_id)
+                    shared_store.get_seller(platform, listing.seller_platform_id)
                 )
                 for listing in listings
                 if listing.seller_platform_id
-                and shared_store.get_seller("ebay", listing.seller_platform_id)
+                and shared_store.get_seller(platform, listing.seller_platform_id)
             }

             _is_unauthed = _user_id == "anonymous" or _user_id.startswith("guest:")
@@ -1404,12 +1407,17 @@ def search_async(
                 "session_id": session_id,
             })

-            # Kick off background enrichment — it pushes "update" events and the sentinel.
+            # BTF background enrichment is eBay-specific.
+            if platform == "ebay":
                 _trigger_scraper_enrichment(
                     listings, shared_store, _shared_db,
                     user_db=_user_db, query=comp_query, session_id=session_id,
                     skip_seller_ids=trading_api_enriched,
                 )
+            else:
+                # For non-eBay platforms, push the sentinel directly since there's no
+                # background enrichment pass.
+                _push(None)

         except _sqlite3.OperationalError as e:
             log.warning("async_search DB contention: %s", e)
app/platforms/__init__.py

@@ -9,7 +9,7 @@ from app.db.models import Listing, Seller

 # Single source of truth for platform validation.
 # Phase 2 will extend this set as new adapters are implemented.
-SUPPORTED_PLATFORMS: frozenset[str] = frozenset({"ebay"})
+SUPPORTED_PLATFORMS: frozenset[str] = frozenset({"ebay", "mercari"})


 @dataclass
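A hedged sketch of how the frozenset is presumably consumed for request validation; the real guard lives in api/main.py and is not part of this hunk, so the helper below is illustrative only.

    from app.platforms import SUPPORTED_PLATFORMS

    def _validate_platform(platform: str) -> str:
        # Hypothetical helper, not from this commit.
        if platform not in SUPPORTED_PLATFORMS:
            raise ValueError(f"unsupported platform: {platform!r}")
        return platform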
app/platforms/ebay/browser_pool.py
@@ -6,6 +6,7 @@ long-lived Playwright browser instances with fresh contexts ready to serve.
 Key design:
 - Pool slots: ``(xvfb_proc, pw_instance, browser, context, display_num, last_used_ts)``
   One headed Chromium browser per slot — keeps the Kasada fingerprint clean.
+- Display numbering: :200-:399 (avoids host :0 and low-numbered kernel socket conflicts).
 - Thread safety: ``queue.Queue`` with blocking get (timeout=3s before fresh fallback).
 - Replenishment: after each use, the dirty context is closed and a new context is
   opened on the *same* browser, then returned to the queue. Browser launch overhead
@@ -33,15 +34,17 @@ from typing import Optional

 log = logging.getLogger(__name__)

-# Reuse the same display counter namespace as scraper.py to avoid collisions.
-# Pool uses :100-:199; scraper.py fallback uses :200-:299.
-_pool_display_counter = itertools.cycle(range(100, 200))
+# Display counter shared by pool warmup and _fetch_fresh fallback.
+# Range :200-:399 avoids low-numbered displays that may be pre-occupied by
+# the host X server or lingering kernel sockets from previous runs.
+_pool_display_counter = itertools.cycle(range(200, 400))

 _IDLE_TIMEOUT_SECS = 300  # 5 minutes
 _CLEANUP_INTERVAL_SECS = 60
 _QUEUE_TIMEOUT_SECS = 3.0

 _CHROMIUM_ARGS = ["--no-sandbox", "--disable-dev-shm-usage"]
+_XVFB_ARGS = ["-screen", "0", "1280x800x24", "-ac"]  # -ac: disable X auth (safe in isolated Docker)
 _USER_AGENT = (
     "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
@@ -74,7 +77,7 @@ def _launch_slot() -> "_PooledBrowser":
     env["DISPLAY"] = display

     xvfb = subprocess.Popen(
-        ["Xvfb", display, "-screen", "0", "1280x800x24"],
+        ["Xvfb", display] + _XVFB_ARGS,
         stdout=subprocess.DEVNULL,
         stderr=subprocess.DEVNULL,
     )
@@ -230,7 +233,13 @@ class BrowserPool:
     # Core fetch
     # ------------------------------------------------------------------

-    def fetch_html(self, url: str, delay: float = 1.0) -> str:
+    def fetch_html(
+        self,
+        url: str,
+        delay: float = 1.0,
+        wait_for_selector: Optional[str] = None,
+        wait_for_timeout_ms: int = 2000,
+    ) -> str:
         """Navigate to *url* and return the rendered HTML.

         Borrows a browser context from the pool (blocks up to 3s), uses it to
@@ -238,6 +247,15 @@ class BrowserPool:

         Falls back to a fully fresh browser if the pool is empty after the
         timeout or if Playwright is unavailable.
+
+        Args:
+            wait_for_selector: CSS/data-testid selector to wait for before capturing
+                HTML (e.g. ``"[data-testid='SearchResults']"``). When set, the fixed
+                *wait_for_timeout_ms* sleep is skipped — the page is captured as soon
+                as the selector appears (or after 15s timeout, whichever comes first).
+            wait_for_timeout_ms: static post-navigation sleep in ms when
+                *wait_for_selector* is None. Default 2000; set higher (e.g. 8000)
+                for sites with JS challenge pages (Cloudflare Turnstile).
         """
         time.sleep(delay)
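Usage sketch of the two waiting modes described in the docstring (the selector string is the docstring's own example, not a verified Mercari selector):

    pool = get_pool()

    # Challenge-prone page (Cloudflare Turnstile): rely on a longer fixed sleep.
    html = pool.fetch_html(url, delay=1.0, wait_for_timeout_ms=8000)

    # Page with a known results marker: capture as soon as the selector renders.
    html = pool.fetch_html(url, wait_for_selector="[data-testid='SearchResults']")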
@@ -249,7 +267,11 @@ class BrowserPool:

         if slot is not None:
             try:
-                html = self._fetch_with_slot(slot, url)
+                html = self._fetch_with_slot(
+                    slot, url,
+                    wait_for_selector=wait_for_selector,
+                    wait_for_timeout_ms=wait_for_timeout_ms,
+                )
                 # Replenish: close dirty context, open fresh one, return to queue.
                 try:
                     fresh_slot = _replenish_slot(slot)
@@ -264,7 +286,11 @@ class BrowserPool:
                 # Fall through to fresh browser below.

         # Fallback: fresh browser (same code as old scraper._fetch_url).
-        return self._fetch_fresh(url)
+        return self._fetch_fresh(
+            url,
+            wait_for_selector=wait_for_selector,
+            wait_for_timeout_ms=wait_for_timeout_ms,
+        )

     # ------------------------------------------------------------------
     # Internal helpers
@@ -282,7 +308,13 @@ class BrowserPool:
             self._playwright_available = False
         return self._playwright_available

-    def _fetch_with_slot(self, slot: _PooledBrowser, url: str) -> str:
+    def _fetch_with_slot(
+        self,
+        slot: _PooledBrowser,
+        url: str,
+        wait_for_selector: Optional[str] = None,
+        wait_for_timeout_ms: int = 2000,
+    ) -> str:
         """Open a new page on *slot.ctx*, navigate to *url*, return HTML."""
         from playwright_stealth import Stealth
@@ -290,7 +322,13 @@ class BrowserPool:
         try:
             Stealth().apply_stealth_sync(page)
             page.goto(url, wait_until="domcontentloaded", timeout=30_000)
-            page.wait_for_timeout(2000)
+            if wait_for_selector:
+                try:
+                    page.wait_for_selector(wait_for_selector, timeout=15_000)
+                except Exception:
+                    pass  # selector didn't appear; return whatever loaded
+            else:
+                page.wait_for_timeout(wait_for_timeout_ms)
             return page.content()
         finally:
             try:
@@ -298,7 +336,12 @@ class BrowserPool:
             except Exception:
                 pass

-    def _fetch_fresh(self, url: str) -> str:
+    def _fetch_fresh(
+        self,
+        url: str,
+        wait_for_selector: Optional[str] = None,
+        wait_for_timeout_ms: int = 2000,
+    ) -> str:
         """Launch a fully fresh browser, fetch *url*, close everything."""
         import subprocess as _subprocess
@@ -307,7 +350,7 @@ class BrowserPool:
             from playwright_stealth import Stealth
         except ImportError as exc:
             raise RuntimeError(
-                "Playwright not installed — cannot fetch eBay pages. "
+                "Playwright not installed — cannot fetch pages. "
                 "Install playwright and playwright-stealth in the Docker image."
             ) from exc
@@ -317,10 +360,11 @@ class BrowserPool:
         env["DISPLAY"] = display

         xvfb = _subprocess.Popen(
-            ["Xvfb", display, "-screen", "0", "1280x800x24"],
+            ["Xvfb", display] + _XVFB_ARGS,
            stdout=_subprocess.DEVNULL,
            stderr=_subprocess.DEVNULL,
         )
+        time.sleep(0.3)  # wait for Xvfb to bind the display socket before Chromium starts
         try:
             with sync_playwright() as pw:
                 browser = pw.chromium.launch(
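Net effect of the Xvfb changes in this file, sketched as the launch sequence each slot (and the fresh-browser fallback) now performs:

    import itertools, subprocess, time

    _pool_display_counter = itertools.cycle(range(200, 400))
    display = f":{next(_pool_display_counter)}"   # e.g. ":200"
    xvfb = subprocess.Popen(["Xvfb", display, "-screen", "0", "1280x800x24", "-ac"])
    time.sleep(0.3)  # give Xvfb time to bind the display socket before Chromium starts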
@@ -335,7 +379,13 @@ class BrowserPool:
                 page = ctx.new_page()
                 Stealth().apply_stealth_sync(page)
                 page.goto(url, wait_until="domcontentloaded", timeout=30_000)
-                page.wait_for_timeout(2000)
+                if wait_for_selector:
+                    try:
+                        page.wait_for_selector(wait_for_selector, timeout=15_000)
+                    except Exception:
+                        pass  # selector didn't appear; return whatever loaded
+                else:
+                    page.wait_for_timeout(wait_for_timeout_ms)
                 html = page.content()
                 browser.close()
             finally:
app/platforms/mercari/__init__.py (new file, 4 lines)

@@ -0,0 +1,4 @@
"""Mercari platform adapter."""
from app.platforms.mercari.adapter import MercariAdapter

__all__ = ["MercariAdapter"]
app/platforms/mercari/adapter.py (new file, 173 lines)

@@ -0,0 +1,173 @@
"""MercariAdapter — scraper-based Mercari platform adapter.

Trust signal coverage vs eBay:
  ✅ feedback_count   (NumSales from listing page)
  ✅ feedback_ratio   (ReviewStarsWrapper data-stars / 5)
  ❌ account_age_days (requires seller profile page — future work)
  ❌ category_history (not exposed in HTML — future work)
  ✅ price_vs_market  (computed by trust scorer from comps, same as eBay)

Because account_age and category_history are always None, TrustScore.score_is_partial
will be True for all Mercari results. The aggregator handles this correctly
by scoring only from available signals.

seller_platform_id on Listing objects holds the product_id (e.g. "m86032668393")
rather than the seller username, because search results don't expose seller identity.
get_seller() resolves the product_id → seller by fetching the listing page.
The DB lookup key is (platform="mercari", platform_seller_id=product_id).
"""
from __future__ import annotations

import json
import logging
import time
from typing import Optional

from app.db.models import Listing, MarketComp, Seller
from app.db.store import Store
from app.platforms import PlatformAdapter, SearchFilters
from app.platforms.mercari.scraper import (
    build_search_url,
    parse_listing_html,
    parse_search_html,
)

log = logging.getLogger(__name__)

_SELLER_CACHE_TTL_HOURS = 6
_BETWEEN_LISTING_FETCH_SECS = 1.5


class MercariAdapter(PlatformAdapter):
    def __init__(self, store: Store) -> None:
        self._store = store

    def search(self, query: str, filters: SearchFilters) -> list[Listing]:
        from app.platforms.ebay.browser_pool import get_pool

        url = build_search_url(query, filters.max_price, filters.min_price)
        log.info("mercari: fetching search URL: %s", url)

        html = get_pool().fetch_html(
            url,
            delay=1.0,
            wait_for_timeout_ms=8000,
        )
        raw_listings = parse_search_html(html)

        listings: list[Listing] = []
        seen: set[str] = set()
        for raw in raw_listings:
            pid = raw["product_id"]
            if pid in seen:
                continue
            seen.add(pid)
            listings.append(_normalise_listing(raw, query))

        log.info("mercari: parsed %d listings for %r", len(listings), query)

        # Client-side keyword filter (mirrors eBay scraper behaviour).
        if filters.must_include:
            listings = _apply_keyword_filter(listings, filters.must_include, filters.must_include_mode)
        if filters.must_exclude:
            listings = _apply_exclude_filter(listings, filters.must_exclude)

        return listings

    def get_seller(self, seller_platform_id: str) -> Optional[Seller]:
        """Fetch seller data from the listing page identified by seller_platform_id.

        For Mercari, seller_platform_id is the product_id (e.g. "m86032668393")
        because seller usernames aren't available from search results HTML.
        """
        cached = self._store.get_seller("mercari", seller_platform_id)
        if cached:
            return cached

        from app.platforms.ebay.browser_pool import get_pool

        url = f"https://www.mercari.com/us/item/{seller_platform_id}/"
        try:
            time.sleep(_BETWEEN_LISTING_FETCH_SECS)
            html = get_pool().fetch_html(
                url,
                delay=0.5,
                wait_for_timeout_ms=6000,
            )
            raw = parse_listing_html(html, seller_platform_id)
            seller = _normalise_seller(raw)
            self._store.save_seller(seller)
            return seller
        except Exception as exc:
            log.warning("mercari: get_seller failed for %s: %s", seller_platform_id, exc)
            return None

    def get_completed_sales(self, query: str, pages: int = 1) -> list[Listing]:
        """Mercari sold-listing comps — stubbed for Phase 3.

        Mercari exposes sold listings via ?status=ITEM_STATUS_TRADING but the
        data is sparse. Phase 3 will implement comp extraction here; for now
        the trust scorer falls back to price_vs_market=None (partial score).
        """
        return []


# ---------------------------------------------------------------------------
# Normalisation helpers
# ---------------------------------------------------------------------------

def _normalise_listing(raw: dict, query: str) -> Listing:
    return Listing(
        platform="mercari",
        platform_listing_id=raw["product_id"],
        title=raw["title"],
        price=raw["price"],
        currency="USD",
        condition="",  # not available from search results; get_seller() populates this
        seller_platform_id=raw["product_id"],  # see module docstring
        url=raw["url"],
        photo_urls=[raw["photo_url"]] if raw.get("photo_url") else [],
        listing_age_days=0,
        buying_format="fixed_price",
        category_name=None,
    )


def _normalise_seller(raw: dict) -> Seller:
    stars = raw.get("stars", 0.0)
    feedback_ratio = min(stars / 5.0, 1.0) if stars > 0 else 0.0

    return Seller(
        platform="mercari",
        platform_seller_id=raw["product_id"],
        username=raw.get("username", ""),
        account_age_days=None,  # not available without seller profile page
        feedback_count=raw.get("num_sales", 0),
        feedback_ratio=feedback_ratio,
        category_history_json=json.dumps({}),
    )


def _apply_keyword_filter(listings: list[Listing], must_include: list[str], mode: str) -> list[Listing]:
    if not must_include:
        return listings

    def _matches(listing: Listing) -> bool:
        title = listing.title.lower()
        if mode == "any":
            return any(kw.lower() in title for kw in must_include)
        # "all" (default) and "groups" both require all terms present
        return all(kw.lower() in title for kw in must_include)

    return [l for l in listings if _matches(l)]


def _apply_exclude_filter(listings: list[Listing], must_exclude: list[str]) -> list[Listing]:
    if not must_exclude:
        return listings

    def _clean(listing: Listing) -> bool:
        title = listing.title.lower()
        return not any(term.lower() in title for term in must_exclude)

    return [l for l in listings if _clean(l)]
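How the parsed listing-page fields map onto Seller trust signals via _normalise_seller above (field values invented for illustration):

    raw = {"product_id": "m86032668393", "username": "someseller", "num_sales": 412, "stars": 4.8}
    seller = _normalise_seller(raw)
    seller.feedback_count    # 412 (NumSales)
    seller.feedback_ratio    # 0.96 (4.8 / 5.0, capped at 1.0)
    seller.account_age_days  # None; keeps TrustScore.score_is_partial True for Mercari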
app/platforms/mercari/scraper.py (new file, 165 lines)

@@ -0,0 +1,165 @@
"""Mercari search + listing page scraper.

Uses the shared eBay browser pool (headed Chromium + Xvfb + playwright-stealth)
which already bypasses Cloudflare Turnstile. Import the pool singleton from
ebay.browser_pool so both platforms share the same warm Chromium instances.

Seller data is NOT available from search results HTML — only from individual
listing pages. The adapter lazily fetches listing pages in get_seller().
"""
from __future__ import annotations

import logging
import re
from typing import Optional
from urllib.parse import urlencode

from bs4 import BeautifulSoup, NavigableString

log = logging.getLogger(__name__)

_BASE = "https://www.mercari.com"
_SEARCH_PATH = "/search/"
_ITEM_PATH = "/us/item/"

_PRICE_RE = re.compile(r"[\d,]+\.?\d*")
_POSTED_RE = re.compile(r"(\d{2})/(\d{2})/(\d{2,4})")  # MM/DD/YY or MM/DD/YYYY


def build_search_url(query: str, max_price: Optional[float] = None, min_price: Optional[float] = None) -> str:
    # No explicit sortBy — Mercari's default (relevance) is the most useful order.
    # "sortBy=SORT_SCORE" was a deprecated value that returns an empty results page.
    params: dict = {"keyword": query}
    # Mercari accepts priceMin/priceMax as whole dollar strings (not cents)
    if min_price is not None and min_price > 0:
        params["priceMin"] = str(int(min_price))
    if max_price is not None and max_price > 0:
        params["priceMax"] = str(int(max_price))
    return f"{_BASE}{_SEARCH_PATH}?{urlencode(params)}"


def parse_search_html(html: str) -> list[dict]:
    """Parse Mercari search results HTML into a list of raw listing dicts."""
    soup = BeautifulSoup(html, "html.parser")
    results: list[dict] = []

    for item in soup.find_all(attrs={"data-testid": "ItemContainer"}):
        pid = item.get("data-productid", "")
        if not pid:
            continue

        parent = item.parent
        href = parent.get("href") if parent and parent.name == "a" else None
        url = f"{_BASE}{href}" if href else f"{_BASE}{_ITEM_PATH}{pid}/"

        name_el = item.find(attrs={"data-testid": "ItemName"})
        title = name_el.get_text(strip=True) if name_el else ""

        price = _extract_current_price(item)
        img_el = item.find("img")
        photo_url = img_el.get("src", "") if img_el else ""

        results.append({
            "product_id": pid,
            "url": url,
            "title": title,
            "price": price,
            "photo_url": photo_url,
            "brand": item.get("data-brand", ""),
            "is_on_sale": item.get("data-is-on-sale") == "true",
        })

    return results


def _extract_current_price(item: BeautifulSoup) -> float:
    """Return the current (non-strikethrough) price from an ItemContainer."""
    price_el = item.find(attrs={"data-testid": "ProductThumbItemPrice"})
    if not price_el:
        return 0.0

    # Direct text nodes are the current price; the nested span is the original.
    price_text = "".join(
        str(c) for c in price_el.children if isinstance(c, NavigableString)
    ).strip()

    m = _PRICE_RE.search(price_text)
    if m:
        try:
            return float(m.group().replace(",", ""))
        except ValueError:
            pass
    return 0.0


def parse_listing_html(html: str, product_id: str) -> dict:
    """Parse a Mercari listing page into a raw seller dict."""
    soup = BeautifulSoup(html, "html.parser")

    def _text(testid: str) -> str:
        el = soup.find(attrs={"data-testid": testid})
        return el.get_text(strip=True) if el else ""

    username_raw = _text("ItemDetailsSellerUserName")
    username = username_raw.lstrip("@")

    num_sales = _safe_int(_text("NumSales"))
    rating_count = _safe_int(_text("SellerRatingCount"))

    stars = 0.0
    rw = soup.find(attrs={"data-testid": "ReviewStarsWrapper"})
    if rw:
        try:
            stars = float(rw.get("data-stars", 0))
        except (ValueError, TypeError):
            pass

    condition = _text("ItemDetailsCondition").lower()
    posted_text = _text("ItemDetailsPosted")
    listing_age_days = _parse_listing_age(posted_text)

    price_text = _text("ItemPrice")
    price = 0.0
    m = _PRICE_RE.search(price_text.replace(",", ""))
    if m:
        try:
            price = float(m.group())
        except ValueError:
            pass

    return {
        "product_id": product_id,
        "username": username,
        "num_sales": num_sales,        # completed sales → maps to feedback_count
        "rating_count": rating_count,  # number of reviews (additional signal)
        "stars": stars,                # 0.0–5.0 → divide by 5 = feedback_ratio
        "condition": condition,
        "listing_age_days": listing_age_days,
        "price": price,
    }


def _safe_int(text: str) -> int:
    m = _PRICE_RE.search(text.replace(",", ""))
    if m:
        try:
            return int(float(m.group()))
        except ValueError:
            pass
    return 0


def _parse_listing_age(posted_text: str) -> int:
    """Convert a posted date like '04/10/26' to days since posted."""
    from datetime import datetime, timezone
    m = _POSTED_RE.search(posted_text)
    if not m:
        return 0
    try:
        month, day, year = int(m.group(1)), int(m.group(2)), int(m.group(3))
        if year < 100:
            year += 2000
        posted = datetime(year, month, day, tzinfo=timezone.utc)
        return (datetime.now(timezone.utc) - posted).days
    except (ValueError, OverflowError):
        return 0
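Quick sanity sketch of the URL builder and posted-date parser above (expected outputs derived by reading the code, not captured from a live run):

    build_search_url("rtx 4090", max_price=800)
    # -> "https://www.mercari.com/search/?keyword=rtx+4090&priceMax=800"

    _parse_listing_age("Posted 04/10/25")
    # -> days elapsed since 2025-04-10 (UTC); 0 if no MM/DD/YY date is found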
scripts/debug_fetch_fresh.py (new file, 64 lines)

@@ -0,0 +1,64 @@
"""Reproduce the exact FastAPI code path: pool warmup → slot close → _fetch_fresh.

Run inside the container:
    docker exec -it snipe-api-1 python /app/snipe/scripts/debug_fetch_fresh.py
"""
import sys, time, threading
sys.path.insert(0, '/app/snipe')

from bs4 import BeautifulSoup
from app.platforms.ebay.browser_pool import BrowserPool, _close_slot

URL = "https://www.mercari.com/search/?keyword=rtx+4090&sortBy=SORT_SCORE&priceMax=800"

print("=== Test 1: _fetch_fresh with no pool (baseline) ===", flush=True)
pool0 = BrowserPool(size=0)
t0 = time.time()
html = pool0._fetch_fresh(URL, wait_for_timeout_ms=8000)
items = BeautifulSoup(html, "html.parser").find_all(attrs={"data-testid": "ItemContainer"})
print(f"Items: {len(items)}, HTML: {len(html)}b, elapsed: {time.time()-t0:.1f}s", flush=True)

print("\n=== Test 2: pool warmup (size=2), grab slot, close it, then _fetch_fresh ===", flush=True)
pool2 = BrowserPool(size=2)

# Warmup in background (blocks until done)
warm_done = threading.Event()
def do_warmup():
    pool2.start()
    warm_done.set()

t = threading.Thread(target=do_warmup, daemon=True)
t.start()
warm_done.wait(timeout=30)
print(f"Pool size after warmup: {pool2._q.qsize()}", flush=True)

# Grab a slot and close it (simulating the thread-error path)
import queue
try:
    slot = pool2._q.get(timeout=3.0)
    print(f"Got slot on display :{slot.display_num}", flush=True)
    _close_slot(slot)
    print("Slot closed", flush=True)
except queue.Empty:
    print("Pool empty — no slot to simulate", flush=True)

# Now call _fetch_fresh in this thread (same as FastAPI handler thread)
print("Calling _fetch_fresh from warmup-thread context...", flush=True)
t0 = time.time()
html2 = pool2._fetch_fresh(URL, wait_for_timeout_ms=8000)
items2 = BeautifulSoup(html2, "html.parser").find_all(attrs={"data-testid": "ItemContainer"})
print(f"Items: {len(items2)}, HTML: {len(html2)}b, elapsed: {time.time()-t0:.1f}s", flush=True)

# Save HTML for inspection if empty
if len(items2) == 0:
    with open("/tmp/debug_mercari.html", "w") as f:
        f.write(html2)
    print("Saved HTML to /tmp/debug_mercari.html", flush=True)
    title = BeautifulSoup(html2, "html.parser").find("title")
    print("Page title:", title.get_text() if title else "(none)", flush=True)
    if "Just a moment" in html2 or "turnstile" in html2.lower():
        print("BLOCKED: Cloudflare challenge", flush=True)
    else:
        body = BeautifulSoup(html2, "html.parser").find("body")
        if body:
            print("Body snippet:", body.get_text(separator=" ", strip=True)[:300], flush=True)
scripts/probe_mercari.py (new file, 113 lines)

@@ -0,0 +1,113 @@
"""One-shot Mercari probe using the same headed Chromium + Xvfb + stealth stack
as the eBay scraper. Run inside the snipe-api container:

    docker exec -it snipe-api-1 python /app/scripts/probe_mercari.py
"""
from __future__ import annotations

import itertools
import os
import subprocess
import sys
import time

_display_counter = itertools.count(200)
_CHROMIUM_ARGS = ["--no-sandbox", "--disable-dev-shm-usage"]
_USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)

SEARCH_URL = "https://www.mercari.com/search/?keyword=rtx+4090"
# Give Cloudflare challenge time to resolve (if it does)
WAIT_MS = 8_000


def probe(url: str) -> str:
    from playwright.sync_api import sync_playwright
    from playwright_stealth import Stealth

    display_num = next(_display_counter)
    display = f":{display_num}"
    env = os.environ.copy()
    env["DISPLAY"] = display

    xvfb = subprocess.Popen(
        ["Xvfb", display, "-screen", "0", "1280x800x24"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    time.sleep(0.5)

    try:
        with sync_playwright() as pw:
            browser = pw.chromium.launch(
                headless=False,
                env=env,
                args=_CHROMIUM_ARGS,
            )
            ctx = browser.new_context(
                user_agent=_USER_AGENT,
                viewport={"width": 1280, "height": 800},
            )
            page = ctx.new_page()
            Stealth().apply_stealth_sync(page)
            print(f"[probe] Navigating to {url} …", flush=True)
            response = page.goto(url, wait_until="domcontentloaded", timeout=40_000)
            print(f"[probe] HTTP status: {response.status if response else 'unknown'}", flush=True)
            print(f"[probe] Waiting {WAIT_MS}ms for JS / Turnstile …", flush=True)
            page.wait_for_timeout(WAIT_MS)
            html = page.content()
            title = page.title()
            print(f"[probe] Page title: {title!r}", flush=True)
            browser.close()
    finally:
        xvfb.terminate()
        xvfb.wait()

    return html


def analyse(html: str) -> None:
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "html.parser")

    # Cloudflare challenge indicators
    if "Just a moment" in html or "cf-challenge" in html or "turnstile" in html.lower():
        print("[result] BLOCKED — Cloudflare Turnstile still active")
        return

    print("[result] Cloudflare challenge NOT detected — page appears to have loaded")

    # Try to find listing cards
    # Mercari US uses data-testid or item cards in the DOM
    candidates = [
        soup.select("[data-testid='ItemCell']"),
        soup.select("[data-testid='item-cell']"),
        soup.select("li[data-testid]"),
        soup.select(".merList .merListItem"),
        soup.select("[class*='ItemCell']"),
        soup.select("[class*='item-cell']"),
    ]
    for sel_result in candidates:
        if sel_result:
            print(f"[result] Found {len(sel_result)} listing card(s) via selector")
            card = sel_result[0]
            print(f"[result] First card snippet:\n{card.prettify()[:800]}")
            return

    # Fallback: show body text summary
    body = soup.find("body")
    text = body.get_text(separator=" ", strip=True)[:500] if body else html[:500]
    print(f"[result] No listing cards found. Body text preview:\n{text}")
    # Save full HTML for manual inspection
    out = "/tmp/mercari_probe.html"
    with open(out, "w") as fh:
        fh.write(html)
    print(f"[result] Full HTML saved to {out}")


if __name__ == "__main__":
    html = probe(SEARCH_URL)
    analyse(html)
web/src/views/SearchView.vue

@@ -698,7 +698,7 @@ const parsedMustIncludeGroups = computed(() =>

 const PLATFORMS: { value: string; label: string; available: boolean }[] = [
   { value: 'ebay', label: 'eBay', available: true },
-  { value: 'mercari', label: 'Mercari', available: false },
+  { value: 'mercari', label: 'Mercari', available: true },
   { value: 'poshmark', label: 'Poshmark', available: false },
 ]