Implements full Mercari scraping support for the trust-scoring pipeline: - `app/platforms/mercari/` — new MercariAdapter (scraper-based), scraper (parse_search_html / parse_listing_html), and __init__ - `app/platforms/__init__.py` — adds "mercari" to SUPPORTED_PLATFORMS - `api/main.py` — platform routing: _make_adapter, OR-group guard, seller lookup, BTF/Trading API guards all parameterised by platform - `web/src/views/SearchView.vue` — enables Mercari tab in platform picker BrowserPool stability fixes (browser_pool.py): - Add -ac flag to Xvfb (disables X11 auth requirement in Docker containers) - Shift display counter from :100-:199 to :200-:399 (avoids ghost kernel socket conflicts with low-numbered displays) - Add wait_for_selector / wait_for_timeout_ms params to fetch_html, _fetch_with_slot, _fetch_fresh - Add time.sleep(0.3) in _fetch_fresh after Xvfb start (was missing) Mercari scraper fix: - Remove sortBy=SORT_SCORE from build_search_url — that param is deprecated on Mercari and causes an empty 85KB response instead of search results Probe + debug scripts in scripts/: - probe_mercari.py — standalone Cloudflare bypass test - debug_fetch_fresh.py — pool simulation diagnostic Trust signal coverage: feedback_count, feedback_ratio partial score (account_age_days, category_history absent = score_is_partial=True). get_completed_sales stubbed for Phase 3. Tracks: snipe#53 (pool thread-safety fix, follow-up)
173 lines
6.2 KiB
Python
"""MercariAdapter — scraper-based Mercari platform adapter.
|
|
|
|
Trust signal coverage vs eBay:
|
|
✅ feedback_count (NumSales from listing page)
|
|
✅ feedback_ratio (ReviewStarsWrapper data-stars / 5)
|
|
❌ account_age_days (requires seller profile page — future work)
|
|
❌ category_history (not exposed in HTML — future work)
|
|
✅ price_vs_market (computed by trust scorer from comps, same as eBay)
|
|
|
|
Because account_age and category_history are always None, TrustScore.score_is_partial
|
|
will be True for all Mercari results. The aggregator handles this correctly
|
|
by scoring only from available signals.
|
|
|
|
seller_platform_id on Listing objects holds the product_id (e.g. "m86032668393")
|
|
rather than the seller username, because search results don't expose seller identity.
|
|
get_seller() resolves the product_id → seller by fetching the listing page.
|
|
The DB lookup key is (platform="mercari", platform_seller_id=product_id).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import time
|
|
from typing import Optional
|
|
|
|
from app.db.models import Listing, MarketComp, Seller
|
|
from app.db.store import Store
|
|
from app.platforms import PlatformAdapter, SearchFilters
|
|
from app.platforms.mercari.scraper import (
|
|
build_search_url,
|
|
parse_listing_html,
|
|
parse_search_html,
|
|
)
|
|
|
|
# Module-level logger, namespaced to this module's import path.
log = logging.getLogger(__name__)

# Cached-seller freshness window, in hours.
# NOTE(review): not referenced anywhere in this module — presumably consumed
# by the store layer's cache logic; confirm before relying on it.
_SELLER_CACHE_TTL_HOURS = 6

# Politeness delay (seconds) applied before each listing-page fetch in
# get_seller(), to avoid hammering Mercari with back-to-back requests.
_BETWEEN_LISTING_FETCH_SECS = 1.5
|
|
|
|
|
|
class MercariAdapter(PlatformAdapter):
    """Scraper-backed PlatformAdapter for Mercari (see module docstring)."""

    def __init__(self, store: Store) -> None:
        self._store = store

    def search(self, query: str, filters: SearchFilters) -> list[Listing]:
        """Run one Mercari search and return normalised, de-duplicated listings."""
        # Imported lazily so the browser pool only spins up when actually used.
        from app.platforms.ebay.browser_pool import get_pool

        url = build_search_url(query, filters.max_price, filters.min_price)
        log.info("mercari: fetching search URL: %s", url)

        html = get_pool().fetch_html(
            url,
            delay=1.0,
            wait_for_timeout_ms=8000,
        )

        # De-duplicate by product_id while preserving first-seen order.
        listings: list[Listing] = []
        seen_ids: set[str] = set()
        for item in parse_search_html(html):
            product_id = item["product_id"]
            if product_id not in seen_ids:
                seen_ids.add(product_id)
                listings.append(_normalise_listing(item, query))

        log.info("mercari: parsed %d listings for %r", len(listings), query)

        # Client-side keyword filter (mirrors eBay scraper behaviour).
        if filters.must_include:
            listings = _apply_keyword_filter(listings, filters.must_include, filters.must_include_mode)
        if filters.must_exclude:
            listings = _apply_exclude_filter(listings, filters.must_exclude)

        return listings

    def get_seller(self, seller_platform_id: str) -> Optional[Seller]:
        """Fetch seller data from the listing page identified by seller_platform_id.

        For Mercari, seller_platform_id is the product_id (e.g. "m86032668393")
        because seller usernames aren't available from search results HTML.
        Returns the cached record when present; None on any fetch/parse failure.
        """
        hit = self._store.get_seller("mercari", seller_platform_id)
        if hit:
            return hit

        from app.platforms.ebay.browser_pool import get_pool

        listing_url = f"https://www.mercari.com/us/item/{seller_platform_id}/"
        try:
            # Politeness delay between listing-page fetches.
            time.sleep(_BETWEEN_LISTING_FETCH_SECS)
            page_html = get_pool().fetch_html(
                listing_url,
                delay=0.5,
                wait_for_timeout_ms=6000,
            )
            parsed = parse_listing_html(page_html, seller_platform_id)
            seller = _normalise_seller(parsed)
            self._store.save_seller(seller)
            return seller
        except Exception as exc:
            # Best-effort: a failed seller lookup degrades to a partial score
            # downstream rather than failing the whole request.
            log.warning("mercari: get_seller failed for %s: %s", seller_platform_id, exc)
            return None

    def get_completed_sales(self, query: str, pages: int = 1) -> list[Listing]:
        """Mercari sold-listing comps — stubbed for Phase 3.

        Mercari exposes sold listings via ?status=ITEM_STATUS_TRADING but the
        data is sparse. Phase 3 will implement comp extraction here; for now
        the trust scorer falls back to price_vs_market=None (partial score).
        """
        return []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Normalisation helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _normalise_listing(raw: dict, query: str) -> Listing:
    """Convert one parsed search-result dict into a Listing row.

    *query* is accepted for signature parity with the other platform
    normalisers but is not currently used here.
    """
    photo = raw.get("photo_url")
    return Listing(
        platform="mercari",
        platform_listing_id=raw["product_id"],
        title=raw["title"],
        price=raw["price"],
        currency="USD",
        condition="",  # not available from search results; get_seller() populates this
        seller_platform_id=raw["product_id"],  # see module docstring
        url=raw["url"],
        photo_urls=[photo] if photo else [],
        listing_age_days=0,
        buying_format="fixed_price",
        category_name=None,
    )
|
|
|
|
|
|
def _normalise_seller(raw: dict) -> Seller:
    """Convert a parsed listing-page dict into a Seller row.

    The 0-5 star rating is mapped onto feedback_ratio in [0, 1]; signals that
    Mercari doesn't expose (account age, category history) stay empty, which
    leaves the downstream trust score partial.
    """
    star_rating = raw.get("stars", 0.0)
    if star_rating > 0:
        ratio = min(star_rating / 5.0, 1.0)
    else:
        ratio = 0.0

    return Seller(
        platform="mercari",
        platform_seller_id=raw["product_id"],
        username=raw.get("username", ""),
        account_age_days=None,  # not available without seller profile page
        feedback_count=raw.get("num_sales", 0),
        feedback_ratio=ratio,
        category_history_json=json.dumps({}),
    )
|
|
|
|
|
|
def _apply_keyword_filter(listings: list[Listing], must_include: list[str], mode: str) -> list[Listing]:
|
|
if not must_include:
|
|
return listings
|
|
|
|
def _matches(listing: Listing) -> bool:
|
|
title = listing.title.lower()
|
|
if mode == "any":
|
|
return any(kw.lower() in title for kw in must_include)
|
|
# "all" (default) and "groups" both require all terms present
|
|
return all(kw.lower() in title for kw in must_include)
|
|
|
|
return [l for l in listings if _matches(l)]
|
|
|
|
|
|
def _apply_exclude_filter(listings: list[Listing], must_exclude: list[str]) -> list[Listing]:
|
|
if not must_exclude:
|
|
return listings
|
|
|
|
def _clean(listing: Listing) -> bool:
|
|
title = listing.title.lower()
|
|
return not any(term.lower() in title for term in must_exclude)
|
|
|
|
return [l for l in listings if _clean(l)]
|