feat(search): normalize eBay listing + checkout URLs as item lookup

When the user pastes an eBay listing URL (www.ebay.com/itm/...) or an
eBay checkout URL (pay.ebay.com/rxo?itemId=...) into the search field,
extract the numeric item ID and use it as the search query.

Supported URL patterns:
- https://www.ebay.com/itm/Title-Slug/123456789012
- https://www.ebay.com/itm/123456789012
- https://ebay.com/itm/123456789012
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
- https://pay.ebay.com/rxo/view?itemId=123456789012

Closes #42
This commit is contained in:
pyr0ball 2026-04-20 10:49:17 -07:00
parent 349cff8c50
commit df4610c57b
2 changed files with 101 additions and 0 deletions

View file

@ -10,11 +10,13 @@ import json as _json
import logging
import os
import queue as _queue
import re
import uuid
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Optional
from urllib.parse import parse_qs, urlparse
from circuitforge_core.affiliates import wrap_url as _wrap_affiliate_url
from circuitforge_core.api import make_corrections_router as _make_corrections_router
@ -210,6 +212,52 @@ async def _lifespan(app: FastAPI):
_community_store = None
_EBAY_ITM_RE = re.compile(r"/itm/(?:[^/]+/)?(\d{8,13})(?:[/?#]|$)")
_EBAY_ITEM_ID_DIGITS = re.compile(r"^\d{8,13}$")
def _extract_ebay_item_id(q: str) -> str | None:
"""Extract a numeric eBay item ID from a URL, or return None if *q* is not an eBay URL.
Supported formats:
- https://www.ebay.com/itm/Title-String/123456789012
- https://www.ebay.com/itm/123456789012
- https://ebay.com/itm/123456789012
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
- https://pay.ebay.com/rxo/view?itemId=123456789012
"""
q = q.strip()
# Must look like a URL — require http/https scheme or an ebay.com hostname.
if not (q.startswith("http://") or q.startswith("https://")):
return None
try:
parsed = urlparse(q)
except Exception:
return None
host = parsed.hostname or ""
if not (host == "ebay.com" or host.endswith(".ebay.com")):
return None
# pay.ebay.com checkout URLs — item ID is in the itemId query param.
if host == "pay.ebay.com":
params = parse_qs(parsed.query)
item_id_list = params.get("itemId") or params.get("itemid")
if item_id_list:
candidate = item_id_list[0]
if _EBAY_ITEM_ID_DIGITS.match(candidate):
return candidate
return None
# Standard listing URLs — item ID appears after /itm/.
m = _EBAY_ITM_RE.search(parsed.path)
if m:
return m.group(1)
return None
def _ebay_creds() -> tuple[str, str, str]:
"""Return (client_id, client_secret, env) from env vars.
@ -587,6 +635,13 @@ def search(
adapter: str = "auto", # "auto" | "api" | "scraper" — override adapter selection
session: CloudUser = Depends(get_session),
):
# If the user pasted an eBay listing or checkout URL, extract the item ID
# and use it as the search query so the exact item surfaces in results.
ebay_item_id = _extract_ebay_item_id(q)
if ebay_item_id:
log.info("search: eBay URL detected, extracted item_id=%s", ebay_item_id)
q = ebay_item_id
if not q.strip():
return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)}

View file

@ -1,5 +1,6 @@
import pytest
from api.main import _extract_ebay_item_id
from app.platforms.ebay.normaliser import normalise_listing, normalise_seller
@ -56,3 +57,48 @@ def test_normalise_seller_maps_fields():
assert seller.feedback_count == 300
assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001)
assert seller.account_age_days > 0
# ── _extract_ebay_item_id ─────────────────────────────────────────────────────
class TestExtractEbayItemId:
"""Unit tests for the URL-to-item-ID normaliser."""
def test_itm_url_with_title_slug(self):
url = "https://www.ebay.com/itm/Sony-WH-1000XM5-Headphones/123456789012"
assert _extract_ebay_item_id(url) == "123456789012"
def test_itm_url_without_title_slug(self):
url = "https://www.ebay.com/itm/123456789012"
assert _extract_ebay_item_id(url) == "123456789012"
def test_itm_url_no_www(self):
url = "https://ebay.com/itm/123456789012"
assert _extract_ebay_item_id(url) == "123456789012"
def test_itm_url_with_query_params(self):
url = "https://www.ebay.com/itm/123456789012?hash=item1234abcd"
assert _extract_ebay_item_id(url) == "123456789012"
def test_pay_ebay_rxo_with_itemId_query_param(self):
url = "https://pay.ebay.com/rxo?action=view&sessionid=abc123&itemId=123456789012"
assert _extract_ebay_item_id(url) == "123456789012"
def test_pay_ebay_rxo_path_with_itemId(self):
url = "https://pay.ebay.com/rxo/view?itemId=123456789012"
assert _extract_ebay_item_id(url) == "123456789012"
def test_non_ebay_url_returns_none(self):
assert _extract_ebay_item_id("https://amazon.com/dp/B08N5WRWNW") is None
def test_plain_keyword_returns_none(self):
assert _extract_ebay_item_id("rtx 4090 gpu") is None
def test_empty_string_returns_none(self):
assert _extract_ebay_item_id("") is None
def test_ebay_url_no_item_id_returns_none(self):
assert _extract_ebay_item_id("https://www.ebay.com/sch/i.html?_nkw=gpu") is None
def test_pay_ebay_no_item_id_returns_none(self):
assert _extract_ebay_item_id("https://pay.ebay.com/rxo?action=view&sessionid=abc") is None