feat(search): normalize eBay listing + checkout URLs as item lookup

When the user pastes an eBay listing URL (www.ebay.com/itm/...) or an
eBay checkout URL (pay.ebay.com/rxo?itemId=...) into the search field,
extract the numeric item ID and use it as the search query.

Supported URL patterns:
- https://www.ebay.com/itm/Title-Slug/123456789012
- https://www.ebay.com/itm/123456789012
- https://ebay.com/itm/123456789012
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
- https://pay.ebay.com/rxo/view?itemId=123456789012

Closes #42
This commit is contained in:
pyr0ball 2026-04-20 10:49:17 -07:00
parent 349cff8c50
commit df4610c57b
2 changed files with 101 additions and 0 deletions

View file

@ -10,11 +10,13 @@ import json as _json
import logging import logging
import os import os
import queue as _queue import queue as _queue
import re
import uuid import uuid
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from urllib.parse import parse_qs, urlparse
from circuitforge_core.affiliates import wrap_url as _wrap_affiliate_url from circuitforge_core.affiliates import wrap_url as _wrap_affiliate_url
from circuitforge_core.api import make_corrections_router as _make_corrections_router from circuitforge_core.api import make_corrections_router as _make_corrections_router
@ -210,6 +212,52 @@ async def _lifespan(app: FastAPI):
_community_store = None _community_store = None
_EBAY_ITM_RE = re.compile(r"/itm/(?:[^/]+/)?(\d{8,13})(?:[/?#]|$)")
_EBAY_ITEM_ID_DIGITS = re.compile(r"^\d{8,13}$")
def _extract_ebay_item_id(q: str) -> str | None:
"""Extract a numeric eBay item ID from a URL, or return None if *q* is not an eBay URL.
Supported formats:
- https://www.ebay.com/itm/Title-String/123456789012
- https://www.ebay.com/itm/123456789012
- https://ebay.com/itm/123456789012
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
- https://pay.ebay.com/rxo/view?itemId=123456789012
"""
q = q.strip()
# Must look like a URL — require http/https scheme or an ebay.com hostname.
if not (q.startswith("http://") or q.startswith("https://")):
return None
try:
parsed = urlparse(q)
except Exception:
return None
host = parsed.hostname or ""
if not (host == "ebay.com" or host.endswith(".ebay.com")):
return None
# pay.ebay.com checkout URLs — item ID is in the itemId query param.
if host == "pay.ebay.com":
params = parse_qs(parsed.query)
item_id_list = params.get("itemId") or params.get("itemid")
if item_id_list:
candidate = item_id_list[0]
if _EBAY_ITEM_ID_DIGITS.match(candidate):
return candidate
return None
# Standard listing URLs — item ID appears after /itm/.
m = _EBAY_ITM_RE.search(parsed.path)
if m:
return m.group(1)
return None
def _ebay_creds() -> tuple[str, str, str]: def _ebay_creds() -> tuple[str, str, str]:
"""Return (client_id, client_secret, env) from env vars. """Return (client_id, client_secret, env) from env vars.
@ -587,6 +635,13 @@ def search(
adapter: str = "auto", # "auto" | "api" | "scraper" — override adapter selection adapter: str = "auto", # "auto" | "api" | "scraper" — override adapter selection
session: CloudUser = Depends(get_session), session: CloudUser = Depends(get_session),
): ):
# If the user pasted an eBay listing or checkout URL, extract the item ID
# and use it as the search query so the exact item surfaces in results.
ebay_item_id = _extract_ebay_item_id(q)
if ebay_item_id:
log.info("search: eBay URL detected, extracted item_id=%s", ebay_item_id)
q = ebay_item_id
if not q.strip(): if not q.strip():
return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)} return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)}

View file

@ -1,5 +1,6 @@
import pytest import pytest
from api.main import _extract_ebay_item_id
from app.platforms.ebay.normaliser import normalise_listing, normalise_seller from app.platforms.ebay.normaliser import normalise_listing, normalise_seller
@ -56,3 +57,48 @@ def test_normalise_seller_maps_fields():
assert seller.feedback_count == 300 assert seller.feedback_count == 300
assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001) assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001)
assert seller.account_age_days > 0 assert seller.account_age_days > 0
# ── _extract_ebay_item_id ─────────────────────────────────────────────────────
class TestExtractEbayItemId:
    """Checks that ``_extract_ebay_item_id`` pulls IDs from eBay URLs and rejects everything else."""

    def test_itm_url_with_title_slug(self):
        # Listing path with a human-readable slug segment before the ID.
        extracted = _extract_ebay_item_id(
            "https://www.ebay.com/itm/Sony-WH-1000XM5-Headphones/123456789012"
        )
        assert extracted == "123456789012"

    def test_itm_url_without_title_slug(self):
        # ID directly after /itm/.
        extracted = _extract_ebay_item_id("https://www.ebay.com/itm/123456789012")
        assert extracted == "123456789012"

    def test_itm_url_no_www(self):
        # Bare ebay.com host, no subdomain.
        extracted = _extract_ebay_item_id("https://ebay.com/itm/123456789012")
        assert extracted == "123456789012"

    def test_itm_url_with_query_params(self):
        # A trailing query string must not prevent the match.
        extracted = _extract_ebay_item_id(
            "https://www.ebay.com/itm/123456789012?hash=item1234abcd"
        )
        assert extracted == "123456789012"

    def test_pay_ebay_rxo_with_itemId_query_param(self):
        # Checkout URL: the ID comes from the itemId query parameter.
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc123&itemId=123456789012"
        )
        assert extracted == "123456789012"

    def test_pay_ebay_rxo_path_with_itemId(self):
        # Checkout URL variant with an extra path segment.
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo/view?itemId=123456789012"
        )
        assert extracted == "123456789012"

    def test_non_ebay_url_returns_none(self):
        # A URL on a different host yields nothing.
        result = _extract_ebay_item_id("https://amazon.com/dp/B08N5WRWNW")
        assert result is None

    def test_plain_keyword_returns_none(self):
        # Ordinary keyword searches yield nothing.
        result = _extract_ebay_item_id("rtx 4090 gpu")
        assert result is None

    def test_empty_string_returns_none(self):
        result = _extract_ebay_item_id("")
        assert result is None

    def test_ebay_url_no_item_id_returns_none(self):
        # eBay search-results URL: right host, but no /itm/ segment.
        result = _extract_ebay_item_id("https://www.ebay.com/sch/i.html?_nkw=gpu")
        assert result is None

    def test_pay_ebay_no_item_id_returns_none(self):
        # Checkout URL without an itemId parameter.
        result = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc"
        )
        assert result is None