feat(search): normalize eBay listing + checkout URLs as item lookup
When the user pastes an eBay listing URL (www.ebay.com/itm/...) or an eBay checkout URL (pay.ebay.com/rxo?itemId=...) into the search field, extract the numeric item ID and use it as the search query.

Supported URL patterns:
- https://www.ebay.com/itm/Title-Slug/123456789012
- https://www.ebay.com/itm/123456789012
- https://ebay.com/itm/123456789012
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
- https://pay.ebay.com/rxo/view?itemId=123456789012

Closes #42
This commit is contained in:
parent
349cff8c50
commit
df4610c57b
2 changed files with 101 additions and 0 deletions
55
api/main.py
55
api/main.py
|
|
@ -10,11 +10,13 @@ import json as _json
|
|||
import logging
|
||||
import os
|
||||
import queue as _queue
|
||||
import re
|
||||
import uuid
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from circuitforge_core.affiliates import wrap_url as _wrap_affiliate_url
|
||||
from circuitforge_core.api import make_corrections_router as _make_corrections_router
|
||||
|
|
@ -210,6 +212,52 @@ async def _lifespan(app: FastAPI):
|
|||
_community_store = None
|
||||
|
||||
|
||||
_EBAY_ITM_RE = re.compile(r"/itm/(?:[^/]+/)?(\d{8,13})(?:[/?#]|$)")
|
||||
_EBAY_ITEM_ID_DIGITS = re.compile(r"^\d{8,13}$")
|
||||
|
||||
|
||||
def _extract_ebay_item_id(q: str) -> str | None:
|
||||
"""Extract a numeric eBay item ID from a URL, or return None if *q* is not an eBay URL.
|
||||
|
||||
Supported formats:
|
||||
- https://www.ebay.com/itm/Title-String/123456789012
|
||||
- https://www.ebay.com/itm/123456789012
|
||||
- https://ebay.com/itm/123456789012
|
||||
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
|
||||
- https://pay.ebay.com/rxo/view?itemId=123456789012
|
||||
"""
|
||||
q = q.strip()
|
||||
# Must look like a URL — require http/https scheme or an ebay.com hostname.
|
||||
if not (q.startswith("http://") or q.startswith("https://")):
|
||||
return None
|
||||
|
||||
try:
|
||||
parsed = urlparse(q)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
host = parsed.hostname or ""
|
||||
if not (host == "ebay.com" or host.endswith(".ebay.com")):
|
||||
return None
|
||||
|
||||
# pay.ebay.com checkout URLs — item ID is in the itemId query param.
|
||||
if host == "pay.ebay.com":
|
||||
params = parse_qs(parsed.query)
|
||||
item_id_list = params.get("itemId") or params.get("itemid")
|
||||
if item_id_list:
|
||||
candidate = item_id_list[0]
|
||||
if _EBAY_ITEM_ID_DIGITS.match(candidate):
|
||||
return candidate
|
||||
return None
|
||||
|
||||
# Standard listing URLs — item ID appears after /itm/.
|
||||
m = _EBAY_ITM_RE.search(parsed.path)
|
||||
if m:
|
||||
return m.group(1)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _ebay_creds() -> tuple[str, str, str]:
|
||||
"""Return (client_id, client_secret, env) from env vars.
|
||||
|
||||
|
|
@ -587,6 +635,13 @@ def search(
|
|||
adapter: str = "auto", # "auto" | "api" | "scraper" — override adapter selection
|
||||
session: CloudUser = Depends(get_session),
|
||||
):
|
||||
# If the user pasted an eBay listing or checkout URL, extract the item ID
|
||||
# and use it as the search query so the exact item surfaces in results.
|
||||
ebay_item_id = _extract_ebay_item_id(q)
|
||||
if ebay_item_id:
|
||||
log.info("search: eBay URL detected, extracted item_id=%s", ebay_item_id)
|
||||
q = ebay_item_id
|
||||
|
||||
if not q.strip():
|
||||
return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import pytest
|
||||
|
||||
from api.main import _extract_ebay_item_id
|
||||
from app.platforms.ebay.normaliser import normalise_listing, normalise_seller
|
||||
|
||||
|
||||
|
|
@ -56,3 +57,48 @@ def test_normalise_seller_maps_fields():
|
|||
assert seller.feedback_count == 300
|
||||
assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001)
|
||||
assert seller.account_age_days > 0
|
||||
|
||||
|
||||
# ── _extract_ebay_item_id ─────────────────────────────────────────────────────
|
||||
|
||||
class TestExtractEbayItemId:
    """Checks that eBay listing and checkout URLs reduce to their item ID."""

    def test_itm_url_with_title_slug(self):
        # Listing URL with a title segment between /itm/ and the ID.
        extracted = _extract_ebay_item_id(
            "https://www.ebay.com/itm/Sony-WH-1000XM5-Headphones/123456789012"
        )
        assert extracted == "123456789012"

    def test_itm_url_without_title_slug(self):
        # Listing URL where the ID directly follows /itm/.
        extracted = _extract_ebay_item_id("https://www.ebay.com/itm/123456789012")
        assert extracted == "123456789012"

    def test_itm_url_no_www(self):
        # Bare ebay.com host without the www. prefix.
        extracted = _extract_ebay_item_id("https://ebay.com/itm/123456789012")
        assert extracted == "123456789012"

    def test_itm_url_with_query_params(self):
        # A query string after the ID must not affect extraction.
        extracted = _extract_ebay_item_id(
            "https://www.ebay.com/itm/123456789012?hash=item1234abcd"
        )
        assert extracted == "123456789012"

    def test_pay_ebay_rxo_with_itemId_query_param(self):
        # Checkout URL: the ID is carried in the itemId query parameter.
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc123&itemId=123456789012"
        )
        assert extracted == "123456789012"

    def test_pay_ebay_rxo_path_with_itemId(self):
        # Checkout URL variant with an extra path segment.
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo/view?itemId=123456789012"
        )
        assert extracted == "123456789012"

    def test_non_ebay_url_returns_none(self):
        extracted = _extract_ebay_item_id("https://amazon.com/dp/B08N5WRWNW")
        assert extracted is None

    def test_plain_keyword_returns_none(self):
        extracted = _extract_ebay_item_id("rtx 4090 gpu")
        assert extracted is None

    def test_empty_string_returns_none(self):
        extracted = _extract_ebay_item_id("")
        assert extracted is None

    def test_ebay_url_no_item_id_returns_none(self):
        # An eBay search-results URL contains no /itm/ segment.
        extracted = _extract_ebay_item_id("https://www.ebay.com/sch/i.html?_nkw=gpu")
        assert extracted is None

    def test_pay_ebay_no_item_id_returns_none(self):
        # Checkout URL that lacks the itemId parameter.
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc"
        )
        assert extracted is None
|
||||
|
|
|
|||
Loading…
Reference in a new issue