feat(search): normalize eBay listing + checkout URLs as item lookup
When the user pastes an eBay listing URL (www.ebay.com/itm/...) or an eBay checkout URL (pay.ebay.com/rxo?itemId=...) into the search field, extract the numeric item ID and use it as the search query.

Supported URL patterns:
- https://www.ebay.com/itm/Title-Slug/123456789012
- https://www.ebay.com/itm/123456789012
- https://ebay.com/itm/123456789012
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
- https://pay.ebay.com/rxo/view?itemId=123456789012

Closes #42
This commit is contained in:
parent
349cff8c50
commit
df4610c57b
2 changed files with 101 additions and 0 deletions
55
api/main.py
55
api/main.py
|
|
@ -10,11 +10,13 @@ import json as _json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import queue as _queue
|
import queue as _queue
|
||||||
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
from circuitforge_core.affiliates import wrap_url as _wrap_affiliate_url
|
from circuitforge_core.affiliates import wrap_url as _wrap_affiliate_url
|
||||||
from circuitforge_core.api import make_corrections_router as _make_corrections_router
|
from circuitforge_core.api import make_corrections_router as _make_corrections_router
|
||||||
|
|
@ -210,6 +212,52 @@ async def _lifespan(app: FastAPI):
|
||||||
_community_store = None
|
_community_store = None
|
||||||
|
|
||||||
|
|
||||||
|
_EBAY_ITM_RE = re.compile(r"/itm/(?:[^/]+/)?(\d{8,13})(?:[/?#]|$)")
|
||||||
|
_EBAY_ITEM_ID_DIGITS = re.compile(r"^\d{8,13}$")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_ebay_item_id(q: str) -> str | None:
|
||||||
|
"""Extract a numeric eBay item ID from a URL, or return None if *q* is not an eBay URL.
|
||||||
|
|
||||||
|
Supported formats:
|
||||||
|
- https://www.ebay.com/itm/Title-String/123456789012
|
||||||
|
- https://www.ebay.com/itm/123456789012
|
||||||
|
- https://ebay.com/itm/123456789012
|
||||||
|
- https://pay.ebay.com/rxo?action=view&sessionid=...&itemId=123456789012
|
||||||
|
- https://pay.ebay.com/rxo/view?itemId=123456789012
|
||||||
|
"""
|
||||||
|
q = q.strip()
|
||||||
|
# Must look like a URL — require http/https scheme or an ebay.com hostname.
|
||||||
|
if not (q.startswith("http://") or q.startswith("https://")):
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
parsed = urlparse(q)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
host = parsed.hostname or ""
|
||||||
|
if not (host == "ebay.com" or host.endswith(".ebay.com")):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# pay.ebay.com checkout URLs — item ID is in the itemId query param.
|
||||||
|
if host == "pay.ebay.com":
|
||||||
|
params = parse_qs(parsed.query)
|
||||||
|
item_id_list = params.get("itemId") or params.get("itemid")
|
||||||
|
if item_id_list:
|
||||||
|
candidate = item_id_list[0]
|
||||||
|
if _EBAY_ITEM_ID_DIGITS.match(candidate):
|
||||||
|
return candidate
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Standard listing URLs — item ID appears after /itm/.
|
||||||
|
m = _EBAY_ITM_RE.search(parsed.path)
|
||||||
|
if m:
|
||||||
|
return m.group(1)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _ebay_creds() -> tuple[str, str, str]:
|
def _ebay_creds() -> tuple[str, str, str]:
|
||||||
"""Return (client_id, client_secret, env) from env vars.
|
"""Return (client_id, client_secret, env) from env vars.
|
||||||
|
|
||||||
|
|
@ -587,6 +635,13 @@ def search(
|
||||||
adapter: str = "auto", # "auto" | "api" | "scraper" — override adapter selection
|
adapter: str = "auto", # "auto" | "api" | "scraper" — override adapter selection
|
||||||
session: CloudUser = Depends(get_session),
|
session: CloudUser = Depends(get_session),
|
||||||
):
|
):
|
||||||
|
# If the user pasted an eBay listing or checkout URL, extract the item ID
|
||||||
|
# and use it as the search query so the exact item surfaces in results.
|
||||||
|
ebay_item_id = _extract_ebay_item_id(q)
|
||||||
|
if ebay_item_id:
|
||||||
|
log.info("search: eBay URL detected, extracted item_id=%s", ebay_item_id)
|
||||||
|
q = ebay_item_id
|
||||||
|
|
||||||
if not q.strip():
|
if not q.strip():
|
||||||
return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)}
|
return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from api.main import _extract_ebay_item_id
|
||||||
from app.platforms.ebay.normaliser import normalise_listing, normalise_seller
|
from app.platforms.ebay.normaliser import normalise_listing, normalise_seller
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -56,3 +57,48 @@ def test_normalise_seller_maps_fields():
|
||||||
assert seller.feedback_count == 300
|
assert seller.feedback_count == 300
|
||||||
assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001)
|
assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001)
|
||||||
assert seller.account_age_days > 0
|
assert seller.account_age_days > 0
|
||||||
|
|
||||||
|
|
||||||
|
# ── _extract_ebay_item_id ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class TestExtractEbayItemId:
    """Checks that eBay listing/checkout URLs are reduced to their item ID.

    Every case calls ``_extract_ebay_item_id`` with one literal input and
    compares the result with either the expected ID string or ``None``.
    """

    def test_itm_url_with_title_slug(self):
        """A listing URL with a title slug before the ID yields the ID."""
        extracted = _extract_ebay_item_id(
            "https://www.ebay.com/itm/Sony-WH-1000XM5-Headphones/123456789012"
        )
        assert extracted == "123456789012"

    def test_itm_url_without_title_slug(self):
        """A listing URL with the ID directly after /itm/ yields the ID."""
        extracted = _extract_ebay_item_id("https://www.ebay.com/itm/123456789012")
        assert extracted == "123456789012"

    def test_itm_url_no_www(self):
        """The bare ebay.com host (no www prefix) is accepted."""
        extracted = _extract_ebay_item_id("https://ebay.com/itm/123456789012")
        assert extracted == "123456789012"

    def test_itm_url_with_query_params(self):
        """A query string after the ID does not affect extraction."""
        extracted = _extract_ebay_item_id(
            "https://www.ebay.com/itm/123456789012?hash=item1234abcd"
        )
        assert extracted == "123456789012"

    def test_pay_ebay_rxo_with_itemId_query_param(self):
        """Checkout URL: the ID is read from the itemId query parameter."""
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc123&itemId=123456789012"
        )
        assert extracted == "123456789012"

    def test_pay_ebay_rxo_path_with_itemId(self):
        """Checkout URL with an extra path segment still yields the ID."""
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo/view?itemId=123456789012"
        )
        assert extracted == "123456789012"

    def test_non_ebay_url_returns_none(self):
        """URLs on other hosts are not treated as eBay items."""
        extracted = _extract_ebay_item_id("https://amazon.com/dp/B08N5WRWNW")
        assert extracted is None

    def test_plain_keyword_returns_none(self):
        """Ordinary keyword searches give None."""
        extracted = _extract_ebay_item_id("rtx 4090 gpu")
        assert extracted is None

    def test_empty_string_returns_none(self):
        """Empty input gives None."""
        extracted = _extract_ebay_item_id("")
        assert extracted is None

    def test_ebay_url_no_item_id_returns_none(self):
        """An eBay URL without an /itm/ item ID gives None."""
        extracted = _extract_ebay_item_id("https://www.ebay.com/sch/i.html?_nkw=gpu")
        assert extracted is None

    def test_pay_ebay_no_item_id_returns_none(self):
        """A checkout URL lacking the itemId parameter gives None."""
        extracted = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc"
        )
        assert extracted is None
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue