feat: EbayCategoryCache refresh from eBay Taxonomy API with bootstrap fallback

This commit is contained in:
pyr0ball 2026-04-14 11:14:52 -07:00
parent 7c73186394
commit 0b8cb63968
2 changed files with 169 additions and 0 deletions

View file

@ -13,6 +13,8 @@ import sqlite3
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Optional from typing import Optional
import requests
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Bootstrap table — common categories for self-hosters without eBay API credentials. # Bootstrap table — common categories for self-hosters without eBay API credentials.
@ -114,3 +116,95 @@ class EbayCategoryCache:
(limit,), (limit,),
) )
return [(row[0], row[1]) for row in cur.fetchall()] return [(row[0], row[1]) for row in cur.fetchall()]
def refresh(
    self,
    token_manager: Optional["EbayTokenManager"] = None,
) -> int:
    """Populate ``ebay_categories`` from eBay, or from the bootstrap table.

    With a token manager, two Taxonomy API requests are issued through
    ``requests.get``: one to look up the default category tree ID for the
    ``EBAY_US`` marketplace and one to download that tree. Every leaf found
    by ``_extract_leaves`` is written with ``INSERT OR REPLACE`` and the
    change is committed. No rows are deleted by this method.

    Without a token manager, or when any step of the API path raises, the
    bootstrap table is seeded via ``self._seed_bootstrap()`` instead. API
    failures are logged at WARNING level together with the traceback.

    Args:
        token_manager: Object exposing ``get_token()``; its result is sent
            as the bearer token. ``None`` selects the bootstrap path.

    Returns:
        After a successful API refresh, the number of leaf tuples extracted
        from the tree. On either bootstrap path, the total row count of
        ``ebay_categories`` after seeding.
    """

    def _seed_and_count() -> int:
        # Shared by the "no credentials" path and the API-failure path.
        self._seed_bootstrap()
        row = self._conn.execute("SELECT COUNT(*) FROM ebay_categories").fetchone()
        return row[0]

    if token_manager is None:
        return _seed_and_count()

    upsert_sql = (
        "INSERT OR REPLACE INTO ebay_categories"
        " (category_id, name, full_path, is_leaf, refreshed_at)"
        " VALUES (?, ?, ?, 1, ?)"
    )

    try:
        bearer = token_manager.get_token()
        auth_headers = {"Authorization": f"Bearer {bearer}"}

        # First request: which category tree serves EBAY_US?
        default_tree_resp = requests.get(
            "https://api.ebay.com/commerce/taxonomy/v1/get_default_category_tree_id",
            params={"marketplace_id": "EBAY_US"},
            headers=auth_headers,
            timeout=30,
        )
        default_tree_resp.raise_for_status()
        tree_id = default_tree_resp.json()["categoryTreeId"]

        # Second request: the whole tree. The longer timeout allows for the
        # large response body.
        full_tree_resp = requests.get(
            f"https://api.ebay.com/commerce/taxonomy/v1/category_tree/{tree_id}",
            headers=auth_headers,
            timeout=120,
        )
        full_tree_resp.raise_for_status()
        payload = full_tree_resp.json()

        # NOTE(review): traversal starts at the root node with an empty
        # path, so the root's own categoryName prefixes every stored
        # full_path — confirm that is the intended breadcrumb format.
        collected: list[tuple[str, str, str]] = []
        _extract_leaves(payload["rootCategoryNode"], path="", leaves=collected)

        stamp = datetime.now(timezone.utc).isoformat()
        rows = [
            (category_id, category_name, breadcrumb, stamp)
            for category_id, category_name, breadcrumb in collected
        ]
        self._conn.executemany(upsert_sql, rows)
        self._conn.commit()

        stored = len(collected)
        log.info(
            "EbayCategoryCache: refreshed %d leaf categories from eBay Taxonomy API.",
            stored,
        )
        return stored
    except Exception:
        # Deliberate best-effort: any failure degrades to the bootstrap set
        # rather than propagating to the caller.
        log.warning(
            "EbayCategoryCache: Taxonomy API refresh failed — falling back to bootstrap.",
            exc_info=True,
        )
        return _seed_and_count()
def _extract_leaves(
node: dict,
path: str,
leaves: list[tuple[str, str, str]],
) -> None:
"""Recursively walk the eBay category tree, collecting leaf node tuples.
Args:
node: A categoryTreeNode dict from the eBay Taxonomy API response.
path: The ancestor breadcrumb, e.g. "Consumer Electronics > Computers".
leaves: Accumulator list of (category_id, name, full_path) tuples.
"""
cat = node["category"]
cat_id: str = cat["categoryId"]
cat_name: str = cat["categoryName"]
full_path = f"{path} > {cat_name}" if path else cat_name
if node.get("leafCategoryTreeNode", False):
leaves.append((cat_id, cat_name, full_path))
return # leaf — no children to recurse into
for child in node.get("childCategoryTreeNodes", []):
_extract_leaves(child, full_path, leaves)

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import sqlite3 import sqlite3
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest import pytest
@ -87,3 +88,77 @@ def test_get_all_for_prompt_returns_rows(db):
assert len(results) > 0 assert len(results) > 0
# Each entry is (category_id, full_path) # Each entry is (category_id, full_path)
assert all(len(r) == 2 for r in results) assert all(len(r) == 2 for r in results)
def _make_tree_response() -> dict:
"""Minimal eBay Taxonomy API tree response with two leaf nodes."""
return {
"categoryTreeId": "0",
"rootCategoryNode": {
"category": {"categoryId": "6000", "categoryName": "Root"},
"leafCategoryTreeNode": False,
"childCategoryTreeNodes": [
{
"category": {"categoryId": "6001", "categoryName": "Electronics"},
"leafCategoryTreeNode": False,
"childCategoryTreeNodes": [
{
"category": {"categoryId": "6002", "categoryName": "GPUs"},
"leafCategoryTreeNode": True,
"childCategoryTreeNodes": [],
},
{
"category": {"categoryId": "6003", "categoryName": "CPUs"},
"leafCategoryTreeNode": True,
"childCategoryTreeNodes": [],
},
],
}
],
},
}
def test_refresh_inserts_leaf_nodes(db):
    """A successful API refresh stores both leaves of the fake tree."""

    def _ok_response(payload: dict) -> MagicMock:
        # Stand-in for a requests response whose .json() yields `payload`.
        response = MagicMock()
        response.raise_for_status = MagicMock()
        response.json.return_value = payload
        return response

    token_source = MagicMock()
    token_source.get_token.return_value = "fake-token"

    # refresh() asks for the tree ID first, then for the tree itself.
    canned = [
        _ok_response({"categoryTreeId": "0"}),
        _ok_response(_make_tree_response()),
    ]

    with patch("app.platforms.ebay.categories.requests.get") as fake_get:
        fake_get.side_effect = canned
        stored = EbayCategoryCache(db).refresh(token_source)

    assert stored == 2  # two leaf nodes in our fake tree

    rows = db.execute(
        "SELECT category_id FROM ebay_categories ORDER BY category_id"
    ).fetchall()
    found_ids = {row[0] for row in rows}
    assert "6002" in found_ids
    assert "6003" in found_ids
def test_refresh_no_token_manager_seeds_bootstrap(db):
    """Without a token manager, refresh() reports at least the bootstrap rows."""
    seeded = EbayCategoryCache(db).refresh(token_manager=None)
    assert not seeded < BOOTSTRAP_MIN
def test_refresh_api_error_logs_warning(db, caplog):
    """An API failure falls back to bootstrap data and logs a warning.

    ``requests.get`` is patched to raise, so ``refresh`` must take its
    exception path: the returned count comes from the bootstrap table and a
    WARNING record announcing the fallback is emitted.
    """
    import logging

    mock_tm = MagicMock()
    mock_tm.get_token.return_value = "fake-token"

    with patch("app.platforms.ebay.categories.requests.get") as mock_get:
        mock_get.side_effect = Exception("network error")
        cache = EbayCategoryCache(db)
        with caplog.at_level(logging.WARNING, logger="app.platforms.ebay.categories"):
            count = cache.refresh(mock_tm)

    # Falls back to bootstrap on API error
    assert count >= BOOTSTRAP_MIN

    # The test name promises a logged warning; check it was actually emitted
    # instead of only capturing it.
    assert any(
        record.levelno == logging.WARNING
        and "falling back to bootstrap" in record.getMessage()
        for record in caplog.records
    )