feat: add EbayCategoryCache.get_relevant and EbayCategoryCache.get_all_for_prompt

This commit is contained in:
pyr0ball 2026-04-14 11:06:13 -07:00
parent 099943b50b
commit 7c73186394
2 changed files with 69 additions and 0 deletions

View file

@ -76,3 +76,41 @@ class EbayCategoryCache:
)
self._conn.commit()
log.info("EbayCategoryCache: seeded %d bootstrap categories.", len(_BOOTSTRAP_CATEGORIES))
def get_relevant(
    self,
    keywords: list[str],
    limit: int = 30,
) -> list[tuple[str, str]]:
    """Return (category_id, full_path) pairs matching any keyword.

    A row matches when a keyword occurs as a literal, case-insensitive
    substring of either its ``name`` or its ``full_path`` column. LIKE
    metacharacters in a keyword (percent, underscore) and the backslash
    escape character are escaped so they match themselves instead of
    acting as wildcards.

    Args:
        keywords: Search terms. Empty or whitespace-only entries are
            ignored, since they would otherwise match every row.
        limit: Value bound to the SQL ``LIMIT`` clause.

    Returns:
        Matching rows ordered by ``name``; an empty list when no usable
        keyword is supplied.
    """
    # A blank keyword would turn into the pattern "%%" and match everything.
    terms = [kw for kw in keywords if kw and kw.strip()]
    if not terms:
        return []

    clause = (
        "(LOWER(name) LIKE ? ESCAPE '\\'"
        " OR LOWER(full_path) LIKE ? ESCAPE '\\')"
    )
    where = " OR ".join([clause] * len(terms))

    patterns: list[str] = []
    for kw in terms:
        # Escape the escape character first so the backslashes added for
        # "%" and "_" are not themselves doubled.
        literal = (
            kw.lower()
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like = f"%{literal}%"
        # One bound value per column in `clause`.
        patterns.extend((like, like))

    cur = self._conn.execute(
        "SELECT category_id, full_path FROM ebay_categories"
        f" WHERE {where} ORDER BY name LIMIT ?",
        [*patterns, limit],
    )
    return [(row[0], row[1]) for row in cur.fetchall()]
def get_all_for_prompt(self, limit: int = 80) -> list[tuple[str, str]]:
    """Fetch (category_id, full_path) pairs without any keyword filter.

    Rows are ordered by ``name`` and ``limit`` is bound to the SQL
    ``LIMIT`` clause. Intended as the fallback when the caller has no
    keyword context to narrow the selection.
    """
    query = (
        "SELECT category_id, full_path FROM ebay_categories ORDER BY name LIMIT ?"
    )
    fetched = self._conn.execute(query, (limit,)).fetchall()
    pairs = []
    for category_id, full_path in fetched:
        pairs.append((category_id, full_path))
    return pairs

View file

@ -56,3 +56,34 @@ def test_seed_bootstrap_populates_rows(db):
cur = db.execute("SELECT COUNT(*) FROM ebay_categories")
count = cur.fetchone()[0]
assert count >= BOOTSTRAP_MIN
def test_get_relevant_keyword_match(db):
    """Keyword search over the bootstrap seed returns category 27386."""
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()
    matched_ids = {
        category_id
        for category_id, _path in category_cache.get_relevant(
            ["GPU", "graphics"], limit=5
        )
    }
    # 27386 is the Graphics Cards category.
    assert "27386" in matched_ids
def test_get_relevant_no_match(db):
    """A keyword expected to match nothing in the seed yields an empty list."""
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()
    assert category_cache.get_relevant(["zzznomatch_xyzxyz"], limit=5) == []
def test_get_relevant_respects_limit(db):
    """get_relevant never returns more rows than the requested limit."""
    max_rows = 3
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()
    rows = category_cache.get_relevant(["electronics"], limit=max_rows)
    assert len(rows) <= max_rows
def test_get_all_for_prompt_returns_rows(db):
    """get_all_for_prompt returns a non-empty list of two-item entries."""
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()
    rows = category_cache.get_all_for_prompt(limit=10)
    assert rows
    # Every entry must be a (category_id, full_path) pair.
    pair_lengths = {len(entry) for entry in rows}
    assert pair_lengths == {2}