snipe/api/main.py
pyr0ball 9e20759dbe feat: wire cloud session, Heimdall licensing, and split-store DB isolation
- api/cloud_session.py: new module — JWT validation (Directus HS256),
  Heimdall provision+tier-resolve, CloudUser+SessionFeatures dataclasses,
  compute_features() tier→feature-flag mapping, require_tier() dependency
  factory, get_session() FastAPI dependency (local-mode transparent passthrough)
- api/main.py: remove _DB_PATH singleton; all endpoints receive session via
  Depends(get_session); shared_store (sellers/comps) and user_store (listings/
  saved_searches) created per-request from session.shared_db / session.user_db;
  pages capped to features.max_pages; saved_searches limit enforced for free tier;
  /api/session endpoint exposes tier+features to frontend; _trigger_scraper_enrichment
  receives shared_db Path (background thread creates its own Store)
- app/platforms/ebay/adapter.py, scraper.py: rename store→shared_store parameter
  (adapters only touch sellers+comps, never listings — naming reflects this)
- app/trust/__init__.py: rename store→shared_store (TrustScorer reads
  sellers+comps from shared DB; listing staging fields come from caller)
- app/db/store.py: refresh_seller_categories gains listing_store param for
  split-DB mode (reads listings from user_store, writes categories to self)
- web/src/stores/session.ts: new Pinia store — bootstrap() fetches /api/session,
  exposes tier+features reactively; falls back to full-access local defaults
- web/src/App.vue: call session.bootstrap() on mount
- web/src/views/SearchView.vue: import session store; pages buttons disabled+greyed
  above features.max_pages with upgrade tooltip
- compose.cloud.yml: add CLOUD_MODE=true + CLOUD_DATA_ROOT env; fix volume mount
- docker/web/nginx.cloud.conf: forward X-CF-Session header from Caddy to API
- .env.example: document cloud env vars (CLOUD_MODE, DIRECTUS_JWT_SECRET, etc.)
2026-03-27 02:07:06 -07:00

460 lines
18 KiB
Python

"""Snipe FastAPI — search endpoint wired to ScrapedEbayAdapter + TrustScorer."""
from __future__ import annotations
import dataclasses
import hashlib
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from fastapi import Depends, FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from circuitforge_core.config import load_env
from app.db.store import Store
from app.db.models import SavedSearch as SavedSearchModel
from app.platforms import SearchFilters
from app.platforms.ebay.scraper import ScrapedEbayAdapter
from app.platforms.ebay.adapter import EbayAdapter
from app.platforms.ebay.auth import EbayTokenManager
from app.platforms.ebay.query_builder import expand_queries, parse_groups
from app.trust import TrustScorer
from api.cloud_session import CloudUser, compute_features, get_session
from api.ebay_webhook import router as ebay_webhook_router
load_env(Path(".env"))
log = logging.getLogger(__name__)
def _ebay_creds() -> tuple[str, str, str]:
"""Return (client_id, client_secret, env) from env vars.
New names: EBAY_APP_ID / EBAY_CERT_ID (sandbox: EBAY_SANDBOX_APP_ID / EBAY_SANDBOX_CERT_ID)
Legacy fallback: EBAY_CLIENT_ID / EBAY_CLIENT_SECRET
"""
env = os.environ.get("EBAY_ENV", "production").strip()
if env == "sandbox":
client_id = os.environ.get("EBAY_SANDBOX_APP_ID", "").strip()
client_secret = os.environ.get("EBAY_SANDBOX_CERT_ID", "").strip()
else:
client_id = (os.environ.get("EBAY_APP_ID") or os.environ.get("EBAY_CLIENT_ID", "")).strip()
client_secret = (os.environ.get("EBAY_CERT_ID") or os.environ.get("EBAY_CLIENT_SECRET", "")).strip()
return client_id, client_secret, env
app = FastAPI(title="Snipe API", version="0.1.0")
app.include_router(ebay_webhook_router)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
@app.get("/api/health")
def health():
return {"status": "ok"}
@app.get("/api/session")
def session_info(session: CloudUser = Depends(get_session)):
"""Return the current session tier and computed feature flags.
Used by the Vue frontend to gate UI features (pages slider cap,
saved search limits, shared DB badges, etc.) without hardcoding
tier logic client-side.
"""
features = compute_features(session.tier)
return {
"user_id": session.user_id,
"tier": session.tier,
"features": dataclasses.asdict(features),
}
def _trigger_scraper_enrichment(
listings: list,
shared_store: Store,
shared_db: Path,
) -> None:
"""Fire-and-forget background enrichment for missing seller signals.
Two enrichment passes run concurrently in the same daemon thread:
1. BTF (/itm/ pages) — fills account_age_days for sellers where it is None.
2. _ssn search pages — fills category_history_json for sellers with no history.
The main response returns immediately; enriched data lands in the DB for
future searches. Uses ScrapedEbayAdapter's Playwright stack regardless of
which adapter was used for the main search (Shopping API handles age for
the API adapter inline; BTF is the fallback for no-creds / scraper mode).
shared_store: used for pre-flight seller checks (same-thread reads).
shared_db: path passed to background thread — it creates its own Store
(sqlite3 connections are not thread-safe).
"""
# Caps per search: limits Playwright sessions launched in the background so we
# don't hammer Kasada or spin up dozens of Xvfb instances after a large search.
# Remaining sellers get enriched incrementally on subsequent searches.
_BTF_MAX_PER_SEARCH = 3
_CAT_MAX_PER_SEARCH = 3
needs_btf: dict[str, str] = {}
needs_categories: list[str] = []
for listing in listings:
sid = listing.seller_platform_id
if not sid:
continue
seller = shared_store.get_seller("ebay", sid)
if not seller:
continue
if (seller.account_age_days is None
and sid not in needs_btf
and len(needs_btf) < _BTF_MAX_PER_SEARCH):
needs_btf[sid] = listing.platform_listing_id
if (seller.category_history_json in ("{}", "", None)
and sid not in needs_categories
and len(needs_categories) < _CAT_MAX_PER_SEARCH):
needs_categories.append(sid)
if not needs_btf and not needs_categories:
return
log.info(
"Scraper enrichment: %d BTF age + %d category pages queued",
len(needs_btf), len(needs_categories),
)
def _run():
try:
enricher = ScrapedEbayAdapter(Store(shared_db))
if needs_btf:
enricher.enrich_sellers_btf(needs_btf, max_workers=2)
log.info("BTF enrichment complete for %d sellers", len(needs_btf))
if needs_categories:
enricher.enrich_sellers_categories(needs_categories, max_workers=2)
log.info("Category enrichment complete for %d sellers", len(needs_categories))
except Exception as e:
log.warning("Scraper enrichment failed: %s", e)
import threading
t = threading.Thread(target=_run, daemon=True)
t.start()
def _parse_terms(raw: str) -> list[str]:
"""Split a comma-separated keyword string into non-empty, stripped terms."""
return [t.strip() for t in raw.split(",") if t.strip()]
def _make_adapter(shared_store: Store, force: str = "auto"):
"""Return the appropriate adapter.
force: "auto" | "api" | "scraper"
auto — API if creds present, else scraper
api — Browse API (raises if no creds)
scraper — Playwright scraper regardless of creds
Adapters receive shared_store because they only read/write sellers and
market_comps — never listings. Listings are returned and saved by the caller.
"""
client_id, client_secret, env = _ebay_creds()
has_creds = bool(client_id and client_secret)
if force == "scraper":
return ScrapedEbayAdapter(shared_store)
if force == "api":
if not has_creds:
raise ValueError("adapter=api requested but no eBay API credentials configured")
return EbayAdapter(EbayTokenManager(client_id, client_secret, env), shared_store, env=env)
# auto
if has_creds:
return EbayAdapter(EbayTokenManager(client_id, client_secret, env), shared_store, env=env)
log.debug("No eBay API credentials — using scraper adapter (partial trust scores)")
return ScrapedEbayAdapter(shared_store)
def _adapter_name(force: str = "auto") -> str:
"""Return the name of the adapter that would be used — without creating it."""
client_id, client_secret, _ = _ebay_creds()
if force == "scraper":
return "scraper"
if force == "api" or (force == "auto" and client_id and client_secret):
return "api"
return "scraper"
@app.get("/api/search")
def search(
q: str = "",
max_price: float = 0,
min_price: float = 0,
pages: int = 1,
must_include: str = "", # raw filter string; client-side always applied
must_include_mode: str = "all", # "all" | "any" | "groups" — drives eBay expansion
must_exclude: str = "", # comma-separated; forwarded to eBay -term + client-side
category_id: str = "", # eBay category ID — forwarded to Browse API / scraper _sacat
adapter: str = "auto", # "auto" | "api" | "scraper" — override adapter selection
session: CloudUser = Depends(get_session),
):
if not q.strip():
return {"listings": [], "trust_scores": {}, "sellers": {}, "market_price": None, "adapter_used": _adapter_name(adapter)}
# Cap pages to the tier's maximum — free cloud users get 1 page, local gets unlimited.
features = compute_features(session.tier)
pages = min(max(1, pages), features.max_pages)
must_exclude_terms = _parse_terms(must_exclude)
# In Groups mode, expand OR groups into multiple targeted eBay queries to
# guarantee comprehensive result coverage — eBay relevance won't silently drop variants.
if must_include_mode == "groups" and must_include.strip():
or_groups = parse_groups(must_include)
ebay_queries = expand_queries(q, or_groups)
else:
ebay_queries = [q]
base_filters = SearchFilters(
max_price=max_price if max_price > 0 else None,
min_price=min_price if min_price > 0 else None,
pages=pages,
must_exclude=must_exclude_terms, # forwarded to eBay -term by the scraper
category_id=category_id.strip() or None,
)
adapter_used = _adapter_name(adapter)
shared_db = session.shared_db
user_db = session.user_db
# Each thread creates its own Store — sqlite3 check_same_thread=True.
def _run_search(ebay_query: str) -> list:
return _make_adapter(Store(shared_db), adapter).search(ebay_query, base_filters)
def _run_comps() -> None:
try:
_make_adapter(Store(shared_db), adapter).get_completed_sales(q, pages)
except Exception:
log.warning("comps: unhandled exception for %r", q, exc_info=True)
try:
# Comps submitted first — guarantees an immediate worker slot even at max concurrency.
# Seller enrichment runs after the executor exits (background thread), so comps are
# always prioritised over tracking seller age / category history.
max_workers = min(len(ebay_queries) + 1, 5)
with ThreadPoolExecutor(max_workers=max_workers) as ex:
comps_future = ex.submit(_run_comps)
search_futures = [ex.submit(_run_search, eq) for eq in ebay_queries]
# Merge and deduplicate across all search queries
seen_ids: set[str] = set()
listings: list = []
for fut in search_futures:
for listing in fut.result():
if listing.platform_listing_id not in seen_ids:
seen_ids.add(listing.platform_listing_id)
listings.append(listing)
comps_future.result() # side-effect: market comp written to shared DB
except Exception as e:
log.warning("eBay scrape failed: %s", e)
raise HTTPException(status_code=502, detail=f"eBay search failed: {e}")
log.info("Multi-search: %d queries → %d unique listings", len(ebay_queries), len(listings))
# Main-thread stores — fresh connections, same thread.
# shared_store: sellers, market_comps (all users share this data)
# user_store: listings, saved_searches (per-user in cloud mode, same file in local mode)
shared_store = Store(shared_db)
user_store = Store(user_db)
user_store.save_listings(listings)
# Derive category_history from accumulated listing data — free for API adapter
# (category_name comes from Browse API response), no-op for scraper listings (category_name=None).
# Reads listings from user_store, writes seller categories to shared_store.
seller_ids = list({l.seller_platform_id for l in listings if l.seller_platform_id})
n_cat = shared_store.refresh_seller_categories("ebay", seller_ids, listing_store=user_store)
if n_cat:
log.info("Category history derived for %d sellers from listing data", n_cat)
# Re-fetch to hydrate staging fields (times_seen, first_seen_at, id, price_at_first_seen)
# that are only available from the DB after the upsert.
staged = user_store.get_listings_staged("ebay", [l.platform_listing_id for l in listings])
listings = [staged.get(l.platform_listing_id, l) for l in listings]
# BTF enrichment: scrape /itm/ pages for sellers missing account_age_days.
# Runs in the background so it doesn't delay the response; next search of
# the same sellers will have full scores.
_trigger_scraper_enrichment(listings, shared_store, shared_db)
scorer = TrustScorer(shared_store)
trust_scores_list = scorer.score_batch(listings, q)
query_hash = hashlib.md5(q.encode()).hexdigest()
comp = shared_store.get_market_comp("ebay", query_hash)
market_price = comp.median_price if comp else None
# Serialize — keyed by platform_listing_id for easy Vue lookup
trust_map = {
listing.platform_listing_id: dataclasses.asdict(ts)
for listing, ts in zip(listings, trust_scores_list)
if ts is not None
}
seller_map = {
listing.seller_platform_id: dataclasses.asdict(
shared_store.get_seller("ebay", listing.seller_platform_id)
)
for listing in listings
if listing.seller_platform_id
and shared_store.get_seller("ebay", listing.seller_platform_id)
}
return {
"listings": [dataclasses.asdict(l) for l in listings],
"trust_scores": trust_map,
"sellers": seller_map,
"market_price": market_price,
"adapter_used": adapter_used,
}
# ── On-demand enrichment ──────────────────────────────────────────────────────
@app.post("/api/enrich")
def enrich_seller(
seller: str,
listing_id: str,
query: str = "",
session: CloudUser = Depends(get_session),
):
"""Synchronous on-demand enrichment for a single seller + re-score.
Runs enrichment paths in parallel:
- Shopping API GetUserProfile (fast, ~500ms) — account_age_days if API creds present
- BTF /itm/ Playwright scrape (~20s) — account_age_days fallback
- _ssn Playwright scrape (~20s) — category_history_json
BTF and _ssn run concurrently; total wall time ~20s when Playwright needed.
Returns the updated trust_score and seller so the frontend can patch in-place.
"""
import threading
shared_store = Store(session.shared_db)
user_store = Store(session.user_db)
shared_db = session.shared_db
seller_obj = shared_store.get_seller("ebay", seller)
if not seller_obj:
raise HTTPException(status_code=404, detail=f"Seller '{seller}' not found")
# Fast path: Shopping API for account age (inline, no Playwright)
try:
api_adapter = _make_adapter(shared_store, "api")
if hasattr(api_adapter, "enrich_sellers_shopping_api"):
api_adapter.enrich_sellers_shopping_api([seller])
except Exception:
pass # no API creds — fall through to BTF
seller_obj = shared_store.get_seller("ebay", seller)
needs_btf = seller_obj is not None and seller_obj.account_age_days is None
needs_categories = seller_obj is None or seller_obj.category_history_json in ("{}", "", None)
# Slow path: Playwright for remaining gaps (BTF + _ssn in parallel threads).
# Each thread creates its own Store — sqlite3 connections are not thread-safe.
if needs_btf or needs_categories:
errors: list[Exception] = []
def _btf():
try:
ScrapedEbayAdapter(Store(shared_db)).enrich_sellers_btf(
{seller: listing_id}, max_workers=1
)
except Exception as e:
errors.append(e)
def _ssn():
try:
ScrapedEbayAdapter(Store(shared_db)).enrich_sellers_categories(
[seller], max_workers=1
)
except Exception as e:
errors.append(e)
threads = []
if needs_btf:
threads.append(threading.Thread(target=_btf, daemon=True))
if needs_categories:
threads.append(threading.Thread(target=_ssn, daemon=True))
for t in threads:
t.start()
for t in threads:
t.join(timeout=60)
if errors:
log.warning("enrich_seller: %d scrape error(s): %s", len(errors), errors[0])
# Re-fetch listing with staging fields, re-score
staged = user_store.get_listings_staged("ebay", [listing_id])
listing = staged.get(listing_id)
if not listing:
raise HTTPException(status_code=404, detail=f"Listing '{listing_id}' not found")
scorer = TrustScorer(shared_store)
trust_list = scorer.score_batch([listing], query or listing.title)
trust = trust_list[0] if trust_list else None
seller_final = shared_store.get_seller("ebay", seller)
return {
"trust_score": dataclasses.asdict(trust) if trust else None,
"seller": dataclasses.asdict(seller_final) if seller_final else None,
}
# ── Saved Searches ────────────────────────────────────────────────────────────
class SavedSearchCreate(BaseModel):
name: str
query: str
filters_json: str = "{}"
@app.get("/api/saved-searches")
def list_saved_searches(session: CloudUser = Depends(get_session)):
user_store = Store(session.user_db)
return {"saved_searches": [dataclasses.asdict(s) for s in user_store.list_saved_searches()]}
@app.post("/api/saved-searches", status_code=201)
def create_saved_search(
body: SavedSearchCreate,
session: CloudUser = Depends(get_session),
):
user_store = Store(session.user_db)
features = compute_features(session.tier)
if features.saved_searches_limit is not None:
existing = user_store.list_saved_searches()
if len(existing) >= features.saved_searches_limit:
raise HTTPException(
status_code=403,
detail=f"Free tier allows up to {features.saved_searches_limit} saved searches. Upgrade to save more.",
)
created = user_store.save_saved_search(
SavedSearchModel(name=body.name, query=body.query, platform="ebay", filters_json=body.filters_json)
)
return dataclasses.asdict(created)
@app.delete("/api/saved-searches/{saved_id}", status_code=204)
def delete_saved_search(saved_id: int, session: CloudUser = Depends(get_session)):
Store(session.user_db).delete_saved_search(saved_id)
@app.patch("/api/saved-searches/{saved_id}/run")
def mark_saved_search_run(saved_id: int, session: CloudUser = Depends(get_session)):
Store(session.user_db).update_saved_search_last_run(saved_id)
return {"ok": True}