Compare commits

..

No commits in common. "main" and "feature/affiliate-links" have entirely different histories.

144 changed files with 834 additions and 15254 deletions

View file

@ -1,28 +0,0 @@
[changelog]
header = ""
body = """
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | upper_first }}
{% for commit in commits %}
- {{ commit.message | upper_first }} ([{{ commit.id | truncate(length=7, end="") }}](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/commit/{{ commit.id }}))
{%- endfor %}
{% endfor %}
"""
trim = true
[git]
conventional_commits = true
filter_unconventional = true
split_commits = false
commit_parsers = [
{ message = "^feat", group = "Features" },
{ message = "^fix", group = "Bug Fixes" },
{ message = "^perf", group = "Performance" },
{ message = "^refactor", group = "Refactoring" },
{ message = "^docs", group = "Documentation" },
{ message = "^test", group = "Testing" },
{ message = "^chore", skip = true },
{ message = "^ci", skip = true },
]
filter_commits = false
tag_pattern = "v[0-9].*"

View file

@ -19,25 +19,6 @@ EBAY_SANDBOX_CERT_ID=
# production | sandbox # production | sandbox
EBAY_ENV=production EBAY_ENV=production
# ── eBay OAuth — Authorization Code (user account connection) ─────────────────
# Enables paid-tier users to connect their personal eBay account for instant
# trust scoring via Trading API GetUser (account age + per-category feedback).
# Without this, Snipe falls back to Shopping API + Playwright scraping.
#
# Setup steps:
# 1. Go to https://developer.ebay.com/my/keys → select your Production app
# 2. Under "Auth Accepted URL / RuName", create a new entry:
# - Callback URL: https://your-domain/api/ebay/callback
# (e.g. https://menagerie.circuitforge.tech/snipe/api/ebay/callback)
# - Snipe generates the redirect automatically — just register the URL above
# 3. Copy the RuName value (looks like "YourName-AppName-PRD-xxx-yyy")
# and paste it as EBAY_RUNAME below.
# 4. Set EBAY_OAUTH_REDIRECT_URI to the same HTTPS callback URL.
#
# Self-hosted: your callback URL must be HTTPS and publicly reachable.
# EBAY_RUNAME=YourName-AppName-PRD-xxxxxxxx-xxxxxxxx
# EBAY_OAUTH_REDIRECT_URI=https://your-domain/api/ebay/callback
# ── eBay Account Deletion Webhook ────────────────────────────────────────────── # ── eBay Account Deletion Webhook ──────────────────────────────────────────────
# Register endpoint at https://developer.ebay.com/my/notification — required for # Register endpoint at https://developer.ebay.com/my/notification — required for
# production key activation. Set EBAY_NOTIFICATION_ENDPOINT to the public HTTPS # production key activation. Set EBAY_NOTIFICATION_ENDPOINT to the public HTTPS
@ -51,9 +32,6 @@ EBAY_WEBHOOK_VERIFY_SIGNATURES=true
# ── Database ─────────────────────────────────────────────────────────────────── # ── Database ───────────────────────────────────────────────────────────────────
SNIPE_DB=data/snipe.db SNIPE_DB=data/snipe.db
# Product identifier reported in cf-orch coordinator analytics for per-app breakdown
CF_APP_NAME=snipe
# ── Cloud mode (managed / menagerie instance only) ───────────────────────────── # ── Cloud mode (managed / menagerie instance only) ─────────────────────────────
# Leave unset for self-hosted / local use. When set, per-user DB isolation # Leave unset for self-hosted / local use. When set, per-user DB isolation
# and Heimdall licensing are enabled. compose.cloud.yml sets CLOUD_MODE=true # and Heimdall licensing are enabled. compose.cloud.yml sets CLOUD_MODE=true
@ -76,54 +54,6 @@ CF_APP_NAME=snipe
# own ID; the CF cloud instance uses CF's campaign ID (disclosed in the UI). # own ID; the CF cloud instance uses CF's campaign ID (disclosed in the UI).
# EBAY_AFFILIATE_CAMPAIGN_ID= # EBAY_AFFILIATE_CAMPAIGN_ID=
# ── LLM inference (Search with AI / photo analysis) ──────────────────────────
# For self-hosted use, create config/llm.yaml from config/llm.yaml.example.
# config/llm.yaml is the preferred way to configure backends (supports cf-orch,
# multiple fallback backends, per-backend model selection).
#
# As a quick alternative, circuitforge-core LLMRouter also auto-detects backends
# from these env vars when no llm.yaml is present:
# 1. ANTHROPIC_API_KEY → Claude API (cloud; requires Paid tier key)
# 2. OPENAI_API_KEY → OpenAI-compatible endpoint
# 3. OLLAMA_HOST → local Ollama (default: http://localhost:11434)
# Leave all unset to disable LLM features (Search with AI won't be available).
# ANTHROPIC_API_KEY=
# ANTHROPIC_MODEL=claude-haiku-4-5-20251001
# OPENAI_API_KEY=
# OPENAI_BASE_URL=https://api.openai.com/v1
# OPENAI_MODEL=gpt-4o-mini
# OLLAMA_HOST=http://localhost:11434
# OLLAMA_MODEL=llava:7b
# GPU Server — routes vision/LLM tasks to a cf-orch coordinator for VRAM management.
# Self-hosted: point at a local cf-orch coordinator if you have one running.
# Cloud (internal): managed coordinator at orch.circuitforge.tech.
# Leave unset to run vision tasks inline (no VRAM coordination).
# GPU_SERVER_URL=http://10.1.10.71:7700
#
# CF_ORCH_URL is accepted as a backward-compat alias for GPU_SERVER_URL.
#
# cf-orch agent (compose --profile orch) — coordinator URL for the sidecar agent.
# Defaults to GPU_SERVER_URL if unset.
# CF_ORCH_COORDINATOR_URL=http://10.1.10.71:7700
# ── Shared Postgres (optional — strongly recommended for cloud/multi-user) ────
# When set, sellers, market_comps, reported_sellers, and scammer_blocklist are
# stored in Postgres instead of SQLite. Required to avoid database-locked errors
# under concurrent load (>10 simultaneous search users).
# Cloud instances: set to the cf-postgres DSN. Self-hosted: leave unset for SQLite.
# SNIPE_SHARED_DB_URL=postgresql://snipe:<password>@localhost:5432/snipe_shared
# ── Community DB (optional) ──────────────────────────────────────────────────
# When set, seller trust signals (confirmed scammers added to blocklist) are
# published to the shared community PostgreSQL for cross-user signal aggregation.
# Managed instances: set automatically by cf-orch. Self-hosted: leave unset.
# Requires cf-community-postgres container (cf-orch compose stack).
# COMMUNITY_DB_URL=postgresql://cf_community:<password>@localhost:5432/cf_community
# ── In-app feedback (beta) ──────────────────────────────────────────────────── # ── In-app feedback (beta) ────────────────────────────────────────────────────
# When set, a feedback FAB appears in the UI and routes submissions to Forgejo. # When set, a feedback FAB appears in the UI and routes submissions to Forgejo.
# Leave unset to silently hide the button (demo/offline deployments). # Leave unset to silently hide the button (demo/offline deployments).

View file

@ -1,57 +0,0 @@
name: CI
on:
push:
branches: [main, 'feature/**', 'fix/**']
pull_request:
branches: [main]
jobs:
python:
name: Python tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: pip
# circuitforge-core is a sibling on dev machines but a public GitHub
# mirror in CI — install from there to avoid path-dependency issues.
- name: Install circuitforge-core
run: pip install --no-cache-dir git+https://github.com/CircuitForgeLLC/circuitforge-core.git
- name: Install snipe (dev extras)
run: pip install --no-cache-dir -e ".[dev]"
- name: Lint
run: ruff check .
- name: Test
run: pytest tests/ -v --tb=short
frontend:
name: Frontend typecheck + tests
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '20'
cache: npm
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Typecheck + build
run: npm run build
- name: Unit tests
run: npm run test

View file

@ -1,30 +0,0 @@
name: Mirror
on:
push:
branches: [main]
tags: ['v*']
jobs:
mirror:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Mirror to GitHub
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_MIRROR_TOKEN }}
REPO: ${{ github.event.repository.name }}
run: |
git remote add github "https://x-access-token:${GITHUB_TOKEN}@github.com/CircuitForgeLLC/${REPO}.git"
git push github --mirror
- name: Mirror to Codeberg
env:
CODEBERG_TOKEN: ${{ secrets.CODEBERG_MIRROR_TOKEN }}
REPO: ${{ github.event.repository.name }}
run: |
git remote add codeberg "https://CircuitForge:${CODEBERG_TOKEN}@codeberg.org/CircuitForge/${REPO}.git"
git push codeberg --mirror

View file

@ -1,92 +0,0 @@
name: Release
on:
push:
tags: ['v*']
env:
# Forgejo container registry (BSL product — not pushing to public GHCR)
# cf-agents#3: revisit public registry policy before enabling GHCR push
REGISTRY: git.opensourcesolarpunk.com
IMAGE_API: git.opensourcesolarpunk.com/circuit-forge/snipe-api
IMAGE_WEB: git.opensourcesolarpunk.com/circuit-forge/snipe-web
jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# ── Changelog ────────────────────────────────────────────────────────────
- name: Generate changelog
uses: orhun/git-cliff-action@v3
id: cliff
with:
config: .cliff.toml
args: --latest --strip header
env:
OUTPUT: CHANGES.md
# ── Docker ───────────────────────────────────────────────────────────────
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Forgejo registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.FORGEJO_RELEASE_TOKEN }}
# API image — built with circuitforge-core sibling from GitHub mirror
- name: Checkout circuitforge-core
uses: actions/checkout@v4
with:
repository: CircuitForgeLLC/circuitforge-core
path: circuitforge-core
- name: Build and push API image
uses: docker/build-push-action@v6
with:
context: .
file: Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: |
${{ env.IMAGE_API }}:${{ github.ref_name }}
${{ env.IMAGE_API }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Build and push web image
uses: docker/build-push-action@v6
with:
context: .
file: docker/web/Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: |
${{ env.IMAGE_WEB }}:${{ github.ref_name }}
${{ env.IMAGE_WEB }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
# ── Forgejo Release ───────────────────────────────────────────────────────
- name: Create Forgejo release
env:
FORGEJO_TOKEN: ${{ secrets.FORGEJO_RELEASE_TOKEN }}
REPO: ${{ github.event.repository.name }}
TAG: ${{ github.ref_name }}
NOTES: ${{ steps.cliff.outputs.content }}
run: |
curl -sS -X POST \
"https://git.opensourcesolarpunk.com/api/v1/repos/Circuit-Forge/${REPO}/releases" \
-H "Authorization: token ${FORGEJO_TOKEN}" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg tag "$TAG" --arg body "$NOTES" \
'{tag_name: $tag, name: $tag, body: $body}')"

View file

@ -1,62 +0,0 @@
# Snipe CI — runs on GitHub mirror for public credibility badge.
# Forgejo (.forgejo/workflows/ci.yml) is the canonical CI — keep these in sync.
# No Forgejo-specific secrets used here; circuitforge-core is public on Forgejo.
#
# Note: playwright browser binaries are not installed here — tests using
# headed Chromium (Kasada bypass) are skipped in CI via pytest marks.
name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
backend:
name: Backend (Python)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.11'
cache: pip
- name: Install circuitforge-core
run: pip install git+https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git@main
- name: Install dependencies
run: pip install -e ".[dev]"
- name: Lint
run: ruff check .
- name: Test
run: pytest tests/ -v --tb=short -m "not browser"
frontend:
name: Frontend (Vue)
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '20'
cache: npm
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Type check
run: npx vue-tsc --noEmit
- name: Test
run: npm run test

2
.gitignore vendored
View file

@ -9,5 +9,3 @@ data/
.superpowers/ .superpowers/
web/node_modules/ web/node_modules/
web/dist/ web/dist/
config/llm.yaml
.worktrees/

View file

@ -1,181 +0,0 @@
# Changelog
All notable changes to `snipe` are documented here.
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
Versions follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
---
## [0.5.1] — 2026-04-16
### Added
**Reported sellers tracking** — after bulk-reporting sellers to eBay Trust & Safety, cards show a muted "Reported to eBay" badge so users know not to re-report the same seller.
- Migration 012: `reported_sellers` table in user DB (UNIQUE on platform + seller ID, preserves first-report timestamp on re-report).
- `Store.mark_reported` / `list_reported` methods.
- `POST /api/reported` + `GET /api/reported` endpoints.
- `reported` Pinia store: optimistic local update, best-effort server persistence.
- `ListingCard`: accepts `sellerReported` prop; shows `.card__reported-badge` when true.
- `App.vue`: loads reported store at startup alongside blocklist.
**Community blocklist share toggle** — Settings > Community section (signed-in users only, default OFF).
- Toggle persisted as `community.blocklist_share` via existing user preferences path system.
- Backend `add_to_blocklist` now gates community signal publishing on opt-in preference; privacy-by-architecture: sharing is never implicit.
### Fixed
- SSE live score push (snipe#1) verified working end-to-end: enrichment thread correctly streams re-scored trust scores via `SimpleQueue → StreamingResponse` generator, terminates with `event: done`. Closed.
---
## [0.5.0] — 2026-04-16
### Added
**Listing detail page** — full trust breakdown for any individual listing (closes placeholder)
- `ListingView.vue` rewritten from "coming soon" stub into a full trust breakdown view.
- SVG trust ring: `stroke-dasharray` fill proportional to composite score (0–100), colour-coded `lv-ring--high/mid/low` (≥80 / 50–79 / <50).
- Five-signal breakdown table: account age, feedback count, feedback ratio, price vs. market, category history — each row shows score, max, and a plain-English label.
- Red flag badges: hard flags (`.lv-flag--hard`) for `new_account`, `suspicious_price`, `duplicate_photo`, `zero_feedback`, `established_bad_actor`; soft flags (`.lv-flag--soft`) for `scratch_dent_mentioned`, `long_on_market`, `significant_price_drop`, `account_under_30_days`.
- Triple Red easter egg: new/under-30-days account + suspicious price + photo/actor/zero-feedback/scratch flag combination triggers pulsing red glow animation.
- Partial score warning: `score_is_partial` flag shows `.lv-verdict__partial` notice and "pending" in affected signal rows.
- Seller panel: username, account age, feedback count/ratio, category history JSON, inline block-seller form.
- Photo carousel: thumbnail strip with keyboard-navigable main image.
- Not-found state for direct URL navigation when store is empty.
- `getListing(platformListingId)` getter added to search store.
- `ListingCard.vue`: "Details" link wired to `/listing/:id` route.
**Theme override** — user-controlled dark/light/system toggle in Settings
- `useTheme` composable: module-level `mode` ref, `setMode()` writes `data-theme` attribute + localStorage, `restore()` re-reads localStorage on hard reload.
- `theme.css`: explicit `[data-theme="dark"]` and `[data-theme="light"]` attribute selector blocks so user preference beats OS media query. Snipe mode override preserved.
- `SettingsView.vue`: new Appearance section with System/Dark/Light segmented button group.
- `App.vue`: `restoreTheme()` called in `onMounted` alongside snipe mode restore.
**Frontend test suite** — 32 Vitest tests, all green
- `useTheme.test.ts` (7 tests): defaults, setMode, data-theme attribute, localStorage persistence, restore() behaviour.
- `searchStore.test.ts` (7 tests): getListing() edge cases, pipe characters in IDs, trustScores/sellers map lookups.
- `ListingView.test.ts` (18 tests): not-found state, title/price/score/signals/seller rendering, hard/soft flag badges, no-flags, triple-red class, partial/pending signals, ring colour classes.
### Fixed
- `useTheme.restore()` re-reads from localStorage instead of cached module-level ref — prevented correct theme restore after a `setMode()` call in the same JS session.
- Landing hero subtitle rewritten with narrative opener ("Seen a listing that looks almost too good to pass up?") — universal framing, no category assumptions.
- eBay cancellation callout CTA updated to "Search above to score listings before you commit" — direct action vs. passive notice.
- Tile descriptions: concrete examples added ("40% below median", quoted "scratch and dent") for instant domain recognition.
---
## [0.4.0] — 2026-04-14
### Added
**Search with AI** — natural language to eBay search filters (closes #29, Paid+ tier)
- `QueryTranslator`: sends a free-text prompt to a local LLM (via cf-orch, defaulting to `llama3.1:8b`) with a domain-aware system prompt and eBay Taxonomy category hints. Returns structured `SearchParamsResponse` (keywords, price range, condition, category, sort order, pages).
- `EbayCategoryCache`: bootstraps from a seed list; refreshes from the eBay Browse API Taxonomy endpoint on a 7-day TTL. `get_relevant(query)` injects the 10 closest categories into the system prompt to reduce hallucinated filter values.
- `POST /api/search/build` — tier-gated endpoint (paid+) that accepts `{"prompt": "..."}` and returns populated `SearchParamsResponse`. Wired to `LLMRouter` via the Peregrine-style shim.
- `LLMQueryPanel.vue`: collapsible panel above the search form with a text area, a "Search with AI" button, and an auto-run toggle. A11y (accessibility): `aria-expanded`, `aria-controls`, `aria-live="polite"` on status, keyboard-navigable, `prefers-reduced-motion` guard on collapse animation.
- `useLLMQueryBuilder` composable: manages `buildQuery()` state machine (`idle | loading | done | error`), exposes `autoRun` flag, calls `populateFromLLM()` on the search store.
- `SettingsView`: new "Search with AI" section with the auto-run toggle persisted to user preferences.
- `search.ts`: `populateFromLLM()` merges LLM-returned filters into the store; guards `v-model.number` empty-string edge case (cleared price inputs sent `NaN` to the API).
**Preferences system**
- `Store.get_user_preference` / `set_user_preference` / `get_all_preferences`: dot-path read/write over a singleton `user_preferences` JSON row (immutable update pattern via `circuitforge_core.preferences.paths`).
- `Store.save_community_signal`: persists trust feedback signals to `community_signals` table.
- `preferencesStore` (Pinia): loaded after session bootstrap; `load()` / `set()` / `get()` surface preferences to Vue components.
**Community module** (closes #31 #32 #33)
- `corrections` router wired: `POST /api/community/signal` now lands in SQLite `community_signals`.
- `COMMUNITY_DB_URL` env var documented in `.env.example`.
### Fixed
- `useTrustFeedback`: prefixes fetch URL with `VITE_API_BASE` so feedback signals route correctly under menagerie reverse proxy.
- `App.vue`: skip-to-main link moved before `<AppNav>` so keyboard users reach it as the first focusable element (WCAG 2.4.1 bypass-blocks compliance).
- `@/` path alias removed from Vue components (Vite config had no alias configured; replaced with relative imports to fix production build).
- `search.ts`: LLM-populated filters now sync back into `SearchView` local state so the form reflects the AI-generated values immediately.
- Python import ordering pass (isort) across adapters, trust modules, tasks, and test files.
### Closed
- `#29` LLM query builder — shipped.
- `#31` `#32` `#33` Community corrections router — shipped.
---
## [0.3.0] — 2026-04-14
### Added
**Infrastructure and DevOps**
- `.forgejo/workflows/ci.yml` — Python lint (ruff) + pytest + Vue typecheck + vitest on every PR/push to main. Installs circuitforge-core from GitHub mirror so the CI runner doesn't need the sibling directory.
- `.forgejo/workflows/release.yml` — Docker build and push (api + web images) to Forgejo container registry on `v*` tags. Builds both images multi-arch (amd64 + arm64). Creates a Forgejo release with git-cliff changelog notes.
- `.forgejo/workflows/mirror.yml` — Mirror push to GitHub and Codeberg on main/tags.
- `install.sh` — Full rewrite following the CircuitForge installer pattern: colored output, `--docker` / `--bare-metal` / `--help` flags, auto-detection of Docker/conda/Python/Node/Chromium/Xvfb, license key prompting, structured named functions.
- `docs/nginx-self-hosted.conf` — Sample nginx config for bare-metal self-hosted deployments (SPA fallback, SSE proxy settings, long-term asset caching).
- `docs/getting-started/installation.md` — No-Docker install section: bare-metal instructions, nginx setup, Chromium/Xvfb note.
- `compose.override.yml` — `cf-orch-agent` sidecar service for routing vision tasks to a cf-orch GPU coordinator (`--profile orch` opt-in). `CF_ORCH_COORDINATOR_URL` env var documented.
- `.env.example` — `CF_ORCH_URL` and `CF_ORCH_COORDINATOR_URL` comments expanded with self-hosted coordinator guidance.
**Screenshots** (post CSS fix)
- Retook all docs screenshots (`01-hero`, `02-results`, `03-steal-badge`, `hero`) after the color-mix token fix so tints match the theme in both dark and light mode.
### Closed
- `#1` SSE live score push — already fully implemented in 0.2.0; closed.
- `#22` Forgejo Actions CI/CD — shipped.
- `#24` nginx config for no-Docker self-hosting — shipped.
- `#25` Self-hosted installer script — shipped.
- `#15` cf-orch agent in compose stack — shipped.
- `#27` MCP server — already shipped in 0.2.0; closed.
---
## [0.2.0] — 2026-04-12
### Added
**Trust signal UI** — community feedback on seller trust scores (MIT component layer)
- `web/src/components/TrustFeedbackButtons.vue`: "This score looks right / This score is wrong" button pair displayed below the trust badge on each listing card. Shows "Thanks, noted." on submission with no countdown or urgency.
- `web/src/composables/useTrustFeedback.ts`: `FeedbackState` machine (`idle | sending | confirmed | disputed`). Fail-soft: any network error still transitions to confirmed state — the UI never surfaces signal pipeline failures.
- Slotted into `ListingCard.vue` after the trust badge, inside `.card__score-col`.
- WCAG (Web Content Accessibility Guidelines) 2.1 compliance: `aria-live="polite"` on confirmation message, `aria-busy` during send, keyboard-focusable buttons with `focus-visible` styles, `prefers-reduced-motion` guard on transitions.
- Uses `--trust-high` / `--trust-low` theme CSS custom properties for color consistency.
_Note: The backend signal endpoint (`POST /api/community/signal`) and seller signal store are gated on cf-orch community postgres landing. The UI degrades gracefully when the endpoint is absent._
**Forgejo feedback FAB** (floating action button)
- `FeedbackButton.vue`: floating "Feedback" button in the corner of every view. Opens a two-step modal (type + description → attribution + confirm) that files a Forgejo issue against `Circuit-Forge/snipe`. Hidden when `FORGEJO_API_TOKEN` is unset or in demo mode.
- `GET /api/feedback/status` — returns `{"enabled": bool}` so the button never flashes before checking.
- `POST /api/feedback` — files the issue; returns `issue_number` and `issue_url`.
**Live SSE score push** (closes #1)
- Background enrichment results pushed to the browser via Server-Sent Events as trust scores complete.
---
## [0.1.0] — 2026-03-25
### Added
Initial beta release of Snipe — eBay listing intelligence and trust scoring.
- Listing search via eBay scraper (Kasada bypass with headed Chromium + Xvfb).
- Trust score composite: feedback rate, negative feedback ratio, member age, zero-feedback penalty.
- `TrustScore` dataclass with red flags, partial score flag, composite score (0-100).
- Vue 3 SPA frontend: search view, listing card grid, listing detail view, blocklist management.
- FastAPI backend: `/api/search`, `/api/enrich`, `/api/blocklist`.
- Keyword filtering for search queries.
- SQLite persistence via cf-core `db` module.

View file

@ -5,7 +5,6 @@ WORKDIR /app
# System deps for Playwright/Chromium # System deps for Playwright/Chromium
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
xvfb \ xvfb \
libpq-dev \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Install circuitforge-core from sibling directory (compose sets context: ..) # Install circuitforge-core from sibling directory (compose sets context: ..)

313
README.md
View file

@ -1,181 +1,210 @@
<!-- Logo coming soon — replace docs/snipe-logo.svg when final icon ships --> # Snipe — Auction Sniping & Listing Intelligence
<div align="center">
<img src="docs/snipe-logo.svg" alt="Snipe logo" width="120" />
# Snipe > *Part of the Circuit Forge LLC "AI for the tasks you hate most" suite.*
**Auction intelligence and sniping for people who don't trust the platform.** **Status:** Active — eBay listing intelligence MVP complete (search, trust scoring, affiliate links, feedback FAB, vision task scheduling). Auction sniping engine and multi-platform support are next.
[![License: MIT / BSL 1.1](https://img.shields.io/badge/license-MIT%20%2F%20BSL%201.1-blue)](LICENSE) ## What it does
[![Status: Beta](https://img.shields.io/badge/status-beta-yellow)]()
[![Version](https://img.shields.io/badge/version-0.5.1-green)](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/releases)
[![Forgejo](https://img.shields.io/badge/primary%20repo-Forgejo-orange)](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe)
[![Docs](https://img.shields.io/badge/docs-docs.circuitforge.tech%2Fsnipe-green)](https://docs.circuitforge.tech/snipe)
*Part of the Circuit Forge LLC suite — "AI for the tasks the system made hard on purpose."* Snipe has two layers that work together:
</div>
**Layer 1 — Listing intelligence (MVP, implemented)**
Before you bid, Snipe tells you whether a listing is worth your time. It fetches eBay listings, scores each seller's trustworthiness across five signals, flags suspicious pricing relative to completed sales, and surfaces red flags like new accounts, cosmetic damage buried in titles, and listings that have been sitting unsold for weeks.
**Layer 2 — Auction sniping (roadmap)**
Snipe manages the bid itself: monitors listings across platforms, schedules last-second bids, handles soft-close extensions, and guides you through the post-win logistics (payment routing, shipping coordination, provenance documentation for antiques).
The name comes from the origin of the word "sniping": common snipes are notoriously elusive birds, secretive and camouflaged, that flush suddenly from cover. Shooting one required extreme patience, stillness, and a precise last-second shot. That's the auction strategy.
--- ---
<table> ## Implemented: eBay Listing Intelligence
<tr>
<td><img src="docs/screenshots/hero.png" alt="Snipe search page with filter panel and feature overview"/></td> ### Search & filtering
<td><img src="docs/screenshots/results.png" alt="Search results — trust score badges, STEAL price flags, seller feedback, and market price comparison"/></td> - Full-text eBay search via Browse API (with Playwright scraper fallback when no API credentials configured)
</tr> - Price range, must-include keywords (AND / ANY / OR-groups mode), must-exclude terms, eBay category filter
</table> - OR-group mode expands keyword combinations into multiple targeted queries and deduplicates results — eBay relevance won't silently drop variants
- Pages-to-fetch control: each Browse API page returns up to 200 listings
- Saved searches with one-click re-run that restores all filter settings
### Seller trust scoring
Five signals, each scored 0–20, composited to 0–100:
| Signal | What it measures |
|--------|-----------------|
| `account_age` | Days since eBay account registration |
| `feedback_count` | Total feedback received |
| `feedback_ratio` | Positive feedback percentage |
| `price_vs_market` | Listing price vs. median of recent completed sales |
| `category_history` | Whether seller has history selling in this category |
Scores are marked **partial** when signals are unavailable (e.g. account age not yet enriched). Partial scores are displayed with a visual indicator rather than penalizing the seller for missing data.
### Red flags
Hard filters that override the composite score:
- `new_account` — account registered within 7 days
- `established_bad_actor` — feedback ratio < 80% with 20+ reviews
Soft flags surfaced as warnings:
- `account_under_30_days` — account under 30 days old
- `low_feedback_count` — fewer than 10 reviews
- `suspicious_price` — listing price below 50% of market median *(suppressed automatically when the search returns a heterogeneous price distribution — e.g. mixed laptop generations — to prevent false positives)*
- `duplicate_photo` — same image found on another listing (perceptual hash)
- `scratch_dent_mentioned` — title keywords indicating cosmetic damage, functional problems, or evasive language (see below)
- `long_on_market` — listing has been seen 5+ times over 14+ days without selling
- `significant_price_drop` — current price more than 20% below first-seen price
### Scratch & dent title detection
Scans listing titles for signals the item may have undisclosed damage or problems:
- **Explicit damage**: scratch, scuff, dent, crack, chip, blemish, worn
- **Condition catch-alls**: as is, for parts, parts only, spares or repair
- **Evasive redirects**: "see description", "read description", "see photos for" (seller hiding damage detail in listing body)
- **Functional problems**: "not working", "stopped working", "no power", "dead on arrival", "powers on but", "faulty", "broken screen/hinge/port"
- **DIY/repair listings**: "needs repair", "needs tlc", "project laptop", "for repair", "sold as is"
### Seller enrichment
- **Inline (API adapter)**: account age filled from Browse API `registrationDate` field
- **Background (scraper)**: `/itm/` listing pages scraped for seller "Joined" date via Playwright + Xvfb (Kasada-safe headed Chromium)
- **On-demand**: ↻ button on any listing card triggers `POST /api/enrich` — runs enrichment and re-scores without waiting for a second search
- **Category history**: derived from the seller's accumulated listing data (Browse API `categories` field); improves with every search, no extra API calls
### Affiliate link builder
Listing cards surface eBay affiliate-wrapped URLs. Uses `circuitforge_core.affiliates.wrap_url` — resolution order: user opted out → plain URL; user has BYOK affiliate ID → their ID; CF env var set (`EBAY_AFFILIATE_ID`) → CF's ID; otherwise plain URL. Users can configure their own eBay Partner Network ID or opt out entirely in Settings.
Disclosure tooltip appears on first encounter per-session and on each wrapped link (per-retailer copy from `get_disclosure_text`).
### Feedback FAB
In-app feedback button (bottom-right FAB) opens a modal: title, description, optional screenshot. Posts to the CF feedback endpoint. Status probed on load; FAB hidden if endpoint unreachable.
### Vision task scheduling
Photo condition assessment tasks queued through `circuitforge_core.tasks.TaskScheduler` — VRAM-aware slot management shared with any other LLM workloads on the same host. Runs moondream2 locally (free tier) or Claude vision (paid/cloud). Results stored per-listing and update the trust score card.
### Market price comparison
Completed sales fetched via eBay Marketplace Insights API (with Browse API fallback for app tiers that don't have Insights access). Median stored per query hash, used to score `price_vs_market` across all listings in a search.
### Adapters
| Adapter | When used | Signals available |
|---------|-----------|-------------------|
| Browse API (`api`) | eBay API credentials configured | All signals; account age inline |
| Playwright scraper (`scraper`) | No credentials / forced | All signals except account age (async BTF enrichment) |
| `auto` (default) | — | API if credentials present, scraper otherwise |
--- ---
## Why Snipe? ## Stack
Auction platforms are designed to make you act fast and trust blindly. The closing countdown, the hidden price history, the new-account seller with one feedback — all of it is structured against the buyer. | Layer | Tech | Port |
|-------|------|------|
Snipe inverts that. Before you place a bid, you get a trust score built from five independently sourced signals: seller account age, feedback volume, feedback ratio, price versus recent completed sales, and category history. A hard-coded red flag for new accounts or bad actors overrides the composite. Soft flags surface buried damage disclosures, duplicate photos, and listings that have been sitting unsold for weeks. When the listing is priced well below market, you see a STEAL badge — sourced from eBay Marketplace Insights, not from the seller's description. | Frontend | Vue 3 + Pinia + UnoCSS + Vite (nginx) | 8509 |
| API | FastAPI (uvicorn) | 8510 |
The sniping engine — precise last-second bid submission with NTP (network time protocol) synchronization and soft-close handling — is next on the roadmap. The intelligence layer is live now. | Scraper | Playwright + playwright-stealth + Xvfb | — |
| DB | SQLite (`data/snipe.db`) | — |
--- | Core | circuitforge-core (editable install) | — |
## Features
### Listing intelligence (live)
- **Trust scoring** — five-signal composite score (0–100) per listing: account age, feedback count, feedback ratio, price vs. market, category history
- **Red flag detection** — hard flags for new accounts and established bad actors; soft flags for damage keywords, evasive language, duplicate photos, long-on-market listings, and significant price drops
- **Price vs. market** — listing price compared against completed-sale medians via eBay Marketplace Insights API (Browse API fallback)
- **Keyword filtering** — must-include (AND / ANY / OR-groups), must-exclude, category, price range; OR-groups expand into multiple targeted queries so eBay relevance doesn't silently drop variants
- **Saved searches** — one-click re-run that restores all filter settings
- **Background enrichment** — seller account age scraped via Playwright + Xvfb (Kasada/Cloudflare-safe headed Chromium); on-demand re-score per listing without re-searching
- **LLM query builder** — describe what you want in plain language; an LLM builds the search terms (paid tier)
- **Vision photo assessment** — condition scoring from listing photos via moondream2 locally or Claude vision (paid/cloud); VRAM-aware scheduling via circuitforge-core task scheduler
- **Affiliate link builder** — eBay Partner Network wrapping with user BYOK support and per-retailer disclosure
### Platforms
| Platform | Search | Trust scoring | Completed-sale comps |
|----------|--------|---------------|----------------------|
| **eBay** | Browse API + Playwright fallback | All 5 signals | Marketplace Insights + Browse fallback |
| **Mercari** | Playwright scraper | 3/5 signals (partial) | Phase 3 |
| CT Bids, HiBid, AuctionZip, Invaluable, GovPlanet, Bidsquare, Proxibid | Planned | Planned | Planned |
### Auction sniping engine (roadmap)
- NTP-synchronized last-second bid submission
- Soft-close detection and strategy adjustment
- Proxy bid ladder with configurable max
- Human approval gate before any bid executes
- Post-win workflow: payment routing, shipping coordination, provenance documentation
---
## Quick Start
**Requirements:** Docker with Compose plugin, Git. No API keys required to get started.
```bash
# One-line install — clones to ~/snipe by default
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/raw/branch/main/install.sh)
```
Then open **http://localhost:8509**.
### Manual setup
Snipe's API image builds from a parent context that includes `circuitforge-core`. Both repos must sit as siblings:
```
workspace/
├── snipe/ ← this repo
└── circuitforge-core/ ← required sibling
```
```bash
mkdir snipe-workspace && cd snipe-workspace
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/snipe.git
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git
cd snipe
cp .env.example .env # add eBay API credentials if you have them (optional)
./manage.sh start
```
### Optional: eBay API credentials
Snipe works without credentials using its Playwright scraper fallback. Adding credentials unlocks faster searches and inline seller account age without an extra scrape:
1. Register at [developer.ebay.com](https://developer.ebay.com/my/keys)
2. Copy your Production **App ID** and **Cert ID** into `.env`
3. `./manage.sh restart`
---
## Tiers
| Tier | What you get |
|------|-------------|
| **Free** | eBay + Mercari search, full trust scoring, keyword filtering, saved searches — local LLM only |
| **Paid** | LLM query builder, background saved-search monitoring with alerts, cloud LLM option |
| **Premium** | Vision photo condition assessment, fine-tuned trust models, multi-user |
| **Ultra** | Human-in-the-loop operator — handles CAPTCHAs, phone calls, anything automation can't |
License key format: `CFG-SNPE-XXXX-XXXX-XXXX`
---
## Running ## Running
```bash ```bash
./manage.sh start # start all services ./manage.sh start # start all services
./manage.sh stop # stop ./manage.sh stop # stop
./manage.sh restart # restart
./manage.sh logs # tail logs ./manage.sh logs # tail logs
./manage.sh open # open in browser ./manage.sh open # open in browser
``` ```
--- Cloud stack (shared DB, multi-user):
```bash
## Stack docker compose -f compose.cloud.yml -p snipe-cloud up -d
docker compose -f compose.cloud.yml -p snipe-cloud build api # after Python changes
| Layer | Technology | Port | ```
|-------|-----------|------|
| Frontend | Vue 3 + Pinia + UnoCSS + Vite (served via nginx) | 8509 |
| API | FastAPI (uvicorn) | 8510 |
| Scraper | Playwright + playwright-stealth + Xvfb (Kasada/Cloudflare-safe headed Chromium) | — |
| Database | SQLite (`data/snipe.db`) | — |
| Core | circuitforge-core (editable install) | — |
The scraper stack uses headed Chromium via Xvfb (X virtual framebuffer) with playwright-stealth for all platform access. Headless and `requests`-based approaches are blocked by eBay and Mercari.
--- ---
## Documentation ## Roadmap
Full documentation at **[docs.circuitforge.tech/snipe](https://docs.circuitforge.tech/snipe)** — setup guide, trust scoring algorithm, platform adapter reference, API docs, and self-hosting notes. ### Near-term (eBay)
| Issue | Feature |
|-------|---------|
| [#1](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/1) | SSE/WebSocket live score push — enriched data appears without re-search |
| [#2](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/2) | eBay OAuth (Connect eBay Account) for full trust score access via Trading API |
| [#4](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/4) | Scammer database: community blocklist + batch eBay Trust & Safety reporting |
| [#5](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/5) | UPC/product lookup → LLM-crafted search terms (paid tier) |
| [#8](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/8) | "Triple Red" easter egg: CSS animation when all hard flags fire simultaneously |
| [#11](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/11) | Vision-based photo condition assessment — moondream2 (local) / Claude vision (cloud, paid) |
| [#12](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/12) | Background saved-search monitoring with configurable alerts |
### Cloud / infrastructure
| Issue | Feature |
|-------|---------|
| [#6](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/6) | Shared seller/scammer/comps DB across cloud users (public data, no re-scraping) |
| [#7](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/7) | Shared image hash DB — requires explicit opt-in consent (CF privacy-by-architecture) |
### Auction sniping engine
| Issue | Feature |
|-------|---------|
| [#9](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/9) | Bid scheduling + snipe execution (NTP-synchronized, soft-close handling, human approval gate) |
| [#13](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/13) | Post-win workflow: payment routing, shipping coordination, provenance documentation |
### Multi-platform expansion
| Issue | Feature |
|-------|---------|
| [#10](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/issues/10) | CT Bids, HiBid, AuctionZip, Invaluable, GovPlanet, Bidsquare, Proxibid |
--- ---
## Forgejo-primary ## Primary platforms (full vision)
Snipe is developed and maintained on Forgejo at [git.opensourcesolarpunk.com/Circuit-Forge/snipe](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe). GitHub and Codeberg are read-only mirrors. File issues and submit pull requests on Forgejo. - **eBay** — general + collectibles *(search + trust scoring: implemented)*
- **CT Bids** — Connecticut state surplus and municipal auctions
- **GovPlanet / IronPlanet** — government surplus equipment
- **AuctionZip** — antique auction house aggregator (1,000+ houses)
- **Invaluable / LiveAuctioneers** — fine art and antiques
- **Bidsquare** — antiques and collectibles
- **HiBid** — estate auctions
- **Proxibid** — industrial and collector auctions
--- ## Why auctions are hard
## Contributing Online auctions are frustrating because:
- Winning requires being present at the exact closing moment — sometimes 2 AM
- Platforms vary wildly: some allow proxy bids, some don't; closing times extend on activity
- Scammers exploit auction urgency — new accounts, stolen photos, pressure to pay outside platform
- Price history is hidden — you don't know if an item is underpriced or a trap
- Sellers hide damage in descriptions rather than titles to avoid automated filters
- Shipping logistics for large / fragile antiques require coordination with the auction house
- Provenance documentation is inconsistent across auction houses
Bug reports and feature requests: open an issue on Forgejo. The discovery pipeline (scrapers, adapters, signal extraction) is MIT-licensed — pull requests welcome. AI trust-scoring features are BSL 1.1 — contributions are accepted but the license terms apply. ## Bidding strategy engine (planned)
--- - **Hard snipe**: submit bid N seconds before close (default: 8s)
- **Soft-close handling**: detect if platform extends on last-minute bids; adjust strategy
- **Proxy ladder**: set max and let the engine bid in increments, reserve snipe for final window
- **Reserve detection**: identify likely reserve price from bid history patterns
- **Comparable sales**: pull recent auction results for same/similar items across platforms
## License ## Post-win workflow (planned)
Snipe uses a dual license: 1. Payment method routing (platform-specific: CC, wire, check)
2. Shipping quote requests to approved carriers (freight / large items via uShip; parcel via FedEx/UPS)
3. Condition report request from auction house
4. Provenance packet generation (for antiques / fine art resale or insurance)
5. Add to inventory (for dealers / collectors tracking portfolio value)
| Component | License | ## Product code (license key)
|-----------|---------|
| Discovery pipeline — scrapers, platform adapters, search, keyword filtering | [MIT](LICENSE-MIT) |
| LLM trust-scoring, query builder, vision assessment, AI features | [BSL 1.1](LICENSE-BSL) — free for personal non-commercial self-hosting; commercial use requires a paid license; converts to MIT after 4 years |
Humans own design, architecture, code review, testing, and verification. LLMs are part of our development workflow. [Our positions on LLM use →](https://circuitforge.tech/positions) `CFG-SNPE-XXXX-XXXX-XXXX`
Privacy · Safety · Accessibility — co-equal, non-negotiable. ## Tech notes
[circuitforge.tech](https://circuitforge.tech) - Shared `circuitforge-core` scaffold (DB, LLM router, tier system, config)
- Platform adapters: currently eBay only; AuctionZip, Invaluable, HiBid, CT Bids planned (Playwright + API where available)
- Bid execution: Playwright automation with precise timing (NTP-synchronized)
- Soft-close detection: platform-specific rules engine
- Comparable sales: eBay completed listings via Marketplace Insights API + Browse API fallback
- Vision module: condition assessment from listing photos — moondream2 / Claude vision (paid tier stub in `app/trust/photo.py`)
- **Kasada bypass**: headed Chromium via Xvfb; all scraping uses this path — headless and `requests`-based approaches are blocked by eBay

View file

@ -1,9 +1,11 @@
"""Cloud session resolution for Snipe FastAPI. """Cloud session resolution for Snipe FastAPI.
Delegates JWT validation, Heimdall provisioning, tier resolution, and guest In local mode (CLOUD_MODE unset/false): all functions return a local CloudUser
session management to circuitforge_core.CloudSessionFactory. Snipe-specific with no auth checks, full tier access, and both DB paths pointing to SNIPE_DB.
CloudUser (shared_db + user_db paths), SessionFeatures, and DB helpers are
kept here. In cloud mode (CLOUD_MODE=true): validates the cf_session JWT injected by Caddy
as X-CF-Session, resolves user_id, auto-provisions a free Heimdall license on
first visit, fetches the tier, and returns per-user DB paths.
FastAPI usage: FastAPI usage:
@app.get("/api/search") @app.get("/api/search")
@ -14,14 +16,19 @@ FastAPI usage:
""" """
from __future__ import annotations from __future__ import annotations
import hashlib
import hmac
import logging import logging
import os import os
import re
import time
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from circuitforge_core.cloud_session import CloudSessionFactory as _CoreFactory import jwt as pyjwt
from fastapi import Depends, HTTPException, Request, Response import requests
from fastapi import Depends, HTTPException, Request
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -29,12 +36,19 @@ log = logging.getLogger(__name__)
CLOUD_MODE: bool = os.environ.get("CLOUD_MODE", "").lower() in ("1", "true", "yes") CLOUD_MODE: bool = os.environ.get("CLOUD_MODE", "").lower() in ("1", "true", "yes")
CLOUD_DATA_ROOT: Path = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/snipe-cloud-data")) CLOUD_DATA_ROOT: Path = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/snipe-cloud-data"))
DIRECTUS_JWT_SECRET: str = os.environ.get("DIRECTUS_JWT_SECRET", "")
CF_SERVER_SECRET: str = os.environ.get("CF_SERVER_SECRET", "")
HEIMDALL_URL: str = os.environ.get("HEIMDALL_URL", "https://license.circuitforge.tech")
HEIMDALL_ADMIN_TOKEN: str = os.environ.get("HEIMDALL_ADMIN_TOKEN", "")
# Local-mode DB paths (ignored in cloud mode)
_LOCAL_SNIPE_DB: Path = Path(os.environ.get("SNIPE_DB", "data/snipe.db")) _LOCAL_SNIPE_DB: Path = Path(os.environ.get("SNIPE_DB", "data/snipe.db"))
TIERS = ["free", "paid", "premium", "ultra"] # Tier cache: user_id → (tier, fetched_at_epoch)
_TIER_CACHE: dict[str, tuple[str, float]] = {}
_TIER_CACHE_TTL = 300 # 5 minutes
_core = _CoreFactory(product="snipe") TIERS = ["free", "paid", "premium", "ultra"]
# ── Domain ──────────────────────────────────────────────────────────────────── # ── Domain ────────────────────────────────────────────────────────────────────
@ -57,13 +71,13 @@ class SessionFeatures:
photo_analysis: bool photo_analysis: bool
shared_scammer_db: bool shared_scammer_db: bool
shared_image_db: bool shared_image_db: bool
llm_query_builder: bool
def compute_features(tier: str) -> SessionFeatures: def compute_features(tier: str) -> SessionFeatures:
"""Compute feature flags from tier. Evaluated server-side; sent to frontend.""" """Compute feature flags from tier. Evaluated server-side; sent to frontend."""
local = tier == "local" local = tier == "local"
paid_plus = local or tier in ("paid", "premium", "ultra") paid_plus = local or tier in ("paid", "premium", "ultra")
premium_plus = local or tier in ("premium", "ultra")
return SessionFeatures( return SessionFeatures(
saved_searches=True, # all tiers get saved searches saved_searches=True, # all tiers get saved searches
@ -74,10 +88,82 @@ def compute_features(tier: str) -> SessionFeatures:
photo_analysis=paid_plus, photo_analysis=paid_plus,
shared_scammer_db=paid_plus, shared_scammer_db=paid_plus,
shared_image_db=paid_plus, shared_image_db=paid_plus,
llm_query_builder=paid_plus,
) )
# ── JWT validation ────────────────────────────────────────────────────────────
def _extract_session_token(header_value: str) -> str:
"""Extract cf_session value from a Cookie or X-CF-Session header string."""
# X-CF-Session may be the raw JWT or the full cookie string
m = re.search(r'(?:^|;)\s*cf_session=([^;]+)', header_value)
return m.group(1).strip() if m else header_value.strip()
def validate_session_jwt(token: str) -> str:
    """Validate a cf_session JWT and return the Directus user_id.

    The token is verified as HS256 against DIRECTUS_JWT_SECRET and must carry
    both an ``id`` and an ``exp`` claim. Directus 11+ uses 'id' (not 'sub')
    for the user UUID in its JWT payload.

    Fails closed: when DIRECTUS_JWT_SECRET is empty every token is rejected,
    because verifying against an empty key would accept any token that was
    signed with an empty key.

    Args:
        token: The encoded JWT string.

    Returns:
        The value of the ``id`` claim.

    Raises:
        HTTPException: 401 when the secret is unset, the token fails
            verification, a required claim is missing, or ``id`` is not a
            non-empty string.
    """
    if not DIRECTUS_JWT_SECRET:
        log.error("DIRECTUS_JWT_SECRET is not set; rejecting session token")
        raise HTTPException(status_code=401, detail="Session invalid or expired")

    try:
        payload = pyjwt.decode(
            token,
            DIRECTUS_JWT_SECRET,
            algorithms=["HS256"],
            options={"require": ["id", "exp"]},
        )
        user_id = payload["id"]
    except Exception as exc:
        # Any decode failure maps to the same 401 so callers see one error shape.
        log.debug("JWT validation failed: %s", exc)
        raise HTTPException(status_code=401, detail="Session invalid or expired") from exc

    if not isinstance(user_id, str) or not user_id:
        log.debug("JWT validation failed: 'id' claim is not a non-empty string")
        raise HTTPException(status_code=401, detail="Session invalid or expired")
    return user_id
# ── Heimdall integration ──────────────────────────────────────────────────────
def _ensure_provisioned(user_id: str) -> None:
    """Idempotent: create a free Heimdall license for this user if none exists.

    Best-effort: does nothing when HEIMDALL_ADMIN_TOKEN is unset, and never
    raises. Transport errors and non-2xx responses are logged as warnings so
    a rejected provision (e.g. a bad admin token) is visible in the logs
    instead of being silently dropped.

    Args:
        user_id: Directus user id sent as ``directus_user_id``.
    """
    if not HEIMDALL_ADMIN_TOKEN:
        return
    try:
        resp = requests.post(
            f"{HEIMDALL_URL}/admin/provision",
            json={"directus_user_id": user_id, "product": "snipe", "tier": "free"},
            headers={"Authorization": f"Bearer {HEIMDALL_ADMIN_TOKEN}"},
            timeout=5,
        )
    except Exception as exc:
        log.warning("Heimdall provision failed for user %s: %s", user_id, exc)
        return
    if not resp.ok:
        log.warning(
            "Heimdall provision for user %s returned HTTP %s",
            user_id,
            resp.status_code,
        )
def _fetch_cloud_tier(user_id: str) -> str:
    """Resolve tier from Heimdall with a 5-minute in-process cache.

    A cached entry younger than _TIER_CACHE_TTL seconds is returned directly.
    Without HEIMDALL_ADMIN_TOKEN the tier is "free" and nothing is cached.
    Otherwise Heimdall is queried; any failure (transport error, non-2xx
    response, or a missing/non-string ``tier`` value) resolves to "free".
    The resolved value is cached either way.

    Args:
        user_id: Directus user id sent as ``directus_user_id``.

    Returns:
        The tier name; always a non-empty string.
    """
    now = time.monotonic()
    cached = _TIER_CACHE.get(user_id)
    if cached and (now - cached[1]) < _TIER_CACHE_TTL:
        return cached[0]

    if not HEIMDALL_ADMIN_TOKEN:
        return "free"

    tier = "free"
    try:
        resp = requests.post(
            f"{HEIMDALL_URL}/admin/cloud/resolve",
            json={"directus_user_id": user_id, "product": "snipe"},
            headers={"Authorization": f"Bearer {HEIMDALL_ADMIN_TOKEN}"},
            timeout=5,
        )
        if resp.ok:
            resolved = resp.json().get("tier", "free")
            # Guard against {"tier": null} or other non-string payloads so a
            # bad value is never cached or handed to tier checks.
            if isinstance(resolved, str) and resolved:
                tier = resolved
            else:
                log.warning(
                    "Heimdall returned invalid tier %r for user %s", resolved, user_id
                )
        else:
            log.warning(
                "Heimdall tier resolve for user %s returned HTTP %s",
                user_id,
                resp.status_code,
            )
    except Exception as exc:
        log.warning("Heimdall tier resolve failed for user %s: %s", user_id, exc)
        tier = "free"

    _TIER_CACHE[user_id] = (tier, now)
    return tier
# ── DB path helpers ─────────────────────────────────────────────────────────── # ── DB path helpers ───────────────────────────────────────────────────────────
def _shared_db_path() -> Path: def _shared_db_path() -> Path:
@ -92,39 +178,44 @@ def _user_db_path(user_id: str) -> Path:
return path return path
def _anon_db_path() -> Path:
    """Return the pooled per-user DB path used for unauthenticated visitors.

    Every anonymous visitor shares this single file for listing data; seller
    and market-comp data still go to shared_db as for any other user. The
    parent directory is created on demand.
    """
    pool_dir = CLOUD_DATA_ROOT / "anonymous" / "snipe"
    pool_dir.mkdir(parents=True, exist_ok=True)
    return pool_dir / "user.db"
# ── FastAPI dependency ──────────────────────────────────────────────────────── # ── FastAPI dependency ────────────────────────────────────────────────────────
def get_session(request: Request, response: Response) -> CloudUser: def get_session(request: Request) -> CloudUser:
"""FastAPI dependency — resolves the current user from the request. """FastAPI dependency — resolves the current user from the request.
Delegates auth/tier resolution to cf-core CloudSessionFactory, then maps Local mode: returns a fully-privileged "local" user pointing at SNIPE_DB.
the result to Snipe's CloudUser with shared_db + user_db paths.
Local mode: fully-privileged "local" user pointing at SNIPE_DB.
Cloud mode: validates X-CF-Session JWT, provisions Heimdall license, Cloud mode: validates X-CF-Session JWT, provisions Heimdall license,
resolves tier, returns per-user DB paths. resolves tier, returns per-user DB paths.
Anonymous: guest session with free-tier access to shared scammer corpus.
""" """
core_user = _core.resolve(request, response) if not CLOUD_MODE:
uid, tier = core_user.user_id, core_user.tier return CloudUser(
user_id="local",
tier="local",
shared_db=_LOCAL_SNIPE_DB,
user_db=_LOCAL_SNIPE_DB,
)
if not CLOUD_MODE or uid in ("local", "local-dev"): raw_header = (
return CloudUser(user_id=uid, tier=tier, shared_db=_LOCAL_SNIPE_DB, user_db=_LOCAL_SNIPE_DB) request.headers.get("x-cf-session", "")
if uid.startswith("anon-"): or request.headers.get("cookie", "")
return CloudUser(user_id=uid, tier=tier, shared_db=_shared_db_path(), user_db=_anon_db_path()) )
return CloudUser(user_id=uid, tier=tier, shared_db=_shared_db_path(), user_db=_user_db_path(uid)) if not raw_header:
raise HTTPException(status_code=401, detail="Not authenticated")
token = _extract_session_token(raw_header)
if not token:
raise HTTPException(status_code=401, detail="Not authenticated")
user_id = validate_session_jwt(token)
_ensure_provisioned(user_id)
tier = _fetch_cloud_tier(user_id)
return CloudUser(
user_id=user_id,
tier=tier,
shared_db=_shared_db_path(),
user_db=_user_db_path(user_id),
)
def require_tier(min_tier: str): def require_tier(min_tier: str):

View file

@ -26,14 +26,13 @@ from pathlib import Path
from typing import Optional from typing import Optional
import requests import requests
from fastapi import APIRouter, Header, HTTPException, Request
from cryptography.exceptions import InvalidSignature from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives.asymmetric.ec import ECDSA from cryptography.hazmat.primitives.asymmetric.ec import ECDSA
from cryptography.hazmat.primitives.hashes import SHA1 from cryptography.hazmat.primitives.hashes import SHA1
from cryptography.hazmat.primitives.serialization import load_pem_public_key from cryptography.hazmat.primitives.serialization import load_pem_public_key
from fastapi import APIRouter, Header, HTTPException, Request
from app.db.store import Store from app.db.store import Store
from app.platforms.ebay.auth import EbayTokenManager
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -41,24 +40,6 @@ router = APIRouter()
_DB_PATH = Path(os.environ.get("SNIPE_DB", "data/snipe.db")) _DB_PATH = Path(os.environ.get("SNIPE_DB", "data/snipe.db"))
# ── App-level token manager ───────────────────────────────────────────────────
# Lazily initialized from env vars; shared across all webhook requests.
# The Notification public_key endpoint requires a Bearer app token.
_app_token_manager: EbayTokenManager | None = None
def _get_app_token() -> str | None:
    """Return an eBay app-level Bearer token, or None when credentials are absent.

    Credentials are read from EBAY_APP_ID / EBAY_CERT_ID, falling back to
    EBAY_CLIENT_ID / EBAY_CLIENT_SECRET. The module-level token manager is
    created on first use and reused on later calls.
    """
    global _app_token_manager

    env = os.environ
    app_id = (env.get("EBAY_APP_ID") or env.get("EBAY_CLIENT_ID", "")).strip()
    cert_id = (env.get("EBAY_CERT_ID") or env.get("EBAY_CLIENT_SECRET", "")).strip()
    if not (app_id and cert_id):
        return None

    if _app_token_manager is None:
        _app_token_manager = EbayTokenManager(app_id, cert_id)
    return _app_token_manager.get_token()
# ── Public-key cache ────────────────────────────────────────────────────────── # ── Public-key cache ──────────────────────────────────────────────────────────
# eBay key rotation is rare; 1-hour TTL is appropriate. # eBay key rotation is rare; 1-hour TTL is appropriate.
_KEY_CACHE_TTL = 3600 _KEY_CACHE_TTL = 3600
@ -77,14 +58,7 @@ def _fetch_public_key(kid: str) -> bytes:
return cached[0] return cached[0]
key_url = _EBAY_KEY_URL.format(kid=kid) key_url = _EBAY_KEY_URL.format(kid=kid)
headers: dict[str, str] = {} resp = requests.get(key_url, timeout=10)
app_token = _get_app_token()
if app_token:
headers["Authorization"] = f"Bearer {app_token}"
else:
log.warning("public_key fetch: no app credentials — request will likely fail")
resp = requests.get(key_url, headers=headers, timeout=10)
if not resp.ok: if not resp.ok:
log.error("public key fetch failed: %s %s — body: %s", resp.status_code, key_url, resp.text[:500]) log.error("public key fetch failed: %s %s — body: %s", resp.status_code, key_url, resp.text[:500])
resp.raise_for_status() resp.raise_for_status()
@ -94,42 +68,6 @@ def _fetch_public_key(kid: str) -> bytes:
return pem_bytes return pem_bytes
# ── GET — webhook health check ───────────────────────────────────────────────
@router.get("/api/ebay/webhook-health")
def ebay_webhook_health() -> dict:
    """Lightweight health check for eBay webhook compliance monitoring.

    Returns 200 + status dict when the webhook is fully configured.
    Returns 500 when required env vars are missing or blank.
    Intended for Uptime Kuma or similar uptime monitors.

    Raises:
        HTTPException: status 500 naming every missing setting.
    """

    def _setting(*names: str) -> str:
        # First non-blank value wins; stripping each candidate keeps a
        # whitespace-only variable from counting as configured or from
        # hiding a valid fallback variable.
        for name in names:
            value = os.environ.get(name, "").strip()
            if value:
                return value
        return ""

    endpoint = _setting("EBAY_NOTIFICATION_ENDPOINT")
    required = [
        ("EBAY_NOTIFICATION_TOKEN", _setting("EBAY_NOTIFICATION_TOKEN")),
        ("EBAY_NOTIFICATION_ENDPOINT", endpoint),
        ("EBAY_APP_ID / EBAY_CLIENT_ID", _setting("EBAY_APP_ID", "EBAY_CLIENT_ID")),
        ("EBAY_CERT_ID / EBAY_CLIENT_SECRET", _setting("EBAY_CERT_ID", "EBAY_CLIENT_SECRET")),
    ]
    missing = [label for label, value in required if not value]
    if missing:
        log.error("ebay_webhook_health: missing config: %s", missing)
        raise HTTPException(
            status_code=500,
            detail=f"Webhook misconfigured — missing: {missing}",
        )
    return {
        "status": "ok",
        "endpoint": endpoint,
        "signature_verification": os.environ.get("EBAY_WEBHOOK_VERIFY_SIGNATURES", "true"),
    }
# ── GET — challenge verification ────────────────────────────────────────────── # ── GET — challenge verification ──────────────────────────────────────────────
@router.get("/api/ebay/account-deletion") @router.get("/api/ebay/account-deletion")

File diff suppressed because it is too large Load diff

View file

@ -1,11 +0,0 @@
-- Community trust signals: user feedback on individual trust scores.
-- "This score looks right" (confirmed=1) / "This score is wrong" (confirmed=0).
-- Stored in shared_db so signals aggregate across all users.
CREATE TABLE IF NOT EXISTS community_signals (
id INTEGER PRIMARY KEY AUTOINCREMENT,
seller_id TEXT NOT NULL,
confirmed INTEGER NOT NULL CHECK (confirmed IN (0, 1)),
recorded_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
);
CREATE INDEX IF NOT EXISTS idx_community_signals_seller ON community_signals(seller_id);

View file

@ -1,9 +0,0 @@
-- Per-user preferences stored as a single JSON blob.
-- Lives in user_db (each user has their own DB file) — never in shared.db.
-- Single-row enforced by PRIMARY KEY CHECK (id = 1): acts as a singleton table.
-- Path reads/writes use cf-core preferences.paths (get_path / set_path).
CREATE TABLE IF NOT EXISTS user_preferences (
id INTEGER PRIMARY KEY CHECK (id = 1),
prefs_json TEXT NOT NULL DEFAULT '{}',
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
);

View file

@ -1,23 +0,0 @@
-- LLM output corrections for SFT training pipeline (cf-core make_corrections_router).
-- Stores thumbs-up/down feedback and explicit corrections on LLM-generated content.
-- Used once #29 (LLM query builder) ships; table is safe to pre-create now.
CREATE TABLE IF NOT EXISTS corrections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
item_id TEXT NOT NULL DEFAULT '',
product TEXT NOT NULL,
correction_type TEXT NOT NULL,
input_text TEXT NOT NULL,
original_output TEXT NOT NULL,
corrected_output TEXT NOT NULL DEFAULT '',
rating TEXT NOT NULL DEFAULT 'down',
context TEXT NOT NULL DEFAULT '{}',
opted_in INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_corrections_product
ON corrections (product);
CREATE INDEX IF NOT EXISTS idx_corrections_opted_in
ON corrections (opted_in);

View file

@ -1,16 +0,0 @@
-- app/db/migrations/011_ebay_categories.sql
-- eBay category leaf node cache. Refreshed weekly via EbayCategoryCache.refresh().
-- Seeded with a small bootstrap table when no eBay API credentials are configured.
-- MIT License
CREATE TABLE IF NOT EXISTS ebay_categories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
category_id TEXT NOT NULL UNIQUE,
name TEXT NOT NULL,
full_path TEXT NOT NULL, -- "Consumer Electronics > ... > Leaf Name"
is_leaf INTEGER NOT NULL DEFAULT 1, -- SQLite stores bool as int
refreshed_at TEXT NOT NULL -- ISO8601 timestamp
);
CREATE INDEX IF NOT EXISTS idx_ebay_cat_name
ON ebay_categories (name);

View file

@ -1,12 +0,0 @@
-- Sellers that have been reported; at most one row per
-- (platform, platform_seller_id) because of the UNIQUE constraint below.
CREATE TABLE IF NOT EXISTS reported_sellers (
id INTEGER PRIMARY KEY AUTOINCREMENT,
platform TEXT NOT NULL,
platform_seller_id TEXT NOT NULL,
username TEXT,
reported_at TEXT DEFAULT CURRENT_TIMESTAMP,
reported_by TEXT NOT NULL DEFAULT 'user', -- user | bulk_action
UNIQUE(platform, platform_seller_id)
);
-- NOTE(review): this index covers exactly the columns of the UNIQUE constraint
-- above, which presumably already provides an index for the same lookup —
-- confirm whether the explicit index is needed.
CREATE INDEX IF NOT EXISTS idx_reported_sellers_lookup
ON reported_sellers(platform, platform_seller_id);

View file

@ -1,20 +0,0 @@
-- Migration 013: eBay user OAuth tokens
--
-- Stores per-user eBay Authorization Code tokens so the app can call
-- Trading API GetUser for instant account_age_days + category feedback
-- without Playwright scraping.
--
-- Stored in the per-user DB (user.db), never the shared DB.
-- access_token is short-lived (2h); refresh_token is valid 18 months.
-- The API layer refreshes access_token automatically before expiry.
CREATE TABLE IF NOT EXISTS ebay_user_tokens (
id INTEGER PRIMARY KEY,
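-- Intended to hold a single row per user DB (upsert on reconnect).
-- NOTE: unlike user_preferences, no CHECK (id = 1) enforces the single row here.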
access_token TEXT NOT NULL,
refresh_token TEXT NOT NULL,
expires_at REAL NOT NULL, -- epoch seconds; access token expiry
scopes TEXT NOT NULL DEFAULT '',
connected_at TEXT NOT NULL DEFAULT (datetime('now')),
last_refreshed TEXT
);

View file

@ -1,24 +0,0 @@
-- Migration 014: background monitor settings on saved_searches + watch_alerts table
ALTER TABLE saved_searches ADD COLUMN monitor_enabled INTEGER NOT NULL DEFAULT 0;
ALTER TABLE saved_searches ADD COLUMN poll_interval_min INTEGER NOT NULL DEFAULT 60;
ALTER TABLE saved_searches ADD COLUMN min_trust_score INTEGER NOT NULL DEFAULT 60;
ALTER TABLE saved_searches ADD COLUMN last_checked_at TEXT;
CREATE TABLE IF NOT EXISTS watch_alerts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
saved_search_id INTEGER NOT NULL REFERENCES saved_searches(id) ON DELETE CASCADE,
platform_listing_id TEXT NOT NULL,
title TEXT NOT NULL,
price REAL NOT NULL,
currency TEXT NOT NULL DEFAULT 'USD',
trust_score INTEGER NOT NULL,
url TEXT,
first_alerted_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
dismissed_at TEXT,
UNIQUE(saved_search_id, platform_listing_id)
);
CREATE INDEX IF NOT EXISTS idx_watch_alerts_undismissed
ON watch_alerts(saved_search_id)
WHERE dismissed_at IS NULL;

View file

@ -1,20 +0,0 @@
-- Migration 015: cross-user monitor registry for the background polling loop
--
-- In cloud mode this table lives in shared.db — the polling loop queries it
-- to find all due monitors without scanning per-user DB files.
-- In local mode it lives in the single local DB (same result, one user).
--
-- user_db_path references the per-user snipe user.db so the poller knows
-- which DB to open for the full SavedSearch config and to write alerts.
CREATE TABLE IF NOT EXISTS active_monitors (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_db_path TEXT NOT NULL,
saved_search_id INTEGER NOT NULL,
poll_interval_min INTEGER NOT NULL DEFAULT 60,
last_checked_at TEXT,
UNIQUE(user_db_path, saved_search_id)
);
CREATE INDEX IF NOT EXISTS idx_active_monitors_due
ON active_monitors(last_checked_at);

View file

@ -1,6 +1,5 @@
"""Dataclasses for all Snipe domain objects.""" """Dataclasses for all Snipe domain objects."""
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@ -81,26 +80,6 @@ class SavedSearch:
id: Optional[int] = None id: Optional[int] = None
created_at: Optional[str] = None created_at: Optional[str] = None
last_run_at: Optional[str] = None last_run_at: Optional[str] = None
# Monitor settings (migration 014)
monitor_enabled: bool = False
poll_interval_min: int = 60
min_trust_score: int = 60
last_checked_at: Optional[str] = None
@dataclass
class WatchAlert:
    """Listing that the background monitor flagged for one saved search."""

    # Required: which saved search matched, and what listing was seen.
    saved_search_id: int
    platform_listing_id: str
    title: str
    price: float
    trust_score: int

    # Optional listing details.
    currency: str = "USD"
    url: Optional[str] = None

    # Unset until the alert has been stored / dismissed.
    id: Optional[int] = None
    first_alerted_at: Optional[str] = None
    dismissed_at: Optional[str] = None
@dataclass @dataclass

View file

@ -1,49 +0,0 @@
-- Snipe shared tables: sellers, market_comps, reported_sellers, scammer_blocklist
-- Replaces the equivalent tables in shared.db (SQLite).
-- Per-user tables (listings, trust_scores, saved_searches) remain in SQLite.
CREATE TABLE IF NOT EXISTS sellers (
id BIGSERIAL PRIMARY KEY,
platform TEXT NOT NULL,
platform_seller_id TEXT NOT NULL,
username TEXT NOT NULL,
account_age_days INTEGER,
feedback_count INTEGER NOT NULL DEFAULT 0,
feedback_ratio DOUBLE PRECISION NOT NULL DEFAULT 0,
category_history_json TEXT NOT NULL DEFAULT '{}',
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (platform, platform_seller_id)
);
CREATE TABLE IF NOT EXISTS market_comps (
id BIGSERIAL PRIMARY KEY,
platform TEXT NOT NULL,
query_hash TEXT NOT NULL,
median_price DOUBLE PRECISION NOT NULL,
sample_count INTEGER NOT NULL,
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ NOT NULL,
UNIQUE (platform, query_hash)
);
CREATE TABLE IF NOT EXISTS reported_sellers (
id BIGSERIAL PRIMARY KEY,
platform TEXT NOT NULL,
platform_seller_id TEXT NOT NULL,
username TEXT,
reported_by TEXT NOT NULL DEFAULT 'user',
reported_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (platform, platform_seller_id)
);
CREATE TABLE IF NOT EXISTS scammer_blocklist (
id BIGSERIAL PRIMARY KEY,
platform TEXT NOT NULL,
platform_seller_id TEXT NOT NULL,
username TEXT NOT NULL,
reason TEXT,
source TEXT NOT NULL DEFAULT 'manual',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (platform, platform_seller_id)
);

View file

@ -1,380 +0,0 @@
from __future__ import annotations

import json
import logging
from contextlib import contextmanager
from dataclasses import replace
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import psycopg2
from psycopg2.pool import ThreadedConnectionPool

from app.db.models import MarketComp, ScammerEntry, Seller
log = logging.getLogger(__name__)
_MIN_CONN = 2
_MAX_CONN = 20
class SnipeSharedDB:
    """Thread-safe Postgres connection pool for Snipe shared tables."""

    def __init__(self, dsn: str) -> None:
        """Open a ThreadedConnectionPool (_MIN_CONN.._MAX_CONN connections) on *dsn*."""
        self._pool = ThreadedConnectionPool(_MIN_CONN, _MAX_CONN, dsn=dsn)

    def getconn(self):
        """Borrow a connection from the pool; pair every call with putconn()."""
        return self._pool.getconn()

    def putconn(self, conn) -> None:
        """Hand a borrowed connection back to the pool."""
        self._pool.putconn(conn)

    def close(self) -> None:
        """Close every connection held by the pool."""
        self._pool.closeall()

    def run_migrations(self) -> None:
        """Apply pg_migrations/*.sql in filename order. Idempotent.

        Applied filenames are recorded in _snipe_shared_migrations and skipped
        on later runs. Each migration is committed together with its
        bookkeeping row; on any error the open transaction is rolled back and
        the exception re-raised.
        """
        migrations_dir = Path(__file__).parent / "pg_migrations"
        files = sorted(migrations_dir.glob("*.sql"), key=lambda p: p.name)
        conn = self.getconn()
        try:
            with conn.cursor() as cur:
                cur.execute("""
                    CREATE TABLE IF NOT EXISTS _snipe_shared_migrations (
                        filename TEXT PRIMARY KEY,
                        applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
                    )
                """)
                conn.commit()
                for f in files:
                    cur.execute(
                        "SELECT 1 FROM _snipe_shared_migrations WHERE filename = %s",
                        (f.name,),
                    )
                    if cur.fetchone():
                        continue
                    log.info("Applying migration: %s", f.name)
                    # Explicit encoding: do not let the process locale decide
                    # how the SQL file is decoded.
                    cur.execute(f.read_text(encoding="utf-8"))
                    cur.execute(
                        "INSERT INTO _snipe_shared_migrations (filename) VALUES (%s)",
                        (f.name,),
                    )
                    conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            self.putconn(conn)
class SnipeSharedStore:
    """Postgres-backed store for sellers, market_comps, reported_sellers and scammer_blocklist.

    Satisfies SharedTableProtocol. clone() returns self: the underlying
    ThreadedConnectionPool is already thread-safe, so no new instance is
    needed per thread.
    """

    def __init__(self, db: SnipeSharedDB) -> None:
        self._db = db

    def clone(self) -> "SnipeSharedStore":
        """Return this same instance (see class docstring)."""
        return self

    @contextmanager
    def _cursor(self, *, commit: bool = False):
        """Yield a cursor on a pooled connection and always return the connection.

        With commit=True the transaction is committed once the block finishes
        without error. Any exception triggers a rollback and is re-raised.
        """
        conn = self._db.getconn()
        try:
            with conn.cursor() as cur:
                yield cur
            if commit:
                conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            self._db.putconn(conn)

    # Sellers

    def save_seller(self, seller: Seller) -> None:
        """Upsert a single seller (delegates to save_sellers)."""
        self.save_sellers([seller])

    def save_sellers(self, sellers: list[Seller]) -> None:
        """Batch-upsert sellers keyed on (platform, platform_seller_id).

        On conflict, a NULL account_age_days or an empty category history in
        the incoming row does not overwrite the stored value.
        """
        if not sellers:
            return
        rows = [
            (s.platform, s.platform_seller_id, s.username, s.account_age_days,
             s.feedback_count, s.feedback_ratio, s.category_history_json or "{}")
            for s in sellers
        ]
        with self._cursor(commit=True) as cur:
            cur.executemany(
                """
                INSERT INTO sellers
                    (platform, platform_seller_id, username, account_age_days,
                     feedback_count, feedback_ratio, category_history_json)
                VALUES (%s, %s, %s, %s, %s, %s, %s)
                ON CONFLICT (platform, platform_seller_id) DO UPDATE SET
                    username = EXCLUDED.username,
                    feedback_count = EXCLUDED.feedback_count,
                    feedback_ratio = EXCLUDED.feedback_ratio,
                    account_age_days = COALESCE(
                        EXCLUDED.account_age_days,
                        sellers.account_age_days
                    ),
                    category_history_json = COALESCE(
                        NULLIF(NULLIF(EXCLUDED.category_history_json, '{}'), ''),
                        NULLIF(NULLIF(sellers.category_history_json, '{}'), ''),
                        '{}'
                    ),
                    fetched_at = NOW()
                """,
                rows,
            )

    def get_seller(self, platform: str, platform_seller_id: str) -> Optional[Seller]:
        """Return the stored seller, or None when no row matches."""
        with self._cursor() as cur:
            cur.execute(
                """
                SELECT platform, platform_seller_id, username, account_age_days,
                       feedback_count, feedback_ratio, category_history_json,
                       id, fetched_at
                FROM sellers
                WHERE platform = %s AND platform_seller_id = %s
                """,
                (platform, platform_seller_id),
            )
            row = cur.fetchone()
        if not row:
            return None
        # First seven columns map positionally onto Seller's leading fields.
        return Seller(*row[:7], id=row[7], fetched_at=str(row[8]))

    def delete_seller_data(self, platform: str, platform_seller_id: str) -> None:
        """Delete the seller row for (platform, platform_seller_id)."""
        with self._cursor(commit=True) as cur:
            cur.execute(
                "DELETE FROM sellers WHERE platform = %s AND platform_seller_id = %s",
                (platform, platform_seller_id),
            )

    # MarketComps

    def save_market_comp(self, comp: MarketComp) -> None:
        """Upsert a market comp keyed on (platform, query_hash)."""
        with self._cursor(commit=True) as cur:
            cur.execute(
                """
                INSERT INTO market_comps
                    (platform, query_hash, median_price, sample_count, expires_at)
                VALUES (%s, %s, %s, %s, %s::TIMESTAMPTZ)
                ON CONFLICT (platform, query_hash) DO UPDATE SET
                    median_price = EXCLUDED.median_price,
                    sample_count = EXCLUDED.sample_count,
                    expires_at = EXCLUDED.expires_at,
                    fetched_at = NOW()
                """,
                (comp.platform, comp.query_hash, comp.median_price,
                 comp.sample_count, comp.expires_at),
            )

    def get_market_comp(self, platform: str, query_hash: str) -> Optional[MarketComp]:
        """Return the unexpired market comp for the key, or None."""
        now = datetime.now(timezone.utc).isoformat()
        with self._cursor() as cur:
            cur.execute(
                """
                SELECT platform, query_hash, median_price, sample_count,
                       expires_at, id, fetched_at
                FROM market_comps
                WHERE platform = %s AND query_hash = %s AND expires_at > %s::TIMESTAMPTZ
                """,
                (platform, query_hash, now),
            )
            row = cur.fetchone()
        if not row:
            return None
        # NOTE(review): expires_at is passed through as the driver returns it,
        # while fetched_at is stringified — confirm callers expect that.
        return MarketComp(*row[:5], id=row[5], fetched_at=str(row[6]))

    # Reported Sellers

    def mark_reported(
        self,
        platform: str,
        platform_seller_id: str,
        username: Optional[str] = None,
        reported_by: str = "user",
    ) -> None:
        """Record a report for the seller; an existing row is left untouched."""
        with self._cursor(commit=True) as cur:
            cur.execute(
                """
                INSERT INTO reported_sellers
                    (platform, platform_seller_id, username, reported_by)
                VALUES (%s, %s, %s, %s)
                ON CONFLICT (platform, platform_seller_id) DO NOTHING
                """,
                (platform, platform_seller_id, username, reported_by),
            )

    def list_reported(self, platform: str = "ebay") -> list[str]:
        """Return the platform_seller_id of every reported seller on *platform*."""
        with self._cursor() as cur:
            cur.execute(
                "SELECT platform_seller_id FROM reported_sellers WHERE platform = %s",
                (platform,),
            )
            return [row[0] for row in cur.fetchall()]

    # Seller Category Refresh

    def refresh_seller_categories(
        self,
        platform: str,
        seller_ids: list[str],
        listing_store=None,  # always a SQLite Store in practice
    ) -> int:
        """Derive category_history_json from listing data and update sellers in Postgres.

        listing_store must be provided (it's always the per-user SQLite Store).
        Sellers that already have a non-empty category history are skipped.
        Returns count of sellers updated.
        """
        if not seller_ids or listing_store is None:
            return 0

        from app.platforms.ebay.scraper import _classify_category_label  # lazy to avoid circular

        updated = 0
        for sid in seller_ids:
            seller = self.get_seller(platform, sid)
            if not seller or seller.category_history_json not in ("{}", "", None):
                continue
            # listing_store is always a SQLite Store; access _conn directly for the query.
            rows = listing_store._conn.execute(
                "SELECT category_name, COUNT(*) FROM listings "
                "WHERE platform=? AND seller_platform_id=? AND category_name IS NOT NULL "
                "GROUP BY category_name",
                (platform, sid),
            ).fetchall()
            if not rows:
                continue
            counts: dict[str, int] = {}
            for cat_name, cnt in rows:
                key = _classify_category_label(cat_name)
                if key:
                    counts[key] = counts.get(key, 0) + cnt
            if counts:
                self.save_sellers([replace(seller, category_history_json=json.dumps(counts))])
                updated += 1
        return updated

    # Scammer Blocklist

    def is_blocklisted(self, platform: str, platform_seller_id: str) -> bool:
        """Return True when the seller has a scammer_blocklist row."""
        with self._cursor() as cur:
            cur.execute(
                "SELECT 1 FROM scammer_blocklist "
                "WHERE platform = %s AND platform_seller_id = %s LIMIT 1",
                (platform, platform_seller_id),
            )
            return cur.fetchone() is not None

    def add_to_blocklist(self, entry: ScammerEntry) -> ScammerEntry:
        """Upsert *entry* and return a copy carrying the stored id and created_at.

        A NULL reason in *entry* keeps the previously stored reason.
        """
        with self._cursor(commit=True) as cur:
            # RETURNING yields the inserted-or-updated row in the same
            # statement, so no follow-up SELECT is needed.
            cur.execute(
                """
                INSERT INTO scammer_blocklist
                    (platform, platform_seller_id, username, reason, source)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT (platform, platform_seller_id) DO UPDATE SET
                    username = EXCLUDED.username,
                    reason = COALESCE(EXCLUDED.reason, scammer_blocklist.reason),
                    source = EXCLUDED.source
                RETURNING id, created_at
                """,
                (entry.platform, entry.platform_seller_id, entry.username,
                 entry.reason, entry.source),
            )
            row = cur.fetchone()
        return replace(entry, id=row[0], created_at=str(row[1]))

    def remove_from_blocklist(self, platform: str, platform_seller_id: str) -> None:
        """Delete the blocklist row for the seller, if any."""
        with self._cursor(commit=True) as cur:
            cur.execute(
                "DELETE FROM scammer_blocklist "
                "WHERE platform = %s AND platform_seller_id = %s",
                (platform, platform_seller_id),
            )

    def list_blocklist(self, platform: str = "ebay") -> list[ScammerEntry]:
        """Return blocklist entries for *platform*, newest first."""
        with self._cursor() as cur:
            cur.execute(
                """
                SELECT platform, platform_seller_id, username, reason, source, id, created_at
                FROM scammer_blocklist
                WHERE platform = %s
                ORDER BY created_at DESC
                """,
                (platform,),
            )
            return [
                ScammerEntry(
                    platform=r[0], platform_seller_id=r[1], username=r[2],
                    reason=r[3], source=r[4], id=r[5], created_at=str(r[6]),
                )
                for r in cur.fetchall()
            ]

View file

@ -1,86 +0,0 @@
"""Protocol (duck-type interface) for shared table backends (SQLite and Postgres)."""
from __future__ import annotations
from typing import Any, Optional, Protocol, runtime_checkable
from app.db.models import MarketComp, ScammerEntry, Seller
@runtime_checkable
class SharedTableProtocol(Protocol):
    """Protocol that both Store (SQLite) and SnipeSharedStore (Postgres) must satisfy.

    This enables code that reads/writes shared tables (sellers, market_comps,
    reported_sellers, scammer_blocklist) to remain agnostic to the underlying backend.
    """

    def save_seller(self, seller: Seller) -> None:
        """Persist a single seller record."""
        ...

    def save_sellers(self, sellers: list[Seller]) -> None:
        """Persist multiple seller records (batch upsert)."""
        ...

    def get_seller(self, platform: str, platform_seller_id: str) -> Optional[Seller]:
        """Fetch a single seller by platform and platform_seller_id; None if absent."""
        ...

    def save_market_comp(self, comp: MarketComp) -> None:
        """Persist a market comparison record."""
        ...

    def get_market_comp(self, platform: str, query_hash: str) -> Optional[MarketComp]:
        """Fetch a market comparison by platform and query_hash; None if absent."""
        ...

    def mark_reported(
        self,
        platform: str,
        platform_seller_id: str,
        username: Optional[str] = None,
        reported_by: str = "user",
    ) -> None:
        """Record that a seller has been reported to the platform."""
        ...

    def list_reported(self, platform: str = "ebay") -> list[str]:
        """Return all platform_seller_ids that have been reported."""
        ...

    def delete_seller_data(self, platform: str, platform_seller_id: str) -> None:
        """Permanently erase a seller and all related data (GDPR/eBay compliance)."""
        ...

    def refresh_seller_categories(
        self,
        platform: str,
        seller_ids: list[str],
        listing_store: Optional[Any] = None,
    ) -> int:
        """Derive category_history_json for sellers that lack it from stored listings.

        listing_store: Store holding listings (may differ from self in split-DB mode).
        Returns count of sellers updated.
        """
        ...

    def is_blocklisted(self, platform: str, platform_seller_id: str) -> bool:
        """Return True if a seller is on the community scammer blocklist."""
        ...

    def add_to_blocklist(self, entry: ScammerEntry) -> ScammerEntry:
        """Upsert a seller into the blocklist. Returns the saved entry with id and created_at."""
        ...

    def remove_from_blocklist(self, platform: str, platform_seller_id: str) -> None:
        """Remove a seller from the blocklist."""
        ...

    def list_blocklist(self, platform: str = "ebay") -> list[ScammerEntry]:
        """Return all blocklisted sellers for a platform, newest first."""
        ...

    def clone(self) -> SharedTableProtocol:
        """Return a store on the same backend that another thread may use.

        Implementations may hand back a new instance or, when the backend is
        already safe to share between threads, the same object; callers must
        not rely on the result being a distinct instance.
        """
        ...

View file

@ -1,6 +1,5 @@
"""Thin SQLite read/write layer for all Snipe models.""" """Thin SQLite read/write layer for all Snipe models."""
from __future__ import annotations from __future__ import annotations
import json import json
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
@ -8,7 +7,7 @@ from typing import Optional
from circuitforge_core.db import get_connection, run_migrations from circuitforge_core.db import get_connection, run_migrations
from .models import Listing, MarketComp, SavedSearch, ScammerEntry, Seller, TrustScore, WatchAlert from .models import Listing, Seller, TrustScore, MarketComp, SavedSearch, ScammerEntry
MIGRATIONS_DIR = Path(__file__).parent / "migrations" MIGRATIONS_DIR = Path(__file__).parent / "migrations"
@ -21,10 +20,6 @@ class Store:
# WAL mode: allows concurrent readers + one writer without blocking # WAL mode: allows concurrent readers + one writer without blocking
self._conn.execute("PRAGMA journal_mode=WAL") self._conn.execute("PRAGMA journal_mode=WAL")
def clone(self) -> Store:
    """Build a separate Store on this instance's database path."""
    db_path = self._db_path
    return Store(db_path)
# --- Seller --- # --- Seller ---
def delete_seller_data(self, platform: str, platform_seller_id: str) -> None: def delete_seller_data(self, platform: str, platform_seller_id: str) -> None:
@ -314,66 +309,15 @@ class Store:
def list_saved_searches(self) -> list[SavedSearch]: def list_saved_searches(self) -> list[SavedSearch]:
rows = self._conn.execute( rows = self._conn.execute(
"SELECT name, query, platform, filters_json, id, created_at, last_run_at, " "SELECT name, query, platform, filters_json, id, created_at, last_run_at "
"monitor_enabled, poll_interval_min, min_trust_score, last_checked_at "
"FROM saved_searches ORDER BY created_at DESC" "FROM saved_searches ORDER BY created_at DESC"
).fetchall() ).fetchall()
return [ return [
SavedSearch( SavedSearch(name=r[0], query=r[1], platform=r[2], filters_json=r[3],
name=r[0], query=r[1], platform=r[2], filters_json=r[3], id=r[4], created_at=r[5], last_run_at=r[6])
id=r[4], created_at=r[5], last_run_at=r[6],
monitor_enabled=bool(r[7]), poll_interval_min=r[8],
min_trust_score=r[9], last_checked_at=r[10],
)
for r in rows for r in rows
] ]
def update_monitor_settings(
    self,
    saved_id: int,
    *,
    monitor_enabled: bool,
    poll_interval_min: int,
    min_trust_score: int,
) -> None:
    """Store the monitor configuration of one saved search and commit.

    monitor_enabled is written as an integer flag.
    """
    sql = (
        "UPDATE saved_searches "
        "SET monitor_enabled=?, poll_interval_min=?, min_trust_score=? "
        "WHERE id=?"
    )
    enabled_flag = int(monitor_enabled)
    params = (enabled_flag, poll_interval_min, min_trust_score, saved_id)
    self._conn.execute(sql, params)
    self._conn.commit()
def list_monitored_searches(self) -> list[SavedSearch]:
    """List every saved search whose monitor_enabled flag is 1 (for the background poller)."""
    sql = (
        "SELECT name, query, platform, filters_json, id, created_at, last_run_at, "
        "monitor_enabled, poll_interval_min, min_trust_score, last_checked_at "
        "FROM saved_searches WHERE monitor_enabled=1"
    )
    searches: list[SavedSearch] = []
    for row in self._conn.execute(sql).fetchall():
        (name, query, platform, filters_json, row_id, created_at, last_run_at,
         _enabled, poll_interval_min, min_trust_score, last_checked_at) = row
        searches.append(
            SavedSearch(
                name=name, query=query, platform=platform, filters_json=filters_json,
                id=row_id, created_at=created_at, last_run_at=last_run_at,
                # The WHERE clause only lets enabled rows through.
                monitor_enabled=True, poll_interval_min=poll_interval_min,
                min_trust_score=min_trust_score, last_checked_at=last_checked_at,
            )
        )
    return searches
def mark_search_checked(self, saved_id: int) -> None:
    """Stamp last_checked_at of the saved search with the current UTC time and commit."""
    checked_at = datetime.now(timezone.utc).isoformat()
    sql = "UPDATE saved_searches SET last_checked_at=? WHERE id=?"
    self._conn.execute(sql, (checked_at, saved_id))
    self._conn.commit()
def count_active_monitors(self) -> int:
    """Count the saved searches that have monitoring enabled."""
    cursor = self._conn.execute(
        "SELECT COUNT(*) FROM saved_searches WHERE monitor_enabled=1"
    )
    result = cursor.fetchone()
    if not result:
        return 0
    return result[0]
def delete_saved_search(self, saved_id: int) -> None: def delete_saved_search(self, saved_id: int) -> None:
self._conn.execute("DELETE FROM saved_searches WHERE id=?", (saved_id,)) self._conn.execute("DELETE FROM saved_searches WHERE id=?", (saved_id,))
self._conn.commit() self._conn.commit()
@ -385,112 +329,6 @@ class Store:
) )
self._conn.commit() self._conn.commit()
# --- WatchAlerts ---
def upsert_alert(self, alert: WatchAlert) -> tuple[int, bool]:
    """Insert alert if not already present. Returns (id, is_new).

    Uniqueness is decided by the table's (saved_search_id, platform_listing_id)
    constraint inside a single INSERT, instead of a separate SELECT followed by
    an INSERT. Other constraint violations still raise.
    """
    cur = self._conn.execute(
        "INSERT INTO watch_alerts "
        "(saved_search_id, platform_listing_id, title, price, currency, trust_score, url) "
        "VALUES (?,?,?,?,?,?,?) "
        "ON CONFLICT(saved_search_id, platform_listing_id) DO NOTHING",
        (alert.saved_search_id, alert.platform_listing_id, alert.title,
         alert.price, alert.currency, alert.trust_score, alert.url),
    )
    self._conn.commit()
    if cur.rowcount == 1:
        # A row was written, so lastrowid refers to it.
        return cur.lastrowid, True
    existing = self._conn.execute(
        "SELECT id FROM watch_alerts WHERE saved_search_id=? AND platform_listing_id=?",
        (alert.saved_search_id, alert.platform_listing_id),
    ).fetchone()
    return existing[0], False
def list_alerts(self, *, include_dismissed: bool = False) -> list[WatchAlert]:
    """Return watch alerts ordered by first_alerted_at, newest first.

    Dismissed alerts are left out unless include_dismissed is true.
    """
    if include_dismissed:
        where = ""
    else:
        where = "WHERE dismissed_at IS NULL"
    sql = (
        "SELECT id, saved_search_id, platform_listing_id, title, price, currency, "
        "trust_score, url, first_alerted_at, dismissed_at "
        f"FROM watch_alerts {where} ORDER BY first_alerted_at DESC"
    )
    alerts: list[WatchAlert] = []
    for record in self._conn.execute(sql).fetchall():
        (alert_id, search_id, listing_id, title, price, currency,
         trust_score, url, first_alerted_at, dismissed_at) = record
        alerts.append(
            WatchAlert(
                id=alert_id, saved_search_id=search_id, platform_listing_id=listing_id,
                title=title, price=price, currency=currency, trust_score=trust_score,
                url=url, first_alerted_at=first_alerted_at, dismissed_at=dismissed_at,
            )
        )
    return alerts
def count_undismissed_alerts(self) -> int:
    """Count the alerts whose dismissed_at is still NULL."""
    cursor = self._conn.execute(
        "SELECT COUNT(*) FROM watch_alerts WHERE dismissed_at IS NULL"
    )
    result = cursor.fetchone()
    if not result:
        return 0
    return result[0]
def dismiss_alert(self, alert_id: int) -> None:
    """Set dismissed_at of one alert to the current UTC time and commit."""
    dismissed_at = datetime.now(timezone.utc).isoformat()
    sql = "UPDATE watch_alerts SET dismissed_at=? WHERE id=?"
    self._conn.execute(sql, (dismissed_at, alert_id))
    self._conn.commit()
def dismiss_all_alerts(self) -> int:
    """Mark every still-open alert as dismissed now; return how many rows changed."""
    dismissed_at = datetime.now(timezone.utc).isoformat()
    sql = "UPDATE watch_alerts SET dismissed_at=? WHERE dismissed_at IS NULL"
    cursor = self._conn.execute(sql, (dismissed_at,))
    self._conn.commit()
    return cursor.rowcount
# --- ActiveMonitors (sched_db / shared_db) ---
def upsert_active_monitor(
    self,
    user_db_path: str,
    saved_search_id: int,
    poll_interval_min: int,
) -> None:
    """Add a monitor to the active_monitors registry, or update its poll interval.

    Rows are keyed on (user_db_path, saved_search_id); a conflict only
    overwrites poll_interval_min.
    """
    sql = (
        "INSERT INTO active_monitors (user_db_path, saved_search_id, poll_interval_min) "
        "VALUES (?,?,?) "
        "ON CONFLICT(user_db_path, saved_search_id) DO UPDATE SET "
        " poll_interval_min=excluded.poll_interval_min"
    )
    params = (user_db_path, saved_search_id, poll_interval_min)
    self._conn.execute(sql, params)
    self._conn.commit()
def remove_active_monitor(self, user_db_path: str, saved_search_id: int) -> None:
    """Drop the registry row for (user_db_path, saved_search_id) and commit."""
    sql = "DELETE FROM active_monitors WHERE user_db_path=? AND saved_search_id=?"
    key = (user_db_path, saved_search_id)
    self._conn.execute(sql, key)
    self._conn.commit()
def list_due_active_monitors(self) -> list[tuple[str, int, int]]:
    """List the monitors that are due, as (user_db_path, saved_search_id, poll_interval_min).

    A monitor is due when it has never been checked, or when at least
    poll_interval_min minutes have passed since last_checked_at. The elapsed
    time is computed inside SQLite from strftime('%s') epoch seconds.
    """
    sql = (
        "SELECT user_db_path, saved_search_id, poll_interval_min "
        "FROM active_monitors "
        "WHERE last_checked_at IS NULL "
        " OR (strftime('%s','now') - strftime('%s', last_checked_at)) "
        " >= poll_interval_min * 60"
    )
    due: list[tuple[str, int, int]] = []
    for record in self._conn.execute(sql).fetchall():
        due.append((record[0], record[1], record[2]))
    return due
def mark_active_monitor_checked(self, user_db_path: str, saved_search_id: int) -> None:
    """Stamp last_checked_at of one registry row with the current UTC time and commit."""
    checked_at = datetime.now(timezone.utc).isoformat()
    sql = (
        "UPDATE active_monitors SET last_checked_at=? "
        "WHERE user_db_path=? AND saved_search_id=?"
    )
    self._conn.execute(sql, (checked_at, user_db_path, saved_search_id))
    self._conn.commit()
# --- ScammerBlocklist --- # --- ScammerBlocklist ---
def add_to_blocklist(self, entry: ScammerEntry) -> ScammerEntry: def add_to_blocklist(self, entry: ScammerEntry) -> ScammerEntry:
@ -543,88 +381,6 @@ class Store:
for r in rows for r in rows
] ]
# --- Reported Sellers ---
def mark_reported(
    self,
    platform: str,
    platform_seller_id: str,
    username: Optional[str] = None,
    reported_by: str = "user",
) -> None:
    """Remember that an eBay T&S report was filed for this seller.

    INSERT OR IGNORE leaves an existing row alone, so the timestamp of the
    first report survives repeated calls.
    """
    sql = (
        "INSERT OR IGNORE INTO reported_sellers "
        "(platform, platform_seller_id, username, reported_by) "
        "VALUES (?,?,?,?)"
    )
    params = (platform, platform_seller_id, username, reported_by)
    self._conn.execute(sql, params)
    self._conn.commit()
def list_reported(self, platform: str = "ebay") -> list[str]:
    """List the platform_seller_id of every seller reported on *platform*."""
    sql = "SELECT platform_seller_id FROM reported_sellers WHERE platform=?"
    seller_ids: list[str] = []
    for record in self._conn.execute(sql, (platform,)).fetchall():
        seller_ids.append(record[0])
    return seller_ids
def save_community_signal(self, seller_id: str, confirmed: bool) -> None:
    """Append one trust-score feedback row to community_signals and commit.

    confirmed is stored as 1 when truthy, otherwise 0.
    """
    if confirmed:
        flag = 1
    else:
        flag = 0
    sql = "INSERT INTO community_signals (seller_id, confirmed) VALUES (?, ?)"
    self._conn.execute(sql, (seller_id, flag))
    self._conn.commit()
# --- User Preferences ---
def get_user_preference(self, path: str, default=None):
    """Look up one preference by dot-separated path (e.g. 'affiliate.opt_out').

    The value comes from the JSON blob in the singleton user_preferences row.
    *default* is returned when that row does not exist; otherwise it is
    forwarded to get_path as the fallback.
    """
    from circuitforge_core.preferences.paths import get_path

    cursor = self._conn.execute(
        "SELECT prefs_json FROM user_preferences WHERE id=1"
    )
    stored = cursor.fetchone()
    if stored:
        prefs = json.loads(stored[0])
        return get_path(prefs, path, default=default)
    return default
def set_user_preference(self, path: str, value) -> None:
    """Store *value* under a dot-separated preference path and commit.

    The current JSON blob (or an empty dict when no row exists yet) is passed
    through set_path, and the result is upserted into the singleton row, so
    sibling paths are kept.
    """
    from circuitforge_core.preferences.paths import set_path

    stored = self._conn.execute(
        "SELECT prefs_json FROM user_preferences WHERE id=1"
    ).fetchone()
    if stored:
        current = json.loads(stored[0])
    else:
        current = {}
    merged = set_path(current, path, value)
    sql = (
        "INSERT INTO user_preferences (id, prefs_json, updated_at) "
        "VALUES (1, ?, strftime('%Y-%m-%dT%H:%M:%SZ', 'now')) "
        "ON CONFLICT(id) DO UPDATE SET "
        " prefs_json = excluded.prefs_json, "
        " updated_at = excluded.updated_at"
    )
    self._conn.execute(sql, (json.dumps(merged),))
    self._conn.commit()
def get_all_preferences(self) -> dict:
    """Return the decoded preferences document, or ``{}`` if none is stored."""
    stored = self._conn.execute(
        "SELECT prefs_json FROM user_preferences WHERE id=1"
    ).fetchone()
    if stored:
        return json.loads(stored[0])
    return {}
def get_market_comp(self, platform: str, query_hash: str) -> Optional[MarketComp]: def get_market_comp(self, platform: str, query_hash: str) -> Optional[MarketComp]:
row = self._conn.execute( row = self._conn.execute(
"SELECT platform, query_hash, median_price, sample_count, expires_at, id, fetched_at " "SELECT platform, query_hash, median_price, sample_count, expires_at, id, fetched_at "

View file

@ -1,5 +0,0 @@
# app/llm/__init__.py
# BSL 1.1 License
from .query_translator import QueryTranslator, QueryTranslatorError, SearchParamsResponse
__all__ = ["QueryTranslator", "QueryTranslatorError", "SearchParamsResponse"]

View file

@ -1,231 +0,0 @@
# app/llm/query_translator.py
# BSL 1.1 License
"""LLM query builder — translates natural language to eBay SearchFilters.

Supports two backends, selected at construction time:

  cforch_url: cf-orch task endpoint (cloud/premium). The coordinator resolves
              product+task to a model and returns an allocation. The caller
              POSTs to the allocated service URL, then DELETEs the allocation.
  llm_router: circuitforge_core.LLMRouter (local installs: ollama/vllm/api keys).

At least one of cforch_url or llm_router must be supplied; callers are expected
to pass exactly one. If both are given, cforch_url is the backend that is used.
"""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional
import httpx
if TYPE_CHECKING:
from app.platforms.ebay.categories import EbayCategoryCache
log = logging.getLogger(__name__)
class QueryTranslatorError(Exception):
    """Signals that an LLM reply could not be turned into a SearchParamsResponse.

    Attributes:
        raw: The unparsed LLM output, kept for diagnostics ("" when unavailable).
    """

    def __init__(self, message: str, raw: str = "") -> None:
        # Keep the offending payload next to the human-readable message.
        self.raw = raw
        super().__init__(message)
@dataclass(frozen=True)
class SearchParamsResponse:
    """Immutable result of parsing an LLM reply.

    The fields map 1:1 to the /api/search query parameters.

    Attributes:
        base_query: Primary search term.
        must_include_mode: One of "all", "any" or "groups".
        must_include: Raw filter string, interpreted according to the mode.
        must_exclude: Comma-separated exclusion terms.
        max_price: Upper price bound, or None.
        min_price: Lower price bound, or None.
        condition: Subset of ["new", "used", "for_parts"].
        category_id: eBay category ID string, or None.
        explanation: One-sentence plain-language summary.
    """

    base_query: str
    must_include_mode: str
    must_include: str
    must_exclude: str
    max_price: Optional[float]
    min_price: Optional[float]
    condition: list[str]
    category_id: Optional[str]
    explanation: str
_VALID_MODES = {"all", "any", "groups"}
_VALID_CONDITIONS = {"new", "used", "for_parts"}


def _parse_response(raw: str) -> SearchParamsResponse:
    """Parse the LLM's raw text output into a SearchParamsResponse.

    JSON ``null`` is treated the same as an absent key for every optional
    field, so a model that emits ``"must_include": null`` yields an empty
    filter rather than the literal string "None".

    Args:
        raw: The text returned by the LLM; expected to be a single JSON object.

    Returns:
        The validated search parameters. An unknown ``must_include_mode``
        falls back to "all"; unknown condition values are dropped.

    Raises:
        QueryTranslatorError: If the JSON is malformed, is not an object,
            ``base_query`` is missing/empty, or a price is not numeric.
    """
    try:
        data = json.loads(raw.strip())
    except json.JSONDecodeError as exc:
        raise QueryTranslatorError(f"LLM returned unparseable JSON: {exc}", raw=raw) from exc

    if not isinstance(data, dict):
        raise QueryTranslatorError(
            "LLM response missing or invalid field: expected a JSON object, "
            f"got {type(data).__name__}",
            raw=raw,
        )

    def _text(key: str, default: str = "") -> str:
        # str(None) would produce "None"; map null/absent to the default instead.
        value = data.get(key)
        return default if value is None else str(value)

    try:
        base_query = _text("base_query").strip()
        if not base_query:
            raise KeyError("base_query is missing or empty")

        must_include_mode = _text("must_include_mode", "all")
        if must_include_mode not in _VALID_MODES:
            must_include_mode = "all"

        must_include = _text("must_include")
        must_exclude = _text("must_exclude")

        max_price = float(data["max_price"]) if data.get("max_price") is not None else None
        min_price = float(data["min_price"]) if data.get("min_price") is not None else None

        raw_conditions = data.get("condition")
        if raw_conditions is None:
            raw_conditions = []
        elif isinstance(raw_conditions, str):
            # A bare string would otherwise be iterated character by character.
            raw_conditions = [raw_conditions]
        condition = [
            c for c in raw_conditions
            if isinstance(c, str) and c in _VALID_CONDITIONS
        ]

        category_id = str(data["category_id"]) if data.get("category_id") else None
        explanation = _text("explanation").strip()
    except (KeyError, TypeError, ValueError) as exc:
        raise QueryTranslatorError(
            f"LLM response missing or invalid field: {exc}", raw=raw
        ) from exc

    return SearchParamsResponse(
        base_query=base_query,
        must_include_mode=must_include_mode,
        must_include=must_include,
        must_exclude=must_exclude,
        max_price=max_price,
        min_price=min_price,
        condition=condition,
        category_id=category_id,
        explanation=explanation,
    )
# ── System prompt template ────────────────────────────────────────────────────
# Rendered with str.format(category_hints=...) in QueryTranslator.translate().
# Because it goes through str.format, the only brace pair in the text must be
# the {category_hints} placeholder; any literal brace added later has to be
# doubled ("{{" / "}}").
_SYSTEM_PROMPT_TEMPLATE = """\
You are a search assistant for Snipe, an eBay listing intelligence tool.
Your job is to translate a natural-language description of what someone is looking for
into a structured eBay search configuration.
Return ONLY a JSON object with these exact fields no preamble, no markdown, no extra keys:
base_query (string) Primary search term, short e.g. "RTX 3080", "vintage Leica"
must_include_mode (string) One of: "all" (AND), "any" (OR), "groups" (CNF: pipe=OR within group, comma=AND between groups)
must_include (string) Filter string per mode leave blank if nothing to filter
must_exclude (string) Comma-separated terms to exclude e.g. "mining,for parts,broken"
max_price (number|null) Maximum price in USD, or null
min_price (number|null) Minimum price in USD, or null
condition (array) Any of: "new", "used", "for_parts" empty array means any condition
category_id (string|null) eBay category ID from the list below, or null if no match
explanation (string) One plain sentence summarizing what you built
eBay "groups" mode syntax example: to find a GPU that is BOTH (nvidia OR amd) AND (16gb OR 8gb):
must_include_mode: "groups"
must_include: "nvidia|amd, 16gb|8gb"
Phrase "like new", "open box", "refurbished" -> condition: ["used"]
Phrase "broken", "for parts", "not working" -> condition: ["for_parts"]
If unsure about condition, use an empty array.
Available eBay categories (use category_id verbatim if one fits otherwise omit):
{category_hints}
If none match, omit category_id (set to null). Respond with valid JSON only. No commentary outside the JSON object.
"""
# ── QueryTranslator ───────────────────────────────────────────────────────────
class QueryTranslator:
    """Translates natural-language search descriptions into SearchParamsResponse.

    Args:
        category_cache: An EbayCategoryCache instance (may have empty cache).
        cforch_url: cf-orch coordinator base URL (cloud/premium path).
        llm_router: A circuitforge_core LLMRouter instance (local path).

    At least one of cforch_url or llm_router must be provided. When both are
    given, a non-empty cforch_url takes precedence.
    """

    def __init__(
        self,
        category_cache: "EbayCategoryCache",
        *,
        cforch_url: str | None = None,
        llm_router: object | None = None,
    ) -> None:
        """Store the backend configuration.

        Raises:
            ValueError: If neither cforch_url nor llm_router is provided.
        """
        if cforch_url is None and llm_router is None:
            raise ValueError("Either cforch_url or llm_router must be provided")
        self._cache = category_cache
        self._cforch_url = cforch_url
        self._llm_router = llm_router

    def translate(self, natural_language: str) -> SearchParamsResponse:
        """Translate a natural-language query into a SearchParamsResponse.

        Raises:
            QueryTranslatorError: If the LLM backend fails, returns something
                other than text, or returns JSON that cannot be parsed.
        """
        # Consider the first 10 words, keeping those longer than two characters,
        # as keywords for the category hint lookup.
        keywords = [w for w in natural_language.split()[:10] if len(w) > 2]
        hints = self._cache.get_relevant(keywords, limit=30)
        if not hints:
            hints = self._cache.get_all_for_prompt(limit=40)

        if hints:
            category_hints = "\n".join(f"{cid}: {path}" for cid, path in hints)
        else:
            category_hints = "(no categories cached — omit category_id)"
        system_prompt = _SYSTEM_PROMPT_TEMPLATE.format(category_hints=category_hints)

        try:
            if self._cforch_url:
                raw = self._call_orch(system_prompt, natural_language)
            else:
                raw = self._call_local(system_prompt, natural_language)
        except QueryTranslatorError:
            raise
        except Exception as exc:
            raise QueryTranslatorError(
                f"LLM backend error: {exc}", raw=""
            ) from exc

        # A backend may hand back a non-string (e.g. a null "content"); report
        # that through the documented error type instead of letting
        # _parse_response fail with AttributeError on raw.strip().
        if not isinstance(raw, str):
            raise QueryTranslatorError(
                f"LLM backend error: expected text output, got {type(raw).__name__}",
                raw="",
            )
        return _parse_response(raw)

    def _call_orch(self, system_prompt: str, user_message: str) -> str:
        """Allocate via cf-orch task endpoint, call the model, release the slot.

        The allocation is released in a ``finally`` block so it is attempted
        whether or not the completion request succeeds. Release failures are
        logged and never raised.
        """
        alloc_resp = httpx.post(
            f"{self._cforch_url}/api/inference/task",
            json={"product": "snipe", "task": "query_translation"},
            timeout=10.0,
        )
        alloc_resp.raise_for_status()
        alloc = alloc_resp.json()
        service_url = alloc["url"]
        allocation_id = alloc["allocation_id"]

        try:
            resp = httpx.post(
                f"{service_url}/v1/chat/completions",
                json={
                    "model": "__auto__",
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_message},
                    ],
                    "max_tokens": 512,
                },
                timeout=60.0,
            )
            resp.raise_for_status()
            return resp.json()["choices"][0]["message"]["content"]
        finally:
            # Best-effort cleanup: an HTTP error status counts as a failed
            # release too, and the traceback is kept for diagnosis.
            try:
                release_resp = httpx.delete(
                    f"{self._cforch_url}/api/services/cf-text/allocations/{allocation_id}",
                    timeout=5.0,
                )
                release_resp.raise_for_status()
            except Exception:
                log.warning(
                    "Failed to release cf-orch allocation %s",
                    allocation_id,
                    exc_info=True,
                )

    def _call_local(self, system_prompt: str, user_message: str) -> str:
        """Call the locally-configured LLMRouter (ollama/vllm/api keys)."""
        return self._llm_router.complete(  # type: ignore[union-attr]
            user_message,
            system=system_prompt,
            max_tokens=512,
        )

View file

@ -1,36 +0,0 @@
# app/llm/router.py
# BSL 1.1 License
"""
Snipe LLMRouter shim — tri-level config path priority.

Config lookup order:
  1. <repo>/config/llm.yaml: per-install local override
  2. ~/.config/circuitforge/llm.yaml: user-level config (circuitforge-core default)
  3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, GPU_SERVER_URL)
"""
from pathlib import Path
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
_REPO_CONFIG = Path(__file__).parent.parent.parent / "config" / "llm.yaml"
_USER_CONFIG = Path.home() / ".config" / "circuitforge" / "llm.yaml"


class LLMRouter(_CoreLLMRouter):
    """LLMRouter for Snipe that resolves its config file in three steps.

    Passing ``config_path`` explicitly skips the lookup (handy in tests).
    """

    def __init__(self, config_path: Path | None = None) -> None:
        chosen = config_path
        if chosen is None:
            # First existing file wins: repo-level override, then user-level config.
            chosen = next(
                (candidate for candidate in (_REPO_CONFIG, _USER_CONFIG) if candidate.exists()),
                None,
            )

        if chosen is None:
            # No yaml anywhere: defer to circuitforge-core's env-var auto-config.
            super().__init__()
        else:
            super().__init__(chosen)

View file

View file

@ -1,110 +0,0 @@
"""Condense Snipe API search results into LLM-friendly format.

Raw Snipe responses are verbose: full listing dicts, nested seller objects,
redundant fields. This module trims them to what an LLM needs for reasoning:
title, price, market delta, trust summary, GPU inference score, url.

Results are sorted by a composite key: trust × gpu_inference_score / price.
This surfaces high-trust, VRAM-rich, underpriced boards at the top.
"""
from __future__ import annotations
import json
from typing import Any
from app.mcp.gpu_scoring import parse_gpu, score_gpu
def format_results(
    response: dict[str, Any],
    vram_weight: float = 0.6,
    arch_weight: float = 0.4,
    top_n: int = 20,
) -> dict[str, Any]:
    """Return a condensed, LLM-ready summary of a Snipe search response.

    Args:
        response: Decoded /api/search payload. The keys read are ``listings``,
            ``trust_scores``, ``sellers``, ``market_price`` and ``adapter_used``;
            a missing or null collection is treated as empty.
        vram_weight: Weight of VRAM in the GPU inference score.
        arch_weight: Weight of architecture tier in the GPU inference score.
        top_n: Maximum number of results to return; negative values are
            treated as 0.

    Returns:
        A dict with summary counters and ``results``, sorted best-value first
        by the composite key (trust × inference score / price). Each result
        carries ``seller_id`` so callers can pass it on to snipe_enrich.
    """
    listings: list[dict] = response.get("listings") or []
    trust_map: dict = response.get("trust_scores") or {}
    seller_map: dict = response.get("sellers") or {}
    market_price: float | None = response.get("market_price")
    # A negative limit would slice from the wrong end and report a negative
    # "showing" count.
    limit = max(0, top_n)

    ranked: list[tuple[float, dict[str, Any]]] = []
    for listing in listings:
        lid = listing.get("platform_listing_id", "")
        title = listing.get("title") or ""
        price = float(listing.get("price") or 0)
        trust = trust_map.get(lid) or {}
        seller_id = listing.get("seller_platform_id", "")
        seller = seller_map.get(seller_id) or {}

        gpu_info = _gpu_info(title, vram_weight, arch_weight)
        trust_score = trust.get("composite_score", 0) or 0
        inference_score = gpu_info["inference_score"] if gpu_info else 0.0

        entry = {
            "id": lid,
            "seller_id": seller_id,
            "title": title,
            "price": price,
            "vs_market": _vs_market(price, market_price),
            "trust_score": trust_score,
            "trust_partial": bool(trust.get("score_is_partial")),
            "red_flags": _parse_flags(trust.get("red_flags_json", "[]")),
            "seller_age_days": seller.get("account_age_days"),
            "seller_feedback": seller.get("feedback_count"),
            "gpu": gpu_info,
            "url": listing.get("url", ""),
        }
        # Keep the sort key beside the entry so it never leaks into the output.
        ranked.append((_composite_key(trust_score, inference_score, price), entry))

    # Stable sort: listings with equal keys keep their original relative order.
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    condensed = [entry for _, entry in ranked]

    no_gpu = sum(1 for r in condensed if r["gpu"] is None)
    return {
        "total_found": len(listings),
        "showing": min(limit, len(condensed)),
        "market_price": market_price,
        "adapter": response.get("adapter_used"),
        "no_gpu_detected": no_gpu,
        "results": condensed[:limit],
    }
def _gpu_info(title: str, vram_weight: float, arch_weight: float) -> dict | None:
    """Describe and score the GPU named in a listing title.

    Returns None when ``parse_gpu`` finds no GPU in *title*; otherwise a dict
    with the GPU's identity fields plus its VRAM, architecture and combined
    inference scores for the given weights.
    """
    gpu = parse_gpu(title)
    if gpu:
        scored = score_gpu(gpu, vram_weight, arch_weight)
        return {
            "model": gpu.model,
            "vram_gb": gpu.vram_gb,
            "arch": gpu.arch_name,
            "vendor": gpu.vendor,
            "vram_score": scored.vram_score,
            "arch_score": scored.arch_score,
            "inference_score": scored.inference_score,
        }
    return None
def _vs_market(price: float, market_price: float | None) -> str | None:
    """Phrase a listing price relative to the market median.

    Returns None when there is no (non-zero) market price or the listing
    price is not positive. A price equal to the median reads "0% below market".
    """
    if not market_price or price <= 0:
        return None

    gap_pct = ((market_price - price) / market_price) * 100
    return (
        f"{gap_pct:.0f}% below market (${market_price:.0f} median)"
        if gap_pct >= 0
        else f"{abs(gap_pct):.0f}% above market (${market_price:.0f} median)"
    )
def _composite_key(trust_score: float, inference_score: float, price: float) -> float:
    """Rank value: trust × inference score per unit of price (higher is better).

    Listings with a non-positive price or trust score rank at 0.0. A falsy
    inference score (0 / 0.0) is replaced by 50.0 before the calculation.
    """
    unrankable = price <= 0 or trust_score <= 0
    if unrankable:
        return 0.0
    effective_inference = inference_score if inference_score else 50.0
    return (trust_score * effective_inference) / price
def _parse_flags(flags_json: str) -> list[str]:
    """Decode a JSON-encoded list of red-flag strings.

    Args:
        flags_json: JSON text expected to hold a list (e.g. '["new_account"]').

    Returns:
        The decoded list, or an empty list when the input is not valid JSON,
        is not a string, or decodes to anything other than a list (a JSON
        object, string, number or null).
    """
    try:
        decoded = json.loads(flags_json)
    except (ValueError, TypeError):
        return []
    # Enforce the list contract: a JSON object or scalar must not be passed
    # downstream as if it were a list of flags.
    return decoded if isinstance(decoded, list) else []

View file

@ -1,143 +0,0 @@
"""GPU architecture and VRAM scoring for laptop mainboard inference-value ranking.

Parses GPU model names from eBay listing titles and scores them on two axes:
  - vram_score: linear 0-100, anchored at 24 GB = 100
  - arch_score: linear 0-100 over architecture tiers 1-5 (5 = newest)

inference_score = (vram_score × vram_weight + arch_score × arch_weight)
                  / (vram_weight + arch_weight)

Patterns are matched longest-first to prevent "RTX 3070" matching before "RTX 3070 Ti".
"""
from __future__ import annotations
import re
from dataclasses import dataclass
@dataclass(frozen=True)
class GpuSpec:
    """Immutable catalogue entry for one GPU model.

    Attributes:
        model: Canonical name, e.g. "RTX 3070 Ti".
        vram_gb: Video memory in GB.
        arch_tier: Architecture tier, 1-5; 5 = newest generation.
        arch_name: Human-readable architecture name, e.g. "Ampere".
        vendor: "nvidia", "amd" or "intel".
    """

    model: str
    vram_gb: int
    arch_tier: int
    arch_name: str
    vendor: str
@dataclass
class GpuMatch:
    """Scores computed for one GpuSpec.

    Attributes:
        spec: The GPU that was scored.
        vram_score: VRAM axis score.
        arch_score: Architecture axis score.
        inference_score: Weighted combination of the two axis scores.
    """

    spec: GpuSpec
    vram_score: float
    arch_score: float
    inference_score: float
# ── GPU database ──────────────────────────────────────────────────────────────
# Laptop VRAM often differs from desktop; using common laptop variants.
# Listed longest-name-first within each family to guide sort order.
# Positional fields follow GpuSpec: (model, vram_gb, arch_tier, arch_name, vendor).
# NOTE: _build_patterns() re-sorts the whole table by model-name length, so
# match precedence does not depend on the order written here.
_GPU_DB: list[GpuSpec] = [
    # NVIDIA Ada Lovelace — tier 5
    GpuSpec("RTX 4090", 16, 5, "Ada Lovelace", "nvidia"),
    GpuSpec("RTX 4080", 12, 5, "Ada Lovelace", "nvidia"),
    GpuSpec("RTX 4070 Ti", 12, 5, "Ada Lovelace", "nvidia"),
    GpuSpec("RTX 4070", 8, 5, "Ada Lovelace", "nvidia"),
    GpuSpec("RTX 4060 Ti", 8, 5, "Ada Lovelace", "nvidia"),
    GpuSpec("RTX 4060", 8, 5, "Ada Lovelace", "nvidia"),
    GpuSpec("RTX 4050", 6, 5, "Ada Lovelace", "nvidia"),
    # NVIDIA Ampere — tier 4
    GpuSpec("RTX 3090", 24, 4, "Ampere", "nvidia"), # rare laptop variant
    GpuSpec("RTX 3080 Ti", 16, 4, "Ampere", "nvidia"),
    GpuSpec("RTX 3080", 8, 4, "Ampere", "nvidia"), # most laptop 3080s = 8 GB
    GpuSpec("RTX 3070 Ti", 8, 4, "Ampere", "nvidia"),
    GpuSpec("RTX 3070", 8, 4, "Ampere", "nvidia"),
    GpuSpec("RTX 3060", 6, 4, "Ampere", "nvidia"),
    GpuSpec("RTX 3050 Ti", 4, 4, "Ampere", "nvidia"),
    GpuSpec("RTX 3050", 4, 4, "Ampere", "nvidia"),
    # NVIDIA Turing — tier 3
    GpuSpec("RTX 2080", 8, 3, "Turing", "nvidia"),
    GpuSpec("RTX 2070", 8, 3, "Turing", "nvidia"),
    GpuSpec("RTX 2060", 6, 3, "Turing", "nvidia"),
    GpuSpec("GTX 1660 Ti", 6, 3, "Turing", "nvidia"),
    GpuSpec("GTX 1660", 6, 3, "Turing", "nvidia"),
    GpuSpec("GTX 1650 Ti", 4, 3, "Turing", "nvidia"),
    GpuSpec("GTX 1650", 4, 3, "Turing", "nvidia"),
    # NVIDIA Pascal — tier 2
    GpuSpec("GTX 1080", 8, 2, "Pascal", "nvidia"),
    GpuSpec("GTX 1070", 8, 2, "Pascal", "nvidia"),
    GpuSpec("GTX 1060", 6, 2, "Pascal", "nvidia"),
    GpuSpec("GTX 1050 Ti", 4, 2, "Pascal", "nvidia"),
    GpuSpec("GTX 1050", 4, 2, "Pascal", "nvidia"),
    # AMD RDNA3 — tier 5
    GpuSpec("RX 7900M", 16, 5, "RDNA3", "amd"),
    GpuSpec("RX 7700S", 8, 5, "RDNA3", "amd"),
    GpuSpec("RX 7600M XT", 8, 5, "RDNA3", "amd"),
    GpuSpec("RX 7600S", 8, 5, "RDNA3", "amd"),
    GpuSpec("RX 7600M", 8, 5, "RDNA3", "amd"),
    # AMD RDNA2 — tier 4
    GpuSpec("RX 6850M XT", 12, 4, "RDNA2", "amd"),
    GpuSpec("RX 6800S", 12, 4, "RDNA2", "amd"),
    GpuSpec("RX 6800M", 12, 4, "RDNA2", "amd"),
    GpuSpec("RX 6700S", 10, 4, "RDNA2", "amd"),
    GpuSpec("RX 6700M", 10, 4, "RDNA2", "amd"),
    GpuSpec("RX 6650M", 8, 4, "RDNA2", "amd"),
    GpuSpec("RX 6600S", 8, 4, "RDNA2", "amd"),
    GpuSpec("RX 6600M", 8, 4, "RDNA2", "amd"),
    GpuSpec("RX 6500M", 4, 4, "RDNA2", "amd"),
    # AMD RDNA1 — tier 3
    GpuSpec("RX 5700M", 8, 3, "RDNA1", "amd"),
    GpuSpec("RX 5600M", 6, 3, "RDNA1", "amd"),
    GpuSpec("RX 5500M", 4, 3, "RDNA1", "amd"),
    # Intel Arc Alchemist — tier 4 (improving IPEX-LLM support)
    # NOTE(review): the original comment also cited ROCm, which is AMD's
    # compute stack rather than Intel's — confirm the intended rationale.
    GpuSpec("Arc A770M", 16, 4, "Alchemist", "intel"),
    GpuSpec("Arc A550M", 8, 4, "Alchemist", "intel"),
    GpuSpec("Arc A370M", 4, 4, "Alchemist", "intel"),
    GpuSpec("Arc A350M", 4, 4, "Alchemist", "intel"),
]
def _build_patterns() -> list[tuple[re.Pattern[str], GpuSpec]]:
    """Compile one case-insensitive regex per catalogue entry.

    Entries are ordered by descending model-name length so that a longer name
    ("RTX 3070 Ti") is tried before any shorter name it starts with
    ("RTX 3070").
    """
    by_length = sorted(_GPU_DB, key=lambda entry: len(entry.model), reverse=True)

    compiled = []
    for entry in by_length:
        # Each space in the model name may appear in a title as whitespace,
        # a hyphen, or nothing at all ("RTX3070", "RTX-3070").
        source = re.escape(entry.model).replace(r"\ ", r"[\s\-]?")
        compiled.append((re.compile(source, re.IGNORECASE), entry))
    return compiled


_PATTERNS: list[tuple[re.Pattern[str], GpuSpec]] = _build_patterns()
def parse_gpu(title: str) -> GpuSpec | None:
    """Find the GPU named in a listing title.

    Patterns are tried in ``_PATTERNS`` order and the spec of the first one
    that matches anywhere in *title* is returned; None if nothing matches.
    """
    return next(
        (spec for pattern, spec in _PATTERNS if pattern.search(title)),
        None,
    )
def score_gpu(spec: GpuSpec, vram_weight: float, arch_weight: float) -> GpuMatch:
    """Score a GPU spec for inference value.

    vram_score: proportional to VRAM, with 24 GB mapping to 100 (capped at 100).
    arch_score: proportional to the tier, with tier 1 = 0 and tier 5 = 100.
    inference_score: the two scores averaged by their weights; 0.0 when the
        weights sum to zero or less.

    All three scores are rounded to one decimal place in the result.
    """
    vram = min(100.0, (spec.vram_gb / 24.0) * 100.0)
    arch = ((spec.arch_tier - 1) / 4.0) * 100.0

    weight_sum = vram_weight + arch_weight
    # Guard the division: non-positive total weight yields a zero score.
    combined = (
        0.0
        if weight_sum <= 0
        else (vram * vram_weight + arch * arch_weight) / weight_sum
    )

    return GpuMatch(
        spec=spec,
        vram_score=round(vram, 1),
        arch_score=round(arch, 1),
        inference_score=round(combined, 1),
    )

View file

@ -1,262 +0,0 @@
"""Snipe MCP Server — eBay search with trust scoring and GPU inference-value ranking.

Exposes three tools to Claude:
  snipe_search: search eBay via Snipe, GPU-scored and trust-ranked
  snipe_enrich: deep seller/listing enrichment for a specific result
  snipe_save: persist a productive search for ongoing monitoring
Run with:
python -m app.mcp.server
(from /Library/Development/CircuitForge/snipe with cf conda env active)
Configure in Claude Code ~/.claude.json:
"snipe": {
"command": "/devl/miniconda3/envs/cf/bin/python",
"args": ["-m", "app.mcp.server"],
"cwd": "/Library/Development/CircuitForge/snipe",
"env": { "SNIPE_API_URL": "http://localhost:8510" }
}
"""
from __future__ import annotations
import asyncio
import json
import os
import httpx
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import TextContent, Tool
# Base URL of the Snipe HTTP API; override with the SNIPE_API_URL env var.
_SNIPE_API = os.environ.get("SNIPE_API_URL", "http://localhost:8510")
# Timeout passed to every httpx.AsyncClient created in this module.
_TIMEOUT = 120.0
# MCP server instance the tool handlers below register on.
server = Server("snipe")
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Return the tool definitions this server advertises.

    Three tools are declared (snipe_search, snipe_enrich and snipe_save), each
    with a JSON Schema for its arguments. The names must stay in sync with the
    dispatch in call_tool().
    """
    # NOTE(review): the vram_weight / arch_weight descriptions begin with "01."
    # which looks like a mangled "0-1." range — confirm against the repository
    # copy before changing these runtime strings.
    return [
        Tool(
            name="snipe_search",
            description=(
                "Search eBay listings via Snipe. Returns results condensed for LLM reasoning, "
                "sorted by composite value: trust_score × gpu_inference_score / price. "
                "GPU inference_score weights VRAM and architecture tier — tune with vram_weight/arch_weight. "
                "Use must_include_mode='groups' with pipe-separated OR alternatives for broad GPU coverage "
                "(e.g. 'rtx 3060|rtx 3070|rtx 3080'). "
                "Laptop Motherboard category ID: 177946."
            ),
            inputSchema={
                "type": "object",
                "required": ["query"],
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Base eBay search keywords, e.g. 'laptop motherboard'",
                    },
                    "must_include": {
                        "type": "string",
                        "description": (
                            "Comma-separated AND groups; use | for OR within a group. "
                            "E.g. 'rtx 3060|rtx 3070|rx 6700m, 8gb|12gb|16gb'"
                        ),
                    },
                    "must_include_mode": {
                        "type": "string",
                        "enum": ["all", "any", "groups"],
                        "default": "groups",
                        "description": "groups: pipe=OR comma=AND. Recommended for multi-GPU searches.",
                    },
                    "must_exclude": {
                        "type": "string",
                        "description": (
                            "Comma-separated terms to exclude. "
                            "Suggested: 'broken,cracked,no post,for parts,parts only,untested,"
                            "lcd,screen,chassis,housing,bios locked'"
                        ),
                    },
                    "max_price": {
                        "type": "number",
                        "default": 0,
                        "description": "Max price USD (0 = no limit)",
                    },
                    "min_price": {
                        "type": "number",
                        "default": 0,
                        "description": "Min price USD (0 = no limit)",
                    },
                    "pages": {
                        "type": "integer",
                        "default": 2,
                        "description": "Pages of eBay results to fetch (1 page ≈ 50 listings)",
                    },
                    "category_id": {
                        "type": "string",
                        "default": "",
                        "description": (
                            "eBay category ID. "
                            "177946 = Laptop Motherboards & System Boards. "
                            "27386 = Graphics Cards (PCIe, for price comparison). "
                            "Leave empty to search all categories."
                        ),
                    },
                    "vram_weight": {
                        "type": "number",
                        "default": 0.6,
                        "description": (
                            "01. Weight of VRAM in GPU inference score. "
                            "Higher = VRAM is primary ranking factor. "
                            "Use 1.0 to rank purely by VRAM (ignores arch generation)."
                        ),
                    },
                    "arch_weight": {
                        "type": "number",
                        "default": 0.4,
                        "description": (
                            "01. Weight of architecture generation in GPU inference score. "
                            "Higher = prefer newer GPU arch (Ada > Ampere > Turing etc.). "
                            "Use 0.0 to ignore arch and rank purely by VRAM."
                        ),
                    },
                    "top_n": {
                        "type": "integer",
                        "default": 20,
                        "description": "Max results to return after sorting",
                    },
                },
            },
        ),
        Tool(
            name="snipe_enrich",
            description=(
                "Deep-dive enrichment for a specific seller + listing. "
                "Runs BTF scraping and category history to fill partial trust scores (~20s). "
                "Use when snipe_search returns trust_partial=true on a promising listing."
            ),
            inputSchema={
                "type": "object",
                "required": ["seller_id", "listing_id"],
                "properties": {
                    "seller_id": {
                        "type": "string",
                        "description": "eBay seller platform ID (from snipe_search result seller_id field)",
                    },
                    "listing_id": {
                        "type": "string",
                        "description": "eBay listing platform ID (from snipe_search result id field)",
                    },
                    "query": {
                        "type": "string",
                        "default": "",
                        "description": "Original search query — provides market comp context for re-scoring",
                    },
                },
            },
        ),
        Tool(
            name="snipe_save",
            description="Persist a productive search for ongoing monitoring in the Snipe UI.",
            inputSchema={
                "type": "object",
                "required": ["name", "query"],
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Human-readable label, e.g. 'RTX 3070+ laptop boards under $250'",
                    },
                    "query": {
                        "type": "string",
                        "description": "The eBay search query string",
                    },
                    "filters_json": {
                        "type": "string",
                        "default": "{}",
                        "description": "JSON string of filter params to preserve (max_price, must_include, etc.)",
                    },
                },
            },
        ),
    ]
@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    """Route a tool invocation to its handler.

    Unknown tool names produce a single text item saying so rather than an
    exception.
    """
    handlers = {
        "snipe_search": _search,
        "snipe_enrich": _enrich,
        "snipe_save": _save,
    }
    handler = handlers.get(name)
    if handler is None:
        return [TextContent(type="text", text=f"Unknown tool: {name}")]
    return await handler(arguments)
async def _search(args: dict) -> list[TextContent]:
    """Run a Snipe search and return the condensed results as JSON text.

    Empty-string and zero-valued arguments are left out of the request so the
    API applies its own defaults; ``q`` is always sent. The response is
    condensed by ``format_results`` using the caller's weights and ``top_n``.
    HTTP error statuses propagate via ``raise_for_status``.
    """
    from app.mcp.formatters import format_results

    candidates = {
        "q": args.get("query", ""),
        "must_include": args.get("must_include", ""),
        "must_include_mode": args.get("must_include_mode", "groups"),
        "must_exclude": args.get("must_exclude", ""),
        "max_price": args.get("max_price", 0),
        "min_price": args.get("min_price", 0),
        "pages": args.get("pages", 2),
        "category_id": args.get("category_id", ""),
    }
    params = {}
    for key, value in candidates.items():
        if (value != "" and value != 0) or key == "q":
            params[key] = value

    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.get(f"{_SNIPE_API}/api/search", params=params)
        resp.raise_for_status()
        payload = resp.json()
        summary = format_results(
            payload,
            vram_weight=float(args.get("vram_weight", 0.6)),
            arch_weight=float(args.get("arch_weight", 0.4)),
            top_n=int(args.get("top_n", 20)),
        )
    body = json.dumps(summary, indent=2)
    return [TextContent(type="text", text=body)]
async def _enrich(args: dict) -> list[TextContent]:
    """POST to /api/enrich for one seller + listing and return the JSON reply.

    ``seller_id`` and ``listing_id`` are required keys of *args*; ``query``
    defaults to "". HTTP error statuses propagate via ``raise_for_status``.
    """
    query_params = {
        "seller": args["seller_id"],
        "listing_id": args["listing_id"],
        "query": args.get("query", ""),
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.post(f"{_SNIPE_API}/api/enrich", params=query_params)
        resp.raise_for_status()
        body = json.dumps(resp.json(), indent=2)
    return [TextContent(type="text", text=body)]
async def _save(args: dict) -> list[TextContent]:
    """POST a saved search to /api/saved-searches and confirm with its id.

    ``name`` and ``query`` are required keys of *args*; ``filters_json``
    defaults to "{}". HTTP error statuses propagate via ``raise_for_status``.
    """
    payload = {
        "name": args["name"],
        "query": args["query"],
        "filters_json": args.get("filters_json", "{}"),
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
        resp = await client.post(f"{_SNIPE_API}/api/saved-searches", json=payload)
        resp.raise_for_status()
        saved = resp.json()
    return [TextContent(type="text", text=f"Saved (id={saved.get('id')}): {args['name']}")]
async def _main() -> None:
    """Serve the MCP server over stdio until the streams close."""
    async with stdio_server() as (incoming, outgoing):
        init_options = server.create_initialization_options()
        await server.run(incoming, outgoing, init_options)


if __name__ == "__main__":
    asyncio.run(_main())

View file

@ -1,16 +1,10 @@
"""PlatformAdapter abstract base and shared types.""" """PlatformAdapter abstract base and shared types."""
from __future__ import annotations from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
from app.db.models import Listing, Seller from app.db.models import Listing, Seller
# Single source of truth for platform validation.
# Phase 2 will extend this set as new adapters are implemented.
SUPPORTED_PLATFORMS: frozenset[str] = frozenset({"ebay", "mercari"})
@dataclass @dataclass
class SearchFilters: class SearchFilters:
@ -22,8 +16,6 @@ class SearchFilters:
must_include: list[str] = field(default_factory=list) # client-side title filter must_include: list[str] = field(default_factory=list) # client-side title filter
must_exclude: list[str] = field(default_factory=list) # forwarded to eBay -term AND client-side must_exclude: list[str] = field(default_factory=list) # forwarded to eBay -term AND client-side
category_id: Optional[str] = None # eBay category ID (e.g. "27386" = GPUs) category_id: Optional[str] = None # eBay category ID (e.g. "27386" = GPUs)
must_include_mode: str = "all" # "all" | "any" | "groups"
adapter: str = "auto" # "auto" | "api" | "scraper"
class PlatformAdapter(ABC): class PlatformAdapter(ABC):

View file

@ -1,13 +1,10 @@
"""eBay Browse + Trading API adapter.""" """eBay Browse API adapter."""
from __future__ import annotations from __future__ import annotations
import hashlib import hashlib
import logging import logging
import xml.etree.ElementTree as ET
from dataclasses import replace from dataclasses import replace
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Optional from typing import Optional
import requests import requests
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -21,8 +18,8 @@ _SHOPPING_API_MAX_PER_SEARCH = 5 # sellers enriched per search call
_SHOPPING_API_INTER_REQUEST_DELAY = 0.5 # seconds between successive calls _SHOPPING_API_INTER_REQUEST_DELAY = 0.5 # seconds between successive calls
_SELLER_ENRICH_TTL_HOURS = 24 # skip re-enrichment within this window _SELLER_ENRICH_TTL_HOURS = 24 # skip re-enrichment within this window
from app.db.models import Listing, MarketComp, Seller from app.db.models import Listing, Seller, MarketComp
from app.db.protocol import SharedTableProtocol from app.db.store import Store
from app.platforms import PlatformAdapter, SearchFilters from app.platforms import PlatformAdapter, SearchFilters
from app.platforms.ebay.auth import EbayTokenManager from app.platforms.ebay.auth import EbayTokenManager
from app.platforms.ebay.normaliser import normalise_listing, normalise_seller from app.platforms.ebay.normaliser import normalise_listing, normalise_seller
@ -67,7 +64,7 @@ BROWSE_BASE = {
class EbayAdapter(PlatformAdapter): class EbayAdapter(PlatformAdapter):
def __init__(self, token_manager: EbayTokenManager, shared_store: SharedTableProtocol, env: str = "production"): def __init__(self, token_manager: EbayTokenManager, shared_store: Store, env: str = "production"):
self._tokens = token_manager self._tokens = token_manager
self._store = shared_store self._store = shared_store
self._env = env self._env = env
@ -211,70 +208,6 @@ class EbayAdapter(PlatformAdapter):
except Exception as e: except Exception as e:
log.debug("Shopping API enrich failed for %s: %s", username, e) log.debug("Shopping API enrich failed for %s: %s", username, e)
# ── Trading API GetUser (requires user OAuth token) ───────────────────────
_TRADING_API_URL = "https://api.ebay.com/ws/api.dll"
_TRADING_API_COMPATIBILITY = "1283"
def enrich_seller_trading_api(self, username: str, user_access_token: str) -> bool:
    """Enrich a seller's account_age_days using Trading API GetUser.

    Uses the connected user's OAuth access token (Authorization Code flow),
    which bypasses Shopping API rate limits and works even when the Shopping
    API GetUserProfile call is throttled.

    Unlike BTF scraping, this is a clean API call (~200ms, no Playwright).
    Called from the search endpoint when the requesting user has connected
    their eBay account.

    Args:
        username: eBay user ID of the seller. It is XML-escaped before being
            embedded in the request body.
        user_access_token: OAuth access token of the connected user.

    Returns:
        True if a registration date was obtained (the age is saved only when
        the seller already exists in the store), False on any failure. No
        exception is raised.
    """
    # Local import keeps this method self-contained.
    from xml.sax.saxutils import escape

    # The username comes from listing data; escape it so characters such as
    # "&" or "<" cannot break or alter the request document.
    xml_body = (
        '<?xml version="1.0" encoding="utf-8"?>'
        '<GetUserRequest xmlns="urn:ebay:apis:eBLBaseComponents">'
        f'<UserID>{escape(username)}</UserID>'
        '</GetUserRequest>'
    )
    try:
        resp = requests.post(
            self._TRADING_API_URL,
            headers={
                "X-EBAY-API-CALL-NAME": "GetUser",
                "X-EBAY-API-SITEID": "0",
                "X-EBAY-API-COMPATIBILITY-LEVEL": self._TRADING_API_COMPATIBILITY,
                "X-EBAY-API-IAF-TOKEN": f"Bearer {user_access_token}",
                "Content-Type": "text/xml",
            },
            data=xml_body.encode("utf-8"),
            timeout=10,
        )
        resp.raise_for_status()
        root = ET.fromstring(resp.text)
        ns = {"e": "urn:ebay:apis:eBLBaseComponents"}

        ack = root.findtext("e:Ack", namespaces=ns)
        if ack not in ("Success", "Warning"):
            errors = [e.findtext("e:LongMessage", namespaces=ns, default="")
                      for e in root.findall("e:Errors", namespaces=ns)]
            log.debug("Trading API GetUser failed for %s: %s", username, errors)
            return False

        reg_date = root.findtext("e:User/e:RegistrationDate", namespaces=ns)
        if not reg_date:
            return False

        # Normalise a trailing "Z" so fromisoformat() yields an aware datetime.
        dt = datetime.fromisoformat(reg_date.replace("Z", "+00:00"))
        age_days = (datetime.now(timezone.utc) - dt).days

        seller = self._store.get_seller("ebay", username)
        if seller:
            self._store.save_seller(replace(seller, account_age_days=age_days))
            log.debug("Trading API GetUser: %s registered %d days ago", username, age_days)
        return True
    except Exception as exc:
        # Best-effort enrichment: any failure is logged and reported as False.
        log.debug("Trading API GetUser failed for %s: %s", username, exc)
        return False
def get_seller(self, seller_platform_id: str) -> Optional[Seller]: def get_seller(self, seller_platform_id: str) -> Optional[Seller]:
cached = self._store.get_seller("ebay", seller_platform_id) cached = self._store.get_seller("ebay", seller_platform_id)
if cached: if cached:

View file

@ -1,10 +1,8 @@
"""eBay OAuth2 client credentials token manager.""" """eBay OAuth2 client credentials token manager."""
from __future__ import annotations from __future__ import annotations
import base64 import base64
import time import time
from typing import Optional from typing import Optional
import requests import requests
EBAY_OAUTH_URLS = { EBAY_OAUTH_URLS = {

View file

@ -1,400 +0,0 @@
"""Thread-local Playwright browser manager for the eBay scraper.
Each uvicorn worker thread that calls fetch_html() gets its own Playwright
instance, browser, and context created lazily on first use. This avoids
the "cannot switch to a different thread" error that arises when Playwright
sync API instances are shared across threads (they bind their greenlet event
loop to the creating thread).
Key design:
- Thread-local: _thread_local.slot holds the _PooledBrowser for the current
thread. No slot is ever handed to another thread.
- Lazy creation: slots are created on first fetch_html() call per thread, not
at startup. start() is a lightweight lifecycle marker only.
- Registry: _slot_registry (keyed by thread-id) lets stop() close every active
slot across all threads without walking thread-local storage.
- Replenishment: after each use the dirty context is closed and a fresh one
opened on the same browser. Browser launch overhead is paid at most once
per worker thread lifetime.
- Graceful degradation: if Playwright / Xvfb is unavailable, fetch_html falls
back to _fetch_fresh (identical behavior to before this module existed).
Pool size is read from BROWSER_POOL_SIZE env var (default: 2) but is now a
soft limit used only for documentation; actual concurrency is bounded by
uvicorn's thread count.
"""
from __future__ import annotations
import itertools
import logging
import os
import subprocess
import threading
import time
from dataclasses import dataclass, field
from typing import Optional
log = logging.getLogger(__name__)
_pool_display_counter = itertools.cycle(range(200, 400))
_CHROMIUM_ARGS = ["--no-sandbox", "--disable-dev-shm-usage"]
_XVFB_ARGS = ["-screen", "0", "1280x800x24", "-ac"]
_USER_AGENT = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
_VIEWPORT = {"width": 1280, "height": 800}
# Thread-local storage: each thread gets its own _PooledBrowser slot.
_thread_local = threading.local()
@dataclass
class _PooledBrowser:
    """Bundle of the live resources that make up one browser slot.

    A slot is bound to a single thread: the Xvfb process, the Playwright
    driver, the browser and the current browsing context all travel together
    so they can be replenished or torn down as a unit.
    """

    # Xvfb process providing the virtual display for the headed browser.
    xvfb: subprocess.Popen
    # Playwright instance (sync_playwright().__enter__()).
    pw: object
    # Playwright Browser.
    browser: object
    # Playwright BrowserContext (fresh per use).
    ctx: object
    # X display number the Xvfb process was started on.
    display_num: int
    # Wall-clock timestamp; defaults to the moment the slot object is created.
    last_used_ts: float = field(default_factory=time.time)
def _launch_slot() -> _PooledBrowser:
    """Launch a new Xvfb display + headed Chromium browser + fresh context.

    Must be called from the thread that will use the slot.

    Returns:
        A ``_PooledBrowser`` holding the Xvfb process, the Playwright driver,
        the browser and a fresh browser context.

    Raises:
        ImportError: if playwright or playwright_stealth cannot be imported.
            Both imports run before any process is spawned.
        Exception: anything raised while starting Xvfb, Playwright or
            Chromium. Everything started so far is torn down first.
    """
    from playwright.sync_api import sync_playwright
    from playwright_stealth import Stealth  # noqa: F401

    display_num = next(_pool_display_counter)
    display = f":{display_num}"
    env = os.environ.copy()
    env["DISPLAY"] = display

    xvfb = subprocess.Popen(
        ["Xvfb", display] + _XVFB_ARGS,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # From here on Xvfb is running, so every failure path must reap it.
    pw = None
    try:
        time.sleep(0.3)  # brief pause after spawning Xvfb, before Chromium starts
        pw = sync_playwright().start()
        browser = pw.chromium.launch(
            headless=False,
            env=env,
            args=_CHROMIUM_ARGS,
        )
        ctx = browser.new_context(
            user_agent=_USER_AGENT,
            viewport=_VIEWPORT,
        )
    except Exception:
        if pw is not None:
            try:
                pw.stop()
            except Exception:
                # Best effort: a failing driver shutdown must not prevent the
                # Xvfb cleanup below or hide the original error.
                pass
        xvfb.terminate()
        xvfb.wait()
        raise

    return _PooledBrowser(
        xvfb=xvfb,
        pw=pw,
        browser=browser,
        ctx=ctx,
        display_num=display_num,
        last_used_ts=time.time(),
    )
def _close_slot(slot: _PooledBrowser) -> None:
    """Cleanly close a slot: context → browser → Playwright → Xvfb.

    Every step is best-effort: a failure in one step never prevents the
    following steps from running, and nothing is raised to the caller.

    Args:
        slot: The slot whose resources should be released.
    """
    for owner_attr, method in (("ctx", "close"), ("browser", "close"), ("pw", "stop")):
        try:
            getattr(getattr(slot, owner_attr), method)()
        except Exception:
            # Deliberately ignored: the remaining resources still need closing.
            pass

    try:
        slot.xvfb.terminate()
        try:
            slot.xvfb.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # Xvfb ignored SIGTERM — force it down so the process is reaped
            # instead of lingering after the slot is gone.
            slot.xvfb.kill()
            slot.xvfb.wait(timeout=5)
    except Exception:
        pass
def _replenish_slot(slot: _PooledBrowser) -> _PooledBrowser:
    """Swap a slot's used context for a fresh one on the same browser.

    The old context is closed on a best-effort basis. The returned slot
    reuses the existing Xvfb process, Playwright driver and browser, and
    carries a new context plus an updated ``last_used_ts``.
    """
    try:
        slot.ctx.close()
    except Exception:
        pass

    fresh_ctx = slot.browser.new_context(user_agent=_USER_AGENT, viewport=_VIEWPORT)
    refreshed = _PooledBrowser(
        xvfb=slot.xvfb,
        pw=slot.pw,
        browser=slot.browser,
        ctx=fresh_ctx,
        display_num=slot.display_num,
        last_used_ts=time.time(),
    )
    return refreshed
class BrowserPool:
    """Thread-local Playwright browser manager.

    Each thread that calls fetch_html() owns its own browser instance.
    No slots are shared between threads.
    """

    def __init__(self, size: int = 2) -> None:
        """Create an idle pool.

        Args:
            size: Size hint; it is stored and logged only, slots are created
                per calling thread regardless of this value.
        """
        self._size = size
        self._lock = threading.Lock()
        self._started = False
        self._stopped = False
        self._playwright_available: Optional[bool] = None
        # Registry of all active slots keyed by thread id — used by stop() and
        # to detect thread-local slots that stop() has already closed.
        self._slot_registry: dict[int, _PooledBrowser] = {}

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Mark the pool as started. Slots are created lazily per thread."""
        with self._lock:
            if self._started:
                return
            self._started = True
        if not self._check_playwright():
            log.warning(
                "BrowserPool: Playwright / Xvfb not available — "
                "pool disabled, falling back to per-call fresh browser."
            )
            return
        log.info("BrowserPool: started (thread-local mode, size hint=%d)", self._size)

    def stop(self) -> None:
        """Close all active slots across all threads."""
        # Snapshot and clear atomically so a concurrently running thread
        # cannot keep treating a slot as registered while it is being closed.
        with self._lock:
            self._stopped = True
            doomed = list(self._slot_registry.values())
            self._slot_registry.clear()
        for slot in doomed:
            _close_slot(slot)
        log.info("BrowserPool: stopped, closed %d slot(s)", len(doomed))

    # ------------------------------------------------------------------
    # Core fetch
    # ------------------------------------------------------------------

    def fetch_html(
        self,
        url: str,
        delay: float = 1.0,
        wait_for_selector: Optional[str] = None,
        wait_for_timeout_ms: int = 2000,
    ) -> str:
        """Navigate to *url* and return the rendered HTML.

        Uses the calling thread's browser slot (creates one if needed).
        Falls back to a fresh browser if Playwright is unavailable or the
        slot fails.

        Args:
            url: Page to load.
            delay: Seconds to sleep before fetching.
            wait_for_selector: If given, wait (up to 15 s) for this selector
                instead of using the fixed timeout.
            wait_for_timeout_ms: Fixed post-load wait used when no selector
                is given.

        Raises:
            RuntimeError: from the fresh-browser fallback when Playwright is
                not installed.
        """
        time.sleep(delay)

        slot = self._get_or_create_thread_slot()
        if slot is not None:
            try:
                html = self._fetch_with_slot(
                    slot, url,
                    wait_for_selector=wait_for_selector,
                    wait_for_timeout_ms=wait_for_timeout_ms,
                )
                try:
                    fresh_slot = _replenish_slot(slot)
                    self._register_slot(fresh_slot)
                except Exception as exc:
                    log.warning("BrowserPool: replenish failed, slot discarded: %s", exc)
                    _close_slot(slot)
                    self._unregister_slot()
                return html
            except Exception as exc:
                log.warning("BrowserPool: pooled fetch failed (%s) — closing slot", exc)
                _close_slot(slot)
                self._unregister_slot()

        return self._fetch_fresh(
            url,
            wait_for_selector=wait_for_selector,
            wait_for_timeout_ms=wait_for_timeout_ms,
        )

    # ------------------------------------------------------------------
    # Thread-local slot management
    # ------------------------------------------------------------------

    def _get_or_create_thread_slot(self) -> Optional[_PooledBrowser]:
        """Return the calling thread's slot, creating it if absent.

        Returns None when Playwright is unavailable or the launch fails.
        """
        if not self._check_playwright():
            return None

        slot: Optional[_PooledBrowser] = getattr(_thread_local, "slot", None)
        if slot is not None:
            with self._lock:
                still_registered = self._slot_registry.get(threading.get_ident()) is slot
            if still_registered:
                return slot
            # stop() (run from another thread) already closed this slot but
            # could not reach this thread's local storage; drop the dead
            # reference and launch a replacement below.
            _thread_local.slot = None

        try:
            slot = _launch_slot()
            self._register_slot(slot)
            log.debug("BrowserPool: launched slot :%d for thread %d",
                      slot.display_num, threading.get_ident())
            return slot
        except Exception as exc:
            log.warning("BrowserPool: slot launch failed: %s", exc)
            return None

    def _register_slot(self, slot: _PooledBrowser) -> None:
        """Bind slot to the calling thread (both thread-local and registry)."""
        _thread_local.slot = slot
        with self._lock:
            self._slot_registry[threading.get_ident()] = slot

    def _unregister_slot(self) -> None:
        """Remove the calling thread's slot from thread-local and registry."""
        _thread_local.slot = None
        with self._lock:
            self._slot_registry.pop(threading.get_ident(), None)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _check_playwright(self) -> bool:
        """Return whether playwright and playwright_stealth import; cached."""
        if self._playwright_available is not None:
            return self._playwright_available
        try:
            import playwright  # noqa: F401
            from playwright_stealth import Stealth  # noqa: F401
            self._playwright_available = True
        except ImportError:
            self._playwright_available = False
        return self._playwright_available

    def _fetch_with_slot(
        self,
        slot: _PooledBrowser,
        url: str,
        wait_for_selector: Optional[str] = None,
        wait_for_timeout_ms: int = 2000,
    ) -> str:
        """Load *url* in a new page of the slot's context and return its HTML."""
        from playwright_stealth import Stealth

        page = slot.ctx.new_page()
        try:
            Stealth().apply_stealth_sync(page)
            page.goto(url, wait_until="domcontentloaded", timeout=30_000)
            if wait_for_selector:
                try:
                    page.wait_for_selector(wait_for_selector, timeout=15_000)
                except Exception:
                    # Selector never appeared — return whatever rendered.
                    pass
            else:
                page.wait_for_timeout(wait_for_timeout_ms)
            return page.content()
        finally:
            try:
                page.close()
            except Exception:
                pass

    def _fetch_fresh(
        self,
        url: str,
        wait_for_selector: Optional[str] = None,
        wait_for_timeout_ms: int = 2000,
    ) -> str:
        """Fetch *url* with a one-off Xvfb + Chromium that is torn down afterwards.

        Raises:
            RuntimeError: if playwright / playwright_stealth are not installed.
        """
        try:
            from playwright.sync_api import sync_playwright
            from playwright_stealth import Stealth
        except ImportError as exc:
            raise RuntimeError(
                "Playwright not installed — cannot fetch pages. "
                "Install playwright and playwright-stealth in the Docker image."
            ) from exc

        display_num = next(_pool_display_counter)
        display = f":{display_num}"
        env = os.environ.copy()
        env["DISPLAY"] = display

        xvfb = subprocess.Popen(
            ["Xvfb", display] + _XVFB_ARGS,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        try:
            # Inside the try so an interruption during the pause still reaps Xvfb.
            time.sleep(0.3)
            with sync_playwright() as pw:
                browser = pw.chromium.launch(
                    headless=False,
                    env=env,
                    args=_CHROMIUM_ARGS,
                )
                try:
                    ctx = browser.new_context(
                        user_agent=_USER_AGENT,
                        viewport=_VIEWPORT,
                    )
                    page = ctx.new_page()
                    Stealth().apply_stealth_sync(page)
                    page.goto(url, wait_until="domcontentloaded", timeout=30_000)
                    if wait_for_selector:
                        try:
                            page.wait_for_selector(wait_for_selector, timeout=15_000)
                        except Exception:
                            pass
                    else:
                        page.wait_for_timeout(wait_for_timeout_ms)
                    html = page.content()
                finally:
                    browser.close()
        finally:
            xvfb.terminate()
            xvfb.wait()

        return html
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_pool: Optional[BrowserPool] = None
_pool_lock = threading.Lock()
def get_pool() -> BrowserPool:
    """Return the shared BrowserPool, building it on first use.

    The pool size is read from the BROWSER_POOL_SIZE environment variable
    (default "2") the first time the pool is created.
    """
    global _pool
    if _pool is not None:
        return _pool
    with _pool_lock:
        # Re-check under the lock: another thread may have won the race.
        if _pool is None:
            _pool = BrowserPool(int(os.environ.get("BROWSER_POOL_SIZE", "2")))
        return _pool

View file

@ -1,254 +0,0 @@
# app/platforms/ebay/categories.py
# MIT License
"""eBay category cache — fetches leaf categories from the Taxonomy API and stores them
in the local SQLite DB for injection into LLM query-builder prompts.
Refreshed weekly. Falls back to a hardcoded bootstrap table when no eBay API
credentials are configured (scraper-only users still get usable category hints).
"""
from __future__ import annotations
import logging
import sqlite3
from datetime import datetime, timedelta, timezone
from typing import Optional
import requests
log = logging.getLogger(__name__)
# Bootstrap table — common categories for self-hosters without eBay API credentials.
# category_id values are stable eBay leaf IDs (US marketplace, as of 2026).
_BOOTSTRAP_CATEGORIES: list[tuple[str, str, str]] = [
("27386", "Graphics Cards", "Consumer Electronics > Computers > Components > Graphics/Video Cards"),
("164", "CPUs/Processors", "Consumer Electronics > Computers > Components > CPUs/Processors"),
("170083","RAM", "Consumer Electronics > Computers > Components > Memory (RAM)"),
("175669","Solid State Drives", "Consumer Electronics > Computers > Components > Drives > Solid State Drives"),
("177089","Hard Drives", "Consumer Electronics > Computers > Components > Drives > Hard Drives"),
("179142","Laptops", "Consumer Electronics > Computers > Laptops & Netbooks"),
("171957","Desktop Computers", "Consumer Electronics > Computers > Desktops & All-in-Ones"),
("293", "Consumer Electronics","Consumer Electronics"),
("625", "Cameras", "Consumer Electronics > Cameras & Photography > Digital Cameras"),
("15052", "Vintage Cameras", "Consumer Electronics > Cameras & Photography > Vintage Movie Cameras"),
("11724", "Audio Equipment", "Consumer Electronics > TV, Video & Home Audio > Home Audio"),
("3676", "Vinyl Records", "Music > Records"),
("870", "Musical Instruments","Musical Instruments & Gear"),
("31388", "Video Game Consoles","Video Games & Consoles > Video Game Consoles"),
("139971","Video Games", "Video Games & Consoles > Video Games"),
("139973","Video Game Accessories", "Video Games & Consoles > Video Game Accessories"),
("14308", "Networking Gear", "Computers/Tablets & Networking > Home Networking & Connectivity"),
("182062","Smartphones", "Cell Phones & Smartphones"),
("9394", "Tablets", "Computers/Tablets & Networking > Tablets & eBook Readers"),
("11233", "Collectibles", "Collectibles"),
]
class EbayCategoryCache:
    """Caches eBay leaf categories in SQLite for LLM prompt injection.

    Args:
        conn: An open sqlite3.Connection with migration 011 already applied.
    """

    # Shared by the community-store and Taxonomy API refresh paths.
    _UPSERT_SQL = (
        "INSERT OR REPLACE INTO ebay_categories"
        " (category_id, name, full_path, is_leaf, refreshed_at)"
        " VALUES (?, ?, ?, 1, ?)"
    )

    def __init__(self, conn: sqlite3.Connection) -> None:
        self._conn = conn

    def is_stale(self, max_age_days: int = 7) -> bool:
        """Return True if the cache is empty or all entries are older than max_age_days.

        An unparseable newest timestamp is treated as stale.
        """
        cur = self._conn.execute("SELECT MAX(refreshed_at) FROM ebay_categories")
        row = cur.fetchone()
        if not row or not row[0]:
            return True
        try:
            latest = datetime.fromisoformat(row[0])
        except (ValueError, TypeError):
            # TypeError covers a non-string value stored in refreshed_at.
            return True
        if latest.tzinfo is None:
            # Naive timestamps are interpreted as UTC.
            latest = latest.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) - latest > timedelta(days=max_age_days)

    def _seed_bootstrap(self) -> None:
        """Insert the hardcoded bootstrap categories. Idempotent (INSERT OR IGNORE)."""
        now = datetime.now(timezone.utc).isoformat()
        self._conn.executemany(
            "INSERT OR IGNORE INTO ebay_categories"
            " (category_id, name, full_path, is_leaf, refreshed_at)"
            " VALUES (?, ?, ?, 1, ?)",
            [(cid, name, path, now) for cid, name, path in _BOOTSTRAP_CATEGORIES],
        )
        self._conn.commit()
        log.info("EbayCategoryCache: seeded %d bootstrap categories.", len(_BOOTSTRAP_CATEGORIES))

    @staticmethod
    def _like_pattern(keyword: str) -> str:
        """Return a lower-cased ``%keyword%`` pattern with LIKE wildcards escaped."""
        escaped = (
            keyword.lower()
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        return f"%{escaped}%"

    def get_relevant(
        self,
        keywords: list[str],
        limit: int = 30,
    ) -> list[tuple[str, str]]:
        """Return (category_id, full_path) pairs matching any keyword.

        Matches against both name and full_path (case-insensitive LIKE).
        ``%`` and ``_`` inside a keyword are matched literally, and blank
        keywords are ignored (a blank keyword would otherwise match every row).
        Returns at most `limit` rows, ordered by name.
        """
        patterns = [self._like_pattern(kw) for kw in keywords if kw and kw.strip()]
        if not patterns:
            return []
        conditions = " OR ".join(
            "LOWER(name) LIKE ? ESCAPE '\\' OR LOWER(full_path) LIKE ? ESCAPE '\\'"
            for _ in patterns
        )
        params: list[object] = []
        for pattern in patterns:
            params.extend([pattern, pattern])
        params.append(limit)
        cur = self._conn.execute(
            f"SELECT category_id, full_path FROM ebay_categories"
            f" WHERE {conditions} ORDER BY name LIMIT ?",
            params,
        )
        return [(row[0], row[1]) for row in cur.fetchall()]

    def get_all_for_prompt(self, limit: int = 80) -> list[tuple[str, str]]:
        """Return up to `limit` (category_id, full_path) pairs, sorted by name.

        Used when no keyword context is available.
        """
        cur = self._conn.execute(
            "SELECT category_id, full_path FROM ebay_categories ORDER BY name LIMIT ?",
            (limit,),
        )
        return [(row[0], row[1]) for row in cur.fetchall()]

    def _upsert_leaves(self, rows) -> None:
        """Insert-or-replace (category_id, name, full_path) rows stamped with now, then commit."""
        now = datetime.now(timezone.utc).isoformat()
        self._conn.executemany(
            self._UPSERT_SQL,
            [(cid, name, path, now) for cid, name, path in rows],
        )
        self._conn.commit()

    def _bootstrap_fallback(self) -> int:
        """Seed the bootstrap table and return the total number of cached rows."""
        self._seed_bootstrap()
        cur = self._conn.execute("SELECT COUNT(*) FROM ebay_categories")
        return cur.fetchone()[0]

    def refresh(
        self,
        token_manager: Optional["EbayTokenManager"] = None,
        community_store: Optional[object] = None,
    ) -> int:
        """Fetch the eBay category tree and upsert leaf nodes into SQLite.

        Args:
            token_manager: An `EbayTokenManager` instance for the Taxonomy API.
                If None, falls back to seeding the hardcoded bootstrap table.
            community_store: Optional SnipeCommunityStore instance.
                If provided and token_manager is set, publish leaves after a successful
                Taxonomy API fetch.
                If provided and token_manager is None, fetch from community before
                falling back to the hardcoded bootstrap (requires >= 10 rows).

        Returns:
            Number of leaf categories stored. On the bootstrap fallback this
            is the total row count of the table after seeding.
        """
        if token_manager is None:
            # Try community store first
            if community_store is not None:
                try:
                    community_cats = community_store.fetch_categories()
                    if len(community_cats) >= 10:
                        self._upsert_leaves(community_cats)
                        log.info(
                            "EbayCategoryCache: loaded %d categories from community store.",
                            len(community_cats),
                        )
                        return len(community_cats)
                    log.info(
                        "EbayCategoryCache: community store has %d categories (< 10) — falling back to bootstrap.",
                        len(community_cats),
                    )
                except Exception:
                    log.warning(
                        "EbayCategoryCache: community store fetch failed — falling back to bootstrap.",
                        exc_info=True,
                    )
            return self._bootstrap_fallback()

        try:
            token = token_manager.get_token()
            headers = {"Authorization": f"Bearer {token}"}

            # Step 1: get default tree ID for EBAY_US
            id_resp = requests.get(
                "https://api.ebay.com/commerce/taxonomy/v1/get_default_category_tree_id",
                params={"marketplace_id": "EBAY_US"},
                headers=headers,
                timeout=30,
            )
            id_resp.raise_for_status()
            tree_id = id_resp.json()["categoryTreeId"]

            # Step 2: fetch full tree (large response — may take several seconds)
            tree_resp = requests.get(
                f"https://api.ebay.com/commerce/taxonomy/v1/category_tree/{tree_id}",
                headers=headers,
                timeout=120,
            )
            tree_resp.raise_for_status()
            tree = tree_resp.json()

            leaves: list[tuple[str, str, str]] = []
            _extract_leaves(tree["rootCategoryNode"], path="", leaves=leaves)

            self._upsert_leaves(leaves)
            log.info(
                "EbayCategoryCache: refreshed %d leaf categories from eBay Taxonomy API.",
                len(leaves),
            )

            # Publish to community store if available; a publish failure must
            # not discard the successful local refresh.
            if community_store is not None:
                try:
                    community_store.publish_categories(leaves)
                except Exception:
                    log.warning(
                        "EbayCategoryCache: failed to publish categories to community store.",
                        exc_info=True,
                    )

            return len(leaves)

        except Exception:
            log.warning(
                "EbayCategoryCache: Taxonomy API refresh failed — falling back to bootstrap.",
                exc_info=True,
            )
            return self._bootstrap_fallback()
def _extract_leaves(
    node: dict,
    path: str,
    leaves: list[tuple[str, str, str]],
) -> None:
    """Collect every leaf of an eBay category tree, depth-first, in document order.

    Args:
        node: A categoryTreeNode dict from the eBay Taxonomy API response.
        path: The ancestor breadcrumb, e.g. "Consumer Electronics > Computers".
        leaves: Accumulator list of (category_id, name, full_path) tuples;
            appended to in place.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # they are popped (and visited) in their original order.
    pending = [(node, path)]
    while pending:
        current, breadcrumb = pending.pop()
        info = current["category"]
        ident: str = info["categoryId"]
        label: str = info["categoryName"]
        trail = f"{breadcrumb} > {label}" if breadcrumb else label

        if current.get("leafCategoryTreeNode", False):
            leaves.append((ident, label, trail))
            continue  # leaf — nothing below it to visit

        children = current.get("childCategoryTreeNodes", [])
        pending.extend((child, trail) for child in reversed(children))

View file

@ -1,10 +1,8 @@
"""Convert raw eBay API responses into Snipe domain objects.""" """Convert raw eBay API responses into Snipe domain objects."""
from __future__ import annotations from __future__ import annotations
import json import json
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
from app.db.models import Listing, Seller from app.db.models import Listing, Seller

View file

@ -16,7 +16,7 @@ import json
import logging import logging
import re import re
import time import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from typing import Optional from typing import Optional
@ -25,7 +25,7 @@ log = logging.getLogger(__name__)
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from app.db.models import Listing, MarketComp, Seller from app.db.models import Listing, MarketComp, Seller
from app.db.protocol import SharedTableProtocol from app.db.store import Store
from app.platforms import PlatformAdapter, SearchFilters from app.platforms import PlatformAdapter, SearchFilters
EBAY_SEARCH_URL = "https://www.ebay.com/sch/i.html" EBAY_SEARCH_URL = "https://www.ebay.com/sch/i.html"
@ -286,12 +286,12 @@ class ScrapedEbayAdapter(PlatformAdapter):
category_history) cause TrustScorer to set score_is_partial=True. category_history) cause TrustScorer to set score_is_partial=True.
""" """
def __init__(self, shared_store: SharedTableProtocol, delay: float = 1.0): def __init__(self, shared_store: Store, delay: float = 1.0):
self._store = shared_store self._store = shared_store
self._delay = delay self._delay = delay
def _fetch_url(self, url: str) -> str: def _fetch_url(self, url: str) -> str:
"""Core Playwright fetch — stealthed headed Chromium via pre-warmed browser pool. """Core Playwright fetch — stealthed headed Chromium via Xvfb.
Shared by both search (_get) and BTF item-page enrichment (_fetch_item_html). Shared by both search (_get) and BTF item-page enrichment (_fetch_item_html).
Results cached for _HTML_CACHE_TTL seconds. Results cached for _HTML_CACHE_TTL seconds.
@ -300,8 +300,41 @@ class ScrapedEbayAdapter(PlatformAdapter):
if cached and time.time() < cached[1]: if cached and time.time() < cached[1]:
return cached[0] return cached[0]
from app.platforms.ebay.browser_pool import get_pool # noqa: PLC0415 — lazy import time.sleep(self._delay)
html = get_pool().fetch_html(url, delay=self._delay)
import subprocess, os
display_num = next(_display_counter)
display = f":{display_num}"
xvfb = subprocess.Popen(
["Xvfb", display, "-screen", "0", "1280x800x24"],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
)
env = os.environ.copy()
env["DISPLAY"] = display
try:
from playwright.sync_api import sync_playwright # noqa: PLC0415 — lazy: only needed in Docker
from playwright_stealth import Stealth # noqa: PLC0415
with sync_playwright() as pw:
browser = pw.chromium.launch(
headless=False,
env=env,
args=["--no-sandbox", "--disable-dev-shm-usage"],
)
ctx = browser.new_context(
user_agent=_HEADERS["User-Agent"],
viewport={"width": 1280, "height": 800},
)
page = ctx.new_page()
Stealth().apply_stealth_sync(page)
page.goto(url, wait_until="domcontentloaded", timeout=30_000)
page.wait_for_timeout(2000) # let any JS challenges resolve
html = page.content()
browser.close()
finally:
xvfb.terminate()
xvfb.wait()
_html_cache[url] = (html, time.time() + _HTML_CACHE_TTL) _html_cache[url] = (html, time.time() + _HTML_CACHE_TTL)
return html return html
@ -374,6 +407,8 @@ class ScrapedEbayAdapter(PlatformAdapter):
Does not raise failures per-seller are silently skipped so the main Does not raise failures per-seller are silently skipped so the main
search response is never blocked. search response is never blocked.
""" """
db_path = self._store._db_path # capture for thread-local Store creation
def _enrich_one(item: tuple[str, str]) -> None: def _enrich_one(item: tuple[str, str]) -> None:
seller_id, listing_id = item seller_id, listing_id = item
try: try:
@ -386,7 +421,7 @@ class ScrapedEbayAdapter(PlatformAdapter):
) )
if age_days is None and fb_count is None: if age_days is None and fb_count is None:
return # nothing new to write return # nothing new to write
thread_store = self._store.clone() thread_store = Store(db_path)
seller = thread_store.get_seller("ebay", seller_id) seller = thread_store.get_seller("ebay", seller_id)
if not seller: if not seller:
log.warning("BTF enrich: seller %s not found in DB", seller_id) log.warning("BTF enrich: seller %s not found in DB", seller_id)

View file

@ -1,4 +0,0 @@
"""Mercari platform adapter."""
from app.platforms.mercari.adapter import MercariAdapter
__all__ = ["MercariAdapter"]

View file

@ -1,173 +0,0 @@
"""MercariAdapter — scraper-based Mercari platform adapter.
Trust signal coverage vs eBay:
feedback_count (NumSales from listing page)
feedback_ratio (ReviewStarsWrapper data-stars / 5)
account_age_days (requires seller profile page; future work)
category_history (not exposed in HTML; future work)
price_vs_market (computed by trust scorer from comps, same as eBay)
Because account_age and category_history are always None, TrustScore.score_is_partial
will be True for all Mercari results. The aggregator handles this correctly
by scoring only from available signals.
seller_platform_id on Listing objects holds the product_id (e.g. "m86032668393")
rather than the seller username, because search results don't expose seller identity.
get_seller() resolves the product_id to a seller by fetching the listing page.
The DB lookup key is (platform="mercari", platform_seller_id=product_id).
"""
from __future__ import annotations
import json
import logging
import time
from typing import Optional
from app.db.models import Listing, MarketComp, Seller
from app.db.store import Store
from app.platforms import PlatformAdapter, SearchFilters
from app.platforms.mercari.scraper import (
build_search_url,
parse_listing_html,
parse_search_html,
)
log = logging.getLogger(__name__)
_SELLER_CACHE_TTL_HOURS = 6
_BETWEEN_LISTING_FETCH_SECS = 1.5
class MercariAdapter(PlatformAdapter):
    """PlatformAdapter implementation that scrapes Mercari pages."""

    def __init__(self, store: Store) -> None:
        self._store = store

    def search(self, query: str, filters: SearchFilters) -> list[Listing]:
        """Scrape one Mercari search page and return de-duplicated listings.

        Price bounds from *filters* go into the search URL; the include /
        exclude keyword filters are applied to the parsed titles afterwards.
        """
        from app.platforms.ebay.browser_pool import get_pool

        url = build_search_url(query, filters.max_price, filters.min_price)
        log.info("mercari: fetching search URL: %s", url)
        html = get_pool().fetch_html(url, delay=1.0, wait_for_timeout_ms=8000)

        # Keep only the first occurrence of each product id, in page order.
        first_seen: dict[str, dict] = {}
        for raw in parse_search_html(html):
            first_seen.setdefault(raw["product_id"], raw)
        listings: list[Listing] = [
            _normalise_listing(raw, query) for raw in first_seen.values()
        ]

        log.info("mercari: parsed %d listings for %r", len(listings), query)

        # Client-side keyword filter (mirrors eBay scraper behaviour).
        if filters.must_include:
            listings = _apply_keyword_filter(
                listings, filters.must_include, filters.must_include_mode
            )
        if filters.must_exclude:
            listings = _apply_exclude_filter(listings, filters.must_exclude)
        return listings

    def get_seller(self, seller_platform_id: str) -> Optional[Seller]:
        """Fetch seller data from the listing page identified by seller_platform_id.

        For Mercari, seller_platform_id is the product_id (e.g. "m86032668393")
        because seller usernames aren't available from search results HTML.
        A stored seller is returned without any network access; on a miss
        the listing page is fetched, parsed and saved. Any failure is logged
        and yields None.
        """
        known = self._store.get_seller("mercari", seller_platform_id)
        if known:
            return known

        from app.platforms.ebay.browser_pool import get_pool

        listing_url = f"https://www.mercari.com/us/item/{seller_platform_id}/"
        try:
            time.sleep(_BETWEEN_LISTING_FETCH_SECS)
            page_html = get_pool().fetch_html(
                listing_url,
                delay=0.5,
                wait_for_timeout_ms=6000,
            )
            seller = _normalise_seller(parse_listing_html(page_html, seller_platform_id))
            self._store.save_seller(seller)
        except Exception as exc:
            log.warning("mercari: get_seller failed for %s: %s", seller_platform_id, exc)
            return None
        return seller

    def get_completed_sales(self, query: str, pages: int = 1) -> list[Listing]:
        """Mercari sold-listing comps — stubbed for Phase 3.

        Mercari exposes sold listings via ?status=ITEM_STATUS_TRADING but the
        data is sparse. Phase 3 will implement comp extraction here; for now
        the trust scorer falls back to price_vs_market=None (partial score).
        """
        return []
# ---------------------------------------------------------------------------
# Normalisation helpers
# ---------------------------------------------------------------------------
def _normalise_listing(raw: dict, query: str) -> Listing:
    """Convert one raw search-result dict into a Listing.

    The product id doubles as seller_platform_id (see module docstring).
    *query* is accepted but not used when building the Listing.
    """
    product_id = raw["product_id"]
    photo = raw.get("photo_url")
    photos = [photo] if photo else []
    return Listing(
        platform="mercari",
        platform_listing_id=product_id,
        title=raw["title"],
        price=raw["price"],
        currency="USD",
        condition="",  # not available from search results
        seller_platform_id=product_id,
        url=raw["url"],
        photo_urls=photos,
        listing_age_days=0,
        buying_format="fixed_price",
        category_name=None,
    )
def _normalise_seller(raw: dict) -> Seller:
    """Convert a raw listing-page dict into a Seller.

    The star rating is scaled to a 0.0-1.0 feedback ratio (capped at 1.0);
    a missing or non-positive rating maps to 0.0.
    """
    stars = raw.get("stars", 0.0)
    if stars > 0:
        ratio = min(stars / 5.0, 1.0)
    else:
        ratio = 0.0
    empty_history = json.dumps({})
    return Seller(
        platform="mercari",
        platform_seller_id=raw["product_id"],
        username=raw.get("username", ""),
        account_age_days=None,  # not available without seller profile page
        feedback_count=raw.get("num_sales", 0),
        feedback_ratio=ratio,
        category_history_json=empty_history,
    )
def _apply_keyword_filter(listings: list[Listing], must_include: list[str], mode: str) -> list[Listing]:
    """Keep listings whose title contains the required keywords (case-insensitive).

    Args:
        listings: Listings to filter.
        must_include: Keywords to look for as substrings of the title. Blank
            entries are ignored — an empty string is a substring of every
            title and would make "any" mode match everything.
        mode: "any" keeps a listing when at least one keyword matches; every
            other value ("all", "groups") requires all keywords to match.

    Returns:
        The filtered list, or *listings* itself when no usable keyword remains.
    """
    # Lower-case the keywords once rather than once per listing.
    needles = [kw.lower() for kw in must_include if kw and kw.strip()]
    if not needles:
        return listings

    combine = any if mode == "any" else all
    kept: list[Listing] = []
    for item in listings:
        title = item.title.lower()
        if combine(needle in title for needle in needles):
            kept.append(item)
    return kept
def _apply_exclude_filter(listings: list[Listing], must_exclude: list[str]) -> list[Listing]:
    """Drop listings whose title contains any excluded term (case-insensitive).

    Args:
        listings: Listings to filter.
        must_exclude: Terms matched as substrings of the title. Blank entries
            are ignored — an empty string is a substring of every title and
            would otherwise remove every listing.

    Returns:
        The filtered list, or *listings* itself when no usable term remains.
    """
    banned = [term.lower() for term in must_exclude if term and term.strip()]
    if not banned:
        return listings

    kept: list[Listing] = []
    for item in listings:
        title = item.title.lower()
        if not any(term in title for term in banned):
            kept.append(item)
    return kept

View file

@ -1,165 +0,0 @@
"""Mercari search + listing page scraper.
Uses the shared eBay browser pool (headed Chromium + Xvfb + playwright-stealth)
which already bypasses Cloudflare Turnstile. Import the pool singleton from
ebay.browser_pool so both platforms share the same warm Chromium instances.
Seller data is NOT available from search results HTML; it is only available
from individual listing pages. The adapter lazily fetches listing pages in get_seller().
"""
from __future__ import annotations
import logging
import re
from typing import Optional
from urllib.parse import urlencode
from bs4 import BeautifulSoup, NavigableString
log = logging.getLogger(__name__)
_BASE = "https://www.mercari.com"
_SEARCH_PATH = "/search/"
_ITEM_PATH = "/us/item/"
_PRICE_RE = re.compile(r"[\d,]+\.?\d*")
_POSTED_RE = re.compile(r"(\d{2})/(\d{2})/(\d{2,4})") # MM/DD/YY or MM/DD/YYYY
def build_search_url(query: str, max_price: Optional[float] = None, min_price: Optional[float] = None) -> str:
    """Build the Mercari search URL for *query* with optional price bounds.

    Bounds that are None or not positive are left out; the rest are
    truncated to whole dollars.
    """
    # No explicit sortBy — Mercari's default (relevance) is the most useful order.
    # "sortBy=SORT_SCORE" was a deprecated value that returns an empty results page.
    params: dict = {"keyword": query}
    # Mercari accepts priceMin/priceMax as whole dollar strings (not cents)
    for key, bound in (("priceMin", min_price), ("priceMax", max_price)):
        if bound is not None and bound > 0:
            params[key] = str(int(bound))
    query_string = urlencode(params)
    return f"{_BASE}{_SEARCH_PATH}?{query_string}"
def parse_search_html(html: str) -> list[dict]:
    """Extract one raw listing dict per ItemContainer in a Mercari search page.

    Containers without a data-productid attribute are skipped. Each dict has
    the keys product_id, url, title, price, photo_url, brand and is_on_sale.
    """
    soup = BeautifulSoup(html, "html.parser")
    parsed: list[dict] = []

    for card in soup.find_all(attrs={"data-testid": "ItemContainer"}):
        product_id = card.get("data-productid", "")
        if not product_id:
            continue

        # Use the wrapping <a> element's href when there is one; otherwise
        # build the item URL from the product id.
        wrapper = card.parent
        link = wrapper.get("href") if wrapper and wrapper.name == "a" else None
        if link:
            item_url = f"{_BASE}{link}"
        else:
            item_url = f"{_BASE}{_ITEM_PATH}{product_id}/"

        title_node = card.find(attrs={"data-testid": "ItemName"})
        thumb = card.find("img")

        parsed.append({
            "product_id": product_id,
            "url": item_url,
            "title": title_node.get_text(strip=True) if title_node else "",
            "price": _extract_current_price(card),
            "photo_url": thumb.get("src", "") if thumb else "",
            "brand": card.get("data-brand", ""),
            "is_on_sale": card.get("data-is-on-sale") == "true",
        })

    return parsed
def _extract_current_price(item: BeautifulSoup) -> float:
    """Read the current (non-strikethrough) price of an ItemContainer.

    Returns 0.0 when the price element is missing or holds no parseable number.
    """
    holder = item.find(attrs={"data-testid": "ProductThumbItemPrice"})
    if not holder:
        return 0.0

    # Direct text nodes are the current price; the nested span is the original.
    own_text = "".join(
        str(node) for node in holder.children if isinstance(node, NavigableString)
    ).strip()

    found = _PRICE_RE.search(own_text)
    if found is None:
        return 0.0
    try:
        return float(found.group().replace(",", ""))
    except ValueError:
        return 0.0
def parse_listing_html(html: str, product_id: str) -> dict:
    """Parse a Mercari listing page into a raw seller dict.

    Fields are located by their ``data-testid`` attribute. Anything missing
    or unparseable falls back to a neutral default (empty string, 0 or 0.0)
    instead of raising. *product_id* is copied into the result unchanged.
    """
    page = BeautifulSoup(html, "html.parser")

    def _text(testid: str) -> str:
        # Stripped text of the first element with this data-testid, or "".
        node = page.find(attrs={"data-testid": testid})
        if not node:
            return ""
        return node.get_text(strip=True)

    seller_name = _text("ItemDetailsSellerUserName").lstrip("@")
    sales_total = _safe_int(_text("NumSales"))
    review_total = _safe_int(_text("SellerRatingCount"))

    # The star rating is read from an attribute, not from the element text.
    star_rating = 0.0
    stars_tag = page.find(attrs={"data-testid": "ReviewStarsWrapper"})
    if stars_tag:
        try:
            star_rating = float(stars_tag.get("data-stars", 0))
        except (ValueError, TypeError):
            pass

    item_condition = _text("ItemDetailsCondition").lower()
    age_days = _parse_listing_age(_text("ItemDetailsPosted"))

    # Commas are dropped before matching so thousands separators do not
    # break the float conversion.
    listing_price = 0.0
    price_match = _PRICE_RE.search(_text("ItemPrice").replace(",", ""))
    if price_match:
        try:
            listing_price = float(price_match.group())
        except ValueError:
            pass

    return {
        "product_id": product_id,
        "username": seller_name,
        "num_sales": sales_total,  # completed sales → maps to feedback_count
        "rating_count": review_total,  # number of reviews (additional signal)
        "stars": star_rating,  # 0.0–5.0 → divide by 5 = feedback_ratio
        "condition": item_condition,
        "listing_age_days": age_days,
        "price": listing_price,
    }
def _safe_int(text: str) -> int:
    """Return the first ``_PRICE_RE`` match in *text* as an int, else 0.

    Commas are removed before matching, and the matched value is converted
    via ``float`` so any fractional part is truncated.
    """
    match = _PRICE_RE.search(text.replace(",", ""))
    if not match:
        return 0
    try:
        return int(float(match.group()))
    except ValueError:
        return 0
def _parse_listing_age(posted_text: str) -> int:
    """Convert a posted date like '04/10/26' to days since posted.

    The three ``_POSTED_RE`` groups are read as month, day, year, and a year
    below 100 is taken to be in the 2000s. Returns 0 when no date is found
    or when the numbers do not form a valid calendar date.
    """
    from datetime import datetime, timezone

    match = _POSTED_RE.search(posted_text)
    if not match:
        return 0

    try:
        month, day, year = (int(match.group(index)) for index in (1, 2, 3))
        full_year = year + 2000 if year < 100 else year
        posted_at = datetime(full_year, month, day, tzinfo=timezone.utc)
        elapsed = datetime.now(timezone.utc) - posted_at
        return elapsed.days
    except (ValueError, OverflowError):
        return 0

View file

@ -1,145 +0,0 @@
# app/tasks/monitor.py
"""Background saved-search monitor — polls eBay and writes WatchAlerts for new listings.
Design notes:
- Runs synchronously inside an asyncio.to_thread() call from the polling loop.
- Uses the same eBay adapter + trust scoring pipeline as the live search endpoint.
- Dedup via watch_alerts (saved_search_id, platform_listing_id) UNIQUE constraint.
- Never takes any transactional action — alert only.
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from app.db.models import SavedSearch, WatchAlert
from app.db.store import Store
log = logging.getLogger(__name__)
_AUCTION_ALERT_WINDOW_HOURS = 24  # alert on auctions ending within this window


def should_alert(
    *,
    trust_score: int,
    score_is_partial: bool,
    price: float,
    buying_format: str,
    min_trust_score: int,
    ends_at: "str | None" = None,
) -> bool:
    """Return True if a listing qualifies for a watch alert.

    BIN (fixed_price / best_offer): alert immediately — these sell on a
    first-come basis, so speed matters. Require a higher trust bar on partial
    scores to reduce false positives while BTF scraping is still in flight.

    Auction: only alert when the auction is within
    _AUCTION_ALERT_WINDOW_HOURS of ending. Alerting on a 7-day auction 6 days
    early is noise — the user can't act usefully until the end window anyway.
    Bid scheduling (paid+) and sniping algo (premium) are separate features
    built on top of this alert layer.

    Args:
        trust_score: Composite trust score of the listing.
        score_is_partial: True when the score is partial; the required
            minimum is then raised by 10 points.
        price: Listing price. Accepted for interface completeness; it does
            not influence the decision.
        buying_format: "fixed_price", "best_offer", "auction", or any other
            value (unknown formats always alert once the trust bar is met).
        min_trust_score: Minimum trust score configured on the saved search.
        ends_at: ISO-8601 auction end time. A trailing "Z" is accepted, and a
            timestamp without a UTC offset is interpreted as UTC.

    Returns:
        True when an alert should be written for the listing.
    """
    from datetime import datetime, timezone

    # Partial scores: apply a +10 buffer so we don't surface unreliable signals.
    effective_min = min_trust_score + 10 if score_is_partial else min_trust_score
    if trust_score < effective_min:
        return False

    if buying_format in ("fixed_price", "best_offer"):
        # BIN: alert immediately — inventory can disappear any time.
        return True

    if buying_format == "auction":
        if not ends_at:
            # No end time recorded — alert anyway rather than silently skip.
            return True
        try:
            end = datetime.fromisoformat(ends_at.replace("Z", "+00:00"))
            if end.tzinfo is None:
                # Subtracting an aware "now" from a naive datetime raises
                # TypeError, which used to land in the handler below and
                # alert unconditionally. Treat naive timestamps as UTC so the
                # end-window check still applies to them.
                end = end.replace(tzinfo=timezone.utc)
            hours_remaining = (end - datetime.now(timezone.utc)).total_seconds() / 3600
            return 0 < hours_remaining <= _AUCTION_ALERT_WINDOW_HOURS
        except (ValueError, TypeError):
            log.debug("should_alert: could not parse ends_at=%r, alerting anyway", ends_at)
            return True

    # Unknown format — alert and let the user decide.
    return True
def run_monitor_search(
    search: SavedSearch,
    *,
    user_db: Path,
    shared_db: Path,
) -> int:
    """Execute one background monitor run for a saved search.

    Runs the saved query through the eBay adapter, scores the results with
    TrustScorer, and upserts a WatchAlert into the user store for every
    listing that passes ``should_alert``. The search is marked as checked
    once the loop completes.

    Intended to be called from the async polling loop via
    asyncio.to_thread().

    Returns:
        The number of alerts that were newly written. 0 is also returned
        when the eBay search or the trust scoring raises; the failure is
        logged as a warning and the search is not marked as checked.
    """
    from app.platforms.ebay.adapter import EbayAdapter
    from app.trust import TrustScorer

    log.info("Monitor: checking saved search %d (%r)", search.id, search.name)

    # "query_raw" stored in the filters takes precedence over search.query;
    # the remaining filter entries are forwarded to the adapter as keywords.
    search_filters = json.loads(search.filters_json or "{}")
    query_text = search_filters.pop("query_raw", search.query)

    try:
        adapter = EbayAdapter()
        listings = adapter.search(query_text, **search_filters)
    except Exception as exc:
        log.warning("Monitor: eBay search failed for search %d: %s", search.id, exc)
        return 0

    shared_store = Store(shared_db)
    user_store = Store(user_db)
    scorer = TrustScorer(shared_store)

    try:
        scores = scorer.score_batch(listings, query_text)
    except Exception as exc:
        log.warning("Monitor: trust scoring failed for search %d: %s", search.id, exc)
        return 0

    written = 0
    for listing, trust in zip(listings, scores):
        if not should_alert(
            trust_score=trust.composite_score,
            score_is_partial=trust.score_is_partial,
            price=listing.price,
            buying_format=listing.buying_format,
            min_trust_score=search.min_trust_score,
            ends_at=listing.ends_at,
        ):
            continue

        candidate = WatchAlert(
            saved_search_id=search.id,
            platform_listing_id=listing.platform_listing_id,
            title=listing.title,
            price=listing.price,
            currency=listing.currency,
            trust_score=trust.composite_score,
            url=listing.url,
        )
        # upsert_alert reports whether the alert was newly inserted; only
        # new ones are counted and logged.
        _, inserted = user_store.upsert_alert(candidate)
        if not inserted:
            continue
        written += 1
        log.info(
            "Monitor: new alert — search %d, listing %s, score=%d",
            search.id, listing.platform_listing_id, trust.composite_score,
        )

    user_store.mark_search_checked(search.id)
    log.info(
        "Monitor: search %d done — %d new alerts from %d listings",
        search.id, written, len(listings),
    )
    return written

View file

@ -7,30 +7,29 @@ Current task types:
trust_photo_analysis download primary photo, run vision LLM, write trust_photo_analysis download primary photo, run vision LLM, write
result to trust_scores.photo_analysis_json (Paid tier). result to trust_scores.photo_analysis_json (Paid tier).
Image assessment routing: Prompt note: The vision prompt is a functional first pass. Tune against real
Cloud (GPU_SERVER_URL set): allocates via cf-orch task endpoint eBay listings before GA specifically stock-photo vs genuine-product distinction
product=snipe, task=image_assessment. and the damage vocabulary.
Local (no GPU_SERVER_URL) or TaskNotFound fallback: uses LLMRouter
with a vision-capable local backend (moondream2, llava, etc.).
""" """
from __future__ import annotations from __future__ import annotations
import base64 import base64
import json import json
import logging import logging
import os
from pathlib import Path from pathlib import Path
import httpx
import requests import requests
from circuitforge_core.db import get_connection from circuitforge_core.db import get_connection
from circuitforge_core.llm import LLMRouter
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
LLM_TASK_TYPES: frozenset[str] = frozenset({"trust_photo_analysis"}) LLM_TASK_TYPES: frozenset[str] = frozenset({"trust_photo_analysis"})
VRAM_BUDGETS: dict[str, float] = { VRAM_BUDGETS: dict[str, float] = {
"trust_photo_analysis": 6000, # Q5_K_M Qwen2-VL via cf-orch; LLMRouter fallback uses 2.0 GB # moondream2 / vision-capable LLM — single image, short response
"trust_photo_analysis": 2.0,
} }
_VISION_SYSTEM_PROMPT = ( _VISION_SYSTEM_PROMPT = (
@ -53,7 +52,8 @@ def insert_task(
) -> tuple[int, bool]: ) -> tuple[int, bool]:
"""Insert a background task if no identical task is already in-flight. """Insert a background task if no identical task is already in-flight.
Returns (task_id, is_new). Uses get_connection() so WAL mode and timeout=30 apply same as all other
Snipe DB access. Returns (task_id, is_new).
""" """
conn = get_connection(db_path) conn = get_connection(db_path)
conn.row_factory = __import__("sqlite3").Row conn.row_factory = __import__("sqlite3").Row
@ -121,26 +121,32 @@ def _run_trust_photo_analysis(
p = json.loads(params or "{}") p = json.loads(params or "{}")
photo_url = p.get("photo_url", "") photo_url = p.get("photo_url", "")
listing_title = p.get("listing_title", "") listing_title = p.get("listing_title", "")
# user_db: per-user DB in cloud mode; same as db_path in local mode.
result_db = Path(p.get("user_db", str(db_path))) result_db = Path(p.get("user_db", str(db_path)))
if not photo_url: if not photo_url:
raise ValueError("trust_photo_analysis: 'photo_url' is required in params") raise ValueError("trust_photo_analysis: 'photo_url' is required in params")
# Download and base64-encode the photo
resp = requests.get(photo_url, timeout=10) resp = requests.get(photo_url, timeout=10)
resp.raise_for_status() resp.raise_for_status()
image_b64 = base64.b64encode(resp.content).decode() image_b64 = base64.b64encode(resp.content).decode()
image_data_url = f"data:image/jpeg;base64,{image_b64}"
user_prompt = "Assess this listing image." # Build user prompt with optional title context
user_prompt = "Evaluate this eBay listing photo."
if listing_title: if listing_title:
user_prompt = f"Assess this eBay listing image: {listing_title}" user_prompt = f"Evaluate this eBay listing photo for: {listing_title}"
cforch_url = os.getenv("GPU_SERVER_URL") or os.getenv("CF_ORCH_URL") # Call LLMRouter with vision capability
if cforch_url: router = LLMRouter()
raw = _assess_via_orch(cforch_url, image_data_url, user_prompt) raw = router.complete(
else: user_prompt,
raw = _assess_via_local_llm(image_b64, user_prompt) system=_VISION_SYSTEM_PROMPT,
images=[image_b64],
max_tokens=128,
)
# Parse — be lenient: strip markdown fences if present
try: try:
cleaned = raw.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip() cleaned = raw.strip().removeprefix("```json").removeprefix("```").removesuffix("```").strip()
analysis = json.loads(cleaned) analysis = json.loads(cleaned)
@ -163,54 +169,3 @@ def _run_trust_photo_analysis(
analysis.get("visible_damage"), analysis.get("visible_damage"),
analysis.get("confidence"), analysis.get("confidence"),
) )
def _assess_via_orch(cforch_url: str, image_data_url: str, user_prompt: str) -> str:
"""Run photo assessment via cf-orch task endpoint (cloud path)."""
from circuitforge_orch.client import CFOrchClient, TaskNotFound
client = CFOrchClient(cforch_url)
try:
with client.task_allocate("snipe", "image_assessment") as alloc:
resp = httpx.post(
f"{alloc.url}/v1/chat/completions",
json={
"model": alloc.model or "__auto__",
"messages": [
{
"role": "system",
"content": _VISION_SYSTEM_PROMPT,
},
{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": image_data_url}},
{"type": "text", "text": user_prompt},
],
},
],
"max_tokens": 128,
},
timeout=60.0,
)
resp.raise_for_status()
return resp.json()["choices"][0]["message"]["content"]
except TaskNotFound:
log.warning(
"snipe.image_assessment not registered in cf-orch — falling back to local LLM"
)
image_b64 = image_data_url.split(",", 1)[1]
return _assess_via_local_llm(image_b64, user_prompt)
def _assess_via_local_llm(image_b64: str, user_prompt: str) -> str:
"""Run photo assessment via local LLMRouter (local/self-hosted path)."""
from app.llm.router import LLMRouter
router = LLMRouter()
return router.complete(
user_prompt,
system=_VISION_SYSTEM_PROMPT,
images=[image_b64],
max_tokens=128,
)

View file

@ -5,11 +5,9 @@ from __future__ import annotations
from pathlib import Path from pathlib import Path
from circuitforge_core.tasks.scheduler import ( from circuitforge_core.tasks.scheduler import (
TaskScheduler, # re-export for tests TaskScheduler,
)
from circuitforge_core.tasks.scheduler import (
get_scheduler as _base_get_scheduler, get_scheduler as _base_get_scheduler,
reset_scheduler, # re-export for lifespan teardown reset_scheduler, # re-export for tests
) )
from app.tasks.runner import LLM_TASK_TYPES, VRAM_BUDGETS, run_task from app.tasks.runner import LLM_TASK_TYPES, VRAM_BUDGETS, run_task

View file

@ -14,8 +14,7 @@ Intentionally ungated (free for all):
- saved_searches retention feature; friction cost outweighs gate value - saved_searches retention feature; friction cost outweighs gate value
""" """
from __future__ import annotations from __future__ import annotations
from circuitforge_core.tiers import can_use as _core_can_use, TIERS # noqa: F401
from circuitforge_core.tiers import can_use as _core_can_use # noqa: F401
# Feature key → minimum tier required. # Feature key → minimum tier required.
FEATURES: dict[str, str] = { FEATURES: dict[str, str] = {
@ -26,7 +25,6 @@ FEATURES: dict[str, str] = {
"reverse_image_search": "paid", "reverse_image_search": "paid",
"ebay_oauth": "paid", # full trust scores via eBay Trading API "ebay_oauth": "paid", # full trust scores via eBay Trading API
"background_monitoring": "paid", # limited at Paid; see LIMITS below "background_monitoring": "paid", # limited at Paid; see LIMITS below
"llm_query_builder": "paid", # inline natural-language → filter translator
# Premium tier # Premium tier
"auto_bidding": "premium", "auto_bidding": "premium",

View file

@ -1,18 +1,16 @@
import hashlib
import math
from app.db.models import Listing, TrustScore
from app.db.protocol import SharedTableProtocol
from .aggregator import Aggregator
from .metadata import MetadataScorer from .metadata import MetadataScorer
from .photo import PhotoScorer from .photo import PhotoScorer
from .aggregator import Aggregator
from app.db.models import Seller, Listing, TrustScore
from app.db.store import Store
import hashlib
import math
class TrustScorer: class TrustScorer:
"""Orchestrates metadata + photo scoring for a batch of listings.""" """Orchestrates metadata + photo scoring for a batch of listings."""
def __init__(self, shared_store: SharedTableProtocol): def __init__(self, shared_store: Store):
self._store = shared_store self._store = shared_store
self._meta = MetadataScorer() self._meta = MetadataScorer()
self._photo = PhotoScorer() self._photo = PhotoScorer()
@ -54,7 +52,6 @@ class TrustScorer:
signal_scores, is_dup, seller, signal_scores, is_dup, seller,
listing_id=listing.id or 0, listing_id=listing.id or 0,
listing_title=listing.title, listing_title=listing.title,
listing_condition=listing.condition,
times_seen=listing.times_seen, times_seen=listing.times_seen,
first_seen_at=listing.first_seen_at, first_seen_at=listing.first_seen_at,
price=listing.price, price=listing.price,

View file

@ -1,25 +1,14 @@
"""Composite score and red flag extraction.""" """Composite score and red flag extraction."""
from __future__ import annotations from __future__ import annotations
import json import json
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
from app.db.models import Seller, TrustScore from app.db.models import Seller, TrustScore
HARD_FILTER_AGE_DAYS = 7 HARD_FILTER_AGE_DAYS = 7
HARD_FILTER_BAD_RATIO_MIN_COUNT = 20 HARD_FILTER_BAD_RATIO_MIN_COUNT = 20
HARD_FILTER_BAD_RATIO_THRESHOLD = 0.80 HARD_FILTER_BAD_RATIO_THRESHOLD = 0.80
# Above this lifetime count the 12-month ratio may cover only a tiny recent sample,
# making a hard bad-actor flag disproportionate. Instead we emit the softer
# "declining_ratio" flag and let the composite score carry the penalty.
# Note: buyer-feedback-only accounts (e.g. longtime buyers who start selling) are a
# related edge case that requires profile-page scraping to detect properly — tracked
# in snipe#52 as a medium-term fix.
HARD_FILTER_BAD_RATIO_MAX_COUNT = 500
HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD = 0.60 # catastrophically bad even for high-volume
# Sellers above this feedback count are treated as established retailers. # Sellers above this feedback count are treated as established retailers.
# Stock photo reuse (duplicate_photo) is suppressed for them — large retailers # Stock photo reuse (duplicate_photo) is suppressed for them — large retailers
# legitimately share manufacturer images across many listings. # legitimately share manufacturer images across many listings.
@ -34,9 +23,8 @@ _SCRATCH_DENT_KEYWORDS = frozenset([
"crack", "cracked", "chip", "chipped", "crack", "cracked", "chip", "chipped",
"damage", "damaged", "cosmetic damage", "damage", "damaged", "cosmetic damage",
"blemish", "wear", "worn", "worn in", "blemish", "wear", "worn", "worn in",
# Parts / condition catch-alls (also matches eBay condition field strings verbatim) # Parts / condition catch-alls
"as is", "for parts", "parts only", "spares or repair", "parts or repair", "as is", "for parts", "parts only", "spares or repair", "parts or repair",
"parts/repair", "parts or not working", "not working",
# Evasive redirects — seller hiding damage detail in listing body # Evasive redirects — seller hiding damage detail in listing body
"see description", "read description", "read listing", "see listing", "see description", "read description", "read listing", "see listing",
"see photos for", "see pics for", "see images for", "see photos for", "see pics for", "see images for",
@ -69,9 +57,9 @@ def _days_since(iso: Optional[str]) -> Optional[int]:
dt = datetime.fromisoformat(iso.replace("Z", "+00:00")) dt = datetime.fromisoformat(iso.replace("Z", "+00:00"))
# Normalize to naive UTC so both paths (timezone-aware ISO and SQLite # Normalize to naive UTC so both paths (timezone-aware ISO and SQLite
# CURRENT_TIMESTAMP naive strings) compare correctly. # CURRENT_TIMESTAMP naive strings) compare correctly.
if dt.tzinfo is None: if dt.tzinfo is not None:
dt = dt.replace(tzinfo=timezone.utc) dt = dt.replace(tzinfo=None)
return (datetime.now(timezone.utc) - dt).days return (datetime.utcnow() - dt).days
except ValueError: except ValueError:
return None return None
@ -84,7 +72,6 @@ class Aggregator:
seller: Optional[Seller], seller: Optional[Seller],
listing_id: int = 0, listing_id: int = 0,
listing_title: str = "", listing_title: str = "",
listing_condition: str = "",
times_seen: int = 1, times_seen: int = 1,
first_seen_at: Optional[str] = None, first_seen_at: Optional[str] = None,
price: float = 0.0, price: float = 0.0,
@ -126,23 +113,11 @@ class Aggregator:
# Hard filters # Hard filters
if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS: if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS:
red_flags.append("new_account") red_flags.append("new_account")
if seller and seller.feedback_ratio == 0.0 and seller.feedback_count > 0: if seller and (
# 12-month ratio missing from page — returning seller or buyer-only account. seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
# Score will be partial (metadata._feedback_ratio returns None). Soft flag and seller.feedback_count > HARD_FILTER_BAD_RATIO_MIN_COUNT
# only: do NOT fire established_bad_actor on what is likely missing data. ):
red_flags.append("no_recent_seller_data")
elif seller and seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD:
if HARD_FILTER_BAD_RATIO_MIN_COUNT < seller.feedback_count <= HARD_FILTER_BAD_RATIO_MAX_COUNT:
# Moderate-volume account with consistently bad ratio → hard flag.
red_flags.append("established_bad_actor") red_flags.append("established_bad_actor")
elif seller.feedback_count > HARD_FILTER_BAD_RATIO_MAX_COUNT:
if seller.feedback_ratio < HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD:
# High-volume seller with catastrophic ratio → still hard flag.
red_flags.append("established_bad_actor")
else:
# High-volume seller with declining but not catastrophic ratio.
# 12-month window may cover only a small recent sample — soft flag only.
red_flags.append("declining_ratio")
if seller and seller.feedback_count == 0: if seller and seller.feedback_count == 0:
red_flags.append("zero_feedback") red_flags.append("zero_feedback")
# Zero feedback is a deliberate signal, not missing data — cap composite score # Zero feedback is a deliberate signal, not missing data — cap composite score
@ -162,9 +137,7 @@ class Aggregator:
) )
if photo_hash_duplicate and not is_established_retailer: if photo_hash_duplicate and not is_established_retailer:
red_flags.append("duplicate_photo") red_flags.append("duplicate_photo")
if (listing_title and _has_damage_keywords(listing_title)) or ( if listing_title and _has_damage_keywords(listing_title):
listing_condition and _has_damage_keywords(listing_condition)
):
red_flags.append("scratch_dent_mentioned") red_flags.append("scratch_dent_mentioned")
# Staging DB signals # Staging DB signals

View file

@ -1,9 +1,7 @@
"""Five metadata trust signals, each scored 0–20.""" """Five metadata trust signals, each scored 0–20."""
from __future__ import annotations from __future__ import annotations
import json import json
from typing import Optional from typing import Optional
from app.db.models import Seller from app.db.models import Seller
ELECTRONICS_CATEGORIES = {"ELECTRONICS", "COMPUTERS_TABLETS", "VIDEO_GAMES", "CELL_PHONES"} ELECTRONICS_CATEGORIES = {"ELECTRONICS", "COMPUTERS_TABLETS", "VIDEO_GAMES", "CELL_PHONES"}
@ -44,13 +42,7 @@ class MetadataScorer:
if count < 200: return 15 if count < 200: return 15
return 20 return 20
def _feedback_ratio(self, ratio: float, count: int) -> Optional[int]: def _feedback_ratio(self, ratio: float, count: int) -> int:
# ratio=0.0 with count>0 means the 12-month percentage wasn't on the page —
# eBay omits the ratio for returning/buyer-only sellers with no recent sales.
# Treat as missing rather than "literally 0% positive" (which eBay doesn't allow
# on active accounts — those get suspended long before reaching 0%).
if ratio == 0.0 and count > 0:
return None
if ratio < 0.80 and count > 20: return 0 if ratio < 0.80 and count > 20: return 0
if ratio < 0.90: return 5 if ratio < 0.90: return 5
if ratio < 0.95: return 10 if ratio < 0.95: return 10

View file

@ -1,9 +1,7 @@
"""Perceptual hash deduplication within a result set (free tier, v0.1).""" """Perceptual hash deduplication within a result set (free tier, v0.1)."""
from __future__ import annotations from __future__ import annotations
import io
from typing import Optional from typing import Optional
import io
import requests import requests
try: try:

View file

@ -1,24 +1,19 @@
"""Main search + results page.""" """Main search + results page."""
from __future__ import annotations from __future__ import annotations
import logging import logging
import os import os
from pathlib import Path from pathlib import Path
import streamlit as st import streamlit as st
from circuitforge_core.config import load_env from circuitforge_core.config import load_env
from app.db.store import Store from app.db.store import Store
from app.platforms import PlatformAdapter, SearchFilters from app.platforms import PlatformAdapter, SearchFilters
from app.trust import TrustScorer from app.trust import TrustScorer
from app.ui.components.easter_eggs import ( from app.ui.components.filters import build_filter_options, render_filter_sidebar, FilterState
auction_hours_remaining,
check_snipe_mode,
inject_steal_css,
render_snipe_mode_banner,
)
from app.ui.components.filters import FilterState, build_filter_options, render_filter_sidebar
from app.ui.components.listing_row import render_listing_row from app.ui.components.listing_row import render_listing_row
from app.ui.components.easter_eggs import (
inject_steal_css, check_snipe_mode, render_snipe_mode_banner,
auction_hours_remaining,
)
log = logging.getLogger(__name__) log = logging.getLogger(__name__)

View file

@ -22,6 +22,7 @@ import streamlit as st
from app.db.models import Listing, TrustScore from app.db.models import Listing, TrustScore
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# 1. Konami → Snipe Mode # 1. Konami → Snipe Mode
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View file

@ -1,12 +1,9 @@
"""Build dynamic filter options from a result set and render the Streamlit sidebar.""" """Build dynamic filter options from a result set and render the Streamlit sidebar."""
from __future__ import annotations from __future__ import annotations
import json import json
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
import streamlit as st import streamlit as st
from app.db.models import Listing, TrustScore from app.db.models import Listing, TrustScore

View file

@ -1,17 +1,13 @@
"""Render a single listing row with trust score, badges, and error states.""" """Render a single listing row with trust score, badges, and error states."""
from __future__ import annotations from __future__ import annotations
import json import json
from typing import Optional from typing import Optional
import streamlit as st import streamlit as st
from app.db.models import Listing, Seller, TrustScore from app.db.models import Listing, TrustScore, Seller
from app.ui.components.easter_eggs import ( from app.ui.components.easter_eggs import (
auction_hours_remaining, is_steal, render_steal_banner, render_auction_notice, auction_hours_remaining,
is_steal,
render_auction_notice,
render_steal_banner,
) )

View file

@ -1,8 +1,6 @@
"""First-run wizard: collect eBay credentials and write .env.""" """First-run wizard: collect eBay credentials and write .env."""
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
import streamlit as st import streamlit as st
from circuitforge_core.wizard import BaseWizard from circuitforge_core.wizard import BaseWizard

View file

@ -20,19 +20,9 @@ services:
CLOUD_MODE: "true" CLOUD_MODE: "true"
CLOUD_DATA_ROOT: /devl/snipe-cloud-data CLOUD_DATA_ROOT: /devl/snipe-cloud-data
# DIRECTUS_JWT_SECRET, HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN — set in .env (never commit) # DIRECTUS_JWT_SECRET, HEIMDALL_URL, HEIMDALL_ADMIN_TOKEN — set in .env (never commit)
# GPU_SERVER_URL routes LLM query builder through cf-orch for VRAM-aware scheduling.
# Override in .env to use a different coordinator URL.
GPU_SERVER_URL: "http://host.docker.internal:7700"
# SNIPE_SHARED_DB_URL — Postgres DSN for shared tables (sellers, market_comps, blocklist).
# Required for production multi-user deployments. Set in .env (never commit).
# SNIPE_SHARED_DB_URL: "postgresql://snipe:<password>@postgres:5432/snipe_shared"
CF_APP_NAME: snipe
extra_hosts:
- "host.docker.internal:host-gateway"
# No network_mode: host — isolated on snipe-cloud-net; nginx reaches it via 'api:8510' # No network_mode: host — isolated on snipe-cloud-net; nginx reaches it via 'api:8510'
volumes: volumes:
- /devl/snipe-cloud-data:/devl/snipe-cloud-data - /devl/snipe-cloud-data:/devl/snipe-cloud-data
- ./config/llm.cloud.yaml:/app/snipe/config/llm.yaml:ro
networks: networks:
- snipe-cloud-net - snipe-cloud-net

View file

@ -1,48 +1,21 @@
# compose.override.yml — dev-only additions (auto-applied by Docker Compose in dev).
# Safe to delete on a self-hosted machine — compose.yml is self-contained.
#
# What this adds over compose.yml:
# - Live source mounts so code changes take effect without rebuilding images
# - RELOAD=true to enable uvicorn --reload for the API
# - cf-orch-agent sidecar for local GPU task routing (opt-in: --profile orch)
#
# NOTE: circuitforge-core is NOT mounted here — use `./manage.sh build` to
# pick up cf-core changes. Mounting it as a bind volume would break self-hosted
# installs that don't have the sibling directory.
services: services:
api: api:
build:
context: ..
dockerfile: snipe/Dockerfile
network_mode: host
volumes: volumes:
- ../circuitforge-core:/app/circuitforge-core
- ./api:/app/snipe/api - ./api:/app/snipe/api
- ./app:/app/snipe/app - ./app:/app/snipe/app
- ./data:/app/snipe/data
- ./tests:/app/snipe/tests - ./tests:/app/snipe/tests
environment: environment:
- RELOAD=true - RELOAD=true
# Point the LLM/vision task scheduler at the local cf-orch coordinator.
# Only has effect when GPU_SERVER_URL is set (uncomment in .env, or set inline).
# - GPU_SERVER_URL=http://10.1.10.71:7700
# cf-orch agent — routes trust_photo_analysis vision tasks to the GPU coordinator. web:
# Only starts when you pass --profile orch: build:
# docker compose --profile orch up context: .
# dockerfile: docker/web/Dockerfile
# Requires a running cf-orch coordinator. Default: Heimdall at 10.1.10.71:7700. volumes:
# Override via CF_ORCH_COORDINATOR_URL in .env. - ./web/src:/app/src # not used at runtime but keeps override valid
#
# To use a locally-built cf-orch image instead of the published one:
# build:
# context: ../circuitforge-orch
# dockerfile: Dockerfile
cf-orch-agent:
image: ghcr.io/circuitforgellc/cf-orch:latest
command: >
agent
--coordinator ${CF_ORCH_COORDINATOR_URL:-http://10.1.10.71:7700}
--node-id snipe-dev
--host 0.0.0.0
--port 7701
--advertise-host 127.0.0.1
environment:
CF_COORDINATOR_URL: ${CF_ORCH_COORDINATOR_URL:-http://10.1.10.71:7700}
restart: on-failure
profiles:
- orch

View file

@ -3,14 +3,11 @@ services:
build: build:
context: .. context: ..
dockerfile: snipe/Dockerfile dockerfile: snipe/Dockerfile
# Host networking lets nginx (in the web container) reach the API at ports:
# 172.17.0.1:8510 (the Docker bridge gateway). Required — nginx.conf - "8510:8510"
# is baked into the image and hard-codes that address.
network_mode: host
env_file: .env env_file: .env
volumes: volumes:
- ./data:/app/snipe/data - ./data:/app/snipe/data
restart: unless-stopped
web: web:
build: build:

View file

@ -1,38 +0,0 @@
# config/llm.cloud.yaml
# Snipe — LLM config for the managed cloud instance (menagerie)
#
# Mounted read-only into the cloud API container at /app/config/llm.yaml
# (see compose.cloud.yml). Personal fine-tunes and local-only backends
# (claude_code, copilot) are intentionally excluded here.
#
# CF Orchestrator routes both ollama and vllm allocations for VRAM-aware
# scheduling. GPU_SERVER_URL must be set in .env for allocations to resolve;
# if cf-orch is unreachable the backend falls back to its static base_url.
#
# Model choice for query builder: llama3.1:8b
# - Reliable instruction following and JSON output
# - No creative fine-tuning drift (unlike writer models in the pool)
# - Fits comfortably in 8 GB VRAM alongside other services
backends:
ollama:
type: openai_compat
base_url: http://host.docker.internal:11434/v1
api_key: ollama
model: llama3.1:8b
enabled: true
supports_images: false
cf_orch:
service: ollama
ttl_s: 300
anthropic:
type: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-haiku-4-5-20251001
enabled: false
supports_images: false
fallback_order:
- ollama
- anthropic

View file

@ -1,60 +0,0 @@
# config/llm.yaml.example
# Snipe — LLM backend configuration
#
# Copy to config/llm.yaml and edit for your setup.
# The query builder ("Search with AI") uses the text fallback_order.
#
# Backends are tried in fallback_order until one succeeds.
# Set enabled: false to skip a backend without removing it.
#
# CF Orchestrator (cf-orch): when CF_ORCH_URL is set in the environment and a
# backend has a cf_orch block, allocations are routed through cf-orch for
# VRAM-aware scheduling. Omit cf_orch to hit the backend directly.
backends:
anthropic:
type: anthropic
api_key_env: ANTHROPIC_API_KEY
model: claude-haiku-4-5-20251001
enabled: false
supports_images: false
openai:
type: openai_compat
base_url: https://api.openai.com/v1
api_key_env: OPENAI_API_KEY
model: gpt-4o-mini
enabled: false
supports_images: false
ollama:
type: openai_compat
base_url: http://localhost:11434/v1
api_key: ollama
model: llama3.1:8b
enabled: true
supports_images: false
# Uncomment to route through cf-orch for VRAM-aware scheduling:
# cf_orch:
# service: ollama
# ttl_s: 300
# ── cf-orch trunk services ─────────────────────────────────────────────────
# Allocate via cf-orch; the router calls the allocated service directly.
# Set CF_ORCH_URL (env) or url below to activate.
cf_text:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1
model: __auto__
api_key: any
supports_images: false
cf_orch:
service: cf-text
model_candidates: []
ttl_s: 3600
fallback_order:
- anthropic
- openai
- ollama

View file

@ -16,10 +16,6 @@ server {
# Forward the session header injected by Caddy from the cf_session cookie. # Forward the session header injected by Caddy from the cf_session cookie.
# Caddy adds: header_up X-CF-Session {http.request.cookie.cf_session} # Caddy adds: header_up X-CF-Session {http.request.cookie.cf_session}
proxy_set_header X-CF-Session $http_x_cf_session; proxy_set_header X-CF-Session $http_x_cf_session;
# eBay search + comps can take 60-90s (Marketplace Insights 404 → Browse fallback).
# Default 60s proxy_read_timeout drops slow searches with a NetworkError on the client.
proxy_read_timeout 120s;
proxy_send_timeout 120s;
} }
# index.html — never cache; ensures clients always get the latest entry point # index.html — never cache; ensures clients always get the latest entry point

View file

@ -1,39 +0,0 @@
# eBay API Keys (Optional)
Snipe works without any credentials using its Playwright scraper fallback. Adding eBay API credentials unlocks faster searches and higher rate limits.
## What API keys enable
| Feature | Without keys | With keys |
|---------|-------------|-----------|
| Listing search | Playwright scraper | eBay Browse API (faster, higher limits) |
| Market comps (completed sales) | Not available | eBay Marketplace Insights API |
| Seller account data | BTF scraper (Xvfb) | BTF scraper (same — eBay API doesn't expose join date) |
## Getting credentials
1. Create a developer account at [developer.ebay.com](https://developer.ebay.com/my/keys)
2. Create a new application (choose **Production**)
3. Copy your **App ID (Client ID)** and **Cert ID (Client Secret)**
## Configuration
Add your credentials to `.env`:
```bash
EBAY_APP_ID=YourAppID-...
EBAY_CERT_ID=YourCertID-...
```
Then restart:
```bash
./manage.sh restart
```
## Verifying
After restart, the search bar shows **API** as available in the data source selector. The auto mode will use the API by default.
!!! note
The Marketplace Insights API (for completed sales comps) requires an approved eBay developer account. New accounts may not have access. Snipe gracefully falls back to Browse API results when Insights returns 403 or 404.

View file

@ -1,102 +0,0 @@
# Installation
## Requirements
- Docker with Compose plugin
- Git
- No API keys required to get started
## One-line install
```bash
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/raw/branch/main/install.sh)
```
This clones the repo to `~/snipe` and starts the stack. Open **http://localhost:8509** when it completes.
## Manual install
Snipe's API image is built from a context that includes `circuitforge-core`. Both repos must sit as siblings:
```
workspace/
├── snipe/ ← this repo
└── circuitforge-core/ ← required sibling
```
```bash
mkdir snipe-workspace && cd snipe-workspace
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/snipe.git
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git
cd snipe
cp .env.example .env
./manage.sh start
```
## Managing the stack
```bash
./manage.sh start # build and start all containers
./manage.sh stop # stop containers
./manage.sh restart # rebuild and restart
./manage.sh status # container health
./manage.sh logs # tail logs
./manage.sh open # open in browser
```
## Updating
```bash
git pull
./manage.sh restart
```
## Ports
| Service | Default port |
|---------|-------------|
| Web UI | 8509 |
| API | 8510 |
Both ports are configurable in `.env`.
---
## No-Docker install (bare metal)
Run `install.sh --bare-metal` to skip Docker and install via conda or venv instead.
This sets up the Python environment, builds the Vue frontend, and writes helper scripts.
**Requirements:** Python 3.11+, Node.js 20+, `xvfb` (for the eBay scraper).
```bash
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/snipe/raw/branch/main/install.sh) --bare-metal
```
After install, you get two scripts:
| Script | What it does |
|--------|-------------|
| `./start-local.sh` | Start the FastAPI API on port 8510 |
| `./serve-ui.sh` | Serve the built frontend with `python3 -m http.server 8509` (dev only) |
`serve-ui.sh` is single-threaded and suitable for testing only. For a real deployment, use nginx.
### nginx config (production bare-metal)
Install nginx, copy the sample config, and reload:
```bash
sudo cp docs/nginx-self-hosted.conf /etc/nginx/sites-available/snipe
sudo ln -s /etc/nginx/sites-available/snipe /etc/nginx/sites-enabled/snipe
# Edit the file — update `root` to your actual web/dist path
sudo nginx -t && sudo systemctl reload nginx
```
See [`docs/nginx-self-hosted.conf`](../nginx-self-hosted.conf) for the full config with TLS notes.
### Chromium / Xvfb note
Snipe uses headed Chromium via Xvfb to bypass Kasada (the anti-bot layer on eBay seller profile pages). If Chromium is not detected, the scraper falls back to the eBay Browse API — add `EBAY_APP_ID` / `EBAY_CERT_ID` to `.env` so that fallback has credentials.
The installer detects and installs Xvfb automatically on Debian/Ubuntu/Fedora. Chromium is installed via `playwright install chromium`. macOS is not supported for the scraper path.

View file

@ -1,39 +0,0 @@
# Quick Start
## 1. Run a search
Type a query into the search bar and press **Search** or hit Enter.
!!! tip
Start broad (`vintage camera`) then narrow with keyword filters once you see results. The must-include and must-exclude fields let you refine without re-searching from scratch.
## 2. Read the trust badge
Each listing card shows a trust badge in the top-right corner:
| Badge | Meaning |
|-------|---------|
| Green (70–100) | Established seller, no major concerns |
| Yellow (40–69) | Some signals missing or marginal |
| Red (0–39) | Multiple red flags — proceed carefully |
| `STEAL` label | Price significantly below market median |
A spinning indicator below the badge means enrichment is still in progress (account age is being fetched). Scores update automatically when enrichment completes.
## 3. Check red flags
Red flag pills appear below the listing title when Snipe detects a concern. Hover or tap a flag for a plain-language explanation.
## 4. Click through to eBay
Listing titles link directly to eBay. In cloud mode, links include an affiliate code that supports Snipe's development at no cost to you. You can opt out in Settings.
## 5. Filter results
Use the sidebar filters to narrow results without re-running the eBay search:
- **Min trust score** — slider to hide low-confidence listings
- **Min account age / Min feedback** — hide new or low-volume sellers
- **Hide listings checkboxes** — hide new accounts, suspicious prices, duplicate photos, damage mentions, long-on-market, significant price drop
These filters apply instantly to the current result set. Use the search bar to change the underlying eBay query.

View file

@ -1,33 +0,0 @@
# Snipe
**eBay trust scoring before you bid.**
![Snipe landing hero](screenshots/01-hero.png)
Snipe scores eBay listings and sellers for trustworthiness before you place a bid. Paste a search query, get results with trust scores, and know exactly which listings are worth your time.
## What it catches
- **New accounts** selling high-value items with no track record
- **Suspicious prices** — listings priced far below completed sales
- **Duplicate photos** — images copy-pasted from other listings (perceptual hash deduplication)
- **Damage buried in titles** — scratch, dent, untested, for parts, and similar
- **Known bad actors** — sellers on the community blocklist
## How it works
![Search results with trust scores](screenshots/02-results.png)
Each listing gets a composite trust score from 0–100 based on five seller signals: account age, feedback count, feedback ratio, price vs. market, and category history. Red flags are surfaced alongside the score, not buried in it.
## Free, no account required
Search and scoring work without creating an account. Community features (reporting sellers, importing blocklists) require a free account.
## Quick links
- [Installation](getting-started/installation.md)
- [Understanding trust scores](user-guide/trust-scores.md)
- [Red flags reference](user-guide/red-flags.md)
- [Cloud demo](https://menagerie.circuitforge.tech/snipe)
- [Source code](https://git.opensourcesolarpunk.com/Circuit-Forge/snipe)

View file

@ -1,58 +0,0 @@
# nginx config for Snipe — bare-metal self-hosted (no Docker).
#
# Usage:
# sudo cp docs/nginx-self-hosted.conf /etc/nginx/sites-available/snipe
# # Edit: update `root` to your actual web/dist path and `server_name` to your hostname
# sudo ln -s /etc/nginx/sites-available/snipe /etc/nginx/sites-enabled/snipe
# sudo nginx -t && sudo systemctl reload nginx
#
# Assumes:
# - The Snipe FastAPI API is running on 127.0.0.1:8510 (./start-local.sh)
# - The Vue frontend was built by install.sh into web/dist/
# - TLS termination is handled separately (Caddy, certbot, or upstream proxy)
#
# For TLS with Let's Encrypt, run:
# sudo certbot --nginx -d your.domain.com
# Certbot will add the ssl_certificate lines automatically.
server {
listen 80;
server_name your.domain.com; # replace or use _ for catch-all
# Path to the Vue production build — update to match your install directory
root /home/youruser/snipe/snipe/web/dist;
index index.html;
# Proxy all /api/ requests to the FastAPI backend
location /api/ {
proxy_pass http://127.0.0.1:8510;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# SSE (Server-Sent Events) — live trust score updates
# These are long-lived streaming responses; disable buffering.
proxy_buffering off;
proxy_cache off;
proxy_read_timeout 120s;
}
# index.html — never cache; ensures clients always get the latest entry point
# after a deployment (JS/CSS chunks are content-hashed so they cache forever)
location = /index.html {
add_header Cache-Control "no-cache, no-store, must-revalidate";
try_files $uri /index.html;
}
# SPA fallback — all unknown paths serve index.html so Vue Router handles routing
location / {
try_files $uri $uri/ /index.html;
}
# Long-term cache for content-hashed static assets
location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
expires 1y;
add_header Cache-Control "public, immutable";
}
}

View file

@ -1 +0,0 @@
// Analytics loader: builds a deferred <script> element pointing at the
// analytics host and appends it to <head>. The data-domain / data-api
// attributes are read by the loaded script.
(function () {
  var tag = document.createElement("script");
  tag.defer = true;
  tag.dataset.domain = "docs.circuitforge.tech,circuitforge.tech";
  tag.dataset.api = "https://analytics.circuitforge.tech/api/event";
  tag.src = "https://analytics.circuitforge.tech/js/script.js";
  document.head.appendChild(tag);
})();

View file

@ -1,66 +0,0 @@
# Architecture
## Stack
| Layer | Technology |
|-------|-----------|
| Frontend | Vue 3, Pinia, UnoCSS |
| API | FastAPI (Python), host networking |
| Database | SQLite (WAL mode) |
| Scraper | Playwright + Chromium + Xvfb |
| Container | Docker Compose |
## Data flow
```mermaid
graph LR
User -->|search query| VueSPA
VueSPA -->|GET /api/search| FastAPI
FastAPI -->|Browse API or Playwright| eBay
eBay --> FastAPI
FastAPI -->|score_batch| TrustScorer
TrustScorer --> FastAPI
FastAPI -->|BTF enrich queue| XvfbScraper
XvfbScraper -->|seller profile| eBayProfile
eBayProfile --> XvfbScraper
XvfbScraper -->|account_age update| SQLite
FastAPI -->|SSE push| VueSPA
```
## Database layout
Snipe uses two SQLite databases in cloud mode:
| Database | Contents |
|----------|---------|
| `shared.db` | Sellers, listings, market comps, community signals, scammer blocklist |
| `user.db` | Trust scores, saved searches, user preferences, background tasks |
In local (self-hosted) mode, everything uses a single `snipe.db`.
WAL (Write-Ahead Logging) mode is enabled on all connections for concurrent reader safety.
## Seller enrichment pipeline
eBay's Browse API returns listings without seller account ages. Snipe fetches account ages by loading the seller's eBay profile page in a headed Chromium instance via Xvfb.
Each enrichment session uses a unique Xvfb display number (`:200`–`:299`, cycling) to prevent lock file collisions across parallel sessions. Kasada bot protection blocks headless Chrome and curl-based requests — only a full headed browser session passes.
## Affiliate URL wrapping
All listing URLs are wrapped with an eBay Partner Network (EPN) affiliate code before being returned to the frontend. Resolution order:
1. User opted out → plain URL
2. User has BYOK EPN ID (Premium) → wrap with user's ID
3. CF affiliate ID configured in `.env` → wrap with CF's ID
4. Not configured → plain URL
## Licensing
| Layer | License |
|-------|---------|
| Discovery pipeline (scraper, trust scoring, search) | MIT |
| AI features (photo analysis, description reasoning) | BSL 1.1 |
| Fine-tuned model weights | Proprietary |
BSL 1.1 is free for personal non-commercial self-hosting. SaaS re-hosting requires a commercial license. Converts to MIT after 4 years.

View file

@ -1,31 +0,0 @@
# Tier System
Snipe uses CircuitForge's three-tier model.
## Tiers
| Tier | Price | Key features |
|------|-------|-------------|
| **Free** | Free | Search, trust scoring, red flags, blocklist, market comps, affiliate links, saved searches |
| **Paid** | $5/mo or $129 lifetime | Photo analysis, background monitoring (up to 5 searches), serial number check |
| **Premium** | $10/mo or $249 lifetime | All Paid features, background monitoring (up to 25), custom affiliate ID (BYOK EPN) |
## Free tier philosophy
Snipe's core trust-scoring pipeline — the part that actually catches scammers — is entirely free and requires no account. This is intentional.
More users = more community blocklist data = better protection for everyone. The free tier drives the network effect that makes the paid features more valuable.
## Self-hosted
Running Snipe yourself? All features are available with no tier gates in local mode. Bring your own LLM (Ollama compatible) to unlock photo analysis and description reasoning on your own hardware.
## BYOK (Bring Your Own Key)
Premium subscribers can supply:
- **Local LLM endpoint** — any OpenAI-compatible server (Ollama, vLLM, LM Studio) unlocks AI features on Free tier
- **eBay Partner Network campaign ID** — your affiliate revenue instead of Snipe's
## Cloud trial
15-day free trial of Paid tier on first signup. No credit card required.

View file

@ -1,84 +0,0 @@
# Trust Score Algorithm
## Signal scoring
Each signal contributes 0–20 points to the composite score.
### account_age
| Days old | Score |
|----------|-------|
| < 7 | 0 (triggers `new_account` hard flag) |
| 7–30 | 5 |
| 30–90 | 10 |
| 90–365 | 15 |
| > 365 | 20 |
Data source: eBay profile page (BTF scraper via headed Chromium + Xvfb — eBay API does not expose account registration date).
### feedback_count
| Count | Score |
|-------|-------|
| 0 | 0 (triggers `zero_feedback` hard flag, score capped at 35) |
| 1–9 | 5 |
| 10–49 | 10 |
| 50–199 | 15 |
| 200+ | 20 |
### feedback_ratio
| Ratio | Score |
|-------|-------|
| < 80% (with 20+ reviews) | 0 (triggers `established_bad_actor`) |
| < 90% | 5 |
| 90–94% | 10 |
| 95–98% | 15 |
| 99–100% | 20 |
### price_vs_market
Compares listing price to the median of recent completed sales from eBay Marketplace Insights API.
| Price vs. median | Score |
|-----------------|-------|
| < 40% | 0 (triggers `suspicious_price` flag) |
| 40–59% | 5 |
| 60–79% | 10 |
| 80–120% | 20 (normal range) |
| 121–149% | 15 |
| 150%+ | 10 |
`suspicious_price` flag is suppressed when the market price distribution is too wide (standard deviation > 50% of median) — this prevents false positives on heterogeneous search results.
When no market data is available, this signal returns `None` and is excluded from the composite.
### category_history
Derived from the seller's recent listing history (categories of their sold items):
| Result | Score |
|--------|-------|
| Seller has history in this category | 20 |
| Seller sells cross-category (generalist) | 10 |
| No category history available | None (excluded from composite) |
## Composite calculation
```
composite = (sum of available signal scores) / (20 × count of available signals) × 100
```
This ensures missing signals don't penalize a seller — only available signals count toward the denominator.
## Zero-feedback cap
When `feedback_count == 0`, the composite is hard-capped at **35** after the standard calculation. A 0-feedback seller cannot score above 35 regardless of other signals.
## Partial scores
A score is marked **partial** when one or more signals are `None` (not yet available). The score is recalculated and the partial flag is cleared when enrichment completes.
## Red flag override
Red flags are evaluated independently of the composite score. A seller can have a high composite score and still trigger red flags — for example, a long-established seller with a suspicious-priced listing and duplicate photos.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 191 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 164 KiB

View file

@ -1,34 +0,0 @@
# Community Blocklist
The blocklist is a shared database of sellers flagged by Snipe users. When a blocklisted seller appears in search results, their listing card is marked with an `established_bad_actor` flag.
## Viewing the blocklist
Navigate to **Blocklist** in the sidebar to see all reported sellers, with usernames, platforms, and optional reasons.
## Reporting a seller
On any listing card, click the **Block** button (shield icon) to report the seller. You can optionally add a reason (e.g. "sent counterfeit item", "never shipped").
!!! note
In cloud mode, blocking requires a signed-in account. Anonymous users can view the blocklist but cannot report sellers.
## Importing a blocklist
The Blocklist view has an **Import CSV** button. The accepted format:
```csv
platform,platform_seller_id,username,reason
ebay,seller123,seller123,counterfeit item
ebay,badactor99,badactor99,
```
The `reason` column is optional. `platform` defaults to `ebay` if omitted.
## Exporting the blocklist
Click **Export CSV** in the Blocklist view to download the current blocklist. Use this to back up, share with others, or import into another Snipe instance.
## Blocklist sync (roadmap)
Batch reporting to eBay's Trust & Safety team is on the roadmap (issue #4). This would allow community-flagged sellers to be reported directly to eBay from within Snipe.

View file

@ -1,58 +0,0 @@
# Red Flags
Red flags appear as pills on listing cards when Snipe detects a concern. Each flag is independent — a listing can have multiple flags at once.
## Hard red flags
These override the composite score display with a strong visual warning.
### `zero_feedback`
Seller has received zero feedback. Score is capped at 35.
### `new_account`
Account registered within the last 7 days. Extremely high fraud indicator for high-value listings.
### `established_bad_actor`
Feedback ratio below 80% with 20 or more reviews. A sustained pattern of negative feedback from an established seller.
## Soft flags
Shown as warnings — not automatic disqualifiers, but worth investigating.
### `account_under_30_days`
Account is less than 30 days old. Less severe than `new_account` but worth noting for high-value items.
### `low_feedback_count`
Fewer than 10 feedback ratings total. Seller is new to eBay or rarely transacts.
### `suspicious_price`
Listing price is more than 50% below the market median from recent completed sales.
!!! note
This flag is suppressed automatically when the search returns a heterogeneous price range — for example, a search that mixes laptop generations spanning $200–$2,000. In that case, the median is not meaningful and flagging would produce false positives.
### `duplicate_photo`
The same image (by perceptual hash) appears on another listing. Common in scams where photos are lifted from legitimate listings.
### `scratch_dent_mentioned`
The title or description contains keywords indicating cosmetic damage, functional problems, or evasive language:
- Damage: *scratch, dent, crack, chip, broken, damaged*
- Functional: *untested, for parts, parts only, as-is, not working*
- Evasive: *read description, see description, sold as-is*
### `long_on_market`
The listing has been seen 5 or more times over 14 or more days without selling. A listing that isn't moving may be overpriced or have undisclosed problems.
### `significant_price_drop`
The current price is more than 20% below the price when Snipe first saw this listing. Sudden drops can indicate seller desperation — or a motivated seller — depending on context.
## Triple Red
When a listing hits all three of these simultaneously:
- `new_account` OR `account_under_30_days`
- `suspicious_price`
- `duplicate_photo` OR `zero_feedback` OR `established_bad_actor` OR `scratch_dent_mentioned`
The card gets a **pulsing red border glow** to make it impossible to miss in a crowded results grid.

View file

@ -1,56 +0,0 @@
# Searching
## Basic search
Type a query and press **Search**. Snipe fetches listings from eBay and scores each seller in parallel.
Result count depends on the **Pages to fetch** setting (1 page = up to 200 listings). More pages means a more complete picture but a longer wait.
## Keyword modes
The must-include field has three modes:
| Mode | Behavior |
|------|---------|
| **All** | Every term must appear in results (eBay AND search) |
| **Any** | At least one term must appear (eBay OR search) |
| **Groups** | Comma-separated groups, each searched separately and merged |
Groups mode is the most powerful. Use it to search for variations that eBay's relevance ranking might drop:
```
16gb, 32gb
RTX 4090, 4090 founders
```
This sends two separate eBay queries and deduplicates the results by listing ID.
## Must-exclude
Terms in the must-exclude field are forwarded to eBay on re-search. Common uses:
```
broken, parts only, for parts, untested, cracked
```
!!! note
Must-exclude applies on re-search (it goes to eBay). The **Hide listings: Scratch/dent mentioned** sidebar filter applies instantly to current results using Snipe's own detection logic, which is more comprehensive than eBay's keyword exclusion.
## Filters sidebar
The sidebar has two sections:
**eBay Search** — settings forwarded to eBay on re-search:
- Category filter
- Price range (min/max)
- Pages to fetch
- Data source (Auto / API / Scraper)
**Filter Results** — applied instantly to current results:
- Min trust score slider
- Min account age / Min feedback count
- Hide listings checkboxes
## Saved searches
Click the bookmark icon next to the Search button to save a search with its current filter settings. Saved searches appear in the **Saved** view and can be re-run with one click, restoring all filters.

View file

@ -1,25 +0,0 @@
# Settings
Navigate to **Settings** in the sidebar to access preferences.
## Community
### Trust score feedback
Shows "This score looks right / wrong" buttons on each listing card. Your feedback is recorded anonymously and used to improve trust scoring for all users.
This is optional and enabled by default; turn it off here to opt out.
## Affiliate Links (cloud accounts only)
### Opt out of affiliate links
When enabled, listing links go directly to eBay without an affiliate code. Your purchases won't generate revenue for Snipe's development.
By default, Snipe includes an affiliate code in eBay links at no cost to you — you pay the same price either way.
### Custom affiliate ID (Premium)
Premium subscribers can supply their own eBay Partner Network (EPN) campaign ID. When set, your eBay purchases through Snipe links generate revenue for your own EPN account instead of Snipe's.
This requires an active EPN account at [partnernetwork.ebay.com](https://partnernetwork.ebay.com).

View file

@ -1,39 +0,0 @@
# Trust Scores
## How scoring works
Each listing gets a composite trust score from 0–100, built from five signals:
| Signal | Max points | What it measures |
|--------|-----------|-----------------|
| `account_age` | 20 | Days since the seller's eBay account was registered |
| `feedback_count` | 20 | Total feedback received (volume proxy for experience) |
| `feedback_ratio` | 20 | Percentage of positive feedback |
| `price_vs_market` | 20 | How the listing price compares to recent completed sales |
| `category_history` | 20 | Whether the seller has a history in this item category |
The composite score is the sum of available signals divided by the maximum possible from available signals. Missing signals don't penalize the seller — they reduce the max rather than adding a zero.
## Score bands
| Score | Label | Meaning |
|-------|-------|---------|
| 70–100 | Green | Established seller, no major concerns |
| 40–69 | Yellow | Some signals marginal or missing |
| 0–39 | Red | Multiple red flags — proceed carefully |
## Zero-feedback cap
A seller with zero feedback is hard-capped at a composite score of **35**, regardless of other signals. Zero feedback is the single strongest indicator of a fraudulent or new account, and it would be misleading to allow such a seller to score higher based on price alignment alone.
## Partial scores
When account age hasn't yet been enriched (the BTF scraper is still running), the score is marked **partial** and shown with a spinning indicator. Partial scores are based on available signals only and update automatically when enrichment completes — typically within 30–60 seconds per seller.
## STEAL badge
The **STEAL** badge appears when a listing's price is significantly below the market median from recently completed sales. This is a useful signal for buyers, but it can also indicate a scam — always cross-reference with the trust score and red flags.
## Market comps
Market price data comes from eBay's Marketplace Insights API (completed sales). When this API is unavailable (requires an approved eBay developer account), Snipe falls back to listing prices from the Browse API, which is less accurate. The market price shown in search results reflects whichever source was available.

View file

@ -1,384 +0,0 @@
#!/usr/bin/env bash
# Snipe — self-hosted installer
#
# Supports two install paths:
# Docker (recommended) — everything in containers, no system Python deps required
# Bare metal — conda or pip venv + uvicorn, for machines without Docker
#
# Usage:
# bash install.sh # interactive (auto-detects Docker)
# bash install.sh --docker # Docker Compose setup only
# bash install.sh --bare-metal # conda or venv + uvicorn
# bash install.sh --help
#
# No account or API key required. eBay credentials are optional (faster searches).
set -euo pipefail
# ── Terminal colours ───────────────────────────────────────────────────────────
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
NC='\033[0m'
# Logging helpers. Each status helper prints a coloured glyph followed by the
# message. Colour escapes are expanded with %b; the message itself goes through
# %s so backslashes in paths or user input are printed literally rather than
# being interpreted (which `echo -e "$*"` would do).
# NOTE(review): the status glyphs were missing in the reviewed copy (the colour
# codes wrapped an empty string); the ones below are a conventional choice.
info()   { printf '%b %s\n' "${BLUE}ℹ${NC}" "$*"; }
ok()     { printf '%b %s\n' "${GREEN}✓${NC}" "$*"; }
warn()   { printf '%b %s\n' "${YELLOW}⚠${NC}" "$*"; }
error()  { printf '%b %s\n' "${RED}✗${NC}" "$*" >&2; }   # errors go to stderr
# Section heading: blank line, bold title, then a 60-column rule.
header() { echo; printf '%b%s%b\n' "$BOLD" "$*" "$NC"; printf '%0.s─' {1..60}; echo; }
dim()    { printf '%b%s%b\n' "$DIM" "$*" "$NC"; }
ask()    { printf '%b %b%s%b\n' "${CYAN}?${NC}" "$BOLD" "$*" "$NC"; }
# Print an error and abort the installer with a non-zero status.
fail()   { error "$*"; exit 1; }
# ── Paths ──────────────────────────────────────────────────────────────────────
SNIPE_CONFIG_DIR="${HOME}/.config/circuitforge"
SNIPE_ENV_FILE="${SNIPE_CONFIG_DIR}/snipe.env"
SNIPE_VENV_DIR="${SNIPE_CONFIG_DIR}/venv"
FORGEJO="https://git.opensourcesolarpunk.com/Circuit-Forge"
# Default install directory. Overridable:
# SNIPE_DIR=/opt/snipe bash install.sh
SNIPE_INSTALL_DIR="${SNIPE_DIR:-${HOME}/snipe}"
# ── Argument parsing ───────────────────────────────────────────────────────────
MODE_FORCE=""
for arg in "$@"; do
case "$arg" in
--bare-metal) MODE_FORCE="bare-metal" ;;
--docker) MODE_FORCE="docker" ;;
--help|-h)
echo "Usage: bash install.sh [--docker|--bare-metal|--help]"
echo
echo " --docker Docker Compose install (recommended)"
echo " --bare-metal conda or pip venv + uvicorn"
echo " --help Show this message"
echo
echo " Set SNIPE_DIR=/path to change the install directory (default: ~/snipe)"
exit 0
;;
*) echo "Unknown argument: $arg" >&2; exit 1 ;;
esac
done
# ── Banner ─────────────────────────────────────────────────────────────────────
echo
echo -e "${BOLD} 🎯 Snipe — eBay listing intelligence${NC}"
echo -e "${DIM} Bid with confidence. Privacy-first, no account required.${NC}"
echo -e "${DIM} Part of the Circuit Forge LLC suite (BSL 1.1)${NC}"
echo
# ── System checks ──────────────────────────────────────────────────────────────
header "System checks"
HAS_DOCKER=false
HAS_CONDA=false
HAS_CONDA_CMD=""
HAS_PYTHON=false
HAS_NODE=false
HAS_CHROMIUM=false
HAS_XVFB=false
command -v git >/dev/null 2>&1 || fail "Git is required. Install: sudo apt-get install git"
ok "Git found"
docker compose version >/dev/null 2>&1 && HAS_DOCKER=true
if $HAS_DOCKER; then ok "Docker (Compose plugin) found"; fi
# Detect conda / mamba / micromamba in preference order
for _c in conda mamba micromamba; do
if command -v "$_c" >/dev/null 2>&1; then
HAS_CONDA=true
HAS_CONDA_CMD="$_c"
ok "Conda manager found: $_c"
break
fi
done
# Python 3.11+ check
if command -v python3 >/dev/null 2>&1; then
_py_ok=$(python3 -c "import sys; print(sys.version_info >= (3,11))" 2>/dev/null || echo "False")
if [[ "$_py_ok" == "True" ]]; then
HAS_PYTHON=true
ok "Python 3.11+ found ($(python3 --version))"
else
warn "Python found but version is below 3.11 ($(python3 --version)) — bare-metal path may fail"
fi
fi
command -v node >/dev/null 2>&1 && HAS_NODE=true
if $HAS_NODE; then ok "Node.js found ($(node --version))"; fi
# Chromium / Google Chrome — needed for the Kasada-bypass scraper
for _chrome in google-chrome chromium-browser chromium; do
if command -v "$_chrome" >/dev/null 2>&1; then
HAS_CHROMIUM=true
ok "Chromium/Chrome found: $_chrome"
break
fi
done
if ! $HAS_CHROMIUM; then
warn "Chromium / Google Chrome not found."
warn "Snipe uses headed Chromium + Xvfb to bypass eBay's Kasada anti-bot."
warn "The installer will install Chromium via Playwright. If that fails,"
warn "add eBay API credentials to .env to use the API adapter instead."
fi
# Xvfb — virtual framebuffer for headed Chromium on headless servers
command -v Xvfb >/dev/null 2>&1 && HAS_XVFB=true
if $HAS_XVFB; then ok "Xvfb found"; fi
# ── Mode selection ─────────────────────────────────────────────────────────────
# Precedence: explicit flag > Docker (if available) > bare metal.
# Bare metal is viable with either a system Python 3.11+ (venv path) or a
# conda-family manager, because _setup_python_env creates its own
# python=3.11 environment when conda/mamba/micromamba is present.
header "Install mode"
INSTALL_MODE=""
if [[ -n "$MODE_FORCE" ]]; then
    INSTALL_MODE="$MODE_FORCE"
    info "Mode forced: $INSTALL_MODE"
elif $HAS_DOCKER; then
    INSTALL_MODE="docker"
    ok "Docker available — using Docker install (recommended)"
    dim " Pass --bare-metal to override"
elif $HAS_PYTHON || $HAS_CONDA; then
    INSTALL_MODE="bare-metal"
    warn "Docker not found — using bare-metal install"
else
    fail "Docker or Python 3.11+ is required. Install Docker: https://docs.docker.com/get-docker/"
fi

# Fail early (before cloning anything) when a forced mode cannot possibly work.
if [[ "$INSTALL_MODE" == "docker" ]] && ! $HAS_DOCKER; then
    fail "--docker was requested but 'docker compose' is not available. Install Docker: https://docs.docker.com/get-docker/"
fi
if [[ "$INSTALL_MODE" == "bare-metal" ]] && ! $HAS_PYTHON && ! $HAS_CONDA; then
    fail "--bare-metal requires Python 3.11+ or a conda/mamba/micromamba installation."
fi
# ── Clone repos ───────────────────────────────────────────────────────────────
header "Clone repositories"
# compose.yml and the Dockerfile both use context: .. (parent directory), so
# snipe/ and circuitforge-core/ must be siblings inside SNIPE_INSTALL_DIR.
REPO_DIR="$SNIPE_INSTALL_DIR"
SNIPE_DIR_ACTUAL="$REPO_DIR/snipe"
CORE_DIR="$REPO_DIR/circuitforge-core"
# Clone a repository, or fast-forward it if it is already checked out.
#   $1  label — human-readable name used in log messages
#   $2  url   — git remote to clone from
#   $3  dest  — target directory
# An existing checkout is detected by the presence of "$dest/.git". The pull is
# --ff-only, so a diverged or dirty checkout makes git fail and (under set -e)
# aborts the installer instead of silently merging.
_clone_or_pull() {
    local label="$1" url="$2" dest="$3"
    if [[ -d "$dest/.git" ]]; then
        info "$label already cloned — pulling latest..."
        git -C "$dest" pull --ff-only
    else
        info "Cloning $label..."
        mkdir -p "$(dirname "$dest")"
        git clone "$url" "$dest"
    fi
    # Separator added: the message previously fused the label and the path.
    ok "$label → $dest"
}
_clone_or_pull "snipe" "$FORGEJO/snipe.git" "$SNIPE_DIR_ACTUAL"
_clone_or_pull "circuitforge-core" "$FORGEJO/circuitforge-core.git" "$CORE_DIR"
# ── Config file ────────────────────────────────────────────────────────────────
# Create .env from .env.example on first run only; an existing .env is never
# overwritten so user edits survive re-running the installer.
header "Configuration"
ENV_FILE="$SNIPE_DIR_ACTUAL/.env"
if [[ ! -f "$ENV_FILE" ]]; then
    [[ -f "$SNIPE_DIR_ACTUAL/.env.example" ]] \
        || fail ".env.example not found in $SNIPE_DIR_ACTUAL — the snipe checkout looks incomplete."
    cp "$SNIPE_DIR_ACTUAL/.env.example" "$ENV_FILE"
    # Disable webhook signature verification for local installs
    # (no production eBay key yet — the endpoint won't be registered).
    # `-i.bak` (suffix attached) is the one in-place form accepted by both GNU
    # and BSD/macOS sed; a bare `-i` makes BSD sed treat the script as the
    # backup suffix and abort.
    sed -i.bak 's/^EBAY_WEBHOOK_VERIFY_SIGNATURES=true/EBAY_WEBHOOK_VERIFY_SIGNATURES=false/' "$ENV_FILE"
    rm -f "$ENV_FILE.bak"
    ok ".env created from .env.example"
    echo
    dim " Snipe works out of the box with no API keys (scraper mode)."
    dim " Add EBAY_APP_ID / EBAY_CERT_ID later for faster searches (optional)."
    dim " Edit: $ENV_FILE"
    echo
else
    info ".env already exists — skipping (delete to reset defaults)"
fi
# ── License key (optional) ─────────────────────────────────────────────────────
# Prompt for a CircuitForge license key. A well-formed key is written to .env
# together with the license-server URL; anything else is rejected with a hint.
header "CircuitForge license key (optional)"
dim " Snipe is free to self-host. A Paid/Premium key unlocks cloud features"
dim " (photo analysis, eBay OAuth). Skip this if you don't have one."
echo
ask "Enter your license key, or press Enter to skip:"
# `|| true`: EOF on stdin (non-interactive run) must not trip set -e.
read -r _license_key || true
if [[ -n "${_license_key:-}" ]]; then
    _key_re='^CFG-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}$'
    # Bash's own ERE matcher — no dependency on grep -P / -E availability.
    # The regex also guarantees the key contains no characters that are
    # special inside the sed replacement below.
    if [[ "$_license_key" =~ $_key_re ]]; then
        # HEIMDALL_URL: keep an already-active value, otherwise uncomment the
        # template line, otherwise append. This keeps re-runs from piling up
        # duplicate entries.
        if grep -q "^HEIMDALL_URL=" "$ENV_FILE" 2>/dev/null; then
            :
        elif grep -q "^# HEIMDALL_URL=" "$ENV_FILE" 2>/dev/null; then
            # -i.bak works on both GNU and BSD/macOS sed (bare -i does not).
            sed -i.bak "s|^# HEIMDALL_URL=.*|HEIMDALL_URL=https://license.circuitforge.tech|" "$ENV_FILE"
            rm -f "$ENV_FILE.bak"
        else
            echo "HEIMDALL_URL=https://license.circuitforge.tech" >> "$ENV_FILE"
        fi
        # Write or replace CF_LICENSE_KEY
        if grep -q "^CF_LICENSE_KEY=" "$ENV_FILE" 2>/dev/null; then
            sed -i.bak "s|^CF_LICENSE_KEY=.*|CF_LICENSE_KEY=${_license_key}|" "$ENV_FILE"
            rm -f "$ENV_FILE.bak"
        else
            echo "CF_LICENSE_KEY=${_license_key}" >> "$ENV_FILE"
        fi
        ok "License key saved to .env"
    else
        warn "Key format not recognised (expected CFG-XXXX-XXXX-XXXX-XXXX) — skipping."
        warn "Edit $ENV_FILE to add it manually."
    fi
else
    info "No license key entered — self-hosted free tier."
fi
# ── Docker install ─────────────────────────────────────────────────────────────
# Build the images, start the stack detached, then print where to reach it.
# Leaves the shell's working directory at $SNIPE_DIR_ACTUAL.
_install_docker() {
    local rule
    rule="$(printf '%0.s─' {1..60})"   # 60-column horizontal rule

    header "Docker install"
    cd "$SNIPE_DIR_ACTUAL"

    info "Building Docker images (~1 GB download on first run)..."
    docker compose build

    info "Starting Snipe..."
    docker compose up -d

    echo
    ok "Snipe is running!"
    echo "$rule"
    echo -e " ${GREEN}Web UI:${NC} http://localhost:8509"
    echo -e " ${GREEN}API:${NC} http://localhost:8510/docs"
    echo
    echo -e " ${DIM}Manage: cd $SNIPE_DIR_ACTUAL && ./manage.sh {start|stop|restart|logs|test}${NC}"
    echo "$rule"
    echo
}
# ── Bare-metal install ─────────────────────────────────────────────────────────
# Install Xvfb with the first supported package manager found (apt-get, then
# dnf). Homebrew hosts only get a warning; unknown systems get manual
# instructions. No-op when Xvfb was already detected.
_install_xvfb() {
    if $HAS_XVFB; then return; fi
    info "Installing Xvfb (required for eBay scraper)..."

    # Pick the package manager first, then dispatch on it.
    local pkg_mgr="" candidate
    for candidate in apt-get dnf brew; do
        if command -v "$candidate" >/dev/null 2>&1; then
            pkg_mgr="$candidate"
            break
        fi
    done

    case "$pkg_mgr" in
        apt-get)
            sudo apt-get install -y --no-install-recommends xvfb
            ok "Xvfb installed"
            ;;
        dnf)
            sudo dnf install -y xorg-x11-server-Xvfb
            ok "Xvfb installed"
            ;;
        brew)
            warn "macOS: Xvfb not available via Homebrew."
            warn "The scraper (Kasada bypass) will not work on macOS."
            warn "Add eBay API credentials to .env to use the API adapter instead."
            ;;
        *)
            warn "Could not install Xvfb automatically. Install it with your system package manager."
            warn " Debian/Ubuntu: sudo apt-get install xvfb"
            warn " Fedora/RHEL: sudo dnf install xorg-x11-server-Xvfb"
            ;;
    esac
}
# Create (or reuse) the Python environment and install circuitforge-core,
# snipe, and Playwright's Chromium into it.
#   conda path: environment named "cf", managed by $HAS_CONDA_CMD
#   venv path:  $SNIPE_VENV_DIR, created with the system python3
# Sets PYTHON_BIN as before; nothing else in this script reads it.
_setup_python_env() {
    # Local so the name does not leak into the installer's global scope.
    local env_name="cf"
    if $HAS_CONDA; then
        info "Setting up conda environment (manager: $HAS_CONDA_CMD)..."
        # Match the env name as a whole word, allowing leading whitespace.
        # NOTE(review): micromamba is believed to indent its `env list`
        # rows, which a pattern anchored at column 0 would miss — confirm.
        if "$HAS_CONDA_CMD" env list 2>/dev/null | grep -Eq "^[[:space:]]*${env_name}[[:space:]]"; then
            info "Conda env '$env_name' already exists — updating packages..."
        else
            "$HAS_CONDA_CMD" create -n "$env_name" python=3.11 -y
        fi
        "$HAS_CONDA_CMD" run -n "$env_name" pip install --quiet -e "$CORE_DIR"
        "$HAS_CONDA_CMD" run -n "$env_name" pip install --quiet -e "$SNIPE_DIR_ACTUAL"
        "$HAS_CONDA_CMD" run -n "$env_name" playwright install chromium
        "$HAS_CONDA_CMD" run -n "$env_name" playwright install-deps chromium
        PYTHON_BIN="$HAS_CONDA_CMD run -n $env_name"
        ok "Conda environment '$env_name' ready"
    else
        info "Setting up pip venv at $SNIPE_VENV_DIR ..."
        mkdir -p "$SNIPE_CONFIG_DIR"
        python3 -m venv "$SNIPE_VENV_DIR"
        "$SNIPE_VENV_DIR/bin/pip" install --quiet -e "$CORE_DIR"
        "$SNIPE_VENV_DIR/bin/pip" install --quiet -e "$SNIPE_DIR_ACTUAL"
        "$SNIPE_VENV_DIR/bin/playwright" install chromium
        "$SNIPE_VENV_DIR/bin/playwright" install-deps chromium
        PYTHON_BIN="$SNIPE_VENV_DIR/bin"
        ok "Python venv ready at $SNIPE_VENV_DIR"
    fi
}
# Build the Vue frontend into web/dist/ when Node.js is available; otherwise
# explain how to get it and point at the API docs instead.
# On the build path the working directory ends up at $SNIPE_DIR_ACTUAL.
_build_frontend() {
    if $HAS_NODE; then
        info "Building Vue frontend..."
        cd "$SNIPE_DIR_ACTUAL/web"
        npm ci --prefer-offline --silent
        npm run build
        cd "$SNIPE_DIR_ACTUAL"
        ok "Frontend built → web/dist/"
    else
        warn "Node.js not found — skipping frontend build."
        warn "Install Node.js 20+ from https://nodejs.org and re-run install.sh."
        warn "Until then, access the API at http://localhost:8510/docs"
    fi
}
# Generate the two bare-metal launcher scripts inside the snipe checkout:
#   start-local.sh — runs the FastAPI app with uvicorn on port 8510
#   serve-ui.sh    — serves the built frontend on port 8509 (dev only)
# Both heredocs use a quoted delimiter, so nothing is expanded at install time;
# the scripts resolve paths when they run.
_write_start_scripts() {
    # start-local.sh — launches the FastAPI server
    cat > "$SNIPE_DIR_ACTUAL/start-local.sh" << 'STARTSCRIPT'
#!/usr/bin/env bash
# Start Snipe API (bare-metal / no-Docker mode)
set -euo pipefail
cd "$(dirname "$0")"

# True when manager $1 is installed and lists an environment named "cf".
_has_cf_env() {
    command -v "$1" >/dev/null 2>&1 \
        && "$1" env list 2>/dev/null | grep -Eq '^[[:space:]]*cf[[:space:]]'
}

# The command is kept as an array so it can be exec'd without word-splitting.
UVICORN=()
if [[ -x "$HOME/.config/circuitforge/venv/bin/uvicorn" ]]; then
    UVICORN=("$HOME/.config/circuitforge/venv/bin/uvicorn")
else
    # Same manager preference order as install.sh.
    for _mgr in conda mamba micromamba; do
        if _has_cf_env "$_mgr"; then
            UVICORN=("$_mgr" run -n cf uvicorn)
            break
        fi
    done
fi
if [[ ${#UVICORN[@]} -eq 0 ]]; then
    echo "No Snipe Python environment found. Run install.sh first." >&2; exit 1
fi
mkdir -p data
echo "Starting Snipe API → http://localhost:8510 ..."
exec "${UVICORN[@]}" api.main:app --host 0.0.0.0 --port 8510 "$@"
STARTSCRIPT
    chmod +x "$SNIPE_DIR_ACTUAL/start-local.sh"

    # serve-ui.sh — serves the built Vue frontend (dev only)
    cat > "$SNIPE_DIR_ACTUAL/serve-ui.sh" << 'UISCRIPT'
#!/usr/bin/env bash
# Serve the pre-built Vue frontend (dev only — use nginx for production).
# See docs/nginx-self-hosted.conf for a production nginx config.
set -euo pipefail
DIST_DIR="$(dirname "$0")/web/dist"
# Refuse to start without a build: if the cd below failed silently,
# http.server would expose whatever directory the caller happened to be in.
if [[ ! -d "$DIST_DIR" ]]; then
    echo "Frontend build not found at $DIST_DIR. Install Node.js and re-run install.sh." >&2
    exit 1
fi
cd "$DIST_DIR"
echo "Serving Snipe UI → http://localhost:8509 (Ctrl+C to stop)"
exec python3 -m http.server 8509
UISCRIPT
    chmod +x "$SNIPE_DIR_ACTUAL/serve-ui.sh"
    ok "Start scripts written"
}
# Run the bare-metal install steps in order, then print usage instructions.
_install_bare_metal() {
    local rule step
    rule="$(printf '%0.s─' {1..60})"   # 60-column horizontal rule

    header "Bare-metal install"
    for step in _install_xvfb _setup_python_env _build_frontend _write_start_scripts; do
        "$step"
    done

    echo
    ok "Snipe installed (bare-metal mode)"
    echo "$rule"
    echo -e " ${GREEN}Start API:${NC} cd $SNIPE_DIR_ACTUAL && ./start-local.sh"
    echo -e " ${GREEN}Serve UI:${NC} cd $SNIPE_DIR_ACTUAL && ./serve-ui.sh ${DIM}(separate terminal)${NC}"
    echo -e " ${GREEN}API docs:${NC} http://localhost:8510/docs"
    echo -e " ${GREEN}Web UI:${NC} http://localhost:8509 ${DIM}(after ./serve-ui.sh)${NC}"
    echo
    echo -e " ${DIM}For production, configure nginx to proxy /api/ to localhost:8510${NC}"
    echo -e " ${DIM}and serve web/dist/ as the document root.${NC}"
    echo -e " ${DIM}See: $SNIPE_DIR_ACTUAL/docs/nginx-self-hosted.conf${NC}"
    echo "$rule"
    echo
}
# ── Main ───────────────────────────────────────────────────────────────────────
# Entry point: dispatch on the install mode chosen earlier in the script.
# Anything other than "docker" takes the bare-metal path.
main() {
    case "$INSTALL_MODE" in
        docker) _install_docker ;;
        *)      _install_bare_metal ;;
    esac
}
main

View file

@ -78,7 +78,7 @@ case "$cmd" in
test) test)
echo "Running test suite..." echo "Running test suite..."
docker compose -f "$COMPOSE_FILE" exec api \ docker compose -f "$COMPOSE_FILE" exec api \
python -m pytest /app/snipe/tests/ -v "${@}" conda run -n job-seeker python -m pytest /app/snipe/tests/ -v "${@}"
;; ;;
# ── Cloud commands ──────────────────────────────────────────────────────── # ── Cloud commands ────────────────────────────────────────────────────────

View file

@ -1,66 +0,0 @@
site_name: Snipe
site_description: eBay trust scoring before you bid — catch scammers, flag suspicious prices, surface duplicate photos.
site_author: Circuit Forge LLC
site_url: https://docs.circuitforge.tech/snipe
repo_url: https://git.opensourcesolarpunk.com/Circuit-Forge/snipe
repo_name: Circuit-Forge/snipe
theme:
name: material
palette:
- scheme: default
primary: deep orange
accent: orange
toggle:
icon: material/brightness-7
name: Switch to dark mode
- scheme: slate
primary: deep orange
accent: orange
toggle:
icon: material/brightness-4
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.top
- search.suggest
- search.highlight
- content.code.copy
markdown_extensions:
- admonition
- pymdownx.details
- pymdownx.superfences:
custom_fences:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.tabbed:
alternate_style: true
- tables
- toc:
permalink: true
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- eBay API Keys (Optional): getting-started/ebay-api.md
- User Guide:
- Searching: user-guide/searching.md
- Trust Scores: user-guide/trust-scores.md
- Red Flags: user-guide/red-flags.md
- Community Blocklist: user-guide/blocklist.md
- Settings: user-guide/settings.md
- Reference:
- Trust Score Algorithm: reference/trust-scoring.md
- Tier System: reference/tier-system.md
- Architecture: reference/architecture.md
extra_javascript:
- plausible.js

View file

@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "snipe" name = "snipe"
version = "0.3.0" version = "0.1.0"
description = "Auction listing monitor and trust scorer" description = "Auction listing monitor and trust scorer"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
"circuitforge-core[community]>=0.8.0", "circuitforge-core",
"streamlit>=1.32", "streamlit>=1.32",
"requests>=2.31", "requests>=2.31",
"imagehash>=4.3", "imagehash>=4.3",
@ -23,20 +23,6 @@ dependencies = [
"playwright-stealth>=1.0", "playwright-stealth>=1.0",
"cryptography>=42.0", "cryptography>=42.0",
"PyJWT>=2.8", "PyJWT>=2.8",
"httpx>=0.27",
]
[project.optional-dependencies]
orchestration = [
# Paid+ tier only — not published to PyPI. Install from source or Forgejo Packages.
# pip install -e ../circuitforge-orch (dev)
# pip install snipe[orchestration] (self-hosted Paid+)
"circuitforge-orch>=0.1.0",
]
dev = [
"pytest>=8.0",
"pytest-cov>=5.0",
"ruff>=0.4",
] ]
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
@ -45,19 +31,3 @@ include = ["app*", "api*"]
[tool.pytest.ini_options] [tool.pytest.ini_options]
testpaths = ["tests"] testpaths = ["tests"]
markers = [
"browser: tests that require a headed Chromium browser (Xvfb + playwright install required)",
]
[tool.ruff]
line-length = 100
target-version = "py311"
[tool.ruff.lint]
select = ["E", "F", "W", "I"]
ignore = [
"E501", # line length — handled by formatter
"E402", # module-import-not-at-top — intentional for conditional/lazy imports
"E701", # multiple-statements-colon — `if x: return y` is accepted style
"E741", # ambiguous variable name — l/q used intentionally for listing/query
]

View file

@ -1,64 +0,0 @@
"""Reproduce the exact FastAPI code path: pool warmup → slot close → _fetch_fresh.
Run inside the container:
docker exec -it snipe-api-1 python /app/snipe/scripts/debug_fetch_fresh.py
"""
import sys, time, threading
sys.path.insert(0, '/app/snipe')
from bs4 import BeautifulSoup
from app.platforms.ebay.browser_pool import BrowserPool, _close_slot
URL = "https://www.mercari.com/search/?keyword=rtx+4090&sortBy=SORT_SCORE&priceMax=800"
DEBUG_HTML_PATH = "/tmp/debug_mercari.html"
WARMUP_TIMEOUT_S = 30


def _parse_items(html):
    """Parse *html* once and return ``(soup, item_containers)``.

    Item containers are the elements carrying ``data-testid="ItemContainer"``.
    Returning the soup lets callers reuse it instead of re-parsing the page.
    """
    soup = BeautifulSoup(html, "html.parser")
    return soup, soup.find_all(attrs={"data-testid": "ItemContainer"})


print("=== Test 1: _fetch_fresh with no pool (baseline) ===", flush=True)
pool0 = BrowserPool(size=0)
t0 = time.time()
html = pool0._fetch_fresh(URL, wait_for_timeout_ms=8000)
_, items = _parse_items(html)
print(f"Items: {len(items)}, HTML: {len(html)}b, elapsed: {time.time()-t0:.1f}s", flush=True)

print("\n=== Test 2: pool warmup (size=2), grab slot, close it, then _fetch_fresh ===", flush=True)
pool2 = BrowserPool(size=2)

# Warm up in a background thread and wait (bounded) for it to finish.
warm_done = threading.Event()


def do_warmup():
    """Start the pool, then signal the main thread that warmup finished."""
    pool2.start()
    warm_done.set()


warm_thread = threading.Thread(target=do_warmup, daemon=True)
warm_thread.start()
if not warm_done.wait(timeout=WARMUP_TIMEOUT_S):
    # Previously a timeout was silently ignored, making later output ambiguous.
    print(f"WARNING: pool warmup did not finish within {WARMUP_TIMEOUT_S}s", flush=True)
print(f"Pool size after warmup: {pool2._q.qsize()}", flush=True)

# Grab a slot and close it (simulating the thread-error path)
import queue

try:
    slot = pool2._q.get(timeout=3.0)
    print(f"Got slot on display :{slot.display_num}", flush=True)
    _close_slot(slot)
    print("Slot closed", flush=True)
except queue.Empty:
    print("Pool empty — no slot to simulate", flush=True)

# Now call _fetch_fresh in this thread (same as FastAPI handler thread)
print("Calling _fetch_fresh from warmup-thread context...", flush=True)
t0 = time.time()
html2 = pool2._fetch_fresh(URL, wait_for_timeout_ms=8000)
soup2, items2 = _parse_items(html2)
print(f"Items: {len(items2)}, HTML: {len(html2)}b, elapsed: {time.time()-t0:.1f}s", flush=True)

# Save HTML for inspection if empty
# NOTE(review): the reviewed copy had lost its indentation; the diagnostics
# below are assumed to belong to the "no items" branch — confirm.
if not items2:
    # Explicit encoding: scraped pages routinely contain non-ASCII text.
    with open(DEBUG_HTML_PATH, "w", encoding="utf-8") as f:
        f.write(html2)
    print("Saved HTML to /tmp/debug_mercari.html", flush=True)
    title = soup2.find("title")
    print("Page title:", title.get_text() if title else "(none)", flush=True)
    if "Just a moment" in html2 or "turnstile" in html2.lower():
        print("BLOCKED: Cloudflare challenge", flush=True)
    else:
        body = soup2.find("body")
        if body:
            print("Body snippet:", body.get_text(separator=" ", strip=True)[:300], flush=True)

View file

@ -1,113 +0,0 @@
"""One-shot Mercari probe using the same headed Chromium + Xvfb + stealth stack
as the eBay scraper. Run inside the snipe-api container:
docker exec -it snipe-api-1 python /app/scripts/probe_mercari.py
"""
from __future__ import annotations
import itertools
import os
import subprocess
import sys
import time
_display_counter = itertools.count(200)
_CHROMIUM_ARGS = ["--no-sandbox", "--disable-dev-shm-usage"]
_USER_AGENT = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
SEARCH_URL = "https://www.mercari.com/search/?keyword=rtx+4090"
# Give Cloudflare challenge time to resolve (if it does)
WAIT_MS = 8_000
def probe(url: str) -> str:
    """Load *url* in headed Chromium on a private Xvfb display and return the HTML.

    Starts an Xvfb server on the next display number from ``_display_counter``,
    launches Chromium against it with stealth patches applied, navigates to
    *url*, waits ``WAIT_MS`` milliseconds for scripts to settle, and returns the
    page content. Progress is printed with a ``[probe]`` prefix.

    Raises:
        RuntimeError: if Xvfb exits right after being started.
        Any Playwright error raised while launching or navigating.
    """
    from playwright.sync_api import sync_playwright
    from playwright_stealth import Stealth

    display = f":{next(_display_counter)}"
    env = os.environ.copy()
    env["DISPLAY"] = display
    xvfb = subprocess.Popen(
        ["Xvfb", display, "-screen", "0", "1280x800x24"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    try:
        # The startup pause lives inside the try so an interrupt during it
        # still tears Xvfb down.
        time.sleep(0.5)
        if xvfb.poll() is not None:
            # Fail with a clear message instead of an opaque Chromium launch error.
            raise RuntimeError(
                f"Xvfb exited immediately on display {display} "
                f"(exit code {xvfb.returncode})"
            )
        with sync_playwright() as pw:
            browser = pw.chromium.launch(
                headless=False,
                env=env,
                args=_CHROMIUM_ARGS,
            )
            try:
                ctx = browser.new_context(
                    user_agent=_USER_AGENT,
                    viewport={"width": 1280, "height": 800},
                )
                page = ctx.new_page()
                Stealth().apply_stealth_sync(page)
                print(f"[probe] Navigating to {url}", flush=True)
                response = page.goto(url, wait_until="domcontentloaded", timeout=40_000)
                print(f"[probe] HTTP status: {response.status if response else 'unknown'}", flush=True)
                print(f"[probe] Waiting {WAIT_MS}ms for JS / Turnstile …", flush=True)
                page.wait_for_timeout(WAIT_MS)
                html = page.content()
                title = page.title()
                print(f"[probe] Page title: {title!r}", flush=True)
            finally:
                # Close the browser on the error path too.
                browser.close()
    finally:
        xvfb.terminate()
        xvfb.wait()
    return html
def analyse(html: str) -> None:
    """Print a short diagnosis of a fetched Mercari search page.

    Reports, in order: whether a Cloudflare challenge marker is present (and
    stops if so); otherwise the first listing-card selector that matches,
    with a snippet of the first card; otherwise a preview of the body text,
    saving the full HTML to ``/tmp/mercari_probe.html`` for manual inspection.

    Args:
        html: Raw page source as returned by :func:`probe`.
    """
    # Cheap substring test first: a blocked page needs no HTML parsing at all.
    if "Just a moment" in html or "cf-challenge" in html or "turnstile" in html.lower():
        print("[result] BLOCKED — Cloudflare Turnstile still active")
        return

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "html.parser")
    print("[result] Cloudflare challenge NOT detected — page appears to have loaded")

    # Candidate selectors for listing cards, tried in order. They are evaluated
    # lazily so the search stops at the first selector that matches.
    selectors = (
        "[data-testid='ItemCell']",
        "[data-testid='item-cell']",
        "li[data-testid]",
        ".merList .merListItem",
        "[class*='ItemCell']",
        "[class*='item-cell']",
    )
    for selector in selectors:
        cards = soup.select(selector)
        if cards:
            # Name the selector so the output says which guess was right.
            print(f"[result] Found {len(cards)} listing card(s) via selector {selector!r}")
            print(f"[result] First card snippet:\n{cards[0].prettify()[:800]}")
            return

    # Fallback: show body text summary
    body = soup.find("body")
    text = body.get_text(separator=" ", strip=True)[:500] if body else html[:500]
    print(f"[result] No listing cards found. Body text preview:\n{text}")

    # Save full HTML for manual inspection. Explicit UTF-8: page content is
    # not guaranteed to be encodable in the process's default encoding.
    out = "/tmp/mercari_probe.html"
    with open(out, "w", encoding="utf-8") as fh:
        fh.write(html)
    print(f"[result] Full HTML saved to {out}")
if __name__ == "__main__":
html = probe(SEARCH_URL)
analyse(html)

View file

@ -1,8 +1,6 @@
"""Streamlit entrypoint.""" """Streamlit entrypoint."""
from pathlib import Path from pathlib import Path
import streamlit as st import streamlit as st
from app.wizard import SnipeSetupWizard from app.wizard import SnipeSetupWizard
st.set_page_config( st.set_page_config(
@ -18,7 +16,6 @@ if not wizard.is_configured():
st.stop() st.stop()
from app.ui.components.easter_eggs import inject_konami_detector from app.ui.components.easter_eggs import inject_konami_detector
inject_konami_detector() inject_konami_detector()
with st.sidebar: with st.sidebar:
@ -30,5 +27,4 @@ with st.sidebar:
) )
from app.ui.Search import render from app.ui.Search import render
render(audio_enabled=audio_enabled) render(audio_enabled=audio_enabled)

View file

@ -1,17 +0,0 @@
import os
import pytest
def pytest_configure(config):
    """Register the custom ``postgres`` marker with pytest."""
    marker_doc = (
        "postgres: mark test as requiring a live Postgres instance "
        "(SNIPE_SHARED_DB_URL must be set)"
    )
    config.addinivalue_line("markers", marker_doc)
@pytest.fixture
def postgres_dsn():
    """Return the DSN from ``SNIPE_SHARED_DB_URL``, skipping the test when it is unset or empty."""
    value = os.environ.get("SNIPE_SHARED_DB_URL")
    if value:
        return value
    pytest.skip("SNIPE_SHARED_DB_URL not set — skipping Postgres tests")

View file

@ -1,157 +0,0 @@
"""Tests for SnipeSharedStore — requires live Postgres via SNIPE_SHARED_DB_URL."""
import pytest
from app.db.models import MarketComp, Seller
from app.db.pg_shared import SnipeSharedDB, SnipeSharedStore
from app.db.protocol import SharedTableProtocol
@pytest.fixture
def pg_db(postgres_dsn):
    """Open a SnipeSharedDB on the test DSN and close it after the test.

    Closing in fixture teardown means the connection is released even when
    an assertion in the test body fails.
    """
    db = SnipeSharedDB(postgres_dsn)
    yield db
    db.close()


@pytest.fixture
def pg_store(pg_db):
    """Return a SnipeSharedStore on a database with migrations applied."""
    pg_db.run_migrations()
    return SnipeSharedStore(pg_db)


@pytest.mark.postgres
def test_snipe_shared_store_satisfies_protocol(postgres_dsn):
    """SnipeSharedStore is recognised as a SharedTableProtocol implementation."""
    assert issubclass(SnipeSharedStore, SharedTableProtocol)


@pytest.mark.postgres
def test_save_and_get_seller(pg_store):
    """A saved seller can be read back with its fields intact."""
    seller = Seller(
        platform="ebay",
        platform_seller_id="test-seller-001",
        username="testseller",
        account_age_days=365,
        feedback_count=100,
        feedback_ratio=0.99,
        category_history_json='{"electronics": 5}',
    )
    pg_store.save_seller(seller)
    try:
        result = pg_store.get_seller("ebay", "test-seller-001")
        assert result is not None
        assert result.username == "testseller"
        assert result.feedback_count == 100
    finally:
        # Remove the row even if an assertion failed, so reruns start clean.
        pg_store.delete_seller_data("ebay", "test-seller-001")


@pytest.mark.postgres
def test_save_sellers_coalesce_preserves_age(pg_store):
    """A bulk save with ``account_age_days=None`` keeps the stored age but updates other fields."""
    seller_with_age = Seller(
        platform="ebay", platform_seller_id="coalesce-test",
        username="u", account_age_days=730,
        feedback_count=50, feedback_ratio=0.95, category_history_json="{}",
    )
    pg_store.save_seller(seller_with_age)
    try:
        seller_without_age = Seller(
            platform="ebay", platform_seller_id="coalesce-test",
            username="u", account_age_days=None,
            feedback_count=60, feedback_ratio=0.96, category_history_json="{}",
        )
        pg_store.save_sellers([seller_without_age])
        result = pg_store.get_seller("ebay", "coalesce-test")
        assert result.account_age_days == 730
        assert result.feedback_count == 60
    finally:
        pg_store.delete_seller_data("ebay", "coalesce-test")


@pytest.mark.postgres
def test_market_comp_cache(pg_store):
    """An unexpired market comp can be read back by platform and query hash."""
    from datetime import datetime, timedelta, timezone

    expires = (datetime.now(timezone.utc) + timedelta(hours=1)).isoformat()
    comp = MarketComp(
        platform="ebay", query_hash="abc123",
        median_price=49.99, sample_count=10, expires_at=expires,
    )
    pg_store.save_market_comp(comp)
    result = pg_store.get_market_comp("ebay", "abc123")
    assert result is not None
    assert result.median_price == 49.99


@pytest.mark.postgres
def test_reported_sellers(pg_store):
    """A reported seller shows up in the list; reporting twice does not raise."""
    pg_store.mark_reported("ebay", "bad-seller-99", username="badguy")
    reported = pg_store.list_reported("ebay")
    assert "bad-seller-99" in reported
    pg_store.mark_reported("ebay", "bad-seller-99")  # idempotent


@pytest.mark.postgres
def test_clone_returns_self(pg_db):
    """``clone()`` on the shared store returns the same object (no migrations needed)."""
    store = SnipeSharedStore(pg_db)
    assert store.clone() is store


@pytest.mark.postgres
def test_blocklist_add_get_remove(pg_store):
    """Blocklist round trip: add, query, list, remove."""
    from app.db.models import ScammerEntry

    assert not pg_store.is_blocklisted("ebay", "bad-999")
    entry = pg_store.add_to_blocklist(ScammerEntry(
        platform="ebay", platform_seller_id="bad-999",
        username="scammer1", reason="sold fakes", source="manual",
    ))
    assert entry.id is not None
    assert pg_store.is_blocklisted("ebay", "bad-999")
    entries = pg_store.list_blocklist("ebay")
    assert any(e.platform_seller_id == "bad-999" for e in entries)
    pg_store.remove_from_blocklist("ebay", "bad-999")
    assert not pg_store.is_blocklisted("ebay", "bad-999")


@pytest.mark.postgres
def test_blocklist_upsert_is_idempotent(pg_store):
    """Re-adding a seller updates the username but keeps the original reason."""
    from app.db.models import ScammerEntry

    pg_store.add_to_blocklist(ScammerEntry(
        platform="ebay", platform_seller_id="dup-test",
        username="seller", reason="reason1", source="manual",
    ))
    try:
        # Second add — should not raise, should update username but preserve reason via COALESCE
        pg_store.add_to_blocklist(ScammerEntry(
            platform="ebay", platform_seller_id="dup-test",
            username="seller_updated", reason=None, source="community",
        ))
        entries = [e for e in pg_store.list_blocklist("ebay") if e.platform_seller_id == "dup-test"]
        assert len(entries) == 1
        assert entries[0].username == "seller_updated"
        assert entries[0].reason == "reason1"  # COALESCE preserved original reason
    finally:
        pg_store.remove_from_blocklist("ebay", "dup-test")

View file

@ -1,39 +0,0 @@
"""Verify Store satisfies SharedTableProtocol at import time."""
from app.db.protocol import SharedTableProtocol
from app.db.store import Store
def test_store_satisfies_protocol():
    """The SQLite-backed Store passes the ``issubclass`` check against SharedTableProtocol."""
    assert issubclass(Store, SharedTableProtocol)
def test_store_clone_returns_new_instance(tmp_path):
    """``Store.clone()`` yields a distinct Store bound to the same database path."""
    db_file = tmp_path / "test.db"
    original = Store(db_file)
    duplicate = original.clone()
    assert isinstance(duplicate, Store)
    assert duplicate is not original
    assert duplicate._db_path == db_file
def test_ebay_adapter_accepts_protocol(tmp_path):
    """EbayAdapter keeps the store passed as ``shared_store`` on ``_store``.

    Uses pytest's ``tmp_path`` fixture for the database file, consistent with
    the other Store tests in this module.
    """
    from unittest.mock import MagicMock

    from app.platforms.ebay.adapter import EbayAdapter

    s = Store(tmp_path / "t.db")
    adapter = EbayAdapter(token_manager=MagicMock(), shared_store=s)
    assert adapter._store is s
def test_scraped_adapter_no_db_path_ref(tmp_path):
    """ScrapedEbayAdapter built from a shared store has no ``_db_path_ref`` attribute.

    Uses pytest's ``tmp_path`` fixture for the database file, consistent with
    the other Store tests in this module.
    """
    from app.platforms.ebay.scraper import ScrapedEbayAdapter

    s = Store(tmp_path / "t.db")
    adapter = ScrapedEbayAdapter(shared_store=s)
    assert not hasattr(adapter, '_db_path_ref')

View file

@ -1,9 +1,8 @@
from datetime import datetime, timedelta, timezone
import pytest import pytest
from datetime import datetime, timedelta, timezone
from app.db.models import Listing, MarketComp, Seller from pathlib import Path
from app.db.store import Store from app.db.store import Store
from app.db.models import Listing, Seller, TrustScore, MarketComp
@pytest.fixture @pytest.fixture

View file

@ -1,456 +0,0 @@
"""Tests for app.platforms.ebay.browser_pool (thread-local design).
All tests run without real Chromium / Xvfb / Playwright.
Playwright, Xvfb subprocess calls, and Stealth are mocked throughout.
"""
from __future__ import annotations
import subprocess
import threading
import time
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Helpers to reset the module-level singleton between tests
# ---------------------------------------------------------------------------
def _reset_pool_singleton():
    """Clear the module-level ``_pool`` in browser_pool."""
    from app.platforms.ebay import browser_pool

    browser_pool._pool = None
def _reset_thread_local():
    """Clear the calling thread's ``slot`` on browser_pool's ``_thread_local``."""
    from app.platforms.ebay import browser_pool

    browser_pool._thread_local.slot = None
@pytest.fixture(autouse=True)
def reset_pool():
    """Reset the pool singleton and thread-local slot before and after every test."""
    resetters = (_reset_pool_singleton, _reset_thread_local)
    for reset in resetters:
        reset()
    yield
    for reset in resetters:
        reset()
def _make_fake_slot():
    """Build a ``_PooledBrowser`` whose Xvfb, Playwright, browser and context are mocks."""
    from app.platforms.ebay.browser_pool import _PooledBrowser

    fake_parts = {
        "xvfb": MagicMock(spec=subprocess.Popen),
        "pw": MagicMock(),
        "browser": MagicMock(),
        "ctx": MagicMock(),
    }
    return _PooledBrowser(display_num=100, last_used_ts=time.time(), **fake_parts)
# ---------------------------------------------------------------------------
# Singleton tests
# ---------------------------------------------------------------------------
class TestGetPoolSingleton:
    """``get_pool()`` returns one shared BrowserPool sized from BROWSER_POOL_SIZE."""

    def test_returns_same_instance(self):
        from app.platforms.ebay.browser_pool import get_pool

        assert get_pool() is get_pool()

    def test_returns_browser_pool_instance(self):
        from app.platforms.ebay.browser_pool import BrowserPool, get_pool

        assert isinstance(get_pool(), BrowserPool)

    def test_default_size_is_two(self, monkeypatch):
        # Remove any ambient override (e.g. from a container .env) so the
        # test really exercises the default rather than the host's setting.
        monkeypatch.delenv("BROWSER_POOL_SIZE", raising=False)
        from app.platforms.ebay.browser_pool import get_pool

        assert get_pool()._size == 2

    def test_custom_size_from_env(self, monkeypatch):
        monkeypatch.setenv("BROWSER_POOL_SIZE", "5")
        from app.platforms.ebay.browser_pool import get_pool

        assert get_pool()._size == 5
# ---------------------------------------------------------------------------
# start() / stop() lifecycle tests
# ---------------------------------------------------------------------------
class TestLifecycle:
    """``BrowserPool.start()`` / ``stop()`` behaviour with Playwright mocked out."""

    def test_start_is_noop_when_playwright_unavailable(self):
        """With Playwright reported missing, start() marks the pool started and registers no slots."""
        from app.platforms.ebay.browser_pool import BrowserPool

        bp = BrowserPool(size=2)
        with patch.object(bp, "_check_playwright", return_value=False):
            bp.start()
        assert bp._started is True
        assert bp._slot_registry == {}

    def test_start_only_runs_once(self):
        """Calling start() twice leaves the pool in the started state."""
        from app.platforms.ebay.browser_pool import BrowserPool

        bp = BrowserPool(size=1)
        with patch.object(bp, "_check_playwright", return_value=False):
            for _ in range(2):
                bp.start()
        assert bp._started is True

    def test_stop_closes_all_registry_slots(self):
        """stop() closes every registered slot, empties the registry and sets ``_stopped``."""
        from app.platforms.ebay.browser_pool import BrowserPool

        bp = BrowserPool(size=2)
        first = _make_fake_slot()
        second = _make_fake_slot()
        bp._slot_registry[1001] = first
        bp._slot_registry[1002] = second
        with patch("app.platforms.ebay.browser_pool._close_slot") as close_mock:
            bp.stop()
        assert close_mock.call_count == 2
        assert bp._slot_registry == {}
        assert bp._stopped is True

    def test_stop_on_empty_registry_is_safe(self):
        """stop() on a pool with no slots does not raise."""
        from app.platforms.ebay.browser_pool import BrowserPool

        empty_pool = BrowserPool(size=2)
        empty_pool.stop()
# ---------------------------------------------------------------------------
# fetch_html — thread-local slot hit path
# ---------------------------------------------------------------------------
class TestFetchHtmlSlotHit:
    """``fetch_html`` when the calling thread already owns a slot."""

    def test_uses_existing_slot_and_replenishes(self):
        import app.platforms.ebay.browser_pool as pool_module
        from app.platforms.ebay.browser_pool import BrowserPool

        url = "https://www.ebay.com/sch/i.html?_nkw=test"
        browser_pool = BrowserPool(size=1)
        current_slot = _make_fake_slot()
        pool_module._thread_local.slot = current_slot
        replacement_slot = _make_fake_slot()
        with (
            patch.object(
                browser_pool, "_fetch_with_slot", return_value="<html>ok</html>"
            ) as fetch_mock,
            patch(
                "app.platforms.ebay.browser_pool._replenish_slot",
                return_value=replacement_slot,
            ),
            patch.object(browser_pool, "_register_slot") as register_mock,
            patch("time.sleep"),
        ):
            html = browser_pool.fetch_html(url, delay=0)

        assert html == "<html>ok</html>"
        fetch_mock.assert_called_once_with(
            current_slot,
            url,
            wait_for_selector=None,
            wait_for_timeout_ms=2000,
        )
        # The replenished slot, not the used one, is what gets re-registered.
        register_mock.assert_called_once_with(replacement_slot)

    def test_delay_is_respected(self):
        import app.platforms.ebay.browser_pool as pool_module
        from app.platforms.ebay.browser_pool import BrowserPool

        browser_pool = BrowserPool(size=1)
        pool_module._thread_local.slot = _make_fake_slot()
        with (
            patch.object(browser_pool, "_fetch_with_slot", return_value="<html/>"),
            patch(
                "app.platforms.ebay.browser_pool._replenish_slot",
                return_value=_make_fake_slot(),
            ),
            patch.object(browser_pool, "_register_slot"),
            # Replace the module's ``time`` so no real sleeping happens.
            patch("app.platforms.ebay.browser_pool.time") as time_mock,
        ):
            browser_pool.fetch_html("https://example.com", delay=1.5)

        time_mock.sleep.assert_called_once_with(1.5)
# ---------------------------------------------------------------------------
# fetch_html — no slot / fallback path
# ---------------------------------------------------------------------------
class TestFetchHtmlFallback:
    """``fetch_html`` falls back to ``_fetch_fresh`` when the pooled path is unusable."""

    def test_falls_back_when_no_slot_and_playwright_unavailable(self):
        from app.platforms.ebay.browser_pool import BrowserPool

        url = "https://www.ebay.com/sch/i.html?_nkw=widget"
        browser_pool = BrowserPool(size=1)
        # Simulate "no slot available" by making slot lookup return None.
        with (
            patch.object(browser_pool, "_get_or_create_thread_slot", return_value=None),
            patch.object(
                browser_pool, "_fetch_fresh", return_value="<html>fresh</html>"
            ) as fresh_mock,
            patch("time.sleep"),
        ):
            html = browser_pool.fetch_html(url, delay=0)

        assert html == "<html>fresh</html>"
        fresh_mock.assert_called_once_with(
            url,
            wait_for_selector=None,
            wait_for_timeout_ms=2000,
        )

    def test_falls_back_when_pooled_fetch_raises(self):
        import app.platforms.ebay.browser_pool as pool_module
        from app.platforms.ebay.browser_pool import BrowserPool

        browser_pool = BrowserPool(size=1)
        broken_slot = _make_fake_slot()
        pool_module._thread_local.slot = broken_slot
        with (
            patch.object(
                browser_pool,
                "_fetch_with_slot",
                side_effect=RuntimeError("Chromium crashed"),
            ),
            patch.object(
                browser_pool, "_fetch_fresh", return_value="<html>recovered</html>"
            ) as fresh_mock,
            patch("app.platforms.ebay.browser_pool._close_slot") as close_mock,
            patch.object(browser_pool, "_unregister_slot"),
            patch("time.sleep"),
        ):
            html = browser_pool.fetch_html("https://www.ebay.com/", delay=0)

        assert html == "<html>recovered</html>"
        # The failed slot is closed before the fresh fetch takes over.
        close_mock.assert_called_once_with(broken_slot)
        fresh_mock.assert_called_once()
# ---------------------------------------------------------------------------
# Thread-local slot management
# ---------------------------------------------------------------------------
class TestThreadLocalSlotManagement:
    """Creation, registration and removal of per-thread browser slots."""

    def test_get_or_create_returns_existing_slot(self):
        import app.platforms.ebay.browser_pool as _mod
        from app.platforms.ebay.browser_pool import BrowserPool

        pool = BrowserPool(size=1)
        pool._playwright_available = True
        existing = _make_fake_slot()
        _mod._thread_local.slot = existing

        assert pool._get_or_create_thread_slot() is existing

    def test_get_or_create_launches_new_slot_when_absent(self):
        import app.platforms.ebay.browser_pool as _mod
        from app.platforms.ebay.browser_pool import BrowserPool

        pool = BrowserPool(size=1)
        pool._playwright_available = True
        _mod._thread_local.slot = None
        new_slot = _make_fake_slot()
        with (
            patch("app.platforms.ebay.browser_pool._launch_slot", return_value=new_slot),
            patch.object(pool, "_register_slot") as mock_register,
        ):
            result = pool._get_or_create_thread_slot()

        assert result is new_slot
        mock_register.assert_called_once_with(new_slot)

    def test_get_or_create_returns_none_when_playwright_unavailable(self):
        from app.platforms.ebay.browser_pool import BrowserPool

        pool = BrowserPool(size=1)
        pool._playwright_available = False

        assert pool._get_or_create_thread_slot() is None

    def test_register_slot_sets_thread_local_and_registry(self):
        import app.platforms.ebay.browser_pool as _mod
        from app.platforms.ebay.browser_pool import BrowserPool

        pool = BrowserPool(size=1)
        slot = _make_fake_slot()
        pool._register_slot(slot)

        assert _mod._thread_local.slot is slot
        assert threading.get_ident() in pool._slot_registry

    def test_unregister_slot_clears_thread_local_and_registry(self):
        import app.platforms.ebay.browser_pool as _mod
        from app.platforms.ebay.browser_pool import BrowserPool

        pool = BrowserPool(size=1)
        pool._register_slot(_make_fake_slot())
        pool._unregister_slot()

        assert getattr(_mod._thread_local, "slot", None) is None
        assert threading.get_ident() not in pool._slot_registry

    def test_different_threads_get_independent_slots(self):
        from app.platforms.ebay.browser_pool import BrowserPool

        pool = BrowserPool(size=2)
        pool._playwright_available = True
        slots_seen: list = []
        errors: list = []

        def worker():
            # Exceptions raised in a thread never reach pytest on their own,
            # so collect them and assert on the list from the main thread.
            try:
                slots_seen.append(pool._get_or_create_thread_slot())
            except Exception as exc:
                errors.append(exc)

        # Patch once from the main thread: entering patch() for the same
        # target from several threads at once is not thread-safe and can
        # leave the mock installed after the test.
        with patch(
            "app.platforms.ebay.browser_pool._launch_slot",
            side_effect=lambda *args, **kwargs: _make_fake_slot(),
        ):
            threads = [threading.Thread(target=worker) for _ in range(2)]
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()

        assert errors == []
        assert len(slots_seen) == 2
        assert all(s is not None for s in slots_seen)
        # Each thread must have launched its own slot object.
        assert slots_seen[0] is not slots_seen[1]
# ---------------------------------------------------------------------------
# ImportError graceful fallback
# ---------------------------------------------------------------------------
class TestImportErrorHandling:
    """The pool degrades gracefully when Playwright cannot be imported."""

    def test_check_playwright_returns_false_on_import_error(self):
        from app.platforms.ebay.browser_pool import BrowserPool

        browser_pool = BrowserPool(size=2)
        # A ``None`` entry in sys.modules makes ``import <name>`` raise ImportError.
        blocked = {"playwright": None, "playwright_stealth": None}
        with patch.dict("sys.modules", blocked):
            browser_pool._playwright_available = None
            available = browser_pool._check_playwright()
        assert available is False
        assert browser_pool._playwright_available is False

    def test_start_logs_warning_when_playwright_missing(self, caplog):
        import logging

        from app.platforms.ebay.browser_pool import BrowserPool

        browser_pool = BrowserPool(size=1)
        browser_pool._playwright_available = False
        with (
            patch.object(browser_pool, "_check_playwright", return_value=False),
            caplog.at_level(logging.WARNING, logger="app.platforms.ebay.browser_pool"),
        ):
            browser_pool.start()
        assert any("not available" in record.message for record in caplog.records)

    def test_fetch_fresh_raises_runtime_error_when_playwright_missing(self):
        from app.platforms.ebay.browser_pool import BrowserPool

        browser_pool = BrowserPool(size=1)
        blocked = {"playwright": None, "playwright.sync_api": None}
        with (
            patch.dict("sys.modules", blocked),
            pytest.raises(RuntimeError, match="Playwright not installed"),
        ):
            browser_pool._fetch_fresh("https://www.ebay.com/")
# ---------------------------------------------------------------------------
# _replenish_slot helper
# ---------------------------------------------------------------------------
class TestReplenishSlot:
    """``_replenish_slot`` swaps the browser context and keeps everything else."""

    def test_replenish_closes_old_context_and_opens_new(self):
        from app.platforms.ebay.browser_pool import _PooledBrowser, _replenish_slot

        stale_ctx = MagicMock()
        fresh_ctx = MagicMock()
        browser = MagicMock()
        browser.new_context.return_value = fresh_ctx
        used_slot = _PooledBrowser(
            xvfb=MagicMock(),
            pw=MagicMock(),
            browser=browser,
            ctx=stale_ctx,
            display_num=101,
            # Backdated so the refreshed timestamp is strictly newer.
            last_used_ts=time.time() - 10,
        )

        replenished = _replenish_slot(used_slot)

        stale_ctx.close.assert_called_once()
        browser.new_context.assert_called_once()
        assert replenished.ctx is fresh_ctx
        assert replenished.browser is browser
        assert replenished.xvfb is used_slot.xvfb
        assert replenished.last_used_ts > used_slot.last_used_ts
# ---------------------------------------------------------------------------
# _close_slot helper
# ---------------------------------------------------------------------------
class TestCloseSlot:
    """``_close_slot`` tears down every component and tolerates failures."""

    def test_close_slot_closes_all_components(self):
        from app.platforms.ebay.browser_pool import _close_slot, _PooledBrowser

        xvfb_proc = MagicMock(spec=subprocess.Popen)
        playwright = MagicMock()
        chromium = MagicMock()
        context = MagicMock()
        doomed = _PooledBrowser(
            xvfb=xvfb_proc,
            pw=playwright,
            browser=chromium,
            ctx=context,
            display_num=102,
            last_used_ts=time.time(),
        )

        _close_slot(doomed)

        context.close.assert_called_once()
        chromium.close.assert_called_once()
        playwright.stop.assert_called_once()
        xvfb_proc.terminate.assert_called_once()
        xvfb_proc.wait.assert_called_once()

    def test_close_slot_ignores_exceptions(self):
        from app.platforms.ebay.browser_pool import _close_slot, _PooledBrowser

        # Every teardown call is rigged to fail.
        context = MagicMock()
        context.close.side_effect = RuntimeError("gone")
        chromium = MagicMock()
        chromium.close.side_effect = RuntimeError("gone")
        playwright = MagicMock()
        playwright.stop.side_effect = RuntimeError("stopped")
        xvfb_proc = MagicMock(spec=subprocess.Popen)
        xvfb_proc.terminate.side_effect = OSError("already dead")
        xvfb_proc.wait.side_effect = OSError("already dead")
        doomed = _PooledBrowser(
            xvfb=xvfb_proc,
            pw=playwright,
            browser=chromium,
            ctx=context,
            display_num=103,
            last_used_ts=time.time(),
        )

        _close_slot(doomed)  # must not raise
# ---------------------------------------------------------------------------
# Scraper integration — _fetch_url uses pool
# ---------------------------------------------------------------------------
class TestScraperUsesPool:
    """``ScrapedEbayAdapter._fetch_url`` consults the HTML cache, then the pool."""

    def test_fetch_url_delegates_to_pool(self):
        import app.platforms.ebay.scraper as scraper_mod
        from app.db.store import Store
        from app.platforms.ebay.browser_pool import BrowserPool
        from app.platforms.ebay.scraper import ScrapedEbayAdapter

        url = "https://www.ebay.com/sch/i.html?_nkw=test"
        adapter = ScrapedEbayAdapter(MagicMock(spec=Store), delay=0)
        fake_pool = MagicMock(spec=BrowserPool)
        fake_pool.fetch_html.return_value = "<html>pooled</html>"

        # Start from an empty cache so the request has to reach the pool.
        scraper_mod._html_cache.clear()
        try:
            with patch("app.platforms.ebay.browser_pool.get_pool", return_value=fake_pool):
                html = adapter._fetch_url(url)
        finally:
            # Do not leave this URL in the module-level cache for later tests.
            scraper_mod._html_cache.pop(url, None)

        assert html == "<html>pooled</html>"
        fake_pool.fetch_html.assert_called_once_with(url, delay=0)

    def test_fetch_url_uses_cache_before_pool(self):
        from app.db.store import Store
        from app.platforms.ebay.scraper import (
            _HTML_CACHE_TTL,
            ScrapedEbayAdapter,
            _html_cache,
        )

        url = "https://www.ebay.com/sch/i.html?_nkw=cached"
        adapter = ScrapedEbayAdapter(MagicMock(spec=Store), delay=0)
        fake_pool = MagicMock()

        _html_cache[url] = ("<html>cached</html>", time.time() + _HTML_CACHE_TTL)
        try:
            with patch("app.platforms.ebay.browser_pool.get_pool", return_value=fake_pool):
                html = adapter._fetch_url(url)
            assert html == "<html>cached</html>"
            fake_pool.fetch_html.assert_not_called()
        finally:
            # Runs even when an assertion fails, so the seeded entry never
            # leaks into other tests.
            _html_cache.pop(url, None)

View file

@ -1,9 +1,7 @@
import time import time
from unittest.mock import MagicMock, patch
import pytest
import requests import requests
from unittest.mock import patch, MagicMock
import pytest
from app.platforms.ebay.auth import EbayTokenManager from app.platforms.ebay.auth import EbayTokenManager

View file

@ -1,6 +1,4 @@
import pytest import pytest
from api.main import _extract_ebay_item_id
from app.platforms.ebay.normaliser import normalise_listing, normalise_seller from app.platforms.ebay.normaliser import normalise_listing, normalise_seller
@ -57,48 +55,3 @@ def test_normalise_seller_maps_fields():
assert seller.feedback_count == 300 assert seller.feedback_count == 300
assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001) assert seller.feedback_ratio == pytest.approx(0.991, abs=0.001)
assert seller.account_age_days > 0 assert seller.account_age_days > 0
# ── _extract_ebay_item_id ─────────────────────────────────────────────────────
class TestExtractEbayItemId:
    """Checks that eBay URLs are reduced to their item ID, and other input to None."""

    def test_itm_url_with_title_slug(self):
        item_id = _extract_ebay_item_id(
            "https://www.ebay.com/itm/Sony-WH-1000XM5-Headphones/123456789012"
        )
        assert item_id == "123456789012"

    def test_itm_url_without_title_slug(self):
        item_id = _extract_ebay_item_id("https://www.ebay.com/itm/123456789012")
        assert item_id == "123456789012"

    def test_itm_url_no_www(self):
        item_id = _extract_ebay_item_id("https://ebay.com/itm/123456789012")
        assert item_id == "123456789012"

    def test_itm_url_with_query_params(self):
        item_id = _extract_ebay_item_id(
            "https://www.ebay.com/itm/123456789012?hash=item1234abcd"
        )
        assert item_id == "123456789012"

    def test_pay_ebay_rxo_with_itemId_query_param(self):
        item_id = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc123&itemId=123456789012"
        )
        assert item_id == "123456789012"

    def test_pay_ebay_rxo_path_with_itemId(self):
        item_id = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo/view?itemId=123456789012"
        )
        assert item_id == "123456789012"

    def test_non_ebay_url_returns_none(self):
        item_id = _extract_ebay_item_id("https://amazon.com/dp/B08N5WRWNW")
        assert item_id is None

    def test_plain_keyword_returns_none(self):
        item_id = _extract_ebay_item_id("rtx 4090 gpu")
        assert item_id is None

    def test_empty_string_returns_none(self):
        item_id = _extract_ebay_item_id("")
        assert item_id is None

    def test_ebay_url_no_item_id_returns_none(self):
        item_id = _extract_ebay_item_id("https://www.ebay.com/sch/i.html?_nkw=gpu")
        assert item_id is None

    def test_pay_ebay_no_item_id_returns_none(self):
        item_id = _extract_ebay_item_id(
            "https://pay.ebay.com/rxo?action=view&sessionid=abc"
        )
        assert item_id is None

View file

@ -3,18 +3,16 @@
Uses a minimal HTML fixture mirroring eBay's current s-card markup. Uses a minimal HTML fixture mirroring eBay's current s-card markup.
No HTTP requests are made; all tests operate on the pure parsing functions. No HTTP requests are made; all tests operate on the pure parsing functions.
""" """
from datetime import timedelta
import pytest import pytest
from bs4 import BeautifulSoup from datetime import timedelta
from app.platforms.ebay.scraper import ( from app.platforms.ebay.scraper import (
_extract_seller_from_card,
_parse_price,
_parse_time_left,
scrape_listings, scrape_listings,
scrape_sellers, scrape_sellers,
_parse_price,
_parse_time_left,
_extract_seller_from_card,
) )
from bs4 import BeautifulSoup
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Minimal eBay search results HTML fixture (li.s-card schema) # Minimal eBay search results HTML fixture (li.s-card schema)

View file

@ -1,83 +0,0 @@
"""Integration tests for POST /api/search/build."""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client(tmp_path, monkeypatch):
    """TestClient with SNIPE_DB pointed at a fresh DB file under ``tmp_path``.

    The variable is set through ``monkeypatch`` so it is restored when the
    test finishes instead of leaking into the rest of the session. This
    fixture does no mocking itself; each test patches the translator.
    """
    monkeypatch.setenv("SNIPE_DB", str(tmp_path / "snipe.db"))
    # Import app AFTER setting SNIPE_DB so the DB path is picked up.
    from api.main import app

    return TestClient(app, raise_server_exceptions=False)
def _good_llm_response() -> str:
    """Return a well-formed LLM reply, JSON-encoded, for a used RTX 3080 search."""
    payload = {
        "base_query": "RTX 3080",
        "must_include_mode": "groups",
        "must_include": "rtx|geforce, 3080",
        "must_exclude": "mining",
        "max_price": 300.0,
        "min_price": None,
        "condition": ["used"],
        "category_id": "27386",
        "explanation": "Used RTX 3080 under $300.",
    }
    return json.dumps(payload)
def test_build_endpoint_success(client):
    """A successful translation comes back as JSON with HTTP 200."""
    from app.llm.query_translator import SearchParamsResponse

    translator = MagicMock()
    translator.translate.return_value = SearchParamsResponse(
        base_query="RTX 3080",
        must_include_mode="groups",
        must_include="rtx|geforce, 3080",
        must_exclude="mining",
        max_price=300.0,
        min_price=None,
        condition=["used"],
        category_id="27386",
        explanation="Used RTX 3080 under $300.",
    )
    with patch("api.main._get_query_translator", return_value=translator):
        resp = client.post(
            "/api/search/build",
            json={"natural_language": "used RTX 3080 under $300 no mining"},
        )

    assert resp.status_code == 200
    payload = resp.json()
    assert payload["base_query"] == "RTX 3080"
    assert payload["explanation"] == "Used RTX 3080 under $300."
def test_build_endpoint_llm_unavailable(client):
    """With no translator configured the endpoint answers 503."""
    # ``None`` from the factory stands for "no translator configured".
    with patch("api.main._get_query_translator", return_value=None):
        resp = client.post("/api/search/build", json={"natural_language": "GPU"})

    assert resp.status_code == 503
def test_build_endpoint_bad_json(client):
    """A ``QueryTranslatorError`` becomes a 422 whose detail mentions the raw output."""
    from app.llm.query_translator import QueryTranslatorError

    translator = MagicMock()
    translator.translate.side_effect = QueryTranslatorError(
        "unparseable", raw="garbage output"
    )
    with patch("api.main._get_query_translator", return_value=translator):
        resp = client.post("/api/search/build", json={"natural_language": "GPU"})

    assert resp.status_code == 422
    assert "raw" in resp.json()["detail"]

View file

@ -1,231 +0,0 @@
"""Tests for GET /api/search/async (fire-and-forget search + SSE streaming).
Verifies:
- Returns HTTP 202 with session_id and status: "queued"
- session_id is registered in _update_queues immediately
- Actual scraping is not performed (mocked out)
- Empty query path returns a completed session with done event
"""
from __future__ import annotations
import os
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from fastapi.testclient import TestClient
# ── Fixtures ──────────────────────────────────────────────────────────────────
@pytest.fixture
def client(tmp_path):
    """Build a TestClient after pointing SNIPE_DB at a DB file under ``tmp_path``."""
    db_path = tmp_path / "snipe.db"
    # NOTE(review): the variable stays set after the test ends. Background
    # search workers may still read it, so confirm that before scoping it
    # with monkeypatch.setenv.
    os.environ["SNIPE_DB"] = str(db_path)
    # Deferred import: SNIPE_DB must be set *before* the app is imported.
    from api.main import app

    return TestClient(app, raise_server_exceptions=False)
def _make_mock_listing():
    """Return a MagicMock carrying the listing attributes the search pipeline reads."""
    attributes = {
        "platform_listing_id": "123456789",
        "seller_platform_id": "test_seller",
        "title": "Test GPU",
        "price": 100.0,
        "currency": "USD",
        "condition": "Used",
        "url": "https://www.ebay.com/itm/123456789",
        "photo_urls": [],
        "listing_age_days": 5,
        "buying_format": "fixed_price",
        "ends_at": None,
        "fetched_at": None,
        "trust_score_id": None,
        "id": 1,
        "category_name": None,
    }
    listing = MagicMock()
    for attr_name, attr_value in attributes.items():
        setattr(listing, attr_name, attr_value)
    return listing
# ── Core contract tests ───────────────────────────────────────────────────────
def test_async_search_returns_202(client):
    """GET /api/search/async?q=... returns HTTP 202 with session_id and status."""
    adapter = MagicMock()
    adapter.search.return_value = []
    adapter.get_completed_sales.return_value = None
    scorer = MagicMock()
    scorer.score_batch.return_value = []

    with (
        patch("api.main._make_adapter", return_value=adapter),
        patch("api.main._trigger_scraper_enrichment"),
        patch("api.main.TrustScorer", return_value=scorer),
    ):
        resp = client.get("/api/search/async?q=test+gpu")

    assert resp.status_code == 202
    body = resp.json()
    assert "session_id" in body
    assert body["status"] == "queued"
    assert isinstance(body["session_id"], str)
    assert len(body["session_id"]) > 0
def test_async_search_registers_session_id(client):
    """The session_id from the 202 response is present in ``_update_queues`` right away."""
    adapter = MagicMock()
    adapter.search.return_value = []
    adapter.get_completed_sales.return_value = None
    scorer = MagicMock()
    scorer.score_batch.return_value = []

    with (
        patch("api.main._make_adapter", return_value=adapter),
        patch("api.main._trigger_scraper_enrichment"),
        patch("api.main.TrustScorer", return_value=scorer),
    ):
        resp = client.get("/api/search/async?q=test+gpu")

    assert resp.status_code == 202
    sid = resp.json()["session_id"]

    # The queue must be registered so the SSE endpoint can open it.
    from api.main import _update_queues

    assert sid in _update_queues
def test_async_search_empty_query(client):
    """Empty query returns 202 with a pre-loaded done sentinel, no scraping needed."""
    resp = client.get("/api/search/async?q=")
    assert resp.status_code == 202
    data = resp.json()
    assert data["status"] == "queued"
    assert "session_id" in data

    from api.main import _update_queues

    sid = data["session_id"]
    assert sid in _update_queues
    q = _update_queues[sid]

    # First item should be the empty listings event.
    first = q.get_nowait()
    assert first is not None
    assert first["type"] == "listings"
    assert first["listings"] == []

    # Second item should be the ``None`` sentinel that ends the stream.
    sentinel = q.get_nowait()
    assert sentinel is None
def test_async_search_no_real_chromium(client):
    """Async search endpoint must not launch real Chromium in tests.

    Verifies that the background scraper is submitted to the executor but the
    adapter factory is patched, so no real Playwright/Xvfb process is spawned.
    Uses a broad patch on Store to avoid sqlite3 DB path issues in the thread pool.
    """
    import threading

    worker_ran = threading.Event()

    def _fake_search(query, filters):
        worker_ran.set()
        return []

    adapter = MagicMock()
    adapter.search.side_effect = _fake_search
    adapter.get_completed_sales.return_value = None

    scorer = MagicMock()
    scorer.score_batch.return_value = []

    store = MagicMock()
    store.get_listings_staged.return_value = {}
    store.refresh_seller_categories.return_value = 0
    store.save_listings.return_value = None
    store.save_trust_scores.return_value = None
    store.get_market_comp.return_value = None
    store.get_seller.return_value = None
    store.get_user_preference.return_value = None

    with (
        patch("api.main._make_adapter", return_value=adapter),
        patch("api.main._trigger_scraper_enrichment"),
        patch("api.main.TrustScorer", return_value=scorer),
        patch("api.main.Store", return_value=store),
    ):
        resp = client.get("/api/search/async?q=rtx+3080")
        assert resp.status_code == 202
        # Give the background worker a moment to run (it's in a thread pool).
        # Waiting inside the ``with`` keeps the patches active for the worker.
        worker_ran.wait(timeout=5.0)

    # If we get here without a real Playwright process, the test passes.
    assert worker_ran.is_set(), "Background search worker never ran"
def test_async_search_query_params_forwarded(client):
    """All filter params accepted by /api/search are also accepted here."""
    adapter = MagicMock()
    adapter.search.return_value = []
    adapter.get_completed_sales.return_value = None
    scorer = MagicMock()
    scorer.score_batch.return_value = []

    request_url = (
        "/api/search/async"
        "?q=rtx+3080"
        "&max_price=400"
        "&min_price=100"
        "&pages=2"
        "&must_include=rtx,3080"
        "&must_include_mode=all"
        "&must_exclude=mining"
        "&category_id=27386"
        "&adapter=auto"
    )
    with (
        patch("api.main._make_adapter", return_value=adapter),
        patch("api.main._trigger_scraper_enrichment"),
        patch("api.main.TrustScorer", return_value=scorer),
    ):
        resp = client.get(request_url)

    assert resp.status_code == 202
def test_async_search_session_id_is_uuid(client):
    """session_id must parse as a UUID and round-trip to the same string."""
    import uuid as _uuid

    adapter = MagicMock()
    adapter.search.return_value = []
    adapter.get_completed_sales.return_value = None
    scorer = MagicMock()
    scorer.score_batch.return_value = []

    with (
        patch("api.main._make_adapter", return_value=adapter),
        patch("api.main._trigger_scraper_enrichment"),
        patch("api.main.TrustScorer", return_value=scorer),
    ):
        resp = client.get("/api/search/async?q=test")

    assert resp.status_code == 202
    session_id = resp.json()["session_id"]
    # UUID() raises ValueError on anything that is not a valid UUID string.
    assert str(_uuid.UUID(session_id)) == session_id

View file

@ -1,218 +0,0 @@
"""Unit tests for EbayCategoryCache."""
from __future__ import annotations
import sqlite3
from datetime import datetime, timedelta, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from app.platforms.ebay.categories import EbayCategoryCache
BOOTSTRAP_MIN = 10 # bootstrap must seed at least this many rows
@pytest.fixture
def db(tmp_path):
    """File-backed SQLite connection under ``tmp_path`` with migrations applied.

    The connection is closed on teardown so no handle to the temp file
    outlives the test.
    """
    from circuitforge_core.db import get_connection, run_migrations

    conn = get_connection(tmp_path / "test.db")
    # NOTE(review): the migrations path is relative, so this relies on pytest
    # being run from the repository root.
    run_migrations(conn, Path("app/db/migrations"))
    yield conn
    conn.close()
def test_is_stale_empty_db(db):
    """A cache with no rows reports itself as stale."""
    assert EbayCategoryCache(db).is_stale() is True
def test_is_stale_fresh(db):
    """A row refreshed just now means the cache is not stale."""
    refreshed_at = datetime.now(timezone.utc).isoformat()
    row = (
        "12345",
        "Graphics Cards",
        "Consumer Electronics > GPUs > Graphics Cards",
        refreshed_at,
    )
    db.execute(
        "INSERT INTO ebay_categories (category_id, name, full_path, is_leaf, refreshed_at)"
        " VALUES (?, ?, ?, 1, ?)",
        row,
    )
    db.commit()

    assert EbayCategoryCache(db).is_stale() is False
def test_is_stale_old(db):
    """A row refreshed eight days ago makes the cache stale."""
    refreshed_at = (datetime.now(timezone.utc) - timedelta(days=8)).isoformat()
    row = (
        "12345",
        "Graphics Cards",
        "Consumer Electronics > GPUs > Graphics Cards",
        refreshed_at,
    )
    db.execute(
        "INSERT INTO ebay_categories (category_id, name, full_path, is_leaf, refreshed_at)"
        " VALUES (?, ?, ?, 1, ?)",
        row,
    )
    db.commit()

    assert EbayCategoryCache(db).is_stale() is True
def test_seed_bootstrap_populates_rows(db):
    """Seeding the bootstrap set writes at least BOOTSTRAP_MIN rows."""
    EbayCategoryCache(db)._seed_bootstrap()

    (row_count,) = db.execute("SELECT COUNT(*) FROM ebay_categories").fetchone()
    assert row_count >= BOOTSTRAP_MIN
def test_get_relevant_keyword_match(db):
    """GPU-related keywords surface the Graphics Cards category."""
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()

    matches = category_cache.get_relevant(["GPU", "graphics"], limit=5)

    assert "27386" in [match[0] for match in matches]  # Graphics Cards
def test_get_relevant_no_match(db):
    """A keyword that matches nothing yields an empty list."""
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()

    assert category_cache.get_relevant(["zzznomatch_xyzxyz"], limit=5) == []
def test_get_relevant_respects_limit(db):
    """``get_relevant`` never returns more rows than ``limit``."""
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()

    matches = category_cache.get_relevant(["electronics"], limit=3)

    assert len(matches) <= 3
def test_get_all_for_prompt_returns_rows(db):
    """``get_all_for_prompt`` returns non-empty two-element entries."""
    category_cache = EbayCategoryCache(db)
    category_cache._seed_bootstrap()

    entries = category_cache.get_all_for_prompt(limit=10)

    assert len(entries) > 0
    # Each entry is (category_id, full_path).
    assert all(len(entry) == 2 for entry in entries)
def _make_tree_response() -> dict:
    """Return a minimal Taxonomy API tree: Root > Electronics > {GPUs, CPUs}."""

    def _leaf(category_id, category_name):
        # Leaf nodes carry the leaf flag and an empty child list.
        return {
            "category": {"categoryId": category_id, "categoryName": category_name},
            "leafCategoryTreeNode": True,
            "childCategoryTreeNodes": [],
        }

    electronics = {
        "category": {"categoryId": "6001", "categoryName": "Electronics"},
        "leafCategoryTreeNode": False,
        "childCategoryTreeNodes": [_leaf("6002", "GPUs"), _leaf("6003", "CPUs")],
    }
    root = {
        "category": {"categoryId": "6000", "categoryName": "Root"},
        "leafCategoryTreeNode": False,
        "childCategoryTreeNodes": [electronics],
    }
    return {"categoryTreeId": "0", "rootCategoryNode": root}
def test_refresh_inserts_leaf_nodes(db):
    """A refresh against the fake tree stores exactly its two leaf categories."""
    token_manager = MagicMock()
    token_manager.get_token.return_value = "fake-token"

    id_resp = MagicMock()
    id_resp.raise_for_status = MagicMock()
    id_resp.json.return_value = {"categoryTreeId": "0"}

    tree_resp = MagicMock()
    tree_resp.raise_for_status = MagicMock()
    tree_resp.json.return_value = _make_tree_response()

    # The first GET returns the tree id, the second the tree itself.
    with patch(
        "app.platforms.ebay.categories.requests.get",
        side_effect=[id_resp, tree_resp],
    ):
        inserted = EbayCategoryCache(db).refresh(token_manager)

    assert inserted == 2  # two leaf nodes in our fake tree
    rows = db.execute(
        "SELECT category_id FROM ebay_categories ORDER BY category_id"
    ).fetchall()
    stored_ids = {row[0] for row in rows}
    assert "6002" in stored_ids
    assert "6003" in stored_ids
def test_refresh_no_token_manager_seeds_bootstrap(db):
    """Without a token manager, refresh reports at least BOOTSTRAP_MIN rows."""
    seeded = EbayCategoryCache(db).refresh(token_manager=None)

    assert seeded >= BOOTSTRAP_MIN
def test_refresh_api_error_logs_warning(db, caplog):
    """An API failure is logged at WARNING or above and falls back to bootstrap."""
    import logging

    mock_tm = MagicMock()
    mock_tm.get_token.return_value = "fake-token"

    with patch("app.platforms.ebay.categories.requests.get") as mock_get:
        mock_get.side_effect = Exception("network error")
        cache = EbayCategoryCache(db)
        with caplog.at_level(logging.WARNING, logger="app.platforms.ebay.categories"):
            count = cache.refresh(mock_tm)

    # Falls back to bootstrap on API error.
    assert count >= BOOTSTRAP_MIN
    # The test name promises a logged warning; check it instead of only
    # capturing it.
    assert any(record.levelno >= logging.WARNING for record in caplog.records)
def test_refresh_publishes_to_community_when_creds_available(db):
    """After a successful Taxonomy API refresh, categories are published to community store."""
    token_manager = MagicMock()
    token_manager.get_token.return_value = "fake-token"

    id_resp = MagicMock()
    id_resp.raise_for_status = MagicMock()
    id_resp.json.return_value = {"categoryTreeId": "0"}

    tree_resp = MagicMock()
    tree_resp.raise_for_status = MagicMock()
    tree_resp.json.return_value = _make_tree_response()

    community = MagicMock()
    community.publish_categories.return_value = 2

    with patch(
        "app.platforms.ebay.categories.requests.get",
        side_effect=[id_resp, tree_resp],
    ):
        EbayCategoryCache(db).refresh(token_manager, community_store=community)

    community.publish_categories.assert_called_once()
    # The first positional argument is the list of published categories.
    published = community.publish_categories.call_args[0][0]
    assert len(published) == 2
def test_refresh_fetches_from_community_when_no_creds(db):
    """Without creds, community categories are used when available (>= 10 rows)."""
    community = MagicMock()
    community.fetch_categories.return_value = [
        (str(idx), f"Cat {idx}", f"Path > Cat {idx}") for idx in range(15)
    ]

    stored = EbayCategoryCache(db).refresh(token_manager=None, community_store=community)

    assert stored == 15
    (row_count,) = db.execute("SELECT COUNT(*) FROM ebay_categories").fetchone()
    assert row_count == 15
def test_refresh_falls_back_to_bootstrap_when_community_sparse(db):
    """Falls back to bootstrap if community returns fewer than 10 rows."""
    community = MagicMock()
    community.fetch_categories.return_value = [("1", "Only One", "Path > Only One")]

    stored = EbayCategoryCache(db).refresh(token_manager=None, community_store=community)

    assert stored >= BOOTSTRAP_MIN

Some files were not shown because too many files have changed in this diff Show more