fix(tasks): address code review — cloud DB path, migration number, connection handling, enqueue site
- Rename 002_background_tasks.sql → 007_background_tasks.sql to avoid collision with the existing 002_add_listing_format.sql migration
- Add CREATE UNIQUE INDEX on trust_scores(listing_id) in the same migration so save_trust_scores() can use ON CONFLICT upsert semantics
- Add Store.save_trust_scores() — upserts scores keyed by listing_id; preserves photo_analysis_json so runner writes are never clobbered
- runner.py: replace raw sqlite3.connect() with get_connection() throughout (timeout=30 + WAL mode); fix connection leak in insert_task via try/finally
- _run_trust_photo_analysis: read 'user_db' from params to write results to the correct per-user DB in cloud mode (was silently writing to the wrong DB)
- main.py lifespan: use _shared_db_path() in cloud mode so the background_tasks queue lives in the shared DB, not _LOCAL_SNIPE_DB
- Add _enqueue_vision_tasks() and call it after score_batch() — this is the missing enqueue call site; gated by features.photo_analysis (Paid tier)
- Test fixture: add the missing 'stage' column to the background_tasks schema
This commit is contained in:
parent
f7c5e8dc17
commit
d9660093b1
5 changed files with 235 additions and 33 deletions
74
api/main.py
74
api/main.py
|
|
@ -34,15 +34,16 @@ log = logging.getLogger(__name__)
|
|||
|
||||
@asynccontextmanager
async def _lifespan(app: FastAPI):
    """FastAPI lifespan: run the vision/LLM background task scheduler.

    The background_tasks queue lives in the shared DB (cloud mode) or the
    local DB (local mode) so the scheduler has a single stable DB path
    across all cloud users. On shutdown the scheduler is drained with a
    bounded wait and the singleton is reset.
    """
    # Imported lazily so app import doesn't pay scheduler startup cost.
    from app.tasks.scheduler import get_scheduler, reset_scheduler
    from api.cloud_session import CLOUD_MODE, _LOCAL_SNIPE_DB, _shared_db_path

    sched_db = _shared_db_path() if CLOUD_MODE else _LOCAL_SNIPE_DB
    get_scheduler(sched_db)
    log.info("Snipe task scheduler started (db=%s)", sched_db)

    yield

    # Graceful shutdown: same sched_db resolves to the same singleton.
    get_scheduler(sched_db).shutdown(timeout=10.0)
    reset_scheduler()
    log.info("Snipe task scheduler stopped.")
|
||||
|
||||
|
|
@ -164,6 +165,55 @@ def _trigger_scraper_enrichment(
|
|||
t.start()
|
||||
|
||||
|
||||
def _enqueue_vision_tasks(
    listings: list,
    trust_scores_list: list,
    session: "CloudUser",
) -> None:
    """Enqueue trust_photo_analysis tasks for listings that carry photos.

    Fire-and-forget: tasks land in the scheduler queue and the HTTP response
    returns immediately. The runner writes results back to
    trust_scores.photo_analysis_json when the vision LLM completes.

    session.shared_db: where background_tasks lives (the scheduler's DB).
    session.user_db: encoded in params so the runner writes to the correct
        trust_scores table in cloud mode.
    """
    import json as _json
    from app.tasks.runner import insert_task
    from app.tasks.scheduler import get_scheduler
    from api.cloud_session import CLOUD_MODE, _shared_db_path, _LOCAL_SNIPE_DB

    sched_db = _shared_db_path() if CLOUD_MODE else _LOCAL_SNIPE_DB
    sched = get_scheduler(sched_db)

    count = 0
    for listing, _score in zip(listings, trust_scores_list):
        # Skip listings with no photo or no DB row to attach results to.
        if not (listing.photo_urls and listing.id):
            continue
        payload = _json.dumps({
            "photo_url": listing.photo_urls[0],
            "listing_title": listing.title,
            "user_db": str(session.user_db),
        })
        task_id, is_new = insert_task(
            sched_db, "trust_photo_analysis", job_id=listing.id, params=payload
        )
        if not is_new:
            # An identical task is already queued/running — don't double-enqueue.
            continue
        if sched.enqueue(task_id, "trust_photo_analysis", listing.id, payload):
            count += 1
        else:
            log.warning(
                "Vision task queue full — dropped task for listing %s",
                listing.platform_listing_id,
            )

    if count:
        log.info("Enqueued %d vision analysis task(s)", count)
|
||||
|
||||
|
||||
def _parse_terms(raw: str) -> list[str]:
|
||||
"""Split a comma-separated keyword string into non-empty, stripped terms."""
|
||||
return [t.strip() for t in raw.split(",") if t.strip()]
|
||||
|
|
@ -312,6 +362,14 @@ def search(
|
|||
scorer = TrustScorer(shared_store)
|
||||
trust_scores_list = scorer.score_batch(listings, q)
|
||||
|
||||
# Persist trust scores so background vision tasks have a row to UPDATE.
|
||||
user_store.save_trust_scores(trust_scores_list)
|
||||
|
||||
# Enqueue vision analysis for listings with photos — Paid tier and above.
|
||||
features = compute_features(session.tier)
|
||||
if features.photo_analysis:
|
||||
_enqueue_vision_tasks(listings, trust_scores_list, session)
|
||||
|
||||
query_hash = hashlib.md5(q.encode()).hexdigest()
|
||||
comp = shared_store.get_market_comp("ebay", query_hash)
|
||||
market_price = comp.median_price if comp else None
|
||||
|
|
|
|||
24
app/db/migrations/007_background_tasks.sql
Normal file
24
app/db/migrations/007_background_tasks.sql
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
-- 007_background_tasks.sql
-- Shared background task queue used by the LLM/vision task scheduler.
-- Schema mirrors the circuitforge-core standard.
-- Also adds a UNIQUE constraint on trust_scores(listing_id) so
-- save_trust_scores() can use ON CONFLICT upsert semantics.

CREATE TABLE IF NOT EXISTS background_tasks (
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    task_type  TEXT NOT NULL,
    job_id     INTEGER NOT NULL DEFAULT 0,
    status     TEXT NOT NULL DEFAULT 'queued',
    params     TEXT,
    error      TEXT,
    stage      TEXT,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- The scheduler polls by (status, task_type); keep that lookup indexed.
CREATE INDEX IF NOT EXISTS idx_bg_tasks_status_type
    ON background_tasks (status, task_type);

-- Enables ON CONFLICT(listing_id) upserts in save_trust_scores().
-- IF NOT EXISTS keeps this migration idempotent on existing DBs.
CREATE UNIQUE INDEX IF NOT EXISTS idx_trust_scores_listing
    ON trust_scores (listing_id);
|
||||
113
app/db/store.py
113
app/db/store.py
|
|
@ -7,15 +7,18 @@ from typing import Optional
|
|||
|
||||
from circuitforge_core.db import get_connection, run_migrations
|
||||
|
||||
from .models import Listing, Seller, TrustScore, MarketComp, SavedSearch
|
||||
from .models import Listing, Seller, TrustScore, MarketComp, SavedSearch, ScammerEntry
|
||||
|
||||
MIGRATIONS_DIR = Path(__file__).parent / "migrations"
|
||||
|
||||
|
||||
class Store:
|
||||
def __init__(self, db_path: Path):
    """Open the snipe DB at *db_path*, apply pending migrations, enable WAL.

    Uses the shared circuitforge_core get_connection() helper so connection
    settings match the rest of the codebase.
    """
    self._db_path = db_path
    self._conn = get_connection(db_path)
    run_migrations(self._conn, MIGRATIONS_DIR)
    # WAL journal mode: concurrent readers plus one writer, without blocking.
    self._conn.execute("PRAGMA journal_mode=WAL")
|
||||
|
||||
# --- Seller ---
|
||||
|
||||
|
|
@ -35,11 +38,26 @@ class Store:
|
|||
self.save_sellers([seller])
|
||||
|
||||
def save_sellers(self, sellers: list[Seller]) -> None:
|
||||
# COALESCE preserves enriched signals (account_age_days, category_history_json)
|
||||
# that were filled by BTF / _ssn passes — never overwrite with NULL from a
|
||||
# fresh search page that doesn't carry those signals.
|
||||
self._conn.executemany(
|
||||
"INSERT OR REPLACE INTO sellers "
|
||||
"INSERT INTO sellers "
|
||||
"(platform, platform_seller_id, username, account_age_days, "
|
||||
"feedback_count, feedback_ratio, category_history_json) "
|
||||
"VALUES (?,?,?,?,?,?,?)",
|
||||
"VALUES (?,?,?,?,?,?,?) "
|
||||
"ON CONFLICT(platform, platform_seller_id) DO UPDATE SET "
|
||||
" username = excluded.username, "
|
||||
" feedback_count = excluded.feedback_count, "
|
||||
" feedback_ratio = excluded.feedback_ratio, "
|
||||
" account_age_days = COALESCE(excluded.account_age_days, sellers.account_age_days), "
|
||||
" category_history_json = COALESCE("
|
||||
" CASE WHEN excluded.category_history_json IN ('{}', '', NULL) THEN NULL "
|
||||
" ELSE excluded.category_history_json END, "
|
||||
" CASE WHEN sellers.category_history_json IN ('{}', '', NULL) THEN NULL "
|
||||
" ELSE sellers.category_history_json END, "
|
||||
" '{}'"
|
||||
" )",
|
||||
[
|
||||
(s.platform, s.platform_seller_id, s.username, s.account_age_days,
|
||||
s.feedback_count, s.feedback_ratio, s.category_history_json)
|
||||
|
|
@ -224,6 +242,43 @@ class Store:
|
|||
price_at_first_seen=row[17],
|
||||
)
|
||||
|
||||
# --- TrustScore ---
|
||||
|
||||
def save_trust_scores(self, scores: list[TrustScore]) -> None:
    """Upsert trust scores keyed by listing_id.

    photo_analysis_json is preserved on conflict so background vision
    results written by the task runner are never overwritten by a re-score.
    Requires the UNIQUE index idx_trust_scores_listing (migration 007).
    Scores without a listing_id are skipped.
    """
    # photo_analysis_json is intentionally absent from both the column list
    # and the update set — the background task runner owns that column.
    rows = [
        (
            score.listing_id,
            score.composite_score,
            score.account_age_score,
            score.feedback_count_score,
            score.feedback_ratio_score,
            score.price_vs_market_score,
            score.category_history_score,
            int(score.photo_hash_duplicate),
            score.red_flags_json,
            int(score.score_is_partial),
        )
        for score in scores
        if score.listing_id
    ]
    self._conn.executemany(
        "INSERT INTO trust_scores "
        "(listing_id, composite_score, account_age_score, feedback_count_score, "
        "feedback_ratio_score, price_vs_market_score, category_history_score, "
        "photo_hash_duplicate, red_flags_json, score_is_partial) "
        "VALUES (?,?,?,?,?,?,?,?,?,?) "
        "ON CONFLICT(listing_id) DO UPDATE SET "
        " composite_score = excluded.composite_score, "
        " account_age_score = excluded.account_age_score, "
        " feedback_count_score = excluded.feedback_count_score, "
        " feedback_ratio_score = excluded.feedback_ratio_score, "
        " price_vs_market_score = excluded.price_vs_market_score, "
        " category_history_score= excluded.category_history_score, "
        " photo_hash_duplicate = excluded.photo_hash_duplicate, "
        " red_flags_json = excluded.red_flags_json, "
        " score_is_partial = excluded.score_is_partial, "
        " scored_at = CURRENT_TIMESTAMP",
        rows,
    )
    self._conn.commit()
|
||||
|
||||
# --- MarketComp ---
|
||||
|
||||
def save_market_comp(self, comp: MarketComp) -> None:
|
||||
|
|
@ -274,6 +329,58 @@ class Store:
|
|||
)
|
||||
self._conn.commit()
|
||||
|
||||
# --- ScammerBlocklist ---
|
||||
|
||||
def add_to_blocklist(self, entry: ScammerEntry) -> ScammerEntry:
    """Upsert a seller into the blocklist. Returns the saved entry with id and created_at."""
    from dataclasses import replace

    self._conn.execute(
        "INSERT INTO scammer_blocklist "
        "(platform, platform_seller_id, username, reason, source) "
        "VALUES (?,?,?,?,?) "
        "ON CONFLICT(platform, platform_seller_id) DO UPDATE SET "
        " username = excluded.username, "
        # COALESCE: a re-add without a reason keeps the previously stored one.
        " reason = COALESCE(excluded.reason, scammer_blocklist.reason), "
        " source = excluded.source",
        (entry.platform, entry.platform_seller_id, entry.username,
         entry.reason, entry.source),
    )
    self._conn.commit()
    # Re-read to pick up the DB-assigned id and created_at timestamp.
    saved = self._conn.execute(
        "SELECT id, created_at FROM scammer_blocklist "
        "WHERE platform=? AND platform_seller_id=?",
        (entry.platform, entry.platform_seller_id),
    ).fetchone()
    return replace(entry, id=saved[0], created_at=saved[1])
|
||||
|
||||
def remove_from_blocklist(self, platform: str, platform_seller_id: str) -> None:
    """Delete the blocklist row for (platform, seller id); no-op when absent."""
    self._conn.execute(
        "DELETE FROM scammer_blocklist WHERE platform=? AND platform_seller_id=?",
        (platform, platform_seller_id),
    )
    self._conn.commit()
|
||||
|
||||
def is_blocklisted(self, platform: str, platform_seller_id: str) -> bool:
    """Return True when (platform, platform_seller_id) has a blocklist entry."""
    hit = self._conn.execute(
        "SELECT 1 FROM scammer_blocklist WHERE platform=? AND platform_seller_id=? LIMIT 1",
        (platform, platform_seller_id),
    ).fetchone()
    return hit is not None
|
||||
|
||||
def list_blocklist(self, platform: str = "ebay") -> list[ScammerEntry]:
    """Return all blocklist entries for *platform*, newest first."""
    rows = self._conn.execute(
        "SELECT platform, platform_seller_id, username, reason, source, id, created_at "
        "FROM scammer_blocklist WHERE platform=? ORDER BY created_at DESC",
        (platform,),
    ).fetchall()
    entries: list[ScammerEntry] = []
    for r in rows:
        entries.append(
            ScammerEntry(
                platform=r[0], platform_seller_id=r[1], username=r[2],
                reason=r[3], source=r[4], id=r[5], created_at=r[6],
            )
        )
    return entries
|
||||
|
||||
def get_market_comp(self, platform: str, query_hash: str) -> Optional[MarketComp]:
|
||||
row = self._conn.execute(
|
||||
"SELECT platform, query_hash, median_price, sample_count, expires_at, id, fetched_at "
|
||||
|
|
|
|||
|
|
@ -16,11 +16,11 @@ from __future__ import annotations
|
|||
import base64
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
|
||||
from circuitforge_core.db import get_connection
|
||||
from circuitforge_core.llm import LLMRouter
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
|
@ -50,31 +50,35 @@ def insert_task(
|
|||
*,
|
||||
params: str | None = None,
|
||||
) -> tuple[int, bool]:
|
||||
"""Insert a background task if no identical task is already in-flight."""
|
||||
conn = sqlite3.connect(db_path)
|
||||
conn.row_factory = sqlite3.Row
|
||||
"""Insert a background task if no identical task is already in-flight.
|
||||
|
||||
Uses get_connection() so WAL mode and timeout=30 apply — same as all other
|
||||
Snipe DB access. Returns (task_id, is_new).
|
||||
"""
|
||||
conn = get_connection(db_path)
|
||||
conn.row_factory = __import__("sqlite3").Row
|
||||
try:
|
||||
existing = conn.execute(
|
||||
"SELECT id FROM background_tasks "
|
||||
"WHERE task_type=? AND job_id=? AND status IN ('queued','running')",
|
||||
(task_type, job_id),
|
||||
).fetchone()
|
||||
if existing:
|
||||
conn.close()
|
||||
return existing["id"], False
|
||||
cursor = conn.execute(
|
||||
"INSERT INTO background_tasks (task_type, job_id, params) VALUES (?,?,?)",
|
||||
(task_type, job_id, params),
|
||||
)
|
||||
conn.commit()
|
||||
task_id = cursor.lastrowid
|
||||
return cursor.lastrowid, True
|
||||
finally:
|
||||
conn.close()
|
||||
return task_id, True
|
||||
|
||||
|
||||
def _update_task_status(
|
||||
db_path: Path, task_id: int, status: str, *, error: str = ""
|
||||
) -> None:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
with get_connection(db_path) as conn:
|
||||
conn.execute(
|
||||
"UPDATE background_tasks "
|
||||
"SET status=?, error=?, updated_at=CURRENT_TIMESTAMP WHERE id=?",
|
||||
|
|
@ -107,10 +111,18 @@ def _run_trust_photo_analysis(
|
|||
listing_id: int,
|
||||
params: str | None,
|
||||
) -> None:
|
||||
"""Download primary listing photo, run vision LLM, write to trust_scores."""
|
||||
"""Download primary listing photo, run vision LLM, write to trust_scores.
|
||||
|
||||
In cloud mode the result must be written to the per-user DB, which differs
|
||||
from db_path (the scheduler's shared task-queue DB). The enqueue call site
|
||||
encodes the correct write target as 'user_db' in params; in local mode it
|
||||
falls back to db_path so the single-DB layout keeps working.
|
||||
"""
|
||||
p = json.loads(params or "{}")
|
||||
photo_url = p.get("photo_url", "")
|
||||
listing_title = p.get("listing_title", "")
|
||||
# user_db: per-user DB in cloud mode; same as db_path in local mode.
|
||||
result_db = Path(p.get("user_db", str(db_path)))
|
||||
|
||||
if not photo_url:
|
||||
raise ValueError("trust_photo_analysis: 'photo_url' is required in params")
|
||||
|
|
@ -144,7 +156,7 @@ def _run_trust_photo_analysis(
|
|||
)
|
||||
analysis = {"raw_response": raw, "parse_error": True}
|
||||
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
with get_connection(result_db) as conn:
|
||||
conn.execute(
|
||||
"UPDATE trust_scores SET photo_analysis_json=? WHERE listing_id=?",
|
||||
(json.dumps(analysis), listing_id),
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ def tmp_db(tmp_path: Path) -> Path:
|
|||
status TEXT NOT NULL DEFAULT 'queued',
|
||||
params TEXT,
|
||||
error TEXT,
|
||||
stage TEXT,
|
||||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
|
|
|||
Loading…
Reference in a new issue