fix: account_age_days=None for scraper tier, stop false new_account flags
Scraper can't fetch seller profile age without following each listing's seller link. Using 0 as a sentinel caused every scraped seller to trigger new_account and account_under_30_days red flags erroneously.

- Seller.account_age_days: int → Optional[int] (None = not yet fetched)
- Migration 003: recreate sellers table without the NOT NULL constraint
- MetadataScorer: return None for unknown age → score_is_partial=True
- Aggregator: gate age-based flags on `account_age_days is not None`
- Scraper: set account_age_days=None instead of 0
This commit is contained in:
parent
58263d814a
commit
2ab41219f8
6 changed files with 31 additions and 8 deletions
23
app/db/migrations/003_nullable_account_age.sql
Normal file
23
app/db/migrations/003_nullable_account_age.sql
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
-- Migration 003: make sellers.account_age_days nullable.
--
-- The scraper tier cannot fetch account age without following each seller's
-- profile link, so NULL means "not yet fetched" rather than "genuinely zero
-- days old". This prevents false new_account / account_under_30_days flags
-- for all scraped listings.
--
-- SQLite doesn't support ALTER COLUMN, so we recreate the sellers table
-- (rename-and-swap). NOTE(review): assumes the migration runner wraps each
-- migration in a transaction — confirm, otherwise add BEGIN/COMMIT here.

CREATE TABLE sellers_new (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    platform TEXT NOT NULL,
    platform_seller_id TEXT NOT NULL,
    username TEXT NOT NULL,
    account_age_days INTEGER, -- NULL = not yet fetched (scraper tier)
    feedback_count INTEGER NOT NULL,
    feedback_ratio REAL NOT NULL,
    category_history_json TEXT NOT NULL DEFAULT '{}',
    fetched_at TEXT DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(platform, platform_seller_id)
);

-- Copy by explicit column name, not SELECT *: a positional copy silently
-- scrambles data if the old table's column order ever drifted from this one.
INSERT INTO sellers_new (id, platform, platform_seller_id, username,
                         account_age_days, feedback_count, feedback_ratio,
                         category_history_json, fetched_at)
SELECT id, platform, platform_seller_id, username,
       account_age_days, feedback_count, feedback_ratio,
       category_history_json, fetched_at
FROM sellers;

DROP TABLE sellers;
ALTER TABLE sellers_new RENAME TO sellers;
|
||||||
|
|
@ -9,7 +9,7 @@ class Seller:
|
||||||
platform: str
|
platform: str
|
||||||
platform_seller_id: str
|
platform_seller_id: str
|
||||||
username: str
|
username: str
|
||||||
account_age_days: int
|
account_age_days: Optional[int] # None = not yet fetched (scraper tier)
|
||||||
feedback_count: int
|
feedback_count: int
|
||||||
feedback_ratio: float # 0.0–1.0
|
feedback_ratio: float # 0.0–1.0
|
||||||
category_history_json: str # JSON blob of past category sales
|
category_history_json: str # JSON blob of past category sales
|
||||||
|
|
|
||||||
|
|
@ -202,7 +202,7 @@ def scrape_sellers(html: str) -> dict[str, Seller]:
|
||||||
platform="ebay",
|
platform="ebay",
|
||||||
platform_seller_id=username,
|
platform_seller_id=username,
|
||||||
username=username,
|
username=username,
|
||||||
account_age_days=0, # not available from search HTML
|
account_age_days=None, # not fetched at scraper tier
|
||||||
feedback_count=count,
|
feedback_count=count,
|
||||||
feedback_ratio=ratio,
|
feedback_ratio=ratio,
|
||||||
category_history_json="{}", # not available from search HTML
|
category_history_json="{}", # not available from search HTML
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ class Aggregator:
|
||||||
red_flags: list[str] = []
|
red_flags: list[str] = []
|
||||||
|
|
||||||
# Hard filters
|
# Hard filters
|
||||||
if seller and seller.account_age_days < HARD_FILTER_AGE_DAYS:
|
if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS:
|
||||||
red_flags.append("new_account")
|
red_flags.append("new_account")
|
||||||
if seller and (
|
if seller and (
|
||||||
seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
|
seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
|
||||||
|
|
@ -33,7 +33,7 @@ class Aggregator:
|
||||||
red_flags.append("established_bad_actor")
|
red_flags.append("established_bad_actor")
|
||||||
|
|
||||||
# Soft flags
|
# Soft flags
|
||||||
if seller and seller.account_age_days < 30:
|
if seller and seller.account_age_days is not None and seller.account_age_days < 30:
|
||||||
red_flags.append("account_under_30_days")
|
red_flags.append("account_under_30_days")
|
||||||
if seller and seller.feedback_count < 10:
|
if seller and seller.feedback_count < 10:
|
||||||
red_flags.append("low_feedback_count")
|
red_flags.append("low_feedback_count")
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ class MetadataScorer:
|
||||||
listing_price: float,
|
listing_price: float,
|
||||||
) -> dict[str, Optional[int]]:
|
) -> dict[str, Optional[int]]:
|
||||||
return {
|
return {
|
||||||
"account_age": self._account_age(seller.account_age_days),
|
"account_age": self._account_age(seller.account_age_days) if seller.account_age_days is not None else None,
|
||||||
"feedback_count": self._feedback_count(seller.feedback_count),
|
"feedback_count": self._feedback_count(seller.feedback_count),
|
||||||
"feedback_ratio": self._feedback_ratio(seller.feedback_ratio, seller.feedback_count),
|
"feedback_ratio": self._feedback_ratio(seller.feedback_ratio, seller.feedback_count),
|
||||||
"price_vs_market": self._price_vs_market(listing_price, market_median),
|
"price_vs_market": self._price_vs_market(listing_price, market_median),
|
||||||
|
|
|
||||||
|
|
@ -267,10 +267,10 @@ class TestScrapeSellers:
|
||||||
assert len(sellers) == 1
|
assert len(sellers) == 1
|
||||||
assert "repeatguy" in sellers
|
assert "repeatguy" in sellers
|
||||||
|
|
||||||
def test_account_age_always_zero(self):
|
def test_account_age_is_none(self):
|
||||||
"""account_age_days is 0 from scraper — causes score_is_partial=True."""
|
"""account_age_days is None from scraper tier — causes score_is_partial=True."""
|
||||||
sellers = scrape_sellers(_EBAY_HTML)
|
sellers = scrape_sellers(_EBAY_HTML)
|
||||||
assert all(s.account_age_days == 0 for s in sellers.values())
|
assert all(s.account_age_days is None for s in sellers.values())
|
||||||
|
|
||||||
def test_category_history_always_empty(self):
|
def test_category_history_always_empty(self):
|
||||||
"""category_history_json is '{}' from scraper — causes score_is_partial=True."""
|
"""category_history_json is '{}' from scraper — causes score_is_partial=True."""
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue