fix: account_age_days=None for scraper tier, stop false new_account flags

Scraper can't fetch seller profile age without following each listing's
seller link. Using 0 as sentinel caused every scraped seller to trigger
new_account and account_under_30_days red flags erroneously.

- Seller.account_age_days: int → Optional[int] (None = not yet fetched)
- Migration 003: recreate sellers table without NOT NULL constraint
- MetadataScorer: return None for unknown age → score_is_partial=True
- Aggregator: gate age flags on `account_age_days is not None`
- Scraper: account_age_days=None instead of 0
This commit is contained in:
pyr0ball 2026-03-25 20:36:43 -07:00
parent 58263d814a
commit 2ab41219f8
6 changed files with 31 additions and 8 deletions

View file

@ -0,0 +1,23 @@
-- Make account_age_days nullable — scraper tier cannot fetch it without
-- following each seller's profile link, so NULL means "not yet fetched"
-- rather than "genuinely zero days old". This prevents false new_account
-- flags for all scraped listings.
--
-- SQLite doesn't support ALTER COLUMN, so we recreate the sellers table:
-- create the new shape, copy the rows across, drop the old table, then
-- rename the new table into place.
CREATE TABLE sellers_new (
    id                    INTEGER PRIMARY KEY AUTOINCREMENT,
    platform              TEXT    NOT NULL,
    platform_seller_id    TEXT    NOT NULL,
    username              TEXT    NOT NULL,
    account_age_days      INTEGER,            -- NULL = not yet fetched
    feedback_count        INTEGER NOT NULL,
    feedback_ratio        REAL    NOT NULL,
    category_history_json TEXT    NOT NULL DEFAULT '{}',
    fetched_at            TEXT    DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(platform, platform_seller_id)
);

-- Copy by explicit column name instead of SELECT * so the copy does not
-- depend on the physical column order of the old table; a positional copy
-- would silently put values in the wrong columns if the orders ever differ.
-- NOTE(review): assumes the existing sellers table uses these same column
-- names — confirm against the migration that originally created it.
INSERT INTO sellers_new (
    id,
    platform,
    platform_seller_id,
    username,
    account_age_days,
    feedback_count,
    feedback_ratio,
    category_history_json,
    fetched_at
)
SELECT
    id,
    platform,
    platform_seller_id,
    username,
    account_age_days,
    feedback_count,
    feedback_ratio,
    category_history_json,
    fetched_at
FROM sellers;

DROP TABLE sellers;
ALTER TABLE sellers_new RENAME TO sellers;

View file

@ -9,7 +9,7 @@ class Seller:
platform: str platform: str
platform_seller_id: str platform_seller_id: str
username: str username: str
account_age_days: int account_age_days: Optional[int] # None = not yet fetched (scraper tier)
feedback_count: int feedback_count: int
feedback_ratio: float # 0.0–1.0 feedback_ratio: float # 0.0–1.0
category_history_json: str # JSON blob of past category sales category_history_json: str # JSON blob of past category sales

View file

@ -202,7 +202,7 @@ def scrape_sellers(html: str) -> dict[str, Seller]:
platform="ebay", platform="ebay",
platform_seller_id=username, platform_seller_id=username,
username=username, username=username,
account_age_days=0, # not available from search HTML account_age_days=None, # not fetched at scraper tier
feedback_count=count, feedback_count=count,
feedback_ratio=ratio, feedback_ratio=ratio,
category_history_json="{}", # not available from search HTML category_history_json="{}", # not available from search HTML

View file

@ -24,7 +24,7 @@ class Aggregator:
red_flags: list[str] = [] red_flags: list[str] = []
# Hard filters # Hard filters
if seller and seller.account_age_days < HARD_FILTER_AGE_DAYS: if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS:
red_flags.append("new_account") red_flags.append("new_account")
if seller and ( if seller and (
seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
@ -33,7 +33,7 @@ class Aggregator:
red_flags.append("established_bad_actor") red_flags.append("established_bad_actor")
# Soft flags # Soft flags
if seller and seller.account_age_days < 30: if seller and seller.account_age_days is not None and seller.account_age_days < 30:
red_flags.append("account_under_30_days") red_flags.append("account_under_30_days")
if seller and seller.feedback_count < 10: if seller and seller.feedback_count < 10:
red_flags.append("low_feedback_count") red_flags.append("low_feedback_count")

View file

@ -15,7 +15,7 @@ class MetadataScorer:
listing_price: float, listing_price: float,
) -> dict[str, Optional[int]]: ) -> dict[str, Optional[int]]:
return { return {
"account_age": self._account_age(seller.account_age_days), "account_age": self._account_age(seller.account_age_days) if seller.account_age_days is not None else None,
"feedback_count": self._feedback_count(seller.feedback_count), "feedback_count": self._feedback_count(seller.feedback_count),
"feedback_ratio": self._feedback_ratio(seller.feedback_ratio, seller.feedback_count), "feedback_ratio": self._feedback_ratio(seller.feedback_ratio, seller.feedback_count),
"price_vs_market": self._price_vs_market(listing_price, market_median), "price_vs_market": self._price_vs_market(listing_price, market_median),

View file

@ -267,10 +267,10 @@ class TestScrapeSellers:
assert len(sellers) == 1 assert len(sellers) == 1
assert "repeatguy" in sellers assert "repeatguy" in sellers
def test_account_age_always_zero(self): def test_account_age_is_none(self):
"""account_age_days is 0 from scraper — causes score_is_partial=True.""" """account_age_days is None from scraper tier — causes score_is_partial=True."""
sellers = scrape_sellers(_EBAY_HTML) sellers = scrape_sellers(_EBAY_HTML)
assert all(s.account_age_days == 0 for s in sellers.values()) assert all(s.account_age_days is None for s in sellers.values())
def test_category_history_always_empty(self): def test_category_history_always_empty(self):
"""category_history_json is '{}' from scraper — causes score_is_partial=True.""" """category_history_json is '{}' from scraper — causes score_is_partial=True."""