diff --git a/app/db/migrations/003_nullable_account_age.sql b/app/db/migrations/003_nullable_account_age.sql
new file mode 100644
index 0000000..d35090e
--- /dev/null
+++ b/app/db/migrations/003_nullable_account_age.sql
@@ -0,0 +1,32 @@
+-- Make account_age_days nullable — scraper tier cannot fetch it without
+-- following each seller's profile link, so NULL means "not yet fetched"
+-- rather than "genuinely zero days old". This prevents false new_account
+-- flags for all scraped listings.
+--
+-- SQLite doesn't support ALTER COLUMN, so we recreate the sellers table.
+
+CREATE TABLE sellers_new (
+    id                    INTEGER PRIMARY KEY AUTOINCREMENT,
+    platform              TEXT    NOT NULL,
+    platform_seller_id    TEXT    NOT NULL,
+    username              TEXT    NOT NULL,
+    account_age_days      INTEGER,          -- NULL = not yet fetched
+    feedback_count        INTEGER NOT NULL,
+    feedback_ratio        REAL    NOT NULL,
+    category_history_json TEXT    NOT NULL DEFAULT '{}',
+    fetched_at            TEXT    DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE(platform, platform_seller_id)
+);
+
+-- Copy by explicit column name rather than SELECT *: a positional copy
+-- silently misaligns data if the old table's column order ever differs.
+INSERT INTO sellers_new (
+    id, platform, platform_seller_id, username, account_age_days,
+    feedback_count, feedback_ratio, category_history_json, fetched_at
+)
+SELECT
+    id, platform, platform_seller_id, username, account_age_days,
+    feedback_count, feedback_ratio, category_history_json, fetched_at
+FROM sellers;
+DROP TABLE sellers;
+ALTER TABLE sellers_new RENAME TO sellers;
diff --git a/app/db/models.py b/app/db/models.py
index 4c47269..37ca916 100644
--- a/app/db/models.py
+++ b/app/db/models.py
@@ -9,7 +9,7 @@ class Seller:
     platform: str
     platform_seller_id: str
     username: str
-    account_age_days: int
+    account_age_days: Optional[int]  # None = not yet fetched (scraper tier)
     feedback_count: int
     feedback_ratio: float  # 0.0–1.0
     category_history_json: str  # JSON blob of past category sales
diff --git a/app/platforms/ebay/scraper.py b/app/platforms/ebay/scraper.py
index f465345..64ecd5d 100644
--- a/app/platforms/ebay/scraper.py
+++ b/app/platforms/ebay/scraper.py
@@ -202,7 +202,7 @@ def scrape_sellers(html: str) -> dict[str, Seller]:
             platform="ebay",
             platform_seller_id=username,
             username=username,
-            account_age_days=0,  # not available from search HTML
+            account_age_days=None,  # not fetched at scraper tier
             feedback_count=count,
             feedback_ratio=ratio,
             category_history_json="{}",  # not available from search HTML
diff --git a/app/trust/aggregator.py b/app/trust/aggregator.py
index cffc3cc..83d7d5d 100644
--- a/app/trust/aggregator.py
+++ b/app/trust/aggregator.py
@@ -24,7 +24,7 @@ class Aggregator:
         red_flags: list[str] = []
 
         # Hard filters
-        if seller and seller.account_age_days < HARD_FILTER_AGE_DAYS:
+        if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS:
             red_flags.append("new_account")
         if seller and (
             seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
@@ -33,7 +33,7 @@ class Aggregator:
             red_flags.append("established_bad_actor")
 
         # Soft flags
-        if seller and seller.account_age_days < 30:
+        if seller and seller.account_age_days is not None and seller.account_age_days < 30:
             red_flags.append("account_under_30_days")
         if seller and seller.feedback_count < 10:
             red_flags.append("low_feedback_count")
diff --git a/app/trust/metadata.py b/app/trust/metadata.py
index 4231719..689b4ce 100644
--- a/app/trust/metadata.py
+++ b/app/trust/metadata.py
@@ -15,7 +15,7 @@ class MetadataScorer:
         listing_price: float,
     ) -> dict[str, Optional[int]]:
         return {
-            "account_age": self._account_age(seller.account_age_days),
+            "account_age": self._account_age(seller.account_age_days) if seller.account_age_days is not None else None,
             "feedback_count": self._feedback_count(seller.feedback_count),
             "feedback_ratio": self._feedback_ratio(seller.feedback_ratio, seller.feedback_count),
             "price_vs_market": self._price_vs_market(listing_price, market_median),
diff --git a/tests/platforms/test_ebay_scraper.py b/tests/platforms/test_ebay_scraper.py
index 9df1d6d..8e9a6e5 100644
--- a/tests/platforms/test_ebay_scraper.py
+++ b/tests/platforms/test_ebay_scraper.py
@@ -267,10 +267,10 @@ class TestScrapeSellers:
         assert len(sellers) == 1
         assert "repeatguy" in sellers
 
-    def test_account_age_always_zero(self):
-        """account_age_days is 0 from scraper — causes score_is_partial=True."""
+    def test_account_age_is_none(self):
+        """account_age_days is None from scraper tier — causes score_is_partial=True."""
         sellers = scrape_sellers(_EBAY_HTML)
-        assert all(s.account_age_days == 0 for s in sellers.values())
+        assert all(s.account_age_days is None for s in sellers.values())
 
     def test_category_history_always_empty(self):
         """category_history_json is '{}' from scraper — causes score_is_partial=True."""