fix(trust): soften established_bad_actor for high-volume sellers; add declining_ratio flag

Fixes a false-positive edge case (snipe#52) where sellers with 500+ lifetime feedback were hard-flagged as established_bad_actor when the 12-month ratio dipped below 80% — even though the 12-month window may cover only a small recent sample relative to lifetime history.

Changes:
- established_bad_actor hard filter (ratio <80%) now only fires for accounts with more than 20 and at most 500 lifetime feedback (unchanged behavior for moderate accounts)
- Accounts >500 feedback with ratio from 60% up to (but not including) 80%: new declining_ratio soft flag (composite score penalised but not zeroed, no hard block)
- Accounts >500 feedback with ratio <60%: still established_bad_actor (catastrophically bad even for high-volume sellers)
- Two new constants: HARD_FILTER_BAD_RATIO_MAX_COUNT=500, HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD=0.60

Note: detecting buyer-feedback-only accounts (lifetime buyer history inflating feedback_count for new sellers) requires profile-page scraping — tracked in snipe#52 as medium-term work.

Tests: 22 passed
2026-04-27 12:54:51 -07:00 · 2026-04-27 12:54:51 -07:00 · 05f845962f
commit 05f845962f
parent 0354234f86
2 changed files with 61 additions and 5 deletions
--- a/app/trust/aggregator.py
+++ b/app/trust/aggregator.py
@@ -11,6 +11,15 @@ HARD_FILTER_AGE_DAYS = 7
 HARD_FILTER_BAD_RATIO_MIN_COUNT = 20
 HARD_FILTER_BAD_RATIO_THRESHOLD = 0.80

+# Above this lifetime count the 12-month ratio may cover only a tiny recent sample,
+# making a hard bad-actor flag disproportionate.  Instead we emit the softer
+# "declining_ratio" flag and let the composite score carry the penalty.
+# Note: buyer-feedback-only accounts (e.g. longtime buyers who start selling) are a
+# related edge case that requires profile-page scraping to detect properly — tracked
+# in snipe#52 as a medium-term fix.
+HARD_FILTER_BAD_RATIO_MAX_COUNT = 500
+HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD = 0.60  # catastrophically bad even for high-volume
+
 # Sellers above this feedback count are treated as established retailers.
 # Stock photo reuse (duplicate_photo) is suppressed for them — large retailers
 # legitimately share manufacturer images across many listings.
@@ -117,11 +126,18 @@ class Aggregator:
        # Hard filters
        if seller and seller.account_age_days is not None and seller.account_age_days < HARD_FILTER_AGE_DAYS:
            red_flags.append("new_account")
-        if seller and (
-            seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD
-            and seller.feedback_count > HARD_FILTER_BAD_RATIO_MIN_COUNT
-        ):
+        if seller and seller.feedback_ratio < HARD_FILTER_BAD_RATIO_THRESHOLD:
+            if HARD_FILTER_BAD_RATIO_MIN_COUNT < seller.feedback_count <= HARD_FILTER_BAD_RATIO_MAX_COUNT:
+                # Moderate-volume account with consistently bad ratio → hard flag.
                red_flags.append("established_bad_actor")
+            elif seller.feedback_count > HARD_FILTER_BAD_RATIO_MAX_COUNT:
+                if seller.feedback_ratio < HARD_FILTER_BAD_RATIO_HIGH_THRESHOLD:
+                    # High-volume seller with catastrophic ratio → still hard flag.
+                    red_flags.append("established_bad_actor")
+                else:
+                    # High-volume seller with declining but not catastrophic ratio.
+                    # 12-month window may cover only a small recent sample — soft flag only.
+                    red_flags.append("declining_ratio")
        if seller and seller.feedback_count == 0:
            red_flags.append("zero_feedback")
            # Zero feedback is a deliberate signal, not missing data — cap composite score
--- a/tests/trust/test_aggregator.py
+++ b/tests/trust/test_aggregator.py
@@ -232,6 +232,46 @@ def test_significant_price_drop_not_flagged_when_no_prior_price():
    assert "significant_price_drop" not in result.red_flags_json


+# ── declining_ratio (high-volume seller edge case, snipe#52) ─────────────────
+
+def test_declining_ratio_soft_flag_for_high_volume_seller():
+    """High-volume seller (count > 500) with declining but not catastrophic ratio
+    gets declining_ratio soft flag, NOT the hard established_bad_actor flag.
+
+    Edge case: 12-month ratio may reflect only a small recent sample for sellers
+    with large lifetime feedback counts — hard-flagging is disproportionate.
+    """
+    agg = Aggregator()
+    scores = {k: 10 for k in ["account_age", "feedback_count",
+                               "feedback_ratio", "price_vs_market", "category_history"]}
+    high_vol = Seller(
+        platform="ebay", platform_seller_id="u", username="u",
+        account_age_days=2000, feedback_count=800,  # count > 500
+        feedback_ratio=0.75,                         # < 0.80 but > 0.60
+        category_history_json="{}",
+    )
+    result = agg.aggregate(scores, photo_hash_duplicate=False, seller=high_vol)
+    assert "declining_ratio" in result.red_flags_json
+    assert "established_bad_actor" not in result.red_flags_json
+
+
+def test_established_bad_actor_still_fires_for_catastrophic_high_volume_ratio():
+    """High-volume seller (count > 500) with catastrophically bad ratio (< 60%)
+    still gets the hard established_bad_actor flag — not just declining_ratio."""
+    agg = Aggregator()
+    scores = {k: 10 for k in ["account_age", "feedback_count",
+                               "feedback_ratio", "price_vs_market", "category_history"]}
+    bad_high_vol = Seller(
+        platform="ebay", platform_seller_id="u", username="u",
+        account_age_days=2000, feedback_count=800,
+        feedback_ratio=0.50,  # < 0.60 threshold → still hard flag
+        category_history_json="{}",
+    )
+    result = agg.aggregate(scores, photo_hash_duplicate=False, seller=bad_high_vol)
+    assert "established_bad_actor" in result.red_flags_json
+    assert "declining_ratio" not in result.red_flags_json
+
+
 # ── established retailer ──────────────────────────────────────────────────────

 def test_established_retailer_suppresses_duplicate_photo():