snipe/app/trust/metadata.py
pyr0ball eb05be0612
Some checks are pending
CI / API — lint + test (pull_request) Waiting to run
CI / Web — typecheck + test + build (pull_request) Waiting to run
feat: wire Forgejo Actions CI/CD workflows (#22)
- ci.yml: API lint (ruff F+I) + pytest, web vue-tsc + vitest + build
- mirror.yml: push to GitHub (CircuitForgeLLC) + Codeberg (CircuitForge) on main/tags
- release.yml: Docker build → Forgejo registry + release via API; GHCR deferred pending BSL policy (cf-agents#3)
- .cliff.toml: git-cliff changelog config for semver releases
- pyproject.toml: add [dev] extras (pytest, ruff), ruff config
- Fix 45 ruff violations across codebase (import sorting, unused vars, unused imports)
2026-04-06 00:00:28 -07:00

79 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Five metadata trust signals, each scored 0-20."""
from __future__ import annotations
import json
from typing import Optional
from app.db.models import Seller
# Category keys that count as "electronics" when a seller's per-category
# sales history is tallied.
ELECTRONICS_CATEGORIES = {
    "CELL_PHONES",
    "COMPUTERS_TABLETS",
    "ELECTRONICS",
    "VIDEO_GAMES",
}

# Upper bound on the price coefficient of variation (stddev / mean). Past this
# value the price spread is treated as too heterogeneous for the market median
# to be a trustworthy scam-detection baseline.
# Example: a "Lenovo RTX intel" search mixes $200 old ThinkPads with $2000
# Legions, giving a CV of roughly 1.0 or more.
_HETEROGENEOUS_CV_THRESHOLD = 0.6
class MetadataScorer:
    """Score a listing on five metadata trust signals, each worth 0-20.

    Each signal is bucketed independently by a private helper. A signal
    whose input is missing or unusable is reported as ``None`` rather than
    as a number, so callers can tell "bad" apart from "unknown".
    """

    def score(
        self,
        seller: Seller,
        market_median: Optional[float],
        listing_price: float,
        price_cv: Optional[float] = None,
    ) -> dict[str, Optional[int]]:
        """Return the five signal scores keyed by signal name.

        Args:
            seller: Seller record. Reads ``account_age_days``,
                ``feedback_count``, ``feedback_ratio`` and
                ``category_history_json``.
            market_median: Median market price for comparable listings, or
                ``None`` when unavailable.
            listing_price: Asking price of the listing being scored.
            price_cv: Coefficient of variation of the market prices, or
                ``None`` when unknown.

        Returns:
            A dict with the keys ``account_age``, ``feedback_count``,
            ``feedback_ratio``, ``price_vs_market`` and ``category_history``.
            Values are ints in 0-20, or ``None`` where the signal could not
            be computed.
        """
        # Account age is the only seller field guarded against None here.
        account_age = None
        if seller.account_age_days is not None:
            account_age = self._account_age(seller.account_age_days)

        return {
            "account_age": account_age,
            "feedback_count": self._feedback_count(seller.feedback_count),
            "feedback_ratio": self._feedback_ratio(
                seller.feedback_ratio, seller.feedback_count
            ),
            "price_vs_market": self._price_vs_market(
                listing_price, market_median, price_cv
            ),
            "category_history": self._category_history(
                seller.category_history_json
            ),
        }

    def _account_age(self, days: int) -> int:
        """Bucket account age in days: older accounts score higher."""
        if days < 7:
            return 0
        if days < 30:
            return 5
        if days < 90:
            return 10
        if days < 365:
            return 15
        return 20

    def _feedback_count(self, count: int) -> int:
        """Bucket the number of feedback entries: more feedback scores higher."""
        if count < 3:
            return 0
        if count < 10:
            return 5
        if count < 50:
            return 10
        if count < 200:
            return 15
        return 20

    def _feedback_ratio(self, ratio: float, count: int) -> int:
        """Bucket the feedback ratio, taking the feedback volume into account.

        A ratio below 0.80 only scores 0 when backed by more than 20
        feedback entries; with 20 or fewer it falls through to the 5-point
        bucket.
        NOTE(review): thresholds suggest ``ratio`` is a 0-1 fraction of
        positive feedback - confirm against the Seller model.
        """
        if ratio < 0.80 and count > 20:
            return 0
        if ratio < 0.90:
            return 5
        if ratio < 0.95:
            return 10
        if ratio < 0.98:
            return 15
        return 20

    def _price_vs_market(
        self,
        price: float,
        median: Optional[float],
        price_cv: Optional[float] = None,
    ) -> Optional[int]:
        """Score the listing price relative to the market median.

        Returns ``None`` when no usable baseline exists: the median is
        missing, non-positive, or the price distribution is too
        heterogeneous (``price_cv`` above ``_HETEROGENEOUS_CV_THRESHOLD``).
        """
        if median is None:
            return None  # data unavailable → aggregator sets score_is_partial
        if median <= 0:
            return None  # cannot form a meaningful ratio
        if price_cv is not None and price_cv > _HETEROGENEOUS_CV_THRESHOLD:
            return None  # mixed model/generation search — median is unreliable

        ratio = price / median
        if ratio < 0.50:
            return 0  # >50% below = scam
        if ratio < 0.70:
            return 5  # >30% below = suspicious
        if ratio < 0.85:
            return 10
        if ratio <= 1.20:
            return 20
        return 15  # above market = still ok, just expensive

    def _category_history(self, category_history_json: str) -> Optional[int]:
        """Score how many of the seller's past sales were in electronics.

        Args:
            category_history_json: JSON object mapping category keys to
                sale counts.

        Returns:
            0-20 based on the summed counts for ``ELECTRONICS_CATEGORIES``,
            or ``None`` when the payload cannot be parsed, is not a JSON
            object, or is empty.
        """
        try:
            history = json.loads(category_history_json)
        except (ValueError, TypeError):
            return None  # unparseable → data unavailable

        # Valid JSON that is not an object (list, number, string) carries no
        # per-category data; treat it as unavailable instead of crashing on
        # ``.items()``.
        if not isinstance(history, dict) or not history:
            return None

        electronics_sales = sum(
            sold
            for category, sold in history.items()
            if category in ELECTRONICS_CATEGORIES
        )
        if electronics_sales == 0:
            return 0
        if electronics_sales < 5:
            return 8
        if electronics_sales < 20:
            return 14
        return 20