feat: add job_quality deterministic trust scorer (closes #48)
12 signal functions covering staleness, repost patterns, salary transparency, ATS blackhole detection, and enrichment signals. All pure functions — no LLM, no network, no I/O. trust_score = 1 - sum(triggered weights), clamped to [0,1]. confidence reflects fraction of signals with available evidence. Salary transparency enforced for CO/CA/NY/WA/IL/MA. ATS blackhole patterns: Lever, Greenhouse, Workday, iCIMS, Taleo. 83 tests (models, all 12 signals individually, scorer). Bumps to v0.12.0.
This commit is contained in:
parent
97ab3bac85
commit
aa057b20e2
11 changed files with 1052 additions and 2 deletions
17
CHANGELOG.md
17
CHANGELOG.md
|
|
@ -6,6 +6,23 @@ Versions follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|||
|
||||
---
|
||||
|
||||
## [0.12.0] — 2026-04-20
|
||||
|
||||
### Added
|
||||
|
||||
**`circuitforge_core.job_quality`** — deterministic trust scorer for job listings (MIT, closes #48)
|
||||
|
||||
Pure signal processing module. No LLM calls, no network calls, no file I/O. Fully auditable and independently unit-testable per signal.
|
||||
|
||||
- `models.py` — `JobListing`, `JobEnrichment`, `SignalResult`, `JobQualityScore` (Pydantic)
|
||||
- `signals.py` — 12 signal functions with weights: `listing_age` (0.25), `repost_detected` (0.25), `no_salary_transparency` (0.20), `always_open_pattern` (0.20), `staffing_agency` (0.15), `requirement_overload` (0.12), `layoff_news` (0.12), `jd_vagueness` (0.10), `ats_blackhole` (0.10), `high_applicant_count` (0.08), `poor_response_history` (0.08), `weekend_posted` (0.04)
|
||||
- `scorer.py` — `score_job(listing, enrichment=None) -> JobQualityScore`; trust_score = 1 − clamp(sum(triggered weights), 0, 1); confidence = fraction of signals with available evidence
|
||||
- Salary transparency enforcement for CO, CA, NY, WA, IL, MA; ATS blackhole detection (Lever, Greenhouse, Workday, iCIMS, Taleo)
|
||||
- `ALL_SIGNALS` registry for iteration and extension
|
||||
- 83 tests across models, signals (all 12 individually), and scorer — 100% pass
|
||||
|
||||
---
|
||||
|
||||
## [0.11.0] — 2026-04-20
|
||||
|
||||
### Added
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
__version__ = "0.11.0"
|
||||
__version__ = "0.12.0"
|
||||
|
||||
try:
|
||||
from circuitforge_core.community import CommunityDB, CommunityPost, SharedStore
|
||||
|
|
|
|||
23
circuitforge_core/job_quality/__init__.py
Normal file
23
circuitforge_core/job_quality/__init__.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
"""
|
||||
circuitforge_core.job_quality — deterministic trust scorer for job listings.
|
||||
|
||||
MIT licensed.
|
||||
"""
|
||||
|
||||
from circuitforge_core.job_quality.models import (
|
||||
JobEnrichment,
|
||||
JobListing,
|
||||
JobQualityScore,
|
||||
SignalResult,
|
||||
)
|
||||
from circuitforge_core.job_quality.scorer import score_job
|
||||
from circuitforge_core.job_quality.signals import ALL_SIGNALS
|
||||
|
||||
__all__ = [
|
||||
"JobEnrichment",
|
||||
"JobListing",
|
||||
"JobQualityScore",
|
||||
"SignalResult",
|
||||
"score_job",
|
||||
"ALL_SIGNALS",
|
||||
]
|
||||
70
circuitforge_core/job_quality/models.py
Normal file
70
circuitforge_core/job_quality/models.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
"""
|
||||
Pydantic models for the job_quality trust scorer.
|
||||
|
||||
MIT licensed — no LLM calls, no network calls, no file I/O.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class JobListing(BaseModel):
    """A single job posting as delivered by a job-board scraper or ATS export.

    Every field carries a default, so a partially populated listing can still
    be constructed.
    """

    # --- Core identity ---
    title: str = ""
    company: str = ""
    location: str = ""
    # Two-letter US state code, e.g. "CA".
    state_code: str = ""

    # --- Salary / compensation ---
    salary_min: float | None = None
    salary_max: float | None = None
    # Raw salary string taken from the listing.
    salary_text: str = ""

    # --- Posting metadata ---
    posted_at: datetime | None = None
    # Number of times the same listing has been reposted.
    repost_count: int = 0
    applicant_count: int | None = None
    is_staffing_agency: bool = False
    # Evergreen / always-accepting flag.
    is_always_open: bool = False

    # --- Content ---
    description: str = ""
    requirements: list[str] = Field(default_factory=list)
    # ATS apply URL (Greenhouse, Lever, Workday, etc.).
    ats_url: str = ""

    # --- Signals from scraper enrichment ---
    # Posted on a Saturday or Sunday.
    weekend_posted: bool = False
|
||||
|
||||
|
||||
class JobEnrichment(BaseModel):
    """Optional data collected outside the listing itself (news, response history, etc.)."""

    # Recent layoff news exists for this company.
    has_layoff_news: bool = False
    # Average recruiter response time, in days.
    avg_response_days: float | None = None
    # Fraction of applicants who received no response (0–1).
    no_response_rate: float | None = None
|
||||
|
||||
|
||||
class SignalResult(BaseModel):
    """What one signal function reports back for a listing."""

    name: str
    triggered: bool
    weight: float
    # weight * triggered (0.0 when not triggered).
    penalty: float
    # Human-readable explanation of the outcome.
    detail: str = ""
|
||||
|
||||
|
||||
class JobQualityScore(BaseModel):
    """Combined trust score for one job listing, plus the per-signal breakdown."""

    # 0.0 (low trust) – 1.0 (high trust).
    trust_score: float
    # 0.0 – 1.0: fraction of signals with available evidence.
    confidence: float
    signals: list[SignalResult]
    # Sum of triggered weights before clamping.
    raw_penalty: float
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
60
circuitforge_core/job_quality/scorer.py
Normal file
60
circuitforge_core/job_quality/scorer.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
"""
|
||||
score_job: aggregate all signals into a JobQualityScore.
|
||||
|
||||
MIT licensed — pure function, no I/O.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from circuitforge_core.job_quality.models import JobEnrichment, JobListing, JobQualityScore, SignalResult
|
||||
from circuitforge_core.job_quality.signals import ALL_SIGNALS
|
||||
|
||||
|
||||
def score_job(
    listing: JobListing,
    enrichment: JobEnrichment | None = None,
) -> JobQualityScore:
    """
    Run every registered signal against a listing and aggregate the outcome.

    The raw penalty is the sum of the ``penalty`` of every signal result.
    ``trust_score`` is ``1 - raw_penalty`` clamped to ``[0, 1]``.

    ``confidence`` is the share of signals that were evaluable. A signal counts
    as evaluable when it triggered, or when ``_has_data`` does not recognise its
    detail text as a missing-data message. Unevaluable signals lower confidence
    without adding any penalty.

    All three numbers are rounded to 4 decimal places in the returned score.
    """
    outcomes: list[SignalResult] = [check(listing, enrichment) for check in ALL_SIGNALS]

    # Count the signals that either fired or had enough data to decide they didn't.
    usable = sum(1 for outcome in outcomes if outcome.triggered or _has_data(outcome))

    raw_penalty = sum(outcome.penalty for outcome in outcomes)
    trust_score = max(0.0, min(1.0, 1.0 - raw_penalty))

    # Guard against an empty registry so the division cannot fail.
    if ALL_SIGNALS:
        confidence = usable / len(ALL_SIGNALS)
    else:
        confidence = 0.0

    return JobQualityScore(
        trust_score=round(trust_score, 4),
        confidence=round(confidence, 4),
        signals=outcomes,
        raw_penalty=round(raw_penalty, 4),
    )
|
||||
|
||||
|
||||
def _has_data(result: SignalResult) -> bool:
    """Tell whether a signal's detail text shows it evaluated real data.

    Returns False as soon as the detail contains (case-insensitively) one of
    the known missing-data phrases; True otherwise.
    """
    missing_data_markers = (
        "not available",
        "No enrichment",
        "No posted_at",
        "No response rate",
        "No salary information",
    )
    detail_folded = result.detail.lower()
    for marker in missing_data_markers:
        if marker.lower() in detail_folded:
            return False
    return True
|
||||
275
circuitforge_core/job_quality/signals.py
Normal file
275
circuitforge_core/job_quality/signals.py
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
"""
|
||||
Individual signal functions for the job_quality trust scorer.
|
||||
|
||||
Each function takes a JobListing and optional JobEnrichment and returns a SignalResult.
|
||||
All signals are pure functions: no I/O, no LLM calls, no side effects.
|
||||
|
||||
MIT licensed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from circuitforge_core.job_quality.models import JobEnrichment, JobListing, SignalResult
|
||||
|
||||
# US states with salary transparency laws (as of 2026)
_SALARY_TRANSPARENCY_STATES = {"CO", "CA", "NY", "WA", "IL", "MA"}

# ATS providers whose apply URLs are commonly associated with high ghosting rates
_GHOSTING_ATS_PATTERNS = ("lever.co", "greenhouse.io", "workday.com", "icims.com", "taleo.net")

# Repost count at or above which a listing is treated as repeatedly reposted.
# NOTE: despite the name, this is compared against repost_count by
# repost_detected; the always_open_pattern signal reads a separate flag.
_ALWAYS_OPEN_REPOST_THRESHOLD = 3

# Requirement count above which a listing is considered overloaded
_REQUIREMENT_OVERLOAD_COUNT = 12

# Vagueness: description length below this suggests bare-minimum content
_VAGUE_DESCRIPTION_CHARS = 400

# Applicant count above which competition is considered very high
_HIGH_APPLICANT_THRESHOLD = 200

# Listing age (in days) above which staleness is likely
_STALE_DAYS = 30

# No-response rate (fraction of applicants who never hear back) above which
# the company is considered a high-ghosting source
_NO_RESPONSE_RATE_THRESHOLD = 0.60
|
||||
|
||||
|
||||
def _now() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    current_utc = datetime.now(timezone.utc)
    return current_utc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# High-weight signals (0.15 – 0.25)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def listing_age(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing older than ``_STALE_DAYS`` days as likely stale or already filled.

    Args:
        listing: The job listing; only ``posted_at`` is read.
        _: Unused enrichment slot, accepted so all signals share one signature.

    Returns:
        A ``SignalResult`` named ``"listing_age"``. When ``posted_at`` is missing
        the signal does not trigger and reports that no date is available.
    """
    weight = 0.25
    posted_at = listing.posted_at
    if posted_at is None:
        return SignalResult(name="listing_age", triggered=False, weight=weight, penalty=0.0,
                            detail="No posted_at date available.")

    # A naive datetime has no offset, and ``astimezone`` would interpret it in the
    # host machine's local timezone, making the age depend on where the code runs.
    # Treat naive timestamps as UTC so the result is host-independent.
    if posted_at.tzinfo is None or posted_at.utcoffset() is None:
        posted_at = posted_at.replace(tzinfo=timezone.utc)

    age_days = (_now() - posted_at.astimezone(timezone.utc)).days
    triggered = age_days > _STALE_DAYS
    return SignalResult(
        name="listing_age",
        triggered=triggered,
        weight=weight,
        penalty=weight if triggered else 0.0,
        detail=f"Listing is {age_days} days old (threshold: {_STALE_DAYS}).",
    )
|
||||
|
||||
|
||||
def repost_detected(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing whose repost count has reached the repost threshold."""
    weight = 0.25
    reposts = listing.repost_count
    hit = reposts >= _ALWAYS_OPEN_REPOST_THRESHOLD
    if hit:
        penalty = weight
    else:
        penalty = 0.0
    return SignalResult(
        name="repost_detected",
        triggered=hit,
        weight=weight,
        penalty=penalty,
        detail=f"Repost count: {reposts} (threshold: {_ALWAYS_OPEN_REPOST_THRESHOLD}).",
    )
|
||||
|
||||
|
||||
def no_salary_transparency(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing that discloses no salary, noting when a transparency-law state is involved."""
    weight = 0.20
    range_missing = listing.salary_min is None and listing.salary_max is None
    text_missing = not listing.salary_text.strip()
    in_transparency_state = listing.state_code.upper() in _SALARY_TRANSPARENCY_STATES

    # Salary counts as disclosed when either bound or any non-blank text is present.
    if not (range_missing and text_missing):
        return SignalResult(
            name="no_salary_transparency",
            triggered=False,
            weight=weight,
            penalty=0.0,
            detail="Salary information present.",
        )

    if in_transparency_state:
        explanation = (f"No salary disclosed despite {listing.state_code} transparency law. "
                       "Possible compliance violation.")
    else:
        explanation = "No salary information provided."

    return SignalResult(
        name="no_salary_transparency",
        triggered=True,
        weight=weight,
        penalty=weight,
        detail=explanation,
    )
|
||||
|
||||
|
||||
def always_open_pattern(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing that is marked as always-accepting / evergreen."""
    weight = 0.20
    evergreen = listing.is_always_open
    if evergreen:
        penalty = weight
        explanation = "Listing marked as always-open/evergreen."
    else:
        penalty = 0.0
        explanation = "Not always-open."
    return SignalResult(
        name="always_open_pattern",
        triggered=evergreen,
        weight=weight,
        penalty=penalty,
        detail=explanation,
    )
|
||||
|
||||
|
||||
def staffing_agency(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing posted by a staffing/recruiting agency instead of the employer."""
    weight = 0.15
    via_agency = listing.is_staffing_agency
    if via_agency:
        penalty = weight
        explanation = "Listed by a staffing/recruiting agency."
    else:
        penalty = 0.0
        explanation = "Direct employer listing."
    return SignalResult(
        name="staffing_agency",
        triggered=via_agency,
        weight=weight,
        penalty=penalty,
        detail=explanation,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Medium-weight signals (0.08 – 0.12)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def requirement_overload(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing whose requirements list exceeds the overload threshold."""
    weight = 0.12
    requirement_total = len(listing.requirements)
    overloaded = requirement_total > _REQUIREMENT_OVERLOAD_COUNT
    penalty = weight if overloaded else 0.0
    explanation = f"{requirement_total} requirements listed (threshold: {_REQUIREMENT_OVERLOAD_COUNT})."
    return SignalResult(
        name="requirement_overload",
        triggered=overloaded,
        weight=weight,
        penalty=penalty,
        detail=explanation,
    )
|
||||
|
||||
|
||||
def layoff_news(listing: JobListing, enrichment: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing whose company has recent layoff news in the enrichment data.

    Without enrichment the signal cannot be evaluated and does not trigger.
    """
    weight = 0.12
    if enrichment is None:
        return SignalResult(name="layoff_news", triggered=False, weight=weight, penalty=0.0,
                            detail="No enrichment data available.")

    has_news = enrichment.has_layoff_news
    if has_news:
        penalty = weight
        explanation = "Recent layoff news detected for this company."
    else:
        penalty = 0.0
        explanation = "No layoff news found."
    return SignalResult(
        name="layoff_news",
        triggered=has_news,
        weight=weight,
        penalty=penalty,
        detail=explanation,
    )
|
||||
|
||||
|
||||
def jd_vagueness(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing whose whitespace-trimmed description is shorter than the vagueness threshold."""
    weight = 0.10
    trimmed = listing.description.strip()
    char_count = len(trimmed)
    too_short = char_count < _VAGUE_DESCRIPTION_CHARS
    penalty = weight if too_short else 0.0
    return SignalResult(
        name="jd_vagueness",
        triggered=too_short,
        weight=weight,
        penalty=penalty,
        detail=f"Description is {char_count} characters (threshold: {_VAGUE_DESCRIPTION_CHARS}).",
    )
|
||||
|
||||
|
||||
def ats_blackhole(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing whose apply URL points at one of the known high-ghosting ATS domains.

    Matching is case-insensitive and domain-boundary aware: a pattern such as
    ``lever.co`` matches ``jobs.lever.co/acme`` (and the same host embedded in a
    redirect URL), but not unrelated domains that merely contain the text, such
    as ``clever.com`` or ``lever.community``.

    Args:
        listing: The job listing; only ``ats_url`` is read.
        _: Unused enrichment slot, accepted so all signals share one signature.

    Returns:
        A ``SignalResult`` named ``"ats_blackhole"``; the detail names the first
        matching pattern when triggered.
    """
    import re  # local import keeps the module's top-level imports untouched

    weight = 0.10
    url_lower = listing.ats_url.lower()

    def _is_domain_hit(pattern: str) -> bool:
        # The character before the pattern must not be part of a hostname label
        # (rejects "clever.com"), and the pattern must not continue into a longer
        # label or a further ".label" (rejects "lever.community", "lever.co.uk").
        boundary_regex = rf"(?<![a-z0-9-]){re.escape(pattern)}(?![a-z0-9-])(?!\.[a-z0-9])"
        return re.search(boundary_regex, url_lower) is not None

    matched = next((p for p in _GHOSTING_ATS_PATTERNS if _is_domain_hit(p)), None)
    triggered = matched is not None
    return SignalResult(
        name="ats_blackhole",
        triggered=triggered,
        weight=weight,
        penalty=weight if triggered else 0.0,
        detail=f"ATS matches high-ghosting pattern '{matched}'." if triggered else "No high-ghosting ATS detected.",
    )
|
||||
|
||||
|
||||
def high_applicant_count(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing whose applicant count exceeds the high-applicant threshold.

    A missing applicant count leaves the signal untriggered.
    """
    weight = 0.08
    applicants = listing.applicant_count
    if applicants is None:
        return SignalResult(name="high_applicant_count", triggered=False, weight=weight, penalty=0.0,
                            detail="Applicant count not available.")

    crowded = applicants > _HIGH_APPLICANT_THRESHOLD
    penalty = weight if crowded else 0.0
    return SignalResult(
        name="high_applicant_count",
        triggered=crowded,
        weight=weight,
        penalty=penalty,
        detail=f"{applicants} applicants (threshold: {_HIGH_APPLICANT_THRESHOLD}).",
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Low-weight signals (0.04 – 0.08)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def weekend_posted(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing posted on a Saturday or Sunday.

    The scraper-supplied ``weekend_posted`` flag takes precedence; otherwise the
    weekday of ``posted_at`` decides. With neither available the signal is skipped.
    """
    weight = 0.04
    flagged = listing.weekend_posted
    posted_at = listing.posted_at

    if not flagged and posted_at is None:
        return SignalResult(name="weekend_posted", triggered=False, weight=weight, penalty=0.0,
                            detail="No posted_at date available.")

    # weekday(): Monday is 0, so 5 and 6 are Saturday and Sunday.
    on_weekend = True if flagged else posted_at.weekday() >= 5  # type: ignore[union-attr]

    if on_weekend:
        penalty = weight
        explanation = "Posted on a weekend."
    else:
        penalty = 0.0
        explanation = "Posted on a weekday."
    return SignalResult(
        name="weekend_posted",
        triggered=on_weekend,
        weight=weight,
        penalty=penalty,
        detail=explanation,
    )
|
||||
|
||||
|
||||
def poor_response_history(listing: JobListing, enrichment: JobEnrichment | None = None) -> SignalResult:
    """Flag a listing whose enrichment shows a no-response rate above the threshold.

    The signal is skipped when there is no enrichment, or when the enrichment
    carries no ``no_response_rate``.
    """
    weight = 0.08

    def _skipped(reason: str) -> SignalResult:
        # Shared shape for both missing-data outcomes.
        return SignalResult(name="poor_response_history", triggered=False, weight=weight, penalty=0.0,
                            detail=reason)

    if enrichment is None:
        return _skipped("No enrichment data available.")

    rate = enrichment.no_response_rate
    if rate is None:
        return _skipped("No response rate data available.")

    ghosting = rate > _NO_RESPONSE_RATE_THRESHOLD
    penalty = weight if ghosting else 0.0
    return SignalResult(
        name="poor_response_history",
        triggered=ghosting,
        weight=weight,
        penalty=penalty,
        detail=f"No-response rate: {rate:.0%} (threshold: {_NO_RESPONSE_RATE_THRESHOLD:.0%}).",
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Signal registry — the scorer evaluates signals in this order.
# Grouped by weight tier (high, then medium, then low); within the low tier the
# order is NOT strictly weight-descending (weekend_posted 0.04 comes before
# poor_response_history 0.08).
# ---------------------------------------------------------------------------

ALL_SIGNALS = [
    listing_age,
    repost_detected,
    no_salary_transparency,
    always_open_pattern,
    staffing_agency,
    requirement_overload,
    layoff_news,
    jd_vagueness,
    ats_blackhole,
    high_applicant_count,
    weekend_posted,
    poor_response_history,
]
|
||||
|
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||
|
||||
[project]
|
||||
name = "circuitforge-core"
|
||||
version = "0.11.0"
|
||||
version = "0.12.0"
|
||||
description = "Shared scaffold for CircuitForge products (MIT)"
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
|
|
|
|||
0
tests/test_job_quality/__init__.py
Normal file
0
tests/test_job_quality/__init__.py
Normal file
106
tests/test_job_quality/test_models.py
Normal file
106
tests/test_job_quality/test_models.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
"""Tests for job_quality Pydantic models — construction, defaults, and field types."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from circuitforge_core.job_quality.models import (
|
||||
JobEnrichment,
|
||||
JobListing,
|
||||
JobQualityScore,
|
||||
SignalResult,
|
||||
)
|
||||
|
||||
|
||||
class TestJobListing:
    """Construction and default-value checks for ``JobListing``."""

    def test_minimal_construction(self):
        listing = JobListing()
        assert listing.title == ""
        assert listing.requirements == []
        assert listing.salary_min is None

    def test_full_construction(self):
        listing = JobListing(
            title="Staff Engineer",
            company="Acme Corp",
            location="Remote",
            state_code="CA",
            salary_min=150_000,
            salary_max=200_000,
            salary_text="$150k–$200k",
            posted_at=datetime(2026, 1, 1, tzinfo=timezone.utc),
            repost_count=2,
            applicant_count=50,
            is_staffing_agency=False,
            is_always_open=False,
            description="A real job description with meaningful content.",
            requirements=["Python", "Go"],
            ats_url="https://jobs.lever.co/acme/123",
            weekend_posted=False,
        )
        assert listing.salary_min == 150_000
        assert listing.state_code == "CA"
        assert len(listing.requirements) == 2

    def test_repost_count_defaults_zero(self):
        assert JobListing().repost_count == 0

    def test_requirements_is_independent_list(self):
        # Explicitly supplied lists are kept per instance.
        a = JobListing(requirements=["Python"])
        b = JobListing(requirements=["Go"])
        assert a.requirements != b.requirements

        # The default must be a fresh list per instance (default_factory), not
        # one shared object: mutating one default must not leak into another.
        first = JobListing()
        second = JobListing()
        first.requirements.append("Rust")
        assert second.requirements == []
        assert JobListing().requirements == []
|
||||
|
||||
|
||||
class TestJobEnrichment:
    """Default and populated construction of ``JobEnrichment``."""

    def test_defaults(self):
        enrichment = JobEnrichment()
        assert enrichment.has_layoff_news is False
        assert enrichment.avg_response_days is None
        assert enrichment.no_response_rate is None

    def test_with_data(self):
        fields = {"has_layoff_news": True, "no_response_rate": 0.75}
        enrichment = JobEnrichment(**fields)
        assert enrichment.has_layoff_news is True
        assert enrichment.no_response_rate == 0.75
|
||||
|
||||
|
||||
class TestSignalResult:
    """Construction checks for ``SignalResult``."""

    def test_construction(self):
        result = SignalResult(
            name="listing_age",
            triggered=True,
            weight=0.25,
            penalty=0.25,
            detail="30 days old.",
        )
        assert result.penalty == 0.25

    def test_not_triggered_zero_penalty(self):
        result = SignalResult(name="staffing_agency", triggered=False, weight=0.15, penalty=0.0)
        assert result.penalty == 0.0

    def test_detail_defaults_empty(self):
        result = SignalResult(name="x", triggered=False, weight=0.1, penalty=0.0)
        assert result.detail == ""
|
||||
|
||||
|
||||
class TestJobQualityScore:
    """Construction checks for ``JobQualityScore``."""

    def _make_signal(self, triggered: bool, weight: float) -> SignalResult:
        # Penalty mirrors the convention used by the signals: weight when triggered, else 0.
        penalty = weight if triggered else 0.0
        return SignalResult(name="test", triggered=triggered, weight=weight, penalty=penalty)

    def test_construction(self):
        fired = self._make_signal(True, 0.25)
        score = JobQualityScore(
            trust_score=0.75,
            confidence=0.9,
            signals=[fired],
            raw_penalty=0.25,
        )
        assert score.trust_score == 0.75
        assert score.confidence == 0.9
        assert score.raw_penalty == 0.25

    def test_metadata_defaults_empty(self):
        score = JobQualityScore(
            trust_score=1.0,
            confidence=1.0,
            signals=[],
            raw_penalty=0.0,
        )
        assert score.metadata == {}
|
||||
141
tests/test_job_quality/test_scorer.py
Normal file
141
tests/test_job_quality/test_scorer.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
"""
|
||||
Tests for score_job() — the aggregating scorer function.
|
||||
|
||||
Covers: trust_score math, confidence calculation, clamping,
|
||||
signal count, enrichment passthrough, edge cases.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from circuitforge_core.job_quality.models import JobEnrichment, JobListing
|
||||
from circuitforge_core.job_quality.scorer import score_job
|
||||
from circuitforge_core.job_quality.signals import ALL_SIGNALS
|
||||
|
||||
_NOW = datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def _days_ago(n: int) -> datetime:
    """Return the module-level ``_NOW`` timestamp shifted ``n`` days into the past."""
    offset = timedelta(days=n)
    return _NOW - offset
|
||||
|
||||
|
||||
def _clean_listing() -> JobListing:
    """A listing that should trigger no signals.

    ``posted_at`` is a recent weekday: a fixed "3 days ago" can fall on a
    Saturday or Sunday depending on when the suite runs, which would trip the
    ``weekend_posted`` signal and break the "no signals" promise.
    """
    # Any three consecutive days contain at least one weekday (weekday() < 5).
    recent_weekday = next(
        day for day in (_days_ago(n) for n in (3, 4, 5)) if day.weekday() < 5
    )
    return JobListing(
        title="Staff Engineer",
        company="Acme Corp",
        state_code="CA",
        salary_min=140_000,
        salary_max=180_000,
        posted_at=recent_weekday,
        repost_count=0,
        applicant_count=30,
        is_staffing_agency=False,
        is_always_open=False,
        description="X" * 600,
        requirements=["Python", "Go", "SQL"],
        ats_url="https://careers.acme.com/apply/123",
        weekend_posted=False,
    )
|
||||
|
||||
|
||||
def _ghost_listing() -> JobListing:
    """Build a listing meant to trip as many signals as possible."""
    ghost_fields = {
        "state_code": "",
        "posted_at": _days_ago(60),
        "repost_count": 5,
        "is_staffing_agency": True,
        "is_always_open": True,
        "applicant_count": 500,
        "requirements": ["R"] * 15,
        "description": "Great opportunity.",
        "ats_url": "https://jobs.lever.co/ghost/123",
        "weekend_posted": True,
    }
    return JobListing(**ghost_fields)
|
||||
|
||||
|
||||
class TestScoreJob:
    """Behavioural tests for ``score_job``: score math, clamping, confidence, passthrough."""

    def test_clean_listing_high_trust(self):
        score = score_job(_clean_listing(), JobEnrichment(has_layoff_news=False, no_response_rate=0.1))
        assert score.trust_score >= 0.85, f"Expected high trust, got {score.trust_score}"

    def test_ghost_listing_low_trust(self):
        score = score_job(_ghost_listing(), JobEnrichment(has_layoff_news=True, no_response_rate=0.9))
        assert score.trust_score <= 0.25, f"Expected low trust, got {score.trust_score}"

    def test_trust_score_clamped_to_1(self):
        # A bare listing is not penalty-free (missing salary and an empty
        # description both trigger); this only checks the upper bound.
        score = score_job(JobListing())
        assert score.trust_score <= 1.0

    def test_trust_score_clamped_to_0(self):
        score = score_job(_ghost_listing(), JobEnrichment(has_layoff_news=True, no_response_rate=0.9))
        # The ghost listing's triggered weights sum past 1.0, so the lower clamp
        # is what keeps the score at exactly zero instead of going negative.
        assert score.raw_penalty > 1.0
        assert score.trust_score >= 0.0
        assert score.trust_score == 0.0

    def test_returns_all_signals(self):
        score = score_job(JobListing())
        assert len(score.signals) == len(ALL_SIGNALS)

    def test_signal_names_match_registry(self):
        score = score_job(JobListing())
        score_names = {s.name for s in score.signals}
        registry_names = {fn(JobListing()).name for fn in ALL_SIGNALS}
        assert score_names == registry_names

    def test_raw_penalty_equals_sum_of_triggered_weights(self):
        score = score_job(_ghost_listing())
        expected = sum(s.penalty for s in score.signals)
        assert abs(score.raw_penalty - round(expected, 4)) < 1e-6

    def test_trust_score_equals_one_minus_penalty(self):
        score = score_job(_ghost_listing())
        expected = round(max(0.0, 1.0 - score.raw_penalty), 4)
        assert score.trust_score == expected

    def test_confidence_between_0_and_1(self):
        score = score_job(JobListing())
        assert 0.0 <= score.confidence <= 1.0

    def test_no_enrichment_reduces_confidence(self):
        score_no_enrich = score_job(_clean_listing(), None)
        score_with_enrich = score_job(_clean_listing(), JobEnrichment(has_layoff_news=False, no_response_rate=0.1))
        # Without enrichment the two enrichment-backed signals are unevaluable,
        # so confidence must be strictly lower, not merely "not higher".
        assert score_with_enrich.confidence > score_no_enrich.confidence

    def test_enrichment_is_passed_to_signals(self):
        enrichment = JobEnrichment(has_layoff_news=True)
        score = score_job(JobListing(), enrichment)
        layoff_signal = next(s for s in score.signals if s.name == "layoff_news")
        assert layoff_signal.triggered is True

    def test_metadata_empty_by_default(self):
        score = score_job(JobListing())
        assert score.metadata == {}

    def test_no_salary_in_transparency_state(self):
        listing = JobListing(state_code="CO", posted_at=_days_ago(1), repost_count=0)
        score = score_job(listing)
        salary_signal = next(s for s in score.signals if s.name == "no_salary_transparency")
        assert salary_signal.triggered is True

    def test_penalty_accumulation_is_additive(self):
        """Each triggered signal adds its weight independently."""
        listing = JobListing(
            is_staffing_agency=True,  # +0.15
            is_always_open=True,  # +0.20
        )
        score = score_job(listing)
        staffing = next(s for s in score.signals if s.name == "staffing_agency")
        always = next(s for s in score.signals if s.name == "always_open_pattern")
        assert staffing.triggered and always.triggered
        assert score.raw_penalty >= staffing.weight + always.weight - 1e-9

    def test_score_is_deterministic(self):
        listing = _ghost_listing()
        enrich = JobEnrichment(has_layoff_news=True, no_response_rate=0.8)
        s1 = score_job(listing, enrich)
        s2 = score_job(listing, enrich)
        assert s1.trust_score == s2.trust_score
        assert s1.raw_penalty == s2.raw_penalty
|
||||
358
tests/test_job_quality/test_signals.py
Normal file
358
tests/test_job_quality/test_signals.py
Normal file
|
|
@ -0,0 +1,358 @@
|
|||
"""
|
||||
Unit tests for each individual signal function.
|
||||
|
||||
Each signal is exercised for: triggered path, not-triggered path, and (where
|
||||
applicable) the missing-data / no-enrichment path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from circuitforge_core.job_quality.models import JobEnrichment, JobListing
|
||||
from circuitforge_core.job_quality.signals import (
|
||||
ALL_SIGNALS,
|
||||
always_open_pattern,
|
||||
ats_blackhole,
|
||||
high_applicant_count,
|
||||
jd_vagueness,
|
||||
layoff_news,
|
||||
listing_age,
|
||||
no_salary_transparency,
|
||||
poor_response_history,
|
||||
repost_detected,
|
||||
requirement_overload,
|
||||
staffing_agency,
|
||||
weekend_posted,
|
||||
)
|
||||
|
||||
_NOW = datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def _days_ago(n: int) -> datetime:
    """Return the module-level ``_NOW`` timestamp shifted ``n`` days into the past."""
    offset = timedelta(days=n)
    return _NOW - offset
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# listing_age
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestListingAge:
    """Triggered, untriggered, and missing-date paths of ``listing_age``."""

    def test_stale_listing_triggers(self):
        outcome = listing_age(JobListing(posted_at=_days_ago(31)))
        assert outcome.triggered is True
        assert outcome.penalty == outcome.weight

    def test_fresh_listing_does_not_trigger(self):
        outcome = listing_age(JobListing(posted_at=_days_ago(5)))
        assert outcome.triggered is False
        assert outcome.penalty == 0.0

    def test_no_posted_at_returns_not_triggered(self):
        undated = JobListing()
        outcome = listing_age(undated)
        assert outcome.triggered is False
        assert outcome.penalty == 0.0
        assert "No posted_at" in outcome.detail

    def test_weight_is_0_25(self):
        outcome = listing_age(JobListing())
        assert outcome.weight == 0.25
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# repost_detected
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRepostDetected:
    """Threshold behaviour of ``repost_detected``."""

    def test_high_repost_triggers(self):
        outcome = repost_detected(JobListing(repost_count=3))
        assert outcome.triggered is True

    def test_low_repost_does_not_trigger(self):
        outcome = repost_detected(JobListing(repost_count=1))
        assert outcome.triggered is False

    def test_zero_repost_does_not_trigger(self):
        outcome = repost_detected(JobListing(repost_count=0))
        assert outcome.triggered is False

    def test_weight_is_0_25(self):
        outcome = repost_detected(JobListing())
        assert outcome.weight == 0.25
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# no_salary_transparency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNoSalaryTransparency:
    """Checks for the ``no_salary_transparency`` signal."""

    def test_no_salary_triggers(self):
        """A TX listing with no salary information triggers the signal."""
        unpaid_info = JobListing(state_code="TX")
        outcome = no_salary_transparency(unpaid_info)
        assert outcome.triggered is True

    def test_salary_range_prevents_trigger(self):
        """Supplying both salary bounds keeps the signal off."""
        ranged = JobListing(salary_min=80_000, salary_max=120_000)
        outcome = no_salary_transparency(ranged)
        assert outcome.triggered is False

    def test_salary_text_prevents_trigger(self):
        """Supplying free-text salary keeps the signal off."""
        with_text = JobListing(salary_text="$90k")
        outcome = no_salary_transparency(with_text)
        assert outcome.triggered is False

    def test_transparency_state_detail(self):
        """For a CA listing the detail mentions the state or the word 'transparency'."""
        outcome = no_salary_transparency(JobListing(state_code="CA"))
        detail = outcome.detail
        assert "CA" in detail or "transparency" in detail.lower()

    def test_weight_is_0_20(self):
        """The signal reports a weight of 0.20."""
        outcome = no_salary_transparency(JobListing())
        assert outcome.weight == 0.20
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# always_open_pattern
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAlwaysOpenPattern:
    """Checks for the ``always_open_pattern`` signal."""

    def test_always_open_triggers(self):
        """A listing flagged ``is_always_open=True`` triggers the signal."""
        evergreen = JobListing(is_always_open=True)
        outcome = always_open_pattern(evergreen)
        assert outcome.triggered is True

    def test_not_always_open(self):
        """A listing flagged ``is_always_open=False`` does not trigger the signal."""
        bounded = JobListing(is_always_open=False)
        outcome = always_open_pattern(bounded)
        assert outcome.triggered is False

    def test_weight_is_0_20(self):
        """The signal reports a weight of 0.20."""
        outcome = always_open_pattern(JobListing())
        assert outcome.weight == 0.20
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# staffing_agency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStaffingAgency:
    """Checks for the ``staffing_agency`` signal."""

    def test_agency_triggers(self):
        """A listing flagged ``is_staffing_agency=True`` triggers the signal."""
        via_agency = JobListing(is_staffing_agency=True)
        outcome = staffing_agency(via_agency)
        assert outcome.triggered is True

    def test_direct_employer_does_not_trigger(self):
        """A listing flagged ``is_staffing_agency=False`` does not trigger the signal."""
        direct = JobListing(is_staffing_agency=False)
        outcome = staffing_agency(direct)
        assert outcome.triggered is False

    def test_weight_is_0_15(self):
        """The signal reports a weight of 0.15."""
        outcome = staffing_agency(JobListing())
        assert outcome.weight == 0.15
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# requirement_overload
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRequirementOverload:
    """Checks for the ``requirement_overload`` signal."""

    def test_overloaded_triggers(self):
        """Thirteen listed requirements trigger the signal."""
        overloaded = JobListing(requirements=["R"] * 13)
        outcome = requirement_overload(overloaded)
        assert outcome.triggered is True

    def test_reasonable_requirements_do_not_trigger(self):
        """Three listed requirements do not trigger the signal."""
        modest = JobListing(requirements=["Python", "Go", "SQL"])
        outcome = requirement_overload(modest)
        assert outcome.triggered is False

    def test_empty_requirements_does_not_trigger(self):
        """A default listing (no requirements supplied) does not trigger the signal."""
        outcome = requirement_overload(JobListing())
        assert outcome.triggered is False

    def test_weight_is_0_12(self):
        """The signal reports a weight of 0.12."""
        outcome = requirement_overload(JobListing())
        assert outcome.weight == 0.12
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# layoff_news
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLayoffNews:
    """Checks for the enrichment-driven ``layoff_news`` signal."""

    def test_layoff_news_triggers(self):
        """Enrichment with ``has_layoff_news=True`` triggers the signal."""
        outcome = layoff_news(JobListing(), JobEnrichment(has_layoff_news=True))
        assert outcome.triggered is True

    def test_no_layoff_news_does_not_trigger(self):
        """Enrichment with ``has_layoff_news=False`` does not trigger the signal."""
        outcome = layoff_news(JobListing(), JobEnrichment(has_layoff_news=False))
        assert outcome.triggered is False

    def test_no_enrichment_returns_not_triggered(self):
        """Passing ``None`` as enrichment keeps the signal off and is noted in the detail."""
        outcome = layoff_news(JobListing(), None)
        assert outcome.triggered is False
        assert "No enrichment" in outcome.detail

    def test_weight_is_0_12(self):
        """The signal reports a weight of 0.12 when called with the listing alone."""
        outcome = layoff_news(JobListing())
        assert outcome.weight == 0.12
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# jd_vagueness
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestJdVagueness:
    """Checks for the ``jd_vagueness`` signal."""

    def test_short_description_triggers(self):
        """A one-word description triggers the signal."""
        terse = JobListing(description="Short.")
        outcome = jd_vagueness(terse)
        assert outcome.triggered is True

    def test_long_description_does_not_trigger(self):
        """A 500-character description does not trigger the signal."""
        verbose = JobListing(description="X" * 500)
        outcome = jd_vagueness(verbose)
        assert outcome.triggered is False

    def test_empty_description_triggers(self):
        """An empty description triggers the signal."""
        blank = JobListing(description="")
        outcome = jd_vagueness(blank)
        assert outcome.triggered is True

    def test_weight_is_0_10(self):
        """The signal reports a weight of 0.10."""
        outcome = jd_vagueness(JobListing())
        assert outcome.weight == 0.10
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ats_blackhole
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAtsBlackhole:
    """Checks for the ``ats_blackhole`` signal."""

    # Sample application URLs on Lever, Greenhouse, Workday, iCIMS and Taleo
    # hosts, in that order; each one is fed to the parametrized test below.
    _KNOWN_ATS_URLS = [
        "https://jobs.lever.co/acme/abc",
        "https://boards.greenhouse.io/acme/jobs/123",
        "https://acme.workday.com/en-US/recruiting/job/123",
        "https://acme.icims.com/jobs/123",
        "https://acme.taleo.net/careersection/123",
    ]

    @pytest.mark.parametrize("url", _KNOWN_ATS_URLS)
    def test_known_ats_triggers(self, url: str):
        """Each sample ATS URL triggers the signal."""
        hosted = JobListing(ats_url=url)
        outcome = ats_blackhole(hosted)
        assert outcome.triggered is True

    def test_direct_url_does_not_trigger(self):
        """A URL on the employer's own careers host does not trigger the signal."""
        direct = JobListing(ats_url="https://careers.acme.com/apply/123")
        outcome = ats_blackhole(direct)
        assert outcome.triggered is False

    def test_empty_url_does_not_trigger(self):
        """An empty ``ats_url`` does not trigger the signal."""
        no_url = JobListing(ats_url="")
        outcome = ats_blackhole(no_url)
        assert outcome.triggered is False

    def test_weight_is_0_10(self):
        """The signal reports a weight of 0.10."""
        outcome = ats_blackhole(JobListing())
        assert outcome.weight == 0.10
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# high_applicant_count
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHighApplicantCount:
    """Checks for the ``high_applicant_count`` signal."""

    def test_high_count_triggers(self):
        """An applicant count of 201 triggers the signal."""
        crowded = JobListing(applicant_count=201)
        outcome = high_applicant_count(crowded)
        assert outcome.triggered is True

    def test_low_count_does_not_trigger(self):
        """An applicant count of 10 does not trigger the signal."""
        quiet = JobListing(applicant_count=10)
        outcome = high_applicant_count(quiet)
        assert outcome.triggered is False

    def test_none_count_returns_not_triggered(self):
        """A ``None`` applicant count keeps the signal off and is reported as not available."""
        unknown = JobListing(applicant_count=None)
        outcome = high_applicant_count(unknown)
        assert outcome.triggered is False
        assert "not available" in outcome.detail.lower()

    def test_weight_is_0_08(self):
        """The signal reports a weight of 0.08."""
        outcome = high_applicant_count(JobListing())
        assert outcome.weight == 0.08
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# weekend_posted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWeekendPosted:
    """Checks for the ``weekend_posted`` signal.

    The date-based cases use fixed calendar dates rather than dates derived
    from the current time, so the outcome does not depend on the day the
    suite is run.
    """

    def test_weekend_flag_triggers(self):
        """The explicit ``weekend_posted=True`` flag triggers the signal."""
        result = weekend_posted(JobListing(weekend_posted=True))
        assert result.triggered is True

    def test_saturday_date_triggers(self):
        """With the flag off, a Saturday ``posted_at`` still triggers the signal."""
        sat = datetime(2026, 4, 18, tzinfo=timezone.utc)
        # Guard the fixture itself: weekday() == 5 means Saturday.
        assert sat.weekday() == 5
        result = weekend_posted(JobListing(posted_at=sat, weekend_posted=False))
        assert result.triggered is True

    def test_weekday_does_not_trigger(self):
        """With the flag off, a Monday ``posted_at`` does not trigger the signal."""
        mon = datetime(2026, 4, 20, tzinfo=timezone.utc)
        # Guard the fixture itself: weekday() == 0 means Monday.
        assert mon.weekday() == 0
        result = weekend_posted(JobListing(posted_at=mon, weekend_posted=False))
        assert result.triggered is False

    def test_no_data_returns_not_triggered(self):
        """No ``posted_at`` and the flag off leaves the signal untriggered."""
        result = weekend_posted(JobListing(posted_at=None, weekend_posted=False))
        assert result.triggered is False

    def test_weight_is_0_04(self):
        """The signal reports a weight of 0.04."""
        assert weekend_posted(JobListing()).weight == 0.04
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# poor_response_history
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPoorResponseHistory:
    """Checks for the enrichment-driven ``poor_response_history`` signal."""

    def test_high_no_response_rate_triggers(self):
        """A no-response rate of 0.75 triggers the signal."""
        outcome = poor_response_history(
            JobListing(), JobEnrichment(no_response_rate=0.75)
        )
        assert outcome.triggered is True

    def test_low_no_response_rate_does_not_trigger(self):
        """A no-response rate of 0.30 does not trigger the signal."""
        outcome = poor_response_history(
            JobListing(), JobEnrichment(no_response_rate=0.30)
        )
        assert outcome.triggered is False

    def test_none_rate_returns_not_triggered(self):
        """A ``None`` rate keeps the signal off; the detail mentions availability."""
        outcome = poor_response_history(
            JobListing(), JobEnrichment(no_response_rate=None)
        )
        assert outcome.triggered is False
        assert "available" in outcome.detail.lower()

    def test_no_enrichment_returns_not_triggered(self):
        """Passing ``None`` as enrichment keeps the signal off."""
        outcome = poor_response_history(JobListing(), None)
        assert outcome.triggered is False

    def test_weight_is_0_08(self):
        """The signal reports a weight of 0.08 when called with the listing alone."""
        outcome = poor_response_history(JobListing())
        assert outcome.weight == 0.08
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ALL_SIGNALS registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAllSignalsRegistry:
    """Checks on the ``ALL_SIGNALS`` registry as a whole."""

    def test_has_12_signals(self):
        """The registry holds exactly twelve entries."""
        registered = len(ALL_SIGNALS)
        assert registered == 12

    def test_all_callable(self):
        """Every registry entry is callable."""
        assert all(callable(fn) for fn in ALL_SIGNALS)

    def test_all_return_signal_result(self):
        """Every entry returns a ``SignalResult`` for a default listing and no enrichment."""
        from circuitforge_core.job_quality.models import SignalResult

        bare = JobListing()
        assert all(isinstance(fn(bare, None), SignalResult) for fn in ALL_SIGNALS)

    def test_signal_names_are_unique(self):
        """No two entries report the same result name."""
        bare = JobListing()
        seen = [fn(bare).name for fn in ALL_SIGNALS]
        assert len(set(seen)) == len(seen)
|
||||
Loading…
Reference in a new issue