From aa057b20e27c94198ea6256acae09e53f20fee76 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 20 Apr 2026 13:02:57 -0700 Subject: [PATCH] feat: add job_quality deterministic trust scorer (closes #48) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 12 signal functions covering staleness, repost patterns, salary transparency, ATS blackhole detection, and enrichment signals. All pure functions — no LLM, no network, no I/O. trust_score = 1 - sum(triggered weights), clamped to [0,1]. confidence reflects fraction of signals with available evidence. Salary transparency enforced for CO/CA/NY/WA/IL/MA. ATS blackhole patterns: Lever, Greenhouse, Workday, iCIMS, Taleo. 83 tests (models, all 12 signals individually, scorer). Bumps to v0.12.0. --- CHANGELOG.md | 17 + circuitforge_core/__init__.py | 2 +- circuitforge_core/job_quality/__init__.py | 23 ++ circuitforge_core/job_quality/models.py | 70 +++++ circuitforge_core/job_quality/scorer.py | 60 ++++ circuitforge_core/job_quality/signals.py | 275 +++++++++++++++++ pyproject.toml | 2 +- tests/test_job_quality/__init__.py | 0 tests/test_job_quality/test_models.py | 106 +++++++ tests/test_job_quality/test_scorer.py | 141 +++++++++ tests/test_job_quality/test_signals.py | 358 ++++++++++++++++++++++ 11 files changed, 1052 insertions(+), 2 deletions(-) create mode 100644 circuitforge_core/job_quality/__init__.py create mode 100644 circuitforge_core/job_quality/models.py create mode 100644 circuitforge_core/job_quality/scorer.py create mode 100644 circuitforge_core/job_quality/signals.py create mode 100644 tests/test_job_quality/__init__.py create mode 100644 tests/test_job_quality/test_models.py create mode 100644 tests/test_job_quality/test_scorer.py create mode 100644 tests/test_job_quality/test_signals.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 460d72d..b48be1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,23 @@ Versions follow [Semantic 
Versioning](https://semver.org/spec/v2.0.0.html). --- +## [0.12.0] — 2026-04-20 + +### Added + +**`circuitforge_core.job_quality`** — deterministic trust scorer for job listings (MIT, closes #48) + +Pure signal processing module. No LLM calls, no network calls, no file I/O. Fully auditable and independently unit-testable per signal. + +- `models.py` — `JobListing`, `JobEnrichment`, `SignalResult`, `JobQualityScore` (Pydantic) +- `signals.py` — 12 signal functions with weights: `listing_age` (0.25), `repost_detected` (0.25), `no_salary_transparency` (0.20), `always_open_pattern` (0.20), `staffing_agency` (0.15), `requirement_overload` (0.12), `layoff_news` (0.12), `jd_vagueness` (0.10), `ats_blackhole` (0.10), `high_applicant_count` (0.08), `poor_response_history` (0.08), `weekend_posted` (0.04) +- `scorer.py` — `score_job(listing, enrichment=None) -> JobQualityScore`; trust_score = 1 − clamp(sum(triggered weights), 0, 1); confidence = fraction of signals with available evidence +- Salary transparency enforcement for CO, CA, NY, WA, IL, MA; ATS blackhole detection (Lever, Greenhouse, Workday, iCIMS, Taleo) +- `ALL_SIGNALS` registry for iteration and extension +- 83 tests across models, signals (all 12 individually), and scorer — 100% pass + +--- + ## [0.11.0] — 2026-04-20 ### Added diff --git a/circuitforge_core/__init__.py b/circuitforge_core/__init__.py index a0dd648..7a3c41f 100644 --- a/circuitforge_core/__init__.py +++ b/circuitforge_core/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.11.0" +__version__ = "0.12.0" try: from circuitforge_core.community import CommunityDB, CommunityPost, SharedStore diff --git a/circuitforge_core/job_quality/__init__.py b/circuitforge_core/job_quality/__init__.py new file mode 100644 index 0000000..2951ad2 --- /dev/null +++ b/circuitforge_core/job_quality/__init__.py @@ -0,0 +1,23 @@ +""" +circuitforge_core.job_quality — deterministic trust scorer for job listings. + +MIT licensed. 
+""" + +from circuitforge_core.job_quality.models import ( + JobEnrichment, + JobListing, + JobQualityScore, + SignalResult, +) +from circuitforge_core.job_quality.scorer import score_job +from circuitforge_core.job_quality.signals import ALL_SIGNALS + +__all__ = [ + "JobEnrichment", + "JobListing", + "JobQualityScore", + "SignalResult", + "score_job", + "ALL_SIGNALS", +] diff --git a/circuitforge_core/job_quality/models.py b/circuitforge_core/job_quality/models.py new file mode 100644 index 0000000..96f8919 --- /dev/null +++ b/circuitforge_core/job_quality/models.py @@ -0,0 +1,70 @@ +""" +Pydantic models for the job_quality trust scorer. + +MIT licensed — no LLM calls, no network calls, no file I/O. +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, Field + + +class JobListing(BaseModel): + """Input data sourced directly from a job board scraper or ATS export.""" + + # Core identity + title: str = "" + company: str = "" + location: str = "" + state_code: str = "" # Two-letter US state code, e.g. "CA" + + # Salary / compensation + salary_min: float | None = None + salary_max: float | None = None + salary_text: str = "" # Raw salary string from the listing + + # Posting metadata + posted_at: datetime | None = None + repost_count: int = 0 # Times the same listing has been reposted + applicant_count: int | None = None + is_staffing_agency: bool = False + is_always_open: bool = False # Evergreen/always-accepting flag + + # Content + description: str = "" + requirements: list[str] = Field(default_factory=list) + ats_url: str = "" # ATS apply URL (Greenhouse, Lever, Workday, etc.) 
+ + # Signals from scraper enrichment + weekend_posted: bool = False # Posted on Saturday or Sunday + + +class JobEnrichment(BaseModel): + """Optional enrichment data gathered outside the listing (news, history, etc.).""" + + has_layoff_news: bool = False # Recent layoff news for this company + avg_response_days: float | None = None # Average recruiter response time (days) + no_response_rate: float | None = None # Fraction of applicants with no response (0–1) + + +class SignalResult(BaseModel): + """Output of a single signal function.""" + + name: str + triggered: bool + weight: float + penalty: float # weight * triggered (0.0 when not triggered) + detail: str = "" # Human-readable explanation + + +class JobQualityScore(BaseModel): + """Aggregated trust score for a job listing.""" + + trust_score: float # 0.0 (low trust) – 1.0 (high trust) + confidence: float # 0.0 – 1.0: fraction of signals with available evidence + signals: list[SignalResult] + raw_penalty: float # Sum of triggered weights before clamping + metadata: dict[str, Any] = Field(default_factory=dict) diff --git a/circuitforge_core/job_quality/scorer.py b/circuitforge_core/job_quality/scorer.py new file mode 100644 index 0000000..6f4f4a0 --- /dev/null +++ b/circuitforge_core/job_quality/scorer.py @@ -0,0 +1,60 @@ +""" +score_job: aggregate all signals into a JobQualityScore. + +MIT licensed — pure function, no I/O. +""" + +from __future__ import annotations + +from circuitforge_core.job_quality.models import JobEnrichment, JobListing, JobQualityScore, SignalResult +from circuitforge_core.job_quality.signals import ALL_SIGNALS + + +def score_job( + listing: JobListing, + enrichment: JobEnrichment | None = None, +) -> JobQualityScore: + """ + Score a job listing for trust/quality. + + Each signal produces a penalty in [0, weight]. The raw penalty is the sum of + all triggered signal weights. trust_score = 1 - clamp(raw_penalty, 0, 1). 
+ + confidence reflects what fraction of signals had enough data to evaluate. + Signals that return triggered=False with a "not available" detail are counted + as unevaluable — they reduce confidence without adding penalty. + """ + results: list[SignalResult] = [] + evaluable_count = 0 + + for fn in ALL_SIGNALS: + result = fn(listing, enrichment) + results.append(result) + # A signal is evaluable when it either triggered or had data to decide it didn't. + # Signals that skip due to missing data always set triggered=False AND include + # "not available" or "No" in their detail. + if result.triggered or _has_data(result): + evaluable_count += 1 + + raw_penalty = sum(r.penalty for r in results) + trust_score = max(0.0, min(1.0, 1.0 - raw_penalty)) + confidence = evaluable_count / len(ALL_SIGNALS) if ALL_SIGNALS else 0.0 + + return JobQualityScore( + trust_score=round(trust_score, 4), + confidence=round(confidence, 4), + signals=results, + raw_penalty=round(raw_penalty, 4), + ) + + +def _has_data(result: SignalResult) -> bool: + """Return True when the signal's detail indicates it actually evaluated data.""" + skip_phrases = ( + "not available", + "No enrichment", + "No posted_at", + "No response rate", + "No salary information", + ) + return not any(phrase.lower() in result.detail.lower() for phrase in skip_phrases) diff --git a/circuitforge_core/job_quality/signals.py b/circuitforge_core/job_quality/signals.py new file mode 100644 index 0000000..c310b0c --- /dev/null +++ b/circuitforge_core/job_quality/signals.py @@ -0,0 +1,275 @@ +""" +Individual signal functions for the job_quality trust scorer. + +Each function takes a JobListing and optional JobEnrichment and returns a SignalResult. +All signals are pure functions: no I/O, no LLM calls, no side effects. + +MIT licensed. 
+""" + +from __future__ import annotations + +from datetime import datetime, timezone + +from circuitforge_core.job_quality.models import JobEnrichment, JobListing, SignalResult + +# US states with salary transparency laws (as of 2026) +_SALARY_TRANSPARENCY_STATES = {"CO", "CA", "NY", "WA", "IL", "MA"} + +# ATS providers whose apply URLs are commonly associated with high ghosting rates +_GHOSTING_ATS_PATTERNS = ("lever.co", "greenhouse.io", "workday.com", "icims.com", "taleo.net") + +# Threshold for "always open" detection: repost every N days for M months +_ALWAYS_OPEN_REPOST_THRESHOLD = 3 + +# Requirement count above which a listing is considered overloaded +_REQUIREMENT_OVERLOAD_COUNT = 12 + +# Vagueness: description length below this suggests bare-minimum content +_VAGUE_DESCRIPTION_CHARS = 400 + +# Applicant count above which competition is considered very high +_HIGH_APPLICANT_THRESHOLD = 200 + +# Listing age above which staleness is likely +_STALE_DAYS = 30 + +# Response rate above which the role is considered a high-ghosting source +_NO_RESPONSE_RATE_THRESHOLD = 0.60 + + +def _now() -> datetime: + return datetime.now(tz=timezone.utc) + + +# --------------------------------------------------------------------------- +# High-weight signals (0.15 – 0.25) +# --------------------------------------------------------------------------- + + +def listing_age(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Listing older than 30 days is likely stale or already filled.""" + weight = 0.25 + if listing.posted_at is None: + return SignalResult(name="listing_age", triggered=False, weight=weight, penalty=0.0, + detail="No posted_at date available.") + age_days = (_now() - listing.posted_at.astimezone(timezone.utc)).days + triggered = age_days > _STALE_DAYS + return SignalResult( + name="listing_age", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=f"Listing is {age_days} days old (threshold: 
{_STALE_DAYS}).", + ) + + +def repost_detected(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Listing has been reposted multiple times — a strong ghost-job indicator.""" + weight = 0.25 + triggered = listing.repost_count >= _ALWAYS_OPEN_REPOST_THRESHOLD + return SignalResult( + name="repost_detected", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=f"Repost count: {listing.repost_count} (threshold: {_ALWAYS_OPEN_REPOST_THRESHOLD}).", + ) + + +def no_salary_transparency(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """No salary info despite being in a transparency-law state, or generally absent.""" + weight = 0.20 + has_range = listing.salary_min is not None or listing.salary_max is not None + has_text = bool(listing.salary_text.strip()) + has_salary = has_range or has_text + in_transparency_state = listing.state_code.upper() in _SALARY_TRANSPARENCY_STATES + + if not has_salary: + if in_transparency_state: + detail = (f"No salary disclosed despite {listing.state_code} transparency law. " + "Possible compliance violation.") + else: + detail = "No salary information provided." + triggered = True + else: + triggered = False + detail = "Salary information present." + + return SignalResult( + name="no_salary_transparency", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=detail, + ) + + +def always_open_pattern(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Listing is flagged as always-accepting or evergreen — pipeline filler.""" + weight = 0.20 + triggered = listing.is_always_open + return SignalResult( + name="always_open_pattern", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail="Listing marked as always-open/evergreen." 
if triggered else "Not always-open.", + ) + + +def staffing_agency(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Posted by a staffing or recruiting agency rather than the hiring company directly.""" + weight = 0.15 + triggered = listing.is_staffing_agency + return SignalResult( + name="staffing_agency", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail="Listed by a staffing/recruiting agency." if triggered else "Direct employer listing.", + ) + + +# --------------------------------------------------------------------------- +# Medium-weight signals (0.08 – 0.12) +# --------------------------------------------------------------------------- + + +def requirement_overload(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Excessive requirements list suggests a wish-list role or perpetual search.""" + weight = 0.12 + count = len(listing.requirements) + triggered = count > _REQUIREMENT_OVERLOAD_COUNT + return SignalResult( + name="requirement_overload", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=f"{count} requirements listed (threshold: {_REQUIREMENT_OVERLOAD_COUNT}).", + ) + + +def layoff_news(listing: JobListing, enrichment: JobEnrichment | None = None) -> SignalResult: + """Company has recent layoff news — new hires may be at high risk.""" + weight = 0.12 + if enrichment is None: + return SignalResult(name="layoff_news", triggered=False, weight=weight, penalty=0.0, + detail="No enrichment data available.") + triggered = enrichment.has_layoff_news + return SignalResult( + name="layoff_news", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail="Recent layoff news detected for this company." 
if triggered else "No layoff news found.", + ) + + +def jd_vagueness(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Job description is suspiciously short — may not represent a real open role.""" + weight = 0.10 + char_count = len(listing.description.strip()) + triggered = char_count < _VAGUE_DESCRIPTION_CHARS + return SignalResult( + name="jd_vagueness", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=f"Description is {char_count} characters (threshold: {_VAGUE_DESCRIPTION_CHARS}).", + ) + + +def ats_blackhole(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Apply URL routes through a high-volume ATS known for candidate ghosting.""" + weight = 0.10 + url_lower = listing.ats_url.lower() + matched = next((p for p in _GHOSTING_ATS_PATTERNS if p in url_lower), None) + triggered = matched is not None + return SignalResult( + name="ats_blackhole", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=f"ATS matches high-ghosting pattern '{matched}'." 
if triggered else "No high-ghosting ATS detected.", + ) + + +def high_applicant_count(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Very high applicant count means low odds and possible ghost-collection.""" + weight = 0.08 + if listing.applicant_count is None: + return SignalResult(name="high_applicant_count", triggered=False, weight=weight, penalty=0.0, + detail="Applicant count not available.") + triggered = listing.applicant_count > _HIGH_APPLICANT_THRESHOLD + return SignalResult( + name="high_applicant_count", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=f"{listing.applicant_count} applicants (threshold: {_HIGH_APPLICANT_THRESHOLD}).", + ) + + +# --------------------------------------------------------------------------- +# Low-weight signals (0.04 – 0.08) +# --------------------------------------------------------------------------- + + +def weekend_posted(listing: JobListing, _: JobEnrichment | None = None) -> SignalResult: + """Posted on a weekend — may indicate bulk/automated ghost-job pipeline posting.""" + weight = 0.04 + if listing.posted_at is None and not listing.weekend_posted: + return SignalResult(name="weekend_posted", triggered=False, weight=weight, penalty=0.0, + detail="No posted_at date available.") + if listing.weekend_posted: + triggered = True + else: + triggered = listing.posted_at.weekday() >= 5 # type: ignore[union-attr] + return SignalResult( + name="weekend_posted", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail="Posted on a weekend." 
if triggered else "Posted on a weekday.", + ) + + +def poor_response_history(listing: JobListing, enrichment: JobEnrichment | None = None) -> SignalResult: + """Company/ATS historically does not respond to applicants.""" + weight = 0.08 + if enrichment is None: + return SignalResult(name="poor_response_history", triggered=False, weight=weight, penalty=0.0, + detail="No enrichment data available.") + rate = enrichment.no_response_rate + if rate is None: + return SignalResult(name="poor_response_history", triggered=False, weight=weight, penalty=0.0, + detail="No response rate data available.") + triggered = rate > _NO_RESPONSE_RATE_THRESHOLD + return SignalResult( + name="poor_response_history", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + detail=f"No-response rate: {rate:.0%} (threshold: {_NO_RESPONSE_RATE_THRESHOLD:.0%}).", + ) + + +# --------------------------------------------------------------------------- +# Signal registry — ordered by weight descending for scorer iteration +# --------------------------------------------------------------------------- + +ALL_SIGNALS = [ + listing_age, + repost_detected, + no_salary_transparency, + always_open_pattern, + staffing_agency, + requirement_overload, + layoff_news, + jd_vagueness, + ats_blackhole, + high_applicant_count, + weekend_posted, + poor_response_history, +] diff --git a/pyproject.toml b/pyproject.toml index 8320001..6a0e52f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "circuitforge-core" -version = "0.11.0" +version = "0.12.0" description = "Shared scaffold for CircuitForge products (MIT)" requires-python = ">=3.11" dependencies = [ diff --git a/tests/test_job_quality/__init__.py b/tests/test_job_quality/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_job_quality/test_models.py b/tests/test_job_quality/test_models.py new file mode 100644 index 0000000..2818023 --- 
/dev/null +++ b/tests/test_job_quality/test_models.py @@ -0,0 +1,106 @@ +"""Tests for job_quality Pydantic models — construction, defaults, and field types.""" + +from datetime import datetime, timezone + +import pytest +from pydantic import ValidationError + +from circuitforge_core.job_quality.models import ( + JobEnrichment, + JobListing, + JobQualityScore, + SignalResult, +) + + +class TestJobListing: + def test_minimal_construction(self): + listing = JobListing() + assert listing.title == "" + assert listing.requirements == [] + assert listing.salary_min is None + + def test_full_construction(self): + listing = JobListing( + title="Staff Engineer", + company="Acme Corp", + location="Remote", + state_code="CA", + salary_min=150_000, + salary_max=200_000, + salary_text="$150k–$200k", + posted_at=datetime(2026, 1, 1, tzinfo=timezone.utc), + repost_count=2, + applicant_count=50, + is_staffing_agency=False, + is_always_open=False, + description="A real job description with meaningful content.", + requirements=["Python", "Go"], + ats_url="https://jobs.lever.co/acme/123", + weekend_posted=False, + ) + assert listing.salary_min == 150_000 + assert listing.state_code == "CA" + assert len(listing.requirements) == 2 + + def test_repost_count_defaults_zero(self): + assert JobListing().repost_count == 0 + + def test_requirements_is_independent_list(self): + a = JobListing(requirements=["Python"]) + b = JobListing(requirements=["Go"]) + assert a.requirements != b.requirements + + +class TestJobEnrichment: + def test_defaults(self): + e = JobEnrichment() + assert e.has_layoff_news is False + assert e.avg_response_days is None + assert e.no_response_rate is None + + def test_with_data(self): + e = JobEnrichment(has_layoff_news=True, no_response_rate=0.75) + assert e.has_layoff_news is True + assert e.no_response_rate == 0.75 + + +class TestSignalResult: + def test_construction(self): + r = SignalResult(name="listing_age", triggered=True, weight=0.25, penalty=0.25, detail="30 
days old.") + assert r.penalty == 0.25 + + def test_not_triggered_zero_penalty(self): + r = SignalResult(name="staffing_agency", triggered=False, weight=0.15, penalty=0.0) + assert r.penalty == 0.0 + + def test_detail_defaults_empty(self): + r = SignalResult(name="x", triggered=False, weight=0.1, penalty=0.0) + assert r.detail == "" + + +class TestJobQualityScore: + def _make_signal(self, triggered: bool, weight: float) -> SignalResult: + return SignalResult( + name="test", + triggered=triggered, + weight=weight, + penalty=weight if triggered else 0.0, + ) + + def test_construction(self): + score = JobQualityScore( + trust_score=0.75, + confidence=0.9, + signals=[self._make_signal(True, 0.25)], + raw_penalty=0.25, + ) + assert score.trust_score == 0.75 + assert score.confidence == 0.9 + assert score.raw_penalty == 0.25 + + def test_metadata_defaults_empty(self): + score = JobQualityScore( + trust_score=1.0, confidence=1.0, signals=[], raw_penalty=0.0 + ) + assert score.metadata == {} diff --git a/tests/test_job_quality/test_scorer.py b/tests/test_job_quality/test_scorer.py new file mode 100644 index 0000000..caddaf7 --- /dev/null +++ b/tests/test_job_quality/test_scorer.py @@ -0,0 +1,141 @@ +""" +Tests for score_job() — the aggregating scorer function. + +Covers: trust_score math, confidence calculation, clamping, +signal count, enrichment passthrough, edge cases. 
+""" + +from __future__ import annotations + +from datetime import datetime, timedelta, timezone + +import pytest + +from circuitforge_core.job_quality.models import JobEnrichment, JobListing +from circuitforge_core.job_quality.scorer import score_job +from circuitforge_core.job_quality.signals import ALL_SIGNALS + +_NOW = datetime.now(tz=timezone.utc) + + +def _days_ago(n: int) -> datetime: + return _NOW - timedelta(days=n) + + +def _clean_listing() -> JobListing: + """A listing that should trigger no signals.""" + return JobListing( + title="Staff Engineer", + company="Acme Corp", + state_code="CA", + salary_min=140_000, + salary_max=180_000, + posted_at=_days_ago(3), + repost_count=0, + applicant_count=30, + is_staffing_agency=False, + is_always_open=False, + description="X" * 600, + requirements=["Python", "Go", "SQL"], + ats_url="https://careers.acme.com/apply/123", + weekend_posted=False, + ) + + +def _ghost_listing() -> JobListing: + """A listing designed to trigger as many signals as possible.""" + return JobListing( + state_code="", + posted_at=_days_ago(60), + repost_count=5, + is_staffing_agency=True, + is_always_open=True, + applicant_count=500, + requirements=["R"] * 15, + description="Great opportunity.", + ats_url="https://jobs.lever.co/ghost/123", + weekend_posted=True, + ) + + +class TestScoreJob: + def test_clean_listing_high_trust(self): + score = score_job(_clean_listing(), JobEnrichment(has_layoff_news=False, no_response_rate=0.1)) + assert score.trust_score >= 0.85, f"Expected high trust, got {score.trust_score}" + + def test_ghost_listing_low_trust(self): + score = score_job(_ghost_listing(), JobEnrichment(has_layoff_news=True, no_response_rate=0.9)) + assert score.trust_score <= 0.25, f"Expected low trust, got {score.trust_score}" + + def test_trust_score_clamped_to_1(self): + score = score_job(JobListing()) # No signals triggered, penalty = 0 + assert score.trust_score <= 1.0 + + def test_trust_score_clamped_to_0(self): + score = 
score_job(_ghost_listing(), JobEnrichment(has_layoff_news=True, no_response_rate=0.9)) + assert score.trust_score >= 0.0 + + def test_returns_all_signals(self): + score = score_job(JobListing()) + assert len(score.signals) == len(ALL_SIGNALS) + + def test_signal_names_match_registry(self): + score = score_job(JobListing()) + score_names = {s.name for s in score.signals} + registry_names = {fn(JobListing()).name for fn in ALL_SIGNALS} + assert score_names == registry_names + + def test_raw_penalty_equals_sum_of_triggered_weights(self): + score = score_job(_ghost_listing()) + expected = sum(s.penalty for s in score.signals) + assert abs(score.raw_penalty - round(expected, 4)) < 1e-6 + + def test_trust_score_equals_one_minus_penalty(self): + score = score_job(_ghost_listing()) + expected = round(max(0.0, 1.0 - score.raw_penalty), 4) + assert score.trust_score == expected + + def test_confidence_between_0_and_1(self): + score = score_job(JobListing()) + assert 0.0 <= score.confidence <= 1.0 + + def test_no_enrichment_reduces_confidence(self): + score_no_enrich = score_job(_clean_listing(), None) + score_with_enrich = score_job(_clean_listing(), JobEnrichment(has_layoff_news=False, no_response_rate=0.1)) + assert score_with_enrich.confidence >= score_no_enrich.confidence + + def test_enrichment_is_passed_to_signals(self): + enrichment = JobEnrichment(has_layoff_news=True) + score = score_job(JobListing(), enrichment) + layoff_signal = next(s for s in score.signals if s.name == "layoff_news") + assert layoff_signal.triggered is True + + def test_metadata_empty_by_default(self): + score = score_job(JobListing()) + assert score.metadata == {} + + def test_no_salary_in_transparency_state(self): + listing = JobListing(state_code="CO", posted_at=_days_ago(1), repost_count=0) + score = score_job(listing) + salary_signal = next(s for s in score.signals if s.name == "no_salary_transparency") + assert salary_signal.triggered is True + + def 
test_penalty_accumulation_is_additive(self): + """Each triggered signal adds its weight independently.""" + listing = JobListing( + is_staffing_agency=True, # +0.15 + is_always_open=True, # +0.20 + ) + score = score_job(listing) + staffing = next(s for s in score.signals if s.name == "staffing_agency") + always = next(s for s in score.signals if s.name == "always_open_pattern") + assert staffing.triggered and always.triggered + assert score.raw_penalty >= staffing.weight + always.weight - 1e-9 + + def test_score_is_deterministic(self): + listing = _ghost_listing() + enrich = JobEnrichment(has_layoff_news=True, no_response_rate=0.8) + s1 = score_job(listing, enrich) + s2 = score_job(listing, enrich) + assert s1.trust_score == s2.trust_score + assert s1.raw_penalty == s2.raw_penalty diff --git a/tests/test_job_quality/test_signals.py b/tests/test_job_quality/test_signals.py new file mode 100644 index 0000000..56723c9 --- /dev/null +++ b/tests/test_job_quality/test_signals.py @@ -0,0 +1,358 @@ +""" +Unit tests for each individual signal function. + +Each signal is exercised for: triggered path, not-triggered path, and (where +applicable) the missing-data / no-enrichment path. 
+""" + +from __future__ import annotations + +from datetime import datetime, timedelta, timezone + +import pytest + +from circuitforge_core.job_quality.models import JobEnrichment, JobListing +from circuitforge_core.job_quality.signals import ( + ALL_SIGNALS, + always_open_pattern, + ats_blackhole, + high_applicant_count, + jd_vagueness, + layoff_news, + listing_age, + no_salary_transparency, + poor_response_history, + repost_detected, + requirement_overload, + staffing_agency, + weekend_posted, +) + +_NOW = datetime.now(tz=timezone.utc) + + +def _days_ago(n: int) -> datetime: + return _NOW - timedelta(days=n) + + +# --------------------------------------------------------------------------- +# listing_age +# --------------------------------------------------------------------------- + + +class TestListingAge: + def test_stale_listing_triggers(self): + listing = JobListing(posted_at=_days_ago(31)) + result = listing_age(listing) + assert result.triggered is True + assert result.penalty == result.weight + + def test_fresh_listing_does_not_trigger(self): + listing = JobListing(posted_at=_days_ago(5)) + result = listing_age(listing) + assert result.triggered is False + assert result.penalty == 0.0 + + def test_no_posted_at_returns_not_triggered(self): + result = listing_age(JobListing()) + assert result.triggered is False + assert result.penalty == 0.0 + assert "No posted_at" in result.detail + + def test_weight_is_0_25(self): + assert listing_age(JobListing()).weight == 0.25 + + +# --------------------------------------------------------------------------- +# repost_detected +# --------------------------------------------------------------------------- + + +class TestRepostDetected: + def test_high_repost_triggers(self): + result = repost_detected(JobListing(repost_count=3)) + assert result.triggered is True + + def test_low_repost_does_not_trigger(self): + result = repost_detected(JobListing(repost_count=1)) + assert result.triggered is False + + def 
test_zero_repost_does_not_trigger(self): + result = repost_detected(JobListing(repost_count=0)) + assert result.triggered is False + + def test_weight_is_0_25(self): + assert repost_detected(JobListing()).weight == 0.25 + + +# --------------------------------------------------------------------------- +# no_salary_transparency +# --------------------------------------------------------------------------- + + +class TestNoSalaryTransparency: + def test_no_salary_triggers(self): + result = no_salary_transparency(JobListing(state_code="TX")) + assert result.triggered is True + + def test_salary_range_prevents_trigger(self): + result = no_salary_transparency(JobListing(salary_min=80_000, salary_max=120_000)) + assert result.triggered is False + + def test_salary_text_prevents_trigger(self): + result = no_salary_transparency(JobListing(salary_text="$90k")) + assert result.triggered is False + + def test_transparency_state_detail(self): + result = no_salary_transparency(JobListing(state_code="CA")) + assert "CA" in result.detail or "transparency" in result.detail.lower() + + def test_weight_is_0_20(self): + assert no_salary_transparency(JobListing()).weight == 0.20 + + +# --------------------------------------------------------------------------- +# always_open_pattern +# --------------------------------------------------------------------------- + + +class TestAlwaysOpenPattern: + def test_always_open_triggers(self): + result = always_open_pattern(JobListing(is_always_open=True)) + assert result.triggered is True + + def test_not_always_open(self): + result = always_open_pattern(JobListing(is_always_open=False)) + assert result.triggered is False + + def test_weight_is_0_20(self): + assert always_open_pattern(JobListing()).weight == 0.20 + + +# --------------------------------------------------------------------------- +# staffing_agency +# --------------------------------------------------------------------------- + + +class TestStaffingAgency: + def 
test_agency_triggers(self): + result = staffing_agency(JobListing(is_staffing_agency=True)) + assert result.triggered is True + + def test_direct_employer_does_not_trigger(self): + result = staffing_agency(JobListing(is_staffing_agency=False)) + assert result.triggered is False + + def test_weight_is_0_15(self): + assert staffing_agency(JobListing()).weight == 0.15 + + +# --------------------------------------------------------------------------- +# requirement_overload +# --------------------------------------------------------------------------- + + +class TestRequirementOverload: + def test_overloaded_triggers(self): + result = requirement_overload(JobListing(requirements=["R"] * 13)) + assert result.triggered is True + + def test_reasonable_requirements_do_not_trigger(self): + result = requirement_overload(JobListing(requirements=["Python", "Go", "SQL"])) + assert result.triggered is False + + def test_empty_requirements_does_not_trigger(self): + result = requirement_overload(JobListing()) + assert result.triggered is False + + def test_weight_is_0_12(self): + assert requirement_overload(JobListing()).weight == 0.12 + + +# --------------------------------------------------------------------------- +# layoff_news +# --------------------------------------------------------------------------- + + +class TestLayoffNews: + def test_layoff_news_triggers(self): + enrichment = JobEnrichment(has_layoff_news=True) + result = layoff_news(JobListing(), enrichment) + assert result.triggered is True + + def test_no_layoff_news_does_not_trigger(self): + enrichment = JobEnrichment(has_layoff_news=False) + result = layoff_news(JobListing(), enrichment) + assert result.triggered is False + + def test_no_enrichment_returns_not_triggered(self): + result = layoff_news(JobListing(), None) + assert result.triggered is False + assert "No enrichment" in result.detail + + def test_weight_is_0_12(self): + assert layoff_news(JobListing()).weight == 0.12 + + +# 
# ---------------------------------------------------------------------------
# jd_vagueness
# ---------------------------------------------------------------------------


class TestJdVagueness:
    """Tests for the ``jd_vagueness`` signal."""

    def test_short_description_triggers(self):
        result = jd_vagueness(JobListing(description="Short."))
        assert result.triggered is True

    def test_long_description_does_not_trigger(self):
        result = jd_vagueness(JobListing(description="X" * 500))
        assert result.triggered is False

    def test_empty_description_triggers(self):
        result = jd_vagueness(JobListing(description=""))
        assert result.triggered is True

    def test_weight_is_0_10(self):
        assert jd_vagueness(JobListing()).weight == 0.10


# ---------------------------------------------------------------------------
# ats_blackhole
# ---------------------------------------------------------------------------


class TestAtsBlackhole:
    """Tests for the ``ats_blackhole`` signal."""

    @pytest.mark.parametrize("url", [
        "https://jobs.lever.co/acme/abc",
        "https://boards.greenhouse.io/acme/jobs/123",
        "https://acme.workday.com/en-US/recruiting/job/123",
        "https://acme.icims.com/jobs/123",
        "https://acme.taleo.net/careersection/123",
    ])
    def test_known_ats_triggers(self, url: str):
        # One sample URL per ATS vendor exercised by this suite.
        result = ats_blackhole(JobListing(ats_url=url))
        assert result.triggered is True

    def test_direct_url_does_not_trigger(self):
        result = ats_blackhole(JobListing(ats_url="https://careers.acme.com/apply/123"))
        assert result.triggered is False

    def test_empty_url_does_not_trigger(self):
        result = ats_blackhole(JobListing(ats_url=""))
        assert result.triggered is False

    def test_weight_is_0_10(self):
        assert ats_blackhole(JobListing()).weight == 0.10


# ---------------------------------------------------------------------------
# high_applicant_count
# ---------------------------------------------------------------------------


class TestHighApplicantCount:
    """Tests for the ``high_applicant_count`` signal."""

    def test_high_count_triggers(self):
        result = high_applicant_count(JobListing(applicant_count=201))
        assert result.triggered is True

    def test_low_count_does_not_trigger(self):
        result = high_applicant_count(JobListing(applicant_count=10))
        assert result.triggered is False

    def test_none_count_returns_not_triggered(self):
        # Missing evidence: not triggered, and the detail explains why.
        result = high_applicant_count(JobListing(applicant_count=None))
        assert result.triggered is False
        assert "not available" in result.detail.lower()

    def test_weight_is_0_08(self):
        assert high_applicant_count(JobListing()).weight == 0.08


# ---------------------------------------------------------------------------
# weekend_posted
# ---------------------------------------------------------------------------


class TestWeekendPosted:
    """Tests for the ``weekend_posted`` signal.

    Date-based cases use fixed calendar dates instead of values derived from
    the current time, so results do not depend on the day the suite runs.
    """

    def test_weekend_flag_triggers(self):
        result = weekend_posted(JobListing(weekend_posted=True))
        assert result.triggered is True

    def test_saturday_date_triggers(self):
        # 2026-04-18 is a Saturday. The explicit flag is False, so only the
        # posted_at date can cause the trigger.
        sat = datetime(2026, 4, 18, tzinfo=timezone.utc)
        assert sat.weekday() == 5  # guard: the fixture really is a Saturday
        result = weekend_posted(JobListing(posted_at=sat, weekend_posted=False))
        assert result.triggered is True

    def test_weekday_does_not_trigger(self):
        # 2026-04-20 is a Monday.
        mon = datetime(2026, 4, 20, tzinfo=timezone.utc)
        assert mon.weekday() == 0  # guard: the fixture really is a Monday
        result = weekend_posted(JobListing(posted_at=mon, weekend_posted=False))
        assert result.triggered is False

    def test_no_data_returns_not_triggered(self):
        result = weekend_posted(JobListing(posted_at=None, weekend_posted=False))
        assert result.triggered is False

    def test_weight_is_0_04(self):
        assert weekend_posted(JobListing()).weight == 0.04


# ---------------------------------------------------------------------------
# poor_response_history
#
# ---------------------------------------------------------------------------


class TestPoorResponseHistory:
    """Tests for the enrichment-backed ``poor_response_history`` signal."""

    def test_high_no_response_rate_triggers(self):
        outcome = poor_response_history(
            JobListing(), JobEnrichment(no_response_rate=0.75)
        )
        assert outcome.triggered is True

    def test_low_no_response_rate_does_not_trigger(self):
        outcome = poor_response_history(
            JobListing(), JobEnrichment(no_response_rate=0.30)
        )
        assert outcome.triggered is False

    def test_none_rate_returns_not_triggered(self):
        # Enrichment is present but carries no rate: not triggered, and the
        # detail mentions availability.
        outcome = poor_response_history(
            JobListing(), JobEnrichment(no_response_rate=None)
        )
        assert outcome.triggered is False
        assert "available" in outcome.detail.lower()

    def test_no_enrichment_returns_not_triggered(self):
        outcome = poor_response_history(JobListing(), None)
        assert outcome.triggered is False

    def test_weight_is_0_08(self):
        # Called with the listing only, so the enrichment argument is omitted.
        weight = poor_response_history(JobListing()).weight
        assert weight == 0.08


# ---------------------------------------------------------------------------
# ALL_SIGNALS registry
# ---------------------------------------------------------------------------


class TestAllSignalsRegistry:
    """Tests for the ``ALL_SIGNALS`` registry as a whole."""

    def test_has_12_signals(self):
        signal_count = len(ALL_SIGNALS)
        assert signal_count == 12

    def test_all_callable(self):
        not_callable = [entry for entry in ALL_SIGNALS if not callable(entry)]
        assert not not_callable

    def test_all_return_signal_result(self):
        from circuitforge_core.job_quality.models import SignalResult

        # Every signal must accept (listing, None) and return a SignalResult.
        blank = JobListing()
        for signal_fn in ALL_SIGNALS:
            produced = signal_fn(blank, None)
            assert isinstance(produced, SignalResult)

    def test_signal_names_are_unique(self):
        blank = JobListing()
        names = []
        for signal_fn in ALL_SIGNALS:
            names.append(signal_fn(blank).name)
        assert len(set(names)) == len(names)