diff --git a/scripts/job_ranker.py b/scripts/job_ranker.py index 470f054..e958df3 100644 --- a/scripts/job_ranker.py +++ b/scripts/job_ranker.py @@ -12,10 +12,53 @@ Usage: """ from __future__ import annotations +import logging import math import re from datetime import datetime, timezone +_log = logging.getLogger(__name__) + +# Max jobs passed to the reranker (avoids excessive inference time on large stacks) +_RERANK_POOL = 50 + + +def _try_rerank(resume_text: str, jobs: list[dict]) -> list[dict]: + """Rerank jobs by cross-encoder relevance to resume text. + + Returns jobs sorted best-first by the reranker. Falls back silently to the + input order if the reranker package is unavailable or inference fails. + """ + if not jobs: + return jobs + try: + from circuitforge_core.reranker import rerank + except ImportError: + return jobs + try: + descriptions = [j.get("description") or j.get("title", "") for j in jobs] + results = rerank(resume_text, descriptions, top_n=len(jobs)) + # Map ranked candidates back to job dicts, handling duplicate descriptions + idx_queue: dict[str, list[int]] = {} + for i, d in enumerate(descriptions): + idx_queue.setdefault(d, []).append(i) + reranked: list[dict] = [] + used: set[int] = set() + for r in results: + for idx in idx_queue.get(r.candidate, []): + if idx not in used: + reranked.append(jobs[idx]) + used.add(idx) + break + # Safety: append anything the reranker didn't return + for i, j in enumerate(jobs): + if i not in used: + reranked.append(j) + return reranked + except Exception: + _log.warning("Reranker pass failed; using stack_score order.", exc_info=True) + return jobs + # ── TUNING ───────────────────────────────────────────────────────────────────── # Adjust these constants to change how jobs are ranked. @@ -289,6 +332,7 @@ def rank_jobs( user_level: int = 3, limit: int = 10, min_score: float = 20.0, + resume_text: str = "", ) -> list[dict]: """Score and rank pending jobs; return top-N above min_score. 
@@ -299,6 +343,10 @@ def rank_jobs( user_level: Seniority level 1–7 (use seniority_from_experience()). limit: Stack size; pass 0 to return all qualifying jobs. min_score: Minimum stack_score to include (0–100). + resume_text: Plain-text resume for cross-encoder reranking pass. + When provided, the top-_RERANK_POOL candidates are + reranked by (resume, description) relevance before + the limit is applied. Graceful no-op when empty. Returns: Sorted list (best first) with 'stack_score' key added to each dict. @@ -310,4 +358,10 @@ def rank_jobs( scored.append({**job, "stack_score": s}) scored.sort(key=lambda j: j["stack_score"], reverse=True) + + if resume_text and scored: + pool = scored[:_RERANK_POOL] + pool = _try_rerank(resume_text, pool) + scored = pool + scored[_RERANK_POOL:] + return scored[:limit] if limit > 0 else scored diff --git a/scripts/resume_optimizer.py b/scripts/resume_optimizer.py index 3681ff8..9d0298c 100644 --- a/scripts/resume_optimizer.py +++ b/scripts/resume_optimizer.py @@ -233,6 +233,22 @@ def rewrite_for_ats( from scripts.llm_router import LLMRouter router = LLMRouter() + # Rerank gaps by JD relevance so the most impactful terms are injected first. + # Falls back silently to the incoming priority ordering on any error. 
+ jd_text = job.get("description", "") + if jd_text and prioritized_gaps: + try: + from circuitforge_core.reranker import rerank as _rerank + terms = [g["term"] for g in prioritized_gaps] + results = _rerank(jd_text, terms, top_n=len(terms)) + term_rank = {r.candidate: r.rank for r in results} + prioritized_gaps = sorted( + prioritized_gaps, + key=lambda g: term_rank.get(g["term"], len(prioritized_gaps)), + ) + except Exception: + pass # keep original priority ordering + # Group gaps by target section by_section: dict[str, list[str]] = {} for gap in prioritized_gaps: diff --git a/tests/test_reranker_integration.py b/tests/test_reranker_integration.py new file mode 100644 index 0000000..50d87a0 --- /dev/null +++ b/tests/test_reranker_integration.py @@ -0,0 +1,234 @@ +# tests/test_reranker_integration.py +"""Tests for reranker integration in job_ranker and resume_optimizer. + +Set CF_RERANKER_MOCK=1 to avoid loading real model weights during tests. +""" +import os +import pytest +from unittest.mock import patch + +os.environ["CF_RERANKER_MOCK"] = "1" + + +# ── Fixtures ────────────────────────────────────────────────────────────────── + +RESUME_TEXT = "Experienced Python engineer with Django REST and data pipeline experience." 
SAMPLE_JOBS = [
    {
        "id": 1,
        "title": "Python Developer",
        "company": "Acme",
        "description": "Python Django REST APIs data engineering pipelines",
        "date_found": "2026-04-01",
        "match_score": 70,
        "salary": "120000",
        "is_remote": 1,
        "location": "Remote",
    },
    {
        "id": 2,
        "title": "Data Analyst",
        "company": "Beta",
        "description": "SQL Excel Tableau business intelligence reporting dashboards",
        "date_found": "2026-04-02",
        "match_score": 60,
        "salary": "90000",
        "is_remote": 1,
        "location": "Remote",
    },
    {
        "id": 3,
        "title": "Frontend Engineer",
        "company": "Gamma",
        "description": "React TypeScript CSS frontend web UI component library",
        "date_found": "2026-04-01",
        "match_score": 50,
        "salary": "110000",
        "is_remote": 1,
        "location": "Remote",
    },
]

SAMPLE_RESUME = {
    "name": "Alex Rivera",
    "email": "alex@example.com",
    "phone": "555-1234",
    "career_summary": "Experienced Python engineer with Django and REST API experience.",
    "skills": ["Python", "Django", "REST APIs"],
    "experience": [
        {
            "title": "Software Engineer",
            "company": "Acme Corp",
            "start_date": "2021",
            "end_date": "present",
            "bullets": ["Built REST APIs.", "Managed data pipelines."],
        }
    ],
    "education": [
        {
            "degree": "B.S.",
            "field": "Computer Science",
            "institution": "State University",
            "graduation_year": "2018",
        }
    ],
    "achievements": [],
}

PRIORITIZED_GAPS = [
    {"term": "Kubernetes", "section": "skills", "priority": 1, "rationale": "JD requires K8s"},
    {"term": "REST APIs", "section": "experience", "priority": 2, "rationale": "JD mentions REST"},
    {"term": "CI/CD", "section": "skills", "priority": 3, "rationale": "DevOps signal"},
]

SAMPLE_JOB = {
    "title": "Senior Python Engineer",
    "company": "Acme",
    "description": "Python REST API developer with Kubernetes CI/CD and data pipeline experience.",
}


# ── _try_rerank ───────────────────────────────────────────────────────────────

def test_try_rerank_empty_list():
    """An empty job list is returned as-is."""
    from scripts.job_ranker import _try_rerank
    assert _try_rerank("query", []) == []


def test_try_rerank_returns_all_jobs():
    """Reranking may reorder jobs but never drops or duplicates them."""
    from scripts.job_ranker import _try_rerank
    result = _try_rerank(RESUME_TEXT, SAMPLE_JOBS)
    assert len(result) == len(SAMPLE_JOBS)
    assert {j["id"] for j in result} == {j["id"] for j in SAMPLE_JOBS}


def test_try_rerank_preserves_job_fields():
    """Every returned job dict is field-for-field equal to its input dict."""
    from scripts.job_ranker import _try_rerank
    result = _try_rerank(RESUME_TEXT, SAMPLE_JOBS)
    originals = {j["id"]: j for j in SAMPLE_JOBS}
    for job in result:
        # Comparing whole dicts replaces the old always-true stack_score check.
        assert job == originals[job["id"]]


def test_try_rerank_graceful_fallback_on_import_error():
    """_try_rerank falls back to input order when reranker is unavailable."""
    import builtins
    real_import = builtins.__import__

    def mock_import(name, *args, **kwargs):
        if name == "circuitforge_core.reranker":
            raise ImportError("not installed")
        return real_import(name, *args, **kwargs)

    with patch("builtins.__import__", side_effect=mock_import):
        from scripts.job_ranker import _try_rerank as _tr
        result = _tr(RESUME_TEXT, SAMPLE_JOBS)

    assert result == SAMPLE_JOBS


# ── rank_jobs ─────────────────────────────────────────────────────────────────

def test_rank_jobs_no_resume_text():
    """rank_jobs without resume_text works normally; reranker not invoked."""
    from scripts.job_ranker import rank_jobs
    results = rank_jobs(SAMPLE_JOBS, ["Python Developer"], limit=10, min_score=0)
    assert len(results) >= 1
    assert all("stack_score" in j for j in results)


def test_rank_jobs_with_resume_text_returns_same_job_ids():
    """rank_jobs with resume_text returns the same job set (possibly reordered)."""
    from scripts.job_ranker import rank_jobs
    without = rank_jobs(SAMPLE_JOBS, ["Python Developer"], limit=10, min_score=0)
    with_rr = rank_jobs(
        SAMPLE_JOBS, ["Python Developer"], limit=10, min_score=0, resume_text=RESUME_TEXT
    )
    assert {j["id"] for j in without} == {j["id"] for j in with_rr}


def test_rank_jobs_with_resume_text_preserves_stack_score():
    """stack_score is set on all returned jobs regardless of reranker pass."""
    from scripts.job_ranker import rank_jobs
    results = rank_jobs(
        SAMPLE_JOBS, ["Python Developer"], limit=10, min_score=0, resume_text=RESUME_TEXT
    )
    assert all("stack_score" in j for j in results)


def test_rank_jobs_limit_respected_after_rerank():
    """limit parameter is still respected when reranker is active."""
    from scripts.job_ranker import rank_jobs
    results = rank_jobs(
        SAMPLE_JOBS, ["Python Developer"], limit=2, min_score=0, resume_text=RESUME_TEXT
    )
    assert len(results) <= 2


def test_rank_jobs_empty_description_falls_back_to_title():
    """Jobs with no description use title as the reranker candidate string."""
    from scripts.job_ranker import rank_jobs
    jobs_no_desc = [{**j, "description": None} for j in SAMPLE_JOBS]
    results = rank_jobs(
        jobs_no_desc, ["Python Developer"], limit=10, min_score=0, resume_text=RESUME_TEXT
    )
    assert {j["id"] for j in results} == {j["id"] for j in SAMPLE_JOBS}


# ── rewrite_for_ats (reranker gap reordering) ─────────────────────────────────

def test_rewrite_for_ats_runs_with_reranker():
    """rewrite_for_ats completes without error when reranker is active."""
    from scripts.resume_optimizer import rewrite_for_ats

    with patch("scripts.llm_router.LLMRouter") as MockRouter:
        mock_bullets = '["Built REST APIs and Kubernetes pipelines."]'
        MockRouter.return_value.complete.return_value = mock_bullets
        result = rewrite_for_ats(SAMPLE_RESUME, PRIORITIZED_GAPS, SAMPLE_JOB)

    assert isinstance(result, dict)
    assert "skills" in result
    assert "experience" in result


def test_rewrite_for_ats_preserves_unrewritten_sections():
    """Sections with no gaps are passed through unchanged."""
    from scripts.resume_optimizer import rewrite_for_ats

    # Only 'skills' gaps — education should be untouched
    skills_only_gaps = [g for g in PRIORITIZED_GAPS if g["section"] == "skills"]

    with patch("scripts.llm_router.LLMRouter") as MockRouter:
        MockRouter.return_value.complete.return_value = '["AWS certified engineer."]'
        result = rewrite_for_ats(SAMPLE_RESUME, skills_only_gaps, SAMPLE_JOB)

    assert result["education"] == SAMPLE_RESUME["education"]


def test_rewrite_for_ats_reranker_fallback_on_error():
    """rewrite_for_ats completes even if reranker raises an exception."""
    from scripts.resume_optimizer import rewrite_for_ats
    from circuitforge_core.reranker import reset_reranker

    try:
        # Patch rerank to raise so we test the fallback path
        with patch("circuitforge_core.reranker.rerank", side_effect=RuntimeError("boom")):
            with patch("scripts.llm_router.LLMRouter") as MockRouter:
                MockRouter.return_value.complete.return_value = '["Built pipelines."]'
                result = rewrite_for_ats(SAMPLE_RESUME, PRIORITIZED_GAPS, SAMPLE_JOB)

        assert isinstance(result, dict)
    finally:
        # Reset even when the assertion fails so later tests start clean.
        reset_reranker()


def test_rewrite_for_ats_empty_gaps_no_reranker_call():
    """rewrite_for_ats with empty gaps makes no LLM call and keeps skills unchanged."""
    from scripts.resume_optimizer import rewrite_for_ats

    with patch("scripts.llm_router.LLMRouter") as MockRouter:
        result = rewrite_for_ats(SAMPLE_RESUME, [], SAMPLE_JOB)
        MockRouter.return_value.complete.assert_not_called()

    assert result["skills"] == SAMPLE_RESUME["skills"]