peregrine/tests/test_resume_optimizer.py
pyr0ball 02e004ee5c feat(apply): ATS resume optimizer backend — gap report + LLM rewrite
- scripts/resume_optimizer.py: full pipeline (extract_jd_signals →
  prioritize_gaps → rewrite_for_ats → hallucination_check)
- scripts/db.py: add optimized_resume + ats_gap_report columns +
  save_optimized_resume / get_optimized_resume helpers
- tests/test_resume_optimizer.py: 17 unit tests; patches at source
  module (scripts.llm_router.LLMRouter), not consumer

Tier gate: gap report is free; full LLM rewrite is paid+.
2026-04-01 07:09:46 -07:00

288 lines
11 KiB
Python

# tests/test_resume_optimizer.py
"""Tests for scripts/resume_optimizer.py"""
import json
import pytest
from unittest.mock import MagicMock, patch
# ── Fixtures ─────────────────────────────────────────────────────────────────
# Baseline structured resume used as the "original" input by the tests in
# this module.  Tests build modified copies rather than mutating it in place.
SAMPLE_RESUME = {
    # Contact header
    "name": "Alex Rivera",
    "email": "alex@example.com",
    "phone": "555-1234",
    # Free-text summary
    "career_summary": "Experienced Customer Success Manager with a track record of growth.",
    # Flat list of skill keywords
    "skills": ["Salesforce", "Python", "customer success"],
    # Work history: two employers (Acme Corp, Beta Inc)
    "experience": [
        {
            "title": "Customer Success Manager",
            "company": "Acme Corp",
            "start_date": "2021",
            "end_date": "present",
            "bullets": [
                "Managed a portfolio of 120 enterprise accounts.",
                "Reduced churn by 18% through proactive outreach.",
            ],
        },
        {
            "title": "Support Engineer",
            "company": "Beta Inc",
            "start_date": "2018",
            "end_date": "2021",
            "bullets": ["Resolved escalations for top-tier clients."],
        },
    ],
    # Education: a single institution
    "education": [
        {
            "degree": "B.S.",
            "field": "Computer Science",
            "institution": "State University",
            "graduation_year": "2018",
        }
    ],
    # Intentionally empty
    "achievements": [],
}
# Job-description text fed to the extraction tests; it mentions Gainsight,
# cross-functional leadership, stakeholder management and AWS.
SAMPLE_JD = (
    "We are looking for a Customer Success Manager with Gainsight, "
    "cross-functional leadership experience, and strong stakeholder "
    "management skills. AWS knowledge a plus."
)
# ── extract_jd_signals ────────────────────────────────────────────────────────
def test_extract_jd_signals_returns_list():
    """extract_jd_signals still yields a list when the LLM call raises."""
    from scripts.resume_optimizer import extract_jd_signals

    # Patch the router class at its defining module and make completion blow up.
    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router = router_cls.return_value
        router.complete.side_effect = Exception("no LLM")
        signals = extract_jd_signals(SAMPLE_JD, resume_text="Python developer")

    assert isinstance(signals, list)
def test_extract_jd_signals_llm_path_parses_json_array():
    """Terms from a JSON-array LLM reply show up in the extracted signals."""
    from scripts.resume_optimizer import extract_jd_signals

    canned_reply = '["Gainsight", "cross-functional leadership", "stakeholder management"]'
    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router = router_cls.return_value
        router.complete.return_value = canned_reply
        signals = extract_jd_signals(SAMPLE_JD)

    assert "Gainsight" in signals
    assert "cross-functional leadership" in signals
def test_extract_jd_signals_deduplicates():
    """A term repeated in the LLM reply appears only once in the result."""
    from scripts.resume_optimizer import extract_jd_signals

    # "Python" is listed twice on purpose.
    canned_reply = '["Python", "AWS", "Python"]'
    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router = router_cls.return_value
        router.complete.return_value = canned_reply
        signals = extract_jd_signals(SAMPLE_JD)

    assert signals.count("Python") == 1
def test_extract_jd_signals_handles_malformed_llm_json():
    """A non-JSON LLM reply does not prevent a list from being returned."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router = router_cls.return_value
        # Free-form prose instead of the JSON array the caller asks for.
        router.complete.return_value = "Here are some keywords: Gainsight, AWS"
        signals = extract_jd_signals(SAMPLE_JD)

    # Only the type is checked; the list may legitimately be empty.
    assert isinstance(signals, list)
# ── prioritize_gaps ───────────────────────────────────────────────────────────
def test_prioritize_gaps_skips_existing_terms():
    """Terms the resume already contains are left out of the gap list."""
    from scripts.resume_optimizer import prioritize_gaps

    # SAMPLE_RESUME lists "Salesforce" under skills; "Gainsight" is absent.
    gap_entries = prioritize_gaps(["Salesforce", "Gainsight"], SAMPLE_RESUME)
    reported_terms = [entry["term"] for entry in gap_entries]

    assert "Salesforce" not in reported_terms
    assert "Gainsight" in reported_terms
def test_prioritize_gaps_routes_tech_terms_to_skills():
    """AWS and Docker are routed to the skills section; AWS gets priority 1."""
    from scripts.resume_optimizer import prioritize_gaps

    gap_entries = prioritize_gaps(["AWS", "Docker"], SAMPLE_RESUME)
    entry_for = {entry["term"]: entry for entry in gap_entries}

    aws_entry = entry_for["AWS"]
    assert aws_entry["section"] == "skills"
    assert aws_entry["priority"] == 1
    assert entry_for["Docker"]["section"] == "skills"
def test_prioritize_gaps_routes_leadership_terms_to_summary():
    """"cross-functional" and "stakeholder" are routed to the summary section."""
    from scripts.resume_optimizer import prioritize_gaps

    gap_entries = prioritize_gaps(["cross-functional", "stakeholder"], SAMPLE_RESUME)
    entry_for = {entry["term"]: entry for entry in gap_entries}

    for term in ("cross-functional", "stakeholder"):
        assert entry_for[term]["section"] == "summary"
def test_prioritize_gaps_multi_word_routes_to_experience():
    """An unlisted multi-word phrase lands in experience at priority 2."""
    from scripts.resume_optimizer import prioritize_gaps

    gap_entries = prioritize_gaps(["proactive client engagement"], SAMPLE_RESUME)
    first_entry = gap_entries[0]

    assert first_entry["section"] == "experience"
    assert first_entry["priority"] == 2
def test_prioritize_gaps_single_word_is_lowest_priority():
    """A generic single word ("innovation") is assigned priority 3."""
    from scripts.resume_optimizer import prioritize_gaps

    gap_entries = prioritize_gaps(["innovation"], SAMPLE_RESUME)
    first_entry = gap_entries[0]

    assert first_entry["priority"] == 3
def test_prioritize_gaps_sorted_by_priority():
    """Entries come back ordered by ascending priority value."""
    from scripts.resume_optimizer import prioritize_gaps

    # Deliberately mixed input covering several kinds of term.
    mixed_terms = ["innovation", "AWS", "cross-functional", "managed service contracts"]
    gap_entries = prioritize_gaps(mixed_terms, SAMPLE_RESUME)

    observed = [entry["priority"] for entry in gap_entries]
    expected = sorted(observed)
    assert observed == expected
# ── hallucination_check ───────────────────────────────────────────────────────
def test_hallucination_check_passes_unchanged_resume():
    """A rewrite that only changes the career summary passes the check."""
    from scripts.resume_optimizer import hallucination_check

    # Shallow copy: experience and education are the very same objects.
    candidate = dict(SAMPLE_RESUME)
    candidate["career_summary"] = (
        "Dynamic CSM with cross-functional stakeholder management experience."
    )

    assert hallucination_check(SAMPLE_RESUME, candidate) is True
def test_hallucination_check_fails_on_new_employer():
    """A rewrite that adds an employer absent from the original is rejected."""
    from scripts.resume_optimizer import hallucination_check

    invented_job = {
        "title": "VP of Customer Success",
        "company": "Fabricated Corp",
        "start_date": "2019",
        "end_date": "2021",
        "bullets": ["Led a team of 30."],
    }
    # Build a fresh experience list so SAMPLE_RESUME itself is not modified.
    candidate = {
        **SAMPLE_RESUME,
        "experience": [*SAMPLE_RESUME["experience"], invented_job],
    }

    assert hallucination_check(SAMPLE_RESUME, candidate) is False
def test_hallucination_check_fails_on_new_institution():
    """A rewrite that adds an institution absent from the original is rejected."""
    from scripts.resume_optimizer import hallucination_check

    invented_degree = {
        "degree": "M.S.",
        "field": "Data Science",
        "institution": "MIT",
        "graduation_year": "2020",
    }
    # Build a fresh education list so SAMPLE_RESUME itself is not modified.
    candidate = {
        **SAMPLE_RESUME,
        "education": SAMPLE_RESUME["education"] + [invented_degree],
    }

    assert hallucination_check(SAMPLE_RESUME, candidate) is False
# ── render_resume_text ────────────────────────────────────────────────────────
def test_render_resume_text_contains_all_sections():
    """The rendered text includes the section headings and key field values."""
    from scripts.resume_optimizer import render_resume_text

    rendered = render_resume_text(SAMPLE_RESUME)

    # Checked in order: name, then each heading followed by sample content.
    expected_fragments = (
        "Alex Rivera",
        "SUMMARY",
        "EXPERIENCE",
        "Customer Success Manager",
        "Acme Corp",
        "EDUCATION",
        "State University",
        "SKILLS",
        "Salesforce",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_render_resume_text_omits_empty_sections():
    """No EXPERIENCE or SKILLS heading is emitted when those sections are empty."""
    from scripts.resume_optimizer import render_resume_text

    # Only the name is populated; every other field is blank or empty.
    name_only = {
        "name": "Jordan Lee",
        "email": "",
        "phone": "",
        "career_summary": "",
        "skills": [],
        "experience": [],
        "education": [],
        "achievements": [],
    }
    rendered = render_resume_text(name_only)

    for heading in ("EXPERIENCE", "SKILLS"):
        assert heading not in rendered
# ── db integration ────────────────────────────────────────────────────────────
def test_save_and_get_optimized_resume(tmp_path):
    """Round-trip an optimized resume and gap report through the db helpers.

    Initializes a database under pytest's ``tmp_path``, seeds one job row,
    stores the rewritten text and a JSON gap report for that job with
    ``save_optimized_resume``, then checks that ``get_optimized_resume``
    returns both values unchanged.
    """
    import sqlite3
    from contextlib import closing

    from scripts.db import init_db, save_optimized_resume, get_optimized_resume

    db_path = tmp_path / "test.db"
    init_db(db_path)

    # Seed a minimal job row (id=1) for the optimizer data to attach to.
    # closing() guarantees the connection is released even if the INSERT or
    # commit raises; sqlite3's own context manager only manages the
    # transaction and does not close the connection.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.execute(
            "INSERT INTO jobs (id, title, company, url, source, status) VALUES (1, 'CSM', 'Acme', 'http://x.com', 'test', 'approved')"
        )
        conn.commit()

    gap_report = json.dumps(
        [{"term": "Gainsight", "section": "skills", "priority": 1, "rationale": "test"}]
    )
    save_optimized_resume(db_path, job_id=1, text="Rewritten resume text.", gap_report=gap_report)

    result = get_optimized_resume(db_path, job_id=1)
    assert result["optimized_resume"] == "Rewritten resume text."
    # The gap report is stored as a JSON string; decode it before inspecting.
    parsed = json.loads(result["ats_gap_report"])
    assert parsed[0]["term"] == "Gainsight"
def test_get_optimized_resume_returns_empty_for_missing(tmp_path):
    """Looking up a job id with no row yields empty strings for both fields."""
    from scripts.db import init_db, get_optimized_resume

    database_file = tmp_path / "test.db"
    init_db(database_file)

    # No job with id 999 was ever inserted.
    record = get_optimized_resume(database_file, job_id=999)

    for field in ("optimized_resume", "ats_gap_report"):
        assert record[field] == ""