feat(apply): ATS resume optimizer backend — gap report + LLM rewrite
- scripts/resume_optimizer.py: full pipeline (extract_jd_signals → prioritize_gaps → rewrite_for_ats → hallucination_check)
- scripts/db.py: add optimized_resume + ats_gap_report columns, plus save_optimized_resume / get_optimized_resume helpers
- tests/test_resume_optimizer.py: 17 unit tests; patches at the source module (scripts.llm_router.LLMRouter), not the consumer

Tier gate: the gap report is free; the full LLM rewrite is paid+.
This commit is contained in:
parent
9702646738
commit
02e004ee5c
3 changed files with 761 additions and 0 deletions
|
|
@ -132,6 +132,8 @@ _MIGRATIONS = [
|
||||||
("hired_at", "TEXT"),
|
("hired_at", "TEXT"),
|
||||||
("survey_at", "TEXT"),
|
("survey_at", "TEXT"),
|
||||||
("calendar_event_id", "TEXT"),
|
("calendar_event_id", "TEXT"),
|
||||||
|
("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier)
|
||||||
|
("ats_gap_report", "TEXT"), # JSON gap report (free tier)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -301,6 +303,38 @@ def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: st
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def save_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None,
                          text: str = "", gap_report: str = "") -> None:
    """Persist ATS-optimized resume text and/or gap report for a job.

    Only the fields actually provided (non-empty) are written. The previous
    implementation updated both columns unconditionally, so a rewrite-only
    save (paid tier) NULL-ed a previously stored gap report (free tier) and
    vice versa. Saving with neither field is a no-op.

    Args:
        db_path: Path to the SQLite database file.
        job_id: Target jobs.id row; silently returns when None.
        text: ATS-rewritten resume text (paid tier).
        gap_report: JSON-encoded gap report (free tier).
    """
    if job_id is None:
        return

    # Build the SET clause only from the values the caller actually supplied.
    assignments = []
    params = []
    if text:
        assignments.append("optimized_resume = ?")
        params.append(text)
    if gap_report:
        assignments.append("ats_gap_report = ?")
        params.append(gap_report)
    if not assignments:
        return  # nothing to write

    params.append(job_id)
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            f"UPDATE jobs SET {', '.join(assignments)} WHERE id = ?",
            params,
        )
        conn.commit()
    finally:
        # Always release the connection, even if the UPDATE raises.
        conn.close()
|
|
||||||
|
|
||||||
|
def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict:
    """Return optimized_resume and ats_gap_report for a job, or empty strings if absent."""
    if job_id is None:
        return {"optimized_resume": "", "ats_gap_report": ""}

    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    record = conn.execute(
        "SELECT optimized_resume, ats_gap_report FROM jobs WHERE id = ?", (job_id,)
    ).fetchone()
    conn.close()

    if record is None:
        return {"optimized_resume": "", "ats_gap_report": ""}
    return {
        "optimized_resume": record["optimized_resume"] or "",
        "ats_gap_report": record["ats_gap_report"] or "",
    }
|
|
||||||
|
|
||||||
_UPDATABLE_JOB_COLS = {
|
_UPDATABLE_JOB_COLS = {
|
||||||
"title", "company", "url", "source", "location", "is_remote",
|
"title", "company", "url", "source", "location", "is_remote",
|
||||||
"salary", "description", "match_score", "keyword_gaps",
|
"salary", "description", "match_score", "keyword_gaps",
|
||||||
|
|
|
||||||
439
scripts/resume_optimizer.py
Normal file
439
scripts/resume_optimizer.py
Normal file
|
|
@ -0,0 +1,439 @@
|
||||||
|
"""
|
||||||
|
ATS Resume Optimizer — rewrite a candidate's resume to maximize keyword match
|
||||||
|
for a specific job description without fabricating experience.
|
||||||
|
|
||||||
|
Tier behaviour:
|
||||||
|
Free → gap report only (extract_jd_signals + prioritize_gaps, no LLM rewrite)
|
||||||
|
Paid → full LLM rewrite targeting the JD (rewrite_for_ats)
|
||||||
|
Premium → same as paid for now; fine-tuned voice model is a future enhancement
|
||||||
|
|
||||||
|
Pipeline:
|
||||||
|
job.description
|
||||||
|
→ extract_jd_signals() # TF-IDF gaps + LLM-extracted ATS signals
|
||||||
|
→ prioritize_gaps() # rank by impact, map to resume sections
|
||||||
|
→ rewrite_for_ats() # per-section LLM rewrite (paid+)
|
||||||
|
→ hallucination_check() # reject rewrites that invent new experience
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ── Signal extraction ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
    """Return ATS keyword signals from a job description.

    Two sources are merged, LLM-extracted phrasing first:

    1. TF-IDF keyword gaps from match.py (fast, deterministic, no LLM cost) —
       only computed when resume_text is supplied.
    2. LLM extraction for phrasing nuance TF-IDF misses (e.g. "cross-functional"
       vs "cross-team", "led" vs "managed").

    Either source failing is logged and skipped, so the worst case is an
    empty list rather than an exception.

    Args:
        description: Raw job description text.
        resume_text: Candidate's resume text (used to compute gap vs. already present).

    Returns:
        Deduplicated (case-insensitive) list of ATS keyword signals,
        most impactful first.
    """
    # Phase 1: deterministic TF-IDF gaps (always available)
    tfidf_gaps: list[str] = []
    if resume_text:
        try:
            from scripts.match import match_score
            _, tfidf_gaps = match_score(resume_text, description)
        except Exception:
            log.warning("[resume_optimizer] TF-IDF gap extraction failed", exc_info=True)

    # Phase 2: LLM extraction for phrasing/qualifier nuance
    llm_signals: list[str] = []
    try:
        from scripts.llm_router import LLMRouter
        prompt = (
            "Extract the most important ATS (applicant tracking system) keywords and "
            "phrases from this job description. Focus on:\n"
            "- Required skills and technologies (exact phrasing matters)\n"
            "- Action verbs used to describe responsibilities\n"
            "- Qualification signals ('required', 'must have', 'preferred')\n"
            "- Industry-specific terminology\n\n"
            "Return a JSON array of strings only. No explanation.\n\n"
            f"Job description:\n{description[:3000]}"
        )
        raw = LLMRouter().complete(prompt)
        # The model may wrap the JSON array in markdown — pull out the array itself.
        array_match = re.search(r"\[.*\]", raw, re.DOTALL)
        if array_match:
            parsed = json.loads(array_match.group(0))
            llm_signals = [item.strip() for item in parsed
                           if isinstance(item, str) and item.strip()]
    except Exception:
        log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)

    # Merge: LLM signals first (richer phrasing), TF-IDF fills gaps.
    # Dedupe case-insensitively, keeping first-seen casing and order.
    unique: dict[str, str] = {}
    for candidate in llm_signals + tfidf_gaps:
        unique.setdefault(candidate.lower(), candidate)
    return list(unique.values())
||||||
|
|
||||||
|
|
||||||
|
# ── Gap prioritization ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Map each gap term to the resume section where it would have the most ATS impact.
# ATS systems weight keywords higher in certain sections:
#   skills     — direct keyword match, highest density, indexed first
#   summary    — executive summary keywords often boost overall relevance score
#   experience — verbs + outcomes in bullet points; adds context weight
# NOTE: prioritize_gaps() matches these as case-insensitive substrings in either
# direction (keyword in term, or term in keyword), not exact equality.
# NOTE(review): lists are tech/leadership-centric — gaps for other domains fall
# through to "experience" by default.
_SECTION_KEYWORDS: dict[str, list[str]] = {
    "skills": [
        "python", "sql", "java", "typescript", "react", "vue", "docker",
        "kubernetes", "aws", "gcp", "azure", "terraform", "ci/cd", "git",
        "postgresql", "redis", "kafka", "spark", "tableau", "salesforce",
        "jira", "figma", "excel", "powerpoint", "machine learning", "llm",
        "deep learning", "pytorch", "tensorflow", "scikit-learn",
    ],
    "summary": [
        "leadership", "strategy", "vision", "executive", "director", "vp",
        "growth", "transformation", "stakeholder", "cross-functional",
        "p&l", "revenue", "budget", "board", "c-suite",
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
def prioritize_gaps(gaps: list[str], resume_sections: dict[str, Any]) -> list[dict]:
    """Rank keyword gaps by ATS impact and map each to a target resume section.

    Args:
        gaps: List of missing keyword signals from extract_jd_signals().
        resume_sections: Structured resume dict from resume_parser.parse_resume().

    Returns:
        List of dicts, sorted by priority ascending (1 = high, first):
            {
                "term": str,        # the keyword/phrase to inject
                "section": str,     # target resume section ("skills", "summary", "experience")
                "priority": int,    # 1=high, 2=medium, 3=low
                "rationale": str,   # why this section was chosen
            }

    Routing rules:
        - Terms matching the technical skills list → "skills", priority 1.
        - Terms matching leadership/executive signals → "summary", priority 1.
        - Remaining multi-word phrases → "experience", priority 2 (more
          specific = higher ATS weight).
        - Remaining single words → "experience", priority 3.
        - Terms already present in the resume (case-insensitive, whole-word)
          are dropped entirely.
    """
    existing_text = _flatten_resume_text(resume_sections).lower()

    prioritized: list[dict] = []
    for term in gaps:
        term_lower = term.lower()

        # Skip terms already present anywhere in the resume. Whole-word match
        # (lookarounds, so terms with non-word edges like "ci/cd" still work)
        # rather than raw substring: a bare `in` check made short terms like
        # "ai" vanish on an accidental hit inside "maintained".
        if re.search(rf"(?<!\w){re.escape(term_lower)}(?!\w)", existing_text):
            continue

        # REVIEW: _SECTION_KEYWORDS lists are tech-centric; domain-specific roles
        # (creative, healthcare, operations) may over-route to experience.
        # Consider expanding the lists or making them config-driven.

        # Partial-match: term contains a skills keyword or vice versa (handles
        # "PostgreSQL" vs "postgresql", "AWS Lambda" vs "aws", etc.)
        skills_match = any(kw in term_lower or term_lower in kw
                           for kw in _SECTION_KEYWORDS["skills"])
        summary_match = any(kw in term_lower or term_lower in kw
                            for kw in _SECTION_KEYWORDS["summary"])

        if skills_match:
            section = "skills"
            priority = 1
            rationale = "matched technical skills list — highest ATS keyword density"
        elif summary_match:
            section = "summary"
            priority = 1
            rationale = "matched leadership/executive signals — boosts overall relevance score"
        elif len(term.split()) > 1:
            section = "experience"
            priority = 2
            rationale = "multi-word phrase — more specific than single keywords, context weight in bullets"
        else:
            section = "experience"
            priority = 3
            rationale = "single generic term — lowest ATS impact, added to experience for coverage"

        prioritized.append({
            "term": term,
            "section": section,
            "priority": priority,
            "rationale": rationale,
        })

    # Stable sort: equal-priority terms keep their input order.
    prioritized.sort(key=lambda entry: entry["priority"])
    return prioritized
||||||
|
|
||||||
|
|
||||||
|
def _flatten_resume_text(resume: dict[str, Any]) -> str:
|
||||||
|
"""Concatenate all text from a structured resume dict into one searchable string."""
|
||||||
|
parts: list[str] = []
|
||||||
|
parts.append(resume.get("career_summary", "") or "")
|
||||||
|
parts.extend(resume.get("skills", []))
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
parts.append(exp.get("title", ""))
|
||||||
|
parts.append(exp.get("company", ""))
|
||||||
|
parts.extend(exp.get("bullets", []))
|
||||||
|
for edu in resume.get("education", []):
|
||||||
|
parts.append(edu.get("degree", ""))
|
||||||
|
parts.append(edu.get("field", ""))
|
||||||
|
parts.append(edu.get("institution", ""))
|
||||||
|
parts.extend(resume.get("achievements", []))
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# ── LLM rewrite ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def rewrite_for_ats(
    resume: dict[str, Any],
    prioritized_gaps: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Rewrite resume sections to naturally incorporate ATS keyword gaps.

    Operates section-by-section. For each target section in prioritized_gaps,
    builds a focused prompt that injects only the gaps destined for that section.
    The hallucination constraint is enforced in the prompt itself and verified
    post-hoc by hallucination_check().

    Args:
        resume: Structured resume dict (from resume_parser.parse_resume).
        prioritized_gaps: Output of prioritize_gaps().
        job: Job dict with at minimum {"title": str, "company": str, "description": str}.
        candidate_voice: Free-text personality/style note from user.yaml (may be empty).

    Returns:
        New resume dict (same structure as input) with rewritten sections.
        Sections with no relevant gaps are copied through unchanged; a section
        whose LLM call raises is also left unchanged (logged, not raised).
    """
    # Imported lazily so tests can patch scripts.llm_router.LLMRouter and so the
    # free tier (gap report only) never touches the LLM stack.
    from scripts.llm_router import LLMRouter
    router = LLMRouter()

    # Group gaps by target section: {"skills": [...], "summary": [...], ...}
    by_section: dict[str, list[str]] = {}
    for gap in prioritized_gaps:
        by_section.setdefault(gap["section"], []).append(gap["term"])

    rewritten = dict(resume)  # shallow copy — sections replaced below

    for section, terms in by_section.items():
        # Quote each term so multi-word phrases survive as units in the prompt.
        terms_str = ", ".join(f'"{t}"' for t in terms)
        original_content = _section_text_for_prompt(resume, section)

        # Optional style rider appended after rule 5 (it begins with "\n\n").
        voice_note = (
            f'\n\nCandidate voice/style: "{candidate_voice}". '
            "Preserve this authentic tone — do not write generically."
        ) if candidate_voice else ""

        prompt = (
            f"You are rewriting the **{section}** section of a resume to help it pass "
            f"ATS (applicant tracking system) screening for this role:\n"
            f"  Job title: {job.get('title', 'Unknown')}\n"
            f"  Company: {job.get('company', 'Unknown')}\n\n"
            f"Inject these missing ATS keywords naturally into the section:\n"
            f"  {terms_str}\n\n"
            f"CRITICAL RULES — violating any of these invalidates the rewrite:\n"
            f"1. Do NOT invent new employers, job titles, dates, or education.\n"
            f"2. Do NOT add skills the candidate did not already demonstrate.\n"
            f"3. Only rephrase existing content — replace vague verbs/nouns with the "
            f"   ATS-preferred equivalents listed above.\n"
            f"4. Keep the same number of bullet points in experience entries.\n"
            f"5. Return ONLY the rewritten section content, no labels or explanation."
            f"{voice_note}\n\n"
            f"Original {section} section:\n{original_content}"
        )

        try:
            result = router.complete(prompt)
            # Fold the rewritten text back into the (accumulating) copy.
            rewritten = _apply_section_rewrite(rewritten, section, result.strip())
        except Exception:
            log.warning("[resume_optimizer] rewrite failed for section %r", section, exc_info=True)
            # Leave section unchanged on failure

    return rewritten
||||||
|
|
||||||
|
|
||||||
|
def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str:
|
||||||
|
"""Render a resume section as plain text suitable for an LLM prompt."""
|
||||||
|
if section == "summary":
|
||||||
|
return resume.get("career_summary", "") or "(empty)"
|
||||||
|
if section == "skills":
|
||||||
|
skills = resume.get("skills", [])
|
||||||
|
return ", ".join(skills) if skills else "(empty)"
|
||||||
|
if section == "experience":
|
||||||
|
lines: list[str] = []
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})")
|
||||||
|
for b in exp.get("bullets", []):
|
||||||
|
lines.append(f" • {b}")
|
||||||
|
return "\n".join(lines) if lines else "(empty)"
|
||||||
|
return "(unsupported section)"
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) -> dict[str, Any]:
|
||||||
|
"""Return a new resume dict with the given section replaced by rewritten text."""
|
||||||
|
updated = dict(resume)
|
||||||
|
if section == "summary":
|
||||||
|
updated["career_summary"] = rewritten
|
||||||
|
elif section == "skills":
|
||||||
|
# LLM returns comma-separated or newline-separated skills
|
||||||
|
skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()]
|
||||||
|
updated["skills"] = skills
|
||||||
|
elif section == "experience":
|
||||||
|
# For experience, we keep the structured entries but replace the bullets.
|
||||||
|
# The LLM rewrites the whole section as plain text; we re-parse the bullets.
|
||||||
|
updated["experience"] = _reparse_experience_bullets(resume["experience"], rewritten)
|
||||||
|
return updated
|
||||||
|
|
||||||
|
|
||||||
|
def _reparse_experience_bullets(
|
||||||
|
original_entries: list[dict],
|
||||||
|
rewritten_text: str,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Re-associate rewritten bullet text with the original experience entries.
|
||||||
|
|
||||||
|
The LLM rewrites the section as a block of text. We split on the original
|
||||||
|
entry headers (title + company) to re-bind bullets to entries. Falls back
|
||||||
|
to the original entries if splitting fails.
|
||||||
|
"""
|
||||||
|
if not original_entries:
|
||||||
|
return original_entries
|
||||||
|
|
||||||
|
result: list[dict] = []
|
||||||
|
remaining = rewritten_text
|
||||||
|
|
||||||
|
for i, entry in enumerate(original_entries):
|
||||||
|
# Find where the next entry starts so we can slice out this entry's bullets
|
||||||
|
if i + 1 < len(original_entries):
|
||||||
|
next_title = original_entries[i + 1]["title"]
|
||||||
|
# Look for the next entry header in the remaining text
|
||||||
|
split_pat = re.escape(next_title)
|
||||||
|
m = re.search(split_pat, remaining, re.IGNORECASE)
|
||||||
|
chunk = remaining[:m.start()] if m else remaining
|
||||||
|
remaining = remaining[m.start():] if m else ""
|
||||||
|
else:
|
||||||
|
chunk = remaining
|
||||||
|
|
||||||
|
bullets = [
|
||||||
|
re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip()
|
||||||
|
for line in chunk.splitlines()
|
||||||
|
if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip())
|
||||||
|
]
|
||||||
|
new_entry = dict(entry)
|
||||||
|
new_entry["bullets"] = bullets if bullets else entry["bullets"]
|
||||||
|
result.append(new_entry)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# ── Hallucination guard ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
    """Return True if the rewrite is safe (no fabricated facts detected).

    Compares the factual anchors (employers, job titles, date ranges,
    educational institutions) of the rewritten resume against the original.
    Any anchor that exists only in the rewrite signals a fabricated entry.

    Args:
        original: Structured resume dict before rewrite.
        rewritten: Structured resume dict after rewrite.

    Returns:
        True  → rewrite is safe to use
        False → hallucination detected; caller should fall back to original
    """
    fabricated = _extract_anchors(rewritten) - _extract_anchors(original)
    if not fabricated:
        return True
    log.warning(
        "[resume_optimizer] hallucination_check FAILED — new anchors in rewrite: %s",
        fabricated,
    )
    return False
||||||
|
|
||||||
|
|
||||||
|
def _extract_anchors(resume: dict[str, Any]) -> frozenset[str]:
|
||||||
|
"""Extract stable factual anchors (company, title, dates) from experience entries."""
|
||||||
|
anchors: set[str] = set()
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
for field in ("company", "title", "start_date", "end_date"):
|
||||||
|
val = (exp.get(field) or "").strip().lower()
|
||||||
|
if val:
|
||||||
|
anchors.add(val)
|
||||||
|
for edu in resume.get("education", []):
|
||||||
|
val = (edu.get("institution") or "").strip().lower()
|
||||||
|
if val:
|
||||||
|
anchors.add(val)
|
||||||
|
return frozenset(anchors)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Resume → plain text renderer ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def render_resume_text(resume: dict[str, Any]) -> str:
    """Render a structured resume dict back to formatted plain text for PDF export."""
    out: list[str] = []

    # Contact header: name/email/phone, space-separated, blanks dropped.
    header = [resume.get("name", ""), resume.get("email", ""), resume.get("phone", "")]
    out.append(" ".join(part for part in header if part))
    out.append("")

    summary = resume.get("career_summary")
    if summary:
        out += ["SUMMARY", summary, ""]

    if resume.get("experience"):
        out.append("EXPERIENCE")
        for role in resume["experience"]:
            out.append(
                f"{role.get('title', '')} | {role.get('company', '')} "
                f"({role.get('start_date', '')}–{role.get('end_date', '')})"
            )
            out.extend(f"  • {item}" for item in role.get("bullets", []))
            out.append("")

    if resume.get("education"):
        out.append("EDUCATION")
        for school in resume["education"]:
            out.append(
                f"{school.get('degree', '')} {school.get('field', '')} | "
                f"{school.get('institution', '')} {school.get('graduation_year', '')}"
            )
        out.append("")

    if resume.get("skills"):
        out += ["SKILLS", ", ".join(resume["skills"]), ""]

    if resume.get("achievements"):
        out.append("ACHIEVEMENTS")
        out.extend(f"  • {item}" for item in resume["achievements"])
        out.append("")

    return "\n".join(out)
||||||
288
tests/test_resume_optimizer.py
Normal file
288
tests/test_resume_optimizer.py
Normal file
|
|
@ -0,0 +1,288 @@
|
||||||
|
# tests/test_resume_optimizer.py
|
||||||
|
"""Tests for scripts/resume_optimizer.py"""
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
|
||||||
|
# ── Fixtures ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Structured resume fixture in the dict shape the resume_optimizer helpers
# consume (career_summary / skills / experience / education / achievements).
# "Salesforce" is deliberately present so prioritize_gaps drops it as an
# already-covered term.
SAMPLE_RESUME = {
    "name": "Alex Rivera",
    "email": "alex@example.com",
    "phone": "555-1234",
    "career_summary": "Experienced Customer Success Manager with a track record of growth.",
    "skills": ["Salesforce", "Python", "customer success"],
    "experience": [
        {
            "title": "Customer Success Manager",
            "company": "Acme Corp",
            "start_date": "2021",
            "end_date": "present",
            "bullets": [
                "Managed a portfolio of 120 enterprise accounts.",
                "Reduced churn by 18% through proactive outreach.",
            ],
        },
        {
            "title": "Support Engineer",
            "company": "Beta Inc",
            "start_date": "2018",
            "end_date": "2021",
            "bullets": ["Resolved escalations for top-tier clients."],
        },
    ],
    "education": [
        {
            "degree": "B.S.",
            "field": "Computer Science",
            "institution": "State University",
            "graduation_year": "2018",
        }
    ],
    "achievements": [],
}
||||||
|
|
||||||
|
# Job description mixing a novel tool (Gainsight), summary-routed signals
# ("cross-functional", "stakeholder"), and a skills-routed keyword ("AWS").
SAMPLE_JD = (
    "We are looking for a Customer Success Manager with Gainsight, cross-functional "
    "leadership experience, and strong stakeholder management skills. AWS knowledge a plus."
)
||||||
|
|
||||||
|
|
||||||
|
# ── extract_jd_signals ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_extract_jd_signals_returns_list():
    """A list comes back even when both the LLM and TF-IDF sources blow up."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.side_effect = Exception("no LLM")
        signals = extract_jd_signals(SAMPLE_JD, resume_text="Python developer")

    assert isinstance(signals, list)
||||||
|
|
||||||
|
|
||||||
|
def test_extract_jd_signals_llm_path_parses_json_array():
    """LLM-extracted signals are parsed out of the JSON array and included."""
    from scripts.resume_optimizer import extract_jd_signals

    fake_reply = '["Gainsight", "cross-functional leadership", "stakeholder management"]'
    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = fake_reply
        signals = extract_jd_signals(SAMPLE_JD)

    assert "Gainsight" in signals
    assert "cross-functional leadership" in signals
||||||
|
|
||||||
|
|
||||||
|
def test_extract_jd_signals_deduplicates():
    """Duplicate terms across the LLM and TF-IDF sources collapse to one entry."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = '["Python", "AWS", "Python"]'
        signals = extract_jd_signals(SAMPLE_JD)

    assert signals.count("Python") == 1
||||||
|
|
||||||
|
|
||||||
|
def test_extract_jd_signals_handles_malformed_llm_json():
    """Non-JSON LLM output degrades gracefully instead of raising."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = "Here are some keywords: Gainsight, AWS"
        signals = extract_jd_signals(SAMPLE_JD)

    # Still a list (may be empty — TF-IDF is skipped without resume_text).
    assert isinstance(signals, list)
||||||
|
|
||||||
|
|
||||||
|
# ── prioritize_gaps ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_prioritize_gaps_skips_existing_terms():
    """Terms already on the resume are dropped from the prioritized output."""
    from scripts.resume_optimizer import prioritize_gaps

    # "Salesforce" already appears in SAMPLE_RESUME's skills list.
    ranked = prioritize_gaps(["Salesforce", "Gainsight"], SAMPLE_RESUME)
    ranked_terms = {entry["term"] for entry in ranked}

    assert "Salesforce" not in ranked_terms
    assert "Gainsight" in ranked_terms
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_routes_tech_terms_to_skills():
    """Known tech keywords land in the skills section at top priority."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["AWS", "Docker"], SAMPLE_RESUME)
    lookup = {entry["term"]: entry for entry in ranked}

    assert lookup["AWS"]["section"] == "skills"
    assert lookup["AWS"]["priority"] == 1
    assert lookup["Docker"]["section"] == "skills"
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_routes_leadership_terms_to_summary():
    """Leadership/executive signals land in the summary section."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["cross-functional", "stakeholder"], SAMPLE_RESUME)
    lookup = {entry["term"]: entry for entry in ranked}

    assert lookup["cross-functional"]["section"] == "summary"
    assert lookup["stakeholder"]["section"] == "summary"
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_multi_word_routes_to_experience():
    """An unlisted multi-word phrase lands in experience at priority 2."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["proactive client engagement"], SAMPLE_RESUME)

    assert ranked[0]["section"] == "experience"
    assert ranked[0]["priority"] == 2
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_single_word_is_lowest_priority():
    """An unlisted single word lands in experience at priority 3."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["innovation"], SAMPLE_RESUME)

    assert ranked[0]["priority"] == 3
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_sorted_by_priority():
    """Output is ordered by ascending priority (1 first)."""
    from scripts.resume_optimizer import prioritize_gaps

    mixed = ["innovation", "AWS", "cross-functional", "managed service contracts"]
    priorities = [entry["priority"] for entry in prioritize_gaps(mixed, SAMPLE_RESUME)]

    assert priorities == sorted(priorities)
||||||
|
|
||||||
|
|
||||||
|
# ── hallucination_check ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_hallucination_check_passes_unchanged_resume():
    """A rewrite that only touches the summary keeps all anchors and passes."""
    from scripts.resume_optimizer import hallucination_check

    safe_rewrite = dict(SAMPLE_RESUME)
    safe_rewrite["career_summary"] = (
        "Dynamic CSM with cross-functional stakeholder management experience."
    )

    assert hallucination_check(SAMPLE_RESUME, safe_rewrite) is True
||||||
|
|
||||||
|
|
||||||
|
def test_hallucination_check_fails_on_new_employer():
    """A rewrite that introduces an unseen company is rejected."""
    from scripts.resume_optimizer import hallucination_check

    invented = {
        "title": "VP of Customer Success",
        "company": "Fabricated Corp",
        "start_date": "2019",
        "end_date": "2021",
        "bullets": ["Led a team of 30."],
    }
    bad_rewrite = dict(SAMPLE_RESUME)
    bad_rewrite["experience"] = [*SAMPLE_RESUME["experience"], invented]

    assert hallucination_check(SAMPLE_RESUME, bad_rewrite) is False
||||||
|
|
||||||
|
|
||||||
|
def test_hallucination_check_fails_on_new_institution():
    """A rewrite that adds an unseen school is rejected."""
    from scripts.resume_optimizer import hallucination_check

    bad_rewrite = dict(SAMPLE_RESUME)
    extra_degree = {
        "degree": "M.S.",
        "field": "Data Science",
        "institution": "MIT",
        "graduation_year": "2020",
    }
    bad_rewrite["education"] = [*SAMPLE_RESUME["education"], extra_degree]

    assert hallucination_check(SAMPLE_RESUME, bad_rewrite) is False
||||||
|
|
||||||
|
|
||||||
|
# ── render_resume_text ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_render_resume_text_contains_all_sections():
    """Every populated section shows up in the rendered plain text."""
    from scripts.resume_optimizer import render_resume_text

    rendered = render_resume_text(SAMPLE_RESUME)

    for expected in (
        "Alex Rivera",
        "SUMMARY",
        "EXPERIENCE",
        "Customer Success Manager",
        "Acme Corp",
        "EDUCATION",
        "State University",
        "SKILLS",
        "Salesforce",
    ):
        assert expected in rendered
||||||
|
|
||||||
|
|
||||||
|
def test_render_resume_text_omits_empty_sections():
    """Headers for empty sections never appear in the output."""
    from scripts.resume_optimizer import render_resume_text

    bare_resume = {
        "name": "Jordan Lee",
        "email": "",
        "phone": "",
        "career_summary": "",
        "skills": [],
        "experience": [],
        "education": [],
        "achievements": [],
    }
    rendered = render_resume_text(bare_resume)

    assert "EXPERIENCE" not in rendered
    assert "SKILLS" not in rendered
||||||
|
|
||||||
|
|
||||||
|
# ── db integration ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_save_and_get_optimized_resume(tmp_path):
    """Round-trip: saved resume text and gap report come back intact."""
    from scripts.db import init_db, save_optimized_resume, get_optimized_resume

    db_file = tmp_path / "test.db"
    init_db(db_file)

    # Seed one job row so the UPDATE has something to hit.
    import sqlite3
    with sqlite3.connect(db_file) as conn:
        conn.execute(
            "INSERT INTO jobs (id, title, company, url, source, status) VALUES (1, 'CSM', 'Acme', 'http://x.com', 'test', 'approved')"
        )
        conn.commit()

    report = json.dumps(
        [{"term": "Gainsight", "section": "skills", "priority": 1, "rationale": "test"}]
    )
    save_optimized_resume(db_file, job_id=1, text="Rewritten resume text.", gap_report=report)

    stored = get_optimized_resume(db_file, job_id=1)
    assert stored["optimized_resume"] == "Rewritten resume text."
    assert json.loads(stored["ats_gap_report"])[0]["term"] == "Gainsight"
||||||
|
|
||||||
|
|
||||||
|
def test_get_optimized_resume_returns_empty_for_missing(tmp_path):
    """Looking up an unknown job id yields empty strings, not an error."""
    from scripts.db import init_db, get_optimized_resume

    db_file = tmp_path / "test.db"
    init_db(db_file)

    missing = get_optimized_resume(db_file, job_id=999)

    assert missing["optimized_resume"] == ""
    assert missing["ats_gap_report"] == ""
||||||
Loading…
Reference in a new issue