peregrine/scripts/resume_optimizer.py
pyr0ball 4e11cf3cfa fix: sanitize invalid JSON escape sequences from LLM output in resume optimizer
LLMs occasionally emit backslash sequences that are valid regex but not valid
JSON (e.g. \s, \d, \p). This caused extract_jd_signals() to fall through to
the exception handler, leaving llm_signals empty. With no LLM signals, the
optimizer fell back to TF-IDF only — which is more conservative and can
legitimately return zero gaps, making the UI appear to say the resume is fine.

Fix: strip bare backslashes not followed by a recognised JSON escape character
("  \  /  b  f  n  r  t  u) before parsing. Preserves \n, \", etc.

Reproduces: cover letter generation concurrent with gap analysis raises the
probability of a slightly malformed LLM response due to model load.
2026-04-16 11:11:50 -07:00

840 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
ATS Resume Optimizer — rewrite a candidate's resume to maximize keyword match
for a specific job description without fabricating experience.
Tier behaviour:
Free → gap report only (extract_jd_signals + prioritize_gaps, no LLM rewrite)
Paid → full LLM rewrite targeting the JD (rewrite_for_ats)
Premium → same as paid for now; fine-tuned voice model is a future enhancement
Pipeline:
job.description
→ extract_jd_signals() # TF-IDF gaps + LLM-extracted ATS signals
→ prioritize_gaps() # rank by impact, map to resume sections
→ rewrite_for_ats() # per-section LLM rewrite (paid+)
→ hallucination_check() # reject rewrites that invent new experience
"""
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
from typing import Any
log = logging.getLogger(__name__)
# ── Signal extraction ─────────────────────────────────────────────────────────
def _sanitize_json_escapes(json_str: str) -> str:
    r"""Strip backslashes that begin invalid JSON escape sequences.

    LLMs occasionally emit regex-style escapes (\s, \d, \p) inside what should
    be a JSON string; json.loads rejects these. Valid JSON escapes
    (\" \\ \/ \b \f \n \r \t \uXXXX) are preserved.

    A doubled backslash is consumed as a unit *first*, so an escaped backslash
    followed by a letter (e.g. \\s — a literal "\s" in the decoded string) is
    left intact; a single left-to-right strip pass would corrupt it into the
    invalid \s. A dangling backslash at end-of-input is dropped.
    """
    def _repair(m: re.Match) -> str:
        # group(1): a valid escape pair — keep verbatim.
        # group(2): the character after an invalid backslash — keep it, drop "\".
        # neither:  a dangling trailing backslash — drop it entirely.
        return m.group(1) or m.group(2) or ""

    return re.sub(r'(\\["\\/bfnrtu])|\\(.)|\\', _repair, json_str, flags=re.DOTALL)


def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
    """Return ATS keyword signals from a job description.

    Combines two sources:
      1. TF-IDF keyword gaps from match.py (fast, deterministic, no LLM cost)
      2. LLM extraction for phrasing nuance TF-IDF misses (e.g. "cross-functional"
         vs "cross-team", "led" vs "managed")
    Falls back to TF-IDF-only if the LLM is unavailable or returns junk.

    Args:
        description: Raw job description text.
        resume_text: Candidate's resume text (used to compute gap vs. already present).

    Returns:
        Deduplicated list of ATS keyword signals, most impactful first.
    """
    # Phase 1: deterministic TF-IDF gaps (always available)
    tfidf_gaps: list[str] = []
    if resume_text:
        try:
            from scripts.match import match_score
            _, tfidf_gaps = match_score(resume_text, description)
        except Exception:
            log.warning("[resume_optimizer] TF-IDF gap extraction failed", exc_info=True)
    # Phase 2: LLM extraction for phrasing/qualifier nuance
    llm_signals: list[str] = []
    try:
        from scripts.llm_router import LLMRouter
        prompt = (
            "Extract the most important ATS (applicant tracking system) keywords and "
            "phrases from this job description. Focus on:\n"
            "- Required skills and technologies (exact phrasing matters)\n"
            "- Action verbs used to describe responsibilities\n"
            "- Qualification signals ('required', 'must have', 'preferred')\n"
            "- Industry-specific terminology\n\n"
            "Return a JSON array of strings only. No explanation.\n\n"
            f"Job description:\n{description[:3000]}"
        )
        raw = LLMRouter().complete(prompt)
        # Extract the JSON array from the response (LLM may wrap it in markdown).
        match = re.search(r"\[.*\]", raw, re.DOTALL)
        if match:
            # Repair invalid escape sequences before parsing (see helper above);
            # a parse failure would otherwise silently drop all LLM signals.
            llm_signals = json.loads(_sanitize_json_escapes(match.group(0)))
            # Keep only non-empty strings — the model sometimes nests objects.
            llm_signals = [s.strip() for s in llm_signals if isinstance(s, str) and s.strip()]
    except Exception:
        log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)
    # Merge: LLM signals first (richer phrasing), TF-IDF fills gaps
    seen: set[str] = set()
    merged: list[str] = []
    for term in llm_signals + tfidf_gaps:
        key = term.lower()
        if key not in seen:
            seen.add(key)
            merged.append(term)
    return merged
# ── Gap prioritization ────────────────────────────────────────────────────────
# Map each gap term to the resume section where it would have the most ATS impact.
# ATS systems weight keywords higher in certain sections:
# skills — direct keyword match, highest density, indexed first
# summary — executive summary keywords often boost overall relevance score
# experience — verbs + outcomes in bullet points; adds context weight
# Keyword → section routing table used by prioritize_gaps().
_SECTION_KEYWORDS: dict[str, list[str]] = {
    # Hard technical skills: highest ATS keyword density lives in "skills".
    "skills": [
        "python", "sql", "java", "typescript", "react", "vue", "docker",
        "kubernetes", "aws", "gcp", "azure", "terraform", "ci/cd", "git",
        "postgresql", "redis", "kafka", "spark", "tableau", "salesforce",
        "jira", "figma", "excel", "powerpoint", "machine learning", "llm",
        "deep learning", "pytorch", "tensorflow", "scikit-learn",
    ],
    # Leadership / executive signals: routed to the "summary" section.
    "summary": [
        "leadership", "strategy", "vision", "executive", "director", "vp",
        "growth", "transformation", "stakeholder", "cross-functional",
        "p&l", "revenue", "budget", "board", "c-suite",
    ],
}


def prioritize_gaps(gaps: list[str], resume_sections: dict[str, Any]) -> list[dict]:
    """Rank keyword gaps by ATS impact and map each to a target resume section.

    Ranking rules (highest first):
      - term matches the technical skills table → "skills", priority 1
        (direct keyword match, highest density, indexed first by ATS)
      - term matches leadership/executive signals → "summary", priority 1
        (summary keywords boost the overall relevance score)
      - remaining multi-word phrases → "experience", priority 2
        (more specific than single words, context weight in bullets)
      - remaining single words → "experience", priority 3
    Terms already present anywhere in the resume (case-insensitive substring
    match) are dropped entirely.

    Args:
        gaps: List of missing keyword signals from extract_jd_signals().
        resume_sections: Structured resume dict from resume_parser.parse_resume().

    Returns:
        List of dicts, sorted by priority ascending (1 = high):
        {
            "term": str,       # the keyword/phrase to inject
            "section": str,    # "skills" | "summary" | "experience"
            "priority": int,   # 1=high, 2=medium, 3=low
            "rationale": str,  # why this section was chosen
        }
    """
    existing_text = _flatten_resume_text(resume_sections).lower()
    prioritized: list[dict] = []
    for term in gaps:
        # Skip terms already present anywhere in the resume
        if term.lower() in existing_text:
            continue
        # REVIEW: _SECTION_KEYWORDS lists are tech-centric; domain-specific roles
        # (creative, healthcare, operations) may over-route to experience.
        # Consider expanding the lists or making them config-driven.
        term_lower = term.lower()
        # Partial-match: term contains a skills keyword (handles "PostgreSQL" vs
        # "postgresql", "AWS Lambda" vs "aws", etc.)
        skills_match = any(kw in term_lower or term_lower in kw
                           for kw in _SECTION_KEYWORDS["skills"])
        summary_match = any(kw in term_lower or term_lower in kw
                            for kw in _SECTION_KEYWORDS["summary"])
        if skills_match:
            section = "skills"
            priority = 1
            rationale = "matched technical skills list — highest ATS keyword density"
        elif summary_match:
            section = "summary"
            priority = 1
            rationale = "matched leadership/executive signals — boosts overall relevance score"
        elif len(term.split()) > 1:
            section = "experience"
            priority = 2
            rationale = "multi-word phrase — more specific than single keywords, context weight in bullets"
        else:
            section = "experience"
            priority = 3
            rationale = "single generic term — lowest ATS impact, added to experience for coverage"
        prioritized.append({
            "term": term,
            "section": section,
            "priority": priority,
            "rationale": rationale,
        })
    # Stable sort: input order is preserved within the same priority tier.
    prioritized.sort(key=lambda x: x["priority"])
    return prioritized


def _flatten_resume_text(resume: dict[str, Any]) -> str:
    """Concatenate all text from a structured resume dict into one searchable string."""
    parts: list[str] = []
    parts.append(resume.get("career_summary", "") or "")
    parts.extend(resume.get("skills", []))
    for exp in resume.get("experience", []):
        parts.append(exp.get("title", ""))
        parts.append(exp.get("company", ""))
        parts.extend(exp.get("bullets", []))
    for edu in resume.get("education", []):
        parts.append(edu.get("degree", ""))
        parts.append(edu.get("field", ""))
        parts.append(edu.get("institution", ""))
    parts.extend(resume.get("achievements", []))
    return " ".join(parts)
# ── LLM rewrite ───────────────────────────────────────────────────────────────
def rewrite_for_ats(
    resume: dict[str, Any],
    prioritized_gaps: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Rewrite resume sections so they naturally carry the missing ATS keywords.

    Works one section at a time: gap terms are bucketed by target section and
    a focused LLM prompt is issued per section, injecting only that section's
    terms. The anti-fabrication constraint is stated in the prompt itself and
    verified afterwards by hallucination_check().

    Args:
        resume: Structured resume dict (from resume_parser.parse_resume).
        prioritized_gaps: Output of prioritize_gaps().
        job: Job dict with at minimum {"title": str, "company": str, "description": str}.
        candidate_voice: Free-text personality/style note from user.yaml (may be empty).

    Returns:
        New resume dict (same structure as input) with rewritten sections.
        Sections with no relevant gaps are copied through unchanged.
    """
    from scripts.llm_router import LLMRouter
    llm = LLMRouter()
    # Bucket the gap terms by the section they should be injected into.
    buckets: dict[str, list[str]] = {}
    for gap in prioritized_gaps:
        buckets.setdefault(gap["section"], []).append(gap["term"])
    updated = dict(resume)  # shallow copy — sections replaced below
    for section, terms in buckets.items():
        terms_str = ", ".join(f'"{t}"' for t in terms)
        original_content = _section_text_for_prompt(resume, section)
        if candidate_voice:
            voice_note = (
                f'\n\nCandidate voice/style: "{candidate_voice}". '
                "Preserve this authentic tone — do not write generically."
            )
        else:
            voice_note = ""
        prompt = (
            f"You are rewriting the **{section}** section of a resume to help it pass "
            f"ATS (applicant tracking system) screening for this role:\n"
            f"  Job title: {job.get('title', 'Unknown')}\n"
            f"  Company: {job.get('company', 'Unknown')}\n\n"
            f"Inject these missing ATS keywords naturally into the section:\n"
            f"  {terms_str}\n\n"
            f"CRITICAL RULES — violating any of these invalidates the rewrite:\n"
            f"1. Do NOT invent new employers, job titles, dates, or education.\n"
            f"2. Do NOT add skills the candidate did not already demonstrate.\n"
            f"3. Only rephrase existing content — replace vague verbs/nouns with the "
            f"   ATS-preferred equivalents listed above.\n"
            f"4. Keep the same number of bullet points in experience entries.\n"
            f"5. Return ONLY the rewritten section content, no labels or explanation."
            f"{voice_note}\n\n"
            f"Original {section} section:\n{original_content}"
        )
        try:
            response = llm.complete(prompt)
            updated = _apply_section_rewrite(updated, section, response.strip())
        except Exception:
            # Best-effort: a failed section rewrite keeps the original content.
            log.warning("[resume_optimizer] rewrite failed for section %r", section, exc_info=True)
    return updated
def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str:
    """Render a resume section as plain text suitable for an LLM prompt.

    Returns "(empty)" when the section exists but has no content, and
    "(unsupported section)" for section names this renderer doesn't know.

    Fix: the experience date range now joins start/end dates with "–";
    previously they were concatenated directly (e.g. "(20202023)").
    """
    if section == "summary":
        return resume.get("career_summary", "") or "(empty)"
    if section == "skills":
        skills = resume.get("skills", [])
        return ", ".join(skills) if skills else "(empty)"
    if section == "experience":
        lines: list[str] = []
        for exp in resume.get("experience", []):
            lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})")
            for b in exp.get("bullets", []):
                lines.append(f"{b}")
        return "\n".join(lines) if lines else "(empty)"
    return "(unsupported section)"
def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) -> dict[str, Any]:
    """Return a copy of *resume* with one section swapped for its rewrite."""
    out = dict(resume)
    if section == "summary":
        out["career_summary"] = rewritten
        return out
    if section == "skills":
        # The LLM may delimit skills with commas, newlines, or bullet glyphs.
        out["skills"] = [part.strip() for part in re.split(r"[,\n•·]+", rewritten) if part.strip()]
        return out
    if section == "experience":
        # Keep the structured entries; only the bullets are re-parsed out of
        # the LLM's plain-text rewrite of the whole section.
        out["experience"] = _reparse_experience_bullets(resume.get("experience", []), rewritten)
        return out
    # Unknown section: nothing to apply, hand back the untouched copy.
    return out
# Matches a leading bullet glyph (plus following whitespace) on a stripped line.
_BULLET_PREFIX = re.compile(r"^[•\-–—*◦▪▸►]\s*")


def _reparse_experience_bullets(
    original_entries: list[dict],
    rewritten_text: str,
) -> list[dict]:
    """Re-associate rewritten bullet text with the original experience entries.

    The LLM rewrites the section as one block of text. We locate each
    subsequent entry's title (case-insensitive) in the remaining text to slice
    out the current entry's chunk, then harvest its bullet-marked lines. If no
    bullet lines are found for an entry, its original bullets are kept.

    Fix: the bullet marker is now stripped from the *stripped* line. The old
    code filtered on line.strip() but ran the anchored substitution on the raw
    line, so an indented bullet ("  • foo") kept its marker in the output.
    """
    if not original_entries:
        return original_entries
    result: list[dict] = []
    remaining = rewritten_text
    for i, entry in enumerate(original_entries):
        # Find where the next entry starts so we can slice out this entry's bullets
        if i + 1 < len(original_entries):
            next_title = original_entries[i + 1]["title"]
            m = re.search(re.escape(next_title), remaining, re.IGNORECASE)
            chunk = remaining[:m.start()] if m else remaining
            remaining = remaining[m.start():] if m else ""
        else:
            chunk = remaining
        bullets: list[str] = []
        for line in chunk.splitlines():
            stripped = line.strip()
            if _BULLET_PREFIX.match(stripped):
                bullets.append(_BULLET_PREFIX.sub("", stripped).strip())
        new_entry = dict(entry)
        # Fail-safe: never replace real bullets with an empty list.
        new_entry["bullets"] = bullets if bullets else entry["bullets"]
        result.append(new_entry)
    return result
# ── Gap framing ───────────────────────────────────────────────────────────────
def frame_skill_gaps(
    struct: dict[str, Any],
    gap_framings: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Inject honest framing language for skills the candidate doesn't have directly.

    For each gap framing decision the user provided:
      - mode "adjacent": user has related experience → injects one bridging sentence
        into the most relevant experience entry's bullets
      - mode "learning": actively developing the skill → appends a structured
        "Developing: X (context)" note to the skills list
      - mode "skip": no connection at all → no change

    The user-supplied context text is the source of truth. The LLM's job is only
    to phrase it naturally in resume style — not to invent new claims.

    Args:
        struct: Resume dict (already processed by apply_review_decisions).
        gap_framings: List of dicts with keys:
            skill — the ATS term the candidate lacks
            mode — "adjacent" | "learning" | "skip"
            context — candidate's own words describing their related background
        job: Job dict for role context in prompts.
        candidate_voice: Free-text style note from user.yaml.

    Returns:
        New resume dict with framing language injected.
    """
    from scripts.llm_router import LLMRouter
    router = LLMRouter()
    updated = dict(struct)
    # Copy each entry dict so in-place edits below don't mutate the caller's
    # data; bullets lists are replaced wholesale, so a per-entry dict() suffices.
    updated["experience"] = [dict(e) for e in (struct.get("experience") or [])]
    # Framings without user-supplied context carry no source-of-truth text, so
    # they are filtered out up front (as is mode "skip").
    adjacent_framings = [f for f in gap_framings if f.get("mode") == "adjacent" and f.get("context")]
    learning_framings = [f for f in gap_framings if f.get("mode") == "learning" and f.get("context")]
    # ── Adjacent experience: inject bridging sentence into most relevant entry ─
    for framing in adjacent_framings:
        skill = framing["skill"]
        context = framing["context"]
        # Find the experience entry most likely to be relevant (simple keyword match)
        best_entry_idx = _find_most_relevant_entry(updated["experience"], skill)
        if best_entry_idx is None:
            # No experience entries at all — nowhere to attach a bridging bullet.
            continue
        entry = updated["experience"][best_entry_idx]
        bullets = list(entry.get("bullets") or [])
        voice_note = (
            f'\n\nCandidate voice/style: "{candidate_voice}". Match this tone.'
        ) if candidate_voice else ""
        prompt = (
            f"You are adding one honest framing sentence to a resume bullet list.\n\n"
            f"The candidate does not have direct experience with '{skill}', "
            f"but they have relevant background they described as:\n"
            f'  "{context}"\n\n'
            f"Job context: {job.get('title', '')} at {job.get('company', '')}.\n\n"
            f"RULES:\n"
            f"1. Add exactly ONE new bullet point that bridges their background to '{skill}'.\n"
            f"2. Do NOT fabricate anything beyond what their context description says.\n"
            f"3. Use honest language: 'adjacent experience in', 'strong foundation applicable to', "
            f"   'directly transferable background in', etc.\n"
            f"4. Return ONLY the single new bullet text — no prefix, no explanation."
            f"{voice_note}\n\n"
            f"Existing bullets for context:\n"
            + "\n".join(f"{b}" for b in bullets[:3])
        )
        try:
            new_bullet = router.complete(prompt).strip()
            # Strip any bullet glyph the model prepends despite rule 4.
            new_bullet = re.sub(r"^[•\-–—*◦▪▸►]\s*", "", new_bullet).strip()
            if new_bullet:
                bullets.append(new_bullet)
                new_entry = dict(entry)
                new_entry["bullets"] = bullets
                updated["experience"][best_entry_idx] = new_entry
        except Exception:
            # Best-effort: a failed framing call leaves the entry unchanged.
            log.warning(
                "[resume_optimizer] frame_skill_gaps adjacent failed for skill %r", skill,
                exc_info=True,
            )
    # ── Learning framing: add structured note to skills list ──────────────────
    if learning_framings:
        skills = list(updated.get("skills") or [])
        for framing in learning_framings:
            skill = framing["skill"]
            context = framing["context"].strip()
            # Format: "Developing: Kubernetes (strong Docker/container orchestration background)"
            note = f"Developing: {skill} ({context})" if context else f"Developing: {skill}"
            # Idempotent: re-running with the same framing adds no duplicate note.
            if note not in skills:
                skills.append(note)
        updated["skills"] = skills
    return updated
def _find_most_relevant_entry(
    experience: list[dict],
    skill: str,
) -> int | None:
    """Pick the experience entry whose title/bullets best overlap the skill term.

    Scores each entry by the count of words it shares with the skill
    (case-insensitive). Ties — including a total miss — resolve to the first
    (most recent) entry; an empty experience list yields None.
    """
    if not experience:
        return None
    target_words = set(skill.lower().split())
    scored: list[tuple[int, int]] = []
    for idx, entry in enumerate(experience):
        haystack = " ".join(
            [entry.get("title") or ""] + list(entry.get("bullets") or [])
        ).lower()
        scored.append((len(target_words & set(haystack.split())), idx))
    # max() returns the first maximal element, so earlier entries win ties —
    # matching the original strictly-greater scan.
    _best_score, best_idx = max(scored, key=lambda pair: pair[0])
    return best_idx
def apply_review_decisions(
    draft: dict[str, Any],
    decisions: dict[str, Any],
) -> dict[str, Any]:
    """Apply the user's section-level review decisions to the rewritten struct.

    Pure data transformation (no LLM calls): approved skill additions are
    merged with the originally kept skills, a rejected summary reverts to the
    original text, and rejected experience entries get their original bullets
    restored in place.

    Args:
        draft: Review draft from build_review_diff (contains "sections" and
            "rewritten_struct").
        decisions: Per-section decisions from the review UI:
            skills: {"approved_additions": [...]}
            summary: {"accepted": bool}
            experience: {"accepted_entries": [{"title", "company", "accepted"}]}

    Returns:
        Updated resume struct ready for gap framing and final render.
    """
    struct = dict(draft.get("rewritten_struct") or {})
    sections = draft.get("sections") or []

    def _first_section(name: str) -> dict | None:
        # Mirrors the original's "iterate and break on first match" lookups.
        for sec in sections:
            if sec["section"] == name:
                return sec
        return None

    # ── Skills: keep original + only approved additions ────────────────────
    approved = set(decisions.get("skills", {}).get("approved_additions") or [])
    skills_sec = _first_section("skills")
    if skills_sec is not None:
        struct["skills"] = sorted(set(skills_sec.get("kept") or []) | approved)

    # ── Summary: revert to the original text when rejected ──────────────────
    if not decisions.get("summary", {}).get("accepted", True):
        summary_sec = _first_section("summary")
        if summary_sec is not None:
            struct["career_summary"] = summary_sec.get("original", struct.get("career_summary", ""))

    # ── Experience: per-entry accept/reject ─────────────────────────────────
    accepted_by_key: dict[str, bool] = {
        f"{ed.get('title', '')}|{ed.get('company', '')}": ed.get("accepted", True)
        for ed in (decisions.get("experience", {}).get("accepted_entries") or [])
    }
    for sec in sections:
        if sec["section"] != "experience":
            continue
        for entry_diff in (sec.get("entries") or []):
            key = f"{entry_diff['title']}|{entry_diff['company']}"
            if accepted_by_key.get(key, True):
                continue
            # Rejected: restore original bullets on the first matching entry.
            for exp_entry in (struct.get("experience") or []):
                if (exp_entry.get("title") == entry_diff["title"] and
                        exp_entry.get("company") == entry_diff["company"]):
                    exp_entry["bullets"] = entry_diff["original_bullets"]
                    break
    return struct
# ── Hallucination guard ───────────────────────────────────────────────────────
def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
    """Return True when the rewrite introduces no fabricated facts.

    Compares stable factual anchors (employers, titles, date strings, and
    education institutions): every anchor in the rewritten resume must already
    exist in the original. Any brand-new anchor signals hallucination.

    Args:
        original: Structured resume dict before rewrite.
        rewritten: Structured resume dict after rewrite.

    Returns:
        True  → rewrite is safe to use
        False → hallucination detected; caller should fall back to original
    """
    fabricated = _extract_anchors(rewritten) - _extract_anchors(original)
    if not fabricated:
        return True
    log.warning(
        "[resume_optimizer] hallucination_check FAILED — new anchors in rewrite: %s",
        fabricated,
    )
    return False


def _extract_anchors(resume: dict[str, Any]) -> frozenset[str]:
    """Extract stable factual anchors (company, title, dates) from experience entries."""
    anchors: set[str] = set()
    for exp in resume.get("experience", []):
        for field in ("company", "title", "start_date", "end_date"):
            value = (exp.get(field) or "").strip().lower()
            if value:
                anchors.add(value)
    for edu in resume.get("education", []):
        value = (edu.get("institution") or "").strip().lower()
        if value:
            anchors.add(value)
    return frozenset(anchors)
# ── Resume → plain text renderer ─────────────────────────────────────────────
def render_resume_text(resume: dict[str, Any]) -> str:
    """Render a structured resume dict back to formatted plain text for PDF export.

    Sections appear in the fixed order: contact line, SUMMARY, EXPERIENCE,
    EDUCATION, SKILLS, ACHIEVEMENTS; empty sections are omitted.

    Fix: experience start/end dates are now joined with "–"; previously they
    were concatenated directly (producing e.g. "(20202023)").
    """
    lines: list[str] = []
    contact_parts = [resume.get("name", ""), resume.get("email", ""), resume.get("phone", "")]
    lines.append(" ".join(p for p in contact_parts if p))
    lines.append("")
    if resume.get("career_summary"):
        lines.append("SUMMARY")
        lines.append(resume["career_summary"])
        lines.append("")
    if resume.get("experience"):
        lines.append("EXPERIENCE")
        for exp in resume["experience"]:
            lines.append(
                f"{exp.get('title', '')} | {exp.get('company', '')} "
                f"({exp.get('start_date', '')}–{exp.get('end_date', '')})"
            )
            for b in exp.get("bullets", []):
                lines.append(f"{b}")
        lines.append("")
    if resume.get("education"):
        lines.append("EDUCATION")
        for edu in resume["education"]:
            lines.append(
                f"{edu.get('degree', '')} {edu.get('field', '')} | "
                f"{edu.get('institution', '')} {edu.get('graduation_year', '')}"
            )
        lines.append("")
    if resume.get("skills"):
        lines.append("SKILLS")
        lines.append(", ".join(resume["skills"]))
        lines.append("")
    if resume.get("achievements"):
        lines.append("ACHIEVEMENTS")
        for a in resume["achievements"]:
            lines.append(f"{a}")
        lines.append("")
    return "\n".join(lines)
# ── Review diff builder ────────────────────────────────────────────────────────
def build_review_diff(
    original: dict[str, Any],
    rewritten: dict[str, Any],
) -> dict[str, Any]:
    """Build a structured diff between original and rewritten resume for the review UI.

    Returns:
        {"sections": [...], "rewritten_struct": rewritten}
        (rewritten_struct is consumed later by the finalize endpoint.)

    Section diff shapes (a section is emitted only when something changed):
        skills_diff  — {"added": [...], "removed": [...], "kept": [...]}
                       each added skill requires user approval
        text_diff    — {"original": str, "proposed": str} for the summary
        bullets_diff — {"entries": [{title, company, original_bullets,
                       proposed_bullets}]} for experience
    """
    diffs: list[dict] = []

    # ── Skills: set comparison on stripped skill strings ───────────────────
    before = {s.strip() for s in (original.get("skills") or [])}
    after = {s.strip() for s in (rewritten.get("skills") or [])}
    added = sorted(after - before)
    removed = sorted(before - after)
    if added or removed:
        diffs.append({
            "section": "skills",
            "type": "skills_diff",
            "added": added,
            "removed": removed,
            "kept": sorted(before & after),
        })

    # ── Summary: emitted only when changed and the rewrite is non-empty ─────
    summary_before = (original.get("career_summary") or "").strip()
    summary_after = (rewritten.get("career_summary") or "").strip()
    if summary_after and summary_after != summary_before:
        diffs.append({
            "section": "summary",
            "type": "text_diff",
            "original": summary_before,
            "proposed": summary_after,
        })

    # ── Experience: pairwise bullet comparison; zip ignores extra entries ───
    changed_entries = [
        {
            "title": old.get("title", ""),
            "company": old.get("company", ""),
            "original_bullets": old.get("bullets") or [],
            "proposed_bullets": new.get("bullets") or [],
        }
        for old, new in zip(original.get("experience") or [],
                            rewritten.get("experience") or [])
        if (old.get("bullets") or []) != (new.get("bullets") or [])
    ]
    if changed_entries:
        diffs.append({
            "section": "experience",
            "type": "bullets_diff",
            "entries": changed_entries,
        })

    return {
        "sections": diffs,
        "rewritten_struct": rewritten,
    }
# ── PDF export ─────────────────────────────────────────────────────────────────
def export_pdf(resume: dict[str, Any], output_path: str) -> None:
    """Render a structured resume dict to a clean PDF using reportlab.

    Uses a single-column layout with section headers, consistent spacing,
    and a readable sans-serif body font suitable for ATS submission.

    Fix: experience start/end dates are now joined with "–"; previously they
    were concatenated directly (producing e.g. "20202023").

    Args:
        resume: Structured resume dict (same format as resume_parser output).
        output_path: Absolute path for the output .pdf file.
    """
    # reportlab is imported lazily so the module loads without it when PDF
    # export is never invoked.
    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.units import inch
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.lib.enums import TA_CENTER, TA_LEFT
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable
    from reportlab.lib import colors
    MARGIN = 0.75 * inch
    # ── Paragraph style sheet ───────────────────────────────────────────────
    name_style = ParagraphStyle(
        "name", fontName="Helvetica-Bold", fontSize=16, leading=20,
        alignment=TA_CENTER, spaceAfter=2,
    )
    contact_style = ParagraphStyle(
        "contact", fontName="Helvetica", fontSize=9, leading=12,
        alignment=TA_CENTER, spaceAfter=6,
        textColor=colors.HexColor("#555555"),
    )
    section_style = ParagraphStyle(
        "section", fontName="Helvetica-Bold", fontSize=10, leading=14,
        spaceBefore=10, spaceAfter=2,
        textColor=colors.HexColor("#1a1a2e"),
    )
    body_style = ParagraphStyle(
        "body", fontName="Helvetica", fontSize=9, leading=13, alignment=TA_LEFT,
    )
    role_style = ParagraphStyle(
        "role", fontName="Helvetica-Bold", fontSize=9, leading=13,
    )
    meta_style = ParagraphStyle(
        "meta", fontName="Helvetica-Oblique", fontSize=8, leading=12,
        textColor=colors.HexColor("#555555"), spaceAfter=2,
    )
    bullet_style = ParagraphStyle(
        "bullet", fontName="Helvetica", fontSize=9, leading=13, leftIndent=12,
    )

    def hr():
        # Thin light-grey rule drawn under each section header.
        return HRFlowable(width="100%", thickness=0.5,
                          color=colors.HexColor("#cccccc"),
                          spaceAfter=4, spaceBefore=2)

    story = []
    # ── Header: name + contact line ─────────────────────────────────────────
    if resume.get("name"):
        story.append(Paragraph(resume["name"], name_style))
    contact_parts = [p for p in (
        resume.get("email", ""), resume.get("phone", ""),
        resume.get("location", ""), resume.get("linkedin", ""),
    ) if p]
    if contact_parts:
        story.append(Paragraph(" | ".join(contact_parts), contact_style))
    story.append(hr())
    # ── Body sections (empty ones are skipped) ──────────────────────────────
    summary = (resume.get("career_summary") or "").strip()
    if summary:
        story.append(Paragraph("SUMMARY", section_style))
        story.append(hr())
        story.append(Paragraph(summary, body_style))
        story.append(Spacer(1, 4))
    if resume.get("experience"):
        story.append(Paragraph("EXPERIENCE", section_style))
        story.append(hr())
        for exp in resume["experience"]:
            dates = f"{exp.get('start_date', '')}–{exp.get('end_date', '')}"
            story.append(Paragraph(
                f"{exp.get('title', '')} | {exp.get('company', '')}", role_style
            ))
            story.append(Paragraph(dates, meta_style))
            for bullet in (exp.get("bullets") or []):
                story.append(Paragraph(f"{bullet}", bullet_style))
            story.append(Spacer(1, 4))
    if resume.get("education"):
        story.append(Paragraph("EDUCATION", section_style))
        story.append(hr())
        for edu in resume["education"]:
            degree = f"{edu.get('degree', '')} {edu.get('field', '')}".strip()
            story.append(Paragraph(
                f"{degree} | {edu.get('institution', '')} {edu.get('graduation_year', '')}".strip(),
                body_style,
            ))
        story.append(Spacer(1, 4))
    if resume.get("skills"):
        story.append(Paragraph("SKILLS", section_style))
        story.append(hr())
        story.append(Paragraph(", ".join(resume["skills"]), body_style))
        story.append(Spacer(1, 4))
    if resume.get("achievements"):
        story.append(Paragraph("ACHIEVEMENTS", section_style))
        story.append(hr())
        for a in resume["achievements"]:
            story.append(Paragraph(f"{a}", bullet_style))
    doc = SimpleDocTemplate(
        output_path, pagesize=LETTER,
        leftMargin=MARGIN, rightMargin=MARGIN,
        topMargin=MARGIN, bottomMargin=MARGIN,
    )
    doc.build(story)