From 02e004ee5c19fa7da62fb6e26bb1943c63a58fe8 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 1 Apr 2026 07:09:46 -0700 Subject: [PATCH] =?UTF-8?q?feat(apply):=20ATS=20resume=20optimizer=20backe?= =?UTF-8?q?nd=20=E2=80=94=20gap=20report=20+=20LLM=20rewrite?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - scripts/resume_optimizer.py: full pipeline (extract_jd_signals → prioritize_gaps → rewrite_for_ats → hallucination_check) - scripts/db.py: add optimized_resume + ats_gap_report columns + save_optimized_resume / get_optimized_resume helpers - tests/test_resume_optimizer.py: 17 unit tests; patches at source module (scripts.llm_router.LLMRouter), not consumer Tier gate: gap report is free; full LLM rewrite is paid+. --- scripts/db.py | 34 +++ scripts/resume_optimizer.py | 439 +++++++++++++++++++++++++++++++++ tests/test_resume_optimizer.py | 288 +++++++++++++++++++++ 3 files changed, 761 insertions(+) create mode 100644 scripts/resume_optimizer.py create mode 100644 tests/test_resume_optimizer.py diff --git a/scripts/db.py b/scripts/db.py index f6ccb6e..89dfc84 100644 --- a/scripts/db.py +++ b/scripts/db.py @@ -132,6 +132,8 @@ _MIGRATIONS = [ ("hired_at", "TEXT"), ("survey_at", "TEXT"), ("calendar_event_id", "TEXT"), + ("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier) + ("ats_gap_report", "TEXT"), # JSON gap report (free tier) ] @@ -301,6 +303,38 @@ def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: st conn.close() +def save_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None, + text: str = "", gap_report: str = "") -> None: + """Persist ATS-optimized resume text and/or gap report for a job.""" + if job_id is None: + return + conn = sqlite3.connect(db_path) + conn.execute( + "UPDATE jobs SET optimized_resume = ?, ats_gap_report = ? 
WHERE id = ?", + (text or None, gap_report or None, job_id), + ) + conn.commit() + conn.close() + + +def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict: + """Return optimized_resume and ats_gap_report for a job, or empty strings if absent.""" + if job_id is None: + return {"optimized_resume": "", "ats_gap_report": ""} + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT optimized_resume, ats_gap_report FROM jobs WHERE id = ?", (job_id,) + ).fetchone() + conn.close() + if not row: + return {"optimized_resume": "", "ats_gap_report": ""} + return { + "optimized_resume": row["optimized_resume"] or "", + "ats_gap_report": row["ats_gap_report"] or "", + } + + _UPDATABLE_JOB_COLS = { "title", "company", "url", "source", "location", "is_remote", "salary", "description", "match_score", "keyword_gaps", diff --git a/scripts/resume_optimizer.py b/scripts/resume_optimizer.py new file mode 100644 index 0000000..1d3b7b3 --- /dev/null +++ b/scripts/resume_optimizer.py @@ -0,0 +1,439 @@ +""" +ATS Resume Optimizer — rewrite a candidate's resume to maximize keyword match +for a specific job description without fabricating experience. 
"""ATS resume optimizer — keyword-gap analysis and LLM-driven resume rewriting.

Tier behaviour:
    Free    → gap report only (extract_jd_signals + prioritize_gaps, no rewrite)
    Paid    → full LLM rewrite targeting the JD (rewrite_for_ats)
    Premium → same as paid for now; fine-tuned voice model is a future step

Pipeline: extract_jd_signals → prioritize_gaps → rewrite_for_ats
→ hallucination_check.
"""
from __future__ import annotations

import json
import logging
import re
from pathlib import Path
from typing import Any

log = logging.getLogger(__name__)

# ── Signal extraction ─────────────────────────────────────────────────────────

def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
    """Collect ATS keyword signals from a job description.

    Merges two independent sources:
      1. TF-IDF keyword gaps from ``scripts.match`` — fast, deterministic,
         no LLM cost.
      2. An LLM pass for phrasing nuance TF-IDF misses ("cross-functional"
         vs "cross-team", "led" vs "managed").

    Either source failing (including an unavailable LLM) degrades gracefully
    to whatever the other source produced.

    Args:
        description: Raw job-description text.
        resume_text: Candidate resume text; enables the TF-IDF gap pass.

    Returns:
        Case-insensitively de-duplicated signals, LLM phrasing first.
    """
    # Deterministic TF-IDF gaps — only meaningful when a resume is supplied.
    deterministic_gaps: list[str] = []
    if resume_text:
        try:
            from scripts.match import match_score
            _, deterministic_gaps = match_score(resume_text, description)
        except Exception:
            log.warning("[resume_optimizer] TF-IDF gap extraction failed", exc_info=True)

    # LLM extraction for phrasing/qualifier nuance.
    model_terms: list[str] = []
    try:
        from scripts.llm_router import LLMRouter
        prompt = (
            "Extract the most important ATS (applicant tracking system) keywords and "
            "phrases from this job description. Focus on:\n"
            "- Required skills and technologies (exact phrasing matters)\n"
            "- Action verbs used to describe responsibilities\n"
            "- Qualification signals ('required', 'must have', 'preferred')\n"
            "- Industry-specific terminology\n\n"
            "Return a JSON array of strings only. No explanation.\n\n"
            f"Job description:\n{description[:3000]}"
        )
        raw = LLMRouter().complete(prompt)
        # The model may wrap the array in markdown — pull out the JSON bracket span.
        array_match = re.search(r"\[.*\]", raw, re.DOTALL)
        if array_match:
            decoded = json.loads(array_match.group(0))
            model_terms = [item.strip() for item in decoded
                           if isinstance(item, str) and item.strip()]
    except Exception:
        log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)

    # Merge with first-occurrence-wins, case-insensitive dedup; dict preserves
    # insertion order, so LLM phrasing comes first and TF-IDF fills the gaps.
    unique: dict[str, str] = {}
    for term in model_terms + deterministic_gaps:
        unique.setdefault(term.lower(), term)
    return list(unique.values())
# ── Gap prioritization ────────────────────────────────────────────────────────

# Map each gap term to the resume section where it would have the most ATS impact.
# ATS systems weight keywords higher in certain sections:
#   skills     — direct keyword match, highest density, indexed first
#   summary    — executive summary keywords often boost overall relevance score
#   experience — verbs + outcomes in bullet points; adds context weight
# REVIEW: these lists are tech/leadership-centric; domain-specific roles
# (creative, healthcare, operations) may over-route to experience. Consider
# expanding the lists or making them config-driven.
_SECTION_KEYWORDS: dict[str, list[str]] = {
    "skills": [
        "python", "sql", "java", "typescript", "react", "vue", "docker",
        "kubernetes", "aws", "gcp", "azure", "terraform", "ci/cd", "git",
        "postgresql", "redis", "kafka", "spark", "tableau", "salesforce",
        "jira", "figma", "excel", "powerpoint", "machine learning", "llm",
        "deep learning", "pytorch", "tensorflow", "scikit-learn",
    ],
    "summary": [
        "leadership", "strategy", "vision", "executive", "director", "vp",
        "growth", "transformation", "stakeholder", "cross-functional",
        "p&l", "revenue", "budget", "board", "c-suite",
    ],
}


def prioritize_gaps(gaps: list[str], resume_sections: dict[str, Any]) -> list[dict]:
    """Rank keyword gaps by ATS impact and map each to a target resume section.

    Routing rules, applied in order:
      1. Term already present anywhere in the resume (case-insensitive) → dropped.
      2. Partial match against the "skills" list  → section "skills",  priority 1.
      3. Partial match against the "summary" list → section "summary", priority 1.
      4. Multi-word phrase (more specific → higher ATS weight)
                                                  → "experience",      priority 2.
      5. Anything else                            → "experience",      priority 3.

    (An earlier draft docstring described this as an unimplemented stub that
    routed everything to "experience" at medium priority — the routing above
    is the actual, implemented behavior.)

    Args:
        gaps: Missing keyword signals from extract_jd_signals().
        resume_sections: Structured resume dict from resume_parser.parse_resume().

    Returns:
        List of dicts sorted by priority ascending, i.e. highest impact
        (priority 1) first:
            {
                "term": str,       # the keyword/phrase to inject
                "section": str,    # "skills", "summary", or "experience"
                "priority": int,   # 1=high, 2=medium, 3=low
                "rationale": str,  # why this section was chosen
            }
    """
    existing_text = _flatten_resume_text(resume_sections).lower()

    prioritized: list[dict] = []
    for term in gaps:
        # Rule 1: skip terms already present anywhere in the resume.
        if term.lower() in existing_text:
            continue

        term_lower = term.lower()

        # Partial-match in both directions handles casing/containment variants
        # ("PostgreSQL" vs "postgresql", "AWS Lambda" vs "aws", etc.).
        skills_match = any(kw in term_lower or term_lower in kw
                           for kw in _SECTION_KEYWORDS["skills"])
        summary_match = any(kw in term_lower or term_lower in kw
                            for kw in _SECTION_KEYWORDS["summary"])

        if skills_match:
            section = "skills"
            priority = 1
            rationale = "matched technical skills list — highest ATS keyword density"
        elif summary_match:
            section = "summary"
            priority = 1
            rationale = "matched leadership/executive signals — boosts overall relevance score"
        elif len(term.split()) > 1:
            section = "experience"
            priority = 2
            rationale = "multi-word phrase — more specific than single keywords, context weight in bullets"
        else:
            section = "experience"
            priority = 3
            rationale = "single generic term — lowest ATS impact, added to experience for coverage"

        prioritized.append({
            "term": term,
            "section": section,
            "priority": priority,
            "rationale": rationale,
        })

    # Stable sort: priority ascending (1 first); input order kept within a tier.
    prioritized.sort(key=lambda x: x["priority"])
    return prioritized


def _flatten_resume_text(resume: dict[str, Any]) -> str:
    """Concatenate all text from a structured resume dict into one searchable string."""
    parts: list[str] = []
    parts.append(resume.get("career_summary", "") or "")
    parts.extend(resume.get("skills", []))
    for exp in resume.get("experience", []):
        parts.append(exp.get("title", ""))
        parts.append(exp.get("company", ""))
        parts.extend(exp.get("bullets", []))
    for edu in resume.get("education", []):
        parts.append(edu.get("degree", ""))
        parts.append(edu.get("field", ""))
        parts.append(edu.get("institution", ""))
    parts.extend(resume.get("achievements", []))
    return " ".join(parts)
+ """ + from scripts.llm_router import LLMRouter + router = LLMRouter() + + # Group gaps by target section + by_section: dict[str, list[str]] = {} + for gap in prioritized_gaps: + by_section.setdefault(gap["section"], []).append(gap["term"]) + + rewritten = dict(resume) # shallow copy — sections replaced below + + for section, terms in by_section.items(): + terms_str = ", ".join(f'"{t}"' for t in terms) + original_content = _section_text_for_prompt(resume, section) + + voice_note = ( + f'\n\nCandidate voice/style: "{candidate_voice}". ' + "Preserve this authentic tone — do not write generically." + ) if candidate_voice else "" + + prompt = ( + f"You are rewriting the **{section}** section of a resume to help it pass " + f"ATS (applicant tracking system) screening for this role:\n" + f" Job title: {job.get('title', 'Unknown')}\n" + f" Company: {job.get('company', 'Unknown')}\n\n" + f"Inject these missing ATS keywords naturally into the section:\n" + f" {terms_str}\n\n" + f"CRITICAL RULES — violating any of these invalidates the rewrite:\n" + f"1. Do NOT invent new employers, job titles, dates, or education.\n" + f"2. Do NOT add skills the candidate did not already demonstrate.\n" + f"3. Only rephrase existing content — replace vague verbs/nouns with the " + f" ATS-preferred equivalents listed above.\n" + f"4. Keep the same number of bullet points in experience entries.\n" + f"5. Return ONLY the rewritten section content, no labels or explanation." 
+ f"{voice_note}\n\n" + f"Original {section} section:\n{original_content}" + ) + + try: + result = router.complete(prompt) + rewritten = _apply_section_rewrite(rewritten, section, result.strip()) + except Exception: + log.warning("[resume_optimizer] rewrite failed for section %r", section, exc_info=True) + # Leave section unchanged on failure + + return rewritten + + +def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str: + """Render a resume section as plain text suitable for an LLM prompt.""" + if section == "summary": + return resume.get("career_summary", "") or "(empty)" + if section == "skills": + skills = resume.get("skills", []) + return ", ".join(skills) if skills else "(empty)" + if section == "experience": + lines: list[str] = [] + for exp in resume.get("experience", []): + lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})") + for b in exp.get("bullets", []): + lines.append(f" • {b}") + return "\n".join(lines) if lines else "(empty)" + return "(unsupported section)" + + +def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) -> dict[str, Any]: + """Return a new resume dict with the given section replaced by rewritten text.""" + updated = dict(resume) + if section == "summary": + updated["career_summary"] = rewritten + elif section == "skills": + # LLM returns comma-separated or newline-separated skills + skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()] + updated["skills"] = skills + elif section == "experience": + # For experience, we keep the structured entries but replace the bullets. + # The LLM rewrites the whole section as plain text; we re-parse the bullets. 
+ updated["experience"] = _reparse_experience_bullets(resume["experience"], rewritten) + return updated + + +def _reparse_experience_bullets( + original_entries: list[dict], + rewritten_text: str, +) -> list[dict]: + """Re-associate rewritten bullet text with the original experience entries. + + The LLM rewrites the section as a block of text. We split on the original + entry headers (title + company) to re-bind bullets to entries. Falls back + to the original entries if splitting fails. + """ + if not original_entries: + return original_entries + + result: list[dict] = [] + remaining = rewritten_text + + for i, entry in enumerate(original_entries): + # Find where the next entry starts so we can slice out this entry's bullets + if i + 1 < len(original_entries): + next_title = original_entries[i + 1]["title"] + # Look for the next entry header in the remaining text + split_pat = re.escape(next_title) + m = re.search(split_pat, remaining, re.IGNORECASE) + chunk = remaining[:m.start()] if m else remaining + remaining = remaining[m.start():] if m else "" + else: + chunk = remaining + + bullets = [ + re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip() + for line in chunk.splitlines() + if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip()) + ] + new_entry = dict(entry) + new_entry["bullets"] = bullets if bullets else entry["bullets"] + result.append(new_entry) + + return result + + +# ── Hallucination guard ─────────────────────────────────────────────────────── + +def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool: + """Return True if the rewrite is safe (no fabricated facts detected). + + Checks that the set of employers, job titles, and date ranges in the + rewritten resume is a subset of those in the original. Any new entry + signals hallucination. + + Args: + original: Structured resume dict before rewrite. + rewritten: Structured resume dict after rewrite. 
# ── Hallucination guard ───────────────────────────────────────────────────────

def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
    """Return True when the rewrite introduces no fabricated facts.

    Compares the factual anchors (employers, job titles, date ranges,
    institutions) of the rewrite against the original: the rewrite's anchors
    must be a subset of the original's. Any new anchor signals hallucination.

    Args:
        original: Structured resume dict before rewrite.
        rewritten: Structured resume dict after rewrite.

    Returns:
        True when the rewrite is safe to use; False when a fabricated anchor
        was detected and the caller should fall back to the original.
    """
    fabricated = _extract_anchors(rewritten) - _extract_anchors(original)
    if not fabricated:
        return True
    log.warning(
        "[resume_optimizer] hallucination_check FAILED — new anchors in rewrite: %s",
        fabricated,
    )
    return False


def _extract_anchors(resume: dict[str, Any]) -> frozenset[str]:
    """Extract stable factual anchors (company, title, dates) from experience entries."""
    collected = [
        (entry.get(field) or "").strip().lower()
        for entry in resume.get("experience", [])
        for field in ("company", "title", "start_date", "end_date")
    ] + [
        (entry.get("institution") or "").strip().lower()
        for entry in resume.get("education", [])
    ]
    return frozenset(value for value in collected if value)


# ── Resume → plain text renderer ──────────────────────────────────────────────

def render_resume_text(resume: dict[str, Any]) -> str:
    """Render a structured resume dict back to formatted plain text for PDF export."""
    out: list[str] = []
    emit = out.append

    # Contact header: name / email / phone, skipping whichever are blank.
    header = [resume.get("name", ""), resume.get("email", ""), resume.get("phone", "")]
    emit(" ".join(part for part in header if part))
    emit("")

    if resume.get("career_summary"):
        emit("SUMMARY")
        emit(resume["career_summary"])
        emit("")

    if resume.get("experience"):
        emit("EXPERIENCE")
        for role in resume["experience"]:
            emit(
                f"{role.get('title', '')} | {role.get('company', '')} "
                f"({role.get('start_date', '')}–{role.get('end_date', '')})"
            )
            for bullet in role.get("bullets", []):
                emit(f" • {bullet}")
        emit("")

    if resume.get("education"):
        emit("EDUCATION")
        for school in resume["education"]:
            emit(
                f"{school.get('degree', '')} {school.get('field', '')} | "
                f"{school.get('institution', '')} {school.get('graduation_year', '')}"
            )
        emit("")

    if resume.get("skills"):
        emit("SKILLS")
        emit(", ".join(resume["skills"]))
        emit("")

    if resume.get("achievements"):
        emit("ACHIEVEMENTS")
        for achievement in resume["achievements"]:
            emit(f" • {achievement}")
        emit("")

    return "\n".join(out)
+) + + +# ── extract_jd_signals ──────────────────────────────────────────────────────── + +def test_extract_jd_signals_returns_list(): + """extract_jd_signals returns a list even when LLM and TF-IDF both fail.""" + from scripts.resume_optimizer import extract_jd_signals + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.side_effect = Exception("no LLM") + result = extract_jd_signals(SAMPLE_JD, resume_text="Python developer") + + assert isinstance(result, list) + + +def test_extract_jd_signals_llm_path_parses_json_array(): + """extract_jd_signals merges LLM-extracted signals with TF-IDF gaps.""" + from scripts.resume_optimizer import extract_jd_signals + + llm_response = '["Gainsight", "cross-functional leadership", "stakeholder management"]' + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.return_value = llm_response + result = extract_jd_signals(SAMPLE_JD) + + assert "Gainsight" in result + assert "cross-functional leadership" in result + + +def test_extract_jd_signals_deduplicates(): + """extract_jd_signals deduplicates terms across LLM and TF-IDF sources.""" + from scripts.resume_optimizer import extract_jd_signals + + llm_response = '["Python", "AWS", "Python"]' + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.return_value = llm_response + result = extract_jd_signals(SAMPLE_JD) + + assert result.count("Python") == 1 + + +def test_extract_jd_signals_handles_malformed_llm_json(): + """extract_jd_signals falls back gracefully when LLM returns non-JSON.""" + from scripts.resume_optimizer import extract_jd_signals + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.return_value = "Here are some keywords: Gainsight, AWS" + result = extract_jd_signals(SAMPLE_JD) + + # Should still return a list (may be empty if TF-IDF also silent) + assert isinstance(result, list) + + +# ── 
prioritize_gaps ─────────────────────────────────────────────────────────── + +def test_prioritize_gaps_skips_existing_terms(): + """prioritize_gaps excludes terms already present in the resume.""" + from scripts.resume_optimizer import prioritize_gaps + + # "Salesforce" is already in SAMPLE_RESUME skills + result = prioritize_gaps(["Salesforce", "Gainsight"], SAMPLE_RESUME) + terms = [r["term"] for r in result] + + assert "Salesforce" not in terms + assert "Gainsight" in terms + + +def test_prioritize_gaps_routes_tech_terms_to_skills(): + """prioritize_gaps maps known tech keywords to the skills section at priority 1.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["AWS", "Docker"], SAMPLE_RESUME) + by_term = {r["term"]: r for r in result} + + assert by_term["AWS"]["section"] == "skills" + assert by_term["AWS"]["priority"] == 1 + assert by_term["Docker"]["section"] == "skills" + + +def test_prioritize_gaps_routes_leadership_terms_to_summary(): + """prioritize_gaps maps leadership/executive signals to the summary section.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["cross-functional", "stakeholder"], SAMPLE_RESUME) + by_term = {r["term"]: r for r in result} + + assert by_term["cross-functional"]["section"] == "summary" + assert by_term["stakeholder"]["section"] == "summary" + + +def test_prioritize_gaps_multi_word_routes_to_experience(): + """Multi-word phrases not in skills/summary lists go to experience at priority 2.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["proactive client engagement"], SAMPLE_RESUME) + assert result[0]["section"] == "experience" + assert result[0]["priority"] == 2 + + +def test_prioritize_gaps_single_word_is_lowest_priority(): + """Single generic words not in any list go to experience at priority 3.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["innovation"], 
SAMPLE_RESUME) + assert result[0]["priority"] == 3 + + +def test_prioritize_gaps_sorted_by_priority(): + """prioritize_gaps output is sorted ascending by priority (1 first).""" + from scripts.resume_optimizer import prioritize_gaps + + gaps = ["innovation", "AWS", "cross-functional", "managed service contracts"] + result = prioritize_gaps(gaps, SAMPLE_RESUME) + priorities = [r["priority"] for r in result] + + assert priorities == sorted(priorities) + + +# ── hallucination_check ─────────────────────────────────────────────────────── + +def test_hallucination_check_passes_unchanged_resume(): + """hallucination_check returns True when rewrite has no new employers or institutions.""" + from scripts.resume_optimizer import hallucination_check + + # Shallow rewrite: same structure + rewritten = { + **SAMPLE_RESUME, + "career_summary": "Dynamic CSM with cross-functional stakeholder management experience.", + } + assert hallucination_check(SAMPLE_RESUME, rewritten) is True + + +def test_hallucination_check_fails_on_new_employer(): + """hallucination_check returns False when a new company is introduced.""" + from scripts.resume_optimizer import hallucination_check + + fabricated_entry = { + "title": "VP of Customer Success", + "company": "Fabricated Corp", + "start_date": "2019", + "end_date": "2021", + "bullets": ["Led a team of 30."], + } + rewritten = dict(SAMPLE_RESUME) + rewritten["experience"] = SAMPLE_RESUME["experience"] + [fabricated_entry] + + assert hallucination_check(SAMPLE_RESUME, rewritten) is False + + +def test_hallucination_check_fails_on_new_institution(): + """hallucination_check returns False when a new educational institution appears.""" + from scripts.resume_optimizer import hallucination_check + + rewritten = dict(SAMPLE_RESUME) + rewritten["education"] = [ + *SAMPLE_RESUME["education"], + {"degree": "M.S.", "field": "Data Science", "institution": "MIT", "graduation_year": "2020"}, + ] + + assert hallucination_check(SAMPLE_RESUME, rewritten) is 
False + + +# ── render_resume_text ──────────────────────────────────────────────────────── + +def test_render_resume_text_contains_all_sections(): + """render_resume_text produces plain text containing all resume sections.""" + from scripts.resume_optimizer import render_resume_text + + text = render_resume_text(SAMPLE_RESUME) + + assert "Alex Rivera" in text + assert "SUMMARY" in text + assert "EXPERIENCE" in text + assert "Customer Success Manager" in text + assert "Acme Corp" in text + assert "EDUCATION" in text + assert "State University" in text + assert "SKILLS" in text + assert "Salesforce" in text + + +def test_render_resume_text_omits_empty_sections(): + """render_resume_text skips sections that have no content.""" + from scripts.resume_optimizer import render_resume_text + + sparse = { + "name": "Jordan Lee", + "email": "", + "phone": "", + "career_summary": "", + "skills": [], + "experience": [], + "education": [], + "achievements": [], + } + text = render_resume_text(sparse) + + assert "EXPERIENCE" not in text + assert "SKILLS" not in text + + +# ── db integration ──────────────────────────────────────────────────────────── + +def test_save_and_get_optimized_resume(tmp_path): + """save_optimized_resume persists and get_optimized_resume retrieves the data.""" + from scripts.db import init_db, save_optimized_resume, get_optimized_resume + + db_path = tmp_path / "test.db" + init_db(db_path) + + # Insert a minimal job to satisfy FK + import sqlite3 + conn = sqlite3.connect(db_path) + conn.execute( + "INSERT INTO jobs (id, title, company, url, source, status) VALUES (1, 'CSM', 'Acme', 'http://x.com', 'test', 'approved')" + ) + conn.commit() + conn.close() + + gap_report = json.dumps([{"term": "Gainsight", "section": "skills", "priority": 1, "rationale": "test"}]) + save_optimized_resume(db_path, job_id=1, text="Rewritten resume text.", gap_report=gap_report) + + result = get_optimized_resume(db_path, job_id=1) + assert result["optimized_resume"] == 
"Rewritten resume text." + parsed = json.loads(result["ats_gap_report"]) + assert parsed[0]["term"] == "Gainsight" + + +def test_get_optimized_resume_returns_empty_for_missing(tmp_path): + """get_optimized_resume returns empty strings when no record exists.""" + from scripts.db import init_db, get_optimized_resume + + db_path = tmp_path / "test.db" + init_db(db_path) + + result = get_optimized_resume(db_path, job_id=999) + assert result["optimized_resume"] == "" + assert result["ats_gap_report"] == ""