feat(apply): ATS resume optimizer backend — gap report + LLM rewrite
- scripts/resume_optimizer.py: full pipeline (extract_jd_signals → prioritize_gaps → rewrite_for_ats → hallucination_check)
- scripts/db.py: add optimized_resume + ats_gap_report columns, plus save_optimized_resume / get_optimized_resume helpers
- tests/test_resume_optimizer.py: 17 unit tests; patches at the source module (scripts.llm_router.LLMRouter), not the consumer

Tier gate: the gap report is free; the full LLM rewrite is paid+.
This commit is contained in:
parent
9702646738
commit
02e004ee5c
3 changed files with 761 additions and 0 deletions
|
|
@ -132,6 +132,8 @@ _MIGRATIONS = [
|
||||||
("hired_at", "TEXT"),
|
("hired_at", "TEXT"),
|
||||||
("survey_at", "TEXT"),
|
("survey_at", "TEXT"),
|
||||||
("calendar_event_id", "TEXT"),
|
("calendar_event_id", "TEXT"),
|
||||||
|
("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier)
|
||||||
|
("ats_gap_report", "TEXT"), # JSON gap report (free tier)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -301,6 +303,38 @@ def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: st
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def save_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None,
                          text: str = "", gap_report: str = "") -> None:
    """Persist ATS-optimized resume text and/or gap report for a job.

    Only the fields actually provided (non-empty) are written. The previous
    implementation updated both columns unconditionally, so a rewrite-only
    save (paid tier) NULL-ed a previously stored gap report (free tier) and
    vice versa. Saving with neither field is a no-op.

    Args:
        db_path: Path to the SQLite database file.
        job_id: Target jobs.id row; silently returns when None.
        text: ATS-rewritten resume text (paid tier).
        gap_report: JSON-encoded gap report (free tier).
    """
    if job_id is None:
        return

    # Build the SET clause only from the values the caller actually supplied.
    assignments = []
    params = []
    if text:
        assignments.append("optimized_resume = ?")
        params.append(text)
    if gap_report:
        assignments.append("ats_gap_report = ?")
        params.append(gap_report)
    if not assignments:
        return  # nothing to write

    params.append(job_id)
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            f"UPDATE jobs SET {', '.join(assignments)} WHERE id = ?",
            params,
        )
        conn.commit()
    finally:
        # Always release the connection, even if the UPDATE raises.
        conn.close()
|
|
||||||
|
|
||||||
|
def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict:
    """Return optimized_resume and ats_gap_report for a job, or empty strings if absent."""
    if job_id is None:
        return {"optimized_resume": "", "ats_gap_report": ""}

    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    record = conn.execute(
        "SELECT optimized_resume, ats_gap_report FROM jobs WHERE id = ?", (job_id,)
    ).fetchone()
    conn.close()

    if record is None:
        return {"optimized_resume": "", "ats_gap_report": ""}
    return {
        "optimized_resume": record["optimized_resume"] or "",
        "ats_gap_report": record["ats_gap_report"] or "",
    }
|
|
||||||
|
|
||||||
_UPDATABLE_JOB_COLS = {
|
_UPDATABLE_JOB_COLS = {
|
||||||
"title", "company", "url", "source", "location", "is_remote",
|
"title", "company", "url", "source", "location", "is_remote",
|
||||||
"salary", "description", "match_score", "keyword_gaps",
|
"salary", "description", "match_score", "keyword_gaps",
|
||||||
|
|
|
||||||
439
scripts/resume_optimizer.py
Normal file
439
scripts/resume_optimizer.py
Normal file
|
|
@ -0,0 +1,439 @@
|
||||||
|
"""
|
||||||
|
ATS Resume Optimizer — rewrite a candidate's resume to maximize keyword match
|
||||||
|
for a specific job description without fabricating experience.
|
||||||
|
|
||||||
|
Tier behaviour:
|
||||||
|
Free → gap report only (extract_jd_signals + prioritize_gaps, no LLM rewrite)
|
||||||
|
Paid → full LLM rewrite targeting the JD (rewrite_for_ats)
|
||||||
|
Premium → same as paid for now; fine-tuned voice model is a future enhancement
|
||||||
|
|
||||||
|
Pipeline:
|
||||||
|
job.description
|
||||||
|
→ extract_jd_signals() # TF-IDF gaps + LLM-extracted ATS signals
|
||||||
|
→ prioritize_gaps() # rank by impact, map to resume sections
|
||||||
|
→ rewrite_for_ats() # per-section LLM rewrite (paid+)
|
||||||
|
→ hallucination_check() # reject rewrites that invent new experience
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ── Signal extraction ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
    """Return ATS keyword signals from a job description.

    Two sources are merged, LLM-extracted phrasing first:

    1. TF-IDF keyword gaps from match.py (fast, deterministic, no LLM cost) —
       only computed when resume_text is supplied.
    2. LLM extraction for phrasing nuance TF-IDF misses (e.g. "cross-functional"
       vs "cross-team", "led" vs "managed").

    Either source failing is logged and skipped, so the worst case is an
    empty list rather than an exception.

    Args:
        description: Raw job description text.
        resume_text: Candidate's resume text (used to compute gap vs. already present).

    Returns:
        Deduplicated (case-insensitive) list of ATS keyword signals,
        most impactful first.
    """
    # Phase 1: deterministic TF-IDF gaps (always available)
    tfidf_gaps: list[str] = []
    if resume_text:
        try:
            from scripts.match import match_score
            _, tfidf_gaps = match_score(resume_text, description)
        except Exception:
            log.warning("[resume_optimizer] TF-IDF gap extraction failed", exc_info=True)

    # Phase 2: LLM extraction for phrasing/qualifier nuance
    llm_signals: list[str] = []
    try:
        from scripts.llm_router import LLMRouter
        prompt = (
            "Extract the most important ATS (applicant tracking system) keywords and "
            "phrases from this job description. Focus on:\n"
            "- Required skills and technologies (exact phrasing matters)\n"
            "- Action verbs used to describe responsibilities\n"
            "- Qualification signals ('required', 'must have', 'preferred')\n"
            "- Industry-specific terminology\n\n"
            "Return a JSON array of strings only. No explanation.\n\n"
            f"Job description:\n{description[:3000]}"
        )
        raw = LLMRouter().complete(prompt)
        # The model may wrap the JSON array in markdown — pull out the array itself.
        array_match = re.search(r"\[.*\]", raw, re.DOTALL)
        if array_match:
            parsed = json.loads(array_match.group(0))
            llm_signals = [item.strip() for item in parsed
                           if isinstance(item, str) and item.strip()]
    except Exception:
        log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)

    # Merge: LLM signals first (richer phrasing), TF-IDF fills gaps.
    # Dedupe case-insensitively, keeping first-seen casing and order.
    unique: dict[str, str] = {}
    for candidate in llm_signals + tfidf_gaps:
        unique.setdefault(candidate.lower(), candidate)
    return list(unique.values())
||||||
|
|
||||||
|
|
||||||
|
# ── Gap prioritization ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Map each gap term to the resume section where it would have the most ATS impact.
# ATS systems weight keywords higher in certain sections:
#   skills     — direct keyword match, highest density, indexed first
#   summary    — executive summary keywords often boost overall relevance score
#   experience — verbs + outcomes in bullet points; adds context weight
# NOTE: prioritize_gaps() matches these as case-insensitive substrings in either
# direction (keyword in term, or term in keyword), not exact equality.
# NOTE(review): lists are tech/leadership-centric — gaps for other domains fall
# through to "experience" by default.
_SECTION_KEYWORDS: dict[str, list[str]] = {
    "skills": [
        "python", "sql", "java", "typescript", "react", "vue", "docker",
        "kubernetes", "aws", "gcp", "azure", "terraform", "ci/cd", "git",
        "postgresql", "redis", "kafka", "spark", "tableau", "salesforce",
        "jira", "figma", "excel", "powerpoint", "machine learning", "llm",
        "deep learning", "pytorch", "tensorflow", "scikit-learn",
    ],
    "summary": [
        "leadership", "strategy", "vision", "executive", "director", "vp",
        "growth", "transformation", "stakeholder", "cross-functional",
        "p&l", "revenue", "budget", "board", "c-suite",
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
def prioritize_gaps(gaps: list[str], resume_sections: dict[str, Any]) -> list[dict]:
    """Rank keyword gaps by ATS impact and map each to a target resume section.

    Args:
        gaps: List of missing keyword signals from extract_jd_signals().
        resume_sections: Structured resume dict from resume_parser.parse_resume().

    Returns:
        List of dicts, sorted by priority ascending (1 = high, first):
            {
                "term": str,        # the keyword/phrase to inject
                "section": str,     # target resume section ("skills", "summary", "experience")
                "priority": int,    # 1=high, 2=medium, 3=low
                "rationale": str,   # why this section was chosen
            }

    Routing rules:
        - Terms matching the technical skills list → "skills", priority 1.
        - Terms matching leadership/executive signals → "summary", priority 1.
        - Remaining multi-word phrases → "experience", priority 2 (more
          specific = higher ATS weight).
        - Remaining single words → "experience", priority 3.
        - Terms already present in the resume (case-insensitive, whole-word)
          are dropped entirely.
    """
    existing_text = _flatten_resume_text(resume_sections).lower()

    prioritized: list[dict] = []
    for term in gaps:
        term_lower = term.lower()

        # Skip terms already present anywhere in the resume. Whole-word match
        # (lookarounds, so terms with non-word edges like "ci/cd" still work)
        # rather than raw substring: a bare `in` check made short terms like
        # "ai" vanish on an accidental hit inside "maintained".
        if re.search(rf"(?<!\w){re.escape(term_lower)}(?!\w)", existing_text):
            continue

        # REVIEW: _SECTION_KEYWORDS lists are tech-centric; domain-specific roles
        # (creative, healthcare, operations) may over-route to experience.
        # Consider expanding the lists or making them config-driven.

        # Partial-match: term contains a skills keyword or vice versa (handles
        # "PostgreSQL" vs "postgresql", "AWS Lambda" vs "aws", etc.)
        skills_match = any(kw in term_lower or term_lower in kw
                           for kw in _SECTION_KEYWORDS["skills"])
        summary_match = any(kw in term_lower or term_lower in kw
                            for kw in _SECTION_KEYWORDS["summary"])

        if skills_match:
            section = "skills"
            priority = 1
            rationale = "matched technical skills list — highest ATS keyword density"
        elif summary_match:
            section = "summary"
            priority = 1
            rationale = "matched leadership/executive signals — boosts overall relevance score"
        elif len(term.split()) > 1:
            section = "experience"
            priority = 2
            rationale = "multi-word phrase — more specific than single keywords, context weight in bullets"
        else:
            section = "experience"
            priority = 3
            rationale = "single generic term — lowest ATS impact, added to experience for coverage"

        prioritized.append({
            "term": term,
            "section": section,
            "priority": priority,
            "rationale": rationale,
        })

    # Stable sort: equal-priority terms keep their input order.
    prioritized.sort(key=lambda entry: entry["priority"])
    return prioritized
||||||
|
|
||||||
|
|
||||||
|
def _flatten_resume_text(resume: dict[str, Any]) -> str:
|
||||||
|
"""Concatenate all text from a structured resume dict into one searchable string."""
|
||||||
|
parts: list[str] = []
|
||||||
|
parts.append(resume.get("career_summary", "") or "")
|
||||||
|
parts.extend(resume.get("skills", []))
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
parts.append(exp.get("title", ""))
|
||||||
|
parts.append(exp.get("company", ""))
|
||||||
|
parts.extend(exp.get("bullets", []))
|
||||||
|
for edu in resume.get("education", []):
|
||||||
|
parts.append(edu.get("degree", ""))
|
||||||
|
parts.append(edu.get("field", ""))
|
||||||
|
parts.append(edu.get("institution", ""))
|
||||||
|
parts.extend(resume.get("achievements", []))
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# ── LLM rewrite ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def rewrite_for_ats(
    resume: dict[str, Any],
    prioritized_gaps: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Rewrite resume sections to naturally incorporate ATS keyword gaps.

    Operates section-by-section. For each target section in prioritized_gaps,
    builds a focused prompt that injects only the gaps destined for that section.
    The hallucination constraint is enforced in the prompt itself and verified
    post-hoc by hallucination_check().

    Args:
        resume: Structured resume dict (from resume_parser.parse_resume).
        prioritized_gaps: Output of prioritize_gaps().
        job: Job dict with at minimum {"title": str, "company": str, "description": str}.
        candidate_voice: Free-text personality/style note from user.yaml (may be empty).

    Returns:
        New resume dict (same structure as input) with rewritten sections.
        Sections with no relevant gaps are copied through unchanged; a section
        whose LLM call raises is also left unchanged (logged, not raised).
    """
    # Imported lazily so tests can patch scripts.llm_router.LLMRouter and so the
    # free tier (gap report only) never touches the LLM stack.
    from scripts.llm_router import LLMRouter
    router = LLMRouter()

    # Group gaps by target section: {"skills": [...], "summary": [...], ...}
    by_section: dict[str, list[str]] = {}
    for gap in prioritized_gaps:
        by_section.setdefault(gap["section"], []).append(gap["term"])

    rewritten = dict(resume)  # shallow copy — sections replaced below

    for section, terms in by_section.items():
        # Quote each term so multi-word phrases survive as units in the prompt.
        terms_str = ", ".join(f'"{t}"' for t in terms)
        original_content = _section_text_for_prompt(resume, section)

        # Optional style rider appended after rule 5 (it begins with "\n\n").
        voice_note = (
            f'\n\nCandidate voice/style: "{candidate_voice}". '
            "Preserve this authentic tone — do not write generically."
        ) if candidate_voice else ""

        prompt = (
            f"You are rewriting the **{section}** section of a resume to help it pass "
            f"ATS (applicant tracking system) screening for this role:\n"
            f"  Job title: {job.get('title', 'Unknown')}\n"
            f"  Company: {job.get('company', 'Unknown')}\n\n"
            f"Inject these missing ATS keywords naturally into the section:\n"
            f"  {terms_str}\n\n"
            f"CRITICAL RULES — violating any of these invalidates the rewrite:\n"
            f"1. Do NOT invent new employers, job titles, dates, or education.\n"
            f"2. Do NOT add skills the candidate did not already demonstrate.\n"
            f"3. Only rephrase existing content — replace vague verbs/nouns with the "
            f"   ATS-preferred equivalents listed above.\n"
            f"4. Keep the same number of bullet points in experience entries.\n"
            f"5. Return ONLY the rewritten section content, no labels or explanation."
            f"{voice_note}\n\n"
            f"Original {section} section:\n{original_content}"
        )

        try:
            result = router.complete(prompt)
            # Fold the rewritten text back into the (accumulating) copy.
            rewritten = _apply_section_rewrite(rewritten, section, result.strip())
        except Exception:
            log.warning("[resume_optimizer] rewrite failed for section %r", section, exc_info=True)
            # Leave section unchanged on failure

    return rewritten
||||||
|
|
||||||
|
|
||||||
|
def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str:
|
||||||
|
"""Render a resume section as plain text suitable for an LLM prompt."""
|
||||||
|
if section == "summary":
|
||||||
|
return resume.get("career_summary", "") or "(empty)"
|
||||||
|
if section == "skills":
|
||||||
|
skills = resume.get("skills", [])
|
||||||
|
return ", ".join(skills) if skills else "(empty)"
|
||||||
|
if section == "experience":
|
||||||
|
lines: list[str] = []
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})")
|
||||||
|
for b in exp.get("bullets", []):
|
||||||
|
lines.append(f" • {b}")
|
||||||
|
return "\n".join(lines) if lines else "(empty)"
|
||||||
|
return "(unsupported section)"
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) -> dict[str, Any]:
|
||||||
|
"""Return a new resume dict with the given section replaced by rewritten text."""
|
||||||
|
updated = dict(resume)
|
||||||
|
if section == "summary":
|
||||||
|
updated["career_summary"] = rewritten
|
||||||
|
elif section == "skills":
|
||||||
|
# LLM returns comma-separated or newline-separated skills
|
||||||
|
skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()]
|
||||||
|
updated["skills"] = skills
|
||||||
|
elif section == "experience":
|
||||||
|
# For experience, we keep the structured entries but replace the bullets.
|
||||||
|
# The LLM rewrites the whole section as plain text; we re-parse the bullets.
|
||||||
|
updated["experience"] = _reparse_experience_bullets(resume["experience"], rewritten)
|
||||||
|
return updated
|
||||||
|
|
||||||
|
|
||||||
|
def _reparse_experience_bullets(
|
||||||
|
original_entries: list[dict],
|
||||||
|
rewritten_text: str,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Re-associate rewritten bullet text with the original experience entries.
|
||||||
|
|
||||||
|
The LLM rewrites the section as a block of text. We split on the original
|
||||||
|
entry headers (title + company) to re-bind bullets to entries. Falls back
|
||||||
|
to the original entries if splitting fails.
|
||||||
|
"""
|
||||||
|
if not original_entries:
|
||||||
|
return original_entries
|
||||||
|
|
||||||
|
result: list[dict] = []
|
||||||
|
remaining = rewritten_text
|
||||||
|
|
||||||
|
for i, entry in enumerate(original_entries):
|
||||||
|
# Find where the next entry starts so we can slice out this entry's bullets
|
||||||
|
if i + 1 < len(original_entries):
|
||||||
|
next_title = original_entries[i + 1]["title"]
|
||||||
|
# Look for the next entry header in the remaining text
|
||||||
|
split_pat = re.escape(next_title)
|
||||||
|
m = re.search(split_pat, remaining, re.IGNORECASE)
|
||||||
|
chunk = remaining[:m.start()] if m else remaining
|
||||||
|
remaining = remaining[m.start():] if m else ""
|
||||||
|
else:
|
||||||
|
chunk = remaining
|
||||||
|
|
||||||
|
bullets = [
|
||||||
|
re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip()
|
||||||
|
for line in chunk.splitlines()
|
||||||
|
if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip())
|
||||||
|
]
|
||||||
|
new_entry = dict(entry)
|
||||||
|
new_entry["bullets"] = bullets if bullets else entry["bullets"]
|
||||||
|
result.append(new_entry)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# ── Hallucination guard ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
    """Return True if the rewrite is safe (no fabricated facts detected).

    Compares the factual anchors (employers, job titles, date ranges,
    educational institutions) of the rewritten resume against the original.
    Any anchor that exists only in the rewrite signals a fabricated entry.

    Args:
        original: Structured resume dict before rewrite.
        rewritten: Structured resume dict after rewrite.

    Returns:
        True  → rewrite is safe to use
        False → hallucination detected; caller should fall back to original
    """
    fabricated = _extract_anchors(rewritten) - _extract_anchors(original)
    if not fabricated:
        return True
    log.warning(
        "[resume_optimizer] hallucination_check FAILED — new anchors in rewrite: %s",
        fabricated,
    )
    return False
||||||
|
|
||||||
|
|
||||||
|
def _extract_anchors(resume: dict[str, Any]) -> frozenset[str]:
|
||||||
|
"""Extract stable factual anchors (company, title, dates) from experience entries."""
|
||||||
|
anchors: set[str] = set()
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
for field in ("company", "title", "start_date", "end_date"):
|
||||||
|
val = (exp.get(field) or "").strip().lower()
|
||||||
|
if val:
|
||||||
|
anchors.add(val)
|
||||||
|
for edu in resume.get("education", []):
|
||||||
|
val = (edu.get("institution") or "").strip().lower()
|
||||||
|
if val:
|
||||||
|
anchors.add(val)
|
||||||
|
return frozenset(anchors)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Resume → plain text renderer ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def render_resume_text(resume: dict[str, Any]) -> str:
    """Render a structured resume dict back to formatted plain text for PDF export."""
    out: list[str] = []

    # Contact header: name/email/phone, space-separated, blanks dropped.
    header = [resume.get("name", ""), resume.get("email", ""), resume.get("phone", "")]
    out.append(" ".join(part for part in header if part))
    out.append("")

    summary = resume.get("career_summary")
    if summary:
        out += ["SUMMARY", summary, ""]

    if resume.get("experience"):
        out.append("EXPERIENCE")
        for role in resume["experience"]:
            out.append(
                f"{role.get('title', '')} | {role.get('company', '')} "
                f"({role.get('start_date', '')}–{role.get('end_date', '')})"
            )
            out.extend(f"  • {item}" for item in role.get("bullets", []))
            out.append("")

    if resume.get("education"):
        out.append("EDUCATION")
        for school in resume["education"]:
            out.append(
                f"{school.get('degree', '')} {school.get('field', '')} | "
                f"{school.get('institution', '')} {school.get('graduation_year', '')}"
            )
        out.append("")

    if resume.get("skills"):
        out += ["SKILLS", ", ".join(resume["skills"]), ""]

    if resume.get("achievements"):
        out.append("ACHIEVEMENTS")
        out.extend(f"  • {item}" for item in resume["achievements"])
        out.append("")

    return "\n".join(out)
||||||
288
tests/test_resume_optimizer.py
Normal file
288
tests/test_resume_optimizer.py
Normal file
|
|
@ -0,0 +1,288 @@
|
||||||
|
# tests/test_resume_optimizer.py
|
||||||
|
"""Tests for scripts/resume_optimizer.py"""
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
|
||||||
|
# ── Fixtures ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Structured resume fixture in the dict shape the resume_optimizer helpers
# consume (career_summary / skills / experience / education / achievements).
# "Salesforce" is deliberately present so prioritize_gaps drops it as an
# already-covered term.
SAMPLE_RESUME = {
    "name": "Alex Rivera",
    "email": "alex@example.com",
    "phone": "555-1234",
    "career_summary": "Experienced Customer Success Manager with a track record of growth.",
    "skills": ["Salesforce", "Python", "customer success"],
    "experience": [
        {
            "title": "Customer Success Manager",
            "company": "Acme Corp",
            "start_date": "2021",
            "end_date": "present",
            "bullets": [
                "Managed a portfolio of 120 enterprise accounts.",
                "Reduced churn by 18% through proactive outreach.",
            ],
        },
        {
            "title": "Support Engineer",
            "company": "Beta Inc",
            "start_date": "2018",
            "end_date": "2021",
            "bullets": ["Resolved escalations for top-tier clients."],
        },
    ],
    "education": [
        {
            "degree": "B.S.",
            "field": "Computer Science",
            "institution": "State University",
            "graduation_year": "2018",
        }
    ],
    "achievements": [],
}
||||||
|
|
||||||
|
# Job description mixing a novel tool (Gainsight), summary-routed signals
# ("cross-functional", "stakeholder"), and a skills-routed keyword ("AWS").
SAMPLE_JD = (
    "We are looking for a Customer Success Manager with Gainsight, cross-functional "
    "leadership experience, and strong stakeholder management skills. AWS knowledge a plus."
)
||||||
|
|
||||||
|
|
||||||
|
# ── extract_jd_signals ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_extract_jd_signals_returns_list():
    """A list comes back even when both the LLM and TF-IDF sources blow up."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.side_effect = Exception("no LLM")
        signals = extract_jd_signals(SAMPLE_JD, resume_text="Python developer")

    assert isinstance(signals, list)
||||||
|
|
||||||
|
|
||||||
|
def test_extract_jd_signals_llm_path_parses_json_array():
    """LLM-extracted signals are parsed out of the JSON array and included."""
    from scripts.resume_optimizer import extract_jd_signals

    fake_reply = '["Gainsight", "cross-functional leadership", "stakeholder management"]'
    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = fake_reply
        signals = extract_jd_signals(SAMPLE_JD)

    assert "Gainsight" in signals
    assert "cross-functional leadership" in signals
||||||
|
|
||||||
|
|
||||||
|
def test_extract_jd_signals_deduplicates():
    """Duplicate terms across the LLM and TF-IDF sources collapse to one entry."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = '["Python", "AWS", "Python"]'
        signals = extract_jd_signals(SAMPLE_JD)

    assert signals.count("Python") == 1
||||||
|
|
||||||
|
|
||||||
|
def test_extract_jd_signals_handles_malformed_llm_json():
    """Non-JSON LLM output degrades gracefully instead of raising."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = "Here are some keywords: Gainsight, AWS"
        signals = extract_jd_signals(SAMPLE_JD)

    # Still a list (may be empty — TF-IDF is skipped without resume_text).
    assert isinstance(signals, list)
||||||
|
|
||||||
|
|
||||||
|
# ── prioritize_gaps ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_prioritize_gaps_skips_existing_terms():
    """Terms already on the resume are dropped from the prioritized output."""
    from scripts.resume_optimizer import prioritize_gaps

    # "Salesforce" already appears in SAMPLE_RESUME's skills list.
    ranked = prioritize_gaps(["Salesforce", "Gainsight"], SAMPLE_RESUME)
    ranked_terms = {entry["term"] for entry in ranked}

    assert "Salesforce" not in ranked_terms
    assert "Gainsight" in ranked_terms
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_routes_tech_terms_to_skills():
    """Known tech keywords land in the skills section at top priority."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["AWS", "Docker"], SAMPLE_RESUME)
    lookup = {entry["term"]: entry for entry in ranked}

    assert lookup["AWS"]["section"] == "skills"
    assert lookup["AWS"]["priority"] == 1
    assert lookup["Docker"]["section"] == "skills"
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_routes_leadership_terms_to_summary():
    """Leadership/executive signals land in the summary section."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["cross-functional", "stakeholder"], SAMPLE_RESUME)
    lookup = {entry["term"]: entry for entry in ranked}

    assert lookup["cross-functional"]["section"] == "summary"
    assert lookup["stakeholder"]["section"] == "summary"
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_multi_word_routes_to_experience():
    """An unlisted multi-word phrase lands in experience at priority 2."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["proactive client engagement"], SAMPLE_RESUME)

    assert ranked[0]["section"] == "experience"
    assert ranked[0]["priority"] == 2
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_single_word_is_lowest_priority():
    """An unlisted single word lands in experience at priority 3."""
    from scripts.resume_optimizer import prioritize_gaps

    ranked = prioritize_gaps(["innovation"], SAMPLE_RESUME)

    assert ranked[0]["priority"] == 3
||||||
|
|
||||||
|
|
||||||
|
def test_prioritize_gaps_sorted_by_priority():
    """Output is ordered by ascending priority (1 first)."""
    from scripts.resume_optimizer import prioritize_gaps

    mixed = ["innovation", "AWS", "cross-functional", "managed service contracts"]
    priorities = [entry["priority"] for entry in prioritize_gaps(mixed, SAMPLE_RESUME)]

    assert priorities == sorted(priorities)
||||||
|
|
||||||
|
|
||||||
|
# ── hallucination_check ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_hallucination_check_passes_unchanged_resume():
    """A rewrite that only touches the summary keeps all anchors and passes."""
    from scripts.resume_optimizer import hallucination_check

    safe_rewrite = dict(SAMPLE_RESUME)
    safe_rewrite["career_summary"] = (
        "Dynamic CSM with cross-functional stakeholder management experience."
    )

    assert hallucination_check(SAMPLE_RESUME, safe_rewrite) is True
||||||
|
|
||||||
|
|
||||||
|
def test_hallucination_check_fails_on_new_employer():
    """A rewrite that introduces an unseen company is rejected."""
    from scripts.resume_optimizer import hallucination_check

    invented = {
        "title": "VP of Customer Success",
        "company": "Fabricated Corp",
        "start_date": "2019",
        "end_date": "2021",
        "bullets": ["Led a team of 30."],
    }
    bad_rewrite = dict(SAMPLE_RESUME)
    bad_rewrite["experience"] = [*SAMPLE_RESUME["experience"], invented]

    assert hallucination_check(SAMPLE_RESUME, bad_rewrite) is False
||||||
|
|
||||||
|
|
||||||
|
def test_hallucination_check_fails_on_new_institution():
    """A rewrite that adds an unseen school is rejected."""
    from scripts.resume_optimizer import hallucination_check

    bad_rewrite = dict(SAMPLE_RESUME)
    extra_degree = {
        "degree": "M.S.",
        "field": "Data Science",
        "institution": "MIT",
        "graduation_year": "2020",
    }
    bad_rewrite["education"] = [*SAMPLE_RESUME["education"], extra_degree]

    assert hallucination_check(SAMPLE_RESUME, bad_rewrite) is False
||||||
|
|
||||||
|
|
||||||
|
# ── render_resume_text ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_render_resume_text_contains_all_sections():
    """Every populated section shows up in the rendered plain text."""
    from scripts.resume_optimizer import render_resume_text

    rendered = render_resume_text(SAMPLE_RESUME)

    for expected in (
        "Alex Rivera",
        "SUMMARY",
        "EXPERIENCE",
        "Customer Success Manager",
        "Acme Corp",
        "EDUCATION",
        "State University",
        "SKILLS",
        "Salesforce",
    ):
        assert expected in rendered
||||||
|
|
||||||
|
|
||||||
|
def test_render_resume_text_omits_empty_sections():
    """Headers for empty sections never appear in the output."""
    from scripts.resume_optimizer import render_resume_text

    bare_resume = {
        "name": "Jordan Lee",
        "email": "",
        "phone": "",
        "career_summary": "",
        "skills": [],
        "experience": [],
        "education": [],
        "achievements": [],
    }
    rendered = render_resume_text(bare_resume)

    assert "EXPERIENCE" not in rendered
    assert "SKILLS" not in rendered
||||||
|
|
||||||
|
|
||||||
|
# ── db integration ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_save_and_get_optimized_resume(tmp_path):
    """Round-trip: saved resume text and gap report come back intact."""
    from scripts.db import init_db, save_optimized_resume, get_optimized_resume

    db_file = tmp_path / "test.db"
    init_db(db_file)

    # Seed one job row so the UPDATE has something to hit.
    import sqlite3
    with sqlite3.connect(db_file) as conn:
        conn.execute(
            "INSERT INTO jobs (id, title, company, url, source, status) VALUES (1, 'CSM', 'Acme', 'http://x.com', 'test', 'approved')"
        )
        conn.commit()

    report = json.dumps(
        [{"term": "Gainsight", "section": "skills", "priority": 1, "rationale": "test"}]
    )
    save_optimized_resume(db_file, job_id=1, text="Rewritten resume text.", gap_report=report)

    stored = get_optimized_resume(db_file, job_id=1)
    assert stored["optimized_resume"] == "Rewritten resume text."
    assert json.loads(stored["ats_gap_report"])[0]["term"] == "Gainsight"
||||||
|
|
||||||
|
|
||||||
|
def test_get_optimized_resume_returns_empty_for_missing(tmp_path):
    """Looking up an unknown job id yields empty strings, not an error."""
    from scripts.db import init_db, get_optimized_resume

    db_file = tmp_path / "test.db"
    init_db(db_file)

    missing = get_optimized_resume(db_file, job_id=999)

    assert missing["optimized_resume"] == ""
    assert missing["ats_gap_report"] == ""
||||||
Loading…
Reference in a new issue