From 02e004ee5c19fa7da62fb6e26bb1943c63a58fe8 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 1 Apr 2026 07:09:46 -0700 Subject: [PATCH] =?UTF-8?q?feat(apply):=20ATS=20resume=20optimizer=20backe?= =?UTF-8?q?nd=20=E2=80=94=20gap=20report=20+=20LLM=20rewrite?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - scripts/resume_optimizer.py: full pipeline (extract_jd_signals → prioritize_gaps → rewrite_for_ats → hallucination_check) - scripts/db.py: add optimized_resume + ats_gap_report columns + save_optimized_resume / get_optimized_resume helpers - tests/test_resume_optimizer.py: 17 unit tests; patches at source module (scripts.llm_router.LLMRouter), not consumer Tier gate: gap report is free; full LLM rewrite is paid+. --- scripts/db.py | 34 +++ scripts/resume_optimizer.py | 439 +++++++++++++++++++++++++++++++++ tests/test_resume_optimizer.py | 288 +++++++++++++++++++++ 3 files changed, 761 insertions(+) create mode 100644 scripts/resume_optimizer.py create mode 100644 tests/test_resume_optimizer.py diff --git a/scripts/db.py b/scripts/db.py index f6ccb6e..89dfc84 100644 --- a/scripts/db.py +++ b/scripts/db.py @@ -132,6 +132,8 @@ _MIGRATIONS = [ ("hired_at", "TEXT"), ("survey_at", "TEXT"), ("calendar_event_id", "TEXT"), + ("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier) + ("ats_gap_report", "TEXT"), # JSON gap report (free tier) ] @@ -301,6 +303,38 @@ def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: st conn.close() +def save_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None, + text: str = "", gap_report: str = "") -> None: + """Persist ATS-optimized resume text and/or gap report for a job.""" + if job_id is None: + return + conn = sqlite3.connect(db_path) + conn.execute( + "UPDATE jobs SET optimized_resume = ?, ats_gap_report = ? 
WHERE id = ?", + (text or None, gap_report or None, job_id), + ) + conn.commit() + conn.close() + + +def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict: + """Return optimized_resume and ats_gap_report for a job, or empty strings if absent.""" + if job_id is None: + return {"optimized_resume": "", "ats_gap_report": ""} + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT optimized_resume, ats_gap_report FROM jobs WHERE id = ?", (job_id,) + ).fetchone() + conn.close() + if not row: + return {"optimized_resume": "", "ats_gap_report": ""} + return { + "optimized_resume": row["optimized_resume"] or "", + "ats_gap_report": row["ats_gap_report"] or "", + } + + _UPDATABLE_JOB_COLS = { "title", "company", "url", "source", "location", "is_remote", "salary", "description", "match_score", "keyword_gaps", diff --git a/scripts/resume_optimizer.py b/scripts/resume_optimizer.py new file mode 100644 index 0000000..1d3b7b3 --- /dev/null +++ b/scripts/resume_optimizer.py @@ -0,0 +1,439 @@ +""" +ATS Resume Optimizer — rewrite a candidate's resume to maximize keyword match +for a specific job description without fabricating experience. 
"""ATS resume optimizer — keyword-gap analysis and LLM-driven resume rewriting.

Tier behaviour:
    Free    → gap report only (extract_jd_signals + prioritize_gaps, no rewrite)
    Paid    → full LLM rewrite targeting the JD (rewrite_for_ats)
    Premium → same as paid for now; fine-tuned voice model is a future step

Pipeline: extract_jd_signals → prioritize_gaps → rewrite_for_ats
→ hallucination_check.
"""
from __future__ import annotations

import json
import logging
import re
from pathlib import Path
from typing import Any

log = logging.getLogger(__name__)

# ── Signal extraction ─────────────────────────────────────────────────────────

def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
    """Collect ATS keyword signals from a job description.

    Merges two independent sources:
      1. TF-IDF keyword gaps from ``scripts.match`` — fast, deterministic,
         no LLM cost.
      2. An LLM pass for phrasing nuance TF-IDF misses ("cross-functional"
         vs "cross-team", "led" vs "managed").

    Either source failing (including an unavailable LLM) degrades gracefully
    to whatever the other source produced.

    Args:
        description: Raw job-description text.
        resume_text: Candidate resume text; enables the TF-IDF gap pass.

    Returns:
        Case-insensitively de-duplicated signals, LLM phrasing first.
    """
    # Deterministic TF-IDF gaps — only meaningful when a resume is supplied.
    deterministic_gaps: list[str] = []
    if resume_text:
        try:
            from scripts.match import match_score
            _, deterministic_gaps = match_score(resume_text, description)
        except Exception:
            log.warning("[resume_optimizer] TF-IDF gap extraction failed", exc_info=True)

    # LLM extraction for phrasing/qualifier nuance.
    model_terms: list[str] = []
    try:
        from scripts.llm_router import LLMRouter
        prompt = (
            "Extract the most important ATS (applicant tracking system) keywords and "
            "phrases from this job description. Focus on:\n"
            "- Required skills and technologies (exact phrasing matters)\n"
            "- Action verbs used to describe responsibilities\n"
            "- Qualification signals ('required', 'must have', 'preferred')\n"
            "- Industry-specific terminology\n\n"
            "Return a JSON array of strings only. No explanation.\n\n"
            f"Job description:\n{description[:3000]}"
        )
        raw = LLMRouter().complete(prompt)
        # The model may wrap the array in markdown — pull out the JSON bracket span.
        array_match = re.search(r"\[.*\]", raw, re.DOTALL)
        if array_match:
            decoded = json.loads(array_match.group(0))
            model_terms = [item.strip() for item in decoded
                           if isinstance(item, str) and item.strip()]
    except Exception:
        log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)

    # Merge with first-occurrence-wins, case-insensitive dedup; dict preserves
    # insertion order, so LLM phrasing comes first and TF-IDF fills the gaps.
    unique: dict[str, str] = {}
    for term in model_terms + deterministic_gaps:
        unique.setdefault(term.lower(), term)
    return list(unique.values())
# ── Gap prioritization ────────────────────────────────────────────────────────

# Map each gap term to the resume section where it would have the most ATS impact.
# ATS systems weight keywords higher in certain sections:
#   skills     — direct keyword match, highest density, indexed first
#   summary    — executive summary keywords often boost overall relevance score
#   experience — verbs + outcomes in bullet points; adds context weight
# REVIEW: these lists are tech/leadership-centric; domain-specific roles
# (creative, healthcare, operations) may over-route to experience. Consider
# expanding the lists or making them config-driven.
_SECTION_KEYWORDS: dict[str, list[str]] = {
    "skills": [
        "python", "sql", "java", "typescript", "react", "vue", "docker",
        "kubernetes", "aws", "gcp", "azure", "terraform", "ci/cd", "git",
        "postgresql", "redis", "kafka", "spark", "tableau", "salesforce",
        "jira", "figma", "excel", "powerpoint", "machine learning", "llm",
        "deep learning", "pytorch", "tensorflow", "scikit-learn",
    ],
    "summary": [
        "leadership", "strategy", "vision", "executive", "director", "vp",
        "growth", "transformation", "stakeholder", "cross-functional",
        "p&l", "revenue", "budget", "board", "c-suite",
    ],
}


def prioritize_gaps(gaps: list[str], resume_sections: dict[str, Any]) -> list[dict]:
    """Rank keyword gaps by ATS impact and map each to a target resume section.

    Routing rules, applied in order:
      1. Term already present anywhere in the resume (case-insensitive) → dropped.
      2. Partial match against the "skills" list  → section "skills",  priority 1.
      3. Partial match against the "summary" list → section "summary", priority 1.
      4. Multi-word phrase (more specific → higher ATS weight)
                                                  → "experience",      priority 2.
      5. Anything else                            → "experience",      priority 3.

    (An earlier draft docstring described this as an unimplemented stub that
    routed everything to "experience" at medium priority — the routing above
    is the actual, implemented behavior.)

    Args:
        gaps: Missing keyword signals from extract_jd_signals().
        resume_sections: Structured resume dict from resume_parser.parse_resume().

    Returns:
        List of dicts sorted by priority ascending, i.e. highest impact
        (priority 1) first:
            {
                "term": str,       # the keyword/phrase to inject
                "section": str,    # "skills", "summary", or "experience"
                "priority": int,   # 1=high, 2=medium, 3=low
                "rationale": str,  # why this section was chosen
            }
    """
    existing_text = _flatten_resume_text(resume_sections).lower()

    prioritized: list[dict] = []
    for term in gaps:
        # Rule 1: skip terms already present anywhere in the resume.
        if term.lower() in existing_text:
            continue

        term_lower = term.lower()

        # Partial-match in both directions handles casing/containment variants
        # ("PostgreSQL" vs "postgresql", "AWS Lambda" vs "aws", etc.).
        skills_match = any(kw in term_lower or term_lower in kw
                           for kw in _SECTION_KEYWORDS["skills"])
        summary_match = any(kw in term_lower or term_lower in kw
                            for kw in _SECTION_KEYWORDS["summary"])

        if skills_match:
            section = "skills"
            priority = 1
            rationale = "matched technical skills list — highest ATS keyword density"
        elif summary_match:
            section = "summary"
            priority = 1
            rationale = "matched leadership/executive signals — boosts overall relevance score"
        elif len(term.split()) > 1:
            section = "experience"
            priority = 2
            rationale = "multi-word phrase — more specific than single keywords, context weight in bullets"
        else:
            section = "experience"
            priority = 3
            rationale = "single generic term — lowest ATS impact, added to experience for coverage"

        prioritized.append({
            "term": term,
            "section": section,
            "priority": priority,
            "rationale": rationale,
        })

    # Stable sort: priority ascending (1 first); input order kept within a tier.
    prioritized.sort(key=lambda x: x["priority"])
    return prioritized


def _flatten_resume_text(resume: dict[str, Any]) -> str:
    """Concatenate all text from a structured resume dict into one searchable string."""
    parts: list[str] = []
    parts.append(resume.get("career_summary", "") or "")
    parts.extend(resume.get("skills", []))
    for exp in resume.get("experience", []):
        parts.append(exp.get("title", ""))
        parts.append(exp.get("company", ""))
        parts.extend(exp.get("bullets", []))
    for edu in resume.get("education", []):
        parts.append(edu.get("degree", ""))
        parts.append(edu.get("field", ""))
        parts.append(edu.get("institution", ""))
    parts.extend(resume.get("achievements", []))
    return " ".join(parts)
+ """ + from scripts.llm_router import LLMRouter + router = LLMRouter() + + # Group gaps by target section + by_section: dict[str, list[str]] = {} + for gap in prioritized_gaps: + by_section.setdefault(gap["section"], []).append(gap["term"]) + + rewritten = dict(resume) # shallow copy — sections replaced below + + for section, terms in by_section.items(): + terms_str = ", ".join(f'"{t}"' for t in terms) + original_content = _section_text_for_prompt(resume, section) + + voice_note = ( + f'\n\nCandidate voice/style: "{candidate_voice}". ' + "Preserve this authentic tone — do not write generically." + ) if candidate_voice else "" + + prompt = ( + f"You are rewriting the **{section}** section of a resume to help it pass " + f"ATS (applicant tracking system) screening for this role:\n" + f" Job title: {job.get('title', 'Unknown')}\n" + f" Company: {job.get('company', 'Unknown')}\n\n" + f"Inject these missing ATS keywords naturally into the section:\n" + f" {terms_str}\n\n" + f"CRITICAL RULES — violating any of these invalidates the rewrite:\n" + f"1. Do NOT invent new employers, job titles, dates, or education.\n" + f"2. Do NOT add skills the candidate did not already demonstrate.\n" + f"3. Only rephrase existing content — replace vague verbs/nouns with the " + f" ATS-preferred equivalents listed above.\n" + f"4. Keep the same number of bullet points in experience entries.\n" + f"5. Return ONLY the rewritten section content, no labels or explanation." 
+ f"{voice_note}\n\n" + f"Original {section} section:\n{original_content}" + ) + + try: + result = router.complete(prompt) + rewritten = _apply_section_rewrite(rewritten, section, result.strip()) + except Exception: + log.warning("[resume_optimizer] rewrite failed for section %r", section, exc_info=True) + # Leave section unchanged on failure + + return rewritten + + +def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str: + """Render a resume section as plain text suitable for an LLM prompt.""" + if section == "summary": + return resume.get("career_summary", "") or "(empty)" + if section == "skills": + skills = resume.get("skills", []) + return ", ".join(skills) if skills else "(empty)" + if section == "experience": + lines: list[str] = [] + for exp in resume.get("experience", []): + lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})") + for b in exp.get("bullets", []): + lines.append(f" • {b}") + return "\n".join(lines) if lines else "(empty)" + return "(unsupported section)" + + +def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) -> dict[str, Any]: + """Return a new resume dict with the given section replaced by rewritten text.""" + updated = dict(resume) + if section == "summary": + updated["career_summary"] = rewritten + elif section == "skills": + # LLM returns comma-separated or newline-separated skills + skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()] + updated["skills"] = skills + elif section == "experience": + # For experience, we keep the structured entries but replace the bullets. + # The LLM rewrites the whole section as plain text; we re-parse the bullets. 
+ updated["experience"] = _reparse_experience_bullets(resume["experience"], rewritten) + return updated + + +def _reparse_experience_bullets( + original_entries: list[dict], + rewritten_text: str, +) -> list[dict]: + """Re-associate rewritten bullet text with the original experience entries. + + The LLM rewrites the section as a block of text. We split on the original + entry headers (title + company) to re-bind bullets to entries. Falls back + to the original entries if splitting fails. + """ + if not original_entries: + return original_entries + + result: list[dict] = [] + remaining = rewritten_text + + for i, entry in enumerate(original_entries): + # Find where the next entry starts so we can slice out this entry's bullets + if i + 1 < len(original_entries): + next_title = original_entries[i + 1]["title"] + # Look for the next entry header in the remaining text + split_pat = re.escape(next_title) + m = re.search(split_pat, remaining, re.IGNORECASE) + chunk = remaining[:m.start()] if m else remaining + remaining = remaining[m.start():] if m else "" + else: + chunk = remaining + + bullets = [ + re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip() + for line in chunk.splitlines() + if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip()) + ] + new_entry = dict(entry) + new_entry["bullets"] = bullets if bullets else entry["bullets"] + result.append(new_entry) + + return result + + +# ── Hallucination guard ─────────────────────────────────────────────────────── + +def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool: + """Return True if the rewrite is safe (no fabricated facts detected). + + Checks that the set of employers, job titles, and date ranges in the + rewritten resume is a subset of those in the original. Any new entry + signals hallucination. + + Args: + original: Structured resume dict before rewrite. + rewritten: Structured resume dict after rewrite. 
# ── Hallucination guard ───────────────────────────────────────────────────────

def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
    """Return True when the rewrite introduces no fabricated facts.

    Compares the factual anchors (employers, job titles, date ranges,
    institutions) of the rewrite against the original: the rewrite's anchors
    must be a subset of the original's. Any new anchor signals hallucination.

    Args:
        original: Structured resume dict before rewrite.
        rewritten: Structured resume dict after rewrite.

    Returns:
        True when the rewrite is safe to use; False when a fabricated anchor
        was detected and the caller should fall back to the original.
    """
    fabricated = _extract_anchors(rewritten) - _extract_anchors(original)
    if not fabricated:
        return True
    log.warning(
        "[resume_optimizer] hallucination_check FAILED — new anchors in rewrite: %s",
        fabricated,
    )
    return False


def _extract_anchors(resume: dict[str, Any]) -> frozenset[str]:
    """Extract stable factual anchors (company, title, dates) from experience entries."""
    collected = [
        (entry.get(field) or "").strip().lower()
        for entry in resume.get("experience", [])
        for field in ("company", "title", "start_date", "end_date")
    ] + [
        (entry.get("institution") or "").strip().lower()
        for entry in resume.get("education", [])
    ]
    return frozenset(value for value in collected if value)


# ── Resume → plain text renderer ──────────────────────────────────────────────

def render_resume_text(resume: dict[str, Any]) -> str:
    """Render a structured resume dict back to formatted plain text for PDF export."""
    out: list[str] = []
    emit = out.append

    # Contact header: name / email / phone, skipping whichever are blank.
    header = [resume.get("name", ""), resume.get("email", ""), resume.get("phone", "")]
    emit(" ".join(part for part in header if part))
    emit("")

    if resume.get("career_summary"):
        emit("SUMMARY")
        emit(resume["career_summary"])
        emit("")

    if resume.get("experience"):
        emit("EXPERIENCE")
        for role in resume["experience"]:
            emit(
                f"{role.get('title', '')} | {role.get('company', '')} "
                f"({role.get('start_date', '')}–{role.get('end_date', '')})"
            )
            for bullet in role.get("bullets", []):
                emit(f" • {bullet}")
        emit("")

    if resume.get("education"):
        emit("EDUCATION")
        for school in resume["education"]:
            emit(
                f"{school.get('degree', '')} {school.get('field', '')} | "
                f"{school.get('institution', '')} {school.get('graduation_year', '')}"
            )
        emit("")

    if resume.get("skills"):
        emit("SKILLS")
        emit(", ".join(resume["skills"]))
        emit("")

    if resume.get("achievements"):
        emit("ACHIEVEMENTS")
        for achievement in resume["achievements"]:
            emit(f" • {achievement}")
        emit("")

    return "\n".join(out)
+) + + +# ── extract_jd_signals ──────────────────────────────────────────────────────── + +def test_extract_jd_signals_returns_list(): + """extract_jd_signals returns a list even when LLM and TF-IDF both fail.""" + from scripts.resume_optimizer import extract_jd_signals + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.side_effect = Exception("no LLM") + result = extract_jd_signals(SAMPLE_JD, resume_text="Python developer") + + assert isinstance(result, list) + + +def test_extract_jd_signals_llm_path_parses_json_array(): + """extract_jd_signals merges LLM-extracted signals with TF-IDF gaps.""" + from scripts.resume_optimizer import extract_jd_signals + + llm_response = '["Gainsight", "cross-functional leadership", "stakeholder management"]' + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.return_value = llm_response + result = extract_jd_signals(SAMPLE_JD) + + assert "Gainsight" in result + assert "cross-functional leadership" in result + + +def test_extract_jd_signals_deduplicates(): + """extract_jd_signals deduplicates terms across LLM and TF-IDF sources.""" + from scripts.resume_optimizer import extract_jd_signals + + llm_response = '["Python", "AWS", "Python"]' + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.return_value = llm_response + result = extract_jd_signals(SAMPLE_JD) + + assert result.count("Python") == 1 + + +def test_extract_jd_signals_handles_malformed_llm_json(): + """extract_jd_signals falls back gracefully when LLM returns non-JSON.""" + from scripts.resume_optimizer import extract_jd_signals + + with patch("scripts.llm_router.LLMRouter") as MockRouter: + MockRouter.return_value.complete.return_value = "Here are some keywords: Gainsight, AWS" + result = extract_jd_signals(SAMPLE_JD) + + # Should still return a list (may be empty if TF-IDF also silent) + assert isinstance(result, list) + + +# ── 
prioritize_gaps ─────────────────────────────────────────────────────────── + +def test_prioritize_gaps_skips_existing_terms(): + """prioritize_gaps excludes terms already present in the resume.""" + from scripts.resume_optimizer import prioritize_gaps + + # "Salesforce" is already in SAMPLE_RESUME skills + result = prioritize_gaps(["Salesforce", "Gainsight"], SAMPLE_RESUME) + terms = [r["term"] for r in result] + + assert "Salesforce" not in terms + assert "Gainsight" in terms + + +def test_prioritize_gaps_routes_tech_terms_to_skills(): + """prioritize_gaps maps known tech keywords to the skills section at priority 1.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["AWS", "Docker"], SAMPLE_RESUME) + by_term = {r["term"]: r for r in result} + + assert by_term["AWS"]["section"] == "skills" + assert by_term["AWS"]["priority"] == 1 + assert by_term["Docker"]["section"] == "skills" + + +def test_prioritize_gaps_routes_leadership_terms_to_summary(): + """prioritize_gaps maps leadership/executive signals to the summary section.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["cross-functional", "stakeholder"], SAMPLE_RESUME) + by_term = {r["term"]: r for r in result} + + assert by_term["cross-functional"]["section"] == "summary" + assert by_term["stakeholder"]["section"] == "summary" + + +def test_prioritize_gaps_multi_word_routes_to_experience(): + """Multi-word phrases not in skills/summary lists go to experience at priority 2.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["proactive client engagement"], SAMPLE_RESUME) + assert result[0]["section"] == "experience" + assert result[0]["priority"] == 2 + + +def test_prioritize_gaps_single_word_is_lowest_priority(): + """Single generic words not in any list go to experience at priority 3.""" + from scripts.resume_optimizer import prioritize_gaps + + result = prioritize_gaps(["innovation"], 
SAMPLE_RESUME) + assert result[0]["priority"] == 3 + + +def test_prioritize_gaps_sorted_by_priority(): + """prioritize_gaps output is sorted ascending by priority (1 first).""" + from scripts.resume_optimizer import prioritize_gaps + + gaps = ["innovation", "AWS", "cross-functional", "managed service contracts"] + result = prioritize_gaps(gaps, SAMPLE_RESUME) + priorities = [r["priority"] for r in result] + + assert priorities == sorted(priorities) + + +# ── hallucination_check ─────────────────────────────────────────────────────── + +def test_hallucination_check_passes_unchanged_resume(): + """hallucination_check returns True when rewrite has no new employers or institutions.""" + from scripts.resume_optimizer import hallucination_check + + # Shallow rewrite: same structure + rewritten = { + **SAMPLE_RESUME, + "career_summary": "Dynamic CSM with cross-functional stakeholder management experience.", + } + assert hallucination_check(SAMPLE_RESUME, rewritten) is True + + +def test_hallucination_check_fails_on_new_employer(): + """hallucination_check returns False when a new company is introduced.""" + from scripts.resume_optimizer import hallucination_check + + fabricated_entry = { + "title": "VP of Customer Success", + "company": "Fabricated Corp", + "start_date": "2019", + "end_date": "2021", + "bullets": ["Led a team of 30."], + } + rewritten = dict(SAMPLE_RESUME) + rewritten["experience"] = SAMPLE_RESUME["experience"] + [fabricated_entry] + + assert hallucination_check(SAMPLE_RESUME, rewritten) is False + + +def test_hallucination_check_fails_on_new_institution(): + """hallucination_check returns False when a new educational institution appears.""" + from scripts.resume_optimizer import hallucination_check + + rewritten = dict(SAMPLE_RESUME) + rewritten["education"] = [ + *SAMPLE_RESUME["education"], + {"degree": "M.S.", "field": "Data Science", "institution": "MIT", "graduation_year": "2020"}, + ] + + assert hallucination_check(SAMPLE_RESUME, rewritten) is 
False + + +# ── render_resume_text ──────────────────────────────────────────────────────── + +def test_render_resume_text_contains_all_sections(): + """render_resume_text produces plain text containing all resume sections.""" + from scripts.resume_optimizer import render_resume_text + + text = render_resume_text(SAMPLE_RESUME) + + assert "Alex Rivera" in text + assert "SUMMARY" in text + assert "EXPERIENCE" in text + assert "Customer Success Manager" in text + assert "Acme Corp" in text + assert "EDUCATION" in text + assert "State University" in text + assert "SKILLS" in text + assert "Salesforce" in text + + +def test_render_resume_text_omits_empty_sections(): + """render_resume_text skips sections that have no content.""" + from scripts.resume_optimizer import render_resume_text + + sparse = { + "name": "Jordan Lee", + "email": "", + "phone": "", + "career_summary": "", + "skills": [], + "experience": [], + "education": [], + "achievements": [], + } + text = render_resume_text(sparse) + + assert "EXPERIENCE" not in text + assert "SKILLS" not in text + + +# ── db integration ──────────────────────────────────────────────────────────── + +def test_save_and_get_optimized_resume(tmp_path): + """save_optimized_resume persists and get_optimized_resume retrieves the data.""" + from scripts.db import init_db, save_optimized_resume, get_optimized_resume + + db_path = tmp_path / "test.db" + init_db(db_path) + + # Insert a minimal job to satisfy FK + import sqlite3 + conn = sqlite3.connect(db_path) + conn.execute( + "INSERT INTO jobs (id, title, company, url, source, status) VALUES (1, 'CSM', 'Acme', 'http://x.com', 'test', 'approved')" + ) + conn.commit() + conn.close() + + gap_report = json.dumps([{"term": "Gainsight", "section": "skills", "priority": 1, "rationale": "test"}]) + save_optimized_resume(db_path, job_id=1, text="Rewritten resume text.", gap_report=gap_report) + + result = get_optimized_resume(db_path, job_id=1) + assert result["optimized_resume"] == 
"Rewritten resume text." + parsed = json.loads(result["ats_gap_report"]) + assert parsed[0]["term"] == "Gainsight" + + +def test_get_optimized_resume_returns_empty_for_missing(tmp_path): + """get_optimized_resume returns empty strings when no record exists.""" + from scripts.db import init_db, get_optimized_resume + + db_path = tmp_path / "test.db" + init_db(db_path) + + result = get_optimized_resume(db_path, job_id=999) + assert result["optimized_resume"] == "" + assert result["ats_gap_report"] == ""