feat: resume_sync.py — library↔profile transform functions with tests

Pure transform functions (no LLM, no DB) bridging the two resume representations: library struct_json ↔ ResumePayload content fields. Exports library_to_profile_content, profile_to_library, make_auto_backup_name, blank_fields_on_import. 22 tests, all passing.
2026-04-16 13:04:56 -07:00 · 2026-04-16 13:04:56 -07:00 · 048a5f4cc3
commit 048a5f4cc3
parent fe4947a72f
2 changed files with 392 additions and 0 deletions
--- a/scripts/resume_sync.py
+++ b/scripts/resume_sync.py
@ -0,0 +1,211 @@
+"""
+Resume format transform — library ↔ profile.
+
+Converts between:
+  - Library format: struct_json produced by resume_parser.parse_resume()
+      {name, email, phone, career_summary, experience[{title,company,start_date,end_date,location,bullets[]}],
+       education[{institution,degree,field,start_date,end_date}], skills[], achievements[]}
+  - Profile content format: ResumePayload content fields (plain_text_resume.yaml)
+      {name, surname, email, phone, career_summary,
+       experience[{title,company,period,location,industry,responsibilities,skills[]}],
+       education[{institution,degree,field,start_date,end_date}],
+       skills[], achievements[]}
+
+Profile metadata fields (salary, work prefs, self-ID, PII) are never touched here.
+
+License: MIT
+"""
+from __future__ import annotations
+
+from datetime import date
+from typing import Any
+
+
+# Content-field keys of the profile format; these are exactly the keys of the
+# dict returned by library_to_profile_content.
+_CONTENT_FIELDS = frozenset((
+    "name",
+    "surname",
+    "email",
+    "phone",
+    "career_summary",
+    "experience",
+    "skills",
+    "education",
+    "achievements",
+))
+
+
+def library_to_profile_content(struct_json: dict[str, Any]) -> dict[str, Any]:
+    """Transform a library struct_json to ResumePayload content fields.
+
+    Returns only content fields. Caller is responsible for merging with existing
+    metadata fields (salary, preferences, self-ID) so they are not overwritten.
+
+    Args:
+        struct_json: Library-format resume dict. Missing keys and falsy values
+            are treated as empty strings / empty lists.
+
+    Returns:
+        Dict with the keys name, surname, email, phone, career_summary,
+        experience, skills, education and achievements.
+
+    Lossy for experience[].industry and experience[].skills (always blank —
+    the library format has no counterpart for them).
+    name is split on the first run of whitespace into name/surname; surrounding
+    whitespace is discarded.
+    """
+    # split(None, 1) skips leading whitespace and treats repeated spaces as one
+    # separator, so " Alex  Rivera" cannot produce an empty name or a padded
+    # surname the way split(" ", 1) would.
+    name_parts = (struct_json.get("name") or "").split(None, 1)
+    name = name_parts[0] if name_parts else ""
+    surname = name_parts[1].strip() if len(name_parts) > 1 else ""
+
+    experience = []
+    for exp in struct_json.get("experience") or []:
+        start = (exp.get("start_date") or "").strip()
+        end = (exp.get("end_date") or "").strip()
+        bullets: list[str] = exp.get("bullets") or []
+
+        experience.append({
+            "title":            exp.get("title") or "",
+            "company":          exp.get("company") or "",
+            # Whichever dates are present, joined with an en dash; a single
+            # date stands alone and no dates gives "".
+            "period":           " \u2013 ".join(d for d in (start, end) if d),
+            "location":         exp.get("location") or "",
+            "industry":         "",  # not captured by parser
+            "responsibilities": "\n".join(b for b in bullets if b),
+            "skills":           [],  # per-role skills are not in the library format
+        })
+
+    education = [
+        {
+            "institution": edu.get("institution") or "",
+            "degree":      edu.get("degree") or "",
+            "field":       edu.get("field") or "",
+            "start_date":  edu.get("start_date") or "",
+            "end_date":    edu.get("end_date") or "",
+        }
+        for edu in struct_json.get("education") or []
+    ]
+
+    return {
+        "name":           name,
+        "surname":        surname,
+        "email":          struct_json.get("email") or "",
+        "phone":          struct_json.get("phone") or "",
+        "career_summary": struct_json.get("career_summary") or "",
+        "experience":     experience,
+        # list(...) copies so the caller cannot mutate the library struct.
+        "skills":         list(struct_json.get("skills") or []),
+        "education":      education,
+        "achievements":   list(struct_json.get("achievements") or []),
+    }
+
+
+def _is_iso_date(text: str) -> bool:
+    """Return True if text is a lone ISO-style date (YYYY-MM or YYYY-MM-DD)."""
+    year, *rest = text.split("-")
+    if len(year) != 4 or not year.isdecimal() or len(rest) not in (1, 2):
+        return False
+    if any(len(part) != 2 or not part.isdecimal() for part in rest):
+        return False
+    # A "month" outside 1-12 is more likely a short year range such as "2019-21".
+    return 1 <= int(rest[0]) <= 12
+
+
+def _split_period(period: str) -> tuple[str, str]:
+    """Split a display period such as "2020-01 – 2023-05" into (start, end)."""
+    # Treat em dashes like en dashes so a single separator list covers both.
+    text = period.replace("\u2014", "\u2013")
+    # Try unambiguous range separators first so hyphens inside ISO dates
+    # ("2020-01") are not mistaken for the range separator.
+    for sep in (" \u2013 ", " - ", "\u2013"):
+        start, found, end = text.partition(sep)
+        if found:
+            return start.strip(), end.strip()
+    # No hyphen at all, or a single ISO date: the whole value is the start date.
+    if "-" not in text or _is_iso_date(text):
+        return text, ""
+    # Bare-hyphen range such as "2019-2021".
+    start, _, end = text.partition("-")
+    return start.strip(), end.strip()
+
+
+def profile_to_library(payload: dict[str, Any]) -> tuple[str, dict[str, Any]]:
+    """Transform ResumePayload content fields to (plain_text, struct_json).
+
+    Inverse of library_to_profile_content. The plain_text is a best-effort
+    reconstruction for display and re-parsing. struct_json is the canonical
+    structured representation stored in the resumes table.
+
+    Args:
+        payload: Profile content dict. Missing keys and falsy values are
+            treated as empty strings / empty lists.
+
+    Returns:
+        A (plain_text, struct_json) tuple. plain_text has a contact header
+        followed by SUMMARY, experience entries, SKILLS, EDUCATION and
+        ACHIEVEMENTS sections; a section is left out when it has no content.
+        struct_json has the keys name, email, phone, career_summary,
+        experience, skills, education and achievements.
+
+    experience[].period is split back into start_date / end_date on a spaced
+    or en/em-dash separator; a bare hyphen is only treated as a range separator
+    when the period is not a single ISO-style date.
+    """
+    name_parts = [payload.get("name") or "", payload.get("surname") or ""]
+    full_name = " ".join(p for p in name_parts if p).strip()
+    career_summary = (payload.get("career_summary") or "").strip()
+    email = payload.get("email") or ""
+    phone = payload.get("phone") or ""
+
+    # Contact header: one line per non-empty field.
+    lines: list[str] = [value for value in (full_name, email, phone) if value]
+    if career_summary:
+        lines += ["", "SUMMARY", career_summary]
+
+    experience_structs = []
+    for exp in payload.get("experience") or []:
+        title    = (exp.get("title") or "").strip()
+        company  = (exp.get("company") or "").strip()
+        period   = (exp.get("period") or "").strip()
+        location = (exp.get("location") or "").strip()
+        start_date, end_date = _split_period(period)
+
+        resp = exp.get("responsibilities") or ""
+        bullets = [b.strip() for b in resp.split("\n") if b.strip()]
+
+        # Entries with neither title nor company have nothing to head a text
+        # section with; they are still kept in the structured output below.
+        if title or company:
+            header = " | ".join(p for p in [title, company, period] if p)
+            lines += ["", header]
+            if location:
+                lines.append(location)
+            lines.extend(f"\u2022 {b}" for b in bullets)
+
+        experience_structs.append({
+            "title":      title,
+            "company":    company,
+            "start_date": start_date,
+            "end_date":   end_date,
+            "location":   location,
+            "bullets":    bullets,
+        })
+
+    skills: list[str] = list(payload.get("skills") or [])
+    if skills:
+        lines += ["", "SKILLS", ", ".join(skills)]
+
+    education_structs = []
+    education_lines: list[str] = []
+    for edu in payload.get("education") or []:
+        institution = (edu.get("institution") or "").strip()
+        degree      = (edu.get("degree") or "").strip()
+        field       = (edu.get("field") or "").strip()
+        if institution or degree:
+            label = " ".join(p for p in [degree, field] if p)
+            # Joining only the non-empty halves avoids a dangling " — " when
+            # the entry has an institution but no degree/field.
+            education_lines.append(" \u2014 ".join(p for p in [label, institution] if p))
+        education_structs.append({
+            "institution": institution,
+            "degree":      degree,
+            "field":       field,
+            "start_date":  (edu.get("start_date") or "").strip(),
+            "end_date":    (edu.get("end_date") or "").strip(),
+        })
+    # Own header so education does not read as a continuation of SKILLS.
+    if education_lines:
+        lines += ["", "EDUCATION", *education_lines]
+
+    achievements: list[str] = list(payload.get("achievements") or [])
+    achievement_lines = [f"\u2022 {a}" for a in achievements if a]
+    if achievement_lines:
+        lines += ["", "ACHIEVEMENTS", *achievement_lines]
+
+    struct_json: dict[str, Any] = {
+        "name":           full_name,
+        "email":          email,
+        "phone":          phone,
+        "career_summary": career_summary,
+        "experience":     experience_structs,
+        "skills":         skills,
+        "education":      education_structs,
+        "achievements":   achievements,
+    }
+
+    plain_text = "\n".join(lines).strip()
+    return plain_text, struct_json
+
+
+def make_auto_backup_name(source_name: str, *, today: date | None = None) -> str:
+    """Generate a date-stamped auto-backup name.
+
+    Args:
+        source_name: Name of the resume about to be applied; embedded verbatim.
+        today: Date to stamp the name with. Defaults to date.today(); pass an
+            explicit date for deterministic output (e.g. in tests).
+
+    Returns:
+        "Auto-backup before <source_name> — <YYYY-MM-DD>".
+
+    Example: "Auto-backup before Senior Engineer Resume — 2026-04-16"
+    """
+    stamp = (date.today() if today is None else today).isoformat()
+    return f"Auto-backup before {source_name} \u2014 {stamp}"
+
+
+def blank_fields_on_import(struct_json: dict[str, Any]) -> list[str]:
+    """Return content field names that will be blank after a library→profile import.
+
+    Used to warn the user in the confirmation modal so they know what to fill in.
+
+    Args:
+        struct_json: Library-format resume dict about to be imported.
+
+    Returns:
+        Field paths in "experience[].<field>" form. Empty when the resume has
+        no experience entries.
+    """
+    experience = struct_json.get("experience") or []
+    if not experience:
+        return []
+
+    # library_to_profile_content always writes industry as "" and per-role
+    # skills as [], so both are blank for every imported entry.
+    blank: list[str] = ["experience[].industry", "experience[].skills"]
+    # location is only reported when at least one entry lacks it
+    if any(not (entry.get("location") or "").strip() for entry in experience):
+        blank.append("experience[].location")
+    return blank
--- a/tests/test_resume_sync.py
+++ b/tests/test_resume_sync.py
@ -0,0 +1,181 @@
+"""Unit tests for scripts.resume_sync — format transform between library and profile."""
+import json
+import pytest
+from scripts.resume_sync import (
+    library_to_profile_content,
+    profile_to_library,
+    make_auto_backup_name,
+    blank_fields_on_import,
+)
+
+# ── Fixtures ──────────────────────────────────────────────────────────────────
+
+# Library-format resume (the struct_json shape): one experience entry with
+# separate start/end dates and a bullets list, plus one education entry.
+STRUCT_JSON = {
+    "name": "Alex Rivera",
+    "email": "alex@example.com",
+    "phone": "555-0100",
+    "career_summary": "Senior UX Designer with 6 years experience.",
+    "experience": [
+        {
+            "title": "Senior UX Designer",
+            "company": "StreamNote",
+            "start_date": "2023",
+            "end_date": "present",
+            "location": "Remote",
+            "bullets": ["Led queue redesign", "Built component library"],
+        }
+    ],
+    "education": [
+        {
+            "institution": "State University",
+            "degree": "B.F.A.",
+            "field": "Graphic Design",
+            "start_date": "2015",
+            "end_date": "2019",
+        }
+    ],
+    "skills": ["Figma", "User Research"],
+    "achievements": ["Design award 2024"],
+}
+
+# The same resume in profile content format: name split into name/surname,
+# dates merged into an en-dash "period", bullets joined with newlines into
+# "responsibilities", and blank industry / per-role skills.
+PROFILE_PAYLOAD = {
+    "name": "Alex",
+    "surname": "Rivera",
+    "email": "alex@example.com",
+    "phone": "555-0100",
+    "career_summary": "Senior UX Designer with 6 years experience.",
+    "experience": [
+        {
+            "title": "Senior UX Designer",
+            "company": "StreamNote",
+            "period": "2023 – present",
+            "location": "Remote",
+            "industry": "",
+            "responsibilities": "Led queue redesign\nBuilt component library",
+            "skills": [],
+        }
+    ],
+    "education": [
+        {
+            "institution": "State University",
+            "degree": "B.F.A.",
+            "field": "Graphic Design",
+            "start_date": "2015",
+            "end_date": "2019",
+        }
+    ],
+    "skills": ["Figma", "User Research"],
+    "achievements": ["Design award 2024"],
+}
+
+
+# ── library_to_profile_content ────────────────────────────────────────────────
+
+def test_library_to_profile_splits_name():
+    """A two-word library name is split into name and surname."""
+    content = library_to_profile_content(STRUCT_JSON)
+    assert (content["name"], content["surname"]) == ("Alex", "Rivera")
+
+def test_library_to_profile_single_word_name():
+    """A one-word name fills name and leaves surname empty."""
+    mononym = dict(STRUCT_JSON, name="Cher")
+    content = library_to_profile_content(mononym)
+    assert (content["name"], content["surname"]) == ("Cher", "")
+
+def test_library_to_profile_email_phone():
+    """Email and phone are copied through unchanged."""
+    content = library_to_profile_content(STRUCT_JSON)
+    assert (content["email"], content["phone"]) == ("alex@example.com", "555-0100")
+
+def test_library_to_profile_career_summary():
+    """career_summary is copied through unchanged."""
+    summary = library_to_profile_content(STRUCT_JSON)["career_summary"]
+    assert summary == "Senior UX Designer with 6 years experience."
+
+def test_library_to_profile_experience_period():
+    """start_date and end_date are merged into an en-dash separated period."""
+    first_role = library_to_profile_content(STRUCT_JSON)["experience"][0]
+    assert first_role["period"] == "2023 – present"
+
+def test_library_to_profile_experience_bullets_joined():
+    """Bullets are joined with newlines into responsibilities."""
+    first_role = library_to_profile_content(STRUCT_JSON)["experience"][0]
+    expected = "Led queue redesign\nBuilt component library"
+    assert first_role["responsibilities"] == expected
+
+def test_library_to_profile_experience_industry_blank():
+    """industry is always emitted as an empty string."""
+    first_role = library_to_profile_content(STRUCT_JSON)["experience"][0]
+    assert first_role["industry"] == ""
+
+def test_library_to_profile_education():
+    """Education institution and degree are carried over."""
+    first_school = library_to_profile_content(STRUCT_JSON)["education"][0]
+    assert first_school["institution"] == "State University"
+    assert first_school["degree"] == "B.F.A."
+
+def test_library_to_profile_skills():
+    """Top-level skills are copied through in order."""
+    skills = library_to_profile_content(STRUCT_JSON)["skills"]
+    assert skills == ["Figma", "User Research"]
+
+def test_library_to_profile_achievements():
+    """Achievements are copied through unchanged."""
+    achievements = library_to_profile_content(STRUCT_JSON)["achievements"]
+    assert achievements == ["Design award 2024"]
+
+def test_library_to_profile_missing_fields_no_keyerror():
+    """An empty struct_json yields empty values instead of raising KeyError."""
+    content = library_to_profile_content({})
+    assert content["name"] == ""
+    for list_field in ("experience", "education", "skills", "achievements"):
+        assert content[list_field] == []
+
+
+# ── profile_to_library ────────────────────────────────────────────────────────
+
+def test_profile_to_library_full_name():
+    """name and surname are re-joined into a single full name."""
+    _, library_struct = profile_to_library(PROFILE_PAYLOAD)
+    assert library_struct["name"] == "Alex Rivera"
+
+def test_profile_to_library_experience_bullets_reconstructed():
+    """responsibilities is split on newlines back into a bullets list."""
+    first_role = profile_to_library(PROFILE_PAYLOAD)[1]["experience"][0]
+    assert first_role["bullets"] == ["Led queue redesign", "Built component library"]
+
+def test_profile_to_library_period_split():
+    """period is split back into start_date and end_date."""
+    first_role = profile_to_library(PROFILE_PAYLOAD)[1]["experience"][0]
+    assert (first_role["start_date"], first_role["end_date"]) == ("2023", "present")
+
+def test_profile_to_library_education_round_trip():
+    """Education institution survives the profile→library transform."""
+    first_school = profile_to_library(PROFILE_PAYLOAD)[1]["education"][0]
+    assert first_school["institution"] == "State University"
+
+def test_profile_to_library_plain_text_contains_name():
+    """The reconstructed plain text includes the full name."""
+    plain_text = profile_to_library(PROFILE_PAYLOAD)[0]
+    assert "Alex Rivera" in plain_text
+
+def test_profile_to_library_plain_text_contains_summary():
+    """The reconstructed plain text includes the career summary."""
+    text, _ = profile_to_library(PROFILE_PAYLOAD)
+    # Assert the whole summary sentence: the shorter "Senior UX Designer" is
+    # also the job title, so it would match even if the summary were dropped.
+    assert "Senior UX Designer with 6 years experience." in text
+
+def test_profile_to_library_empty_payload_no_crash():
+    """An empty payload still returns a (str, dict) pair without raising."""
+    plain_text, library_struct = profile_to_library({})
+    assert isinstance(plain_text, str)
+    assert isinstance(library_struct, dict)
+
+
+# ── make_auto_backup_name ─────────────────────────────────────────────────────
+
+def test_backup_name_format():
+    """The backup name is the fixed prefix, the source name, and an ISO date."""
+    import re
+
+    name = make_auto_backup_name("Senior Engineer Resume")
+    # fullmatch (not match) so trailing junk after the date fails the test.
+    assert re.fullmatch(r"Auto-backup before Senior Engineer Resume — \d{4}-\d{2}-\d{2}", name)
+
+
+# ── blank_fields_on_import ────────────────────────────────────────────────────
+
+def test_blank_fields_industry_always_listed():
+    """industry is reported whenever the resume has experience entries."""
+    reported = blank_fields_on_import(STRUCT_JSON)
+    assert "experience[].industry" in reported
+
+def test_blank_fields_location_listed_when_missing():
+    """location is reported when an experience entry has it blank."""
+    role_without_location = dict(STRUCT_JSON["experience"][0], location="")
+    resume = dict(STRUCT_JSON, experience=[role_without_location])
+    assert "experience[].location" in blank_fields_on_import(resume)
+
+def test_blank_fields_location_not_listed_when_present():
+    """location is not reported when every experience entry has one."""
+    reported = blank_fields_on_import(STRUCT_JSON)
+    assert "experience[].location" not in reported