Fixes a bug where ISO-formatted dates (e.g. '2023-01 – 2025-03') in the period field were split incorrectly. The old code replaced the en-dash with a hyphen first and then split on the first hyphen, causing dates like '2023-01' to be split into '2023' and '01' instead of the expected start/end pair. The fix splits on the en-dash/em-dash separator *before* normalizing to plain hyphens, so dates with embedded hyphens round-trip correctly. Adds two regression tests: (1) test_profile_to_library_period_split_iso_dates, which verifies en-dash separation; (2) test_profile_to_library_period_split_em_dash, which verifies em-dash separation.
217 lines
8 KiB
Python
217 lines
8 KiB
Python
"""
|
||
Resume format transform — library ↔ profile.
|
||
|
||
Converts between:
|
||
- Library format: struct_json produced by resume_parser.parse_resume()
|
||
{name, email, phone, career_summary, experience[{title,company,start_date,end_date,location,bullets[]}],
|
||
education[{institution,degree,field,start_date,end_date}], skills[], achievements[]}
|
||
- Profile content format: ResumePayload content fields (plain_text_resume.yaml)
|
||
{name, surname, email, phone, career_summary,
|
||
experience[{title,company,period,location,industry,responsibilities,skills[]}],
|
||
education[{institution,degree,field,start_date,end_date}],
|
||
skills[], achievements[]}
|
||
|
||
Profile metadata fields (salary, work prefs, self-ID, PII) are never touched here.
|
||
|
||
License: MIT
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
from datetime import date
|
||
from typing import Any
|
||
|
||
|
||
# Top-level keys of the profile content format described in the module
# docstring. NOTE(review): presumably used by callers to separate content
# from metadata fields — no use is visible in this part of the file.
_CONTENT_FIELDS = frozenset((
    "name",
    "surname",
    "email",
    "phone",
    "career_summary",
    "experience",
    "skills",
    "education",
    "achievements",
))
|
||
|
||
|
||
def library_to_profile_content(struct_json: dict[str, Any]) -> dict[str, Any]:
    """Transform a library struct_json to ResumePayload content fields.

    Returns only content fields. Caller is responsible for merging with existing
    metadata fields (salary, preferences, self-ID) so they are not overwritten.

    Lossy for experience[].industry (always blank — parser does not capture it);
    experience[].skills is always emitted as an empty list.
    name is split on the first run of whitespace into name/surname. Surrounding
    whitespace is dropped first, so a padded name never yields an empty first
    name and the surname never starts with a space.

    Missing or None values anywhere in the input are treated as empty.
    """
    full_name: str = (struct_json.get("name") or "").strip()
    # split(None, 1) splits on any whitespace run and returns [] for an empty
    # string, hence the guarded indexing below.
    parts = full_name.split(None, 1)
    name = parts[0] if parts else ""
    surname = parts[1] if len(parts) > 1 else ""

    experience = []
    for exp in struct_json.get("experience") or []:
        start = (exp.get("start_date") or "").strip()
        end = (exp.get("end_date") or "").strip()
        # "start – end" when both are known, the single known date when only
        # one is, and "" when neither is. The en-dash (not a hyphen) keeps ISO
        # dates such as "2023-01" unambiguous when the period is split again.
        period = " \u2013 ".join(d for d in (start, end) if d)

        bullets: list[str] = exp.get("bullets") or []
        responsibilities = "\n".join(b for b in bullets if b)

        experience.append({
            "title": exp.get("title") or "",
            "company": exp.get("company") or "",
            "period": period,
            "location": exp.get("location") or "",
            "industry": "",  # not captured by parser
            "responsibilities": responsibilities,
            "skills": [],
        })

    education_keys = ("institution", "degree", "field", "start_date", "end_date")
    education = [
        {key: edu.get(key) or "" for key in education_keys}
        for edu in struct_json.get("education") or []
    ]

    return {
        "name": name,
        "surname": surname,
        "email": struct_json.get("email") or "",
        "phone": struct_json.get("phone") or "",
        "career_summary": struct_json.get("career_summary") or "",
        "experience": experience,
        # Copied so the caller can mutate the result without touching the input.
        "skills": list(struct_json.get("skills") or []),
        "education": education,
        "achievements": list(struct_json.get("achievements") or []),
    }
|
||
|
||
|
||
def _split_period(period: str) -> tuple[str, str]:
    """Split a free-text period into a (start_date, end_date) pair.

    Separators are tried in this order: en-dash, em-dash, then a plain hyphen
    with a space on each side. A bare hyphen is never treated as a separator,
    so ISO dates such as "2023-01" stay intact. When no separator is present
    the whole string is returned as the start date.
    """
    period = period.strip()
    for separator in ("\u2013", "\u2014", " - "):
        if separator in period:
            start, end = period.split(separator, 1)
            return start.strip(), end.strip()
    return period, ""


def profile_to_library(payload: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Transform ResumePayload content fields to (plain_text, struct_json).

    Inverse of library_to_profile_content. The plain_text is a best-effort
    reconstruction for display and re-parsing. struct_json is the canonical
    structured representation stored in the resumes table.

    Each experience period is split back into start_date / end_date on an
    en-dash, an em-dash, or a space-delimited hyphen; hyphens embedded in ISO
    dates are left alone. Missing or None values are treated as empty.
    experience[].industry and experience[].skills have no library counterpart
    and are dropped; achievements are carried in struct_json only.
    """
    name_parts = [payload.get("name") or "", payload.get("surname") or ""]
    full_name = " ".join(p for p in name_parts if p).strip()
    career_summary = (payload.get("career_summary") or "").strip()
    email = payload.get("email") or ""
    phone = payload.get("phone") or ""

    # Contact header: one line per non-empty value.
    lines: list[str] = [value for value in (full_name, email, phone) if value]

    if career_summary:
        lines += ["", "SUMMARY", career_summary]

    experience_structs = []
    for exp in payload.get("experience") or []:
        title = (exp.get("title") or "").strip()
        company = (exp.get("company") or "").strip()
        period = (exp.get("period") or "").strip()
        location = (exp.get("location") or "").strip()

        # Split on the en-dash/em-dash separator rather than on plain hyphens
        # so that ISO dates like "2023-01 – 2025-03" round-trip correctly.
        start_date, end_date = _split_period(period)

        resp = (exp.get("responsibilities") or "").strip()
        bullets = [b.strip() for b in resp.split("\n") if b.strip()]

        if title or company:
            header = " | ".join(p for p in [title, company, period] if p)
            lines += ["", header]
        if location:
            lines.append(location)
        lines.extend(f"\u2022 {b}" for b in bullets)

        experience_structs.append({
            "title": title,
            "company": company,
            "start_date": start_date,
            "end_date": end_date,
            "location": location,
            "bullets": bullets,
        })

    skills: list[str] = list(payload.get("skills") or [])
    if skills:
        lines += ["", "SKILLS", ", ".join(skills)]

    education_structs = []
    for edu in payload.get("education") or []:
        institution = (edu.get("institution") or "").strip()
        degree = (edu.get("degree") or "").strip()
        field = (edu.get("field") or "").strip()
        edu_start = (edu.get("start_date") or "").strip()
        edu_end = (edu.get("end_date") or "").strip()
        if institution or degree:
            label = " ".join(p for p in [degree, field] if p)
            # Join only the parts that exist, so an entry with an institution
            # but no degree/field does not render as " — Institution".
            lines.append(" \u2014 ".join(p for p in (label, institution) if p))
        education_structs.append({
            "institution": institution,
            "degree": degree,
            "field": field,
            "start_date": edu_start,
            "end_date": edu_end,
        })

    achievements: list[str] = list(payload.get("achievements") or [])

    struct_json: dict[str, Any] = {
        "name": full_name,
        "email": email,
        "phone": phone,
        "career_summary": career_summary,
        "experience": experience_structs,
        "skills": skills,
        "education": education_structs,
        "achievements": achievements,
    }

    plain_text = "\n".join(lines).strip()
    return plain_text, struct_json
|
||
|
||
|
||
def make_auto_backup_name(source_name: str) -> str:
    """Build the name for an automatic backup, stamped with today's date.

    The result is "Auto-backup before <source_name> — <YYYY-MM-DD>", where the
    date is the local current date in ISO format.

    Example: "Auto-backup before Senior Engineer Resume — 2026-04-16"
    """
    return "Auto-backup before {} \u2014 {}".format(
        source_name,
        date.today().isoformat(),
    )
|
||
|
||
|
||
def blank_fields_on_import(struct_json: dict[str, Any]) -> list[str]:
    """List the content fields a library→profile import will leave blank.

    The result feeds the confirmation modal's warning so the user knows what
    still needs filling in. With no experience entries nothing is reported.
    """
    entries = struct_json.get("experience")
    if not entries:
        return []

    # The parser never captures industry, so it is blank for every entry.
    missing: list[str] = ["experience[].industry"]

    # Location is reported as soon as one entry lacks a non-whitespace value.
    every_entry_located = all(
        (entry.get("location") or "").strip() for entry in entries
    )
    if not every_entry_located:
        missing.append("experience[].location")
    return missing
|