From 312631a5d9a9c0b244315a1fc47ed142138735d1 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 5 May 2026 14:11:52 -0700 Subject: [PATCH] fix(resume-optimizer): strip double bullets and markdown formatting in rewrites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three root causes fixed: - _section_text_for_prompt: strip existing bullet chars from bullet text before adding the prompt's own marker (prevents • • text entering the LLM prompt) - _reparse_experience_bullets: use + quantifier to strip all leading bullet chars, not just the first (handles • • text from LLM output) - _apply_section_rewrite (summary): run _clean_summary_markup to remove markdown * bullets from career_summary before storing in struct Also adds 'no markdown formatting' to the LLM rewrite prompt CRITICAL RULES. --- scripts/resume_optimizer.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/scripts/resume_optimizer.py b/scripts/resume_optimizer.py index b71268a..7b13a20 100644 --- a/scripts/resume_optimizer.py +++ b/scripts/resume_optimizer.py @@ -278,7 +278,8 @@ def rewrite_for_ats( f"3. Only rephrase existing content — replace vague verbs/nouns with the " f" ATS-preferred equivalents listed above.\n" f"4. Keep the same number of bullet points in experience entries.\n" - f"5. Return ONLY the rewritten section content, no labels or explanation." + f"5. Do NOT use markdown formatting — no **, __, or * for bullets.\n" + f"6. Return ONLY the rewritten section content, no labels or explanation." f"{voice_note}\n\n" f"Original {section} section:\n{original_content}" ) @@ -305,7 +306,8 @@ def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str: for exp in resume.get("experience", []): lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})") for b in exp.get("bullets", []): - lines.append(f" • {b}") + clean_b = re.sub(r"^[•\-–—*◦▪▸►\s]+", "", b).strip() + lines.append(f" • {clean_b}") return "\n".join(lines) if lines else "(empty)" return "(unsupported section)" @@ -314,7 +316,7 @@ def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) """Return a new resume dict with the given section replaced by rewritten text.""" updated = dict(resume) if section == "summary": - updated["career_summary"] = rewritten + updated["career_summary"] = _clean_summary_markup(rewritten) elif section == "skills": # LLM returns comma-separated or newline-separated skills skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()] @@ -326,6 +328,19 @@ def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) return updated +def _clean_summary_markup(text: str) -> str: + """Strip markdown/plain-text bullet markers from career summary lines. + + LLMs sometimes format summary content with '* item' or '• item' markdown. + This converts those lines to unmarked text so the summary renders cleanly. + """ + lines = [] + for line in text.splitlines(): + cleaned = re.sub(r"^[•*\-–—◦▪▸►]\s+", "", line.lstrip()) + lines.append(cleaned) + return "\n".join(lines).strip() + + def _reparse_experience_bullets( original_entries: list[dict], rewritten_text: str, @@ -355,9 +370,9 @@ def _reparse_experience_bullets( chunk = remaining bullets = [ - re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip() + re.sub(r"^([•\-–—*◦▪▸►]\s*)+", "", line.strip()).strip() for line in chunk.splitlines() - if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip()) + if re.match(r"^\s*[•\-–—*◦▪▸►]", line) ] new_entry = dict(entry) new_entry["bullets"] = bullets if bullets else entry["bullets"]