From 4e11cf3cfa03fc061b4c83f443c18ec92fff47c5 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Thu, 16 Apr 2026 11:11:50 -0700 Subject: [PATCH] fix: sanitize invalid JSON escape sequences from LLM output in resume optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLMs occasionally emit backslash sequences that are valid regex but not valid JSON (e.g. \s, \d, \p). This caused extract_jd_signals() to fall through to the exception handler, leaving llm_signals empty. With no LLM signals, the optimizer fell back to TF-IDF only — which is more conservative and can legitimately return zero gaps, making the UI appear to say the resume is fine. Fix: strip bare backslashes not followed by a recognised JSON escape character (" \ / b f n r t u) before parsing. Preserves \n, \", etc. Note: the pattern does not consume escape pairs, so an already-escaped backslash followed by a non-escape character (e.g. \\s) also loses its second backslash. To reproduce: run cover letter generation concurrently with gap analysis; the added model load raises the probability of a slightly malformed LLM response. --- scripts/resume_optimizer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/resume_optimizer.py b/scripts/resume_optimizer.py index 5fffd3e..3681ff8 100644 --- a/scripts/resume_optimizer.py +++ b/scripts/resume_optimizer.py @@ -70,7 +70,12 @@ def extract_jd_signals(description: str, resume_text: str = "") -> list[str]: # Extract JSON array from response (LLM may wrap it in markdown) match = re.search(r"\[.*\]", raw, re.DOTALL) if match: - llm_signals = json.loads(match.group(0)) + json_str = match.group(0) + # LLMs occasionally emit invalid JSON escape sequences (e.g. \s, \d, \p) + # that are valid regex but not valid JSON. Replace bare backslashes that + # aren't followed by a recognised JSON escape character. 
+ json_str = re.sub(r'\\([^"\\/bfnrtu])', r'\1', json_str) + llm_signals = json.loads(json_str) llm_signals = [s.strip() for s in llm_signals if isinstance(s, str) and s.strip()] except Exception: log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)