From 4e11cf3cfa03fc061b4c83f443c18ec92fff47c5 Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Thu, 16 Apr 2026 11:11:50 -0700
Subject: [PATCH] fix: sanitize invalid JSON escape sequences from LLM output
 in resume optimizer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLMs occasionally emit backslash sequences that are valid regex but not valid
JSON (e.g. \s, \d, \p). This caused extract_jd_signals() to fall through to
the exception handler, leaving llm_signals empty. With no LLM signals, the
optimizer fell back to TF-IDF only — which is more conservative and can
legitimately return zero gaps, making the UI appear to say the resume is fine.

Fix: strip bare backslashes not followed by a recognised JSON escape character
("  \  /  b  f  n  r  t  u) before parsing. Preserves \n, \", etc.

To reproduce: run cover letter generation concurrently with gap analysis; the
added model load raises the probability of a slightly malformed LLM response.
---
 scripts/resume_optimizer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/resume_optimizer.py b/scripts/resume_optimizer.py
index 5fffd3e..3681ff8 100644
--- a/scripts/resume_optimizer.py
+++ b/scripts/resume_optimizer.py
@@ -70,7 +70,12 @@ def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
         # Extract JSON array from response (LLM may wrap it in markdown)
         match = re.search(r"\[.*\]", raw, re.DOTALL)
         if match:
-            llm_signals = json.loads(match.group(0))
+            json_str = match.group(0)
+            # LLMs occasionally emit invalid JSON escape sequences (e.g. \s, \d, \p)
+            # that are valid regex but not valid JSON. Replace bare backslashes that
+            # aren't followed by a recognised JSON escape character.
+            json_str = re.sub(r'\\([^"\\/bfnrtu])', r'\1', json_str)
+            llm_signals = json.loads(json_str)
             llm_signals = [s.strip() for s in llm_signals if isinstance(s, str) and s.strip()]
     except Exception:
         log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)