def extract_first_json_array(raw: str) -> str:
    """Extract the first complete, parseable JSON array from a string.

    Reasoning models (e.g. foundation-sec-8b) sometimes emit valid JSON and
    then repeat it inside a markdown fence, so json.loads() fails on the
    combined text. This function looks at each '[' in turn, walks to its
    matching ']' (honouring string literals, escapes and nesting), and
    returns the first such slice that actually parses as JSON.

    Bracketed prose that precedes the real payload (e.g. "[thinking]") is
    balanced but not JSON; it is skipped instead of being returned.

    Args:
        raw: LLM output, typically already passed through strip_json_fences().

    Returns:
        The first balanced '[...]' slice that parses as JSON. If none parses,
        the first balanced slice is returned; if there is no balanced slice
        at all, ``raw`` is returned unchanged. In both fallback cases the
        caller's json.loads() fails with the usual error.
    """
    import json  # function-scope import keeps the helper self-contained

    fallback = None
    start = raw.find("[")
    while start != -1:
        end = _matching_bracket_index(raw, start)
        if end == -1:
            # Unbalanced from here on (e.g. truncated output). Stop rather
            # than dig into the fragment and return a nested inner array.
            break
        candidate = raw[start : end + 1]
        try:
            json.loads(candidate)
        except (ValueError, RecursionError):
            # Balanced but unusable: remember the first such slice so the
            # fallback is stable, then try the next '[' after this span.
            if fallback is None:
                fallback = candidate
            start = raw.find("[", end + 1)
        else:
            return candidate

    return raw if fallback is None else fallback


def _matching_bracket_index(raw: str, start: int) -> int:
    """Return the index of the ']' that closes the '[' at ``start``, or -1."""
    depth = 0
    in_string = False
    escape_next = False

    for i, ch in enumerate(raw[start:], start=start):
        if escape_next:
            # Character after a backslash inside a string is always literal.
            escape_next = False
            continue
        if ch == "\\" and in_string:
            escape_next = True
            continue
        if ch == '"':
            in_string = not in_string
            continue
        if in_string:
            # Brackets inside string literals do not affect nesting.
            continue
        if ch == "[":
            depth += 1
        elif ch == "]":
            depth -= 1
            if depth == 0:
                return i

    return -1