fix(hypothesizer): extract first JSON array to handle reasoning model double-output

Reasoning models (e.g. foundation-sec-8b) emit valid JSON then repeat it
inside a markdown fence block. json.loads() fails on the combined text.

extract_first_json_array() scans for the first '[' and walks to its
matching ']' with proper string/escape/nesting handling, then returns
just that slice. Combined with strip_json_fences(), this handles all
observed output patterns:
  - bare JSON array (standard models)
  - fenced JSON array (fence-wrapping models)
  - bare array followed by fenced repeat (reasoning models)
This commit is contained in:
pyr0ball 2026-05-25 21:01:14 -07:00
parent b19bea8f2a
commit e851099e5c
2 changed files with 45 additions and 2 deletions

View file

@ -43,6 +43,47 @@ def strip_json_fences(raw: str) -> str:
return _JSON_FENCE_RE.sub("", raw).strip()
def extract_first_json_array(raw: str) -> str:
    """Return the first complete, parseable JSON array found in *raw*.

    Reasoning models (e.g. foundation-sec-8b) sometimes emit a valid JSON
    array and then repeat it inside a markdown fence, so ``json.loads()`` on
    the combined text fails. This function isolates just the first array so
    the caller can parse it on its own.

    Each ``[`` in *raw* is tried in order as a potential array start and the
    standard-library JSON decoder is asked to parse from there. The first
    position that yields a valid JSON value wins. Delegating to the real
    decoder means strings, escapes, and nesting are handled exactly as
    ``json.loads()`` would handle them, and a stray ``[`` in leading prose
    (``"Note [draft]: [...]"``) no longer hijacks the extraction.

    Args:
        raw: Text that may contain a JSON array surrounded by other content.

    Returns:
        The substring spanning the first parseable JSON array, or *raw*
        unchanged when no array can be parsed (so the caller's
        ``json.loads()`` fails with its usual error message).
    """
    # Local import keeps this function self-contained regardless of what the
    # enclosing module already imports.
    import json

    decoder = json.JSONDecoder()
    start = raw.find("[")
    while start != -1:
        try:
            # raw_decode parses one JSON value beginning at `start` and
            # tolerates trailing text; `end` is the index just past the value.
            _, end = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            # Not a valid array at this bracket (prose, truncated output, ...)
            # - move on to the next candidate.
            start = raw.find("[", start + 1)
        else:
            return raw[start:end]
    return raw  # nothing parseable - return as-is so the caller sees the error
def call_llm(
llm_url: str,
llm_model: str,

View file

@ -6,7 +6,7 @@ import logging
from uuid import uuid4
from app.context.retriever import RetrievedContext
from app.services.diagnose._llm_client import call_llm, strip_json_fences
from app.services.diagnose._llm_client import call_llm, extract_first_json_array, strip_json_fences
from app.services.diagnose.models import (
ClassifiedTimeline,
EventCluster,
@ -129,7 +129,9 @@ class RootCauseHypothesizer:
triple-backtick fences despite being instructed not to.
"""
try:
data = json.loads(strip_json_fences(raw))
# extract_first_json_array handles reasoning models that emit valid
# JSON then repeat it inside a markdown fence block.
data = json.loads(extract_first_json_array(strip_json_fences(raw)))
except json.JSONDecodeError:
logger.warning(
"Hypothesizer: invalid JSON from LLM (truncated): %.120s", raw