fix(hypothesizer): extract first JSON array to handle reasoning model double-output

Reasoning models (e.g. foundation-sec-8b) emit valid JSON and then repeat it inside a markdown fence block, so json.loads() fails on the combined text.

extract_first_json_array() scans for the first '[' and walks to its matching ']' with proper string/escape/nesting handling, then returns just that slice.

Combined with strip_json_fences(), this handles all observed output patterns:
- bare JSON array (standard models)
- fenced JSON array (fence-wrapping models)
- bare array followed by fenced repeat (reasoning models)
2026-05-25 21:01:14 -07:00 · 2026-05-25 21:01:14 -07:00 · 8849f3aa22
commit 8849f3aa22
parent a89d263476
2 changed files with 45 additions and 2 deletions
--- a/app/services/diagnose/_llm_client.py
+++ b/app/services/diagnose/_llm_client.py
@ -43,6 +43,47 @@ def strip_json_fences(raw: str) -> str:
    return _JSON_FENCE_RE.sub("", raw).strip()
 def extract_first_json_array(raw: str) -> str:
    """Extract the first complete, parseable JSON array from a string.

    Reasoning models (e.g. foundation-sec-8b) sometimes emit valid JSON and
    then repeat it inside a markdown fence, so json.loads() fails on the
    combined text. This function tries each '[' in turn and returns the
    first slice that the standard JSON decoder accepts as a complete array,
    so stray brackets in leading prose (e.g. "Step [one]: [...]") are
    skipped instead of being mistaken for the payload.

    Args:
        raw: LLM output that may contain a JSON array plus surrounding text.

    Returns:
        The substring holding the first valid JSON array, or ``raw``
        unchanged when no '[' starts a parseable array (so the caller's
        json.loads() fails with the usual error).
    """
    # Imported locally so this helper does not depend on the module's
    # top-level import block.
    import json

    decoder = json.JSONDecoder()
    start = raw.find("[")
    while start != -1:
        try:
            # raw_decode parses one JSON value beginning at ``start`` and
            # reports the index just past it; trailing text is ignored.
            # It uses the same grammar as json.loads(), so string, escape
            # and nesting handling match what the caller will parse.
            _, end = decoder.raw_decode(raw, start)
        except (json.JSONDecodeError, RecursionError):
            # Stray bracket, truncated output, or pathological nesting:
            # this '[' does not open a usable array, so try the next one.
            start = raw.find("[", start + 1)
            continue
        return raw[start:end]
    return raw  # nothing parseable — return as-is so caller sees the error
 def call_llm(
    llm_url: str,
    llm_model: str,
--- a/app/services/diagnose/hypothesizer.py
+++ b/app/services/diagnose/hypothesizer.py
@ -6,7 +6,7 @@ import logging
 from uuid import uuid4
 from app.context.retriever import RetrievedContext
-from app.services.diagnose._llm_client import call_llm, strip_json_fences
+from app.services.diagnose._llm_client import call_llm, extract_first_json_array, strip_json_fences
 from app.services.diagnose.models import (
    ClassifiedTimeline,
    EventCluster,
@ -129,7 +129,9 @@ class RootCauseHypothesizer:
        triple-backtick fences despite being instructed not to.
        """
        try:
-            data = json.loads(strip_json_fences(raw))
+            # extract_first_json_array handles reasoning models that emit valid
            # JSON then repeat it inside a markdown fence block.
            data = json.loads(extract_first_json_array(strip_json_fences(raw)))
        except json.JSONDecodeError:
            logger.warning(
                "Hypothesizer: invalid JSON from LLM (truncated): %.120s", raw