From 65d0584f4a15eccc6ce028fedf1cf6107106a7b2 Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Mon, 25 May 2026 21:01:14 -0700
Subject: [PATCH] fix(hypothesizer): extract first JSON array to handle
 reasoning model double-output

Reasoning models (e.g. foundation-sec-8b) emit valid JSON then repeat it
inside a markdown fence block. json.loads() fails on the combined text.

extract_first_json_array() scans for the first '[' and walks to its
matching ']' with proper string/escape/nesting handling, then returns
just that slice. Combined with strip_json_fences(), this handles all
observed output patterns:
  - bare JSON array (standard models)
  - fenced JSON array (fence-wrapping models)
  - bare array followed by fenced repeat (reasoning models)
---
 app/services/diagnose/_llm_client.py  | 41 +++++++++++++++++++++++++++
 app/services/diagnose/hypothesizer.py |  6 ++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/app/services/diagnose/_llm_client.py b/app/services/diagnose/_llm_client.py
index a9e1481..7a93196 100644
--- a/app/services/diagnose/_llm_client.py
+++ b/app/services/diagnose/_llm_client.py
@@ -43,6 +43,47 @@ def strip_json_fences(raw: str) -> str:
     return _JSON_FENCE_RE.sub("", raw).strip()
 
 
+def extract_first_json_array(raw: str) -> str:
+    """Return the first balanced ``[...]`` span found in *raw*.
+
+    Some reasoning models (e.g. foundation-sec-8b) print a valid JSON array
+    and then repeat it inside a markdown fence, so json.loads() on the whole
+    text fails. Scanning starts at the first '[' and ends at the ']' that
+    closes it; brackets inside double-quoted strings (and escaped quotes
+    within them) do not count.
+
+    The slice is not validated as JSON. If there is no '[' or the brackets
+    never balance, *raw* is returned unchanged for the caller to parse.
+    """
+    opening = raw.find("[")
+    if opening < 0:
+        return raw
+
+    nesting = 0
+    inside_string = False
+    skip_char = False  # set right after a backslash seen inside a string
+
+    for pos in range(opening, len(raw)):
+        char = raw[pos]
+        if skip_char:
+            skip_char = False
+        elif inside_string:
+            if char == "\\":
+                skip_char = True
+            elif char == '"':
+                inside_string = False
+        elif char == '"':
+            inside_string = True
+        elif char == "[":
+            nesting += 1
+        elif char == "]":
+            nesting -= 1
+            if nesting == 0:
+                return raw[opening : pos + 1]
+
+    return raw  # brackets never balanced; hand the text back untouched
+
+
 def call_llm(
     llm_url: str,
     llm_model: str,
diff --git a/app/services/diagnose/hypothesizer.py b/app/services/diagnose/hypothesizer.py
index 827332f..433d9fb 100644
--- a/app/services/diagnose/hypothesizer.py
+++ b/app/services/diagnose/hypothesizer.py
@@ -6,7 +6,7 @@ import logging
 from uuid import uuid4
 
 from app.context.retriever import RetrievedContext
-from app.services.diagnose._llm_client import call_llm, strip_json_fences
+from app.services.diagnose._llm_client import call_llm, extract_first_json_array, strip_json_fences
 from app.services.diagnose.models import (
     ClassifiedTimeline,
     EventCluster,
@@ -129,7 +129,9 @@ class RootCauseHypothesizer:
         triple-backtick fences despite being instructed not to.
         """
         try:
-            data = json.loads(strip_json_fences(raw))
+            # extract_first_json_array handles reasoning models that emit valid
+            # JSON then repeat it inside a markdown fence block.
+            data = json.loads(extract_first_json_array(strip_json_fences(raw)))
         except json.JSONDecodeError:
             logger.warning(
                 "Hypothesizer: invalid JSON from LLM (truncated): %.120s", raw