From a2916f958a8ec257365e647b7b45d20f476e95c9 Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Mon, 25 May 2026 14:00:30 -0700
Subject: [PATCH] fix: defensive coercion for LLM confidence and cluster fields
 in hypothesizer

- Add _coerce_float() module-level helper: catches TypeError/ValueError from
  non-numeric LLM output (e.g. 'high', 'N/A') and returns a caller-supplied
  default instead of raising.
- Replace float(item.get('confidence', 0.5)) with
  _coerce_float(item.get('confidence'), 0.5) in _parse_response.
- Guard supporting_cluster_ids: tuple(item.get(...) or []) so a JSON null
  from the LLM does not raise TypeError("'NoneType' object is not iterable").
- runbook_refs is hardcoded as () and not sourced from LLM output; no change
  needed there.
- Add test_non_numeric_confidence_uses_default (Test 10) to cover the 'high'
  string case: asserts no exception and confidence == 0.5.
- 341 tests passing (+1).

Closes: https://git.opensourcesolarpunk.com/Circuit-Forge/turnstone/issues/29
---
 app/services/diagnose/hypothesizer.py | 12 +++++++--
 tests/test_diagnose_hypothesizer.py   | 35 +++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/app/services/diagnose/hypothesizer.py b/app/services/diagnose/hypothesizer.py
index d7d3261..7c5c3e6 100644
--- a/app/services/diagnose/hypothesizer.py
+++ b/app/services/diagnose/hypothesizer.py
@@ -30,6 +30,14 @@ _SYSTEM_PROMPT = (
 )
 
 
+def _coerce_float(val: object, default: float) -> float:
+    """Coerce LLM output to float; return default if conversion fails or overflows."""
+    try:
+        return float(val)  # type: ignore[arg-type]
+    except (TypeError, ValueError, OverflowError):  # OverflowError: oversized JSON int
+        return default
+
+
 def _validate_severity(s: str) -> SeverityLabel:
     """Map a raw severity string to a valid SeverityLabel, defaulting to ERROR."""
     upper = s.upper()
@@ -198,8 +206,8 @@ class RootCauseHypothesizer:
                 hypothesis_id=str(uuid4()),
                 title=str(item.get("title", "Unknown"))[:80],
                 description=str(item.get("description", "")),
-                confidence=float(item.get("confidence", 0.5)),
-                supporting_cluster_ids=tuple(item.get("supporting_clusters", [])),
+                confidence=_coerce_float(item.get("confidence"), 0.5),
+                supporting_cluster_ids=tuple(item.get("supporting_clusters") or []),
                 runbook_refs=(),
                 severity=severity,
             )
diff --git a/tests/test_diagnose_hypothesizer.py b/tests/test_diagnose_hypothesizer.py
index b0368ec..09ffbd9 100644
--- a/tests/test_diagnose_hypothesizer.py
+++ b/tests/test_diagnose_hypothesizer.py
@@ -449,3 +449,38 @@ def test_confidence_string_float_coercion():
     assert len(results) == 1
     assert isinstance(results[0].confidence, float)
     assert results[0].confidence == pytest.approx(0.8)
+
+
+# ---------------------------------------------------------------------------
+# Test 10: Non-numeric confidence string falls back to default 0.5
+# ---------------------------------------------------------------------------
+
+
+def test_non_numeric_confidence_uses_default():
+    """Non-numeric confidence ('high') must not raise; the result falls back to 0.5."""
+    cluster = _make_cluster()
+    classified = _make_classified(clusters=(cluster,))
+    ctx = _make_ctx()
+    hypothesizer = RootCauseHypothesizer()
+
+    items = [
+        {
+            "title": "t",
+            "description": "d",
+            "confidence": "high",  # deliberately not parseable as a float
+            "severity": "ERROR",
+            "supporting_clusters": [],
+        }
+    ]
+    mock_resp = _llm_json_response(items)
+
+    with patch("httpx.post", return_value=mock_resp):  # httpx.post yields the canned reply
+        results = hypothesizer.hypothesize(
+            classified, ctx, query="test",
+            llm_url="http://localhost:11434",
+            llm_model="llama3",
+        )
+
+    assert len(results) == 1  # the item is kept, not dropped
+    assert isinstance(results[0].confidence, float)
+    assert results[0].confidence == pytest.approx(0.5)  # the fallback default