From a2916f958a8ec257365e647b7b45d20f476e95c9 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 25 May 2026 14:00:30 -0700 Subject: [PATCH] fix: defensive coercion for LLM confidence and cluster fields in hypothesizer - Add _coerce_float() module-level helper: catches TypeError/ValueError from non-numeric LLM output (e.g. 'high', 'N/A') and returns a caller-supplied default instead of raising. - Replace float(item.get('confidence', 0.5)) with _coerce_float(item.get('confidence'), 0.5) in _parse_response. - Guard supporting_cluster_ids: tuple(item.get(...) or []) so a JSON null from the LLM does not cause TypeError('NoneType is not iterable'). - runbook_refs is hardcoded as () and not sourced from LLM output; no change needed there. - Add test_non_numeric_confidence_uses_default (Test 10) to cover the 'high' string case: asserts no exception and confidence == 0.5. - 341 tests passing (+1). Closes: https://git.opensourcesolarpunk.com/Circuit-Forge/turnstone/issues/29 --- app/services/diagnose/hypothesizer.py | 12 +++++++-- tests/test_diagnose_hypothesizer.py | 35 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/app/services/diagnose/hypothesizer.py b/app/services/diagnose/hypothesizer.py index d7d3261..7c5c3e6 100644 --- a/app/services/diagnose/hypothesizer.py +++ b/app/services/diagnose/hypothesizer.py @@ -30,6 +30,14 @@ _SYSTEM_PROMPT = ( ) +def _coerce_float(val: object, default: float) -> float: + """Safely coerce LLM output to float, returning default on failure.""" + try: + return float(val) # type: ignore[arg-type] + except (TypeError, ValueError): + return default + + def _validate_severity(s: str) -> SeverityLabel: """Map a raw severity string to a valid SeverityLabel, defaulting to ERROR.""" upper = s.upper() @@ -198,8 +206,8 @@ class RootCauseHypothesizer: hypothesis_id=str(uuid4()), title=str(item.get("title", "Unknown"))[:80], description=str(item.get("description", "")), - confidence=float(item.get("confidence", 0.5)), - supporting_cluster_ids=tuple(item.get("supporting_clusters", [])), + confidence=_coerce_float(item.get("confidence"), 0.5), + supporting_cluster_ids=tuple(item.get("supporting_clusters") or []), runbook_refs=(), severity=severity, ) diff --git a/tests/test_diagnose_hypothesizer.py b/tests/test_diagnose_hypothesizer.py index b0368ec..09ffbd9 100644 --- a/tests/test_diagnose_hypothesizer.py +++ b/tests/test_diagnose_hypothesizer.py @@ -449,3 +449,38 @@ def test_confidence_string_float_coercion(): assert len(results) == 1 assert isinstance(results[0].confidence, float) assert results[0].confidence == pytest.approx(0.8) + + +# --------------------------------------------------------------------------- +# Test 10: Non-numeric confidence string falls back to default 0.5 +# --------------------------------------------------------------------------- + + +def test_non_numeric_confidence_uses_default(): + """LLM returning 'high' for confidence should not raise and defaults to 0.5.""" + cluster = _make_cluster() + classified = _make_classified(clusters=(cluster,)) + ctx = _make_ctx() + hypothesizer = RootCauseHypothesizer() + + items = [ + { + "title": "t", + "description": "d", + "confidence": "high", + "severity": "ERROR", + "supporting_clusters": [], + } + ] + mock_resp = _llm_json_response(items) + + with patch("httpx.post", return_value=mock_resp): + results = hypothesizer.hypothesize( + classified, ctx, query="test", + llm_url="http://localhost:11434", + llm_model="llama3", + ) + + assert len(results) == 1 + assert isinstance(results[0].confidence, float) + assert results[0].confidence == pytest.approx(0.5)