fix: defensive coercion for LLM confidence and cluster fields in hypothesizer

- Add _coerce_float() module-level helper: catches the TypeError/ValueError raised by float() on non-numeric LLM output (e.g. 'high', 'N/A', or a JSON null) and returns a caller-supplied default instead of raising.
- Replace float(item.get('confidence', 0.5)) with _coerce_float(item.get('confidence'), 0.5) in _parse_response.
- Guard supporting_cluster_ids: tuple(item.get(...) or []) so a JSON null from the LLM does not cause "TypeError: 'NoneType' object is not iterable".
- runbook_refs is hardcoded as () and not sourced from LLM output; no change needed there.
- Add test_non_numeric_confidence_uses_default (Test 10) to cover the 'high' string case: asserts no exception is raised and confidence == 0.5.
- 341 tests passing (+1).

Closes: #29
2026-05-25 14:00:30 -07:00 · 2026-05-25 14:00:30 -07:00 · e8c66972fa
commit e8c66972fa
parent eefd65f903
2 changed files with 45 additions and 2 deletions
--- a/app/services/diagnose/hypothesizer.py
+++ b/app/services/diagnose/hypothesizer.py
@ -30,6 +30,14 @@ _SYSTEM_PROMPT = (
 )
 def _coerce_float(val: object, default: float) -> float:
    """Safely coerce LLM output to float, returning default on failure."""
    try:
        return float(val)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return default
 def _validate_severity(s: str) -> SeverityLabel:
    """Map a raw severity string to a valid SeverityLabel, defaulting to ERROR."""
    upper = s.upper()
@ -198,8 +206,8 @@ class RootCauseHypothesizer:
                hypothesis_id=str(uuid4()),
                title=str(item.get("title", "Unknown"))[:80],
                description=str(item.get("description", "")),
-                confidence=float(item.get("confidence", 0.5)),
+                confidence=_coerce_float(item.get("confidence"), 0.5),
-                supporting_cluster_ids=tuple(item.get("supporting_clusters", [])),
+                supporting_cluster_ids=tuple(item.get("supporting_clusters") or []),
                runbook_refs=(),
                severity=severity,
            )
--- a/tests/test_diagnose_hypothesizer.py
+++ b/tests/test_diagnose_hypothesizer.py
@ -449,3 +449,38 @@ def test_confidence_string_float_coercion():
    assert len(results) == 1
    assert isinstance(results[0].confidence, float)
    assert results[0].confidence == pytest.approx(0.8)
 # ---------------------------------------------------------------------------
 # Test 10: Non-numeric confidence string falls back to default 0.5
 # ---------------------------------------------------------------------------
 def test_non_numeric_confidence_uses_default():
    """A non-numeric confidence such as 'high' must not raise; 0.5 is used instead."""
    classified_input = _make_classified(clusters=(_make_cluster(),))
    context = _make_ctx()
    engine = RootCauseHypothesizer()

    # Single hypothesis whose confidence cannot be parsed as a number.
    bad_confidence_item = {
        "title": "t",
        "description": "d",
        "confidence": "high",
        "severity": "ERROR",
        "supporting_clusters": [],
    }
    fake_response = _llm_json_response([bad_confidence_item])

    call_kwargs = {
        "query": "test",
        "llm_url": "http://localhost:11434",
        "llm_model": "llama3",
    }
    with patch("httpx.post", return_value=fake_response):
        hypotheses = engine.hypothesize(classified_input, context, **call_kwargs)

    assert len(hypotheses) == 1
    parsed_confidence = hypotheses[0].confidence
    assert isinstance(parsed_confidence, float)
    assert parsed_confidence == pytest.approx(0.5)