Stage 5 (SummarySynthesizer) was only sending aggregate timeline stats to the LLM (cluster count, burst count, gap count) — the actual sequenced cluster data that Stage 1 reconstructed was never included. The LLM had no per-cluster timestamps, severity, burst flags, silence gaps, or representative text to write the TIMELINE section from. Added _build_timeline_block() to emit a numbered per-cluster summary matching the format Stage 3 uses for the hypothesizer, and included it in the user message alongside the hypothesis block. Also fixed _build_hypothesis_block() to include the 2-4 sentence description each hypothesis carries — previously only the title and novelty score reached the LLM. 11 new tests cover _build_timeline_block() directly (burst label, gap threshold, pattern tags, text truncation at 200 chars, null start_iso, multi-cluster numbering). 529 tests passing.
396 lines
14 KiB
Python
396 lines
14 KiB
Python
"""Tests for app/services/diagnose/synthesizer.py — SummarySynthesizer.
|
|
|
|
All tests use mocking; no real LLM calls are made.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from app.context.retriever import RetrievedContext
|
|
from app.services.diagnose.models import EventCluster, Hypothesis, RankedHypothesis, TimelineResult
|
|
from app.services.diagnose.synthesizer import SummarySynthesizer, _build_timeline_block
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixture helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_hypothesis(
    hypothesis_id: str = "h1",
    title: str = "SSH flood from external IPs",
    description: str = "Repeated failed login attempts from multiple IPs.",
    confidence: float = 0.87,
    severity: str = "CRITICAL",
) -> Hypothesis:
    """Build a ``Hypothesis`` pre-filled with SSH-flood test defaults.

    Each argument overrides the field of the same name. The supporting
    cluster ids are always ``("c1",)`` and the runbook references are
    always empty.
    """
    linked_clusters = ("c1",)
    no_runbooks = ()
    hypothesis = Hypothesis(
        severity=severity,  # type: ignore[arg-type]
        confidence=confidence,
        description=description,
        title=title,
        hypothesis_id=hypothesis_id,
        runbook_refs=no_runbooks,
        supporting_cluster_ids=linked_clusters,
    )
    return hypothesis
|
|
|
|
|
|
def _make_ranked(
    hypothesis: Hypothesis | None = None,
    novelty_score: float = 0.95,
    similarity_to_known: float = 0.05,
    suppress: bool = False,
    suppression_reason: str | None = None,
) -> RankedHypothesis:
    """Wrap a hypothesis in a ``RankedHypothesis`` with test defaults.

    When *hypothesis* is falsy (including the default ``None``), a fresh one
    from ``_make_hypothesis()`` is used. The remaining arguments are passed
    through to the matching ``RankedHypothesis`` fields.
    """
    if not hypothesis:
        hypothesis = _make_hypothesis()
    ranked = RankedHypothesis(
        suppression_reason=suppression_reason,
        suppress=suppress,
        similarity_to_known=similarity_to_known,
        novelty_score=novelty_score,
        hypothesis=hypothesis,
    )
    return ranked
|
|
|
|
|
|
def _make_cluster(
    cluster_id: str = "c1",
    start_iso: str | None = "2026-01-01T00:05:00+00:00",
    severity: str = "ERROR",
    source_ids: tuple[str, ...] = ("syslog",),
    pattern_tags: tuple[str, ...] = ("ssh_auth_failure",),
    burst: bool = False,
    gap_before_seconds: float = 0.0,
    representative_text: str = "Failed password for root from 1.2.3.4 port 22",
) -> EventCluster:
    """Build an ``EventCluster`` describing a small SSH auth-failure cluster.

    Each argument overrides the field of the same name. Three fields are
    fixed: ``entries`` is ``("e1",)``, ``end_iso`` is ``None`` and
    ``duration_seconds`` is ``30.0``.
    """
    fixed_entries = ("e1",)
    fixed_duration = 30.0
    cluster = EventCluster(
        cluster_id=cluster_id,
        severity=severity,  # type: ignore[arg-type]
        start_iso=start_iso,
        end_iso=None,
        duration_seconds=fixed_duration,
        entries=fixed_entries,
        source_ids=source_ids,
        pattern_tags=pattern_tags,
        representative_text=representative_text,
        burst=burst,
        gap_before_seconds=gap_before_seconds,
    )
    return cluster
|
|
|
|
|
|
def _make_timeline(
    total_entries: int = 42,
    n_clusters: int = 3,
    clusters: tuple[EventCluster, ...] | None = None,
) -> TimelineResult:
    """Build a ``TimelineResult`` covering a fixed one-hour window.

    *clusters* defaults to an empty tuple when ``None``. The window bounds,
    ``gap_count`` (1), ``burst_count`` (2) and ``dominant_sources`` are
    fixed. *n_clusters* is accepted but not read by this helper.
    """
    chosen_clusters = () if clusters is None else clusters
    timeline = TimelineResult(
        window_start="2026-01-01T00:00:00+00:00",
        window_end="2026-01-01T01:00:00+00:00",
        total_entries=total_entries,
        clusters=chosen_clusters,
        burst_count=2,
        gap_count=1,
        dominant_sources=("syslog", "auth"),
    )
    return timeline
|
|
|
|
|
|
def _make_ctx(chunks: list[dict] | None = None) -> RetrievedContext:
    """Build a ``RetrievedContext`` with one fixed fact and the given chunks.

    Args:
        chunks: Document chunks to attach. ``None`` (the default) selects a
            single sample runbook chunk. An explicitly passed empty list is
            kept as-is, so a test can build a context with no chunks.

    Returns:
        A ``RetrievedContext`` whose ``facts`` hold a single network/host
        entry and whose ``chunks`` are resolved as described above.
    """
    # Compare against None rather than relying on truthiness: ``chunks or ...``
    # would silently replace a deliberate ``[]`` with the sample chunk.
    if chunks is None:
        chunks = [{"filename": "runbook.md", "text": "Restart sshd if flooded"}]
    return RetrievedContext(
        facts=[{"category": "network", "key": "host", "value": "heimdall", "source": "facts"}],
        chunks=chunks,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test cases
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSynthesizerWithHypotheses:
    """When hypotheses are supplied, the synthesized text carries a VERDICT."""

    @staticmethod
    def _chat_response(content: str) -> MagicMock:
        """Return a mocked HTTP 200 response whose JSON nests *content*
        under ``choices[0].message.content``."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {"choices": [{"message": {"content": content}}]}
        return response

    def test_returns_verdict_string_with_llm(self):
        synthesizer = SummarySynthesizer()
        call_kwargs = {
            "ranked": [_make_ranked()],
            "timeline": _make_timeline(),
            "ctx": _make_ctx(),
            "query": "ssh brute force",
            "llm_url": "http://localhost:11434",
            "llm_model": "llama3",
        }
        llm_reply = self._chat_response(
            "VERDICT: CRITICAL — SSH flood (87% confidence)\nTIMELINE: lots of hits."
        )

        with patch("httpx.post", return_value=llm_reply):
            result = synthesizer.synthesize(**call_kwargs)

        assert "VERDICT" in result

    def test_returns_nonempty_string(self):
        synthesizer = SummarySynthesizer()
        call_kwargs = {
            "ranked": [_make_ranked()],
            "timeline": _make_timeline(),
            "ctx": _make_ctx(),
            "query": "why is auth failing",
            "llm_url": "http://localhost:11434",
            "llm_model": "llama3",
        }
        llm_reply = self._chat_response("VERDICT: CRITICAL — SSH flood (87% confidence)")

        with patch("httpx.post", return_value=llm_reply):
            result = synthesizer.synthesize(**call_kwargs)

        assert isinstance(result, str)
        assert len(result) > 0
|
|
|
|
|
|
class TestSynthesizerSuppressedHypotheses:
    """Hypotheses flagged ``suppress=True`` must not reach the LLM prompt."""

    def test_suppressed_hypotheses_excluded_from_prompt(self):
        wazuh_hypothesis = _make_hypothesis(
            hypothesis_id="h2",
            title="Wazuh alert processing backlog",
            severity="ERROR",
            confidence=0.72,
        )
        suppressed = _make_ranked(
            hypothesis=wazuh_hypothesis,
            suppress=True,
            suppression_reason="similar to 2025-04 SSH incident",
            novelty_score=0.1,
        )

        ssh_hypothesis = _make_hypothesis(
            hypothesis_id="h1",
            title="SSH flood from external IPs",
            severity="CRITICAL",
            confidence=0.87,
        )
        active = _make_ranked(
            hypothesis=ssh_hypothesis,
            suppress=False,
            novelty_score=0.95,
        )

        sent_messages: list = []

        def fake_post(url, json=None, headers=None, timeout=None):
            # Record chat messages whether they sit at the top level of the
            # request body or nested under a "payload" key.
            body = json or {}
            if "payload" in body:
                sent_messages.extend(body["payload"].get("messages", []))
            elif "messages" in body:
                sent_messages.extend(body["messages"])

            reply = MagicMock()
            reply.status_code = 200
            reply.json.return_value = {
                "choices": [{"message": {"content": "VERDICT: CRITICAL — SSH flood"}}]
            }
            return reply

        synthesizer = SummarySynthesizer()
        with patch("httpx.post", side_effect=fake_post):
            synthesizer.synthesize(
                ranked=[active, suppressed],
                timeline=_make_timeline(),
                ctx=_make_ctx(),
                query="auth failures",
                llm_url="http://localhost:11434",
                llm_model="llama3",
            )

        # Take the content of the first user-role message; stay with "" when
        # no user message was captured.
        user_content = ""
        for message in sent_messages:
            if message.get("role") == "user":
                user_content = message["content"]
                break

        # The active hypothesis title must be present in the prompt ...
        assert "SSH flood from external IPs" in user_content
        # ... while the suppressed one must be absent, because suppressed
        # items are excluded from the active list sent to the LLM.
        assert "Wazuh alert processing backlog" not in user_content
|
|
|
|
|
|
class TestSynthesizerNoLLM:
    """Without a usable LLM the synthesizer must still return fallback text."""

    def test_no_llm_url_returns_fallback(self):
        call_kwargs = {
            "ranked": [_make_ranked()],
            "timeline": _make_timeline(),
            "ctx": _make_ctx(),
            "query": "disk errors",
        }

        result = SummarySynthesizer().synthesize(**call_kwargs)

        assert isinstance(result, str)
        assert len(result) > 0
        assert "VERDICT" in result

    def test_no_llm_model_returns_fallback(self):
        # Only the URL is supplied; llm_model is deliberately left out.
        call_kwargs = {
            "ranked": [_make_ranked()],
            "timeline": _make_timeline(),
            "ctx": _make_ctx(),
            "query": "oom killer",
            "llm_url": "http://localhost:11434",
        }

        result = SummarySynthesizer().synthesize(**call_kwargs)

        assert "VERDICT" in result
        assert "SSH flood from external IPs" in result

    def test_llm_failure_returns_fallback(self):
        synthesizer = SummarySynthesizer()
        call_kwargs = {
            "ranked": [_make_ranked()],
            "timeline": _make_timeline(),
            "ctx": _make_ctx(),
            "query": "why is disk full",
            "llm_url": "http://localhost:11434",
            "llm_model": "llama3",
        }

        # Every HTTP call raises, simulating an unreachable LLM endpoint.
        with patch("httpx.post", side_effect=ConnectionError("refused")):
            result = synthesizer.synthesize(**call_kwargs)

        assert "VERDICT" in result
        assert len(result) > 0
|
|
|
|
|
|
class TestSynthesizerEmptyRanked:
    """An empty ranked list must yield text rather than raising."""

    def test_empty_ranked_no_llm_returns_fallback(self):
        call_kwargs = {
            "ranked": [],
            "timeline": _make_timeline(),
            "ctx": _make_ctx(),
            "query": "check everything",
        }

        result = SummarySynthesizer().synthesize(**call_kwargs)

        assert isinstance(result, str)
        assert len(result) > 0
        assert "VERDICT" in result

    def test_empty_ranked_with_llm_returns_fallback_or_llm_text(self):
        """With an LLM configured and nothing ranked, some text still comes back."""
        synthesizer = SummarySynthesizer()

        llm_reply = MagicMock()
        llm_reply.status_code = 200
        llm_reply.json.return_value = {
            "choices": [{"message": {"content": "VERDICT: UNKNOWN — no hypotheses generated"}}]
        }
        call_kwargs = {
            "ranked": [],
            "timeline": _make_timeline(),
            "ctx": _make_ctx(),
            "query": "nothing found",
            "llm_url": "http://localhost:11434",
            "llm_model": "llama3",
        }

        with patch("httpx.post", return_value=llm_reply):
            result = synthesizer.synthesize(**call_kwargs)

        assert isinstance(result, str)
        assert len(result) > 0
|
|
|
|
|
|
class TestBuildTimelineBlock:
    """Direct unit tests for the ``_build_timeline_block`` helper."""

    @staticmethod
    def _render(*clusters: EventCluster) -> str:
        """Render the timeline block for a timeline holding *clusters*."""
        return _build_timeline_block(_make_timeline(clusters=clusters))

    def test_empty_clusters_returns_placeholder(self):
        assert self._render() == "(no clusters)"

    def test_single_cluster_basic_fields(self):
        rendered = self._render(
            _make_cluster(
                start_iso="2026-01-01T00:05:00+00:00",
                severity="ERROR",
                source_ids=("syslog",),
                representative_text="Failed password for root",
            )
        )
        assert "Cluster 1" in rendered
        assert "2026-01-01T00:05:00+00:00" in rendered
        assert "[ERROR]" in rendered
        assert "syslog" in rendered
        assert "Failed password for root" in rendered

    def test_burst_label_applied(self):
        rendered = self._render(_make_cluster(burst=True))
        assert "[BURST]" in rendered

    def test_no_burst_label_when_not_burst(self):
        rendered = self._render(_make_cluster(burst=False))
        assert "[BURST]" not in rendered

    def test_gap_label_applied_when_over_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=120.0))
        assert "silence" in rendered
        assert "120s" in rendered

    def test_gap_label_omitted_when_under_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=10.0))
        assert "silence" not in rendered

    def test_pattern_tags_included(self):
        rendered = self._render(
            _make_cluster(pattern_tags=("ssh_auth_failure", "brute_force"))
        )
        assert "ssh_auth_failure" in rendered
        assert "brute_force" in rendered

    def test_no_patterns_section_when_empty(self):
        rendered = self._render(_make_cluster(pattern_tags=()))
        assert "[patterns:" not in rendered

    def test_multiple_clusters_numbered(self):
        first = _make_cluster(cluster_id="c1", representative_text="first event")
        second = _make_cluster(cluster_id="c2", representative_text="second event")
        rendered = self._render(first, second)
        assert "Cluster 1" in rendered
        assert "Cluster 2" in rendered
        assert "first event" in rendered
        assert "second event" in rendered

    def test_representative_text_truncated_at_200_chars(self):
        oversized = "x" * 300
        rendered = self._render(_make_cluster(representative_text=oversized))
        # Exactly 200 characters survive; a run of 201 must not appear.
        assert "x" * 200 in rendered
        assert "x" * 201 not in rendered

    def test_null_start_iso_renders_as_unknown(self):
        rendered = self._render(_make_cluster(start_iso=None))
        assert "unknown" in rendered
|