From 5da8db2bcdac2611b2d8aeded17c40a1229abdcf Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 16 Jun 2026 21:46:01 -0700 Subject: [PATCH] fix(diagnose): pass full timeline clusters and hypothesis descriptions to synthesizer LLM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage 5 (SummarySynthesizer) was only sending aggregate timeline stats to the LLM (cluster count, burst count, gap count, primary sources) — the actual sequenced cluster data that Stage 1 reconstructed was never included. The LLM had no per-cluster timestamps, severity, burst flags, silence gaps, or representative text to write the TIMELINE section from. Added _build_timeline_block() to emit a numbered per-cluster summary modelled on the format Stage 3 uses for the hypothesizer, with silence-gap annotations added, and included it in the user message alongside the hypothesis block. Also fixed _build_hypothesis_block() to include the 2-4 sentence description each hypothesis carries — previously only the severity, confidence, title, and novelty score reached the LLM. 11 new tests cover _build_timeline_block() directly (empty-cluster placeholder, basic fields, burst label, gap threshold, pattern tags, text truncation at 200 chars, null start_iso, multi-cluster numbering). 529 tests passing. 
--- app/services/diagnose/synthesizer.py | 45 +++++++++-- tests/test_diagnose_synthesizer.py | 117 ++++++++++++++++++++++++++- 2 files changed, 152 insertions(+), 10 deletions(-) diff --git a/app/services/diagnose/synthesizer.py b/app/services/diagnose/synthesizer.py index 679fb43..523ead0 100644 --- a/app/services/diagnose/synthesizer.py +++ b/app/services/diagnose/synthesizer.py @@ -64,13 +64,43 @@ def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str: h = rh.hypothesis conf_pct = int(h.confidence * 100) novelty = f"{rh.novelty_score:.2f}" + desc = f"\n {h.description}" if h.description else "" lines.append( - f"- [{h.severity}, {conf_pct}%] {h.title}\n" - f" Novelty: {novelty}" + f"- [{h.severity}, {conf_pct}% conf, novelty {novelty}] {h.title}{desc}" ) return "\n".join(lines) +def _build_timeline_block(timeline: TimelineResult) -> str: + """Build a sequenced cluster block so the synthesizer can narrate what happened. + + Mirrors the format used by the hypothesizer, but adds gap information so the + LLM can reason about silence windows between bursts. 
+ """ + if not timeline.clusters: + return "(no clusters)" + lines: list[str] = [] + for i, c in enumerate(timeline.clusters): + ts = c.start_iso or "unknown" + sources = ", ".join(list(c.source_ids)[:3]) + tags = ", ".join(list(c.pattern_tags)[:4]) + burst_label = " [BURST]" if c.burst else "" + gap_label = ( + f" (+{int(c.gap_before_seconds)}s silence)" + if c.gap_before_seconds > 30 + else "" + ) + text_preview = c.representative_text[:200] + line = ( + f"Cluster {i + 1}{burst_label}{gap_label} @ {ts} [{c.severity}] " + f"({sources}) — {text_preview}" + ) + if tags: + line += f" [patterns: {tags}]" + lines.append(line) + return "\n".join(lines) + + def _build_context_block(ctx: RetrievedContext) -> str: """Build the runbook context block for the prompt.""" parts: list[str] = [] @@ -144,17 +174,18 @@ class SummarySynthesizer: system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"]) hypothesis_block = _build_hypothesis_block(ranked) + timeline_block = _build_timeline_block(timeline) context_block = _build_context_block(ctx) dominant = ", ".join(timeline.dominant_sources[:5]) or "none" user_message = ( f"Query: {query}\n\n" - f"Timeline summary:\n" - f"- {len(timeline.clusters)} clusters, " + f"Timeline ({len(timeline.clusters)} clusters, " f"{timeline.burst_count} bursts, " - f"{timeline.gap_count} silence gaps\n" - f"- Primary sources: {dominant}\n\n" - f"Top hypotheses:\n{hypothesis_block}\n\n" + f"{timeline.gap_count} silence gaps; " + f"primary sources: {dominant}):\n" + f"{timeline_block}\n\n" + f"Root-cause hypotheses:\n{hypothesis_block}\n\n" f"Context from runbooks:\n{context_block}" ) diff --git a/tests/test_diagnose_synthesizer.py b/tests/test_diagnose_synthesizer.py index 5229c99..8f806dd 100644 --- a/tests/test_diagnose_synthesizer.py +++ b/tests/test_diagnose_synthesizer.py @@ -7,8 +7,8 @@ from __future__ import annotations from unittest.mock import MagicMock, patch from app.context.retriever import RetrievedContext -from 
app.services.diagnose.models import Hypothesis, RankedHypothesis, TimelineResult -from app.services.diagnose.synthesizer import SummarySynthesizer +from app.services.diagnose.models import EventCluster, Hypothesis, RankedHypothesis, TimelineResult +from app.services.diagnose.synthesizer import SummarySynthesizer, _build_timeline_block # --------------------------------------------------------------------------- @@ -50,12 +50,38 @@ def _make_ranked( ) +def _make_cluster( + cluster_id: str = "c1", + start_iso: str | None = "2026-01-01T00:05:00+00:00", + severity: str = "ERROR", + source_ids: tuple[str, ...] = ("syslog",), + pattern_tags: tuple[str, ...] = ("ssh_auth_failure",), + burst: bool = False, + gap_before_seconds: float = 0.0, + representative_text: str = "Failed password for root from 1.2.3.4 port 22", +) -> EventCluster: + return EventCluster( + cluster_id=cluster_id, + entries=("e1",), + start_iso=start_iso, + end_iso=None, + duration_seconds=30.0, + source_ids=source_ids, + pattern_tags=pattern_tags, + severity=severity, # type: ignore[arg-type] + burst=burst, + gap_before_seconds=gap_before_seconds, + representative_text=representative_text, + ) + + def _make_timeline( total_entries: int = 42, n_clusters: int = 3, + clusters: tuple[EventCluster, ...] 
| None = None, ) -> TimelineResult: return TimelineResult( - clusters=tuple(), + clusters=clusters if clusters is not None else tuple(), total_entries=total_entries, window_start="2026-01-01T00:00:00+00:00", window_end="2026-01-01T01:00:00+00:00", @@ -283,3 +309,88 @@ class TestSynthesizerEmptyRanked: assert isinstance(result, str) assert len(result) > 0 + + +class TestBuildTimelineBlock: + """Unit tests for _build_timeline_block helper.""" + + def test_empty_clusters_returns_placeholder(self): + timeline = _make_timeline(clusters=tuple()) + assert _build_timeline_block(timeline) == "(no clusters)" + + def test_single_cluster_basic_fields(self): + cluster = _make_cluster( + start_iso="2026-01-01T00:05:00+00:00", + severity="ERROR", + source_ids=("syslog",), + representative_text="Failed password for root", + ) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "Cluster 1" in block + assert "2026-01-01T00:05:00+00:00" in block + assert "[ERROR]" in block + assert "syslog" in block + assert "Failed password for root" in block + + def test_burst_label_applied(self): + cluster = _make_cluster(burst=True) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "[BURST]" in block + + def test_no_burst_label_when_not_burst(self): + cluster = _make_cluster(burst=False) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "[BURST]" not in block + + def test_gap_label_applied_when_over_threshold(self): + cluster = _make_cluster(gap_before_seconds=120.0) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "silence" in block + assert "120s" in block + + def test_gap_label_omitted_when_under_threshold(self): + cluster = _make_cluster(gap_before_seconds=10.0) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "silence" not in block + + def 
test_pattern_tags_included(self): + cluster = _make_cluster(pattern_tags=("ssh_auth_failure", "brute_force")) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "ssh_auth_failure" in block + assert "brute_force" in block + + def test_no_patterns_section_when_empty(self): + cluster = _make_cluster(pattern_tags=tuple()) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "[patterns:" not in block + + def test_multiple_clusters_numbered(self): + c1 = _make_cluster(cluster_id="c1", representative_text="first event") + c2 = _make_cluster(cluster_id="c2", representative_text="second event") + timeline = _make_timeline(clusters=(c1, c2)) + block = _build_timeline_block(timeline) + assert "Cluster 1" in block + assert "Cluster 2" in block + assert "first event" in block + assert "second event" in block + + def test_representative_text_truncated_at_200_chars(self): + long_text = "x" * 300 + cluster = _make_cluster(representative_text=long_text) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "x" * 200 in block + assert "x" * 201 not in block + + def test_null_start_iso_renders_as_unknown(self): + cluster = _make_cluster(start_iso=None) + timeline = _make_timeline(clusters=(cluster,)) + block = _build_timeline_block(timeline) + assert "unknown" in block