fix(diagnose): pass full timeline clusters and hypothesis descriptions to synthesizer LLM

Stage 5 (SummarySynthesizer) was only sending aggregate timeline stats to the
LLM (cluster count, burst count, gap count) — the actual sequenced cluster data
that Stage 1 reconstructed was never included. The LLM had no per-cluster
timestamps, severity, burst flags, silence gaps, or representative text to
write the TIMELINE section from.

Added _build_timeline_block() to emit a numbered per-cluster summary matching
the format Stage 3 uses for the hypothesizer, and included it in the user
message alongside the hypothesis block.

Also fixed _build_hypothesis_block() to include the 2-4 sentence description
each hypothesis carries — previously only the title and novelty score reached
the LLM.

11 new tests cover _build_timeline_block() directly (burst label, gap threshold,
pattern tags, text truncation at 200 chars, null start_iso, multi-cluster
numbering). 529 tests passing.
This commit is contained in:
pyr0ball 2026-06-16 21:46:01 -07:00
parent 4c1940d12e
commit 5da8db2bcd
2 changed files with 152 additions and 10 deletions

View file

@ -64,13 +64,43 @@ def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str:
h = rh.hypothesis
conf_pct = int(h.confidence * 100)
novelty = f"{rh.novelty_score:.2f}"
desc = f"\n {h.description}" if h.description else ""
lines.append(
f"- [{h.severity}, {conf_pct}%] {h.title}\n"
f" Novelty: {novelty}"
f"- [{h.severity}, {conf_pct}% conf, novelty {novelty}] {h.title}{desc}"
)
return "\n".join(lines)
def _build_timeline_block(
    timeline: TimelineResult,
    *,
    gap_threshold_seconds: float = 30,
    max_text_chars: int = 200,
    max_sources: int = 3,
    max_tags: int = 4,
) -> str:
    """Render the timeline's clusters as one numbered line per cluster.

    Each line carries the cluster's 1-based position, an optional burst
    marker, an optional silence-gap marker, the start timestamp, severity,
    a capped list of source ids, a truncated text preview and, when present,
    a capped list of pattern tags.

    Args:
        timeline: Object exposing ``clusters``; every cluster must provide
            ``start_iso``, ``source_ids``, ``pattern_tags``, ``burst``,
            ``gap_before_seconds``, ``severity`` and ``representative_text``.
        gap_threshold_seconds: A silence marker is emitted only when the
            cluster's ``gap_before_seconds`` is strictly greater than this.
        max_text_chars: Maximum characters of ``representative_text`` kept.
        max_sources: Maximum number of source ids listed per cluster.
        max_tags: Maximum number of pattern tags listed per cluster.

    Returns:
        The newline-joined cluster lines, or ``"(no clusters)"`` when the
        timeline has no clusters.
    """
    if not timeline.clusters:
        return "(no clusters)"

    lines: list[str] = []
    # Clusters are numbered from 1 in the order the timeline provides them.
    for number, c in enumerate(timeline.clusters, start=1):
        # A missing/empty start timestamp is rendered as a literal placeholder.
        ts = c.start_iso or "unknown"
        # list() first so the cap also works for non-sliceable iterables.
        sources = ", ".join(list(c.source_ids)[:max_sources])
        tags = ", ".join(list(c.pattern_tags)[:max_tags])
        burst_label = " [BURST]" if c.burst else ""
        gap_label = (
            f" (+{int(c.gap_before_seconds)}s silence)"
            if c.gap_before_seconds > gap_threshold_seconds
            else ""
        )
        text_preview = c.representative_text[:max_text_chars]
        line = (
            f"Cluster {number}{burst_label}{gap_label} @ {ts} [{c.severity}] "
            f"({sources}) — {text_preview}"
        )
        # The patterns suffix is omitted entirely when there are no tags.
        if tags:
            line += f" [patterns: {tags}]"
        lines.append(line)
    return "\n".join(lines)
def _build_context_block(ctx: RetrievedContext) -> str:
"""Build the runbook context block for the prompt."""
parts: list[str] = []
@ -144,17 +174,18 @@ class SummarySynthesizer:
system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])
hypothesis_block = _build_hypothesis_block(ranked)
timeline_block = _build_timeline_block(timeline)
context_block = _build_context_block(ctx)
dominant = ", ".join(timeline.dominant_sources[:5]) or "none"
user_message = (
f"Query: {query}\n\n"
f"Timeline summary:\n"
f"- {len(timeline.clusters)} clusters, "
f"Timeline ({len(timeline.clusters)} clusters, "
f"{timeline.burst_count} bursts, "
f"{timeline.gap_count} silence gaps\n"
f"- Primary sources: {dominant}\n\n"
f"Top hypotheses:\n{hypothesis_block}\n\n"
f"{timeline.gap_count} silence gaps; "
f"primary sources: {dominant}):\n"
f"{timeline_block}\n\n"
f"Root-cause hypotheses:\n{hypothesis_block}\n\n"
f"Context from runbooks:\n{context_block}"
)

View file

@ -7,8 +7,8 @@ from __future__ import annotations
from unittest.mock import MagicMock, patch
from app.context.retriever import RetrievedContext
from app.services.diagnose.models import Hypothesis, RankedHypothesis, TimelineResult
from app.services.diagnose.synthesizer import SummarySynthesizer
from app.services.diagnose.models import EventCluster, Hypothesis, RankedHypothesis, TimelineResult
from app.services.diagnose.synthesizer import SummarySynthesizer, _build_timeline_block
# ---------------------------------------------------------------------------
@ -50,12 +50,38 @@ def _make_ranked(
)
def _make_cluster(
    cluster_id: str = "c1",
    start_iso: str | None = "2026-01-01T00:05:00+00:00",
    severity: str = "ERROR",
    source_ids: tuple[str, ...] = ("syslog",),
    pattern_tags: tuple[str, ...] = ("ssh_auth_failure",),
    burst: bool = False,
    gap_before_seconds: float = 0.0,
    representative_text: str = "Failed password for root from 1.2.3.4 port 22",
) -> EventCluster:
    """Create an EventCluster fixture, overriding only the fields a test cares about.

    Every parameter is forwarded unchanged to the matching EventCluster field;
    the remaining fields (entries, end_iso, duration_seconds) are pinned to
    fixed values.
    """
    # Fields no caller can vary through this factory.
    pinned = {
        "entries": ("e1",),
        "end_iso": None,
        "duration_seconds": 30.0,
    }
    # Fields taken straight from the factory arguments.
    supplied = {
        "cluster_id": cluster_id,
        "start_iso": start_iso,
        "source_ids": source_ids,
        "pattern_tags": pattern_tags,
        "severity": severity,
        "burst": burst,
        "gap_before_seconds": gap_before_seconds,
        "representative_text": representative_text,
    }
    return EventCluster(**pinned, **supplied)
def _make_timeline(
total_entries: int = 42,
n_clusters: int = 3,
clusters: tuple[EventCluster, ...] | None = None,
) -> TimelineResult:
return TimelineResult(
clusters=tuple(),
clusters=clusters if clusters is not None else tuple(),
total_entries=total_entries,
window_start="2026-01-01T00:00:00+00:00",
window_end="2026-01-01T01:00:00+00:00",
@ -283,3 +309,88 @@ class TestSynthesizerEmptyRanked:
assert isinstance(result, str)
assert len(result) > 0
class TestBuildTimelineBlock:
    """Exercises the _build_timeline_block helper one rendering rule at a time."""

    @staticmethod
    def _render(*clusters):
        """Wrap the given clusters in a timeline and return the rendered block."""
        return _build_timeline_block(_make_timeline(clusters=clusters))

    def test_empty_clusters_returns_placeholder(self):
        # A timeline without clusters renders the fixed placeholder string.
        assert self._render() == "(no clusters)"

    def test_single_cluster_basic_fields(self):
        rendered = self._render(
            _make_cluster(
                start_iso="2026-01-01T00:05:00+00:00",
                severity="ERROR",
                source_ids=("syslog",),
                representative_text="Failed password for root",
            )
        )
        expected_fragments = (
            "Cluster 1",
            "2026-01-01T00:05:00+00:00",
            "[ERROR]",
            "syslog",
            "Failed password for root",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_burst_label_applied(self):
        rendered = self._render(_make_cluster(burst=True))
        assert "[BURST]" in rendered

    def test_no_burst_label_when_not_burst(self):
        rendered = self._render(_make_cluster(burst=False))
        assert "[BURST]" not in rendered

    def test_gap_label_applied_when_over_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=120.0))
        for fragment in ("silence", "120s"):
            assert fragment in rendered

    def test_gap_label_omitted_when_under_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=10.0))
        assert "silence" not in rendered

    def test_pattern_tags_included(self):
        tag_names = ("ssh_auth_failure", "brute_force")
        rendered = self._render(_make_cluster(pattern_tags=tag_names))
        for tag in tag_names:
            assert tag in rendered

    def test_no_patterns_section_when_empty(self):
        rendered = self._render(_make_cluster(pattern_tags=tuple()))
        assert "[patterns:" not in rendered

    def test_multiple_clusters_numbered(self):
        first = _make_cluster(cluster_id="c1", representative_text="first event")
        second = _make_cluster(cluster_id="c2", representative_text="second event")
        rendered = self._render(first, second)
        for fragment in ("Cluster 1", "Cluster 2", "first event", "second event"):
            assert fragment in rendered

    def test_representative_text_truncated_at_200_chars(self):
        # 300 identical characters in, exactly 200 of them out.
        rendered = self._render(_make_cluster(representative_text="x" * 300))
        assert "x" * 200 in rendered
        assert "x" * 201 not in rendered

    def test_null_start_iso_renders_as_unknown(self):
        rendered = self._render(_make_cluster(start_iso=None))
        assert "unknown" in rendered