fix(diagnose): pass full timeline clusters and hypothesis descriptions to synthesizer LLM
Stage 5 (SummarySynthesizer) was only sending aggregate timeline stats to the LLM (cluster count, burst count, gap count) — the actual sequenced cluster data that Stage 1 reconstructed was never included. The LLM had no per-cluster timestamps, severity, burst flags, silence gaps, or representative text to write the TIMELINE section from. Added _build_timeline_block() to emit a numbered per-cluster summary matching the format Stage 3 uses for the hypothesizer, and included it in the user message alongside the hypothesis block. Also fixed _build_hypothesis_block() to include the 2-4 sentence description each hypothesis carries — previously only the title and novelty score reached the LLM. 11 new tests cover _build_timeline_block() directly (burst label, gap threshold, pattern tags, text truncation at 200 chars, null start_iso, multi-cluster numbering). 529 tests passing.
This commit is contained in:
parent
4c1940d12e
commit
5da8db2bcd
2 changed files with 152 additions and 10 deletions
|
|
@ -64,13 +64,43 @@ def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str:
|
|||
h = rh.hypothesis
|
||||
conf_pct = int(h.confidence * 100)
|
||||
novelty = f"{rh.novelty_score:.2f}"
|
||||
desc = f"\n {h.description}" if h.description else ""
|
||||
lines.append(
|
||||
f"- [{h.severity}, {conf_pct}%] {h.title}\n"
|
||||
f" Novelty: {novelty}"
|
||||
f"- [{h.severity}, {conf_pct}% conf, novelty {novelty}] {h.title}{desc}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_timeline_block(timeline: TimelineResult) -> str:
    """Build a sequenced cluster block so the synthesizer can narrate what happened.

    Mirrors the format used by the hypothesizer, but adds gap information so the
    LLM can reason about silence windows between bursts.

    Each cluster is rendered on exactly one line::

        Cluster N[ [BURST]][ (+Ss silence)] @ <start> [<severity>] (<sources>) — <text>[ [patterns: <tags>]]

    Args:
        timeline: Timeline whose ``clusters`` are rendered in iteration order.

    Returns:
        One line per cluster joined with newlines, or ``"(no clusters)"`` when
        the timeline has no clusters.
    """
    if not timeline.clusters:
        return "(no clusters)"

    lines: list[str] = []
    for number, cluster in enumerate(timeline.clusters, start=1):
        ts = cluster.start_iso or "unknown"
        # Cap the lists so one noisy cluster cannot dominate the prompt.
        sources = ", ".join(list(cluster.source_ids)[:3])
        tags = ", ".join(list(cluster.pattern_tags)[:4])
        burst_label = " [BURST]" if cluster.burst else ""

        # A missing gap is treated as "no silence" instead of raising on `None > 30`.
        gap_seconds = cluster.gap_before_seconds or 0
        gap_label = f" (+{int(gap_seconds)}s silence)" if gap_seconds > 30 else ""

        # Collapse newlines/tabs/runs of spaces BEFORE truncating: a multi-line
        # message (e.g. a stack trace) would otherwise split this cluster across
        # several lines and break the numbered one-line-per-cluster layout.
        text_preview = " ".join((cluster.representative_text or "").split())[:200]

        line = (
            f"Cluster {number}{burst_label}{gap_label} @ {ts} [{cluster.severity}] "
            f"({sources}) — {text_preview}"
        )
        if tags:
            line += f" [patterns: {tags}]"
        lines.append(line)
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_context_block(ctx: RetrievedContext) -> str:
|
||||
"""Build the runbook context block for the prompt."""
|
||||
parts: list[str] = []
|
||||
|
|
@ -144,17 +174,18 @@ class SummarySynthesizer:
|
|||
|
||||
system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])
|
||||
hypothesis_block = _build_hypothesis_block(ranked)
|
||||
timeline_block = _build_timeline_block(timeline)
|
||||
context_block = _build_context_block(ctx)
|
||||
dominant = ", ".join(timeline.dominant_sources[:5]) or "none"
|
||||
|
||||
user_message = (
|
||||
f"Query: {query}\n\n"
|
||||
f"Timeline summary:\n"
|
||||
f"- {len(timeline.clusters)} clusters, "
|
||||
f"Timeline ({len(timeline.clusters)} clusters, "
|
||||
f"{timeline.burst_count} bursts, "
|
||||
f"{timeline.gap_count} silence gaps\n"
|
||||
f"- Primary sources: {dominant}\n\n"
|
||||
f"Top hypotheses:\n{hypothesis_block}\n\n"
|
||||
f"{timeline.gap_count} silence gaps; "
|
||||
f"primary sources: {dominant}):\n"
|
||||
f"{timeline_block}\n\n"
|
||||
f"Root-cause hypotheses:\n{hypothesis_block}\n\n"
|
||||
f"Context from runbooks:\n{context_block}"
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ from __future__ import annotations
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from app.context.retriever import RetrievedContext
|
||||
from app.services.diagnose.models import Hypothesis, RankedHypothesis, TimelineResult
|
||||
from app.services.diagnose.synthesizer import SummarySynthesizer
|
||||
from app.services.diagnose.models import EventCluster, Hypothesis, RankedHypothesis, TimelineResult
|
||||
from app.services.diagnose.synthesizer import SummarySynthesizer, _build_timeline_block
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -50,12 +50,38 @@ def _make_ranked(
|
|||
)
|
||||
|
||||
|
||||
def _make_cluster(
    cluster_id: str = "c1",
    start_iso: str | None = "2026-01-01T00:05:00+00:00",
    severity: str = "ERROR",
    source_ids: tuple[str, ...] = ("syslog",),
    pattern_tags: tuple[str, ...] = ("ssh_auth_failure",),
    burst: bool = False,
    gap_before_seconds: float = 0.0,
    representative_text: str = "Failed password for root from 1.2.3.4 port 22",
) -> EventCluster:
    """Create an ``EventCluster`` fixture with overridable fields.

    Only the fields exposed as parameters vary between tests; ``entries``,
    ``end_iso`` and ``duration_seconds`` are pinned to fixed placeholder values.
    """
    fixture = EventCluster(
        # Identity and timing.
        cluster_id=cluster_id,
        start_iso=start_iso,
        end_iso=None,
        duration_seconds=30.0,
        gap_before_seconds=gap_before_seconds,
        # Content.
        entries=("e1",),
        representative_text=representative_text,
        source_ids=source_ids,
        pattern_tags=pattern_tags,
        # Classification.
        severity=severity,  # type: ignore[arg-type]
        burst=burst,
    )
    return fixture
|
||||
|
||||
|
||||
def _make_timeline(
|
||||
total_entries: int = 42,
|
||||
n_clusters: int = 3,
|
||||
clusters: tuple[EventCluster, ...] | None = None,
|
||||
) -> TimelineResult:
|
||||
return TimelineResult(
|
||||
clusters=tuple(),
|
||||
clusters=clusters if clusters is not None else tuple(),
|
||||
total_entries=total_entries,
|
||||
window_start="2026-01-01T00:00:00+00:00",
|
||||
window_end="2026-01-01T01:00:00+00:00",
|
||||
|
|
@ -283,3 +309,88 @@ class TestSynthesizerEmptyRanked:
|
|||
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
class TestBuildTimelineBlock:
    """Unit tests for the ``_build_timeline_block`` prompt helper."""

    @staticmethod
    def _render(*clusters):
        """Wrap the given clusters in a timeline and return the rendered block."""
        return _build_timeline_block(_make_timeline(clusters=clusters))

    def test_empty_clusters_returns_placeholder(self):
        # A timeline without clusters yields the fixed placeholder string.
        assert self._render() == "(no clusters)"

    def test_single_cluster_basic_fields(self):
        rendered = self._render(
            _make_cluster(
                start_iso="2026-01-01T00:05:00+00:00",
                severity="ERROR",
                source_ids=("syslog",),
                representative_text="Failed password for root",
            )
        )
        expected_fragments = (
            "Cluster 1",
            "2026-01-01T00:05:00+00:00",
            "[ERROR]",
            "syslog",
            "Failed password for root",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_burst_label_applied(self):
        rendered = self._render(_make_cluster(burst=True))
        assert "[BURST]" in rendered

    def test_no_burst_label_when_not_burst(self):
        rendered = self._render(_make_cluster(burst=False))
        assert "[BURST]" not in rendered

    def test_gap_label_applied_when_over_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=120.0))
        assert "silence" in rendered
        assert "120s" in rendered

    def test_gap_label_omitted_when_under_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=10.0))
        assert "silence" not in rendered

    def test_pattern_tags_included(self):
        wanted_tags = ("ssh_auth_failure", "brute_force")
        rendered = self._render(_make_cluster(pattern_tags=wanted_tags))
        assert "ssh_auth_failure" in rendered
        assert "brute_force" in rendered

    def test_no_patterns_section_when_empty(self):
        rendered = self._render(_make_cluster(pattern_tags=tuple()))
        assert "[patterns:" not in rendered

    def test_multiple_clusters_numbered(self):
        first = _make_cluster(cluster_id="c1", representative_text="first event")
        second = _make_cluster(cluster_id="c2", representative_text="second event")
        rendered = self._render(first, second)
        for fragment in ("Cluster 1", "Cluster 2", "first event", "second event"):
            assert fragment in rendered

    def test_representative_text_truncated_at_200_chars(self):
        # 300 identical characters: exactly 200 must survive, never 201.
        rendered = self._render(_make_cluster(representative_text="x" * 300))
        assert "x" * 200 in rendered
        assert "x" * 201 not in rendered

    def test_null_start_iso_renders_as_unknown(self):
        rendered = self._render(_make_cluster(start_iso=None))
        assert "unknown" in rendered
|
||||
|
|
|
|||
Loading…
Reference in a new issue