fix(diagnose): pass full timeline clusters and hypothesis descriptions to synthesizer LLM
Stage 5 (SummarySynthesizer) was sending only aggregate timeline stats to the LLM (cluster count, burst count, gap count, primary sources) — the sequenced cluster data that Stage 1 reconstructed was never included, so the LLM had no per-cluster timestamps, severity, burst flags, silence gaps, or representative text from which to write the TIMELINE section. Added _build_timeline_block(), which emits a numbered per-cluster summary mirroring the format Stage 3 uses for the hypothesizer (plus silence-gap information), and included it in the user message alongside the hypothesis block. Also fixed _build_hypothesis_block() to include the 2-4 sentence description each hypothesis carries — previously only the severity, confidence, title, and novelty score reached the LLM. 11 new tests cover _build_timeline_block() directly (empty placeholder, basic fields, burst label present/absent, gap threshold, pattern tags present/absent, multi-cluster numbering, text truncation at 200 chars, null start_iso). 529 tests passing.
This commit is contained in:
parent
4c1940d12e
commit
5da8db2bcd
2 changed files with 152 additions and 10 deletions
|
|
@ -64,13 +64,43 @@ def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str:
|
||||||
h = rh.hypothesis
|
h = rh.hypothesis
|
||||||
conf_pct = int(h.confidence * 100)
|
conf_pct = int(h.confidence * 100)
|
||||||
novelty = f"{rh.novelty_score:.2f}"
|
novelty = f"{rh.novelty_score:.2f}"
|
||||||
|
desc = f"\n {h.description}" if h.description else ""
|
||||||
lines.append(
|
lines.append(
|
||||||
f"- [{h.severity}, {conf_pct}%] {h.title}\n"
|
f"- [{h.severity}, {conf_pct}% conf, novelty {novelty}] {h.title}{desc}"
|
||||||
f" Novelty: {novelty}"
|
|
||||||
)
|
)
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_timeline_block(timeline: TimelineResult) -> str:
    """Build a sequenced cluster block so the synthesizer can narrate what happened.

    Mirrors the format used by the hypothesizer, but adds gap information so the
    LLM can reason about silence windows between bursts.

    Each cluster is rendered on exactly one line::

        Cluster <n>[ [BURST]][ (+<gap>s silence)] @ <start> [<severity>] (<sources>) — <text>[ [patterns: <tags>]]

    Args:
        timeline: Reconstructed timeline; only ``timeline.clusters`` is read.

    Returns:
        The per-cluster lines joined with newlines, or ``"(no clusters)"`` when
        the timeline contains no clusters.
    """
    # Prompt-size limits, named so the intent is visible where they are used.
    max_sources = 3
    max_tags = 4
    max_preview_chars = 200
    # Gaps at or below this many seconds are not labelled as silence.
    gap_threshold_seconds = 30

    if not timeline.clusters:
        return "(no clusters)"

    lines: list[str] = []
    for number, cluster in enumerate(timeline.clusters, start=1):
        started = cluster.start_iso or "unknown"
        sources = ", ".join(list(cluster.source_ids)[:max_sources])
        tags = ", ".join(list(cluster.pattern_tags)[:max_tags])
        burst_label = " [BURST]" if cluster.burst else ""
        gap_label = (
            f" (+{int(cluster.gap_before_seconds)}s silence)"
            if cluster.gap_before_seconds > gap_threshold_seconds
            else ""
        )
        # Collapse whitespace runs (including embedded newlines) BEFORE
        # truncating, so a multi-line log message such as a stack trace cannot
        # break the one-line-per-cluster layout the LLM is asked to read.
        preview = " ".join(cluster.representative_text.split())[:max_preview_chars]
        line = (
            f"Cluster {number}{burst_label}{gap_label} @ {started} "
            f"[{cluster.severity}] ({sources}) — {preview}"
        )
        if tags:
            line += f" [patterns: {tags}]"
        lines.append(line)
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def _build_context_block(ctx: RetrievedContext) -> str:
|
def _build_context_block(ctx: RetrievedContext) -> str:
|
||||||
"""Build the runbook context block for the prompt."""
|
"""Build the runbook context block for the prompt."""
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
|
|
@ -144,17 +174,18 @@ class SummarySynthesizer:
|
||||||
|
|
||||||
system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])
|
system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])
|
||||||
hypothesis_block = _build_hypothesis_block(ranked)
|
hypothesis_block = _build_hypothesis_block(ranked)
|
||||||
|
timeline_block = _build_timeline_block(timeline)
|
||||||
context_block = _build_context_block(ctx)
|
context_block = _build_context_block(ctx)
|
||||||
dominant = ", ".join(timeline.dominant_sources[:5]) or "none"
|
dominant = ", ".join(timeline.dominant_sources[:5]) or "none"
|
||||||
|
|
||||||
user_message = (
|
user_message = (
|
||||||
f"Query: {query}\n\n"
|
f"Query: {query}\n\n"
|
||||||
f"Timeline summary:\n"
|
f"Timeline ({len(timeline.clusters)} clusters, "
|
||||||
f"- {len(timeline.clusters)} clusters, "
|
|
||||||
f"{timeline.burst_count} bursts, "
|
f"{timeline.burst_count} bursts, "
|
||||||
f"{timeline.gap_count} silence gaps\n"
|
f"{timeline.gap_count} silence gaps; "
|
||||||
f"- Primary sources: {dominant}\n\n"
|
f"primary sources: {dominant}):\n"
|
||||||
f"Top hypotheses:\n{hypothesis_block}\n\n"
|
f"{timeline_block}\n\n"
|
||||||
|
f"Root-cause hypotheses:\n{hypothesis_block}\n\n"
|
||||||
f"Context from runbooks:\n{context_block}"
|
f"Context from runbooks:\n{context_block}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,8 +7,8 @@ from __future__ import annotations
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from app.context.retriever import RetrievedContext
|
from app.context.retriever import RetrievedContext
|
||||||
from app.services.diagnose.models import Hypothesis, RankedHypothesis, TimelineResult
|
from app.services.diagnose.models import EventCluster, Hypothesis, RankedHypothesis, TimelineResult
|
||||||
from app.services.diagnose.synthesizer import SummarySynthesizer
|
from app.services.diagnose.synthesizer import SummarySynthesizer, _build_timeline_block
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -50,12 +50,38 @@ def _make_ranked(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_cluster(
    cluster_id: str = "c1",
    start_iso: str | None = "2026-01-01T00:05:00+00:00",
    severity: str = "ERROR",
    source_ids: tuple[str, ...] = ("syslog",),
    pattern_tags: tuple[str, ...] = ("ssh_auth_failure",),
    burst: bool = False,
    gap_before_seconds: float = 0.0,
    representative_text: str = "Failed password for root from 1.2.3.4 port 22",
) -> EventCluster:
    """Return an ``EventCluster`` test fixture with overridable defaults.

    Only the fields the timeline-block tests vary are exposed as parameters;
    ``entries``, ``end_iso`` and ``duration_seconds`` are pinned to fixed values.
    """
    return EventCluster(
        # Identity and fixed filler fields.
        cluster_id=cluster_id,
        entries=("e1",),
        duration_seconds=30.0,
        # Timing.
        start_iso=start_iso,
        end_iso=None,
        gap_before_seconds=gap_before_seconds,
        burst=burst,
        # Classification and content.
        severity=severity,  # type: ignore[arg-type]
        source_ids=source_ids,
        pattern_tags=pattern_tags,
        representative_text=representative_text,
    )
|
||||||
|
|
||||||
|
|
||||||
def _make_timeline(
|
def _make_timeline(
|
||||||
total_entries: int = 42,
|
total_entries: int = 42,
|
||||||
n_clusters: int = 3,
|
n_clusters: int = 3,
|
||||||
|
clusters: tuple[EventCluster, ...] | None = None,
|
||||||
) -> TimelineResult:
|
) -> TimelineResult:
|
||||||
return TimelineResult(
|
return TimelineResult(
|
||||||
clusters=tuple(),
|
clusters=clusters if clusters is not None else tuple(),
|
||||||
total_entries=total_entries,
|
total_entries=total_entries,
|
||||||
window_start="2026-01-01T00:00:00+00:00",
|
window_start="2026-01-01T00:00:00+00:00",
|
||||||
window_end="2026-01-01T01:00:00+00:00",
|
window_end="2026-01-01T01:00:00+00:00",
|
||||||
|
|
@ -283,3 +309,88 @@ class TestSynthesizerEmptyRanked:
|
||||||
|
|
||||||
assert isinstance(result, str)
|
assert isinstance(result, str)
|
||||||
assert len(result) > 0
|
assert len(result) > 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildTimelineBlock:
    """Behavioural checks for the ``_build_timeline_block`` prompt helper."""

    @staticmethod
    def _render(*clusters: EventCluster) -> str:
        """Wrap *clusters* in a timeline and return the rendered block."""
        return _build_timeline_block(_make_timeline(clusters=clusters))

    def test_empty_clusters_returns_placeholder(self):
        assert self._render() == "(no clusters)"

    def test_single_cluster_basic_fields(self):
        rendered = self._render(
            _make_cluster(
                start_iso="2026-01-01T00:05:00+00:00",
                severity="ERROR",
                source_ids=("syslog",),
                representative_text="Failed password for root",
            )
        )
        expected_fragments = (
            "Cluster 1",
            "2026-01-01T00:05:00+00:00",
            "[ERROR]",
            "syslog",
            "Failed password for root",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_burst_label_applied(self):
        assert "[BURST]" in self._render(_make_cluster(burst=True))

    def test_no_burst_label_when_not_burst(self):
        assert "[BURST]" not in self._render(_make_cluster(burst=False))

    def test_gap_label_applied_when_over_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=120.0))
        assert "silence" in rendered
        assert "120s" in rendered

    def test_gap_label_omitted_when_under_threshold(self):
        rendered = self._render(_make_cluster(gap_before_seconds=10.0))
        assert "silence" not in rendered

    def test_pattern_tags_included(self):
        rendered = self._render(
            _make_cluster(pattern_tags=("ssh_auth_failure", "brute_force"))
        )
        assert "ssh_auth_failure" in rendered
        assert "brute_force" in rendered

    def test_no_patterns_section_when_empty(self):
        rendered = self._render(_make_cluster(pattern_tags=tuple()))
        assert "[patterns:" not in rendered

    def test_multiple_clusters_numbered(self):
        rendered = self._render(
            _make_cluster(cluster_id="c1", representative_text="first event"),
            _make_cluster(cluster_id="c2", representative_text="second event"),
        )
        for fragment in ("Cluster 1", "Cluster 2", "first event", "second event"):
            assert fragment in rendered

    def test_representative_text_truncated_at_200_chars(self):
        rendered = self._render(_make_cluster(representative_text="x" * 300))
        # Exactly 200 characters survive; a 201-character run must not appear.
        assert "x" * 200 in rendered
        assert "x" * 201 not in rendered

    def test_null_start_iso_renders_as_unknown(self):
        assert "unknown" in self._render(_make_cluster(start_iso=None))
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue