turnstone/app/services/diagnose/synthesizer.py
pyr0ball 5da8db2bcd fix(diagnose): pass full timeline clusters and hypothesis descriptions to synthesizer LLM
Stage 5 (SummarySynthesizer) was only sending aggregate timeline stats to the
LLM (cluster count, burst count, gap count) — the actual sequenced cluster data
that Stage 1 reconstructed was never included. The LLM had no per-cluster
timestamps, severity, burst flags, silence gaps, or representative text to
write the TIMELINE section from.

Added _build_timeline_block() to emit a numbered per-cluster summary matching
the format Stage 3 uses for the hypothesizer, and included it in the user
message alongside the hypothesis block.

Also fixed _build_hypothesis_block() to include the 2-4 sentence description
each hypothesis carries — previously only the title and novelty score reached
the LLM.

11 new tests cover _build_timeline_block() directly (burst label, gap threshold,
pattern tags, text truncation at 200 chars, null start_iso, multi-cluster
numbering). 529 tests passing.
2026-06-16 21:46:01 -07:00

203 lines
7.8 KiB
Python

"""Stage 5: Summary Synthesizer — deterministic narrative from ranked hypotheses.
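When an LLM URL and model are supplied, the narrative is written by the LLM;
the deterministic text is only the fallback.
Streaming upgrade (async SSE chunks) is tracked as a follow-up enhancement.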
This implementation is synchronous to match the rest of the pipeline.
"""
from __future__ import annotations
import logging
from app.context.retriever import RetrievedContext
from app.services.diagnose._llm_client import call_llm
from app.services.diagnose.models import RankedHypothesis, TimelineResult
logger = logging.getLogger(__name__)
# System prompts keyed by audience ("tech_level"). Each value sets the persona
# and spells out the exact numbered output format the LLM is asked to follow.
# SummarySynthesizer.synthesize() uses the "sysadmin" entry when the requested
# tech_level is not a key of this dict.
_SYSTEM_PROMPTS: dict[str, str] = {
    # Operator-facing: concise verdict, timeline, root causes, actions.
    "sysadmin": (
        "You are a Linux sysadmin diagnosing a system incident. "
        "Write a concise, actionable incident diagnosis.\n\n"
        "Format your response exactly as:\n"
        "1. VERDICT: [CRITICAL|ERROR|WARN|INFO] — <what happened> (<X>% confidence)\n"
        "2. TIMELINE: <what the logs show in sequence, 2-3 sentences>\n"
        "3. ROOT CAUSES:\n"
        " - <hypothesis 1 title> (<confidence>%)\n"
        " - <hypothesis 2 title> (<confidence>%)\n"
        "4. RECOMMENDED ACTIONS:\n"
        " - <action based on hypotheses>\n"
        "5. INVESTIGATE FURTHER: <open questions, if any>"
    ),
    # Same five sections as "sysadmin", but asks for explanations of services,
    # abbreviations, and the reasoning behind each action.
    "homelab": (
        "You are explaining a system incident to a home lab enthusiast — someone "
        "comfortable with Linux basics but not necessarily familiar with every daemon "
        "or kernel subsystem. Be clear about what each service does; spell out "
        "abbreviations; explain why each action helps.\n\n"
        "Format your response exactly as:\n"
        "1. VERDICT: [CRITICAL|ERROR|WARN|INFO] — <what happened in plain terms> (<X>% confidence)\n"
        "2. TIMELINE: <what happened in sequence, 2-3 sentences; explain what each service is>\n"
        "3. ROOT CAUSES:\n"
        " - <hypothesis title — one sentence explaining what it means> (<confidence>%)\n"
        "4. RECOMMENDED ACTIONS:\n"
        " - <command or step — explain what it does and why>\n"
        "5. INVESTIGATE FURTHER: <open questions in plain language>"
    ),
    # Non-technical summary: a different, four-section format with no
    # VERDICT/TIMELINE/ROOT CAUSES headings.
    "executive": (
        "You are summarizing a technical system incident for a non-technical stakeholder. "
        "Focus on what broke, what the business impact was, and what the technical team is doing about it. "
        "Use plain English. Do not use daemon names, kernel terms, log syntax, or technical jargon.\n\n"
        "Format your response exactly as:\n"
        "1. WHAT HAPPENED: <1-2 sentences describing the problem in plain English>\n"
        "2. IMPACT: <which services or users were affected, and how>\n"
        "3. CONFIDENCE: <High / Medium / Low — how certain we are of the diagnosis>\n"
        "4. ACTION NEEDED: <what the IT team is doing or should do, in plain terms>"
    ),
}
def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str:
    """Render the top non-suppressed hypotheses as a bullet list for the prompt.

    Args:
        ranked: Ranked hypotheses. The incoming order is kept as-is (no
            re-sorting happens here); entries with a truthy ``suppress`` are
            skipped and at most the first three remaining ones are rendered.

    Returns:
        One ``- [severity, N% conf, novelty X.XX] title`` bullet per
        hypothesis, followed by the hypothesis description on its own line
        when the description is non-empty. Returns ``"(none)"`` when nothing
        remains after filtering.
    """
    active = [rh for rh in ranked if not rh.suppress][:3]
    if not active:
        return "(none)"

    lines: list[str] = []
    for rh in active:
        h = rh.hypothesis
        # Round to the nearest percent instead of truncating: binary floats
        # make e.g. 0.29 * 100 == 28.999999999999996, which int() alone would
        # report as 28%.
        conf_pct = int(round(h.confidence * 100))
        desc = f"\n {h.description}" if h.description else ""
        lines.append(
            f"- [{h.severity}, {conf_pct}% conf, novelty {rh.novelty_score:.2f}] "
            f"{h.title}{desc}"
        )
    return "\n".join(lines)
def _build_timeline_block(timeline: TimelineResult) -> str:
    """Render every timeline cluster as one numbered line for the prompt.

    Each line carries the 1-based cluster number, an optional ``[BURST]``
    marker, an optional silence-gap note (only when the gap before the cluster
    exceeds 30 seconds), the start timestamp (``unknown`` when missing), the
    severity, up to three source ids, the first 200 characters of the
    representative text, and up to four pattern tags when any are present.

    The original docstring states this mirrors the hypothesizer's cluster
    format; that code is not visible from this module.

    Returns:
        The newline-joined cluster lines, or ``"(no clusters)"`` when the
        timeline has no clusters.
    """
    clusters = timeline.clusters
    if not clusters:
        return "(no clusters)"

    rendered: list[str] = []
    for number, cluster in enumerate(clusters, start=1):
        when = cluster.start_iso if cluster.start_iso else "unknown"
        source_part = ", ".join(list(cluster.source_ids)[:3])
        tag_part = ", ".join(list(cluster.pattern_tags)[:4])

        # Build the "Cluster N [BURST] (+Ns silence)" prefix piece by piece.
        prefix = f"Cluster {number}"
        if cluster.burst:
            prefix += " [BURST]"
        if cluster.gap_before_seconds > 30:
            prefix += f" (+{int(cluster.gap_before_seconds)}s silence)"

        preview = cluster.representative_text[:200]
        entry = f"{prefix} @ {when} [{cluster.severity}] ({source_part}) — {preview}"
        if tag_part:
            entry = f"{entry} [patterns: {tag_part}]"
        rendered.append(entry)

    return "\n".join(rendered)
def _build_context_block(ctx: RetrievedContext) -> str:
    """Render the first five retrieved chunks as ``[filename] text`` lines.

    Each chunk is read with ``.get``: a missing ``filename`` becomes
    ``unknown``, a missing ``text`` becomes the empty string, and the text is
    cut to its first 300 characters.

    Returns:
        The newline-joined lines, or ``"(none)"`` when there are no chunks.
    """
    rendered = [
        f"[{entry.get('filename', 'unknown')}] {entry.get('text', '')[:300]}"
        for entry in ctx.chunks[:5]
    ]
    if not rendered:
        return "(none)"
    return "\n".join(rendered)
def _deterministic_fallback(
    ranked: list[RankedHypothesis],
    timeline: TimelineResult,
) -> str:
    """Build the LLM-free summary text from the hypothesis and timeline data.

    Args:
        ranked: Ranked hypotheses; the incoming order is kept as-is.
        timeline: Timeline result; only ``total_entries`` and the number of
            ``clusters`` are used.

    Returns:
        Three lines: ``VERDICT`` (severity, title and confidence of the first
        candidate hypothesis), ``TIMELINE`` (entry and cluster counts) and
        ``ROOT CAUSES`` (comma-separated candidate titles, or ``None``).
        With no hypotheses at all the verdict is
        ``UNKNOWN — No hypotheses generated (0% confidence)``.
    """
    # Prefer the top three non-suppressed hypotheses; when every hypothesis is
    # suppressed, fall back to the top three overall so the verdict is not empty.
    active = [rh for rh in ranked if not rh.suppress][:3]
    candidates = active or ranked[:3]

    if candidates:
        top = candidates[0].hypothesis
        verdict_severity = top.severity
        verdict_title = top.title
        # Round to the nearest percent instead of truncating: binary floats
        # make e.g. 0.29 * 100 == 28.999999999999996, which int() alone would
        # report as 28%.
        verdict_conf = int(round(top.confidence * 100))
    else:
        verdict_severity = "UNKNOWN"
        verdict_title = "No hypotheses generated"
        verdict_conf = 0

    root_causes = ", ".join(rh.hypothesis.title for rh in candidates) or "None"

    # The " — " separator keeps severity and title from running together and
    # matches the VERDICT format the system prompts ask the LLM to produce.
    return (
        f"VERDICT: {verdict_severity} — {verdict_title} ({verdict_conf}% confidence)\n"
        f"TIMELINE: {timeline.total_entries} entries across {len(timeline.clusters)} clusters.\n"
        f"ROOT CAUSES: {root_causes}"
    )
class SummarySynthesizer:
    """Stage 5 of the multi-agent diagnose pipeline.

    Turns ranked hypotheses, the reconstructed timeline and retrieved runbook
    context into a human-readable incident narrative. Without a configured
    LLM it returns the deterministic fallback built from the hypothesis data.
    """

    def synthesize(
        self,
        ranked: list[RankedHypothesis],
        timeline: TimelineResult,
        ctx: RetrievedContext,
        query: str,
        llm_url: str | None = None,
        llm_model: str | None = None,
        llm_api_key: str | None = None,
        tech_level: str = "sysadmin",
    ) -> str:
        """Return the synthesis text as a single string (synchronous).

        The deterministic fallback is returned when ``llm_url`` or
        ``llm_model`` is missing/empty, or when ``call_llm`` returns a falsy
        result.

        NOTE(review): exceptions raised by ``call_llm`` are not caught here;
        presumably it signals failure by returning an empty result — confirm.

        Args:
            ranked: Ranked hypotheses fed to the prompt and the fallback.
            timeline: Reconstructed timeline (clusters plus aggregate counts).
            ctx: Retrieved runbook context.
            query: The user's query, placed at the top of the user message.
            llm_url: LLM endpoint, passed through to ``call_llm``.
            llm_model: Model name, passed through to ``call_llm``.
            llm_api_key: Optional API key, passed through to ``call_llm``.
            tech_level: Key into ``_SYSTEM_PROMPTS``; unknown values use the
                ``"sysadmin"`` prompt.
        """
        # Computed up front: it is the answer for both the "no LLM" and the
        # "LLM returned nothing" paths.
        fallback = _deterministic_fallback(ranked, timeline)
        if not (llm_url and llm_model):
            return fallback

        system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"])

        hypotheses_text = _build_hypothesis_block(ranked)
        timeline_text = _build_timeline_block(timeline)
        runbook_text = _build_context_block(ctx)
        primary_sources = ", ".join(timeline.dominant_sources[:5]) or "none"

        # Four sections separated by blank lines: query, timeline (aggregate
        # header + per-cluster lines), hypotheses, runbook context.
        sections = [
            f"Query: {query}",
            (
                f"Timeline ({len(timeline.clusters)} clusters, "
                f"{timeline.burst_count} bursts, "
                f"{timeline.gap_count} silence gaps; "
                f"primary sources: {primary_sources}):\n"
                f"{timeline_text}"
            ),
            f"Root-cause hypotheses:\n{hypotheses_text}",
            f"Context from runbooks:\n{runbook_text}",
        ]
        user_message = "\n\n".join(sections)

        result = call_llm(
            llm_url=llm_url,
            llm_model=llm_model,
            llm_api_key=llm_api_key,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
        )
        return result or fallback