"""Stage 5: Summary Synthesizer — deterministic narrative from ranked hypotheses. Streaming upgrade (async SSE chunks) is tracked as a follow-up enhancement. This implementation is synchronous to match the rest of the pipeline. """ from __future__ import annotations import logging from app.context.retriever import RetrievedContext from app.services.diagnose._llm_client import call_llm from app.services.diagnose.models import RankedHypothesis, TimelineResult logger = logging.getLogger(__name__) _SYSTEM_PROMPTS: dict[str, str] = { "sysadmin": ( "You are a Linux sysadmin diagnosing a system incident. " "Write a concise, actionable incident diagnosis.\n\n" "Format your response exactly as:\n" "1. VERDICT: [CRITICAL|ERROR|WARN|INFO] — (% confidence)\n" "2. TIMELINE: \n" "3. ROOT CAUSES:\n" " - (%)\n" " - (%)\n" "4. RECOMMENDED ACTIONS:\n" " - \n" "5. INVESTIGATE FURTHER: " ), "homelab": ( "You are explaining a system incident to a home lab enthusiast — someone " "comfortable with Linux basics but not necessarily familiar with every daemon " "or kernel subsystem. Be clear about what each service does; spell out " "abbreviations; explain why each action helps.\n\n" "Format your response exactly as:\n" "1. VERDICT: [CRITICAL|ERROR|WARN|INFO] — (% confidence)\n" "2. TIMELINE: \n" "3. ROOT CAUSES:\n" " - (%)\n" "4. RECOMMENDED ACTIONS:\n" " - \n" "5. INVESTIGATE FURTHER: " ), "executive": ( "You are summarizing a technical system incident for a non-technical stakeholder. " "Focus on what broke, what the business impact was, and what the technical team is doing about it. " "Use plain English. Do not use daemon names, kernel terms, log syntax, or technical jargon.\n\n" "Format your response exactly as:\n" "1. WHAT HAPPENED: <1-2 sentences describing the problem in plain English>\n" "2. IMPACT: \n" "3. CONFIDENCE: \n" "4. ACTION NEEDED: " ), } def _build_hypothesis_block(ranked: list[RankedHypothesis]) -> str: """Build the hypothesis block for the prompt (non-suppressed only, top 3).""" active = [rh for rh in ranked if not rh.suppress][:3] if not active: return "(none)" lines: list[str] = [] for rh in active: h = rh.hypothesis conf_pct = int(h.confidence * 100) novelty = f"{rh.novelty_score:.2f}" desc = f"\n {h.description}" if h.description else "" lines.append( f"- [{h.severity}, {conf_pct}% conf, novelty {novelty}] {h.title}{desc}" ) return "\n".join(lines) def _build_timeline_block(timeline: TimelineResult) -> str: """Build a sequenced cluster block so the synthesizer can narrate what happened. Mirrors the format used by the hypothesizer, but adds gap information so the LLM can reason about silence windows between bursts. """ if not timeline.clusters: return "(no clusters)" lines: list[str] = [] for i, c in enumerate(timeline.clusters): ts = c.start_iso or "unknown" sources = ", ".join(list(c.source_ids)[:3]) tags = ", ".join(list(c.pattern_tags)[:4]) burst_label = " [BURST]" if c.burst else "" gap_label = ( f" (+{int(c.gap_before_seconds)}s silence)" if c.gap_before_seconds > 30 else "" ) text_preview = c.representative_text[:200] line = ( f"Cluster {i + 1}{burst_label}{gap_label} @ {ts} [{c.severity}] " f"({sources}) — {text_preview}" ) if tags: line += f" [patterns: {tags}]" lines.append(line) return "\n".join(lines) def _build_context_block(ctx: RetrievedContext) -> str: """Build the runbook context block for the prompt.""" parts: list[str] = [] for chunk in ctx.chunks[:5]: filename = chunk.get("filename", "unknown") text = chunk.get("text", "")[:300] parts.append(f"[{filename}] {text}") return "\n".join(parts) if parts else "(none)" def _deterministic_fallback( ranked: list[RankedHypothesis], timeline: TimelineResult, ) -> str: """Build a deterministic fallback text when no LLM is available.""" active = [rh for rh in ranked if not rh.suppress][:3] if active: top = active[0] verdict_severity = top.hypothesis.severity verdict_title = top.hypothesis.title verdict_conf = int(top.hypothesis.confidence * 100) elif ranked: top = ranked[0] verdict_severity = top.hypothesis.severity verdict_title = top.hypothesis.title verdict_conf = int(top.hypothesis.confidence * 100) else: verdict_severity = "UNKNOWN" verdict_title = "No hypotheses generated" verdict_conf = 0 root_causes = ", ".join( rh.hypothesis.title for rh in (active or ranked[:3]) ) or "None" return ( f"VERDICT: {verdict_severity} — {verdict_title} ({verdict_conf}% confidence)\n" f"TIMELINE: {timeline.total_entries} entries across {len(timeline.clusters)} clusters.\n" f"ROOT CAUSES: {root_causes}" ) class SummarySynthesizer: """Stage 5 of the multi-agent diagnose pipeline. Synthesizes a human-readable incident narrative from ranked hypotheses, the reconstructed timeline, and RAG context. When no LLM is configured, returns a deterministic fallback built from the hypothesis data. """ def synthesize( self, ranked: list[RankedHypothesis], timeline: TimelineResult, ctx: RetrievedContext, query: str, llm_url: str | None = None, llm_model: str | None = None, llm_api_key: str | None = None, tech_level: str = "sysadmin", ) -> str: """Return synthesis text (single string, synchronous). Falls back to a deterministic narrative when no LLM URL or model is provided, or when the LLM call fails. """ fallback = _deterministic_fallback(ranked, timeline) if not llm_url or not llm_model: return fallback system_prompt = _SYSTEM_PROMPTS.get(tech_level, _SYSTEM_PROMPTS["sysadmin"]) hypothesis_block = _build_hypothesis_block(ranked) timeline_block = _build_timeline_block(timeline) context_block = _build_context_block(ctx) dominant = ", ".join(timeline.dominant_sources[:5]) or "none" user_message = ( f"Query: {query}\n\n" f"Timeline ({len(timeline.clusters)} clusters, " f"{timeline.burst_count} bursts, " f"{timeline.gap_count} silence gaps; " f"primary sources: {dominant}):\n" f"{timeline_block}\n\n" f"Root-cause hypotheses:\n{hypothesis_block}\n\n" f"Context from runbooks:\n{context_block}" ) messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}, ] result = call_llm( llm_url=llm_url, llm_model=llm_model, llm_api_key=llm_api_key, messages=messages, ) return result if result else fallback