turnstone/app/services/diagnose/pipeline.py
pyr0ball 94d796e103 refactor: pipeline cleanup — 6 follow-up fixes (#33-#38)
- #33: Wrap ClassifiedTimeline.cluster_severities in MappingProxyType for
  true immutability (frozen=True only blocks field reassignment, not dict
  mutation).

- #34: Remove dead suppression branch in synthesizer._build_hypothesis_block.
  active[] is already filtered to not rh.suppress, so the 'Yes — suppressed'
  branch was unreachable. Now shows novelty score only.

- #35: Extract shared _llm_client.py with call_llm() + extract_content() +
  strip_json_fences(). Both RootCauseHypothesizer and SummarySynthesizer
  now import from one source. Also strips JSON fences from LLM output before
  parsing in hypothesizer._parse_response.

- #36: Add per-stage try/except in pipeline.run_pipeline(). Unhandled
  stage exceptions now emit {type: 'error'} + {type: 'done'} SSE events
  instead of silently closing the stream.

- #37: Move format_context_block() call inside the legacy LLM branch in
  diagnose/__init__.py — it was being computed unconditionally but only
  used in the non-pipeline path.

- #38: Coerce supporting_cluster_ids items to str() in hypothesizer
  _parse_response to guard against LLMs returning integers instead of
  string cluster IDs.
2026-05-25 19:05:56 -07:00

162 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Multi-agent diagnose pipeline orchestrator — Stage 15 wiring."""
from __future__ import annotations
import asyncio
import dataclasses
import logging
from collections.abc import AsyncGenerator
from pathlib import Path
from typing import Any
from app.context.retriever import RetrievedContext
from app.services.diagnose.classifier import SeverityClassifier
from app.services.diagnose.hypothesizer import RootCauseHypothesizer
from app.services.diagnose.suppressor import FalsePositiveSuppressor
from app.services.diagnose.synthesizer import SummarySynthesizer
from app.services.diagnose.timeline import TimelineReconstructor
from app.services.search import SearchResult
# Module-level logger; run_pipeline logs stage failures through it.
logger = logging.getLogger(__name__)
async def run_pipeline(
    db_path: Path,
    entries: list[SearchResult],
    ctx: RetrievedContext,
    query: str,
    since: str | None,  # reserved for future range-filtering in stage queries (#29 follow-up)
    until: str | None,  # reserved for future range-filtering in stage queries (#29 follow-up)
    llm_url: str | None,
    llm_model: str | None,
    llm_api_key: str | None,
) -> AsyncGenerator[dict[str, Any], None]:
    """Run the five diagnose stages in order and yield SSE event dicts.

    Every stage callable is executed through ``asyncio.to_thread``.

    Stages:
        1. TimelineReconstructor — cluster log entries by time
        2. SeverityClassifier — annotate clusters with severity
        3. RootCauseHypothesizer — generate hypotheses via LLM
        4. FalsePositiveSuppressor — rank and suppress known patterns
        5. SummarySynthesizer — produce a narrative diagnosis

    Args:
        db_path: Passed to the false-positive suppressor (stage 4).
        entries: Search results handed to the timeline reconstructor (stage 1).
        ctx: Retrieved context forwarded to the hypothesizer and synthesizer.
        query: User query forwarded to the hypothesizer and synthesizer.
        since: Currently unused; reserved for future range filtering.
        until: Currently unused; reserved for future range filtering.
        llm_url: Forwarded unchanged to the LLM-backed stages (3 and 5).
        llm_model: Forwarded unchanged to the LLM-backed stages (3 and 5).
        llm_api_key: Forwarded unchanged to the LLM-backed stages (3 and 5).

    Yields:
        On success, in this order:
            {"type": "status", "message": "Building timeline…"}
            {"type": "pipeline_stage", "stage": 1, ...}
            {"type": "pipeline_stage", "stage": 2, ...}
            {"type": "pipeline_stage", "stage": 3, ...}
            {"type": "pipeline_stage", "stage": 4, ...}
            {"type": "hypotheses", "data": [...]}
            {"type": "status", "message": "Synthesizing…"}
            {"type": "reasoning", "text": "..."}  — only when synthesis produces text
            {"type": "done"}

        If a stage (or the construction of its progress event) raises, the
        events already emitted are followed by
            {"type": "error", "message": "Pipeline error in stage N (name)"}
            {"type": "done"}
        and the generator stops.
    """
    # NOTE: each stage builds its progress event inside the same ``try`` as
    # the stage call. A failure while summarising a result (for example in
    # ``dataclasses.asdict``) is therefore reported as error + done instead of
    # tearing the stream down without a terminating event.

    # Stage 1: Timeline reconstruction
    yield {"type": "status", "message": "Building timeline…"}
    try:
        timeline = await asyncio.to_thread(
            TimelineReconstructor().reconstruct, entries
        )
        timeline_event = {
            "type": "pipeline_stage",
            "stage": 1,
            "name": "timeline",
            "message": (
                f"Built {len(timeline.clusters)} clusters, "
                f"{timeline.burst_count} bursts"
            ),
        }
    except Exception as exc:
        for event in _stage_failure_events(1, "timeline", exc):
            yield event
        return
    yield timeline_event

    # Stage 2: Severity classification
    try:
        classified = await asyncio.to_thread(
            SeverityClassifier().classify, timeline
        )
        # Tally how many clusters landed in each severity bucket.
        sev_counts: dict[str, int] = {}
        for sev in classified.cluster_severities.values():
            sev_counts[sev] = sev_counts.get(sev, 0) + 1
        counts_str = ", ".join(f"{k}:{v}" for k, v in sorted(sev_counts.items()))
        classifier_event = {
            "type": "pipeline_stage",
            "stage": 2,
            "name": "classifier",
            "message": f"{classified.classifier_used} classifier: {counts_str}",
        }
    except Exception as exc:
        for event in _stage_failure_events(2, "classifier", exc):
            yield event
        return
    yield classifier_event

    # Stage 3: Root-cause hypotheses
    try:
        hypotheses = await asyncio.to_thread(
            RootCauseHypothesizer().hypothesize,
            classified,
            ctx,
            query,
            llm_url,
            llm_model,
            llm_api_key,
        )
        hypotheses_event = {
            "type": "pipeline_stage",
            "stage": 3,
            "name": "hypotheses",
            "message": f"{len(hypotheses)} hypotheses generated",
        }
    except Exception as exc:
        for event in _stage_failure_events(3, "hypothesizer", exc):
            yield event
        return
    yield hypotheses_event

    # Stage 4: False-positive suppression
    try:
        ranked = await asyncio.to_thread(
            FalsePositiveSuppressor().suppress, hypotheses, db_path
        )
        suppressed = sum(1 for rh in ranked if rh.suppress)
        active = len(ranked) - suppressed
        suppressor_event = {
            "type": "pipeline_stage",
            "stage": 4,
            "name": "suppressor",
            "message": f"{suppressed} suppressed, {active} active",
        }
        ranked_event = {
            "type": "hypotheses",
            "data": [dataclasses.asdict(rh) for rh in ranked],
        }
    except Exception as exc:
        for event in _stage_failure_events(4, "suppressor", exc):
            yield event
        return
    yield suppressor_event
    yield ranked_event

    # Stage 5: Summary synthesis
    yield {"type": "status", "message": "Synthesizing…"}
    try:
        synthesis_text = await asyncio.to_thread(
            SummarySynthesizer().synthesize,
            ranked,
            timeline,
            ctx,
            query,
            llm_url,
            llm_model,
            llm_api_key,
        )
    except Exception as exc:
        for event in _stage_failure_events(5, "synthesizer", exc):
            yield event
        return
    if synthesis_text:
        yield {"type": "reasoning", "text": synthesis_text}
    yield {"type": "done"}


def _stage_failure_events(
    stage: int, label: str, exc: Exception
) -> list[dict[str, Any]]:
    """Log a failed stage and build the two events that terminate the stream.

    Call this from inside the ``except`` block that caught *exc* so that
    ``logger.exception`` can attach the active traceback.

    Args:
        stage: 1-based stage number used in the log line and error message.
        label: Stage label used in the log line and error message.
        exc: The exception that was caught.

    Returns:
        An ``error`` event followed by a ``done`` event.
    """
    logger.exception("Stage %d (%s) failed: %s", stage, label, exc)
    return [
        {"type": "error", "message": f"Pipeline error in stage {stage} ({label})"},
        {"type": "done"},
    ]