turnstone/app/services/diagnose/models.py
pyr0ball 7816ceba61 refactor: pipeline cleanup — 6 follow-up fixes (#33-#38)
- #33: Wrap ClassifiedTimeline.cluster_severities in MappingProxyType for
  true immutability (frozen=True only blocks field reassignment, not dict
  mutation).

- #34: Remove dead suppression branch in synthesizer._build_hypothesis_block.
  active[] is already filtered to entries where rh.suppress is false, so the
  'Yes — suppressed' branch was unreachable. The block now shows the novelty
  score only.

- #35: Extract shared _llm_client.py with call_llm() + extract_content() +
  strip_json_fences(). Both RootCauseHypothesizer and SummarySynthesizer
  now import from one source. Also strips JSON fences from LLM output before
  parsing in hypothesizer._parse_response.

- #36: Add per-stage try/except in pipeline.run_pipeline(). Unhandled
  stage exceptions now emit {type: 'error'} + {type: 'done'} SSE events
  instead of silently closing the stream.

- #37: Move format_context_block() call inside the legacy LLM branch in
  diagnose/__init__.py — it was being computed unconditionally but only
  used in the non-pipeline path.

- #38: Coerce supporting_cluster_ids items to str() in hypothesizer
  _parse_response to guard against LLMs returning integers instead of
  string cluster IDs.
2026-05-25 19:05:56 -07:00

77 lines
2 KiB
Python

"""Pipeline data types for the multi-agent diagnose pipeline."""
from __future__ import annotations
from dataclasses import dataclass
from types import MappingProxyType
from typing import Literal
# Closed set of severity labels accepted by the pipeline data types below.
SeverityLabel = Literal[
    "CRITICAL",
    "ERROR",
    "WARN",
    "INFO",
    "DEBUG",
    "UNKNOWN",
]
@dataclass(frozen=True)
class EventCluster:
    """Immutable record for one time-correlated group of log entries.

    Instances are frozen: fields cannot be reassigned after construction.
    Field order is part of the positional constructor signature.
    """

    # --- identity and membership ---
    cluster_id: str
    entries: tuple[str, ...]  # references to entries by entry_id

    # --- time extent ---
    # NOTE(review): the *_iso names suggest ISO-formatted timestamps, with
    # None when unavailable -- confirm against the code that builds clusters.
    start_iso: str | None
    end_iso: str | None
    duration_seconds: float

    # --- provenance and classification ---
    source_ids: tuple[str, ...]
    pattern_tags: tuple[str, ...]
    severity: SeverityLabel

    # --- timing characteristics ---
    burst: bool
    gap_before_seconds: float

    # --- display ---
    representative_text: str
@dataclass(frozen=True)
class TimelineResult:
    """Immutable, structured timeline made of event clusters.

    The clusters are built from log entries; this record also carries
    aggregate figures for the timeline as a whole.
    """

    # The clusters that make up the timeline.
    clusters: tuple[EventCluster, ...]

    # Aggregate counts.
    total_entries: int

    # NOTE(review): presumably the timestamps bounding the covered window,
    # None when unknown -- confirm with the timeline builder.
    window_start: str | None
    window_end: str | None

    gap_count: int
    burst_count: int

    # NOTE(review): presumably source ids ordered by prominence -- confirm.
    dominant_sources: tuple[str, ...]
@dataclass(frozen=True)
class ClassifiedTimeline:
    """Timeline annotated with an assigned severity per cluster.

    ``cluster_severities`` is always stored as a ``MappingProxyType`` wrapped
    around a private copy of the mapping passed to the constructor. This makes
    the mapping genuinely read-only, consistent with the ``frozen=True``
    intent: ``frozen=True`` alone only blocks field reassignment, and a bare
    ``MappingProxyType`` is merely a view, so a caller that kept the
    underlying dict could still change it.

    Raises:
        TypeError: if ``cluster_severities`` cannot be converted with
            ``dict(...)`` (for example ``None``).
    """

    timeline: TimelineResult
    cluster_severities: MappingProxyType  # MappingProxyType[str, SeverityLabel]
    # Which classifier produced the labels (presumably -- confirm with caller).
    classifier_used: Literal["ml", "pattern_tags", "regex"]
    model_id: str | None

    def __post_init__(self) -> None:
        """Replace ``cluster_severities`` with a read-only private snapshot."""
        # Copy first so later changes to the caller's mapping cannot leak in.
        # A shallow copy is enough as long as keys and values are immutable
        # (the annotation comment above indicates str -> SeverityLabel).
        snapshot = MappingProxyType(dict(self.cluster_severities))
        # The dataclass is frozen, so normal attribute assignment would raise;
        # object.__setattr__ is the documented way to set it in __post_init__.
        object.__setattr__(self, "cluster_severities", snapshot)
@dataclass(frozen=True)
class Hypothesis:
    """Immutable root-cause hypothesis, as generated by Stage 3."""

    # --- identity and human-readable content ---
    hypothesis_id: str
    title: str
    description: str

    # --- scoring ---
    # NOTE(review): value range is not visible here (0..1 assumed) -- confirm.
    confidence: float

    # --- evidence and references ---
    supporting_cluster_ids: tuple[str, ...]  # ids of the backing clusters
    runbook_refs: tuple[str, ...]

    # --- classification ---
    severity: SeverityLabel
@dataclass(frozen=True)
class RankedHypothesis:
    """Immutable hypothesis enriched by Stage 4 false-positive suppression."""

    # The hypothesis being ranked.
    hypothesis: Hypothesis

    # --- ranking scores ---
    # NOTE(review): score ranges and how the two relate are not visible
    # here -- confirm with the Stage 4 implementation.
    novelty_score: float
    similarity_to_known: float

    # --- suppression verdict ---
    suppress: bool
    # NOTE(review): presumably None when ``suppress`` is False -- confirm.
    suppression_reason: str | None