turnstone/app/services/diagnose/classifier.py
pyr0ball 5c95bc0e96 feat: Stage 2 — SeverityClassifier for multi-agent diagnose pipeline (issue #29)
Three-path classification: ML (transformers pipeline, lazy singleton) →
pattern_tags (YAML pattern severity dict) → regex (detect_severity).

- Path A: HF text-classification pipeline loaded lazily on first classify()
  call via module-level singleton; shim promotes ERROR+keyword hits to CRITICAL
  and demotes low-confidence INFO to DEBUG.
- Path B: maps cluster.pattern_tags through the loaded pattern severity dict;
  picks the highest severity across matching tags.
- Path C: falls back to detect_severity() regex scan on representative_text;
  defaults to INFO when no keyword matches.
- Pattern file resolved from constructor arg or TURNSTONE_PATTERNS env var
  (mirrors app/rest.py convention).
- No crash when transformers is not installed; ImportError on per-cluster ML
  inference triggers clean per-cluster fallback to pattern_tags/regex.
- ClassifiedTimeline.classifier_used reflects the primary session path.

Tests (10 new, 328 total, all passing):
- ML ERROR, CRITICAL promotion, DEBUG demotion, WARNING→WARN
- pattern_tags resolution from YAML fixture
- regex ERROR detection and INFO default
- ImportError clean fallback
- empty timeline no-crash
- ClassifiedTimeline FrozenInstanceError on mutation

Closes: #29
2026-05-25 13:27:17 -07:00

249 lines
8.3 KiB
Python

"""Stage 2: Severity Classifier — ML with two fallback levels.
Classification strategy (in priority order):
Path A — ML: Hugging Face text-classification pipeline, loaded lazily.
Path B — pattern_tags: Map cluster.pattern_tags through the loaded pattern
severity dict; pick the highest severity across matching tags.
Path C — regex: Call detect_severity() from app.glean.base on the cluster's
representative_text.
Each cluster is classified independently. The ``classifier_used`` field on the
returned ``ClassifiedTimeline`` reflects the primary path (the one that governed
the overall classification session, not individual cluster fallbacks).
"""
from __future__ import annotations
import logging
import os
from pathlib import Path
from typing import Any
from app.services.diagnose.models import (
ClassifiedTimeline,
EventCluster,
SeverityLabel,
TimelineResult,
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Module-level ML singleton — reset to None between tests via the fixture
# ---------------------------------------------------------------------------
_ml_classifier: Any | None = None


def _get_ml_classifier(model_id: str, device: str) -> Any:
    """Hand back the shared HF pipeline, building it the first time it is needed.

    The pipeline lives in the module-level ``_ml_classifier`` slot. Once that
    slot holds a value, ``model_id`` and ``device`` are not consulted again:
    every later call returns the object already stored there.

    Parameters
    ----------
    model_id:
        Model identifier forwarded to ``transformers.pipeline`` as ``model``.
    device:
        Device string forwarded to ``transformers.pipeline`` as ``device``.

    Raises
    ------
    ImportError
        If the slot is empty and ``transformers`` cannot be imported.
    """
    global _ml_classifier  # noqa: PLW0603
    if _ml_classifier is not None:
        return _ml_classifier

    # Imported on demand so that merely importing this module does not
    # require ``transformers`` to be installed.
    from transformers import pipeline as hf_pipeline  # type: ignore[import-untyped]

    _ml_classifier = hf_pipeline("text-classification", model=model_id, device=device)
    return _ml_classifier
# ---------------------------------------------------------------------------
# Label mapping
# ---------------------------------------------------------------------------
# Upper-cased classifier label -> SeverityLabel. Every canonical label maps to
# itself; "WARNING" is the single alias and is folded into "WARN".
_LABEL_MAP: dict[str, SeverityLabel] = {
    "ERROR": "ERROR",
    "WARNING": "WARN",
    "WARN": "WARN",
    "INFO": "INFO",
    "DEBUG": "DEBUG",
    "CRITICAL": "CRITICAL",
}

# Lower-case phrases whose presence in a cluster's text lets a confident
# ERROR be promoted to CRITICAL.
_CRITICAL_KEYWORDS: frozenset[str] = frozenset(
    (
        # generic markers
        "critical",
        "fatal",
        "panic",
        "kernel panic",
        # memory exhaustion
        "oom",
        "out of memory",
        # crashes
        "segfault",
        "segmentation fault",
    )
)

# Rank of each severity name; a larger number means more severe. "WARN" and
# "WARNING" share a rank, and ``None`` (no severity) ranks below everything.
_SEVERITY_ORDER: dict[str | None, int] = {
    "CRITICAL": 5,
    "ERROR": 4,
    "WARN": 3,
    "WARNING": 3,
    "INFO": 2,
    "DEBUG": 1,
    None: 0,
}
def _mentions_critical_keyword(text: str) -> bool:
    """Return True when *text* contains a critical keyword at the start of a word.

    A plain substring test would let ``"oom"`` fire inside ``"room"`` and
    ``"critical"`` inside ``"noncritical"``. A hit therefore only counts when
    the character before it is not a letter (or the hit is at position 0).
    Suffixes are still allowed, so ``"OOMKilled"`` and ``"panicked"`` match.
    """
    lowered = text.lower()
    for keyword in _CRITICAL_KEYWORDS:
        start = lowered.find(keyword)
        while start != -1:
            if start == 0 or not lowered[start - 1].isalpha():
                return True
            # This occurrence was embedded in a longer word; look further on.
            start = lowered.find(keyword, start + 1)
    return False


def _map_label(label: str, score: float, text: str) -> SeverityLabel:
    """Translate a raw classifier label into a severity label.

    Two adjustments are applied before the plain ``_LABEL_MAP`` lookup:

    * an ``ERROR`` scored above 0.95 whose text mentions one of
      ``_CRITICAL_KEYWORDS`` is promoted to ``"CRITICAL"``;
    * an ``INFO`` scored below 0.4 is demoted to ``"DEBUG"``.

    Parameters
    ----------
    label:
        Label emitted by the classifier; compared case-insensitively.
    score:
        Confidence attached to *label*.
    text:
        The text that was classified; scanned for critical keywords.

    Returns
    -------
    The mapped severity, or ``"UNKNOWN"`` when *label* is not a key of
    ``_LABEL_MAP``.
    """
    upper = label.upper()
    if upper == "ERROR" and score > 0.95 and _mentions_critical_keyword(text):
        return "CRITICAL"
    if upper == "INFO" and score < 0.4:
        return "DEBUG"
    return _LABEL_MAP.get(upper, "UNKNOWN")  # type: ignore[return-value]
def _highest_from_tags(
    tags: tuple[str, ...], severity_map: dict[str, str]
) -> SeverityLabel | None:
    """Return the most severe label among the tags present in *severity_map*.

    Severity strings are upper-cased before they are ranked, so a map holding
    ``"error"`` / ``"critical"`` is ordered the same way as one holding
    ``"ERROR"`` / ``"CRITICAL"``. Tags without an entry in *severity_map* are
    ignored and never mask a later tag that does have one.

    Parameters
    ----------
    tags:
        Pattern tags attached to a cluster.
    severity_map:
        Mapping from tag name to severity string.

    Returns
    -------
    The highest-ranked severity (``"WARNING"`` normalised to ``"WARN"``), or
    ``None`` when no tag has an entry. A severity string that is not a key of
    ``_SEVERITY_ORDER`` ranks lowest and is returned upper-cased only when
    nothing ranked beats it. Ties keep the earliest tag.
    """
    best: str | None = None
    best_rank = -1
    for tag in tags:
        sev = severity_map.get(tag)
        if sev is None:
            continue
        candidate = sev.upper()
        rank = _SEVERITY_ORDER.get(candidate, 0)
        if rank > best_rank:
            best_rank, best = rank, candidate
    if best is None:
        return None
    return "WARN" if best == "WARNING" else best  # type: ignore[return-value]
# ---------------------------------------------------------------------------
# SeverityClassifier
# ---------------------------------------------------------------------------
class SeverityClassifier:
    """Classify each EventCluster's severity using ML, patterns, or regex fallback.

    Parameters
    ----------
    model_id:
        Hugging Face model identifier. When empty (default), ML is skipped.
    device:
        Torch device string passed to the HF pipeline (e.g. ``"cpu"`` or ``"cuda:0"``).
    pattern_file:
        Path to the YAML pattern file. When ``None`` the classifier reads
        ``TURNSTONE_PATTERNS`` env var (same logic as ``app/rest.py``).
    """

    def __init__(
        self,
        model_id: str = "",
        device: str = "cpu",
        pattern_file: Path | None = None,
    ) -> None:
        self._model_id = model_id
        self._device = device
        self._pattern_file: Path | None = pattern_file
        # Pattern name -> severity string; filled by _ensure_patterns_loaded().
        self._pattern_severity: dict[str, str] = {}
        # True once pattern resolution has completed without raising.
        self._patterns_loaded = False

    # ------------------------------------------------------------------
    # Lazy loaders
    # ------------------------------------------------------------------

    def _resolve_pattern_file(self) -> Path | None:
        """Resolve pattern file from constructor arg or env var.

        The constructor argument wins. Otherwise ``TURNSTONE_PATTERNS`` is
        treated as a directory and ``default.yaml`` inside it is used.
        Returns ``None`` when neither source is set.
        """
        if self._pattern_file is not None:
            return self._pattern_file
        env_dir = os.environ.get("TURNSTONE_PATTERNS")
        if env_dir:
            return Path(env_dir) / "default.yaml"
        return None

    def _ensure_patterns_loaded(self) -> None:
        """Populate ``_pattern_severity`` from the pattern YAML file (once).

        The loaded flag is set only after resolution and loading finish
        without raising. If the import or ``load_patterns`` fails, the error
        propagates and the next call tries again, instead of the classifier
        silently continuing with an empty pattern map.
        """
        if self._patterns_loaded:
            return
        path = self._resolve_pattern_file()
        if path is not None:
            from app.glean.base import load_patterns

            self._pattern_severity = {
                p.name: p.severity for p in load_patterns(path)
            }
        self._patterns_loaded = True

    # ------------------------------------------------------------------
    # Per-cluster classification helpers
    # ------------------------------------------------------------------

    def _classify_cluster_ml(self, cluster: EventCluster) -> SeverityLabel | None:
        """Attempt ML classification. Returns None on any inference failure.

        The pipeline result is indexed as a sequence of mappings carrying
        ``"label"`` and ``"score"``; only the first entry is used. An empty
        result also yields ``None``.
        """
        try:
            pipe = _get_ml_classifier(self._model_id, self._device)
            results = pipe(cluster.representative_text)
            if not results:
                return None
            hit = results[0]
            return _map_label(hit["label"], hit["score"], cluster.representative_text)
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: any ML failure must degrade to the fallback
            # paths rather than abort the whole timeline. The traceback is
            # attached so the cause is diagnosable, except for ImportError,
            # which recurs for every cluster and would only flood the log.
            logger.warning(
                "ML inference failed for cluster %s — falling back",
                cluster.cluster_id,
                exc_info=not isinstance(exc, ImportError),
            )
            return None

    def _classify_cluster_pattern_tags(
        self, cluster: EventCluster
    ) -> SeverityLabel | None:
        """Derive severity from the cluster's pattern_tags. Returns None if no match."""
        return _highest_from_tags(cluster.pattern_tags, self._pattern_severity)

    def _classify_cluster_regex(self, cluster: EventCluster) -> SeverityLabel:
        """Classify by scanning representative_text with the severity regex.

        Falls back to ``"INFO"`` both when ``detect_severity`` returns
        ``None`` and when it returns a value missing from ``_LABEL_MAP``.
        """
        from app.glean.base import detect_severity

        raw = detect_severity(cluster.representative_text)
        if raw is None:
            return "INFO"
        return _LABEL_MAP.get(raw.upper(), "INFO")  # type: ignore[return-value]

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def classify(self, timeline: TimelineResult) -> ClassifiedTimeline:
        """Classify every cluster in *timeline* and return a ``ClassifiedTimeline``.

        Each cluster is tried against ML (when a model id is configured),
        then pattern tags (when a pattern map was loaded), then the regex
        scan, stopping at the first path that yields a severity.
        ``classifier_used`` names the first path that is enabled for the
        session, regardless of per-cluster fallbacks.
        """
        self._ensure_patterns_loaded()

        # Determine which primary path governs this session
        ml_available = bool(self._model_id)
        patterns_available = bool(self._pattern_severity)
        if ml_available:
            classifier_used: str = "ml"
        elif patterns_available:
            classifier_used = "pattern_tags"
        else:
            classifier_used = "regex"

        cluster_severities: dict[str, SeverityLabel] = {}
        for cluster in timeline.clusters:
            severity: SeverityLabel | None = None
            if ml_available:
                severity = self._classify_cluster_ml(cluster)
            if severity is None and patterns_available:
                severity = self._classify_cluster_pattern_tags(cluster)
            if severity is None:
                # The regex path always produces a label, so this is terminal.
                severity = self._classify_cluster_regex(cluster)
            cluster_severities[cluster.cluster_id] = severity

        return ClassifiedTimeline(
            timeline=timeline,
            cluster_severities=cluster_severities,
            classifier_used=classifier_used,  # type: ignore[arg-type]
            model_id=self._model_id if ml_available else None,
        )