refactor: pipeline cleanup — 6 follow-up fixes (#33–#38) #40

Merged
pyr0ball merged 3 commits from feat/pipeline-cleanup into main 2026-05-25 20:00:11 -07:00
2 changed files with 23 additions and 1 deletion
Showing only changes of commit 4a2fd0fb0d - Show all commits

View file

@@ -26,3 +26,18 @@
# --- Periodic batch glean --- # --- Periodic batch glean ---
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable. # Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
# TURNSTONE_GLEAN_INTERVAL=900 # TURNSTONE_GLEAN_INTERVAL=900
# --- Multi-agent diagnose pipeline (experimental) ---
# Enable the 5-stage ML pipeline instead of the single-LLM summarize() call.
# TURNSTONE_MULTI_AGENT_DIAGNOSE=true
# Stage 2 — ML severity classifier (optional; falls back to pattern_tags then regex).
# Recommended: byviz/bylastic_classification_logs (~300MB, downloaded from HuggingFace)
# TURNSTONE_CLASSIFIER_MODEL=byviz/bylastic_classification_logs
# Stage 4 — Embedding backend for false-positive suppression.
# sentence_transformers: in-process local model (downloads on first use)
# ollama: uses a running Ollama instance (no download needed if model is already pulled)
# TURNSTONE_EMBED_BACKEND=sentence_transformers
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
# TURNSTONE_EMBED_DEVICE=cpu

View file

@@ -5,10 +5,17 @@ from __future__ import annotations
import asyncio import asyncio
import dataclasses import dataclasses
import logging import logging
import os
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
# Optional ML classifier model for Stage 2.
# When empty (default), Stage 2 falls back to pattern_tags then regex.
# Set TURNSTONE_CLASSIFIER_MODEL to a HuggingFace model ID to enable ML classification.
# Recommended: byviz/bylastic_classification_logs (DistilBERT, ~300MB)
_CLASSIFIER_MODEL: str = os.environ.get("TURNSTONE_CLASSIFIER_MODEL", "")
from app.context.retriever import RetrievedContext from app.context.retriever import RetrievedContext
from app.services.diagnose.classifier import SeverityClassifier from app.services.diagnose.classifier import SeverityClassifier
from app.services.diagnose.hypothesizer import RootCauseHypothesizer from app.services.diagnose.hypothesizer import RootCauseHypothesizer
@@ -74,7 +81,7 @@ async def run_pipeline(
# Stage 2: Severity classification # Stage 2: Severity classification
try: try:
classified = await asyncio.to_thread( classified = await asyncio.to_thread(
SeverityClassifier().classify, timeline SeverityClassifier(model_id=_CLASSIFIER_MODEL).classify, timeline
) )
except Exception as exc: except Exception as exc:
logger.exception("Stage 2 (classifier) failed: %s", exc) logger.exception("Stage 2 (classifier) failed: %s", exc)