From 2375e073bada1334f753de1885b6ed1fb1aed4f9 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 25 May 2026 19:11:32 -0700 Subject: [PATCH] feat(pipeline): add TURNSTONE_CLASSIFIER_MODEL env var for Stage 2 ML config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes the HuggingFace classifier model for Stage 2 configurable via TURNSTONE_CLASSIFIER_MODEL. When unset (default), Stage 2 falls back to pattern_tags then regex — no download required on first run. Also documents TURNSTONE_MULTI_AGENT_DIAGNOSE, TURNSTONE_CLASSIFIER_MODEL, TURNSTONE_EMBED_BACKEND/MODEL/DEVICE in .env.example. --- .env.example | 15 +++++++++++++++ app/services/diagnose/pipeline.py | 9 ++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index c6a152c..5199a4d 100644 --- a/.env.example +++ b/.env.example @@ -26,3 +26,18 @@ # --- Periodic batch glean --- # Seconds between automatic glean runs from sources.yaml. Set to 0 to disable. # TURNSTONE_GLEAN_INTERVAL=900 + +# --- Multi-agent diagnose pipeline (experimental) --- +# Enable the 5-stage ML pipeline instead of the single-LLM summarize() call. +# TURNSTONE_MULTI_AGENT_DIAGNOSE=true + +# Stage 2 — ML severity classifier (optional; falls back to pattern_tags then regex). +# Recommended: byviz/bylastic_classification_logs (~300MB, downloaded from HuggingFace) +# TURNSTONE_CLASSIFIER_MODEL=byviz/bylastic_classification_logs + +# Stage 4 — Embedding backend for false-positive suppression. +# sentence_transformers: in-process local model (downloads on first use) +# ollama: uses a running Ollama instance (no download needed if model is already pulled) +# TURNSTONE_EMBED_BACKEND=sentence_transformers +# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5 +# TURNSTONE_EMBED_DEVICE=cpu diff --git a/app/services/diagnose/pipeline.py b/app/services/diagnose/pipeline.py index a60952f..6539b8f 100644 --- a/app/services/diagnose/pipeline.py +++ b/app/services/diagnose/pipeline.py @@ -5,10 +5,17 @@ from __future__ import annotations import asyncio import dataclasses import logging +import os from collections.abc import AsyncGenerator from pathlib import Path from typing import Any +# Optional ML classifier model for Stage 2. +# When empty (default), Stage 2 falls back to pattern_tags then regex. +# Set TURNSTONE_CLASSIFIER_MODEL to a HuggingFace model ID to enable ML classification. +# Recommended: byviz/bylastic_classification_logs (DistilBERT, ~300MB) +_CLASSIFIER_MODEL: str = os.environ.get("TURNSTONE_CLASSIFIER_MODEL", "") + from app.context.retriever import RetrievedContext from app.services.diagnose.classifier import SeverityClassifier from app.services.diagnose.hypothesizer import RootCauseHypothesizer @@ -74,7 +81,7 @@ async def run_pipeline( # Stage 2: Severity classification try: classified = await asyncio.to_thread( - SeverityClassifier().classify, timeline + SeverityClassifier(model_id=_CLASSIFIER_MODEL).classify, timeline ) except Exception as exc: logger.exception("Stage 2 (classifier) failed: %s", exc)