feat(pipeline): add TURNSTONE_CLASSIFIER_MODEL env var for Stage 2 ML config
Makes the HuggingFace classifier model for Stage 2 configurable via TURNSTONE_CLASSIFIER_MODEL. When unset (default), Stage 2 falls back to pattern_tags, then regex — no download required on first run. Also documents TURNSTONE_MULTI_AGENT_DIAGNOSE, TURNSTONE_CLASSIFIER_MODEL, TURNSTONE_EMBED_BACKEND, TURNSTONE_EMBED_MODEL, and TURNSTONE_EMBED_DEVICE in .env.example.
This commit is contained in:
parent
7816ceba61
commit
ddd95137a8
2 changed files with 23 additions and 1 deletion
15
.env.example
15
.env.example
|
|
@ -26,3 +26,18 @@
|
||||||
# --- Periodic batch glean ---
|
# --- Periodic batch glean ---
|
||||||
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
|
# Seconds between automatic glean runs from sources.yaml. Set to 0 to disable.
|
||||||
# TURNSTONE_GLEAN_INTERVAL=900
|
# TURNSTONE_GLEAN_INTERVAL=900
|
||||||
|
|
||||||
|
# --- Multi-agent diagnose pipeline (experimental) ---
|
||||||
|
# Enable the 5-stage ML pipeline instead of the single-LLM summarize() call.
|
||||||
|
# TURNSTONE_MULTI_AGENT_DIAGNOSE=true
|
||||||
|
|
||||||
|
# Stage 2 — ML severity classifier (optional; when unset, Stage 2 falls back to pattern_tags, then regex).
|
||||||
|
# Recommended: byviz/bylastic_classification_logs (~300MB, downloaded from HuggingFace)
|
||||||
|
# TURNSTONE_CLASSIFIER_MODEL=byviz/bylastic_classification_logs
|
||||||
|
|
||||||
|
# Stage 4 — Embedding backend for false-positive suppression.
|
||||||
|
# sentence_transformers: in-process local model (downloads on first use)
|
||||||
|
# ollama: uses a running Ollama instance (no download needed if model is already pulled)
|
||||||
|
# TURNSTONE_EMBED_BACKEND=sentence_transformers
|
||||||
|
# TURNSTONE_EMBED_MODEL=BAAI/bge-small-en-v1.5
|
||||||
|
# TURNSTONE_EMBED_DEVICE=cpu
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,17 @@ from __future__ import annotations
|
||||||
import asyncio
|
import asyncio
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
# Optional ML classifier model for Stage 2.
|
||||||
|
# When empty (default), Stage 2 falls back to pattern_tags then regex.
|
||||||
|
# Set TURNSTONE_CLASSIFIER_MODEL to a HuggingFace model ID to enable ML classification.
|
||||||
|
# Recommended: byviz/bylastic_classification_logs (DistilBERT, ~300MB)
|
||||||
|
_CLASSIFIER_MODEL: str = os.environ.get("TURNSTONE_CLASSIFIER_MODEL", "")
|
||||||
|
|
||||||
from app.context.retriever import RetrievedContext
|
from app.context.retriever import RetrievedContext
|
||||||
from app.services.diagnose.classifier import SeverityClassifier
|
from app.services.diagnose.classifier import SeverityClassifier
|
||||||
from app.services.diagnose.hypothesizer import RootCauseHypothesizer
|
from app.services.diagnose.hypothesizer import RootCauseHypothesizer
|
||||||
|
|
@ -74,7 +81,7 @@ async def run_pipeline(
|
||||||
# Stage 2: Severity classification
|
# Stage 2: Severity classification
|
||||||
try:
|
try:
|
||||||
classified = await asyncio.to_thread(
|
classified = await asyncio.to_thread(
|
||||||
SeverityClassifier().classify, timeline
|
SeverityClassifier(model_id=_CLASSIFIER_MODEL).classify, timeline
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.exception("Stage 2 (classifier) failed: %s", exc)
|
logger.exception("Stage 2 (classifier) failed: %s", exc)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue