- Add app/services/anomaly.py: batch scorer using HF text-classification
pipeline; rewrites anomaly_score/anomaly_label/anomaly_scored_at on
log_entries; inserts high-confidence hits into detections table
- Add app/tasks/anomaly_scorer.py: background task (same shape as
glean_scheduler); triggered after each glean cycle when
TURNSTONE_ANOMALY_MODEL is set
- DB schema: add anomaly_score/anomaly_label/anomaly_scored_at columns to
log_entries (idempotent ALTER TABLE migration); add detections table
- Wire scorer into scheduler_loop and glean_scheduler.run_once; no-op when
model env var is empty (safe to leave unconfigured)
- REST endpoints: GET/POST /api/anomaly/status, /api/anomaly/run,
GET /api/anomaly/detections, POST /api/anomaly/detections/{id}/acknowledge
- Reuses Hybrid-BERT label map from diagnose/classifier.py; works with any
HF text-classification model
- 12 new tests; 406/406 passing
Closes: #10
114 lines
3.7 KiB
Python
114 lines
3.7 KiB
Python
"""Background anomaly scoring task.

Runs score_unscored() after each glean cycle (triggered by glean_scheduler)
or on its own interval when TURNSTONE_ANOMALY_INTERVAL is set.

Set TURNSTONE_ANOMALY_MODEL to a HuggingFace model ID to activate.
When the env var is empty (default) the scorer is a no-op.
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
|
|
from app.services.anomaly import ScoringResult, score_unscored
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_DEFAULT_INTERVAL = int(os.environ.get("TURNSTONE_ANOMALY_INTERVAL", "0"))
|
|
|
|
_lock = asyncio.Lock()
|
|
|
|
|
|
@dataclass
class ScorerState:
    """Mutable snapshot of the anomaly scorer's most recent activity.

    The ``last_*`` fields describe the latest pass, the ``total_*`` fields
    accumulate across passes, and timestamps are stored as ISO-8601 strings.
    """

    # --- most recent pass ---
    last_run_at: str | None = None  # start time of the latest pass
    last_duration_s: float | None = None  # duration of the latest pass, seconds
    last_scored: int = 0  # entries scored by the latest pass
    last_detections: int = 0  # detections produced by the latest pass
    last_error: str | None = None  # error text of the latest pass, if any

    # --- scheduling / lifecycle ---
    run_count: int = 0  # number of recorded passes
    next_run_at: str | None = None  # next scheduled pass, set by scorer_loop
    running: bool = False  # True while a pass is in progress

    # --- cumulative counters ---
    total_scored: int = 0
    total_detections: int = 0
|
|
|
|
|
|
# Single module-wide state object; run_once and scorer_loop mutate it in place.
_state = ScorerState()


def get_state() -> ScorerState:
    """Return the shared scorer state object (the live instance, not a copy)."""
    return _state
|
|
|
|
|
|
async def run_once(
    db_path: Path,
    model_id: str = "",
    device: str = "cpu",
    batch_size: int = 256,
    threshold: float = 0.75,
) -> ScoringResult:
    """Run one scoring pass and record its outcome in the module-level state.

    The blocking ``score_unscored`` call runs in the event loop's default
    executor. Passes never overlap: if one is already in progress this
    returns a skipped result immediately instead of waiting.

    Args:
        db_path: Database path handed to ``score_unscored``.
        model_id: Model identifier handed to ``score_unscored``. The module
            docstring says an empty value makes scoring a no-op; that check is
            not in this function — presumably inside ``score_unscored``
            (TODO confirm).
        device: Device string handed to ``score_unscored``.
        batch_size: Batch size handed to ``score_unscored``.
        threshold: Threshold handed to ``score_unscored``.

    Returns:
        The ``ScoringResult`` produced by ``score_unscored``; a result with
        ``skipped=True`` when another pass holds the lock; or a result
        carrying only ``error`` when scoring raised an exception.
    """
    # There is no await between this check and the acquisition below, so
    # another coroutine cannot take the lock in between.
    if _lock.locked():
        return ScoringResult(skipped=True, error="scorer already running")

    async with _lock:
        loop = asyncio.get_running_loop()
        started_at = datetime.now(tz=timezone.utc)
        # Durations use the loop's monotonic clock so a wall-clock adjustment
        # during a pass cannot produce a negative or inflated value.
        started_tick = loop.time()
        _state.running = True
        try:
            result: ScoringResult = await loop.run_in_executor(
                None,
                lambda: score_unscored(db_path, model_id, device, batch_size, threshold),
            )
            duration = loop.time() - started_tick
            _state.last_run_at = started_at.isoformat()
            _state.last_duration_s = round(duration, 2)
            _state.last_scored = result.scored
            _state.last_detections = result.detections
            _state.last_error = result.error
            _state.run_count += 1
            _state.total_scored += result.scored
            _state.total_detections += result.detections
            if not result.skipped:
                logger.info(
                    "Anomaly scorer: %d scored, %d detections in %.1fs",
                    result.scored, result.detections, duration,
                )
            return result
        except Exception as exc:
            # Boundary handler: a failed pass is reported through the state
            # and the return value rather than raised to the caller.
            duration = loop.time() - started_tick
            _state.last_run_at = started_at.isoformat()
            _state.last_duration_s = round(duration, 2)
            # Reset the per-run counters so the state does not show the
            # previous pass's numbers next to this pass's error.
            _state.last_scored = 0
            _state.last_detections = 0
            _state.last_error = str(exc)
            _state.run_count += 1
            # logger.exception keeps the traceback for diagnosis.
            logger.exception("Anomaly scorer failed: %s", exc)
            return ScoringResult(error=str(exc))
        finally:
            # Also runs on cancellation, which `except Exception` does not catch.
            _state.running = False
|
|
|
|
|
|
async def scorer_loop(
    db_path: Path,
    model_id: str,
    device: str,
    interval_s: int,
    batch_size: int = 256,
    threshold: float = 0.75,
) -> None:
    """Run a scoring pass every ``interval_s`` seconds until cancelled.

    After each pass the expected time of the next one is published in the
    shared state's ``next_run_at``; it is cleared again whenever the loop
    exits, whether the cancellation arrives during a pass or during the sleep.

    Args:
        db_path: Database path forwarded to ``run_once``.
        model_id: Model identifier forwarded to ``run_once``.
        device: Device string forwarded to ``run_once``.
        interval_s: Seconds to sleep between passes. A non-positive value
            logs a warning and returns without running a pass, because it
            would otherwise run passes back to back with no delay.
        batch_size: Batch size forwarded to ``run_once``.
        threshold: Threshold forwarded to ``run_once``.

    Raises:
        asyncio.CancelledError: Re-raised after cleanup when the task is
            cancelled.
    """
    if interval_s <= 0:
        logger.warning(
            "Anomaly scorer loop not started — interval must be positive, got %r",
            interval_s,
        )
        return

    logger.info("Anomaly scorer loop started — interval %ds, model: %s", interval_s, model_id)
    try:
        while True:
            await run_once(db_path, model_id, device, batch_size, threshold)
            next_run = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s)
            _state.next_run_at = next_run.isoformat()
            await asyncio.sleep(interval_s)
    except asyncio.CancelledError:
        logger.info("Anomaly scorer loop cancelled")
        raise
    finally:
        # Covers cancellation at any await point, not only during the sleep.
        _state.next_run_at = None
|