"""Background anomaly scoring task. Runs score_unscored() after each glean cycle (triggered by glean_scheduler) or on its own interval when TURNSTONE_ANOMALY_INTERVAL is set. Set TURNSTONE_ANOMALY_MODEL to a HuggingFace model ID to activate. When the env var is empty (default) the scorer is a no-op. """ from __future__ import annotations import asyncio import logging import os from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone from pathlib import Path from app.services.anomaly import ScoringResult, score_unscored logger = logging.getLogger(__name__) _DEFAULT_INTERVAL = int(os.environ.get("TURNSTONE_ANOMALY_INTERVAL", "0")) _lock = asyncio.Lock() @dataclass class ScorerState: last_run_at: str | None = None last_duration_s: float | None = None last_scored: int = 0 last_detections: int = 0 last_error: str | None = None run_count: int = 0 next_run_at: str | None = None running: bool = False total_scored: int = 0 total_detections: int = 0 _state = ScorerState() def get_state() -> ScorerState: return _state async def run_once( db_path: Path, model_id: str = "", device: str = "cpu", batch_size: int = 256, threshold: float = 0.75, ) -> ScoringResult: """Score unscored entries once. Skips if already running or model not configured.""" if _lock.locked(): return ScoringResult(skipped=True, error="scorer already running") async with _lock: _state.running = True started = datetime.now(tz=timezone.utc) try: loop = asyncio.get_running_loop() result: ScoringResult = await loop.run_in_executor( None, lambda: score_unscored(db_path, model_id, device, batch_size, threshold), ) duration = (datetime.now(tz=timezone.utc) - started).total_seconds() _state.last_run_at = started.isoformat() _state.last_duration_s = round(duration, 2) _state.last_scored = result.scored _state.last_detections = result.detections _state.last_error = result.error _state.run_count += 1 _state.total_scored += result.scored _state.total_detections += result.detections if not result.skipped: logger.info( "Anomaly scorer: %d scored, %d detections in %.1fs", result.scored, result.detections, duration, ) return result except Exception as exc: duration = (datetime.now(tz=timezone.utc) - started).total_seconds() _state.last_run_at = started.isoformat() _state.last_duration_s = round(duration, 2) _state.last_error = str(exc) _state.run_count += 1 logger.error("Anomaly scorer failed: %s", exc) return ScoringResult(error=str(exc)) finally: _state.running = False async def scorer_loop( db_path: Path, model_id: str, device: str, interval_s: int, batch_size: int = 256, threshold: float = 0.75, ) -> None: """Score unscored entries every interval_s seconds until cancelled.""" logger.info("Anomaly scorer loop started — interval %ds, model: %s", interval_s, model_id) while True: await run_once(db_path, model_id, device, batch_size, threshold) next_run = datetime.now(tz=timezone.utc) + timedelta(seconds=interval_s) _state.next_run_at = next_run.isoformat() try: await asyncio.sleep(interval_s) except asyncio.CancelledError: logger.info("Anomaly scorer loop cancelled") _state.next_run_at = None raise