# turnstone/app/tasks/cybersec_scorer.py

"""Background task wrapper for the cybersec zero-shot scoring pipeline."""
from __future__ import annotations

import asyncio
import logging
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path

from app.services.cybersec import score_security_entries

# Module-scoped logger; records are emitted under this module's dotted name.
logger = logging.getLogger(__name__)

# Guards a scoring pass: run_once() returns immediately while this is held,
# so at most one pass is in flight at any time.
_lock = asyncio.Lock()


@dataclass
class CybersecState:
    """Mutable record of the scorer's most recent and cumulative results."""

    # ISO-8601 timestamp (UTC) of when the most recently recorded pass
    # started; None until a pass has been recorded.
    last_run_at: str | None = None
    # Duration of the most recently recorded pass, in seconds.
    last_duration_s: float | None = None
    # `scored` count reported by the most recently recorded pass.
    last_scored: int = 0
    # `detections` count reported by the most recently recorded pass.
    last_detections: int = 0
    # Error reported by the most recently recorded pass, or None.
    last_error: str | None = None
    # Number of passes recorded so far.
    run_count: int = 0
    # True while a pass is in progress.
    running: bool = False
    # Sum of `scored` over all recorded passes.
    total_scored: int = 0
    # Sum of `detections` over all recorded passes.
    total_detections: int = 0


# Single shared state instance: mutated by run_once(), read by get_state().
_state = CybersecState()


def get_state() -> dict:
    """Return the current scorer state as a new plain dict.

    The returned mapping is built fresh on every call, so callers may mutate
    it without affecting the module-level state.
    """
    # Keys are listed explicitly so the exported shape and ordering stay fixed.
    exported_keys = (
        "last_run_at",
        "last_duration_s",
        "last_scored",
        "last_detections",
        "last_error",
        "run_count",
        "running",
        "total_scored",
        "total_detections",
    )
    return {key: getattr(_state, key) for key in exported_keys}


async def run_once(
    db_path: Path,
    model_id: str,
    device: str = "cpu",
    batch_size: int = 32,
    threshold: float = 0.60,
) -> None:
    """Run a single cybersec scoring pass and record its outcome in ``_state``.

    The call is a no-op when ``model_id`` is empty or when another pass
    currently holds the module lock. Otherwise ``score_security_entries`` is
    run in the event loop's default executor so the loop is not blocked.

    Args:
        db_path: Forwarded unchanged to ``score_security_entries``.
        model_id: Forwarded to ``score_security_entries``; a falsy value
            makes this call return immediately.
        device: Forwarded unchanged to ``score_security_entries``.
        batch_size: Forwarded unchanged to ``score_security_entries``.
        threshold: Forwarded unchanged to ``score_security_entries``.

    Raises:
        Exception: Anything raised by ``score_security_entries`` is re-raised
            after the failure has been recorded in ``_state``.
    """
    if not model_id or _lock.locked():
        return

    async with _lock:
        _state.running = True
        started_at = datetime.now(tz=timezone.utc)
        try:
            loop = asyncio.get_running_loop()
            # The loop clock is monotonic, so the measured duration cannot be
            # skewed (or go negative) when the wall clock is adjusted.
            tick = loop.time()
            try:
                result = await loop.run_in_executor(
                    None,
                    score_security_entries,
                    db_path,
                    model_id,
                    device,
                    batch_size,
                    threshold,
                )
            except Exception as exc:
                # Record the crash so get_state() does not keep reporting the
                # previous run's outcome, then let the caller see the error.
                _state.last_run_at = started_at.isoformat()
                _state.last_duration_s = loop.time() - tick
                _state.last_scored = 0
                _state.last_detections = 0
                _state.last_error = f"{type(exc).__name__}: {exc}"
                _state.run_count += 1
                logger.exception("cybersec scorer failed")
                raise

            elapsed = loop.time() - tick
            _state.last_run_at = started_at.isoformat()
            _state.last_duration_s = elapsed
            _state.last_scored = result.scored
            _state.last_detections = result.detections
            _state.last_error = result.error
            _state.run_count += 1
            _state.total_scored += result.scored
            _state.total_detections += result.detections
            if result.error:
                logger.error("cybersec scorer error: %s", result.error)
            elif not result.skipped:
                logger.info(
                    "cybersec scorer: scored=%d detections=%d in %.1fs",
                    result.scored, result.detections, elapsed,
                )
        finally:
            # Always clear the flag, including on cancellation or failure.
            _state.running = False