"""Tests for app/services/diagnose/suppressor.py — FalsePositiveSuppressor. All tests use mocking; no real model downloads are made. """ from __future__ import annotations import sqlite3 import tempfile from pathlib import Path from unittest.mock import MagicMock, patch import pytest import app.services.diagnose.suppressor as sup_module from app.services.diagnose.models import Hypothesis, RankedHypothesis from app.services.diagnose.suppressor import FalsePositiveSuppressor # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_hypothesis( title: str = "Test", description: str = "A test hypothesis.", confidence: float = 0.8, severity: str = "ERROR", ) -> Hypothesis: return Hypothesis( hypothesis_id="test-id", title=title, description=description, confidence=confidence, supporting_cluster_ids=(), runbook_refs=(), severity=severity, # type: ignore[arg-type] ) def _make_db_with_incidents(incidents: list[tuple[str, str]]) -> Path: """Create a temporary SQLite database with resolved incidents. Returns the db path.""" tmp = tempfile.mktemp(suffix=".db") db_path = Path(tmp) with sqlite3.connect(str(db_path)) as conn: conn.execute( "CREATE TABLE incidents " "(id INTEGER PRIMARY KEY, label TEXT, notes TEXT, ended_at TEXT)" ) for label, notes in incidents: conn.execute( "INSERT INTO incidents (label, notes, ended_at) VALUES (?, ?, ?)", (label, notes, "2024-01-01T00:00:00"), ) conn.commit() return db_path def _make_empty_db() -> Path: """Create a temporary SQLite DB with no incidents table.""" tmp = tempfile.mktemp(suffix=".db") db_path = Path(tmp) with sqlite3.connect(str(db_path)) as conn: conn.execute("CREATE TABLE unrelated (id INTEGER PRIMARY KEY)") conn.commit() return db_path def _make_mock_embedder( embed_return: list[float] | None = None, embed_batch_return: list[list[float]] | None = None, ) -> MagicMock: """Build a mock embedder with controllable embed/embed_batch responses.""" embedder = MagicMock() # Default: unit vector along first dimension default_vec = [1.0] + [0.0] * 383 raw_single = embed_return if embed_return is not None else default_vec raw_batch = embed_batch_return if embed_batch_return is not None else [default_vec] # Wrap scalars in numpy-like MagicMock with .tolist() def _wrap(vec: list[float]) -> MagicMock: m = MagicMock() m.tolist.return_value = vec return m embedder.embed.return_value = _wrap(raw_single) embedder.embed_batch.return_value = [_wrap(v) for v in raw_batch] return embedder # --------------------------------------------------------------------------- # Autouse fixture: reset module-level cache between tests # --------------------------------------------------------------------------- @pytest.fixture(autouse=True) def reset_suppressor_cache(): sup_module._corpus_cache.clear() yield sup_module._corpus_cache.clear() # --------------------------------------------------------------------------- # Test 1: No model configured — passthrough, ranked by confidence # --------------------------------------------------------------------------- def test_no_model_passthrough_ranked_by_confidence(): """model_id='' → all novelty_score=1.0, suppress=False, ranked by confidence desc.""" h_low = _make_hypothesis(title="Low", confidence=0.3) h_high = _make_hypothesis(title="High", confidence=0.9) h_mid = _make_hypothesis(title="Mid", confidence=0.6) db_path = Path(tempfile.mktemp(suffix=".db")) suppressor = FalsePositiveSuppressor(model_id="") results = suppressor.suppress([h_low, h_high, h_mid], db_path) assert len(results) == 3 assert all(isinstance(r, RankedHypothesis) for r in results) assert all(r.novelty_score == pytest.approx(1.0) for r in results) assert all(r.similarity_to_known == pytest.approx(0.0) for r in results) assert all(r.suppress is False for r in results) assert all(r.suppression_reason is None for r in results) # Ranked by confidence descending confidences = [r.hypothesis.confidence for r in results] assert confidences == sorted(confidences, reverse=True) # --------------------------------------------------------------------------- # Test 2: High similarity → suppressed # --------------------------------------------------------------------------- def test_high_similarity_suppresses_hypothesis(): """Hypothesis with embedding nearly identical to corpus → suppress=True.""" identical_vec = [1.0] + [0.0] * 383 corpus_vec = [1.0] + [0.0] * 383 # cosine similarity = 1.0 mock_embedder = _make_mock_embedder( embed_return=identical_vec, embed_batch_return=[corpus_vec], ) db_path = _make_db_with_incidents([("OOM killer", "Memory pressure caused OOM kill")]) suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([_make_hypothesis()], db_path) assert len(results) == 1 result = results[0] assert result.suppress is True assert result.suppression_reason is not None assert "Similar to resolved incident" in result.suppression_reason assert result.similarity_to_known == pytest.approx(1.0, abs=0.01) assert result.novelty_score == pytest.approx(0.0, abs=0.01) # --------------------------------------------------------------------------- # Test 3: Low similarity → not suppressed # --------------------------------------------------------------------------- def test_low_similarity_does_not_suppress(): """Hypothesis with embedding orthogonal to corpus → suppress=False.""" hypothesis_vec = [1.0] + [0.0] * 383 corpus_vec = [0.0, 1.0] + [0.0] * 382 # orthogonal → similarity = 0.0 mock_embedder = _make_mock_embedder( embed_return=hypothesis_vec, embed_batch_return=[corpus_vec], ) db_path = _make_db_with_incidents([("Disk I/O", "Storage saturation caused latency")]) suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([_make_hypothesis()], db_path) assert len(results) == 1 result = results[0] assert result.suppress is False assert result.suppression_reason is None assert result.similarity_to_known == pytest.approx(0.0, abs=0.01) assert result.novelty_score == pytest.approx(1.0, abs=0.01) # --------------------------------------------------------------------------- # Test 4: Empty hypotheses list returns [] # --------------------------------------------------------------------------- def test_empty_hypotheses_returns_empty(): """suppress([]) → [] regardless of model or db state.""" db_path = Path(tempfile.mktemp(suffix=".db")) suppressor = FalsePositiveSuppressor(model_id="test-model") results = suppressor.suppress([], db_path) assert results == [] # --------------------------------------------------------------------------- # Test 5: Ranking by novelty_score * confidence # --------------------------------------------------------------------------- def test_ranking_by_novelty_times_confidence(): """Results are sorted by novelty_score * confidence descending.""" # Hypothesis A: novelty=0.9, confidence=0.5 → score=0.45 # Hypothesis B: novelty=0.5, confidence=0.9 → score=0.45 (tie, order stable-ish) # Hypothesis C: novelty=0.8, confidence=0.9 → score=0.72 (highest) # Expected order: C, then A or B # We'll use orthogonal embeddings to get predictable similarities. # Corpus has 3 incidents with different embeddings. # We'll control novelty_score by setting similarity carefully. # Simplest: set up so each hypothesis gets a specific similarity to its corpus. # corpus_embs[0] = [1,0,0,...], [0,1,0,...], [0,0,1,...] — unit vectors # hyp A embed = [cos(0.1), sin(0.1), 0...] → sim to corpus[0] = cos(0.1) ≈ 0.995 high # This gets complex. Instead, mock _load_embedder to return None and rely # on passthrough with controlled confidence, then verify confidence-based ranking. # Then do a second test variant with manual novelty injection via embed return values. # Simpler approach: create 3 hypotheses and verify output is sorted correctly # by providing distinct embeddings that produce known similarities. import math # Corpus: single vector [1, 0, 0, ...] corpus_vec = [1.0] + [0.0] * 383 # H_A: similarity = 0.1 → novelty = 0.9, confidence = 0.5 → score = 0.45 angle_a = math.acos(0.1) vec_a = [0.1, math.sin(angle_a)] + [0.0] * 382 # H_B: similarity = 0.5 → novelty = 0.5, confidence = 0.9 → score = 0.45 angle_b = math.acos(0.5) vec_b = [0.5, math.sin(angle_b)] + [0.0] * 382 # H_C: similarity = 0.2 → novelty = 0.8, confidence = 0.9 → score = 0.72 (highest) angle_c = math.acos(0.2) vec_c = [0.2, math.sin(angle_c)] + [0.0] * 382 h_a = _make_hypothesis(title="A", confidence=0.5) h_b = _make_hypothesis(title="B", confidence=0.9) h_c = _make_hypothesis(title="C", confidence=0.9) call_count = [0] vecs_in_order = [vec_a, vec_b, vec_c] def side_effect_embed(text: str) -> MagicMock: m = MagicMock() m.tolist.return_value = vecs_in_order[call_count[0] % len(vecs_in_order)] call_count[0] += 1 return m mock_embedder = MagicMock() batch_m = MagicMock() batch_m.tolist.return_value = corpus_vec mock_embedder.embed_batch.return_value = [batch_m] mock_embedder.embed.side_effect = side_effect_embed db_path = _make_db_with_incidents([("OOM", "Memory exhaustion")]) suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([h_a, h_b, h_c], db_path) assert len(results) == 3 titles = [r.hypothesis.title for r in results] # H_C should be first (highest novelty*confidence score) assert titles[0] == "C", f"Expected C first, got {titles}" # Verify sort is descending by novelty*confidence scores = [r.novelty_score * r.hypothesis.confidence for r in results] assert scores == sorted(scores, reverse=True) # --------------------------------------------------------------------------- # Test 6: DB with no resolved incidents → novelty_score=1.0 # --------------------------------------------------------------------------- def test_no_resolved_incidents_in_db_passthrough(): """When incidents table is empty, all hypotheses get novelty_score=1.0.""" db_path = _make_db_with_incidents([]) # table exists but zero rows mock_embedder = _make_mock_embedder() suppressor = FalsePositiveSuppressor(model_id="test-model") with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([_make_hypothesis()], db_path) assert len(results) == 1 assert results[0].novelty_score == pytest.approx(1.0) assert results[0].suppress is False # embed_batch should NOT have been called (empty corpus short-circuits) mock_embedder.embed_batch.assert_not_called() # --------------------------------------------------------------------------- # Test 7: DB query failure → graceful fallback, no crash # --------------------------------------------------------------------------- def test_db_query_failure_graceful_fallback(): """When the incidents table is missing, suppress() returns passthrough without raising.""" db_path = _make_empty_db() # no 'incidents' table mock_embedder = _make_mock_embedder() suppressor = FalsePositiveSuppressor(model_id="test-model") with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([_make_hypothesis()], db_path) assert len(results) == 1 assert results[0].novelty_score == pytest.approx(1.0) assert results[0].suppress is False # --------------------------------------------------------------------------- # Test 8: Embedding service unavailable (returns None) → graceful fallback # --------------------------------------------------------------------------- def test_embedding_service_unavailable_passthrough(): """When get_embedder() returns None, suppress() falls back without crashing.""" db_path = _make_db_with_incidents([("OOM", "Memory pressure")]) suppressor = FalsePositiveSuppressor(model_id="test-model") with patch.object(suppressor, "_load_embedder", return_value=None): results = suppressor.suppress([_make_hypothesis(confidence=0.7)], db_path) assert len(results) == 1 assert results[0].novelty_score == pytest.approx(1.0) assert results[0].suppress is False assert results[0].suppression_reason is None # --------------------------------------------------------------------------- # Test 9: Corpus cache invalidated when corpus changes # --------------------------------------------------------------------------- def test_corpus_cache_invalidated_on_corpus_change(): """When the corpus changes between calls, embed_batch is called again.""" # First DB: one incident db_path = _make_db_with_incidents([("OOM", "Memory pressure")]) corpus_vec_1 = [1.0] + [0.0] * 383 corpus_vec_2 = [0.0, 1.0] + [0.0] * 382 hyp_vec = [1.0] + [0.0] * 383 # embedder will be called twice for embed_batch (different corpus each time) mock_embedder = MagicMock() single_m = MagicMock() single_m.tolist.return_value = hyp_vec batch_m1 = MagicMock() batch_m1.tolist.return_value = corpus_vec_1 batch_m2 = MagicMock() batch_m2.tolist.return_value = corpus_vec_2 mock_embedder.embed.return_value = single_m mock_embedder.embed_batch.side_effect = [[batch_m1], [batch_m2]] suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): # First call — populates cache results_1 = suppressor.suppress([_make_hypothesis()], db_path) assert mock_embedder.embed_batch.call_count == 1 # Mutate the DB to add a second incident (changes corpus) with sqlite3.connect(str(db_path)) as conn: conn.execute( "INSERT INTO incidents (label, notes, ended_at) VALUES (?, ?, ?)", ("Disk I/O", "Storage saturation", "2024-01-02T00:00:00"), ) conn.commit() # Second call — corpus changed, should re-embed results_2 = suppressor.suppress([_make_hypothesis()], db_path) assert mock_embedder.embed_batch.call_count == 2, ( "embed_batch should be called again when corpus changes" ) assert len(results_1) == 1 assert len(results_2) == 1