From 86361f6c794ec51512cda6e71f6ce09186c78f35 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 25 May 2026 18:58:52 -0700 Subject: [PATCH] fix: invert suppress_threshold semantics to similarity_threshold in FalsePositiveSuppressor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Was suppressing when novelty_score < 0.85 (i.e. similarity > 0.15), which would suppress nearly every hypothesis once embeddings are active. Now suppresses when max_sim >= similarity_threshold (0.85), meaning only hypotheses that are 85%+ similar to a resolved incident are suppressed. Also renames suppress_threshold → similarity_threshold for clarity and adds a borderline boundary test (0.85 suppressed, 0.84 not suppressed). Closes: https://git.opensourcesolarpunk.com/Circuit-Forge/turnstone/issues/29 --- app/services/diagnose/suppressor.py | 8 +++-- tests/test_diagnose_suppressor.py | 53 ++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/app/services/diagnose/suppressor.py b/app/services/diagnose/suppressor.py index bdcaca5..a77d74d 100644 --- a/app/services/diagnose/suppressor.py +++ b/app/services/diagnose/suppressor.py @@ -113,12 +113,14 @@ class FalsePositiveSuppressor: self, model_id: str = "", device: str = "cpu", - suppress_threshold: float = 0.85, + similarity_threshold: float = 0.85, ) -> None: self._model_id = model_id self._device = device # _device stored for future use when get_embedder() supports device selection - self._suppress_threshold = suppress_threshold + # Suppress when cosine similarity to a known resolved incident >= threshold. + # A threshold of 0.85 means "suppress if 85%+ similar to something already resolved." + self._similarity_threshold = similarity_threshold def suppress( self, @@ -199,7 +201,7 @@ class FalsePositiveSuppressor: ) novelty_score = 1.0 - max_sim - suppress = bool(novelty_score < self._suppress_threshold) + suppress = bool(max_sim >= self._similarity_threshold) suppression_reason = ( f"Similar to resolved incident (similarity {max_sim:.2f})" if suppress diff --git a/tests/test_diagnose_suppressor.py b/tests/test_diagnose_suppressor.py index 348a05a..fb13e77 100644 --- a/tests/test_diagnose_suppressor.py +++ b/tests/test_diagnose_suppressor.py @@ -139,7 +139,7 @@ def test_high_similarity_suppresses_hypothesis(tmp_path): [("OOM killer", "Memory pressure caused OOM kill")], tmp_path / "turnstone.db", ) - suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) + suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([_make_hypothesis()], db_path) @@ -171,7 +171,7 @@ def test_low_similarity_does_not_suppress(tmp_path): [("Disk I/O", "Storage saturation caused latency")], tmp_path / "turnstone.db", ) - suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) + suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([_make_hypothesis()], db_path) @@ -184,6 +184,51 @@ def test_low_similarity_does_not_suppress(tmp_path): assert result.novelty_score == pytest.approx(1.0, abs=0.01) +# --------------------------------------------------------------------------- +# Test 3b: Borderline similarity — exactly at threshold vs. just below +# --------------------------------------------------------------------------- + +def test_similarity_threshold_boundary(tmp_path): + """similarity == threshold is suppressed; similarity just below threshold is not. + + This test locks down the boundary semantics: suppress when max_sim >= threshold, + not when novelty_score < threshold (the inverted form that was the original bug). + With threshold=0.85: + - similarity=0.85 → suppressed (at boundary, inclusive) + - similarity=0.84 → NOT suppressed (just below) + """ + db_path = _make_db_with_incidents( + [("Disk I/O", "Storage saturation caused latency")], + tmp_path / "turnstone.db", + ) + + # Corpus unit vector along first axis + corpus_vec = [1.0] + [0.0] * 383 + + for sim_value, expected_suppress in [(0.85, True), (0.84, False)]: + # Build a hypothesis embedding whose cosine similarity to corpus_vec ≈ sim_value. + # query = [sim, sqrt(1 - sim^2), 0, ...] → cosine sim = sim exactly. + import math + hyp_vec = [sim_value, math.sqrt(max(0.0, 1.0 - sim_value ** 2))] + [0.0] * 382 + + mock_embedder = _make_mock_embedder( + embed_return=hyp_vec, + embed_batch_return=[corpus_vec], + ) + + suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85) + + with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): + results = suppressor.suppress([_make_hypothesis()], db_path) + + assert len(results) == 1 + result = results[0] + assert result.suppress is expected_suppress, ( + f"similarity={sim_value:.2f}: expected suppress={expected_suppress}, " + f"got suppress={result.suppress} (similarity_to_known={result.similarity_to_known:.4f})" + ) + + # --------------------------------------------------------------------------- # Test 4: Empty hypotheses list returns [] # --------------------------------------------------------------------------- @@ -259,7 +304,7 @@ def test_ranking_by_novelty_times_confidence(tmp_path): [("OOM", "Memory exhaustion")], tmp_path / "turnstone.db", ) - suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) + suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): results = suppressor.suppress([h_a, h_b, h_c], db_path) @@ -362,7 +407,7 @@ def test_corpus_cache_invalidated_on_corpus_change(tmp_path): mock_embedder.embed.return_value = single_m mock_embedder.embed_batch.side_effect = [[batch_m1], [batch_m2]] - suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85) + suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85) with patch.object(suppressor, "_load_embedder", return_value=mock_embedder): # First call — populates cache