fix: invert suppress_threshold semantics to similarity_threshold in FalsePositiveSuppressor
Was suppressing when novelty_score < 0.85 (i.e. similarity > 0.15), which would suppress nearly every hypothesis once embeddings are active. Now suppresses when max_sim >= similarity_threshold (0.85), meaning only hypotheses that are 85%+ similar to a resolved incident are suppressed. Also renames suppress_threshold → similarity_threshold for clarity and adds a borderline boundary test (0.85 suppressed, 0.84 not suppressed). Closes: #29
This commit is contained in:
parent
1b949337da
commit
25b7ae340b
2 changed files with 54 additions and 7 deletions
|
|
@ -113,12 +113,14 @@ class FalsePositiveSuppressor:
|
||||||
self,
|
self,
|
||||||
model_id: str = "",
|
model_id: str = "",
|
||||||
device: str = "cpu",
|
device: str = "cpu",
|
||||||
suppress_threshold: float = 0.85,
|
similarity_threshold: float = 0.85,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._model_id = model_id
|
self._model_id = model_id
|
||||||
self._device = device
|
self._device = device
|
||||||
# _device stored for future use when get_embedder() supports device selection
|
# _device stored for future use when get_embedder() supports device selection
|
||||||
self._suppress_threshold = suppress_threshold
|
# Suppress when cosine similarity to a known resolved incident >= threshold.
|
||||||
|
# A threshold of 0.85 means "suppress if 85%+ similar to something already resolved."
|
||||||
|
self._similarity_threshold = similarity_threshold
|
||||||
|
|
||||||
def suppress(
|
def suppress(
|
||||||
self,
|
self,
|
||||||
|
|
@ -199,7 +201,7 @@ class FalsePositiveSuppressor:
|
||||||
)
|
)
|
||||||
|
|
||||||
novelty_score = 1.0 - max_sim
|
novelty_score = 1.0 - max_sim
|
||||||
suppress = bool(novelty_score < self._suppress_threshold)
|
suppress = bool(max_sim >= self._similarity_threshold)
|
||||||
suppression_reason = (
|
suppression_reason = (
|
||||||
f"Similar to resolved incident (similarity {max_sim:.2f})"
|
f"Similar to resolved incident (similarity {max_sim:.2f})"
|
||||||
if suppress
|
if suppress
|
||||||
|
|
|
||||||
|
|
@ -139,7 +139,7 @@ def test_high_similarity_suppresses_hypothesis(tmp_path):
|
||||||
[("OOM killer", "Memory pressure caused OOM kill")],
|
[("OOM killer", "Memory pressure caused OOM kill")],
|
||||||
tmp_path / "turnstone.db",
|
tmp_path / "turnstone.db",
|
||||||
)
|
)
|
||||||
suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85)
|
suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85)
|
||||||
|
|
||||||
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
||||||
results = suppressor.suppress([_make_hypothesis()], db_path)
|
results = suppressor.suppress([_make_hypothesis()], db_path)
|
||||||
|
|
@ -171,7 +171,7 @@ def test_low_similarity_does_not_suppress(tmp_path):
|
||||||
[("Disk I/O", "Storage saturation caused latency")],
|
[("Disk I/O", "Storage saturation caused latency")],
|
||||||
tmp_path / "turnstone.db",
|
tmp_path / "turnstone.db",
|
||||||
)
|
)
|
||||||
suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85)
|
suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85)
|
||||||
|
|
||||||
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
||||||
results = suppressor.suppress([_make_hypothesis()], db_path)
|
results = suppressor.suppress([_make_hypothesis()], db_path)
|
||||||
|
|
@ -184,6 +184,51 @@ def test_low_similarity_does_not_suppress(tmp_path):
|
||||||
assert result.novelty_score == pytest.approx(1.0, abs=0.01)
|
assert result.novelty_score == pytest.approx(1.0, abs=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test 3b: Borderline similarity — exactly at threshold vs. just below
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_similarity_threshold_boundary(tmp_path):
|
||||||
|
"""similarity == threshold is suppressed; similarity just below threshold is not.
|
||||||
|
|
||||||
|
This test locks down the boundary semantics: suppress when max_sim >= threshold,
|
||||||
|
not when novelty_score < threshold (the inverted form that was the original bug).
|
||||||
|
With threshold=0.85:
|
||||||
|
- similarity=0.85 → suppressed (at boundary, inclusive)
|
||||||
|
- similarity=0.84 → NOT suppressed (just below)
|
||||||
|
"""
|
||||||
|
db_path = _make_db_with_incidents(
|
||||||
|
[("Disk I/O", "Storage saturation caused latency")],
|
||||||
|
tmp_path / "turnstone.db",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Corpus unit vector along first axis
|
||||||
|
corpus_vec = [1.0] + [0.0] * 383
|
||||||
|
|
||||||
|
for sim_value, expected_suppress in [(0.85, True), (0.84, False)]:
|
||||||
|
# Build a hypothesis embedding whose cosine similarity to corpus_vec ≈ sim_value.
|
||||||
|
# query = [sim, sqrt(1 - sim^2), 0, ...] → cosine sim = sim exactly.
|
||||||
|
import math
|
||||||
|
hyp_vec = [sim_value, math.sqrt(max(0.0, 1.0 - sim_value ** 2))] + [0.0] * 382
|
||||||
|
|
||||||
|
mock_embedder = _make_mock_embedder(
|
||||||
|
embed_return=hyp_vec,
|
||||||
|
embed_batch_return=[corpus_vec],
|
||||||
|
)
|
||||||
|
|
||||||
|
suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85)
|
||||||
|
|
||||||
|
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
||||||
|
results = suppressor.suppress([_make_hypothesis()], db_path)
|
||||||
|
|
||||||
|
assert len(results) == 1
|
||||||
|
result = results[0]
|
||||||
|
assert result.suppress is expected_suppress, (
|
||||||
|
f"similarity={sim_value:.2f}: expected suppress={expected_suppress}, "
|
||||||
|
f"got suppress={result.suppress} (similarity_to_known={result.similarity_to_known:.4f})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Test 4: Empty hypotheses list returns []
|
# Test 4: Empty hypotheses list returns []
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -259,7 +304,7 @@ def test_ranking_by_novelty_times_confidence(tmp_path):
|
||||||
[("OOM", "Memory exhaustion")],
|
[("OOM", "Memory exhaustion")],
|
||||||
tmp_path / "turnstone.db",
|
tmp_path / "turnstone.db",
|
||||||
)
|
)
|
||||||
suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85)
|
suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85)
|
||||||
|
|
||||||
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
||||||
results = suppressor.suppress([h_a, h_b, h_c], db_path)
|
results = suppressor.suppress([h_a, h_b, h_c], db_path)
|
||||||
|
|
@ -362,7 +407,7 @@ def test_corpus_cache_invalidated_on_corpus_change(tmp_path):
|
||||||
mock_embedder.embed.return_value = single_m
|
mock_embedder.embed.return_value = single_m
|
||||||
mock_embedder.embed_batch.side_effect = [[batch_m1], [batch_m2]]
|
mock_embedder.embed_batch.side_effect = [[batch_m1], [batch_m2]]
|
||||||
|
|
||||||
suppressor = FalsePositiveSuppressor(model_id="test-model", suppress_threshold=0.85)
|
suppressor = FalsePositiveSuppressor(model_id="test-model", similarity_threshold=0.85)
|
||||||
|
|
||||||
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
with patch.object(suppressor, "_load_embedder", return_value=mock_embedder):
|
||||||
# First call — populates cache
|
# First call — populates cache
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue