fix(classifier): quality fixes for DEFAULT_EXEMPLARS — remove forward __all__ entry, tighten tests, fix survey exemplar

This commit is contained in:
pyr0ball 2026-05-04 20:03:18 -07:00
parent 3be5055e31
commit c177fb1628
2 changed files with 14 additions and 3 deletions

View file

@ -20,7 +20,6 @@ __all__ = [
"GLiClassAdapter", "GLiClassAdapter",
"RerankerAdapter", "RerankerAdapter",
"FineTunedAdapter", "FineTunedAdapter",
"EmbeddingKNNAdapter",
] ]
LABELS: list[str] = [ LABELS: list[str] = [
@ -158,7 +157,7 @@ DEFAULT_EXEMPLARS: dict[str, list[str]] = {
"Subject: Culture Fit Assessment\n\nAs part of our process, we ask all candidates to complete a short assessment.", "Subject: Culture Fit Assessment\n\nAs part of our process, we ask all candidates to complete a short assessment.",
"Subject: Skills Assessment\n\nWe'd like you to complete our online coding assessment before proceeding.", "Subject: Skills Assessment\n\nWe'd like you to complete our online coding assessment before proceeding.",
"Subject: Personality Assessment\n\nPlease complete the following assessment as the next step in our process.", "Subject: Personality Assessment\n\nPlease complete the following assessment as the next step in our process.",
"Subject: Interview Feedback Survey\n\nThank you for interviewing — your feedback helps us improve our process.", "Subject: Pre-interview questionnaire\n\nBefore we schedule your interview, please complete this brief skills survey.",
], ],
"neutral": [ "neutral": [
"Subject: Application Received\n\nWe have received your application and will be in touch.", "Subject: Application Received\n\nWe have received your application and will be in touch.",
@ -194,6 +193,7 @@ DEFAULT_EXEMPLARS: dict[str, list[str]] = {
], ],
} }
class ClassifierAdapter(abc.ABC): class ClassifierAdapter(abc.ABC):
"""Abstract base for all email classifier adapters.""" """Abstract base for all email classifier adapters."""

View file

@ -301,7 +301,7 @@ def test_default_exemplars_covers_all_labels():
from scripts.classifier_adapters import DEFAULT_EXEMPLARS, LABELS from scripts.classifier_adapters import DEFAULT_EXEMPLARS, LABELS
for label in LABELS: for label in LABELS:
assert label in DEFAULT_EXEMPLARS, f"DEFAULT_EXEMPLARS missing label: {label}" assert label in DEFAULT_EXEMPLARS, f"DEFAULT_EXEMPLARS missing label: {label}"
assert len(DEFAULT_EXEMPLARS[label]) >= 1, f"{label} has no exemplar texts" assert len(DEFAULT_EXEMPLARS[label]) >= 4, f"{label} needs >= 4 exemplars for k=3 voting"
def test_default_exemplars_sparse_labels_have_at_least_four(): def test_default_exemplars_sparse_labels_have_at_least_four():
@ -311,3 +311,14 @@ def test_default_exemplars_sparse_labels_have_at_least_four():
assert len(DEFAULT_EXEMPLARS[label]) >= 4, ( assert len(DEFAULT_EXEMPLARS[label]) >= 4, (
f"{label} needs >= 4 exemplars for k=3 voting to work reliably" f"{label} needs >= 4 exemplars for k=3 voting to work reliably"
) )
def test_default_exemplars_strings_are_formatted_correctly():
    """Check the textual shape of every entry in DEFAULT_EXEMPLARS.

    Each exemplar string must begin with the literal ``Subject: `` prefix
    and must contain a double newline somewhere in the text.
    """
    from scripts.classifier_adapters import DEFAULT_EXEMPLARS

    # Flatten the label -> exemplars mapping into (label, exemplar) pairs,
    # preserving the mapping's own iteration order.
    labelled_samples = (
        (name, sample)
        for name, samples in DEFAULT_EXEMPLARS.items()
        for sample in samples
    )

    for name, sample in labelled_samples:
        has_subject_prefix = sample.startswith("Subject: ")
        assert has_subject_prefix, (
            f"{name!r} exemplar missing 'Subject: ' prefix: {sample[:50]!r}"
        )

        has_separator = "\n\n" in sample
        assert has_separator, (
            f"{name!r} exemplar missing double-newline separator: {sample[:50]!r}"
        )