App: Peregrine Company: Circuit Forge LLC Source: github.com/pyr0ball/job-seeker (personal fork, not linked)
159 lines
5.2 KiB
Python
159 lines
5.2 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
Compare email classifiers across models on a live sample from IMAP.
|
|
|
|
Usage:
|
|
conda run -n job-seeker python scripts/test_email_classify.py
|
|
conda run -n job-seeker python scripts/test_email_classify.py --limit 30
|
|
conda run -n job-seeker python scripts/test_email_classify.py --dry-run # phrase filter only, no LLM
|
|
|
|
Outputs a table: subject | phrase_blocked | phi3 | llama3.1 | vllm
|
|
"""
|
|
import argparse
|
|
import re
|
|
import sys
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from scripts.imap_sync import (
|
|
load_config, connect, _search_folder, _parse_message,
|
|
_has_recruitment_keyword, _has_rejection_or_ats_signal,
|
|
_CLASSIFY_SYSTEM, _CLASSIFY_LABELS,
|
|
_REJECTION_PHRASES, _SPAM_PHRASES, _ATS_CONFIRM_SUBJECTS, _SPAM_SUBJECT_PREFIXES,
|
|
)
|
|
from scripts.llm_router import LLMRouter
|
|
|
|
_ROUTER = LLMRouter()
|
|
|
|
MODELS = {
|
|
"phi3": ("phi3:mini", ["ollama_research"]),
|
|
"llama3": ("llama3.1:8b", ["ollama_research"]),
|
|
"vllm": ("__auto__", ["vllm"]),
|
|
}
|
|
|
|
BROAD_TERMS = ["interview", "opportunity", "offer letter", "job offer", "application", "recruiting"]
|
|
|
|
|
|
def _classify(subject: str, body: str, model_override: str, fallback_order: list) -> str:
|
|
try:
|
|
prompt = f"Subject: {subject}\n\nEmail: {body[:600]}"
|
|
raw = _ROUTER.complete(
|
|
prompt,
|
|
system=_CLASSIFY_SYSTEM,
|
|
model_override=model_override,
|
|
fallback_order=fallback_order,
|
|
)
|
|
text = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).lower().strip()
|
|
for label in _CLASSIFY_LABELS:
|
|
if text.startswith(label) or label in text:
|
|
return label
|
|
return f"? ({text[:30]})"
|
|
except Exception as e:
|
|
return f"ERR: {e!s:.20}"
|
|
|
|
|
|
def _short(s: str, n: int = 55) -> str:
|
|
return s if len(s) <= n else s[:n - 1] + "…"
|
|
|
|
|
|
def _explain_block(subject: str, body: str) -> str:
|
|
"""Return the first phrase/rule that triggered a block."""
|
|
subject_lower = subject.lower().strip()
|
|
for p in _SPAM_SUBJECT_PREFIXES:
|
|
if subject_lower.startswith(p):
|
|
return f"subject prefix: {p!r}"
|
|
for p in _ATS_CONFIRM_SUBJECTS:
|
|
if p in subject_lower:
|
|
return f"ATS subject: {p!r}"
|
|
haystack = subject_lower + " " + body[:800].lower()
|
|
for p in _REJECTION_PHRASES + _SPAM_PHRASES:
|
|
if p in haystack:
|
|
return f"phrase: {p!r}"
|
|
return "unknown"
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--limit", type=int, default=20, help="Max emails to test")
|
|
parser.add_argument("--days", type=int, default=90)
|
|
parser.add_argument("--dry-run", action="store_true",
|
|
help="Skip LLM calls — show phrase filter only")
|
|
parser.add_argument("--verbose", action="store_true",
|
|
help="Show which phrase triggered each BLOCK")
|
|
args = parser.parse_args()
|
|
|
|
cfg = load_config()
|
|
since = (datetime.now() - timedelta(days=args.days)).strftime("%d-%b-%Y")
|
|
|
|
print(f"Connecting to {cfg.get('host')} …")
|
|
conn = connect(cfg)
|
|
|
|
# Collect unique UIDs across broad terms
|
|
all_uids: dict[bytes, None] = {}
|
|
for term in BROAD_TERMS:
|
|
for uid in _search_folder(conn, "INBOX", f'(SUBJECT "{term}")', since):
|
|
all_uids[uid] = None
|
|
|
|
sample = list(all_uids.keys())[: args.limit]
|
|
print(f"Fetched {len(all_uids)} matching UIDs, testing {len(sample)}\n")
|
|
|
|
# Header
|
|
if args.dry_run:
|
|
print(f"{'Subject':<56} {'RK':3} {'Phrase':7}")
|
|
print("-" * 72)
|
|
else:
|
|
print(f"{'Subject':<56} {'RK':3} {'Phrase':7} {'phi3':<20} {'llama3':<20} {'vllm':<20}")
|
|
print("-" * 130)
|
|
|
|
passed = skipped = 0
|
|
rows = []
|
|
|
|
for uid in sample:
|
|
parsed = _parse_message(conn, uid)
|
|
if not parsed:
|
|
continue
|
|
subj = parsed["subject"]
|
|
body = parsed["body"]
|
|
|
|
has_rk = _has_recruitment_keyword(subj)
|
|
phrase_block = _has_rejection_or_ats_signal(subj, body)
|
|
|
|
if args.dry_run:
|
|
rk_mark = "✓" if has_rk else "✗"
|
|
pb_mark = "BLOCK" if phrase_block else "pass"
|
|
line = f"{_short(subj):<56} {rk_mark:3} {pb_mark:7}"
|
|
if phrase_block and args.verbose:
|
|
reason = _explain_block(subj, body)
|
|
line += f" [{reason}]"
|
|
print(line)
|
|
continue
|
|
|
|
if phrase_block or not has_rk:
|
|
skipped += 1
|
|
rk_mark = "✓" if has_rk else "✗"
|
|
pb_mark = "BLOCK" if phrase_block else "pass"
|
|
print(f"{_short(subj):<56} {rk_mark:3} {pb_mark:7} {'—':<20} {'—':<20} {'—':<20}")
|
|
continue
|
|
|
|
passed += 1
|
|
results = {}
|
|
for name, (model, fallback) in MODELS.items():
|
|
results[name] = _classify(subj, body, model, fallback)
|
|
|
|
pb_mark = "pass"
|
|
print(f"{_short(subj):<56} {'✓':3} {pb_mark:7} "
|
|
f"{results['phi3']:<20} {results['llama3']:<20} {results['vllm']:<20}")
|
|
|
|
if not args.dry_run:
|
|
print(f"\nPhrase-blocked or no-keyword: {skipped} | Reached LLMs: {passed}")
|
|
|
|
try:
|
|
conn.logout()
|
|
except Exception:
|
|
pass
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|