"""Tests for imap_sync helpers (no live IMAP connection required).""" import pytest from unittest.mock import patch, MagicMock def test_classify_stage_signal_interview(): """classify_stage_signal returns interview_scheduled for a call-scheduling email.""" from scripts.imap_sync import classify_stage_signal with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: mock_router.complete.return_value = "interview_scheduled" result = classify_stage_signal( "Let's schedule a call", "Hi Alex, we'd love to book a 30-min phone screen with you.", ) assert result == "interview_scheduled" def test_classify_stage_signal_returns_none_on_error(): """classify_stage_signal returns None when LLM call raises.""" from scripts.imap_sync import classify_stage_signal with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: mock_router.complete.side_effect = RuntimeError("model not loaded") result = classify_stage_signal("subject", "body") assert result is None def test_classify_stage_signal_strips_think_tags(): """classify_stage_signal strips ... blocks before parsing.""" from scripts.imap_sync import classify_stage_signal with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: mock_router.complete.return_value = "Let me think...\nrejected" result = classify_stage_signal("Update on your application", "We went with another candidate.") assert result == "rejected" def test_normalise_company(): """_normalise_company strips legal suffixes.""" from scripts.imap_sync import _normalise_company assert _normalise_company("DataStax, Inc.") == "DataStax" assert _normalise_company("Wiz Ltd") == "Wiz" assert _normalise_company("Crusoe Energy") == "Crusoe Energy" def test_company_search_terms_excludes_job_board_sld(): """Job-board domains like linkedin.com are never used as match terms.""" from scripts.imap_sync import _company_search_terms # LinkedIn-sourced job: SLD "linkedin" must not appear in the terms terms = _company_search_terms("Bamboo Health", "https://www.linkedin.com/jobs/view/123") assert "linkedin" not in terms assert "bamboo health" in terms # Company with its own domain: SLD should be included terms = _company_search_terms("Crusoe Energy", "https://crusoe.ai/jobs/456") assert "crusoe" in terms # Indeed-sourced job: "indeed" excluded terms = _company_search_terms("DoorDash", "https://www.indeed.com/viewjob?jk=abc") assert "indeed" not in terms assert "doordash" in terms def test_has_recruitment_keyword(): """_has_recruitment_keyword matches known keywords.""" from scripts.imap_sync import _has_recruitment_keyword assert _has_recruitment_keyword("Interview Invitation — Senior TAM") assert _has_recruitment_keyword("Your application with DataStax") assert not _has_recruitment_keyword("Team lunch tomorrow") def test_extract_lead_info_returns_company_and_title(): """extract_lead_info parses LLM JSON response into (company, title).""" from scripts.imap_sync import extract_lead_info with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: mock_router.complete.return_value = '{"company": "Wiz", "title": "Senior TAM"}' result = extract_lead_info("Senior TAM at Wiz", "Hi Alex, we have a role…", "recruiter@wiz.com") assert result == ("Wiz", "Senior TAM") def test_extract_lead_info_returns_none_on_bad_json(): """extract_lead_info returns (None, None) when LLM returns unparseable output.""" from scripts.imap_sync import extract_lead_info with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: mock_router.complete.return_value = "I cannot determine the company." result = extract_lead_info("Job opportunity", "blah", "noreply@example.com") assert result == (None, None) def test_classify_labels_includes_survey_received(): """_CLASSIFY_LABELS includes survey_received.""" from scripts.imap_sync import _CLASSIFY_LABELS assert "survey_received" in _CLASSIFY_LABELS def test_classify_stage_signal_returns_survey_received(): """classify_stage_signal returns 'survey_received' when LLM outputs that label.""" from unittest.mock import patch from scripts.imap_sync import classify_stage_signal with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: mock_router.complete.return_value = "survey_received" result = classify_stage_signal("Complete our culture survey", "Please fill out this form") assert result == "survey_received" def test_sync_job_emails_classifies_inbound(tmp_path): """sync_job_emails classifies inbound emails and stores the stage_signal.""" from scripts.db import init_db, insert_job, get_contacts from scripts.imap_sync import sync_job_emails db_path = tmp_path / "test.db" init_db(db_path) job_id = insert_job(db_path, { "title": "CSM", "company": "Acme", "url": "https://acme.com/jobs/1", "source": "linkedin", "location": "Remote", "is_remote": True, "salary": "", "description": "", "date_found": "2026-02-21", }) job = {"id": job_id, "company": "Acme", "url": "https://acme.com/jobs/1"} fake_msg_bytes = ( b"From: recruiter@acme.com\r\n" b"To: alex@example.com\r\n" b"Subject: Interview Invitation\r\n" b"Message-ID: \r\n" b"\r\n" b"Hi Alex, we'd like to schedule a phone screen." ) conn_mock = MagicMock() conn_mock.select.return_value = ("OK", [b"1"]) conn_mock.search.return_value = ("OK", [b"1"]) conn_mock.fetch.return_value = ("OK", [(b"1 (RFC822 {123})", fake_msg_bytes)]) with patch("scripts.imap_sync.classify_stage_signal", return_value="interview_scheduled"): inb, out = sync_job_emails(job, conn_mock, {"lookback_days": 90}, db_path) assert inb == 1 contacts = get_contacts(db_path, job_id=job_id) assert contacts[0]["stage_signal"] == "interview_scheduled" def test_parse_linkedin_alert_extracts_jobs(): from scripts.imap_sync import parse_linkedin_alert body = """\ Your job alert for customer success manager in United States New jobs match your preferences. Manage alerts: https://www.linkedin.com/comm/jobs/alerts?... Customer Success Manager Reflow California, United States View job: https://www.linkedin.com/comm/jobs/view/4376518925/?trackingId=abc%3D%3D&refId=xyz --------------------------------------------------------- Customer Engagement Manager Bitwarden United States 2 school alumni Apply with resume & profile View job: https://www.linkedin.com/comm/jobs/view/4359824983/?trackingId=def%3D%3D --------------------------------------------------------- """ jobs = parse_linkedin_alert(body) assert len(jobs) == 2 assert jobs[0]["title"] == "Customer Success Manager" assert jobs[0]["company"] == "Reflow" assert jobs[0]["location"] == "California, United States" assert jobs[0]["url"] == "https://www.linkedin.com/jobs/view/4376518925/" assert jobs[1]["title"] == "Customer Engagement Manager" assert jobs[1]["company"] == "Bitwarden" assert jobs[1]["url"] == "https://www.linkedin.com/jobs/view/4359824983/" def test_parse_linkedin_alert_skips_blocks_without_view_job(): from scripts.imap_sync import parse_linkedin_alert body = """\ Customer Success Manager Some Company United States --------------------------------------------------------- Valid Job Title Valid Company Remote View job: https://www.linkedin.com/comm/jobs/view/1111111/?x=y --------------------------------------------------------- """ jobs = parse_linkedin_alert(body) assert len(jobs) == 1 assert jobs[0]["title"] == "Valid Job Title" def test_parse_linkedin_alert_empty_body(): from scripts.imap_sync import parse_linkedin_alert assert parse_linkedin_alert("") == [] assert parse_linkedin_alert("No jobs here.") == [] # ── _scan_unmatched_leads integration ───────────────────────────────────────── _ALERT_BODY = """\ Your job alert for customer success manager in United States New jobs match your preferences. Customer Success Manager Acme Corp California, United States View job: https://www.linkedin.com/comm/jobs/view/9999001/?trackingId=abc --------------------------------------------------------- Director of Customer Success Beta Inc Remote View job: https://www.linkedin.com/comm/jobs/view/9999002/?trackingId=def --------------------------------------------------------- """ _ALERT_EMAIL = { "message_id": "", "from_addr": "jobalerts-noreply@linkedin.com", "to_addr": "alex@example.com", "subject": "2 new jobs for customer success manager", "body": _ALERT_BODY, "date": "2026-02-24 12:00:00", } def test_scan_unmatched_leads_linkedin_alert_inserts_jobs(tmp_path): """_scan_unmatched_leads detects a LinkedIn alert and inserts each job card.""" import sqlite3 from unittest.mock import patch, MagicMock from scripts.db import init_db db_path = tmp_path / "test.db" init_db(db_path) conn_mock = MagicMock() with patch("scripts.imap_sync._search_folder", return_value=[b"1"]), \ patch("scripts.imap_sync._parse_message", return_value=_ALERT_EMAIL), \ patch("scripts.task_runner.submit_task") as mock_submit: from scripts.imap_sync import _scan_unmatched_leads known_ids: set = set() new_leads = _scan_unmatched_leads(conn_mock, {"lookback_days": 90}, db_path, known_ids) assert new_leads == 2 # Message ID added so it won't be reprocessed assert "" in known_ids # Both jobs inserted with correct fields conn = sqlite3.connect(db_path) conn.row_factory = sqlite3.Row jobs = conn.execute("SELECT * FROM jobs ORDER BY id").fetchall() conn.close() assert len(jobs) == 2 assert jobs[0]["title"] == "Customer Success Manager" assert jobs[0]["company"] == "Acme Corp" assert jobs[0]["url"] == "https://www.linkedin.com/jobs/view/9999001/" assert jobs[0]["source"] == "linkedin" assert jobs[1]["title"] == "Director of Customer Success" assert jobs[1]["url"] == "https://www.linkedin.com/jobs/view/9999002/" # scrape_url task submitted for each inserted job assert mock_submit.call_count == 2 task_types = [call.args[1] for call in mock_submit.call_args_list] assert task_types == ["scrape_url", "scrape_url"] def test_scan_unmatched_leads_linkedin_alert_skips_duplicates(tmp_path): """URLs already in the DB are not re-inserted.""" from unittest.mock import patch, MagicMock from scripts.db import init_db, insert_job db_path = tmp_path / "test.db" init_db(db_path) # Pre-insert one of the two URLs insert_job(db_path, { "title": "Customer Success Manager", "company": "Acme Corp", "url": "https://www.linkedin.com/jobs/view/9999001/", "source": "linkedin", "location": "", "is_remote": 0, "salary": "", "description": "", "date_found": "2026-02-24", }) conn_mock = MagicMock() with patch("scripts.imap_sync._search_folder", return_value=[b"1"]), \ patch("scripts.imap_sync._parse_message", return_value=_ALERT_EMAIL), \ patch("scripts.task_runner.submit_task") as mock_submit: from scripts.imap_sync import _scan_unmatched_leads new_leads = _scan_unmatched_leads(conn_mock, {"lookback_days": 90}, db_path, set()) # Only one new job (the duplicate was skipped) assert new_leads == 1 assert mock_submit.call_count == 1 def test_scan_unmatched_leads_linkedin_alert_skips_llm_path(tmp_path): """After a LinkedIn alert email, the LLM extraction path is never reached.""" from unittest.mock import patch, MagicMock from scripts.db import init_db db_path = tmp_path / "test.db" init_db(db_path) conn_mock = MagicMock() with patch("scripts.imap_sync._search_folder", return_value=[b"1"]), \ patch("scripts.imap_sync._parse_message", return_value=_ALERT_EMAIL), \ patch("scripts.task_runner.submit_task"), \ patch("scripts.imap_sync.extract_lead_info") as mock_llm: from scripts.imap_sync import _scan_unmatched_leads _scan_unmatched_leads(conn_mock, {"lookback_days": 90}, db_path, set()) # LLM extraction must never be called for alert emails mock_llm.assert_not_called()