"""Tests for label_tool HTML extraction utilities. These functions are stdlib-only and safe to test without an IMAP connection. """ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from app.label_tool import _extract_body, _strip_html # ── _strip_html ────────────────────────────────────────────────────────────── def test_strip_html_removes_tags(): assert _strip_html("

Hello world

") == "Hello world" def test_strip_html_skips_script_content(): result = _strip_html("

real

") assert "doEvil" not in result assert "real" in result def test_strip_html_skips_style_content(): result = _strip_html("

visible

") assert ".foo" not in result assert "visible" in result def test_strip_html_handles_br_as_newline(): result = _strip_html("line1
line2") assert "line1" in result assert "line2" in result def test_strip_html_decodes_entities(): # convert_charrefs=True on HTMLParser handles & etc. result = _strip_html("

Hello & welcome

") assert "&" not in result assert "Hello" in result assert "welcome" in result def test_strip_html_empty_string(): assert _strip_html("") == "" def test_strip_html_plain_text_passthrough(): assert _strip_html("no tags here") == "no tags here" # ── _extract_body ──────────────────────────────────────────────────────────── def test_extract_body_prefers_plain_over_html(): msg = MIMEMultipart("alternative") msg.attach(MIMEText("plain body", "plain")) msg.attach(MIMEText("html body", "html")) assert _extract_body(msg) == "plain body" def test_extract_body_falls_back_to_html_when_no_plain(): msg = MIMEMultipart("alternative") msg.attach(MIMEText("

HTML only email

", "html")) result = _extract_body(msg) assert "HTML only email" in result assert "<" not in result # no raw HTML tags leaked through def test_extract_body_non_multipart_html_stripped(): msg = MIMEText("

Solo HTML

", "html") result = _extract_body(msg) assert "Solo HTML" in result assert "" not in result def test_extract_body_non_multipart_plain_unchanged(): msg = MIMEText("just plain text", "plain") assert _extract_body(msg) == "just plain text" def test_extract_body_empty_message(): msg = MIMEText("", "plain") assert _extract_body(msg) == "" def test_extract_body_multipart_empty_returns_empty(): msg = MIMEMultipart("alternative") assert _extract_body(msg) == ""