diff --git a/scripts/imap_sync.py b/scripts/imap_sync.py
index 220a54f..e900aed 100644
--- a/scripts/imap_sync.py
+++ b/scripts/imap_sync.py
@@ -698,21 +698,43 @@ def _parse_message(conn: imaplib.IMAP4, uid: bytes) -> Optional[dict]:
             return None
         msg = email.message_from_bytes(data[0][1])
 
-        body = ""
+        # Prefer text/html (preserves href attributes for digest link extraction);
+        # fall back to text/plain if no HTML part exists.
+        html_body = ""
+        plain_body = ""
         if msg.is_multipart():
             for part in msg.walk():
-                if part.get_content_type() == "text/plain":
+                ct = part.get_content_type()
+                if ct == "text/html" and not html_body:
                     try:
-                        body = part.get_payload(decode=True).decode("utf-8", errors="replace")
+                        html_body = part.get_payload(decode=True).decode("utf-8", errors="replace")
+                    except Exception:
+                        pass
+                elif ct == "text/plain" and not plain_body:
+                    try:
+                        plain_body = part.get_payload(decode=True).decode("utf-8", errors="replace")
                     except Exception:
                         pass
-                    break
         else:
+            ct = msg.get_content_type()
             try:
-                body = msg.get_payload(decode=True).decode("utf-8", errors="replace")
+                raw = msg.get_payload(decode=True).decode("utf-8", errors="replace")
+                if ct == "text/html":
+                    html_body = raw
+                else:
+                    plain_body = raw
             except Exception:
                 pass
 
+        if html_body:
+            # Strip <head>…</head> (CSS, meta, title) and any stray <style> or <script> blocks.
+            # Keeps <body> HTML intact so href attributes survive for digest extraction.
+            body = re.sub(r"<head[\s\S]*?</head>", "", html_body, flags=re.I)
+            body = re.sub(r"<style[\s\S]*?</style>", "", body, flags=re.I)
+            body = re.sub(r"<script[\s\S]*?</script>", "", body, flags=re.I)
+        else:
+            body = plain_body
+
         mid = msg.get("Message-ID", "").strip()
         if not mid:
             return None  # No Message-ID → can't dedup; skip to avoid repeat inserts
@@ -723,7 +745,7 @@ def _parse_message(conn: imaplib.IMAP4, uid: bytes) -> Optional[dict]:
             "from_addr":  _decode_str(msg.get("From")),
             "to_addr":    _decode_str(msg.get("To")),
             "date":       _decode_str(msg.get("Date")),
-            "body":       body[:4000],
+            "body":       body,  # no truncation — digest emails need full content
         }
     except Exception:
         return None
diff --git a/tests/test_imap_sync.py b/tests/test_imap_sync.py
index f9cc4e5..5bdc687 100644
--- a/tests/test_imap_sync.py
+++ b/tests/test_imap_sync.py
@@ -1024,8 +1024,8 @@ def test_sync_all_per_job_exception_continues(tmp_path):
 
 # ── Performance / edge cases ──────────────────────────────────────────────────
 
-def test_parse_message_large_body_truncated():
-    """Body longer than 4000 chars is silently truncated to 4000."""
+def test_parse_message_large_body_not_truncated():
+    """Body longer than 4000 chars is stored in full (no truncation)."""
     from scripts.imap_sync import _parse_message
 
     big_body = ("x" * 10_000).encode()
@@ -1037,7 +1037,7 @@ def test_parse_message_large_body_truncated():
     conn.fetch.return_value = ("OK", [(b"1 (RFC822)", raw)])
     result = _parse_message(conn, b"1")
     assert result is not None
-    assert len(result["body"]) <= 4000
+    assert len(result["body"]) == 10_000
 
 
 def test_parse_message_binary_attachment_no_crash():