From 1dc1ca89d77267aaadc924e70538ab1413c7f5ca Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Tue, 24 Feb 2026 18:25:39 -0800 Subject: [PATCH] chore: seed Peregrine from personal job-seeker (pre-generalization) App: Peregrine Company: Circuit Forge LLC Source: github.com/pyr0ball/job-seeker (personal fork, not linked) --- .gitignore | 20 + app/.streamlit/config.toml | 7 + app/Home.py | 475 +++++++++++++ app/app.py | 119 ++++ app/pages/1_Job_Review.py | 203 ++++++ app/pages/2_Settings.py | 842 +++++++++++++++++++++++ app/pages/3_Resume_Editor.py | 191 ++++++ app/pages/4_Apply.py | 388 +++++++++++ app/pages/5_Interviews.py | 539 +++++++++++++++ app/pages/6_Interview_Prep.py | 371 ++++++++++ app/pages/7_Survey.py | 274 ++++++++ config/adzuna.yaml.example | 5 + config/blocklist.yaml | 15 + config/craigslist.yaml.example | 24 + config/email.yaml.example | 38 ++ config/llm.yaml | 66 ++ config/llm.yaml.example | 66 ++ config/notion.yaml.example | 24 + config/resume_keywords.yaml | 23 + config/resume_keywords.yaml.example | 33 + config/search_profiles.yaml | 123 ++++ data/survey_screenshots/.gitkeep | 0 environment.yml | 68 ++ pytest.ini | 2 + scripts/__init__.py | 0 scripts/company_research.py | 468 +++++++++++++ scripts/custom_boards/__init__.py | 1 + scripts/custom_boards/adzuna.py | 160 +++++ scripts/custom_boards/craigslist.py | 177 +++++ scripts/custom_boards/theladders.py | 179 +++++ scripts/db.py | 728 ++++++++++++++++++++ scripts/discover.py | 285 ++++++++ scripts/enrich_descriptions.py | 284 ++++++++ scripts/finetune_local.py | 248 +++++++ scripts/generate_cover_letter.py | 224 ++++++ scripts/imap_sync.py | 906 +++++++++++++++++++++++++ scripts/llm_router.py | 170 +++++ scripts/manage-ui.sh | 106 +++ scripts/manage-vision.sh | 113 +++ scripts/manage-vllm.sh | 160 +++++ scripts/match.py | 156 +++++ scripts/prepare_training_data.py | 134 ++++ scripts/scrape_url.py | 228 +++++++ scripts/sync.py | 97 +++ scripts/task_runner.py | 155 +++++ scripts/test_email_classify.py 
| 159 +++++ scripts/vision_service/environment.yml | 17 + scripts/vision_service/main.py | 98 +++ tests/__init__.py | 0 tests/test_company_research.py | 84 +++ tests/test_cover_letter.py | 120 ++++ tests/test_craigslist.py | 211 ++++++ tests/test_db.py | 560 +++++++++++++++ tests/test_discover.py | 185 +++++ tests/test_enrich_descriptions.py | 96 +++ tests/test_imap_sync.py | 330 +++++++++ tests/test_llm_router.py | 135 ++++ tests/test_match.py | 47 ++ tests/test_scrape_url.py | 135 ++++ tests/test_sync.py | 88 +++ tests/test_task_runner.py | 210 ++++++ 61 files changed, 11370 insertions(+) create mode 100644 .gitignore create mode 100644 app/.streamlit/config.toml create mode 100644 app/Home.py create mode 100644 app/app.py create mode 100644 app/pages/1_Job_Review.py create mode 100644 app/pages/2_Settings.py create mode 100644 app/pages/3_Resume_Editor.py create mode 100644 app/pages/4_Apply.py create mode 100644 app/pages/5_Interviews.py create mode 100644 app/pages/6_Interview_Prep.py create mode 100644 app/pages/7_Survey.py create mode 100644 config/adzuna.yaml.example create mode 100644 config/blocklist.yaml create mode 100644 config/craigslist.yaml.example create mode 100644 config/email.yaml.example create mode 100644 config/llm.yaml create mode 100644 config/llm.yaml.example create mode 100644 config/notion.yaml.example create mode 100644 config/resume_keywords.yaml create mode 100644 config/resume_keywords.yaml.example create mode 100644 config/search_profiles.yaml create mode 100644 data/survey_screenshots/.gitkeep create mode 100644 environment.yml create mode 100644 pytest.ini create mode 100644 scripts/__init__.py create mode 100644 scripts/company_research.py create mode 100644 scripts/custom_boards/__init__.py create mode 100644 scripts/custom_boards/adzuna.py create mode 100644 scripts/custom_boards/craigslist.py create mode 100644 scripts/custom_boards/theladders.py create mode 100644 scripts/db.py create mode 100644 scripts/discover.py create 
mode 100644 scripts/enrich_descriptions.py create mode 100644 scripts/finetune_local.py create mode 100644 scripts/generate_cover_letter.py create mode 100644 scripts/imap_sync.py create mode 100644 scripts/llm_router.py create mode 100755 scripts/manage-ui.sh create mode 100755 scripts/manage-vision.sh create mode 100755 scripts/manage-vllm.sh create mode 100644 scripts/match.py create mode 100644 scripts/prepare_training_data.py create mode 100644 scripts/scrape_url.py create mode 100644 scripts/sync.py create mode 100644 scripts/task_runner.py create mode 100644 scripts/test_email_classify.py create mode 100644 scripts/vision_service/environment.yml create mode 100644 scripts/vision_service/main.py create mode 100644 tests/__init__.py create mode 100644 tests/test_company_research.py create mode 100644 tests/test_cover_letter.py create mode 100644 tests/test_craigslist.py create mode 100644 tests/test_db.py create mode 100644 tests/test_discover.py create mode 100644 tests/test_enrich_descriptions.py create mode 100644 tests/test_imap_sync.py create mode 100644 tests/test_llm_router.py create mode 100644 tests/test_match.py create mode 100644 tests/test_scrape_url.py create mode 100644 tests/test_sync.py create mode 100644 tests/test_task_runner.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..75174d4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +.env +config/notion.yaml +config/tokens.yaml +config/email.yaml +config/adzuna.yaml +config/craigslist.yaml +__pycache__/ +*.pyc +.pytest_cache/ +output/ +aihawk/ +resume_matcher/ +staging.db +.streamlit.log +.streamlit.pid +.coverage +log/ +unsloth_compiled_cache/ +data/survey_screenshots/* +!data/survey_screenshots/.gitkeep diff --git a/app/.streamlit/config.toml b/app/.streamlit/config.toml new file mode 100644 index 0000000..218fba5 --- /dev/null +++ b/app/.streamlit/config.toml @@ -0,0 +1,7 @@ +[theme] +base = "dark" +primaryColor = "#2DD4BF" +backgroundColor = "#0F172A" 
+secondaryBackgroundColor = "#1E293B" +textColor = "#F1F5F9" +font = "sans serif" diff --git a/app/Home.py b/app/Home.py new file mode 100644 index 0000000..c516250 --- /dev/null +++ b/app/Home.py @@ -0,0 +1,475 @@ +# app/Home.py +""" +Job Seeker Dashboard β€” Home page. +Shows counts, Run Discovery button, and Sync to Notion button. +""" +import subprocess +import sys +from pathlib import Path + +import streamlit as st + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from scripts.db import DEFAULT_DB, init_db, get_job_counts, purge_jobs, purge_email_data, \ + purge_non_remote, archive_jobs, kill_stuck_tasks, get_task_for_job, get_active_tasks, \ + insert_job, get_existing_urls +from scripts.task_runner import submit_task + +init_db(DEFAULT_DB) + + +def _dismissible(key: str, status: str, msg: str) -> None: + """Render a dismissible success/error message. key must be unique per task result.""" + if st.session_state.get(f"dismissed_{key}"): + return + col_msg, col_x = st.columns([10, 1]) + with col_msg: + if status == "completed": + st.success(msg) + else: + st.error(msg) + with col_x: + st.write("") + if st.button("βœ•", key=f"dismiss_{key}", help="Dismiss"): + st.session_state[f"dismissed_{key}"] = True + st.rerun() + + +def _queue_url_imports(db_path: Path, urls: list) -> int: + """Insert each URL as a pending manual job and queue a scrape_url task. 
+ Returns count of newly queued jobs.""" + from datetime import datetime + from scripts.scrape_url import canonicalize_url + existing = get_existing_urls(db_path) + queued = 0 + for url in urls: + url = canonicalize_url(url.strip()) + if not url.startswith("http"): + continue + if url in existing: + continue + job_id = insert_job(db_path, { + "title": "Importing…", + "company": "", + "url": url, + "source": "manual", + "location": "", + "description": "", + "date_found": datetime.now().isoformat()[:10], + }) + if job_id: + submit_task(db_path, "scrape_url", job_id) + queued += 1 + return queued + + +st.title("πŸ” Alex's Job Search") +st.caption("Discover β†’ Review β†’ Sync to Notion") + +st.divider() + + +@st.fragment(run_every=10) +def _live_counts(): + counts = get_job_counts(DEFAULT_DB) + col1, col2, col3, col4, col5 = st.columns(5) + col1.metric("Pending Review", counts.get("pending", 0)) + col2.metric("Approved", counts.get("approved", 0)) + col3.metric("Applied", counts.get("applied", 0)) + col4.metric("Synced to Notion", counts.get("synced", 0)) + col5.metric("Rejected", counts.get("rejected", 0)) + + +_live_counts() + +st.divider() + +left, enrich_col, mid, right = st.columns(4) + +with left: + st.subheader("Find New Jobs") + st.caption("Scrapes all configured boards and adds new listings to your review queue.") + + _disc_task = get_task_for_job(DEFAULT_DB, "discovery", 0) + _disc_running = _disc_task and _disc_task["status"] in ("queued", "running") + + if st.button("πŸš€ Run Discovery", use_container_width=True, type="primary", + disabled=bool(_disc_running)): + submit_task(DEFAULT_DB, "discovery", 0) + st.rerun() + + if _disc_running: + @st.fragment(run_every=4) + def _disc_status(): + t = get_task_for_job(DEFAULT_DB, "discovery", 0) + if t and t["status"] in ("queued", "running"): + lbl = "Queued…" if t["status"] == "queued" else "Scraping job boards… this may take a minute" + st.info(f"⏳ {lbl}") + else: + st.rerun() + _disc_status() + elif _disc_task 
and _disc_task["status"] == "completed": + _dismissible(f"disc_{_disc_task['id']}", "completed", + f"βœ… Discovery complete β€” {_disc_task.get('error', '')}. Head to Job Review.") + elif _disc_task and _disc_task["status"] == "failed": + _dismissible(f"disc_{_disc_task['id']}", "failed", + f"Discovery failed: {_disc_task.get('error', '')}") + +with enrich_col: + st.subheader("Enrich Descriptions") + st.caption("Re-fetch missing descriptions for any listing (LinkedIn, Indeed, Glassdoor, Adzuna, The Ladders, generic).") + + _enrich_task = get_task_for_job(DEFAULT_DB, "enrich_descriptions", 0) + _enrich_running = _enrich_task and _enrich_task["status"] in ("queued", "running") + + if st.button("πŸ” Fill Missing Descriptions", use_container_width=True, type="primary", + disabled=bool(_enrich_running)): + submit_task(DEFAULT_DB, "enrich_descriptions", 0) + st.rerun() + + if _enrich_running: + @st.fragment(run_every=4) + def _enrich_status(): + t = get_task_for_job(DEFAULT_DB, "enrich_descriptions", 0) + if t and t["status"] in ("queued", "running"): + st.info("⏳ Fetching descriptions…") + else: + st.rerun() + _enrich_status() + elif _enrich_task and _enrich_task["status"] == "completed": + _dismissible(f"enrich_{_enrich_task['id']}", "completed", + f"βœ… {_enrich_task.get('error', 'Done')}") + elif _enrich_task and _enrich_task["status"] == "failed": + _dismissible(f"enrich_{_enrich_task['id']}", "failed", + f"Enrich failed: {_enrich_task.get('error', '')}") + +with mid: + unscored = sum(1 for j in __import__("scripts.db", fromlist=["get_jobs_by_status"]) + .get_jobs_by_status(DEFAULT_DB, "pending") + if j.get("match_score") is None and j.get("description")) + st.subheader("Score Listings") + st.caption(f"Run TF-IDF match scoring against Alex's resume. 
{unscored} pending job{'s' if unscored != 1 else ''} unscored.") + if st.button("πŸ“Š Score All Unscored Jobs", use_container_width=True, type="primary", + disabled=unscored == 0): + with st.spinner("Scoring…"): + result = subprocess.run( + ["conda", "run", "-n", "job-seeker", "python", "scripts/match.py"], + capture_output=True, text=True, + cwd=str(Path(__file__).parent.parent), + ) + if result.returncode == 0: + st.success("Scoring complete!") + st.code(result.stdout) + else: + st.error("Scoring failed.") + st.code(result.stderr) + st.rerun() + +with right: + approved_count = get_job_counts(DEFAULT_DB).get("approved", 0) + st.subheader("Send to Notion") + st.caption("Push all approved jobs to your Notion tracking database.") + if approved_count == 0: + st.info("No approved jobs yet. Review and approve some listings first.") + else: + if st.button( + f"πŸ“€ Sync {approved_count} approved job{'s' if approved_count != 1 else ''} β†’ Notion", + use_container_width=True, type="primary", + ): + with st.spinner("Syncing to Notion…"): + from scripts.sync import sync_to_notion + count = sync_to_notion(DEFAULT_DB) + st.success(f"Synced {count} job{'s' if count != 1 else ''} to Notion!") + st.rerun() + +st.divider() + +# ── Email Sync ──────────────────────────────────────────────────────────────── +email_left, email_right = st.columns([3, 1]) + +with email_left: + st.subheader("Sync Emails") + st.caption("Pull inbound recruiter emails and match them to active applications. 
" + "New recruiter outreach is added to your Job Review queue.") + +with email_right: + _email_task = get_task_for_job(DEFAULT_DB, "email_sync", 0) + _email_running = _email_task and _email_task["status"] in ("queued", "running") + + if st.button("πŸ“§ Sync Emails", use_container_width=True, type="primary", + disabled=bool(_email_running)): + submit_task(DEFAULT_DB, "email_sync", 0) + st.rerun() + + if _email_running: + @st.fragment(run_every=4) + def _email_status(): + t = get_task_for_job(DEFAULT_DB, "email_sync", 0) + if t and t["status"] in ("queued", "running"): + st.info("⏳ Syncing emails…") + else: + st.rerun() + _email_status() + elif _email_task and _email_task["status"] == "completed": + _dismissible(f"email_{_email_task['id']}", "completed", + f"βœ… {_email_task.get('error', 'Done')}") + elif _email_task and _email_task["status"] == "failed": + _dismissible(f"email_{_email_task['id']}", "failed", + f"Sync failed: {_email_task.get('error', '')}") + +st.divider() + +# ── Add Jobs by URL ─────────────────────────────────────────────────────────── +add_left, _add_right = st.columns([3, 1]) +with add_left: + st.subheader("Add Jobs by URL") + st.caption("Paste job listing URLs to import and scrape in the background. " + "Supports LinkedIn, Indeed, Glassdoor, and most job boards.") + +url_tab, csv_tab = st.tabs(["Paste URLs", "Upload CSV"]) + +with url_tab: + url_text = st.text_area( + "urls", + placeholder="https://www.linkedin.com/jobs/view/1234567/\nhttps://www.indeed.com/viewjob?jk=abc", + height=100, + label_visibility="collapsed", + ) + if st.button("πŸ“₯ Add Jobs", key="add_urls_btn", use_container_width=True, + disabled=not (url_text or "").strip()): + _urls = [u.strip() for u in url_text.strip().splitlines() if u.strip().startswith("http")] + if _urls: + _n = _queue_url_imports(DEFAULT_DB, _urls) + if _n: + st.success(f"Queued {_n} job{'s' if _n != 1 else ''} for import. 
Check Job Review shortly.") + else: + st.info("All URLs already in the database.") + st.rerun() + +with csv_tab: + csv_file = st.file_uploader("CSV with a URL column", type=["csv"], + label_visibility="collapsed") + if csv_file: + import csv as _csv + import io as _io + reader = _csv.DictReader(_io.StringIO(csv_file.read().decode("utf-8", errors="replace"))) + _csv_urls = [] + for row in reader: + for val in row.values(): + if val and val.strip().startswith("http"): + _csv_urls.append(val.strip()) + break + if _csv_urls: + st.caption(f"Found {len(_csv_urls)} URL(s) in CSV.") + if st.button("πŸ“₯ Import CSV Jobs", key="add_csv_btn", use_container_width=True): + _n = _queue_url_imports(DEFAULT_DB, _csv_urls) + st.success(f"Queued {_n} job{'s' if _n != 1 else ''} for import.") + st.rerun() + else: + st.warning("No URLs found β€” CSV must have a column whose values start with http.") + + +@st.fragment(run_every=3) +def _scrape_status(): + import sqlite3 as _sq + conn = _sq.connect(DEFAULT_DB) + conn.row_factory = _sq.Row + rows = conn.execute( + """SELECT bt.status, bt.error, j.title, j.company, j.url + FROM background_tasks bt + JOIN jobs j ON j.id = bt.job_id + WHERE bt.task_type = 'scrape_url' + AND bt.updated_at >= datetime('now', '-5 minutes') + ORDER BY bt.updated_at DESC LIMIT 20""" + ).fetchall() + conn.close() + if not rows: + return + st.caption("Recent URL imports:") + for r in rows: + if r["status"] == "running": + st.info(f"⏳ Scraping {r['url']}") + elif r["status"] == "completed": + label = r["title"] + (f" @ {r['company']}" if r["company"] else "") + st.success(f"βœ… {label}") + elif r["status"] == "failed": + st.error(f"❌ {r['url']} β€” {r['error'] or 'scrape failed'}") + + +_scrape_status() + +st.divider() + +# ── Danger zone: purge + re-scrape ──────────────────────────────────────────── +with st.expander("⚠️ Danger Zone", expanded=False): + st.caption( + "**Purge** permanently deletes jobs from the local database. 
" + "Applied and synced jobs are never touched." + ) + + purge_col, rescrape_col, email_col, tasks_col = st.columns(4) + + with purge_col: + st.markdown("**Purge pending & rejected**") + st.caption("Removes all _pending_ and _rejected_ listings so the next discovery starts fresh.") + if st.button("πŸ—‘ Purge Pending + Rejected", use_container_width=True): + st.session_state["confirm_purge"] = "partial" + + if st.session_state.get("confirm_purge") == "partial": + st.warning("Are you sure? This cannot be undone.") + c1, c2 = st.columns(2) + if c1.button("Yes, purge", type="primary", use_container_width=True): + deleted = purge_jobs(DEFAULT_DB, statuses=["pending", "rejected"]) + st.success(f"Purged {deleted} jobs.") + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() + + with email_col: + st.markdown("**Purge email data**") + st.caption("Clears all email thread logs and email-sourced pending jobs so the next sync starts fresh.") + if st.button("πŸ“§ Purge Email Data", use_container_width=True): + st.session_state["confirm_purge"] = "email" + + if st.session_state.get("confirm_purge") == "email": + st.warning("This deletes all email contacts and email-sourced jobs. Cannot be undone.") + c1, c2 = st.columns(2) + if c1.button("Yes, purge emails", type="primary", use_container_width=True): + contacts, jobs = purge_email_data(DEFAULT_DB) + st.success(f"Purged {contacts} email contacts, {jobs} email jobs.") + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel ", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() + + with tasks_col: + _active = get_active_tasks(DEFAULT_DB) + st.markdown("**Kill stuck tasks**") + st.caption(f"Force-fail all queued/running background tasks. 
Currently **{len(_active)}** active.") + if st.button("⏹ Kill All Tasks", use_container_width=True, disabled=len(_active) == 0): + killed = kill_stuck_tasks(DEFAULT_DB) + st.success(f"Killed {killed} task(s).") + st.rerun() + + with rescrape_col: + st.markdown("**Purge all & re-scrape**") + st.caption("Wipes _all_ non-applied, non-synced jobs then immediately runs a fresh discovery.") + if st.button("πŸ”„ Purge All + Re-scrape", use_container_width=True): + st.session_state["confirm_purge"] = "full" + + if st.session_state.get("confirm_purge") == "full": + st.warning("This will delete ALL pending, approved, and rejected jobs, then re-scrape. Applied and synced records are kept.") + c1, c2 = st.columns(2) + if c1.button("Yes, wipe + scrape", type="primary", use_container_width=True): + purge_jobs(DEFAULT_DB, statuses=["pending", "approved", "rejected"]) + submit_task(DEFAULT_DB, "discovery", 0) + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel ", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() + + st.divider() + + pending_col, nonremote_col, approved_col, _ = st.columns(4) + + with pending_col: + st.markdown("**Purge pending review**") + st.caption("Removes only _pending_ listings, keeping your rejected history intact.") + if st.button("πŸ—‘ Purge Pending Only", use_container_width=True): + st.session_state["confirm_purge"] = "pending_only" + + if st.session_state.get("confirm_purge") == "pending_only": + st.warning("Deletes all pending jobs. Rejected jobs are kept. 
Cannot be undone.") + c1, c2 = st.columns(2) + if c1.button("Yes, purge pending", type="primary", use_container_width=True): + deleted = purge_jobs(DEFAULT_DB, statuses=["pending"]) + st.success(f"Purged {deleted} pending jobs.") + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel ", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() + + with nonremote_col: + st.markdown("**Purge non-remote**") + st.caption("Removes pending/approved/rejected jobs where remote is not set. Keeps anything already in the pipeline.") + if st.button("🏒 Purge On-site Jobs", use_container_width=True): + st.session_state["confirm_purge"] = "non_remote" + + if st.session_state.get("confirm_purge") == "non_remote": + st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.") + c1, c2 = st.columns(2) + if c1.button("Yes, purge on-site", type="primary", use_container_width=True): + deleted = purge_non_remote(DEFAULT_DB) + st.success(f"Purged {deleted} non-remote jobs.") + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel ", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() + + with approved_col: + st.markdown("**Purge approved (unapplied)**") + st.caption("Removes _approved_ jobs you haven't applied to yet β€” e.g. to reset after a review pass.") + if st.button("πŸ—‘ Purge Approved", use_container_width=True): + st.session_state["confirm_purge"] = "approved_only" + + if st.session_state.get("confirm_purge") == "approved_only": + st.warning("Deletes all approved-but-not-applied jobs. 
Cannot be undone.") + c1, c2 = st.columns(2) + if c1.button("Yes, purge approved", type="primary", use_container_width=True): + deleted = purge_jobs(DEFAULT_DB, statuses=["approved"]) + st.success(f"Purged {deleted} approved jobs.") + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel ", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() + + st.divider() + + archive_col1, archive_col2, _, _ = st.columns(4) + + with archive_col1: + st.markdown("**Archive remaining**") + st.caption( + "Move all _pending_ and _rejected_ jobs to archived status. " + "Archived jobs stay in the DB for dedup β€” they just won't appear in Job Review." + ) + if st.button("πŸ“¦ Archive Pending + Rejected", use_container_width=True): + st.session_state["confirm_purge"] = "archive_remaining" + + if st.session_state.get("confirm_purge") == "archive_remaining": + st.info("Jobs will be archived (not deleted) β€” URLs are kept for dedup.") + c1, c2 = st.columns(2) + if c1.button("Yes, archive", type="primary", use_container_width=True): + archived = archive_jobs(DEFAULT_DB, statuses=["pending", "rejected"]) + st.success(f"Archived {archived} jobs.") + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel ", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() + + with archive_col2: + st.markdown("**Archive approved (unapplied)**") + st.caption("Archive _approved_ listings you decided to skip β€” keeps history without cluttering the apply queue.") + if st.button("πŸ“¦ Archive Approved", use_container_width=True): + st.session_state["confirm_purge"] = "archive_approved" + + if st.session_state.get("confirm_purge") == "archive_approved": + st.info("Approved jobs will be archived (not deleted).") + c1, c2 = st.columns(2) + if c1.button("Yes, archive approved", type="primary", use_container_width=True): + archived = archive_jobs(DEFAULT_DB, statuses=["approved"]) + 
st.success(f"Archived {archived} approved jobs.") + st.session_state.pop("confirm_purge", None) + st.rerun() + if c2.button("Cancel ", use_container_width=True): + st.session_state.pop("confirm_purge", None) + st.rerun() diff --git a/app/app.py b/app/app.py new file mode 100644 index 0000000..5f29348 --- /dev/null +++ b/app/app.py @@ -0,0 +1,119 @@ +# app/app.py +""" +Streamlit entry point β€” uses st.navigation() to control the sidebar. +Main workflow pages are listed at the top; Settings is separated into +a "System" section so it doesn't crowd the navigation. + +Run: streamlit run app/app.py + bash scripts/manage-ui.sh start +""" +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import streamlit as st +from scripts.db import DEFAULT_DB, init_db, get_active_tasks +import sqlite3 + +st.set_page_config( + page_title="Job Seeker", + page_icon="πŸ’Ό", + layout="wide", +) + +init_db(DEFAULT_DB) + +# ── Startup cleanup β€” runs once per server process via cache_resource ────────── +@st.cache_resource +def _startup() -> None: + """Runs exactly once per server lifetime (st.cache_resource). + 1. Marks zombie tasks as failed. + 2. Auto-queues re-runs for any research generated without SearXNG data, + if SearXNG is now reachable. 
+ """ + conn = sqlite3.connect(DEFAULT_DB) + conn.execute( + "UPDATE background_tasks SET status='failed', error='Interrupted by server restart'," + " finished_at=datetime('now') WHERE status IN ('queued','running')" + ) + conn.commit() + + # Auto-recovery: re-run LLM-only research when SearXNG is available + try: + import requests as _req + if _req.get("http://localhost:8888/", timeout=3).status_code == 200: + from scripts.task_runner import submit_task + _ACTIVE_STAGES = ("phone_screen", "interviewing", "offer", "hired") + rows = conn.execute( + """SELECT cr.job_id FROM company_research cr + JOIN jobs j ON j.id = cr.job_id + WHERE (cr.scrape_used IS NULL OR cr.scrape_used = 0) + AND j.status IN ({})""".format(",".join("?" * len(_ACTIVE_STAGES))), + _ACTIVE_STAGES, + ).fetchall() + for (job_id,) in rows: + submit_task(str(DEFAULT_DB), "company_research", job_id) + except Exception: + pass # never block startup + + conn.close() + +_startup() + +# ── Navigation ───────────────────────────────────────────────────────────────── +# st.navigation() must be called before any sidebar writes so it can establish +# the navigation structure first; sidebar additions come after. +pages = { + "": [ + st.Page("Home.py", title="Home", icon="🏠"), + st.Page("pages/1_Job_Review.py", title="Job Review", icon="πŸ“‹"), + st.Page("pages/4_Apply.py", title="Apply Workspace", icon="πŸš€"), + st.Page("pages/5_Interviews.py", title="Interviews", icon="🎯"), + st.Page("pages/6_Interview_Prep.py", title="Interview Prep", icon="πŸ“ž"), + st.Page("pages/7_Survey.py", title="Survey Assistant", icon="πŸ“‹"), + ], + "System": [ + st.Page("pages/2_Settings.py", title="Settings", icon="βš™οΈ"), + ], +} + +pg = st.navigation(pages) + +# ── Background task sidebar indicator ───────────────────────────────────────── +# Fragment polls every 3s so stage labels update live without a full page reload. +# The sidebar context WRAPS the fragment call β€” do not write to st.sidebar inside it. 
+@st.fragment(run_every=3) +def _task_indicator(): + tasks = get_active_tasks(DEFAULT_DB) + if not tasks: + return + st.divider() + st.markdown(f"**⏳ {len(tasks)} task(s) running**") + for t in tasks: + icon = "⏳" if t["status"] == "running" else "πŸ•" + task_type = t["task_type"] + if task_type == "cover_letter": + label = "Cover letter" + elif task_type == "company_research": + label = "Research" + elif task_type == "email_sync": + label = "Email sync" + elif task_type == "discovery": + label = "Discovery" + elif task_type == "enrich_descriptions": + label = "Enriching" + elif task_type == "scrape_url": + label = "Scraping URL" + elif task_type == "enrich_craigslist": + label = "Enriching listing" + else: + label = task_type.replace("_", " ").title() + stage = t.get("stage") or "" + detail = f" Β· {stage}" if stage else (f" β€” {t.get('company')}" if t.get("company") else "") + st.caption(f"{icon} {label}{detail}") + +with st.sidebar: + _task_indicator() + +pg.run() diff --git a/app/pages/1_Job_Review.py b/app/pages/1_Job_Review.py new file mode 100644 index 0000000..8f2c397 --- /dev/null +++ b/app/pages/1_Job_Review.py @@ -0,0 +1,203 @@ +# app/pages/1_Job_Review.py +""" +Job Review β€” browse listings, approve/reject inline, generate cover letters, +and mark approved jobs as applied. 
+""" +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +import streamlit as st +from scripts.db import ( + DEFAULT_DB, init_db, get_jobs_by_status, update_job_status, + update_cover_letter, mark_applied, get_email_leads, +) + +st.title("πŸ“‹ Job Review") + +init_db(DEFAULT_DB) + +_email_leads = get_email_leads(DEFAULT_DB) + +# ── Sidebar filters ──────────────────────────────────────────────────────────── +with st.sidebar: + st.header("Filters") + show_status = st.selectbox( + "Show", + ["pending", "approved", "applied", "rejected", "synced"], + index=0, + ) + remote_only = st.checkbox("Remote only", value=False) + min_score = st.slider("Min match score", 0, 100, 0) + + st.header("Sort") + sort_by = st.selectbox( + "Sort by", + ["Date Found (newest)", "Date Found (oldest)", "Match Score (highβ†’low)", "Match Score (lowβ†’high)", "Company A–Z", "Title A–Z"], + index=0, + ) + +jobs = get_jobs_by_status(DEFAULT_DB, show_status) + +if remote_only: + jobs = [j for j in jobs if j.get("is_remote")] +if min_score > 0: + jobs = [j for j in jobs if (j.get("match_score") or 0) >= min_score] + +# Apply sort +if sort_by == "Date Found (newest)": + jobs = sorted(jobs, key=lambda j: j.get("date_found") or "", reverse=True) +elif sort_by == "Date Found (oldest)": + jobs = sorted(jobs, key=lambda j: j.get("date_found") or "") +elif sort_by == "Match Score (highβ†’low)": + jobs = sorted(jobs, key=lambda j: j.get("match_score") or 0, reverse=True) +elif sort_by == "Match Score (lowβ†’high)": + jobs = sorted(jobs, key=lambda j: j.get("match_score") or 0) +elif sort_by == "Company A–Z": + jobs = sorted(jobs, key=lambda j: (j.get("company") or "").lower()) +elif sort_by == "Title A–Z": + jobs = sorted(jobs, key=lambda j: (j.get("title") or "").lower()) + +if not jobs: + st.info(f"No {show_status} jobs matching your filters.") + st.stop() + +st.caption(f"Showing {len(jobs)} {show_status} job{'s' if len(jobs) != 1 else ''}") 
+st.divider() + +if show_status == "pending" and _email_leads: + st.subheader(f"πŸ“§ Email Leads ({len(_email_leads)})") + st.caption( + "Inbound recruiter emails not yet matched to a scraped listing. " + "Approve to add to Job Review; Reject to dismiss." + ) + for lead in _email_leads: + lead_id = lead["id"] + with st.container(border=True): + left_l, right_l = st.columns([7, 3]) + with left_l: + st.markdown(f"**{lead['title']}** β€” {lead['company']}") + badge_cols = st.columns(4) + badge_cols[0].caption("πŸ“§ Email Lead") + badge_cols[1].caption(f"πŸ“… {lead.get('date_found', '')}") + if lead.get("description"): + with st.expander("πŸ“„ Email excerpt", expanded=False): + st.text(lead["description"][:500]) + with right_l: + if st.button("βœ… Approve", key=f"el_approve_{lead_id}", + type="primary", use_container_width=True): + update_job_status(DEFAULT_DB, [lead_id], "approved") + st.rerun() + if st.button("❌ Reject", key=f"el_reject_{lead_id}", + use_container_width=True): + update_job_status(DEFAULT_DB, [lead_id], "rejected") + st.rerun() + st.divider() + +# Filter email leads out of the main pending list (already shown above) +if show_status == "pending": + jobs = [j for j in jobs if j.get("source") != "email"] + +# ── Job cards ────────────────────────────────────────────────────────────────── +for job in jobs: + job_id = job["id"] + + score = job.get("match_score") + if score is None: + score_badge = "⬜ No score" + elif score >= 70: + score_badge = f"🟒 {score:.0f}%" + elif score >= 40: + score_badge = f"🟑 {score:.0f}%" + else: + score_badge = f"πŸ”΄ {score:.0f}%" + + remote_badge = "🌐 Remote" if job.get("is_remote") else "🏒 On-site" + src = (job.get("source") or "").lower() + source_badge = f"πŸ€– {src.title()}" if src == "linkedin" else f"πŸ‘€ {src.title() or 'Manual'}" + + with st.container(border=True): + left, right = st.columns([7, 3]) + + # ── Left: job info ───────────────────────────────────────────────────── + with left: + 
st.markdown(f"**{job['title']}** β€” {job['company']}") + + badge_cols = st.columns(4) + badge_cols[0].caption(remote_badge) + badge_cols[1].caption(source_badge) + badge_cols[2].caption(score_badge) + badge_cols[3].caption(f"πŸ“… {job.get('date_found', '')}") + + if job.get("keyword_gaps"): + st.caption(f"**Keyword gaps:** {job['keyword_gaps']}") + + # Cover letter expander (approved view) + if show_status == "approved": + _cl_key = f"cl_{job_id}" + if _cl_key not in st.session_state: + st.session_state[_cl_key] = job.get("cover_letter") or "" + + cl_exists = bool(st.session_state[_cl_key]) + with st.expander("πŸ“ Cover Letter", expanded=cl_exists): + gen_label = "Regenerate" if cl_exists else "Generate Cover Letter" + if st.button(gen_label, key=f"gen_{job_id}"): + with st.spinner("Generating via LLM…"): + try: + from scripts.generate_cover_letter import generate as _gen + st.session_state[_cl_key] = _gen( + job.get("title", ""), + job.get("company", ""), + job.get("description", ""), + ) + st.rerun() + except Exception as e: + st.error(f"Generation failed: {e}") + + st.text_area( + "cover_letter_edit", + key=_cl_key, + height=300, + label_visibility="collapsed", + ) + save_col, _ = st.columns([2, 5]) + if save_col.button("πŸ’Ύ Save draft", key=f"save_cl_{job_id}"): + update_cover_letter(DEFAULT_DB, job_id, st.session_state[_cl_key]) + st.success("Saved!") + + # Applied date + cover letter preview (applied/synced) + if show_status in ("applied", "synced") and job.get("applied_at"): + st.caption(f"βœ… Applied: {job['applied_at']}") + if show_status in ("applied", "synced") and job.get("cover_letter"): + with st.expander("πŸ“ Cover Letter (sent)"): + st.text(job["cover_letter"]) + + # ── Right: actions ───────────────────────────────────────────────────── + with right: + if job.get("url"): + st.link_button("View listing β†’", job["url"], use_container_width=True) + if job.get("salary"): + st.caption(f"πŸ’° {job['salary']}") + + if show_status == "pending": + if 
st.button("βœ… Approve", key=f"approve_{job_id}", + type="primary", use_container_width=True): + update_job_status(DEFAULT_DB, [job_id], "approved") + st.rerun() + if st.button("❌ Reject", key=f"reject_{job_id}", + use_container_width=True): + update_job_status(DEFAULT_DB, [job_id], "rejected") + st.rerun() + + elif show_status == "approved": + if st.button("πŸš€ Apply β†’", key=f"apply_page_{job_id}", + type="primary", use_container_width=True): + st.session_state["apply_job_id"] = job_id + st.switch_page("pages/4_Apply.py") + if st.button("βœ… Mark Applied", key=f"applied_{job_id}", + use_container_width=True): + cl_text = st.session_state.get(f"cl_{job_id}", "") + if cl_text: + update_cover_letter(DEFAULT_DB, job_id, cl_text) + mark_applied(DEFAULT_DB, [job_id]) + st.rerun() diff --git a/app/pages/2_Settings.py b/app/pages/2_Settings.py new file mode 100644 index 0000000..9e37a04 --- /dev/null +++ b/app/pages/2_Settings.py @@ -0,0 +1,842 @@ +# app/pages/2_Settings.py +""" +Settings β€” edit search profiles, LLM backends, Notion connection, services, +and resume profile (paste-able bullets used in Apply Workspace). 
"""
import sys
from pathlib import Path
# Make the repo root importable so `scripts.*` resolves when Streamlit runs this page.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import streamlit as st
import yaml

st.title("βš™οΈ Settings")

# Config file locations (repo-root relative).
CONFIG_DIR = Path(__file__).parent.parent.parent / "config"
SEARCH_CFG = CONFIG_DIR / "search_profiles.yaml"
BLOCKLIST_CFG = CONFIG_DIR / "blocklist.yaml"
LLM_CFG = CONFIG_DIR / "llm.yaml"
NOTION_CFG = CONFIG_DIR / "notion.yaml"
RESUME_PATH = Path(__file__).parent.parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
KEYWORDS_CFG = CONFIG_DIR / "resume_keywords.yaml"

def load_yaml(path: Path) -> dict:
    """Read a YAML file into a dict; return {} when missing or empty."""
    if path.exists():
        return yaml.safe_load(path.read_text()) or {}
    return {}

def save_yaml(path: Path, data: dict) -> None:
    """Write *data* back as block-style YAML (unicode preserved)."""
    path.write_text(yaml.dump(data, default_flow_style=False, allow_unicode=True))


def _suggest_search_terms(current_titles: list[str], resume_path: Path) -> dict:
    """Call LLM to suggest additional job titles and exclude keywords.

    Returns {"suggested_titles": [...], "suggested_excludes": [...]}; both
    lists are empty when the model's reply contains no parseable JSON.
    """
    import json
    import re
    from scripts.llm_router import LLMRouter

    # Summarize the three most recent roles from the AIHawk resume YAML
    # to give the model grounding context.
    resume_context = ""
    if resume_path.exists():
        resume = load_yaml(resume_path)
        lines = []
        for exp in (resume.get("experience_details") or [])[:3]:
            pos = exp.get("position", "")
            co = exp.get("company", "")
            skills = ", ".join((exp.get("skills_acquired") or [])[:5])
            lines.append(f"- {pos} at {co}: {skills}")
        resume_context = "\n".join(lines)

    titles_str = "\n".join(f"- {t}" for t in current_titles)
    prompt = f"""You are helping a job seeker optimize their search criteria.

Their background (from resume):
{resume_context or "Customer success and technical account management leader"}

Current job titles being searched:
{titles_str}

Suggest:
1. 5-8 additional job titles they might be missing (alternative names, adjacent roles, senior variants)
2. 3-5 keywords to add to the exclusion filter (to screen out irrelevant postings)

Return ONLY valid JSON in this exact format:
{{"suggested_titles": ["Title 1", "Title 2"], "suggested_excludes": ["keyword 1", "keyword 2"]}}"""

    result = LLMRouter().complete(prompt).strip()
    # Grab the outermost {...} span β€” models often wrap JSON in prose/fences.
    m = re.search(r"\{.*\}", result, re.DOTALL)
    if m:
        try:
            return json.loads(m.group())
        except Exception:
            pass
    # Fall through on any parse failure: caller treats empty lists as "no suggestions".
    return {"suggested_titles": [], "suggested_excludes": []}

tab_search, tab_llm, tab_notion, tab_services, tab_resume, tab_email, tab_skills = st.tabs(
    ["πŸ”Ž Search", "πŸ€– LLM Backends", "πŸ“š Notion", "πŸ”Œ Services", "πŸ“ Resume Profile", "πŸ“§ Email", "🏷️ Skills"]
)

# ── Search tab ───────────────────────────────────────────────────────────────
with tab_search:
    cfg = load_yaml(SEARCH_CFG)
    profiles = cfg.get("profiles", [{}])
    # Only the first profile is editable from this UI.
    p = profiles[0] if profiles else {}

    # Seed session state from config on first load (or when config changes after save).
    # The hash is just the stringified lists β€” cheap change detection, not cryptographic.
    _sp_hash = str(p.get("titles", [])) + str(p.get("exclude_keywords", []))
    if st.session_state.get("_sp_hash") != _sp_hash:
        st.session_state["_sp_titles"] = "\n".join(p.get("titles", []))
        st.session_state["_sp_excludes"] = "\n".join(p.get("exclude_keywords", []))
        st.session_state["_sp_hash"] = _sp_hash

    # ── Titles ──────────────────────────────────────────────────────────────
    title_row, suggest_btn_col = st.columns([4, 1])
    with title_row:
        st.subheader("Job Titles to Search")
    with suggest_btn_col:
        st.write("")  # vertical align
        _run_suggest = st.button("✨ Suggest", key="sp_suggest_btn",
                                 help="Ask the LLM to suggest additional titles and exclude keywords based on your resume")

    titles_text = st.text_area(
        "One title per line",
        key="_sp_titles",
        height=150,
        help="JobSpy will search for any of these titles across all configured boards.",
        label_visibility="visible",
    )

    # ── LLM suggestions panel ───────────────────────────────────────────────
    if _run_suggest:
        current
= [t.strip() for t in titles_text.splitlines() if t.strip()] + with st.spinner("Asking LLM for suggestions…"): + suggestions = _suggest_search_terms(current, RESUME_PATH) + st.session_state["_sp_suggestions"] = suggestions + + if st.session_state.get("_sp_suggestions"): + sugg = st.session_state["_sp_suggestions"] + s_titles = sugg.get("suggested_titles", []) + s_excl = sugg.get("suggested_excludes", []) + + existing_titles = {t.lower() for t in titles_text.splitlines() if t.strip()} + existing_excl = {e.lower() for e in st.session_state.get("_sp_excludes", "").splitlines() if e.strip()} + + if s_titles: + st.caption("**Suggested titles** β€” click to add:") + cols = st.columns(min(len(s_titles), 4)) + for i, title in enumerate(s_titles): + with cols[i % 4]: + if title.lower() not in existing_titles: + if st.button(f"+ {title}", key=f"sp_add_title_{i}"): + st.session_state["_sp_titles"] = ( + st.session_state.get("_sp_titles", "").rstrip("\n") + f"\n{title}" + ) + st.rerun() + else: + st.caption(f"βœ“ {title}") + + if s_excl: + st.caption("**Suggested exclusions** β€” click to add:") + cols2 = st.columns(min(len(s_excl), 4)) + for i, kw in enumerate(s_excl): + with cols2[i % 4]: + if kw.lower() not in existing_excl: + if st.button(f"+ {kw}", key=f"sp_add_excl_{i}"): + st.session_state["_sp_excludes"] = ( + st.session_state.get("_sp_excludes", "").rstrip("\n") + f"\n{kw}" + ) + st.rerun() + else: + st.caption(f"βœ“ {kw}") + + if st.button("βœ• Clear suggestions", key="sp_clear_sugg"): + st.session_state.pop("_sp_suggestions", None) + st.rerun() + + st.subheader("Locations") + locations_text = st.text_area( + "One location per line", + value="\n".join(p.get("locations", [])), + height=100, + ) + + st.subheader("Exclude Keywords") + st.caption("Jobs whose **title or description** contain any of these words are silently dropped before entering the queue. 
Case-insensitive.") + exclude_text = st.text_area( + "One keyword or phrase per line", + key="_sp_excludes", + height=150, + help="e.g. 'sales', 'account executive', 'SDR'", + ) + + st.subheader("Job Boards") + board_options = ["linkedin", "indeed", "glassdoor", "zip_recruiter", "google"] + selected_boards = st.multiselect( + "Standard boards (via JobSpy)", board_options, + default=[b for b in p.get("boards", board_options) if b in board_options], + help="Google Jobs aggregates listings from many sources and often finds roles the other boards miss.", + ) + + _custom_board_options = ["adzuna", "theladders"] + _custom_board_labels = { + "adzuna": "Adzuna (free API β€” requires app_id + app_key in config/adzuna.yaml)", + "theladders": "The Ladders (curl_cffi scraper β€” $100K+ roles, requires curl_cffi)", + } + st.caption("**Custom boards** β€” scrapers built into this app, not part of JobSpy.") + selected_custom = st.multiselect( + "Custom boards", + options=_custom_board_options, + default=[b for b in p.get("custom_boards", []) if b in _custom_board_options], + format_func=lambda b: _custom_board_labels.get(b, b), + ) + + col1, col2 = st.columns(2) + results_per = col1.slider("Results per board", 5, 100, p.get("results_per_board", 25)) + hours_old = col2.slider("How far back to look (hours)", 24, 720, p.get("hours_old", 72)) + + if st.button("πŸ’Ύ Save search settings", type="primary"): + profiles[0] = { + **p, + "titles": [t.strip() for t in titles_text.splitlines() if t.strip()], + "locations": [loc.strip() for loc in locations_text.splitlines() if loc.strip()], + "boards": selected_boards, + "custom_boards": selected_custom, + "results_per_board": results_per, + "hours_old": hours_old, + "exclude_keywords": [k.strip() for k in exclude_text.splitlines() if k.strip()], + } + save_yaml(SEARCH_CFG, {"profiles": profiles}) + st.session_state["_sp_hash"] = "" # force re-seed on next load + st.session_state.pop("_sp_suggestions", None) + st.success("Search settings 
saved!") + + st.divider() + + # ── Blocklist ────────────────────────────────────────────────────────────── + with st.expander("🚫 Blocklist β€” companies, industries, and locations I will never work at", expanded=False): + st.caption( + "Listings matching any rule below are **silently dropped before entering the review queue**, " + "across all search profiles and custom boards. Changes take effect on the next discovery run." + ) + bl = load_yaml(BLOCKLIST_CFG) + + bl_companies = st.text_area( + "Company names (partial match, one per line)", + value="\n".join(bl.get("companies", [])), + height=120, + help="e.g. 'Amazon' blocks any listing where the company name contains 'amazon' (case-insensitive).", + key="bl_companies", + ) + bl_industries = st.text_area( + "Industry / content keywords (one per line)", + value="\n".join(bl.get("industries", [])), + height=100, + help="Blocked if the keyword appears in the company name OR job description. " + "e.g. 'gambling', 'crypto', 'tobacco', 'defense contractor'.", + key="bl_industries", + ) + bl_locations = st.text_area( + "Location strings to exclude (one per line)", + value="\n".join(bl.get("locations", [])), + height=80, + help="e.g. 
'Dallas' blocks any listing whose location contains 'dallas'.", + key="bl_locations", + ) + + if st.button("πŸ’Ύ Save blocklist", type="primary", key="save_blocklist"): + save_yaml(BLOCKLIST_CFG, { + "companies": [c.strip() for c in bl_companies.splitlines() if c.strip()], + "industries": [i.strip() for i in bl_industries.splitlines() if i.strip()], + "locations": [loc.strip() for loc in bl_locations.splitlines() if loc.strip()], + }) + st.success("Blocklist saved β€” takes effect on next discovery run.") + +# ── LLM Backends tab ───────────────────────────────────────────────────────── +with tab_llm: + import requests as _req + + def _ollama_models(base_url: str) -> list[str]: + """Fetch installed model names from the Ollama /api/tags endpoint.""" + try: + r = _req.get(base_url.rstrip("/v1").rstrip("/") + "/api/tags", timeout=2) + if r.ok: + return [m["name"] for m in r.json().get("models", [])] + except Exception: + pass + return [] + + cfg = load_yaml(LLM_CFG) + backends = cfg.get("backends", {}) + fallback_order = cfg.get("fallback_order", list(backends.keys())) + + # Persist reordering across reruns triggered by ↑↓ buttons. + # Reset to config order whenever the config file is fresher than the session key. + _cfg_key = str(fallback_order) + if st.session_state.get("_llm_order_cfg_key") != _cfg_key: + st.session_state["_llm_order"] = list(fallback_order) + st.session_state["_llm_order_cfg_key"] = _cfg_key + new_order: list[str] = st.session_state["_llm_order"] + + # All known backends (in current order first, then any extras) + all_names = list(new_order) + [n for n in backends if n not in new_order] + + st.caption("Enable/disable backends and drag their priority with the ↑ ↓ buttons. 
" + "First enabled + reachable backend wins on each call.") + + updated_backends = {} + + for name in all_names: + b = backends.get(name, {}) + enabled = b.get("enabled", True) + label = name.replace("_", " ").title() + pos = new_order.index(name) + 1 if name in new_order else "β€”" + header = f"{'🟒' if enabled else '⚫'} **{pos}. {label}**" + + with st.expander(header, expanded=False): + col_tog, col_up, col_dn, col_spacer = st.columns([2, 1, 1, 4]) + + new_enabled = col_tog.checkbox("Enabled", value=enabled, key=f"{name}_enabled") + + # Up / Down only apply to backends currently in the order + if name in new_order: + idx = new_order.index(name) + if col_up.button("↑", key=f"{name}_up", disabled=idx == 0): + new_order[idx], new_order[idx - 1] = new_order[idx - 1], new_order[idx] + st.session_state["_llm_order"] = new_order + st.rerun() + if col_dn.button("↓", key=f"{name}_dn", disabled=idx == len(new_order) - 1): + new_order[idx], new_order[idx + 1] = new_order[idx + 1], new_order[idx] + st.session_state["_llm_order"] = new_order + st.rerun() + + if b.get("type") == "openai_compat": + url = st.text_input("URL", value=b.get("base_url", ""), key=f"{name}_url") + + # Ollama gets a live model picker; other backends get a text input + if name == "ollama": + ollama_models = _ollama_models(b.get("base_url", "http://localhost:11434")) + current_model = b.get("model", "") + if ollama_models: + options = ollama_models + idx_default = options.index(current_model) if current_model in options else 0 + model = st.selectbox( + "Model", + options, + index=idx_default, + key=f"{name}_model", + help="Lists models currently installed in Ollama. 
Pull new ones with `ollama pull `.", + ) + else: + st.caption("_Ollama not reachable β€” enter model name manually_") + model = st.text_input("Model", value=current_model, key=f"{name}_model") + else: + model = st.text_input("Model", value=b.get("model", ""), key=f"{name}_model") + + updated_backends[name] = {**b, "base_url": url, "model": model, "enabled": new_enabled} + elif b.get("type") == "anthropic": + model = st.text_input("Model", value=b.get("model", ""), key=f"{name}_model") + updated_backends[name] = {**b, "model": model, "enabled": new_enabled} + else: + updated_backends[name] = {**b, "enabled": new_enabled} + + if b.get("type") == "openai_compat": + if st.button(f"Test connection", key=f"test_{name}"): + with st.spinner("Testing…"): + try: + from scripts.llm_router import LLMRouter + r = LLMRouter() + reachable = r._is_reachable(b.get("base_url", "")) + if reachable: + st.success("Reachable βœ“") + else: + st.warning("Not reachable βœ—") + except Exception as e: + st.error(f"Error: {e}") + + st.divider() + st.caption("Current priority: " + " β†’ ".join( + f"{'βœ“' if backends.get(n, {}).get('enabled', True) else 'βœ—'} {n}" + for n in new_order + )) + + if st.button("πŸ’Ύ Save LLM settings", type="primary"): + save_yaml(LLM_CFG, {**cfg, "backends": updated_backends, "fallback_order": new_order}) + st.session_state.pop("_llm_order", None) + st.session_state.pop("_llm_order_cfg_key", None) + st.success("LLM settings saved!") + +# ── Notion tab ──────────────────────────────────────────────────────────────── +with tab_notion: + cfg = load_yaml(NOTION_CFG) if NOTION_CFG.exists() else {} + + st.subheader("Notion Connection") + token = st.text_input( + "Integration Token", + value=cfg.get("token", ""), + type="password", + help="Find this at notion.so/my-integrations β†’ your integration β†’ Internal Integration Token", + ) + db_id = st.text_input( + "Database ID", + value=cfg.get("database_id", ""), + help="The 32-character ID from your Notion database 
URL", + ) + + col_save, col_test = st.columns(2) + if col_save.button("πŸ’Ύ Save Notion settings", type="primary"): + save_yaml(NOTION_CFG, {**cfg, "token": token, "database_id": db_id}) + st.success("Notion settings saved!") + + if col_test.button("πŸ”Œ Test connection"): + with st.spinner("Connecting…"): + try: + from notion_client import Client + n = Client(auth=token) + db = n.databases.retrieve(db_id) + st.success(f"Connected to: **{db['title'][0]['plain_text']}**") + except Exception as e: + st.error(f"Connection failed: {e}") + +# ── Services tab ─────────────────────────────────────────────────────────────── +with tab_services: + import socket + import subprocess as _sp + + TOKENS_CFG = CONFIG_DIR / "tokens.yaml" + PFP_DIR = Path("/Library/Documents/Post Fight Processing") + + # Service definitions: (display_name, port, start_cmd, stop_cmd, notes) + SERVICES = [ + { + "name": "Streamlit UI", + "port": 8501, + "start": ["bash", str(Path(__file__).parent.parent.parent / "scripts/manage-ui.sh"), "start"], + "stop": ["bash", str(Path(__file__).parent.parent.parent / "scripts/manage-ui.sh"), "stop"], + "cwd": str(Path(__file__).parent.parent.parent), + "note": "Job Seeker web interface", + }, + { + "name": "Ollama (local LLM)", + "port": 11434, + "start": ["sudo", "systemctl", "start", "ollama"], + "stop": ["sudo", "systemctl", "stop", "ollama"], + "cwd": "/", + "note": "Local inference engine β€” systemd service", + }, + { + "name": "Claude Code Wrapper", + "port": 3009, + "start": ["bash", str(PFP_DIR / "manage-services.sh"), "start"], + "stop": ["bash", str(PFP_DIR / "manage-services.sh"), "stop"], + "cwd": str(PFP_DIR), + "note": "OpenAI-compat proxy β†’ Claude Code (port 3009)", + }, + { + "name": "GitHub Copilot Wrapper", + "port": 3010, + "start": ["bash", str(PFP_DIR / "manage-copilot.sh"), "start"], + "stop": ["bash", str(PFP_DIR / "manage-copilot.sh"), "stop"], + "cwd": str(PFP_DIR), + "note": "OpenAI-compat proxy β†’ GitHub Copilot (port 3010)", + }, 
        {
            "name": "vLLM Server",
            "port": 8000,
            "start": ["bash", str(Path(__file__).parent.parent.parent / "scripts/manage-vllm.sh"), "start"],
            "stop": ["bash", str(Path(__file__).parent.parent.parent / "scripts/manage-vllm.sh"), "stop"],
            "cwd": str(Path(__file__).parent.parent.parent),
            # Presence of "model_dir" enables the model picker in the card UI below.
            "model_dir": "/Library/Assets/LLM/vllm/models",
            "note": "Local vLLM inference β€” Ouro model family (port 8000, GPU 1)",
        },
        {
            "name": "Vision Service (moondream2)",
            "port": 8002,
            "start": ["bash", str(Path(__file__).parent.parent.parent / "scripts/manage-vision.sh"), "start"],
            "stop": ["bash", str(Path(__file__).parent.parent.parent / "scripts/manage-vision.sh"), "stop"],
            "cwd": str(Path(__file__).parent.parent.parent),
            "note": "Survey screenshot analysis β€” moondream2 (port 8002, optional)",
        },
        {
            "name": "SearXNG (company scraper)",
            "port": 8888,
            # Runs out of its own compose project directory ("cwd" below).
            "start": ["docker", "compose", "up", "-d"],
            "stop": ["docker", "compose", "down"],
            "cwd": str(Path("/Library/Development/scrapers/SearXNG")),
            "note": "Privacy-respecting meta-search used for company research (port 8888)",
        },
    ]

    def _port_open(port: int) -> bool:
        """Return True when something is listening on 127.0.0.1:*port* (1s timeout)."""
        try:
            with socket.create_connection(("127.0.0.1", port), timeout=1):
                return True
        except OSError:
            return False

    st.caption("Monitor and control the LLM backend services. Status is checked live on each page load.")

    # One bordered card per service: status badge, notes, optional model
    # picker, and a Start/Stop button wired to the service's manage script.
    for svc in SERVICES:
        up = _port_open(svc["port"])
        badge = "🟒 Running" if up else "πŸ”΄ Stopped"
        header = f"**{svc['name']}** β€” {badge}"

        with st.container(border=True):
            left_col, right_col = st.columns([3, 1])
            with left_col:
                st.markdown(header)
                st.caption(f"Port {svc['port']} Β· {svc['note']}")

                # Model selector for services backed by a local model directory (e.g.
vLLM) + if "model_dir" in svc: + _mdir = Path(svc["model_dir"]) + _models = ( + sorted(d.name for d in _mdir.iterdir() if d.is_dir()) + if _mdir.exists() else [] + ) + _mk = f"svc_model_{svc['port']}" + _loaded_file = Path("/tmp/vllm-server.model") + _loaded = _loaded_file.read_text().strip() if (_loaded_file.exists()) else "" + if _models: + _default = _models.index(_loaded) if _loaded in _models else 0 + st.selectbox( + "Model", + _models, + index=_default, + key=_mk, + disabled=up, + help="Model to load on start. Stop then Start to swap models.", + ) + else: + st.caption(f"_No models found in {svc['model_dir']}_") + + with right_col: + if svc["start"] is None: + st.caption("_Manual start only_") + elif up: + if st.button("⏹ Stop", key=f"svc_stop_{svc['port']}", use_container_width=True): + with st.spinner(f"Stopping {svc['name']}…"): + r = _sp.run(svc["stop"], capture_output=True, text=True, cwd=svc["cwd"]) + if r.returncode == 0: + st.success("Stopped.") + else: + st.error(f"Error: {r.stderr or r.stdout}") + st.rerun() + else: + # Build start command, appending selected model for services with model_dir + _start_cmd = list(svc["start"]) + if "model_dir" in svc: + _sel = st.session_state.get(f"svc_model_{svc['port']}") + if _sel: + _start_cmd.append(_sel) + if st.button("β–Ά Start", key=f"svc_start_{svc['port']}", use_container_width=True, type="primary"): + with st.spinner(f"Starting {svc['name']}…"): + r = _sp.run(_start_cmd, capture_output=True, text=True, cwd=svc["cwd"]) + if r.returncode == 0: + st.success("Started!") + else: + st.error(f"Error: {r.stderr or r.stdout}") + st.rerun() + + st.divider() + st.subheader("πŸ€— Hugging Face") + st.caption( + "Used for uploading training data and running fine-tune jobs on HF infrastructure. " + "Token is stored in `config/tokens.yaml` (git-ignored). " + "Create a **write-permission** token at huggingface.co/settings/tokens." 
+ ) + + tok_cfg = load_yaml(TOKENS_CFG) if TOKENS_CFG.exists() else {} + hf_token = st.text_input( + "HF Token", + value=tok_cfg.get("hf_token", ""), + type="password", + placeholder="hf_…", + ) + + col_save_hf, col_test_hf = st.columns(2) + if col_save_hf.button("πŸ’Ύ Save HF token", type="primary"): + save_yaml(TOKENS_CFG, {**tok_cfg, "hf_token": hf_token}) + TOKENS_CFG.chmod(0o600) + st.success("Saved!") + + if col_test_hf.button("πŸ”Œ Test HF token"): + with st.spinner("Checking…"): + try: + import requests as _r + resp = _r.get( + "https://huggingface.co/api/whoami", + headers={"Authorization": f"Bearer {hf_token}"}, + timeout=5, + ) + if resp.ok: + info = resp.json() + name = info.get("name") or info.get("fullname") or "unknown" + auth = info.get("auth", {}) + perm = auth.get("accessToken", {}).get("role", "read") + st.success(f"Logged in as **{name}** Β· permission: `{perm}`") + if perm == "read": + st.warning("Token is read-only β€” create a **write** token to upload datasets and run Jobs.") + else: + st.error(f"Invalid token ({resp.status_code})") + except Exception as e: + st.error(f"Error: {e}") + +# ── Resume Profile tab ──────────────────────────────────────────────────────── +with tab_resume: + st.caption( + "Edit Alex's application profile. " + "Bullets are used as paste-able shortcuts in the Apply Workspace." + ) + + if not RESUME_PATH.exists(): + st.error(f"Resume YAML not found at `{RESUME_PATH}`. Is AIHawk cloned?") + st.stop() + + _data = yaml.safe_load(RESUME_PATH.read_text()) or {} + + def _field(label: str, value: str, key: str, help: str = "", password: bool = False) -> str: + needs_attention = str(value).startswith("FILL_IN") or value == "" + if needs_attention: + st.markdown( + '

⚠️ Needs attention

', + unsafe_allow_html=True, + ) + return st.text_input(label, value=value or "", key=key, help=help, + type="password" if password else "default") + + # ── Personal Info ───────────────────────────────────────────────────────── + with st.expander("πŸ‘€ Personal Information", expanded=True): + _info = _data.get("personal_information", {}) + _c1, _c2 = st.columns(2) + with _c1: + _name = _field("First Name", _info.get("name", ""), "rp_name") + _email = _field("Email", _info.get("email", ""), "rp_email") + _phone = _field("Phone", _info.get("phone", ""), "rp_phone") + _city = _field("City", _info.get("city", ""), "rp_city") + with _c2: + _surname = _field("Last Name", _info.get("surname", ""), "rp_surname") + _linkedin = _field("LinkedIn URL", _info.get("linkedin", ""), "rp_linkedin") + _zip_code = _field("Zip Code", _info.get("zip_code", ""), "rp_zip") + _dob = _field("Date of Birth", _info.get("date_of_birth", ""), "rp_dob", + help="MM/DD/YYYY") + + # ── Experience ──────────────────────────────────────────────────────────── + with st.expander("πŸ’Ό Work Experience"): + _exp_list = _data.get("experience_details", [{}]) + if "rp_exp_count" not in st.session_state: + st.session_state.rp_exp_count = len(_exp_list) + if st.button("+ Add Experience Entry", key="rp_add_exp"): + st.session_state.rp_exp_count += 1 + _exp_list.append({}) + + _updated_exp = [] + for _i in range(st.session_state.rp_exp_count): + _exp = _exp_list[_i] if _i < len(_exp_list) else {} + st.markdown(f"**Position {_i + 1}**") + _ec1, _ec2 = st.columns(2) + with _ec1: + _pos = _field("Job Title", _exp.get("position", ""), f"rp_pos_{_i}") + _co = _field("Company", _exp.get("company", ""), f"rp_co_{_i}") + _period = _field("Period", _exp.get("employment_period", ""), f"rp_period_{_i}", + help="e.g. 
01/2022 - Present") + with _ec2: + _loc = st.text_input("Location", _exp.get("location", ""), key=f"rp_loc_{_i}") + _ind = st.text_input("Industry", _exp.get("industry", ""), key=f"rp_ind_{_i}") + _resp_raw = st.text_area( + "Key Responsibilities (one per line)", + value="\n".join( + r.get(f"responsibility_{j+1}", "") if isinstance(r, dict) else str(r) + for j, r in enumerate(_exp.get("key_responsibilities", [])) + ), + key=f"rp_resp_{_i}", height=100, + ) + _skills_raw = st.text_input( + "Skills (comma-separated)", + value=", ".join(_exp.get("skills_acquired", [])), + key=f"rp_skills_{_i}", + ) + _updated_exp.append({ + "position": _pos, "company": _co, "employment_period": _period, + "location": _loc, "industry": _ind, + "key_responsibilities": [{"responsibility_1": r.strip()} for r in _resp_raw.splitlines() if r.strip()], + "skills_acquired": [s.strip() for s in _skills_raw.split(",") if s.strip()], + }) + st.divider() + + # ── Preferences ─────────────────────────────────────────────────────────── + with st.expander("βš™οΈ Preferences & Availability"): + _wp = _data.get("work_preferences", {}) + _sal = _data.get("salary_expectations", {}) + _avail = _data.get("availability", {}) + _pc1, _pc2 = st.columns(2) + with _pc1: + _salary_range = st.text_input("Salary Range (USD)", _sal.get("salary_range_usd", ""), + key="rp_salary", help="e.g. 
120000 - 180000") + _notice = st.text_input("Notice Period", _avail.get("notice_period", "2 weeks"), key="rp_notice") + with _pc2: + _remote = st.checkbox("Open to Remote", value=_wp.get("remote_work", "Yes") == "Yes", key="rp_remote") + _reloc = st.checkbox("Open to Relocation", value=_wp.get("open_to_relocation", "No") == "Yes", key="rp_reloc") + _assessments = st.checkbox("Willing to complete assessments", + value=_wp.get("willing_to_complete_assessments", "Yes") == "Yes", key="rp_assess") + _bg = st.checkbox("Willing to undergo background checks", + value=_wp.get("willing_to_undergo_background_checks", "Yes") == "Yes", key="rp_bg") + + # ── Self-ID ─────────────────────────────────────────────────────────────── + with st.expander("πŸ³οΈβ€πŸŒˆ Self-Identification (optional)"): + _sid = _data.get("self_identification", {}) + _sc1, _sc2 = st.columns(2) + with _sc1: + _gender = st.text_input("Gender identity", _sid.get("gender", "Non-binary"), key="rp_gender") + _pronouns = st.text_input("Pronouns", _sid.get("pronouns", "Any"), key="rp_pronouns") + _ethnicity = _field("Ethnicity", _sid.get("ethnicity", ""), "rp_ethnicity") + with _sc2: + _vet_opts = ["No", "Yes", "Prefer not to say"] + _veteran = st.selectbox("Veteran status", _vet_opts, + index=_vet_opts.index(_sid.get("veteran", "No")), key="rp_vet") + _dis_opts = ["Prefer not to say", "No", "Yes"] + _disability = st.selectbox("Disability disclosure", _dis_opts, + index=_dis_opts.index(_sid.get("disability", "Prefer not to say")), + key="rp_dis") + + st.divider() + if st.button("πŸ’Ύ Save Resume Profile", type="primary", use_container_width=True, key="rp_save"): + _data["personal_information"] = { + **_data.get("personal_information", {}), + "name": _name, "surname": _surname, "email": _email, "phone": _phone, + "city": _city, "zip_code": _zip_code, "linkedin": _linkedin, "date_of_birth": _dob, + } + _data["experience_details"] = _updated_exp + _data["salary_expectations"] = {"salary_range_usd": _salary_range} 
+ _data["availability"] = {"notice_period": _notice} + _data["work_preferences"] = { + **_data.get("work_preferences", {}), + "remote_work": "Yes" if _remote else "No", + "open_to_relocation": "Yes" if _reloc else "No", + "willing_to_complete_assessments": "Yes" if _assessments else "No", + "willing_to_undergo_background_checks": "Yes" if _bg else "No", + } + _data["self_identification"] = { + "gender": _gender, "pronouns": _pronouns, "veteran": _veteran, + "disability": _disability, "ethnicity": _ethnicity, + } + RESUME_PATH.write_text(yaml.dump(_data, default_flow_style=False, allow_unicode=True)) + st.success("βœ… Resume profile saved!") + st.balloons() + +# ── Email tab ───────────────────────────────────────────────────────────────── +with tab_email: + EMAIL_CFG = CONFIG_DIR / "email.yaml" + EMAIL_EXAMPLE = CONFIG_DIR / "email.yaml.example" + + st.caption( + "Connect Alex's email via IMAP to automatically associate recruitment " + "emails with job applications. Only emails that mention the company name " + "AND contain a recruitment keyword are ever imported β€” no personal emails " + "are touched." 
+ ) + + if not EMAIL_CFG.exists(): + st.info("No email config found β€” fill in your credentials below and click **Save** to create it.") + + em_cfg = load_yaml(EMAIL_CFG) if EMAIL_CFG.exists() else {} + + col_a, col_b = st.columns(2) + with col_a: + em_host = st.text_input("IMAP Host", em_cfg.get("host", "imap.gmail.com"), key="em_host") + em_port = st.number_input("Port", value=int(em_cfg.get("port", 993)), + min_value=1, max_value=65535, key="em_port") + em_ssl = st.checkbox("Use SSL", value=em_cfg.get("use_ssl", True), key="em_ssl") + with col_b: + em_user = st.text_input("Username (email address)", em_cfg.get("username", ""), key="em_user") + em_pass = st.text_input("Password / App Password", em_cfg.get("password", ""), + type="password", key="em_pass") + em_sent = st.text_input("Sent folder (blank = auto-detect)", + em_cfg.get("sent_folder", ""), key="em_sent", + placeholder='e.g. "[Gmail]/Sent Mail"') + + em_days = st.slider("Look-back window (days)", 14, 365, + int(em_cfg.get("lookback_days", 90)), key="em_days") + + st.caption( + "**Gmail users:** create an App Password at " + "myaccount.google.com/apppasswords (requires 2-Step Verification). " + "Enable IMAP at Gmail Settings β†’ Forwarding and POP/IMAP." 
+ ) + + col_save, col_test = st.columns(2) + + if col_save.button("πŸ’Ύ Save email settings", type="primary", key="em_save"): + save_yaml(EMAIL_CFG, { + "host": em_host, "port": int(em_port), "use_ssl": em_ssl, + "username": em_user, "password": em_pass, + "sent_folder": em_sent, "lookback_days": int(em_days), + }) + EMAIL_CFG.chmod(0o600) + st.success("Saved!") + + if col_test.button("πŸ”Œ Test connection", key="em_test"): + with st.spinner("Connecting…"): + try: + import imaplib as _imap + _conn = (_imap.IMAP4_SSL if em_ssl else _imap.IMAP4)(em_host, int(em_port)) + _conn.login(em_user, em_pass) + _, _caps = _conn.capability() + _conn.logout() + st.success(f"Connected successfully to {em_host}") + except Exception as e: + st.error(f"Connection failed: {e}") + +# ── Skills & Keywords tab ───────────────────────────────────────────────────── +with tab_skills: + st.subheader("🏷️ Skills & Keywords") + st.caption( + "These are matched against job descriptions to select Alex's most relevant " + "experience and highlight keyword overlap in the research brief." 
+ ) + + if not KEYWORDS_CFG.exists(): + st.warning("resume_keywords.yaml not found β€” create it at config/resume_keywords.yaml") + else: + kw_data = load_yaml(KEYWORDS_CFG) + + changed = False + for category in ["skills", "domains", "keywords"]: + st.markdown(f"**{category.title()}**") + tags: list[str] = kw_data.get(category, []) + + if not tags: + st.caption("No tags yet β€” add one below.") + + # Render existing tags as removable chips (value-based keys for stability) + n_cols = min(max(len(tags), 1), 6) + cols = st.columns(n_cols) + to_remove = None + for i, tag in enumerate(tags): + with cols[i % n_cols]: + if st.button(f"Γ— {tag}", key=f"rm_{category}_{tag}", use_container_width=True): + to_remove = tag + if to_remove: + tags.remove(to_remove) + kw_data[category] = tags + changed = True + + # Add new tag + new_col, btn_col = st.columns([4, 1]) + new_tag = new_col.text_input( + "Add", + key=f"new_{category}", + label_visibility="collapsed", + placeholder=f"Add {category[:-1] if category.endswith('s') else category}…", + ) + if btn_col.button("οΌ‹ Add", key=f"add_{category}"): + tag = new_tag.strip() + if tag and tag not in tags: + tags.append(tag) + kw_data[category] = tags + changed = True + + st.markdown("---") + + if changed: + save_yaml(KEYWORDS_CFG, kw_data) + st.success("Saved.") + st.rerun() diff --git a/app/pages/3_Resume_Editor.py b/app/pages/3_Resume_Editor.py new file mode 100644 index 0000000..092c2a3 --- /dev/null +++ b/app/pages/3_Resume_Editor.py @@ -0,0 +1,191 @@ +# app/pages/3_Resume_Editor.py +""" +Resume Editor β€” form-based editor for Alex's AIHawk profile YAML. +FILL_IN fields highlighted in amber. 
+""" +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +import streamlit as st +import yaml + +st.set_page_config(page_title="Resume Editor", page_icon="πŸ“", layout="wide") +st.title("πŸ“ Resume Editor") +st.caption("Edit Alex's application profile used by AIHawk for LinkedIn Easy Apply.") + +RESUME_PATH = Path(__file__).parent.parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml" + +if not RESUME_PATH.exists(): + st.error(f"Resume file not found at `{RESUME_PATH}`. Is AIHawk cloned?") + st.stop() + +data = yaml.safe_load(RESUME_PATH.read_text()) or {} + + +def field(label: str, value: str, key: str, help: str = "", password: bool = False) -> str: + """Render a text input, highlighted amber if value is FILL_IN or empty.""" + needs_attention = str(value).startswith("FILL_IN") or value == "" + if needs_attention: + st.markdown( + '

⚠️ Needs your attention

', + unsafe_allow_html=True, + ) + return st.text_input(label, value=value or "", key=key, help=help, + type="password" if password else "default") + + +st.divider() + +# ── Personal Info ───────────────────────────────────────────────────────────── +with st.expander("πŸ‘€ Personal Information", expanded=True): + info = data.get("personal_information", {}) + col1, col2 = st.columns(2) + with col1: + name = field("First Name", info.get("name", ""), "pi_name") + email = field("Email", info.get("email", ""), "pi_email") + phone = field("Phone", info.get("phone", ""), "pi_phone") + city = field("City", info.get("city", ""), "pi_city") + with col2: + surname = field("Last Name", info.get("surname", ""), "pi_surname") + linkedin = field("LinkedIn URL", info.get("linkedin", ""), "pi_linkedin") + zip_code = field("Zip Code", info.get("zip_code", ""), "pi_zip") + dob = field("Date of Birth", info.get("date_of_birth", ""), "pi_dob", + help="Format: MM/DD/YYYY") + +# ── Education ───────────────────────────────────────────────────────────────── +with st.expander("πŸŽ“ Education"): + edu_list = data.get("education_details", [{}]) + updated_edu = [] + degree_options = ["Bachelor's Degree", "Master's Degree", "Some College", + "Associate's Degree", "High School", "Other"] + for i, edu in enumerate(edu_list): + st.markdown(f"**Entry {i+1}**") + col1, col2 = st.columns(2) + with col1: + inst = field("Institution", edu.get("institution", ""), f"edu_inst_{i}") + field_study = st.text_input("Field of Study", edu.get("field_of_study", ""), key=f"edu_field_{i}") + start = st.text_input("Start Year", edu.get("start_date", ""), key=f"edu_start_{i}") + with col2: + current_level = edu.get("education_level", "Some College") + level_idx = degree_options.index(current_level) if current_level in degree_options else 2 + level = st.selectbox("Degree Level", degree_options, index=level_idx, key=f"edu_level_{i}") + end = st.text_input("Completion Year", edu.get("year_of_completion", ""), 
key=f"edu_end_{i}") + updated_edu.append({ + "education_level": level, "institution": inst, "field_of_study": field_study, + "start_date": start, "year_of_completion": end, "final_evaluation_grade": "", "exam": {}, + }) + st.divider() + +# ── Experience ──────────────────────────────────────────────────────────────── +with st.expander("πŸ’Ό Work Experience"): + exp_list = data.get("experience_details", [{}]) + if "exp_count" not in st.session_state: + st.session_state.exp_count = len(exp_list) + if st.button("+ Add Experience Entry"): + st.session_state.exp_count += 1 + exp_list.append({}) + + updated_exp = [] + for i in range(st.session_state.exp_count): + exp = exp_list[i] if i < len(exp_list) else {} + st.markdown(f"**Position {i+1}**") + col1, col2 = st.columns(2) + with col1: + pos = field("Job Title", exp.get("position", ""), f"exp_pos_{i}") + company = field("Company", exp.get("company", ""), f"exp_co_{i}") + period = field("Employment Period", exp.get("employment_period", ""), f"exp_period_{i}", + help="e.g. 
01/2022 - Present")
+        with col2:
+            location = st.text_input("Location", exp.get("location", ""), key=f"exp_loc_{i}")
+            industry = st.text_input("Industry", exp.get("industry", ""), key=f"exp_ind_{i}")
+
+        responsibilities = st.text_area(
+            "Key Responsibilities (one per line)",
+            value="\n".join(
+                next(iter(r.values()), "") if isinstance(r, dict) else str(r)  # key names vary (save below always writes "responsibility_1"); take the value, matching 4_Apply.py's flattening
+                for r in exp.get("key_responsibilities", [])
+            ),
+            key=f"exp_resp_{i}", height=100,
+        )
+        skills = st.text_input(
+            "Skills (comma-separated)",
+            value=", ".join(exp.get("skills_acquired", [])),
+            key=f"exp_skills_{i}",
+        )
+        resp_list = [{"responsibility_1": r.strip()} for r in responsibilities.splitlines() if r.strip()]
+        skill_list = [s.strip() for s in skills.split(",") if s.strip()]
+        updated_exp.append({
+            "position": pos, "company": company, "employment_period": period,
+            "location": location, "industry": industry,
+            "key_responsibilities": resp_list, "skills_acquired": skill_list,
+        })
+        st.divider()
+
+# ── Preferences ───────────────────────────────────────────────────────────────
+with st.expander("βš™οΈ Preferences & Availability"):
+    wp = data.get("work_preferences", {})
+    sal = data.get("salary_expectations", {})
+    avail = data.get("availability", {})
+    col1, col2 = st.columns(2)
+    with col1:
+        salary_range = st.text_input("Salary Range (USD)", sal.get("salary_range_usd", ""),
+                                     key="pref_salary", help="e.g. 
120000 - 180000")
+        notice = st.text_input("Notice Period", avail.get("notice_period", "2 weeks"), key="pref_notice")
+    with col2:
+        remote_work = st.checkbox("Open to Remote", value=wp.get("remote_work", "Yes") == "Yes", key="pref_remote")
+        relocation = st.checkbox("Open to Relocation", value=wp.get("open_to_relocation", "No") == "Yes", key="pref_reloc")
+        assessments = st.checkbox("Willing to complete assessments",
+                                  value=wp.get("willing_to_complete_assessments", "Yes") == "Yes", key="pref_assess")
+        bg_checks = st.checkbox("Willing to undergo background checks",
+                                value=wp.get("willing_to_undergo_background_checks", "Yes") == "Yes", key="pref_bg")
+        drug_tests = st.checkbox("Willing to undergo drug tests",
+                                 value=wp.get("willing_to_undergo_drug_tests", "No") == "Yes", key="pref_drug")
+
+# ── Self-ID ─────────────────────────────────────────────────────────────────
+with st.expander("πŸ³οΈβ€πŸŒˆ Self-Identification (optional)"):
+    sid = data.get("self_identification", {})
+    col1, col2 = st.columns(2)
+    with col1:
+        gender = st.text_input("Gender identity", sid.get("gender", "Non-binary"), key="sid_gender",
+                               help="Select 'Non-binary' or 'Prefer not to say' when options allow")
+        pronouns = st.text_input("Pronouns", sid.get("pronouns", "Any"), key="sid_pronouns")
+        ethnicity = field("Ethnicity", sid.get("ethnicity", ""), "sid_ethnicity",
+                          help="'Prefer not to say' is always an option")
+    with col2:
+        vet_options = ["No", "Yes", "Prefer not to say"]
+        veteran = st.selectbox("Veteran status", vet_options,
+                               index=vet_options.index(sid.get("veteran", "No")) if sid.get("veteran", "No") in vet_options else 0, key="sid_vet")  # guard: raw YAML may hold any string — same pattern as the Education level guard
+        dis_options = ["Prefer not to say", "No", "Yes"]
+        disability = st.selectbox("Disability disclosure", dis_options,
+                                  index=dis_options.index(sid.get("disability", "Prefer not to say")) if sid.get("disability", "Prefer not to say") in dis_options else 0,
+                                  key="sid_dis")
+
+st.divider()
+
+# ── Save ────────────────────────────────────────────────────────────────────
+if st.button("πŸ’Ύ Save Resume Profile", type="primary", use_container_width=True):
+    
data["personal_information"] = { + **data.get("personal_information", {}), + "name": name, "surname": surname, "email": email, "phone": phone, + "city": city, "zip_code": zip_code, "linkedin": linkedin, "date_of_birth": dob, + } + data["education_details"] = updated_edu + data["experience_details"] = updated_exp + data["salary_expectations"] = {"salary_range_usd": salary_range} + data["availability"] = {"notice_period": notice} + data["work_preferences"] = { + **data.get("work_preferences", {}), + "remote_work": "Yes" if remote_work else "No", + "open_to_relocation": "Yes" if relocation else "No", + "willing_to_complete_assessments": "Yes" if assessments else "No", + "willing_to_undergo_background_checks": "Yes" if bg_checks else "No", + "willing_to_undergo_drug_tests": "Yes" if drug_tests else "No", + } + data["self_identification"] = { + "gender": gender, "pronouns": pronouns, "veteran": veteran, + "disability": disability, "ethnicity": ethnicity, + } + RESUME_PATH.write_text(yaml.dump(data, default_flow_style=False, allow_unicode=True)) + st.success("βœ… Profile saved!") + st.balloons() diff --git a/app/pages/4_Apply.py b/app/pages/4_Apply.py new file mode 100644 index 0000000..123f1f4 --- /dev/null +++ b/app/pages/4_Apply.py @@ -0,0 +1,388 @@ +# app/pages/4_Apply.py +""" +Apply Workspace β€” side-by-side cover letter tools and job description. +Generates a PDF cover letter saved to the JobSearch docs folder. 
+""" +import re +import sys +from datetime import datetime +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +import streamlit as st +import streamlit.components.v1 as components +import yaml + +from scripts.db import ( + DEFAULT_DB, init_db, get_jobs_by_status, + update_cover_letter, mark_applied, update_job_status, + get_task_for_job, +) +from scripts.task_runner import submit_task + +DOCS_DIR = Path("/Library/Documents/JobSearch") +RESUME_YAML = Path(__file__).parent.parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml" + +st.title("πŸš€ Apply Workspace") + +init_db(DEFAULT_DB) + +# ── PDF generation ───────────────────────────────────────────────────────────── +def _make_cover_letter_pdf(job: dict, cover_letter: str, output_dir: Path) -> Path: + from reportlab.lib.pagesizes import letter + from reportlab.lib.units import inch + from reportlab.lib.colors import HexColor + from reportlab.lib.styles import ParagraphStyle + from reportlab.lib.enums import TA_LEFT + from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable + + output_dir.mkdir(parents=True, exist_ok=True) + company_safe = re.sub(r"[^a-zA-Z0-9]", "", job.get("company", "Company")) + date_str = datetime.now().strftime("%Y-%m-%d") + out_path = output_dir / f"CoverLetter_{company_safe}_{date_str}.pdf" + + doc = SimpleDocTemplate( + str(out_path), + pagesize=letter, + leftMargin=inch, rightMargin=inch, + topMargin=inch, bottomMargin=inch, + ) + + teal = HexColor("#2DD4BF") + dark = HexColor("#0F172A") + slate = HexColor("#64748B") + + name_style = ParagraphStyle( + "Name", fontName="Helvetica-Bold", fontSize=22, + textColor=teal, spaceAfter=6, + ) + contact_style = ParagraphStyle( + "Contact", fontName="Helvetica", fontSize=9, + textColor=slate, spaceAfter=4, + ) + date_style = ParagraphStyle( + "Date", fontName="Helvetica", fontSize=11, + textColor=dark, spaceBefore=16, spaceAfter=14, + ) + body_style = ParagraphStyle( + 
"Body", fontName="Helvetica", fontSize=11, + textColor=dark, leading=16, spaceAfter=12, alignment=TA_LEFT, + ) + + story = [ + Paragraph("ALEX RIVERA", name_style), + Paragraph( + "alex@example.com Β· (555) 867-5309 Β· " + "linkedin.com/in/AlexMcCann Β· hirealexmccann.site", + contact_style, + ), + HRFlowable(width="100%", thickness=1, color=teal, spaceBefore=8, spaceAfter=0), + Paragraph(datetime.now().strftime("%B %d, %Y"), date_style), + ] + + for para in cover_letter.strip().split("\n\n"): + para = para.strip() + if para: + story.append(Paragraph(para.replace("\n", "
"), body_style)) + + story += [ + Spacer(1, 6), + Paragraph("Warm regards,

Alex Rivera", body_style), + ] + + doc.build(story) + return out_path + +# ── Application Q&A helper ───────────────────────────────────────────────────── +def _answer_question(job: dict, question: str) -> str: + """Call the LLM to answer an application question in Alex's voice. + + Uses research_fallback_order (claude_code β†’ vllm β†’ ollama_research) + rather than the default cover-letter order β€” the fine-tuned cover letter + model is not suited for answering general application questions. + """ + from scripts.llm_router import LLMRouter + router = LLMRouter() + fallback = router.config.get("research_fallback_order") or router.config.get("fallback_order") + description_snippet = (job.get("description") or "")[:1200].strip() + prompt = f"""You are answering job application questions for Alex Rivera, a customer success leader. + +Background: +- 6+ years in customer success, technical account management, and CS leadership +- Most recent role: led Americas Customer Success at UpGuard (cybersecurity SaaS), NPS consistently β‰₯95 +- Also founder of M3 Consulting, a CS advisory practice for SaaS startups +- Based in SF Bay Area; open to remote/hybrid; pronouns: any + +Role she's applying to: {job.get("title", "")} at {job.get("company", "")} +{f"Job description excerpt:{chr(10)}{description_snippet}" if description_snippet else ""} + +Application Question: +{question} + +Answer in Alex's voice β€” specific, warm, and confident. If the question specifies a word or character limit, respect it. Answer only the question with no preamble or sign-off.""" + return router.complete(prompt, fallback_order=fallback).strip() + + +# ── Copy-to-clipboard button ─────────────────────────────────────────────────── +def _copy_btn(text: str, label: str = "πŸ“‹ Copy", done: str = "βœ… Copied!", height: int = 44) -> None: + import json + # Each components.html call renders in its own sandboxed iframe, so a fixed + # element id is fine. 
json.dumps handles all special chars (quotes, newlines, + # backslashes, etc.) β€” avoids the fragile inline-onclick escaping approach. + components.html( + f""" + """, + height=height, + ) + +# ── Job selection ────────────────────────────────────────────────────────────── +approved = get_jobs_by_status(DEFAULT_DB, "approved") +if not approved: + st.info("No approved jobs β€” head to Job Review to approve some listings first.") + st.stop() + +preselect_id = st.session_state.pop("apply_job_id", None) +job_options = {j["id"]: f"{j['title']} β€” {j['company']}" for j in approved} +ids = list(job_options.keys()) +default_idx = ids.index(preselect_id) if preselect_id in ids else 0 + +selected_id = st.selectbox( + "Job", + options=ids, + format_func=lambda x: job_options[x], + index=default_idx, + label_visibility="collapsed", +) +job = next(j for j in approved if j["id"] == selected_id) + +st.divider() + +# ── Two-column workspace ─────────────────────────────────────────────────────── +col_tools, col_jd = st.columns([2, 3]) + +# ════════════════════════════════════════════════ +# RIGHT β€” job description +# ════════════════════════════════════════════════ +with col_jd: + score = job.get("match_score") + score_badge = ( + "⬜ No score" if score is None else + f"🟒 {score:.0f}%" if score >= 70 else + f"🟑 {score:.0f}%" if score >= 40 else f"πŸ”΄ {score:.0f}%" + ) + remote_badge = "🌐 Remote" if job.get("is_remote") else "🏒 On-site" + src = (job.get("source") or "").lower() + source_badge = f"πŸ€– {src.title()}" if src == "linkedin" else f"πŸ‘€ {src.title() or 'Manual'}" + + st.subheader(job["title"]) + st.caption( + f"**{job['company']}** Β· {job.get('location', '')} Β· " + f"{remote_badge} Β· {source_badge} Β· {score_badge}" + ) + if job.get("salary"): + st.caption(f"πŸ’° {job['salary']}") + if job.get("keyword_gaps"): + st.caption(f"**Gaps to address in letter:** {job['keyword_gaps']}") + + st.divider() + st.markdown(job.get("description") or "_No description scraped for 
this listing._") + +# ════════════════════════════════════════════════ +# LEFT β€” copy tools +# ════════════════════════════════════════════════ +with col_tools: + + # ── Cover letter ────────────────────────────── + st.subheader("πŸ“ Cover Letter") + + _cl_key = f"cl_{selected_id}" + if _cl_key not in st.session_state: + st.session_state[_cl_key] = job.get("cover_letter") or "" + + _cl_task = get_task_for_job(DEFAULT_DB, "cover_letter", selected_id) + _cl_running = _cl_task and _cl_task["status"] in ("queued", "running") + + if st.button("✨ Generate / Regenerate", use_container_width=True, disabled=bool(_cl_running)): + submit_task(DEFAULT_DB, "cover_letter", selected_id) + st.rerun() + + if _cl_running: + @st.fragment(run_every=3) + def _cl_status_fragment(): + t = get_task_for_job(DEFAULT_DB, "cover_letter", selected_id) + if t and t["status"] in ("queued", "running"): + lbl = "Queued…" if t["status"] == "queued" else "Generating via LLM…" + st.info(f"⏳ {lbl}") + else: + st.rerun() # full page rerun β€” reloads cover letter from DB + _cl_status_fragment() + elif _cl_task and _cl_task["status"] == "failed": + st.error(f"Generation failed: {_cl_task.get('error', 'unknown error')}") + + # Refresh session state only when a NEW task has just completed β€” not on every rerun. + # Without this guard, every Save Draft click would overwrite the edited text with the + # old DB value before cl_text could be captured. 
+ _cl_loaded_key = f"cl_loaded_{selected_id}" + if not _cl_running and _cl_task and _cl_task["status"] == "completed": + if st.session_state.get(_cl_loaded_key) != _cl_task["id"]: + st.session_state[_cl_key] = job.get("cover_letter") or "" + st.session_state[_cl_loaded_key] = _cl_task["id"] + + cl_text = st.text_area( + "cover_letter_body", + key=_cl_key, + height=280, + label_visibility="collapsed", + ) + + # Copy + Save row + c1, c2 = st.columns(2) + with c1: + if cl_text: + _copy_btn(cl_text, label="πŸ“‹ Copy Letter") + with c2: + if st.button("πŸ’Ύ Save draft", use_container_width=True): + update_cover_letter(DEFAULT_DB, selected_id, cl_text) + st.success("Saved!") + + # PDF generation + if cl_text: + if st.button("πŸ“„ Export PDF β†’ JobSearch folder", use_container_width=True, type="primary"): + with st.spinner("Generating PDF…"): + try: + pdf_path = _make_cover_letter_pdf(job, cl_text, DOCS_DIR) + update_cover_letter(DEFAULT_DB, selected_id, cl_text) + st.success(f"Saved: `{pdf_path.name}`") + except Exception as e: + st.error(f"PDF error: {e}") + + st.divider() + + # Open listing + Mark Applied + c3, c4 = st.columns(2) + with c3: + if job.get("url"): + st.link_button("Open listing β†—", job["url"], use_container_width=True) + with c4: + if st.button("βœ… Mark as Applied", use_container_width=True, type="primary"): + if cl_text: + update_cover_letter(DEFAULT_DB, selected_id, cl_text) + mark_applied(DEFAULT_DB, [selected_id]) + st.success("Marked as applied!") + st.rerun() + + if st.button("🚫 Reject listing", use_container_width=True): + update_job_status(DEFAULT_DB, [selected_id], "rejected") + # Advance selectbox to next job so list doesn't snap to first item + current_idx = ids.index(selected_id) if selected_id in ids else 0 + if current_idx + 1 < len(ids): + st.session_state["apply_job_id"] = ids[current_idx + 1] + st.rerun() + + st.divider() + + # ── Resume highlights ───────────────────────── + with st.expander("πŸ“„ Resume Highlights"): + if 
RESUME_YAML.exists(): + resume = yaml.safe_load(RESUME_YAML.read_text()) or {} + for exp in resume.get("experience_details", []): + position = exp.get("position", "") + company = exp.get("company", "") + period = exp.get("employment_period", "") + + # Parse start / end dates (handles "MM/YYYY - Present" style) + if " - " in period: + date_start, date_end = [p.strip() for p in period.split(" - ", 1)] + else: + date_start, date_end = period, "" + + # Flatten bullets + bullets = [ + v + for resp_dict in exp.get("key_responsibilities", []) + for v in resp_dict.values() + ] + all_duties = "\n".join(f"β€’ {b}" for b in bullets) + + # ── Header ──────────────────────────────────────────────────── + st.markdown( + f"**{position}**  Β·  " + f"{company}  Β·  " + f"*{period}*" + ) + + # ── Copy row: title | start | end | all duties ──────────────── + cp_t, cp_s, cp_e, cp_d = st.columns(4) + with cp_t: + st.caption("Title") + _copy_btn(position, label="πŸ“‹ Copy", height=34) + with cp_s: + st.caption("Start") + _copy_btn(date_start, label="πŸ“‹ Copy", height=34) + with cp_e: + st.caption("End") + _copy_btn(date_end or period, label="πŸ“‹ Copy", height=34) + with cp_d: + st.caption("All Duties") + if bullets: + _copy_btn(all_duties, label="πŸ“‹ Copy", height=34) + + # ── Individual bullets ──────────────────────────────────────── + for bullet in bullets: + b_col, cp_col = st.columns([6, 1]) + b_col.caption(f"β€’ {bullet}") + with cp_col: + _copy_btn(bullet, label="πŸ“‹", done="βœ…", height=32) + + st.markdown("---") + else: + st.warning("Resume YAML not found β€” check that AIHawk is cloned.") + + # ── Application Q&A ─────────────────────────────────────────────────────── + with st.expander("πŸ’¬ Answer Application Questions"): + st.caption("Paste a question from the application and get an answer in your voice.") + + _qa_key = f"qa_list_{selected_id}" + if _qa_key not in st.session_state: + st.session_state[_qa_key] = [] + + q_input = st.text_area( + "Paste question", + 
placeholder="In 200 words or less, explain why you're a strong fit for this role.", + height=80, + key=f"qa_input_{selected_id}", + label_visibility="collapsed", + ) + if st.button("✨ Generate Answer", key=f"qa_gen_{selected_id}", + use_container_width=True, + disabled=not (q_input or "").strip()): + with st.spinner("Generating answer…"): + _answer = _answer_question(job, q_input.strip()) + st.session_state[_qa_key].append({"q": q_input.strip(), "a": _answer}) + st.rerun() + + for _i, _pair in enumerate(reversed(st.session_state[_qa_key])): + _real_idx = len(st.session_state[_qa_key]) - 1 - _i + st.markdown(f"**Q:** {_pair['q']}") + _a_key = f"qa_ans_{selected_id}_{_real_idx}" + if _a_key not in st.session_state: + st.session_state[_a_key] = _pair["a"] + _answer_text = st.text_area( + "answer", + key=_a_key, + height=120, + label_visibility="collapsed", + ) + _copy_btn(_answer_text, label="πŸ“‹ Copy Answer") + if _i < len(st.session_state[_qa_key]) - 1: + st.markdown("---") diff --git a/app/pages/5_Interviews.py b/app/pages/5_Interviews.py new file mode 100644 index 0000000..7d624e3 --- /dev/null +++ b/app/pages/5_Interviews.py @@ -0,0 +1,539 @@ +# app/pages/5_Interviews.py +""" +Interviews β€” Kanban board for tracking post-application engagement. 
+ +Pipeline: applied β†’ phone_screen β†’ interviewing β†’ offer β†’ hired + (or rejected at any stage, with stage captured for analytics) + +Features: + - Kanban columns for each interview stage + - Company research brief auto-generated when advancing to Phone Screen + - Contact / email log per job + - Email reply drafter via LLM + - Interview date tracking with calendar push hint + - Rejection analytics +""" +import sys +from collections import Counter +from datetime import date, datetime +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +import streamlit as st + +from scripts.db import ( + DEFAULT_DB, init_db, + get_interview_jobs, advance_to_stage, reject_at_stage, + set_interview_date, add_contact, get_contacts, + get_research, get_task_for_job, get_job_by_id, + get_unread_stage_signals, dismiss_stage_signal, +) +from scripts.task_runner import submit_task + +st.title("🎯 Interviews") + +init_db(DEFAULT_DB) + +# ── Sidebar: Email sync ──────────────────────────────────────────────────────── +with st.sidebar: + st.markdown("### πŸ“§ Email Sync") + _email_task = get_task_for_job(DEFAULT_DB, "email_sync", 0) + _email_running = _email_task and _email_task["status"] in ("queued", "running") + + if st.button("πŸ”„ Sync Emails", use_container_width=True, type="primary", + disabled=bool(_email_running)): + submit_task(DEFAULT_DB, "email_sync", 0) + st.rerun() + + if _email_running: + @st.fragment(run_every=4) + def _email_sidebar_status(): + t = get_task_for_job(DEFAULT_DB, "email_sync", 0) + if t and t["status"] in ("queued", "running"): + st.info("⏳ Syncing…") + else: + st.rerun() + _email_sidebar_status() + elif _email_task and _email_task["status"] == "completed": + st.success(_email_task.get("error", "Done")) + elif _email_task and _email_task["status"] == "failed": + msg = _email_task.get("error", "") + if "not configured" in msg.lower(): + st.error("Email not configured. 
Go to **Settings β†’ Email**.") + else: + st.error(f"Sync failed: {msg}") + +# ── Constants ───────────────────────────────────────────────────────────────── +STAGE_LABELS = { + "phone_screen": "πŸ“ž Phone Screen", + "interviewing": "🎯 Interviewing", + "offer": "πŸ“œ Offer / Hired", +} +STAGE_NEXT = { + "survey": "phone_screen", + "applied": "phone_screen", + "phone_screen": "interviewing", + "interviewing": "offer", + "offer": "hired", +} +STAGE_NEXT_LABEL = { + "survey": "πŸ“ž Phone Screen", + "applied": "πŸ“ž Phone Screen", + "phone_screen": "🎯 Interviewing", + "interviewing": "πŸ“œ Offer", + "offer": "πŸŽ‰ Hired", +} + +# ── Data ────────────────────────────────────────────────────────────────────── +jobs_by_stage = get_interview_jobs(DEFAULT_DB) + +# ── Helpers ─────────────────────────────────────────────────────────────────── +def _days_ago(date_str: str | None) -> str: + if not date_str: + return "β€”" + try: + d = date.fromisoformat(date_str[:10]) + delta = (date.today() - d).days + if delta == 0: + return "today" + if delta == 1: + return "yesterday" + return f"{delta}d ago" + except Exception: + return date_str[:10] + +@st.dialog("πŸ”¬ Company Research", width="large") +def _research_modal(job: dict) -> None: + job_id = job["id"] + st.caption(f"**{job.get('company')}** β€” {job.get('title')}") + research = get_research(DEFAULT_DB, job_id=job_id) + task = get_task_for_job(DEFAULT_DB, "company_research", job_id) + running = task and task["status"] in ("queued", "running") + + if running: + task_stage = (task.get("stage") or "") + lbl = "Queued…" if task["status"] == "queued" else (task_stage or "Generating…") + st.info(f"⏳ {lbl}") + elif research: + scrape_used = research.get("scrape_used") + if not scrape_used: + import socket as _sock + _searxng_up = False + try: + with _sock.create_connection(("127.0.0.1", 8888), timeout=1): + _searxng_up = True + except OSError: + pass + if _searxng_up: + st.warning( + "⚠️ This brief was generated without live web data 
and may contain " + "inaccuracies. SearXNG is now available β€” re-run to get verified facts." + ) + if st.button("πŸ”„ Re-run with live data", key=f"modal_rescrape_{job_id}", type="primary"): + submit_task(DEFAULT_DB, "company_research", job_id) + st.rerun() + st.divider() + else: + st.warning( + "⚠️ Generated without live web data (SearXNG was offline). " + "Key facts like CEO, investors, and founding date may be hallucinated β€” " + "verify before the call. Start SearXNG in Settings β†’ Services to re-run." + ) + st.divider() + st.caption( + f"Generated {research.get('generated_at', '')} " + f"{'Β· web data used βœ“' if scrape_used else 'Β· LLM knowledge only'}" + ) + st.markdown(research["raw_output"]) + if st.button("πŸ”„ Refresh", key=f"modal_regen_{job_id}", disabled=bool(running)): + submit_task(DEFAULT_DB, "company_research", job_id) + st.rerun() + else: + st.info("No research brief yet.") + if task and task["status"] == "failed": + st.error(f"Last attempt failed: {task.get('error', '')}") + if st.button("πŸ”¬ Generate now", key=f"modal_gen_{job_id}"): + submit_task(DEFAULT_DB, "company_research", job_id) + st.rerun() + + +@st.dialog("πŸ“§ Email History", width="large") +def _email_modal(job: dict) -> None: + job_id = job["id"] + st.caption(f"**{job.get('company')}** β€” {job.get('title')}") + contacts = get_contacts(DEFAULT_DB, job_id=job_id) + + if not contacts: + st.info("No emails logged yet. 
Use the form below to add one.") + else: + for c in contacts: + icon = "πŸ“₯" if c["direction"] == "inbound" else "πŸ“€" + st.markdown( + f"{icon} **{c.get('subject') or '(no subject)'}** " + f"Β· _{c.get('received_at', '')[:10]}_" + ) + if c.get("from_addr"): + st.caption(f"From: {c['from_addr']}") + if c.get("body"): + st.text(c["body"][:500] + ("…" if len(c["body"]) > 500 else "")) + st.divider() + + inbound = [c for c in contacts if c["direction"] == "inbound"] + if inbound: + last = inbound[-1] + if st.button("✍️ Draft reply", key=f"modal_draft_{job_id}"): + with st.spinner("Drafting…"): + try: + from scripts.llm_router import complete + draft = complete( + prompt=( + f"Draft a professional, warm reply to this email.\n\n" + f"From: {last.get('from_addr', '')}\n" + f"Subject: {last.get('subject', '')}\n\n" + f"{last.get('body', '')}\n\n" + f"Context: Alex Rivera is a Customer Success / " + f"Technical Account Manager applying for " + f"{job.get('title')} at {job.get('company')}." + ), + system=( + "You are Alex Rivera's professional email assistant. " + "Write concise, warm, and professional replies in her voice. " + "Keep it to 3–5 sentences unless more is needed." 
+ ), + ) + st.session_state[f"modal_draft_text_{job_id}"] = draft + st.rerun() + except Exception as e: + st.error(f"Draft failed: {e}") + + if f"modal_draft_text_{job_id}" in st.session_state: + st.text_area( + "Draft (edit before sending)", + value=st.session_state[f"modal_draft_text_{job_id}"], + height=160, + key=f"modal_draft_area_{job_id}", + ) + + st.divider() + st.markdown("**Log a contact**") + with st.form(key=f"contact_form_modal_{job_id}", clear_on_submit=True): + col_a, col_b = st.columns(2) + direction = col_a.radio( + "Direction", ["inbound", "outbound"], + horizontal=True, key=f"dir_modal_{job_id}", + ) + recv_at = col_b.text_input( + "Date (YYYY-MM-DD)", value=str(date.today()), key=f"recv_modal_{job_id}" + ) + subject = st.text_input("Subject", key=f"subj_modal_{job_id}") + from_addr = st.text_input("From", key=f"from_modal_{job_id}") + body_text = st.text_area("Body / notes", height=80, key=f"body_modal_{job_id}") + if st.form_submit_button("πŸ“§ Save contact"): + add_contact( + DEFAULT_DB, job_id=job_id, + direction=direction, subject=subject, + from_addr=from_addr, body=body_text, received_at=recv_at, + ) + st.rerun() + +def _render_card(job: dict, stage: str, compact: bool = False) -> None: + """Render a single job card appropriate for the given stage.""" + job_id = job["id"] + contacts = get_contacts(DEFAULT_DB, job_id=job_id) + last_contact = contacts[-1] if contacts else None + + with st.container(border=True): + st.markdown(f"**{job.get('company', '?')}**") + st.caption(job.get("title", "")) + + col_a, col_b = st.columns(2) + col_a.caption(f"Applied: {_days_ago(job.get('applied_at'))}") + if last_contact: + col_b.caption(f"Last contact: {_days_ago(last_contact.get('received_at'))}") + + # Interview date picker (phone_screen / interviewing stages) + if stage in ("phone_screen", "interviewing"): + current_idate = job.get("interview_date") or "" + with st.form(key=f"idate_form_{job_id}"): + new_date = st.date_input( + "Interview date", + 
value=date.fromisoformat(current_idate) if current_idate else None, + key=f"idate_{job_id}", + format="YYYY-MM-DD", + ) + if st.form_submit_button("πŸ“… Save date"): + set_interview_date(DEFAULT_DB, job_id=job_id, date_str=str(new_date)) + st.success("Saved!") + st.rerun() + + if not compact: + if stage in ("applied", "phone_screen", "interviewing"): + signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id) + if signals: + sig = signals[-1] + _SIGNAL_TO_STAGE = { + "interview_scheduled": ("phone_screen", "πŸ“ž Phone Screen"), + "positive_response": ("phone_screen", "πŸ“ž Phone Screen"), + "offer_received": ("offer", "πŸ“œ Offer"), + "survey_received": ("survey", "πŸ“‹ Survey"), + } + target_stage, target_label = _SIGNAL_TO_STAGE.get( + sig["stage_signal"], (None, None) + ) + with st.container(border=True): + st.caption( + f"πŸ’‘ Email suggests: **{sig['stage_signal'].replace('_', ' ')}** \n" + f"_{sig.get('subject', '')}_ Β· {(sig.get('received_at') or '')[:10]}" + ) + b1, b2 = st.columns(2) + if sig["stage_signal"] == "rejected": + if b1.button("βœ— Reject", key=f"sig_rej_{sig['id']}", + use_container_width=True): + reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage) + dismiss_stage_signal(DEFAULT_DB, sig["id"]) + st.rerun(scope="app") + elif target_stage and b1.button( + f"β†’ {target_label}", key=f"sig_adv_{sig['id']}", + use_container_width=True, type="primary", + ): + if target_stage == "phone_screen" and stage == "applied": + advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen") + submit_task(DEFAULT_DB, "company_research", job_id) + elif target_stage: + advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage) + dismiss_stage_signal(DEFAULT_DB, sig["id"]) + st.rerun(scope="app") + if b2.button("Dismiss", key=f"sig_dis_{sig['id']}", + use_container_width=True): + dismiss_stage_signal(DEFAULT_DB, sig["id"]) + st.rerun() + + # Advance / Reject buttons + next_stage = STAGE_NEXT.get(stage) + c1, c2 = st.columns(2) + if 
next_stage: + next_label = STAGE_NEXT_LABEL.get(stage, next_stage) + if c1.button( + f"β†’ {next_label}", key=f"adv_{job_id}", + use_container_width=True, type="primary", + ): + advance_to_stage(DEFAULT_DB, job_id=job_id, stage=next_stage) + if next_stage == "phone_screen": + submit_task(DEFAULT_DB, "company_research", job_id) + st.rerun(scope="app") # full rerun β€” card must appear in new column + + if c2.button( + "βœ— Reject", key=f"rej_{job_id}", + use_container_width=True, + ): + reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage) + st.rerun() # fragment-scope rerun β€” card disappears without scroll-to-top + + if job.get("url"): + st.link_button("Open listing β†—", job["url"], use_container_width=True) + + if stage in ("phone_screen", "interviewing", "offer"): + if st.button( + "πŸ“‹ Open Prep Sheet", key=f"prep_{job_id}", + use_container_width=True, + help="Open the Interview Prep page for this job", + ): + st.session_state["prep_job_id"] = job_id + st.switch_page("pages/6_Interview_Prep.py") + + # Detail modals β€” full-width overlays replace narrow inline expanders + if stage in ("phone_screen", "interviewing", "offer"): + mc1, mc2 = st.columns(2) + if mc1.button("πŸ”¬ Research", key=f"res_btn_{job_id}", use_container_width=True): + _research_modal(job) + if mc2.button("πŸ“§ Emails", key=f"email_btn_{job_id}", use_container_width=True): + _email_modal(job) + else: + if st.button("πŸ“§ Emails", key=f"email_btn_{job_id}", use_container_width=True): + _email_modal(job) + +# ── Fragment wrappers β€” keep scroll position on card actions ───────────────── +@st.fragment +def _card_fragment(job_id: int, stage: str) -> None: + """Re-fetches the job on each fragment rerun; renders nothing if moved/rejected.""" + job = get_job_by_id(DEFAULT_DB, job_id) + if job is None or job.get("status") != stage: + return + _render_card(job, stage) + + +@st.fragment +def _pre_kanban_row_fragment(job_id: int) -> None: + """Pre-kanban compact row for applied and 
survey-stage jobs.""" + job = get_job_by_id(DEFAULT_DB, job_id) + if job is None or job.get("status") not in ("applied", "survey"): + return + stage = job["status"] + contacts = get_contacts(DEFAULT_DB, job_id=job_id) + last_contact = contacts[-1] if contacts else None + + with st.container(border=True): + left, mid, right = st.columns([3, 2, 2]) + badge = " πŸ“‹ **Survey**" if stage == "survey" else "" + left.markdown(f"**{job.get('company')}** β€” {job.get('title', '')}{badge}") + left.caption(f"Applied: {_days_ago(job.get('applied_at'))}") + + with mid: + if last_contact: + st.caption(f"Last contact: {_days_ago(last_contact.get('received_at'))}") + if st.button("πŸ“§ Emails", key=f"email_pre_{job_id}", use_container_width=True): + _email_modal(job) + + # Stage signal hint (email-detected next steps) + signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id) + if signals: + sig = signals[-1] + _SIGNAL_TO_STAGE = { + "interview_scheduled": ("phone_screen", "πŸ“ž Phone Screen"), + "positive_response": ("phone_screen", "πŸ“ž Phone Screen"), + "offer_received": ("offer", "πŸ“œ Offer"), + "survey_received": ("survey", "πŸ“‹ Survey"), + } + target_stage, target_label = _SIGNAL_TO_STAGE.get( + sig["stage_signal"], (None, None) + ) + with st.container(border=True): + st.caption( + f"πŸ’‘ **{sig['stage_signal'].replace('_', ' ')}** \n" + f"_{sig.get('subject', '')}_ Β· {(sig.get('received_at') or '')[:10]}" + ) + s1, s2 = st.columns(2) + if target_stage and s1.button( + f"β†’ {target_label}", key=f"sig_adv_pre_{sig['id']}", + use_container_width=True, type="primary", + ): + if target_stage == "phone_screen": + advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen") + submit_task(DEFAULT_DB, "company_research", job_id) + else: + advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage) + dismiss_stage_signal(DEFAULT_DB, sig["id"]) + st.rerun(scope="app") + if s2.button("Dismiss", key=f"sig_dis_pre_{sig['id']}", + use_container_width=True): + 
dismiss_stage_signal(DEFAULT_DB, sig["id"]) + st.rerun() + + with right: + if st.button( + "β†’ πŸ“ž Phone Screen", key=f"adv_pre_{job_id}", + use_container_width=True, type="primary", + ): + advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen") + submit_task(DEFAULT_DB, "company_research", job_id) + st.rerun(scope="app") + col_a, col_b = st.columns(2) + if stage == "applied" and col_a.button( + "πŸ“‹ Survey", key=f"to_survey_{job_id}", use_container_width=True, + ): + advance_to_stage(DEFAULT_DB, job_id=job_id, stage="survey") + st.rerun(scope="app") + if col_b.button("βœ— Reject", key=f"rej_pre_{job_id}", use_container_width=True): + reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage) + st.rerun() + + +@st.fragment +def _hired_card_fragment(job_id: int) -> None: + """Compact hired job card β€” shown in the Offer/Hired column.""" + job = get_job_by_id(DEFAULT_DB, job_id) + if job is None or job.get("status") != "hired": + return + with st.container(border=True): + st.markdown(f"βœ… **{job.get('company', '?')}**") + st.caption(job.get("title", "")) + st.caption(f"Hired {_days_ago(job.get('hired_at'))}") + + +# ── Stats bar ───────────────────────────────────────────────────────────────── +c1, c2, c3, c4, c5, c6 = st.columns(6) +c1.metric("Applied", len(jobs_by_stage.get("applied", []))) +c2.metric("Survey", len(jobs_by_stage.get("survey", []))) +c3.metric("Phone Screen", len(jobs_by_stage.get("phone_screen", []))) +c4.metric("Interviewing", len(jobs_by_stage.get("interviewing", []))) +c5.metric("Offer/Hired", len(jobs_by_stage.get("offer", [])) + len(jobs_by_stage.get("hired", []))) +c6.metric("Rejected", len(jobs_by_stage.get("rejected", []))) + +st.divider() + +# ── Pre-kanban: Applied + Survey ─────────────────────────────────────────────── +applied_jobs = jobs_by_stage.get("applied", []) +survey_jobs = jobs_by_stage.get("survey", []) +pre_kanban = survey_jobs + applied_jobs # survey shown first + +if pre_kanban: + st.subheader(f"πŸ“‹ 
Pre-pipeline ({len(pre_kanban)})") + st.caption( + "Move a job to **Phone Screen** once you receive an outreach. " + "A company research brief will be auto-generated to help you prepare." + ) + for job in pre_kanban: + _pre_kanban_row_fragment(job["id"]) + st.divider() + +# ── Kanban columns ───────────────────────────────────────────────────────────── +kanban_stages = ["phone_screen", "interviewing", "offer"] +cols = st.columns(len(kanban_stages)) + +for col, stage in zip(cols, kanban_stages): + with col: + stage_jobs = jobs_by_stage.get(stage, []) + hired_jobs = jobs_by_stage.get("hired", []) if stage == "offer" else [] + all_col_jobs = stage_jobs + hired_jobs + st.markdown(f"### {STAGE_LABELS[stage]}") + st.caption(f"{len(all_col_jobs)} job{'s' if len(all_col_jobs) != 1 else ''}") + st.divider() + + if not all_col_jobs: + st.caption("_Empty_") + else: + for job in stage_jobs: + _card_fragment(job["id"], stage) + for job in hired_jobs: + _hired_card_fragment(job["id"]) + +st.divider() + +# ── Rejected log + analytics ─────────────────────────────────────────────────── +rejected_jobs = jobs_by_stage.get("rejected", []) +if rejected_jobs: + with st.expander(f"❌ Rejected ({len(rejected_jobs)})", expanded=False): + # Stage breakdown + stage_counts = Counter( + j.get("rejection_stage") or "unknown" for j in rejected_jobs + ) + st.caption( + "Rejection by stage: " + + " Β· ".join(f"**{k}**: {v}" for k, v in stage_counts.most_common()) + ) + + # Rejection rate timeline (simple) + if len(rejected_jobs) > 1: + by_month: dict[str, int] = {} + for j in rejected_jobs: + mo = (j.get("applied_at") or "")[:7] + if mo: + by_month[mo] = by_month.get(mo, 0) + 1 + if by_month: + import pandas as pd + chart_data = pd.DataFrame( + list(by_month.items()), columns=["Month", "Rejections"] + ).sort_values("Month") + st.bar_chart(chart_data.set_index("Month")) + + st.divider() + for job in rejected_jobs: + r_stage = job.get("rejection_stage") or "unknown" + company = job.get("company") or 
"?" + title = job.get("title") or "" + applied = _days_ago(job.get("applied_at")) + st.markdown( + f"**{company}** β€” {title} " + f"Β· rejected at _**{r_stage}**_ Β· applied {applied}" + ) diff --git a/app/pages/6_Interview_Prep.py b/app/pages/6_Interview_Prep.py new file mode 100644 index 0000000..533a111 --- /dev/null +++ b/app/pages/6_Interview_Prep.py @@ -0,0 +1,371 @@ +# app/pages/6_Interview_Prep.py +""" +Interview Prep β€” a clean, glanceable reference you can keep open during a call. + +Left panel : talking points, company brief, CEO info, practice Q&A +Right panel : job description, email / contact history, cover letter snippet +""" +import sys +from datetime import date +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +import streamlit as st + +from scripts.db import ( + DEFAULT_DB, init_db, + get_interview_jobs, get_contacts, get_research, + get_task_for_job, +) +from scripts.task_runner import submit_task + +init_db(DEFAULT_DB) + +# ── Job selection ───────────────────────────────────────────────────────────── +jobs_by_stage = get_interview_jobs(DEFAULT_DB) +active_stages = ["phone_screen", "interviewing", "offer"] +active_jobs = [ + j for stage in active_stages + for j in jobs_by_stage.get(stage, []) +] + +if not active_jobs: + st.title("πŸ“‹ Interview Prep") + st.info( + "No active interviews found. " + "Move a job to **Phone Screen** on the Interviews page first." 
+ ) + st.stop() + +# Allow pre-selecting via session state (e.g., from Interviews page) +preselect_id = st.session_state.pop("prep_job_id", None) +job_options = { + j["id"]: f"{j['title']} β€” {j['company']} ({j['status'].replace('_', ' ').title()})" + for j in active_jobs +} +ids = list(job_options.keys()) +default_idx = ids.index(preselect_id) if preselect_id in ids else 0 + +selected_id = st.selectbox( + "Job", + options=ids, + format_func=lambda x: job_options[x], + index=default_idx, + label_visibility="collapsed", +) +job = next(j for j in active_jobs if j["id"] == selected_id) + +# ── Header bar ──────────────────────────────────────────────────────────────── +stage_label = job["status"].replace("_", " ").title() +idate = job.get("interview_date") +countdown = "" +if idate: + try: + delta = (date.fromisoformat(idate) - date.today()).days + if delta == 0: + countdown = " πŸ”΄ **TODAY**" + elif delta == 1: + countdown = " 🟑 **TOMORROW**" + elif delta > 0: + countdown = f" 🟒 in {delta} days" + else: + countdown = f" (was {abs(delta)}d ago)" + except Exception: + countdown = "" + +st.title(f"πŸ“‹ {job.get('company')} β€” {job.get('title')}") +st.caption( + f"Stage: **{stage_label}**" + + (f" Β· Interview: {idate}{countdown}" if idate else "") + + (f" Β· Applied: {job.get('applied_at', '')[:10]}" if job.get("applied_at") else "") +) + +if job.get("url"): + st.link_button("Open job listing β†—", job["url"]) + +st.divider() + +# ── Two-column layout ───────────────────────────────────────────────────────── +col_prep, col_context = st.columns([2, 3]) + +# ════════════════════════════════════════════════ +# LEFT β€” prep materials +# ════════════════════════════════════════════════ +with col_prep: + + research = get_research(DEFAULT_DB, job_id=selected_id) + + # Refresh / generate research + _res_task = get_task_for_job(DEFAULT_DB, "company_research", selected_id) + _res_running = _res_task and _res_task["status"] in ("queued", "running") + + if not research: + if 
not _res_running: + st.warning("No research brief yet for this job.") + if _res_task and _res_task["status"] == "failed": + st.error(f"Last attempt failed: {_res_task.get('error', '')}") + if st.button("πŸ”¬ Generate research brief", type="primary", use_container_width=True): + submit_task(DEFAULT_DB, "company_research", selected_id) + st.rerun() + + if _res_running: + @st.fragment(run_every=3) + def _res_status_initial(): + t = get_task_for_job(DEFAULT_DB, "company_research", selected_id) + if t and t["status"] in ("queued", "running"): + stage = t.get("stage") or "" + lbl = "Queued…" if t["status"] == "queued" else (stage or "Generating… this may take 30–60 seconds") + st.info(f"⏳ {lbl}") + else: + st.rerun() + _res_status_initial() + + st.stop() + else: + generated_at = research.get("generated_at", "") + col_ts, col_btn = st.columns([3, 1]) + col_ts.caption(f"Research generated: {generated_at}") + if col_btn.button("πŸ”„ Refresh", use_container_width=True, disabled=bool(_res_running)): + submit_task(DEFAULT_DB, "company_research", selected_id) + st.rerun() + + if _res_running: + @st.fragment(run_every=3) + def _res_status_refresh(): + t = get_task_for_job(DEFAULT_DB, "company_research", selected_id) + if t and t["status"] in ("queued", "running"): + stage = t.get("stage") or "" + lbl = "Queued…" if t["status"] == "queued" else (stage or "Refreshing research…") + st.info(f"⏳ {lbl}") + else: + st.rerun() + _res_status_refresh() + elif _res_task and _res_task["status"] == "failed": + st.error(f"Refresh failed: {_res_task.get('error', '')}") + + st.divider() + + # ── Talking points (top β€” most useful during a call) ────────────────────── + st.subheader("🎯 Talking Points") + tp = (research.get("talking_points") or "").strip() + if tp: + st.markdown(tp) + else: + st.caption("_No talking points extracted β€” try regenerating._") + + st.divider() + + # ── Company brief ───────────────────────────────────────────────────────── + st.subheader("🏒 Company Overview") + 
st.markdown(research.get("company_brief", "_β€”_")) + + st.divider() + + # ── Leadership brief ────────────────────────────────────────────────────── + st.subheader("πŸ‘€ Leadership & Culture") + st.markdown(research.get("ceo_brief", "_β€”_")) + + st.divider() + + # ── Tech Stack & Product ─────────────────────────────────────────────────── + tech = (research.get("tech_brief") or "").strip() + if tech: + st.subheader("βš™οΈ Tech Stack & Product") + st.markdown(tech) + st.divider() + + # ── Funding & Market Position ────────────────────────────────────────────── + funding = (research.get("funding_brief") or "").strip() + if funding: + st.subheader("πŸ’° Funding & Market Position") + st.markdown(funding) + st.divider() + + # ── Red Flags & Watch-outs ──────────────────────────────────────────────── + red = (research.get("red_flags") or "").strip() + if red and "no significant red flags" not in red.lower(): + st.subheader("⚠️ Red Flags & Watch-outs") + st.warning(red) + st.divider() + + # ── Inclusion & Accessibility ───────────────────────────────────────────── + access = (research.get("accessibility_brief") or "").strip() + if access: + st.subheader("β™Ώ Inclusion & Accessibility") + st.caption("For your personal evaluation β€” not disclosed in any application.") + st.markdown(access) + st.divider() + + # ── Practice Q&A (collapsible β€” use before the call) ───────────────────── + with st.expander("🎀 Practice Q&A (pre-call prep)", expanded=False): + st.caption( + "The LLM will play the interviewer. Type your answers below. " + "Use this before the call to warm up." 
+ ) + + qa_key = f"qa_{selected_id}" + if qa_key not in st.session_state: + st.session_state[qa_key] = [] + + if st.button("πŸ”„ Start / Reset session", key=f"qa_reset_{selected_id}"): + st.session_state[qa_key] = [] + st.rerun() + + # Display history + for msg in st.session_state[qa_key]: + with st.chat_message(msg["role"]): + st.markdown(msg["content"]) + + # Initial question if session is empty + if not st.session_state[qa_key]: + with st.spinner("Setting up your mock interview…"): + try: + from scripts.llm_router import complete + opening = complete( + prompt=( + f"Start a mock phone screen for the {job.get('title')} " + f"role at {job.get('company')}. Ask your first question. " + f"Keep it realistic and concise." + ), + system=( + f"You are a recruiter at {job.get('company')} conducting " + f"a phone screen for the {job.get('title')} role. " + f"Ask one question at a time. After Alex answers, give " + f"brief feedback (1–2 sentences), then ask your next question. " + f"Be professional but warm." + ), + ) + st.session_state[qa_key] = [{"role": "assistant", "content": opening}] + st.rerun() + except Exception as e: + st.error(f"LLM error: {e}") + + # Answer input + answer = st.chat_input("Your answer…", key=f"qa_input_{selected_id}") + if answer and st.session_state[qa_key]: + history = st.session_state[qa_key] + history.append({"role": "user", "content": answer}) + + messages = [ + { + "role": "system", + "content": ( + f"You are a recruiter at {job.get('company')} conducting " + f"a phone screen for the {job.get('title')} role. " + f"Ask one question at a time. After Alex answers, give " + f"brief feedback (1–2 sentences), then ask your next question." 
+ ), + } + ] + history + + with st.spinner("…"): + try: + from scripts.llm_router import LLMRouter + router = LLMRouter() + # Build prompt from history for single-turn backends + convo = "\n\n".join( + f"{'Interviewer' if m['role'] == 'assistant' else 'Alex'}: {m['content']}" + for m in history + ) + response = router.complete( + prompt=convo + "\n\nInterviewer:", + system=messages[0]["content"], + ) + history.append({"role": "assistant", "content": response}) + st.session_state[qa_key] = history + st.rerun() + except Exception as e: + st.error(f"Error: {e}") + +# ════════════════════════════════════════════════ +# RIGHT β€” context / reference +# ════════════════════════════════════════════════ +with col_context: + + tab_jd, tab_emails, tab_letter = st.tabs( + ["πŸ“„ Job Description", "πŸ“§ Email History", "πŸ“ Cover Letter"] + ) + + with tab_jd: + score = job.get("match_score") + if score is not None: + badge = ( + f"🟒 {score:.0f}% match" if score >= 70 else + f"🟑 {score:.0f}% match" if score >= 40 else + f"πŸ”΄ {score:.0f}% match" + ) + st.caption(badge) + if job.get("keyword_gaps"): + st.caption(f"**Gaps to address:** {job['keyword_gaps']}") + st.markdown(job.get("description") or "_No description saved for this listing._") + + with tab_emails: + contacts = get_contacts(DEFAULT_DB, job_id=selected_id) + if not contacts: + st.info("No contacts logged yet. 
Use the Interviews page to log emails.") + else: + for c in contacts: + icon = "πŸ“₯" if c["direction"] == "inbound" else "πŸ“€" + recv = (c.get("received_at") or "")[:10] + st.markdown( + f"{icon} **{c.get('subject') or '(no subject)'}** Β· _{recv}_" + ) + if c.get("from_addr"): + st.caption(f"From: {c['from_addr']}") + if c.get("body"): + st.text(c["body"][:500] + ("…" if len(c["body"]) > 500 else "")) + st.divider() + + # Quick draft reply + inbound = [c for c in contacts if c["direction"] == "inbound"] + if inbound: + last = inbound[-1] + if st.button("✍️ Draft reply to last email"): + with st.spinner("Drafting…"): + try: + from scripts.llm_router import complete + draft = complete( + prompt=( + f"Draft a professional, warm reply.\n\n" + f"From: {last.get('from_addr', '')}\n" + f"Subject: {last.get('subject', '')}\n\n" + f"{last.get('body', '')}\n\n" + f"Context: Alex is a CS/TAM professional applying " + f"for {job.get('title')} at {job.get('company')}." + ), + system=( + "You are Alex Rivera's professional email assistant. " + "Write concise, warm, and professional replies in her voice." 
+ ), + ) + st.session_state[f"draft_{selected_id}"] = draft + except Exception as e: + st.error(f"Draft failed: {e}") + + if f"draft_{selected_id}" in st.session_state: + st.text_area( + "Draft (edit before sending)", + value=st.session_state[f"draft_{selected_id}"], + height=180, + ) + + with tab_letter: + cl = (job.get("cover_letter") or "").strip() + if cl: + st.markdown(cl) + else: + st.info("No cover letter saved for this job.") + + st.divider() + + # ── Notes (freeform, stored in session only β€” not persisted to DB) ──────── + st.subheader("πŸ“ Call Notes") + st.caption("Notes are per-session only β€” copy anything important before navigating away.") + st.text_area( + "notes", + placeholder="Type notes during or after the call…", + height=200, + key=f"notes_{selected_id}", + label_visibility="collapsed", + ) diff --git a/app/pages/7_Survey.py b/app/pages/7_Survey.py new file mode 100644 index 0000000..d5f00ed --- /dev/null +++ b/app/pages/7_Survey.py @@ -0,0 +1,274 @@ +# app/pages/7_Survey.py +""" +Survey Assistant β€” real-time help with culture-fit surveys. + +Supports text paste and screenshot (via clipboard or file upload). +Quick mode: "pick B" + one-liner. Detailed mode: option-by-option breakdown. 
+""" +import base64 +import io +import sys +from datetime import datetime +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +import requests +import streamlit as st + +from scripts.db import ( + DEFAULT_DB, init_db, + get_interview_jobs, get_job_by_id, + insert_survey_response, get_survey_responses, +) +from scripts.llm_router import LLMRouter + +st.title("πŸ“‹ Survey Assistant") + +init_db(DEFAULT_DB) + + +# ── Vision service health check ──────────────────────────────────────────────── +def _vision_available() -> bool: + try: + r = requests.get("http://localhost:8002/health", timeout=2) + return r.status_code == 200 + except Exception: + return False + + +vision_up = _vision_available() + +# ── Job selector ─────────────────────────────────────────────────────────────── +jobs_by_stage = get_interview_jobs(DEFAULT_DB) +survey_jobs = jobs_by_stage.get("survey", []) +other_jobs = ( + jobs_by_stage.get("applied", []) + + jobs_by_stage.get("phone_screen", []) + + jobs_by_stage.get("interviewing", []) + + jobs_by_stage.get("offer", []) +) +all_jobs = survey_jobs + other_jobs + +if not all_jobs: + st.info("No active jobs found. Add jobs in Job Review first.") + st.stop() + +job_labels = {j["id"]: f"{j.get('company', '?')} β€” {j.get('title', '')}" for j in all_jobs} +selected_job_id = st.selectbox( + "Job", + options=[j["id"] for j in all_jobs], + format_func=lambda jid: job_labels[jid], + index=0, +) +selected_job = get_job_by_id(DEFAULT_DB, selected_job_id) + +# ── LLM prompt builders ──────────────────────────────────────────────────────── +_SURVEY_SYSTEM = ( + "You are a job application advisor helping a candidate answer a culture-fit survey. " + "The candidate values collaborative teamwork, clear communication, growth, and impact. " + "Choose answers that present them in the best professional light." 
+) + + +def _build_text_prompt(text: str, mode: str) -> str: + if mode == "Quick": + return ( + "Answer each survey question below. For each, give ONLY the letter of the best " + "option and a single-sentence reason. Format exactly as:\n" + "1. B β€” reason here\n2. A β€” reason here\n\n" + f"Survey:\n{text}" + ) + return ( + "Analyze each survey question below. For each question:\n" + "- Briefly evaluate each option (1 sentence each)\n" + "- State your recommendation with reasoning\n\n" + f"Survey:\n{text}" + ) + + +def _build_image_prompt(mode: str) -> str: + if mode == "Quick": + return ( + "This is a screenshot of a culture-fit survey. Read all questions and answer each " + "with the letter of the best option for a collaborative, growth-oriented candidate. " + "Format: '1. B β€” brief reason' on separate lines." + ) + return ( + "This is a screenshot of a culture-fit survey. For each question, evaluate each option " + "and recommend the best choice for a collaborative, growth-oriented candidate. " + "Include a brief breakdown per option and a clear recommendation." + ) + + +# ── Layout ───────────────────────────────────────────────────────────────────── +left_col, right_col = st.columns([1, 1], gap="large") + +with left_col: + survey_name = st.text_input( + "Survey name (optional)", + placeholder="e.g. Culture Fit Round 1", + key="survey_name", + ) + mode = st.radio("Mode", ["Quick", "Detailed"], horizontal=True, key="survey_mode") + st.caption( + "**Quick** β€” best answer + one-liner per question | " + "**Detailed** β€” option-by-option breakdown" + ) + + # Input tabs + if vision_up: + tab_text, tab_screenshot = st.tabs(["πŸ“ Paste Text", "πŸ–ΌοΈ Screenshot"]) + else: + st.info( + "πŸ“· Screenshot input unavailable β€” vision service not running. 
\n" + "Start it with: `bash scripts/manage-vision.sh start`" + ) + tab_text = st.container() + tab_screenshot = None + + image_b64: str | None = None + raw_text: str = "" + + with tab_text: + raw_text = st.text_area( + "Paste survey questions here", + height=280, + placeholder=( + "Q1: Which describes your ideal work environment?\n" + "A. Solo focused work\nB. Collaborative team\n" + "C. Mix of both\nD. Depends on the task" + ), + key="survey_text", + ) + + if tab_screenshot is not None: + with tab_screenshot: + st.caption("Paste from clipboard or upload a screenshot file.") + paste_col, upload_col = st.columns(2) + + with paste_col: + try: + from streamlit_paste_button import paste_image_button + paste_result = paste_image_button("πŸ“‹ Paste from clipboard", key="paste_btn") + if paste_result and paste_result.image_data: + buf = io.BytesIO() + paste_result.image_data.save(buf, format="PNG") + image_b64 = base64.b64encode(buf.getvalue()).decode() + st.image( + paste_result.image_data, + caption="Pasted image", + use_container_width=True, + ) + except ImportError: + st.warning("streamlit-paste-button not installed. 
Use file upload.") + + with upload_col: + uploaded = st.file_uploader( + "Upload screenshot", + type=["png", "jpg", "jpeg"], + key="survey_upload", + label_visibility="collapsed", + ) + if uploaded: + image_b64 = base64.b64encode(uploaded.read()).decode() + st.image(uploaded, caption="Uploaded image", use_container_width=True) + + # Analyze button + has_input = bool(raw_text.strip()) or bool(image_b64) + if st.button("πŸ” Analyze", type="primary", disabled=not has_input, use_container_width=True): + with st.spinner("Analyzing…"): + try: + router = LLMRouter() + if image_b64: + prompt = _build_image_prompt(mode) + output = router.complete( + prompt, + images=[image_b64], + fallback_order=router.config.get("vision_fallback_order"), + ) + source = "screenshot" + else: + prompt = _build_text_prompt(raw_text, mode) + output = router.complete( + prompt, + system=_SURVEY_SYSTEM, + fallback_order=router.config.get("research_fallback_order"), + ) + source = "text_paste" + st.session_state["survey_output"] = output + st.session_state["survey_source"] = source + st.session_state["survey_image_b64"] = image_b64 + st.session_state["survey_raw_text"] = raw_text + except Exception as e: + st.error(f"Analysis failed: {e}") + +with right_col: + output = st.session_state.get("survey_output") + if output: + st.markdown("### Analysis") + st.markdown(output) + + st.divider() + with st.form("save_survey_form"): + reported_score = st.text_input( + "Reported score (optional)", + placeholder="e.g. 
82% or 4.2/5", + key="reported_score_input", + ) + if st.form_submit_button("πŸ’Ύ Save to Job"): + source = st.session_state.get("survey_source", "text_paste") + image_b64_saved = st.session_state.get("survey_image_b64") + raw_text_saved = st.session_state.get("survey_raw_text", "") + + image_path = "" + if image_b64_saved: + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + save_dir = ( + Path(__file__).parent.parent.parent + / "data" + / "survey_screenshots" + / str(selected_job_id) + ) + save_dir.mkdir(parents=True, exist_ok=True) + img_file = save_dir / f"{ts}.png" + img_file.write_bytes(base64.b64decode(image_b64_saved)) + image_path = str(img_file) + + insert_survey_response( + DEFAULT_DB, + job_id=selected_job_id, + survey_name=survey_name, + source=source, + raw_input=raw_text_saved, + image_path=image_path, + mode=mode.lower(), + llm_output=output, + reported_score=reported_score, + ) + st.success("Saved!") + del st.session_state["survey_output"] + st.rerun() + else: + st.markdown("### Analysis") + st.caption("Results will appear here after analysis.") + +# ── History ──────────────────────────────────────────────────────────────────── +st.divider() +st.subheader("πŸ“‚ Response History") +history = get_survey_responses(DEFAULT_DB, job_id=selected_job_id) + +if not history: + st.caption("No saved responses for this job yet.") +else: + for resp in history: + label = resp.get("survey_name") or "Survey response" + ts = (resp.get("created_at") or "")[:16] + score = resp.get("reported_score") + score_str = f" Β· Score: {score}" if score else "" + with st.expander(f"{label} Β· {ts}{score_str}"): + st.caption(f"Mode: {resp.get('mode', '?')} Β· Source: {resp.get('source', '?')}") + if resp.get("raw_input"): + with st.expander("Original input"): + st.text(resp["raw_input"]) + st.markdown(resp.get("llm_output", "")) diff --git a/config/adzuna.yaml.example b/config/adzuna.yaml.example new file mode 100644 index 0000000..e58a46f --- /dev/null +++ 
b/config/adzuna.yaml.example @@ -0,0 +1,5 @@ +# Adzuna Jobs API credentials +# Register at https://developer.adzuna.com/admin/applications +# Both app_id and app_key are required. +app_id: "" # short alphanumeric ID from your developer dashboard +app_key: "" # 32-character hex key from your developer dashboard diff --git a/config/blocklist.yaml b/config/blocklist.yaml new file mode 100644 index 0000000..398064d --- /dev/null +++ b/config/blocklist.yaml @@ -0,0 +1,15 @@ +# Discovery blocklist β€” entries matching any rule are silently dropped before DB insert. +# Applies globally across all search profiles and custom boards. + +# Company name blocklist β€” partial case-insensitive match on the company field. +# e.g. "Amazon" blocks any listing where company contains "amazon". +companies: [] + +# Industry/content blocklist β€” blocked if company name OR job description contains any keyword. +# Use this for industries you will never work in regardless of company. +# e.g. "gambling", "crypto", "tobacco", "defense" +industries: [] + +# Location blocklist β€” blocked if the location field contains any of these strings. +# e.g. "Dallas", "Austin, TX" +locations: [] diff --git a/config/craigslist.yaml.example b/config/craigslist.yaml.example new file mode 100644 index 0000000..578dcb8 --- /dev/null +++ b/config/craigslist.yaml.example @@ -0,0 +1,24 @@ +# Craigslist metro subdomains to search. +# Copy to config/craigslist.yaml and adjust for your markets. +# Full subdomain list: https://www.craigslist.org/about/sites +metros: + - sfbay + - newyork + - chicago + - losangeles + - seattle + - austin + +# Maps search profile location strings β†’ Craigslist metro subdomain. +# Locations not listed here are silently skipped. +location_map: + "San Francisco Bay Area, CA": sfbay + "New York, NY": newyork + "Chicago, IL": chicago + "Los Angeles, CA": losangeles + "Seattle, WA": seattle + "Austin, TX": austin + +# Craigslist job category. Defaults to 'jjj' (general jobs) if omitted. 
+# Other options: csr (customer service), mar (marketing), sof (software/qa/dba) +# category: jjj diff --git a/config/email.yaml.example b/config/email.yaml.example new file mode 100644 index 0000000..b234cc1 --- /dev/null +++ b/config/email.yaml.example @@ -0,0 +1,38 @@ +# config/email.yaml β€” IMAP email sync configuration +# Copy this to config/email.yaml and fill in your credentials. +# config/email.yaml is gitignored β€” never commit real credentials. +# +# Gmail setup: +# 1. Enable IMAP: Gmail Settings β†’ See all settings β†’ Forwarding and POP/IMAP +# 2. Create App Password: myaccount.google.com/apppasswords +# (requires 2-Step Verification to be enabled) +# 3. Use your Gmail address as username, App Password as password. +# +# Outlook / Office 365: +# host: outlook.office365.com +# port: 993 +# use_ssl: true +# (Use your regular email + password, or an App Password if MFA is enabled) + +host: imap.gmail.com +port: 993 +use_ssl: true + +# Your full email address +username: your.email@gmail.com + +# Gmail: use an App Password (16-char code, no spaces) +# Other providers: use your regular password (or App Password if MFA enabled) +password: xxxx-xxxx-xxxx-xxxx + +# Sent folder name β€” leave blank to auto-detect +# Gmail: "[Gmail]/Sent Mail" Outlook: "Sent Items" Generic: "Sent" +sent_folder: "" + +# How many days back to search (90 = ~3 months) +lookback_days: 90 + +# Optional: Gmail label to scan for action-needed emails (e.g. "TO DO JOBS"). +# Emails in this label are matched to pipeline jobs by company name, then +# filtered by action keywords in the subject. Leave blank to disable. 
+todo_label: "" diff --git a/config/llm.yaml b/config/llm.yaml new file mode 100644 index 0000000..e5a58e5 --- /dev/null +++ b/config/llm.yaml @@ -0,0 +1,66 @@ +backends: + anthropic: + api_key_env: ANTHROPIC_API_KEY + enabled: false + model: claude-sonnet-4-6 + type: anthropic + supports_images: true + claude_code: + api_key: any + base_url: http://localhost:3009/v1 + enabled: false + model: claude-code-terminal + type: openai_compat + supports_images: true + github_copilot: + api_key: any + base_url: http://localhost:3010/v1 + enabled: false + model: gpt-4o + type: openai_compat + supports_images: false + ollama: + api_key: ollama + base_url: http://localhost:11434/v1 + enabled: true + model: alex-cover-writer:latest + type: openai_compat + supports_images: false + ollama_research: + api_key: ollama + base_url: http://localhost:11434/v1 + enabled: true + model: llama3.1:8b + type: openai_compat + supports_images: false + vllm: + api_key: '' + base_url: http://localhost:8000/v1 + enabled: true + model: __auto__ + type: openai_compat + supports_images: false + vision_service: + base_url: http://localhost:8002 + enabled: false + type: vision_service + supports_images: true +fallback_order: +- ollama +- claude_code +- vllm +- github_copilot +- anthropic +research_fallback_order: +- claude_code +- vllm +- ollama_research +- github_copilot +- anthropic +vision_fallback_order: +- vision_service +- claude_code +- anthropic +# Note: 'ollama' (alex-cover-writer) intentionally excluded β€” research +# must never use the fine-tuned writer model, and this also avoids evicting +# the writer from GPU memory while a cover letter task is in flight. 
diff --git a/config/llm.yaml.example b/config/llm.yaml.example new file mode 100644 index 0000000..e5a58e5 --- /dev/null +++ b/config/llm.yaml.example @@ -0,0 +1,66 @@ +backends: + anthropic: + api_key_env: ANTHROPIC_API_KEY + enabled: false + model: claude-sonnet-4-6 + type: anthropic + supports_images: true + claude_code: + api_key: any + base_url: http://localhost:3009/v1 + enabled: false + model: claude-code-terminal + type: openai_compat + supports_images: true + github_copilot: + api_key: any + base_url: http://localhost:3010/v1 + enabled: false + model: gpt-4o + type: openai_compat + supports_images: false + ollama: + api_key: ollama + base_url: http://localhost:11434/v1 + enabled: true + model: alex-cover-writer:latest + type: openai_compat + supports_images: false + ollama_research: + api_key: ollama + base_url: http://localhost:11434/v1 + enabled: true + model: llama3.1:8b + type: openai_compat + supports_images: false + vllm: + api_key: '' + base_url: http://localhost:8000/v1 + enabled: true + model: __auto__ + type: openai_compat + supports_images: false + vision_service: + base_url: http://localhost:8002 + enabled: false + type: vision_service + supports_images: true +fallback_order: +- ollama +- claude_code +- vllm +- github_copilot +- anthropic +research_fallback_order: +- claude_code +- vllm +- ollama_research +- github_copilot +- anthropic +vision_fallback_order: +- vision_service +- claude_code +- anthropic +# Note: 'ollama' (alex-cover-writer) intentionally excluded β€” research +# must never use the fine-tuned writer model, and this also avoids evicting +# the writer from GPU memory while a cover letter task is in flight. diff --git a/config/notion.yaml.example b/config/notion.yaml.example new file mode 100644 index 0000000..55977dd --- /dev/null +++ b/config/notion.yaml.example @@ -0,0 +1,24 @@ +# Copy to config/notion.yaml and fill in your values. +# notion.yaml is gitignored β€” never commit it. 
+#
+# Get your integration token from: https://www.notion.so/my-integrations
+# Then share the "Tracking Job Applications" database with your integration:
+#   Open the DB in Notion → ... menu → Add connections → select your integration
+#
+token: "secret_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
+# Your database's UUID — find it in the database URL (the 32-hex-char segment).
+database_id: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+
+field_map:
+  title_field: "Salary"
+  job_title: "Job Title"
+  company: "Company Name"
+  url: "Role Link"
+  source: "Job Source"
+  status: "Status of Application"
+  status_new: "Application Submitted"
+  date_found: "Date Found"
+  remote: "Remote"
+  match_score: "Match Score"
+  keyword_gaps: "Keyword Gaps"
+  notes: "Notes"
+  job_description: "Job Description"
diff --git a/config/resume_keywords.yaml b/config/resume_keywords.yaml
new file mode 100644
index 0000000..7cfdab3
--- /dev/null
+++ b/config/resume_keywords.yaml
@@ -0,0 +1,23 @@
+domains:
+- B2B SaaS
+- enterprise software
+- security
+- compliance
+- post-sale lifecycle
+- SaaS metrics
+- web security
+keywords:
+- churn reduction
+- escalation management
+- cross-functional
+- product feedback loop
+- customer advocacy
+skills:
+- Customer Success
+- Technical Account Management
+- Revenue Operations
+- data analysis
+- stakeholder management
+- project management
+- onboarding
+- renewal management
diff --git a/config/resume_keywords.yaml.example b/config/resume_keywords.yaml.example
new file mode 100644
index 0000000..6ff978c
--- /dev/null
+++ b/config/resume_keywords.yaml.example
@@ -0,0 +1,33 @@
+skills:
+  - Customer Success
+  - Technical Account Management
+  - Revenue Operations
+  - Salesforce
+  - Gainsight
+  - data analysis
+  - stakeholder management
+  - project management
+  - onboarding
+  - renewal management
+
+domains:
+  - B2B SaaS
+  - enterprise software
+  - security
+  - compliance
+  - post-sale lifecycle
+  - SaaS metrics
+
+keywords:
+  - QBR
+  - churn reduction
+  - NRR
+  - ARR
+  - MRR
+  - executive sponsorship
+  - VOC
+  - health score
+  -
escalation management + - cross-functional + - product feedback loop + - customer advocacy diff --git a/config/search_profiles.yaml b/config/search_profiles.yaml new file mode 100644 index 0000000..bada59a --- /dev/null +++ b/config/search_profiles.yaml @@ -0,0 +1,123 @@ +profiles: +- boards: + - linkedin + - indeed + - glassdoor + - zip_recruiter + - google + custom_boards: + - adzuna + - theladders + - craigslist + exclude_keywords: + - sales + - account executive + - sales engineer + - SDR + - BDR + - business development + - sales development + - sales manager + - sales representative + - sales rep + hours_old: 240 + locations: + - Remote + - San Francisco Bay Area, CA + name: cs_leadership + results_per_board: 75 + titles: + - Customer Success Manager + - Customer Engagement Manager + - Director of Customer Success + - VP Customer Success + - Head of Customer Success + - Technical Account Manager + - TAM + - Customer Experience Lead + - CSM + - CX + - Customer Success Consultant +- boards: + - linkedin + - indeed + custom_boards: + - adzuna + - craigslist + exclude_keywords: + - sales + - account executive + - SDR + - BDR + - sales development + hours_old: 336 + locations: + - Remote + - San Francisco Bay Area, CA + mission_tags: + - music + name: music_industry + results_per_board: 50 + titles: + - Customer Success Manager + - Partner Success Manager + - Artist Success Manager + - Creator Success Manager + - Technical Account Manager + - Community Manager + - Account Manager + - Label Relations Manager +- boards: + - linkedin + - indeed + custom_boards: + - adzuna + - craigslist + exclude_keywords: + - sales + - account executive + - SDR + - BDR + hours_old: 336 + locations: + - Remote + - San Francisco Bay Area, CA + mission_tags: + - animal_welfare + name: animal_welfare + results_per_board: 50 + titles: + - Customer Success Manager + - Program Manager + - Community Engagement Manager + - Operations Manager + - Partner Success Manager + - Account Manager + 
- Development Manager +- boards: + - linkedin + - indeed + custom_boards: + - adzuna + - craigslist + exclude_keywords: + - sales + - account executive + - SDR + - BDR + hours_old: 336 + locations: + - Remote + - San Francisco Bay Area, CA + mission_tags: + - education + name: education + results_per_board: 50 + titles: + - Customer Success Manager + - District Success Manager + - Implementation Specialist + - Partner Success Manager + - Account Manager + - School Success Manager + - Customer Experience Manager diff --git a/data/survey_screenshots/.gitkeep b/data/survey_screenshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..d381d9d --- /dev/null +++ b/environment.yml @@ -0,0 +1,68 @@ +name: job-seeker +# Recreate: conda env create -f environment.yml +# Update pinned snapshot: conda env export --no-builds > environment.yml +channels: + - conda-forge + - defaults +dependencies: + - python=3.12 + - pip + - pip: + # ── Web UI ──────────────────────────────────────────────────────────────── + - streamlit>=1.35 + - watchdog # live reload + - reportlab>=4.0 # PDF cover letter export + - pandas>=2.0 + - pyarrow # streamlit data tables + - streamlit-paste-button>=0.1.0 + + # ── Job scraping ────────────────────────────────────────────────────────── + - python-jobspy>=1.1 + - playwright # browser automation (run: playwright install chromium) + - selenium + - undetected-chromedriver + - webdriver-manager + - beautifulsoup4 + - requests + - curl_cffi # Chrome TLS fingerprint β€” bypasses Cloudflare on The Ladders + - fake-useragent # company scraper rotation + + # ── LLM / AI backends ───────────────────────────────────────────────────── + - openai>=1.0 # used for OpenAI-compat backends (ollama, vllm, wrappers) + - anthropic>=0.80 # direct Anthropic API fallback + - ollama # Python client for Ollama management + - langchain>=0.2 + - langchain-openai + - langchain-anthropic + - 
langchain-ollama + - langchain-community + - langchain-google-genai + - google-generativeai + - tiktoken + + # ── Resume matching ─────────────────────────────────────────────────────── + - scikit-learn>=1.3 + - rapidfuzz + - lib-resume-builder-aihawk + + # ── Notion integration ──────────────────────────────────────────────────── + - notion-client>=3.0 + + # ── Document handling ───────────────────────────────────────────────────── + - pypdf + - pdfminer-six + - pyyaml>=6.0 + - python-dotenv + + # ── Utilities ───────────────────────────────────────────────────────────── + - sqlalchemy + - tqdm + - loguru + - rich + - tenacity + - httpx + + # ── Testing ─────────────────────────────────────────────────────────────── + - pytest>=9.0 + - pytest-cov + - pytest-mock diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..5ee6477 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +testpaths = tests diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/company_research.py b/scripts/company_research.py new file mode 100644 index 0000000..3c7069c --- /dev/null +++ b/scripts/company_research.py @@ -0,0 +1,468 @@ +# scripts/company_research.py +""" +Pre-interview company research generator. + +Three-phase approach: + 1. If SearXNG is available (port 8888), use companyScraper.py to fetch live + data: CEO name, HQ address, LinkedIn, contact info. + 1b. Use Phase 1 data (company name + CEO if found) to query SearXNG for + recent news snippets (funding, launches, leadership changes, etc.). + 2. Feed all real data into an LLM prompt to synthesise a structured brief + covering company overview, leadership, recent developments, and talking + points tailored to Alex. + +Falls back to pure LLM knowledge when SearXNG is offline. 
+
+Usage (standalone):
+    conda run -n job-seeker python scripts/company_research.py --job-id 42
+    conda run -n job-seeker python scripts/company_research.py --job-id 42 --no-scrape
+"""
+import re
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+
+# Make the repo root importable so `scripts.*` resolves when run standalone.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+# ── SearXNG scraper integration ──────────────────────────────────────────────
+# companyScraper lives outside this repo; it is imported opportunistically and
+# everything below degrades gracefully when it (or SearXNG) is unavailable.
+_SCRAPER_DIR = Path("/Library/Development/scrapers")
+_SCRAPER_AVAILABLE = False
+
+if _SCRAPER_DIR.exists():
+    sys.path.insert(0, str(_SCRAPER_DIR))
+    try:
+        from companyScraper import EnhancedCompanyScraper, Config as _ScraperConfig
+        _SCRAPER_AVAILABLE = True
+    except (ImportError, SystemExit):
+        # companyScraper calls sys.exit(1) if bs4/fake-useragent aren't installed
+        pass
+
+
+def _searxng_running() -> bool:
+    """Quick check whether SearXNG is reachable."""
+    try:
+        import requests
+        r = requests.get("http://localhost:8888/", timeout=3)
+        return r.status_code == 200
+    except Exception:
+        return False
+
+
+def _scrape_company(company: str) -> dict:
+    """
+    Use companyScraper in minimal mode to pull live CEO / HQ data.
+    Returns a dict with keys: ceo, headquarters, linkedin (may be 'Not found').
+    """
+    # companyScraper is CLI-oriented; fake its argparse namespace here.
+    mock_args = SimpleNamespace(
+        mode="minimal",
+        verbose=False,
+        dry_run=False,
+        debug=False,
+        use_cache=True,
+        save_raw=False,
+        target_staff=None,
+        include_types=None,
+        exclude_types=None,
+        include_contact=False,
+        include_address=False,
+        include_social=True,  # grab LinkedIn while we're at it
+        timeout=20,
+        input_file=None,
+        output_file="/dev/null",
+        searxng_url="http://localhost:8888/",
+    )
+    # Override the singleton Config URL
+    _ScraperConfig.SEARXNG_URL = "http://localhost:8888/"
+
+    scraper = EnhancedCompanyScraper(mock_args)
+    scraper.companies = [company]
+
+    # "Not found" is the sentinel the extractors themselves return on a miss.
+    result: dict = {"ceo": "Not found", "headquarters": "Not found", "linkedin": "Not found"}
+    for search_type in ["ceo", "hq", "social"]:
+        html = scraper.search_company(company, search_type)
+        if search_type == "ceo":
+            result["ceo"] = scraper.extract_ceo(html, company)
+        elif search_type == "hq":
+            result["headquarters"] = scraper.extract_address(html, company)
+        elif search_type == "social":
+            social = scraper.extract_social(html, company)
+            # Pull out just the LinkedIn entry
+            for part in (social or "").split(";"):
+                if "linkedin" in part.lower():
+                    result["linkedin"] = part.strip()
+                    break
+
+    return result
+
+
+# Query templates per search facet; "{company}"/"{ceo}" are substituted with
+# str.replace (not .format) in _fetch_search_data.
+_SEARCH_QUERIES = {
+    "news": '"{company}" news 2025 2026',
+    "funding": '"{company}" funding round investors Series valuation',
+    "tech": '"{company}" tech stack engineering technology platform',
+    "competitors": '"{company}" competitors alternatives vs market',
+    "culture": '"{company}" glassdoor culture reviews employees',
+    "accessibility": '"{company}" ADA accessibility disability inclusion accommodation ERG',
+    "ceo_press": '"{ceo}" "{company}"',  # only used if ceo is known
+}
+
+
+def _run_search_query(query: str, results: dict, key: str) -> None:
+    """Thread target: run one SearXNG JSON query, store up to 4 snippets in results[key]."""
+    import requests
+
+    snippets: list[str] = []
+    seen: set[str] = set()
+    try:
+        resp = requests.get(
"http://localhost:8888/search", + params={"q": query, "format": "json", "language": "en-US"}, + timeout=12, + ) + if resp.status_code != 200: + return + for r in resp.json().get("results", [])[:4]: + url = r.get("url", "") + if url in seen: + continue + seen.add(url) + title = r.get("title", "").strip() + content = r.get("content", "").strip() + if title or content: + snippets.append(f"- **{title}**\n {content}\n <{url}>") + except Exception: + pass + results[key] = "\n\n".join(snippets) + + +def _fetch_search_data(company: str, ceo: str = "") -> dict[str, str]: + """ + Run all search queries in parallel threads. + Returns dict keyed by search type (news, funding, tech, competitors, culture, ceo_press). + Missing/failed queries produce empty strings. + """ + import threading + + results: dict[str, str] = {} + threads = [] + + keys: list[str] = [] + for key, pattern in _SEARCH_QUERIES.items(): + if key == "ceo_press" and not ceo or (ceo or "").lower() == "not found": + continue + # Use replace() not .format() β€” company names may contain curly braces + query = pattern.replace("{company}", company).replace("{ceo}", ceo) + t = threading.Thread( + target=_run_search_query, + args=(query, results, key), + daemon=True, + ) + threads.append(t) + keys.append(key) + t.start() + + for t, key in zip(threads, keys): + t.join(timeout=15) + # Thread may still be alive after timeout β€” pre-populate key so + # the results dict contract ("missing queries β†’ empty string") holds + if t.is_alive(): + results.setdefault(key, "") + + return results + + +def _parse_sections(text: str) -> dict[str, str]: + """Split LLM markdown output on ## headers into named sections.""" + sections: dict[str, str] = {} + pattern = re.compile(r"^##\s+(.+)$", re.MULTILINE) + matches = list(pattern.finditer(text)) + for i, match in enumerate(matches): + name = match.group(1).strip() + start = match.end() + end = matches[i + 1].start() if i + 1 < len(matches) else len(text) + sections[name] = 
text[start:end].strip()
+    return sections
+
+
+# Resume + keyword config locations (both relative to the repo root).
+_RESUME_YAML = Path(__file__).parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
+_KEYWORDS_YAML = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
+
+# Companies where Alex has an NDA — reference as generic label unless
+# the role is security-focused (score >= 3 matching JD keywords).
+_NDA_COMPANIES = {"upguard"}
+
+
+def _score_experiences(experiences: list[dict], keywords: list[str], jd: str) -> list[dict]:
+    """Score each experience entry by keyword overlap with JD; return sorted descending."""
+    jd_lower = jd.lower()
+    scored = []
+    for exp in experiences:
+        # A keyword only counts when it appears in BOTH the experience text and the JD.
+        text = " ".join([
+            exp.get("position", ""),
+            exp.get("company", ""),
+            " ".join(
+                v
+                for resp in exp.get("key_responsibilities", [])
+                for v in resp.values()
+            ),
+        ]).lower()
+        score = sum(1 for kw in keywords if kw.lower() in text and kw.lower() in jd_lower)
+        scored.append({**exp, "score": score})
+    return sorted(scored, key=lambda x: x["score"], reverse=True)
+
+
+def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
+    """
+    Build the resume section of the LLM context block.
+    Top 2 scored experiences included in full detail; rest as one-liners.
+    Applies UpGuard NDA rule: reference as 'enterprise security vendor (NDA)'
+    unless the role is security-focused (score >= 3).
+    """
+    experiences = resume.get("experience_details", [])
+    if not experiences:
+        return ""
+
+    scored = _score_experiences(experiences, keywords, jd)
+    top2 = scored[:2]
+    rest = scored[2:]
+
+    def _company_label(exp: dict) -> str:
+        # NDA rule: mask the company name unless the JD match is security-heavy.
+        company = exp.get("company", "")
+        if company.lower() in _NDA_COMPANIES and exp.get("score", 0) < 3:
+            return "enterprise security vendor (NDA)"
+        return company
+
+    def _exp_header(exp: dict) -> str:
+        return f"{exp.get('position', '')} @ {_company_label(exp)} ({exp.get('employment_period', '')})"
+
+    def _exp_bullets(exp: dict) -> str:
+        bullets = [v for resp in exp.get("key_responsibilities", []) for v in resp.values()]
+        return "\n".join(f"  - {b}" for b in bullets)
+
+    lines = ["## Alex's Matched Experience"]
+    for exp in top2:
+        lines.append(f"\n**{_exp_header(exp)}** (match score: {exp['score']})")
+        lines.append(_exp_bullets(exp))
+
+    if rest:
+        condensed = ", ".join(_exp_header(e) for e in rest)
+        lines.append(f"\nAlso in Alex's background: {condensed}")
+
+    return "\n".join(lines)
+
+
+def _load_resume_and_keywords() -> tuple[dict, list[str]]:
+    """Load resume YAML and keywords config. Returns (resume_dict, all_keywords_list)."""
+    import yaml as _yaml
+
+    resume = {}
+    if _RESUME_YAML.exists():
+        resume = _yaml.safe_load(_RESUME_YAML.read_text()) or {}
+
+    keywords: list[str] = []
+    if _KEYWORDS_YAML.exists():
+        kw_cfg = _yaml.safe_load(_KEYWORDS_YAML.read_text()) or {}
+        # Flatten every list-valued section (skills/domains/keywords) into one list.
+        for lst in kw_cfg.values():
+            if isinstance(lst, list):
+                keywords.extend(lst)
+
+    return resume, keywords
+
+
+def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict:
+    """
+    Generate a pre-interview research brief for a job.
+
+    Parameters
+    ----------
+    job : dict
+        Job row from the DB (needs at least 'company', 'title', 'description').
+    use_scraper : bool
+        Whether to attempt live data via SearXNG before falling back to LLM.
+    on_stage : callable, optional
+        Progress callback invoked with a short status string at each phase.
+        Exceptions raised by the callback are swallowed.
+
+    Returns
+    -------
+    dict with keys: raw_output, company_brief, ceo_brief, tech_brief,
+    funding_brief, competitors_brief, red_flags, accessibility_brief,
+    talking_points, scrape_used
+    """
+    from scripts.llm_router import LLMRouter
+
+    router = LLMRouter()
+    # Research uses its own fallback order when configured (keeps the
+    # fine-tuned cover-letter model out of research — see config/llm.yaml).
+    research_order = router.config.get("research_fallback_order") or router.config["fallback_order"]
+    company = job.get("company") or "the company"
+    title = job.get("title") or "this role"
+    jd_excerpt = (job.get("description") or "")[:1500]
+
+    resume, keywords = _load_resume_and_keywords()
+    matched_keywords = [kw for kw in keywords if kw.lower() in jd_excerpt.lower()]
+    resume_context = _build_resume_context(resume, keywords, jd_excerpt)
+    keywords_note = (
+        f"\n\n## Matched Skills & Keywords\nSkills matching this JD: {', '.join(matched_keywords)}"
+        if matched_keywords else ""
+    )
+
+    def _stage(msg: str) -> None:
+        if on_stage:
+            try:
+                on_stage(msg)
+            except Exception:
+                pass  # never let stage callbacks break the task
+
+    # ── Phase 1: live scrape (optional) ──────────────────────────────────────
+    live_data: dict = {}
+    scrape_note = ""
+    _stage("Checking for live company data…")
+    if use_scraper and _SCRAPER_AVAILABLE and _searxng_running():
+        _stage("Scraping CEO & HQ data…")
+        try:
+            live_data = _scrape_company(company)
+            # Only surface fields the scraper actually resolved.
+            parts = []
+            if live_data.get("ceo") not in (None, "Not found"):
+                parts.append(f"CEO: {live_data['ceo']}")
+            if live_data.get("headquarters") not in (None, "Not found"):
+                parts.append(f"HQ: {live_data['headquarters']}")
+            if live_data.get("linkedin") not in (None, "Not found"):
+                parts.append(f"LinkedIn: {live_data['linkedin']}")
+            if parts:
+                scrape_note = (
+                    "\n\n**Live data retrieved via SearXNG:**\n"
+                    + "\n".join(f"- {p}" for p in parts)
+                    + "\n\nIncorporate these facts where relevant."
+ ) + except BaseException as e: + scrape_note = f"\n\n_(Live scrape attempted but failed: {e})_" + + # ── Phase 1b: parallel search queries ──────────────────────────────────── + search_data: dict[str, str] = {} + _stage("Running web searches…") + if use_scraper and _searxng_running(): + _stage("Running web searches (news, funding, tech, culture)…") + try: + ceo_name = (live_data.get("ceo") or "") if live_data else "" + search_data = _fetch_search_data(company, ceo=ceo_name) + except BaseException: + pass # best-effort; never fail the whole task + + # Track whether SearXNG actually contributed usable data to this brief. + scrape_used = 1 if (live_data or any(v.strip() for v in search_data.values())) else 0 + + def _section_note(key: str, label: str) -> str: + text = search_data.get(key, "").strip() + return f"\n\n## {label} (live web search)\n\n{text}" if text else "" + + news_note = _section_note("news", "News & Press") + funding_note = _section_note("funding", "Funding & Investors") + tech_note = _section_note("tech", "Tech Stack") + competitors_note = _section_note("competitors", "Competitors") + culture_note = _section_note("culture", "Culture & Employee Signals") + accessibility_note = _section_note("accessibility", "Accessibility & Disability Inclusion") + ceo_press_note = _section_note("ceo_press", "CEO in the News") + + # ── Phase 2: LLM synthesis ──────────────────────────────────────────────── + _stage("Generating brief with LLM… (30–90 seconds)") + prompt = f"""You are preparing Alex Rivera for a job interview. 
+ +Role: **{title}** at **{company}** + +## Job Description +{jd_excerpt} +{resume_context}{keywords_note} + +## Live Company Data +{scrape_note.strip() or "_(scrape unavailable)_"} +{news_note}{funding_note}{tech_note}{competitors_note}{culture_note}{accessibility_note}{ceo_press_note} + +--- + +Produce a structured research brief using **exactly** these eight markdown section headers +(include all eight even if a section has limited data β€” say so honestly): + +## Company Overview +What {company} does, core product/service, business model, size/stage (startup / scale-up / enterprise), market positioning. + +## Leadership & Culture +CEO background and leadership style, key execs, mission/values statements, Glassdoor themes. + +## Tech Stack & Product +Technologies, platforms, and product direction relevant to the {title} role. + +## Funding & Market Position +Funding stage, key investors, recent rounds, burn/growth signals, competitor landscape. + +## Recent Developments +News, launches, acquisitions, exec moves, pivots, or press from the past 12–18 months. +Draw on the live snippets above; if none available, note what is publicly known. + +## Red Flags & Watch-outs +Culture issues, layoffs, exec departures, financial stress, or Glassdoor concerns worth knowing before the call. +If nothing notable, write "No significant red flags identified." + +## Inclusion & Accessibility +Assess {company}'s commitment to disability inclusion and accessibility. Cover: +- ADA accommodation language in job postings or company policy +- Disability Employee Resource Group (ERG) or affinity group +- Product or service accessibility (WCAG compliance, adaptive features, AT integrations) +- Any public disability/accessibility advocacy, partnerships, or certifications +- Glassdoor or press signals about how employees with disabilities experience the company +If no specific signals are found, say so clearly β€” absence of public commitment is itself signal. 
+This section is for Alex's personal decision-making only and will not appear in any application. + +## Talking Points for Alex +Five specific talking points for the phone screen. Each must: +- Reference a concrete experience from Alex's matched background by name + (UpGuard NDA rule: say "enterprise security vendor" unless the role has a clear security/compliance focus) +- Connect to a specific signal from the JD or company context above +- Be 1–2 sentences, ready to speak aloud +- Never give generic advice + +--- +⚠️ This brief combines live web data and LLM training knowledge. Verify key facts before the call. +""" + + raw = router.complete(prompt, fallback_order=research_order) + # Strip … blocks emitted by reasoning models (e.g. DeepSeek, Qwen-R) + raw = re.sub(r".*?", "", raw, flags=re.DOTALL).strip() + sections = _parse_sections(raw) + + return { + "raw_output": raw, + "company_brief": sections.get("Company Overview", ""), + "ceo_brief": sections.get("Leadership & Culture", ""), + "tech_brief": sections.get("Tech Stack & Product", ""), + "funding_brief": sections.get("Funding & Market Position", ""), + "competitors_brief": sections.get("Funding & Market Position", ""), # competitor landscape is in the funding section + "red_flags": sections.get("Red Flags & Watch-outs", ""), + "accessibility_brief": sections.get("Inclusion & Accessibility", ""), + "talking_points": sections.get("Talking Points for Alex", ""), + "scrape_used": scrape_used, + } + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Generate company research brief") + parser.add_argument("--job-id", type=int, required=True, help="Job ID in staging.db") + parser.add_argument("--no-scrape", action="store_true", help="Skip SearXNG live scrape") + args = parser.parse_args() + + from scripts.db import DEFAULT_DB, init_db, save_research + import sqlite3 + + init_db(DEFAULT_DB) + conn = sqlite3.connect(DEFAULT_DB) + conn.row_factory = sqlite3.Row + row = 
conn.execute("SELECT * FROM jobs WHERE id = ?", (args.job_id,)).fetchone() + conn.close() + + if not row: + sys.exit(f"Job {args.job_id} not found in {DEFAULT_DB}") + + job = dict(row) + print(f"Researching: {job['title']} @ {job['company']} …\n") + if _SCRAPER_AVAILABLE and not args.no_scrape: + print(f"SearXNG available: {_searxng_running()}") + + result = research_company(job, use_scraper=not args.no_scrape) + save_research(DEFAULT_DB, job_id=args.job_id, **result) + print(result["raw_output"]) + print(f"\n[Saved to company_research for job {args.job_id}]") diff --git a/scripts/custom_boards/__init__.py b/scripts/custom_boards/__init__.py new file mode 100644 index 0000000..7b12ac1 --- /dev/null +++ b/scripts/custom_boards/__init__.py @@ -0,0 +1 @@ +# Custom job board scrapers β€” each module exposes scrape(profile, location, results_wanted) -> list[dict] diff --git a/scripts/custom_boards/adzuna.py b/scripts/custom_boards/adzuna.py new file mode 100644 index 0000000..fa57bdc --- /dev/null +++ b/scripts/custom_boards/adzuna.py @@ -0,0 +1,160 @@ +"""Adzuna Jobs API scraper. + +API docs: https://developer.adzuna.com/docs/search +Config: config/adzuna.yaml (gitignored β€” contains app_id + app_key) + +Each title in the search profile is queried as an exact phrase per location. +Returns a list of dicts compatible with scripts.db.insert_job(). +""" +from __future__ import annotations + +import time +from pathlib import Path + +import requests +import yaml + +_CONFIG_PATH = Path(__file__).parent.parent.parent / "config" / "adzuna.yaml" +_BASE_URL = "https://api.adzuna.com/v1/api/jobs/us/search" + + +def _load_config() -> tuple[str, str]: + if not _CONFIG_PATH.exists(): + raise FileNotFoundError( + f"Adzuna config not found: {_CONFIG_PATH}\n" + "Copy config/adzuna.yaml.example β†’ config/adzuna.yaml and fill in credentials." 
+ ) + cfg = yaml.safe_load(_CONFIG_PATH.read_text()) + app_id = (cfg.get("app_id") or "").strip() + app_key = (cfg.get("app_key") or "").strip() + if not app_id or not app_key: + raise ValueError( + "config/adzuna.yaml requires both 'app_id' and 'app_key'.\n" + "Find your App ID at https://developer.adzuna.com/admin/applications" + ) + return app_id, app_key + + +def _salary_str(job: dict) -> str: + lo = job.get("salary_min") + hi = job.get("salary_max") + try: + if lo and hi: + return f"${int(lo):,} – ${int(hi):,}" + if lo: + return f"${int(lo):,}+" + except (TypeError, ValueError): + pass + return "" + + +def _is_remote(location_display: str) -> bool: + return "remote" in location_display.lower() + + +def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]: + """Fetch jobs from the Adzuna API for a single location. + + Args: + profile: Search profile dict from search_profiles.yaml. + location: Location string (e.g. "Remote" or "San Francisco Bay Area, CA"). + results_wanted: Maximum results to return across all titles. + + Returns: + List of job dicts with keys: title, company, url, source, location, + is_remote, salary, description. + """ + try: + app_id, app_key = _load_config() + except (FileNotFoundError, ValueError) as exc: + print(f" [adzuna] Skipped β€” {exc}") + return [] + + titles = profile.get("titles", []) + hours_old = profile.get("hours_old", 240) + max_days_old = max(1, hours_old // 24) + is_remote_search = location.lower() == "remote" + + session = requests.Session() + session.headers.update({"Accept": "application/json", "User-Agent": "Mozilla/5.0"}) + + seen_ids: set[str] = set() + results: list[dict] = [] + + for title in titles: + if len(results) >= results_wanted: + break + + page = 1 + while len(results) < results_wanted: + # Adzuna doesn't support where=remote β€” it treats it as a city name and + # returns 0 results. For remote searches, append "remote" to the what param. 
+ if is_remote_search: + params = { + "app_id": app_id, + "app_key": app_key, + "results_per_page": 50, + "what": f'"{title}" remote', + "sort_by": "date", + "max_days_old": max_days_old, + } + else: + params = { + "app_id": app_id, + "app_key": app_key, + "results_per_page": 50, + "what_phrase": title, + "where": location, + "sort_by": "date", + "max_days_old": max_days_old, + } + try: + resp = session.get(f"{_BASE_URL}/{page}", params=params, timeout=20) + except requests.RequestException as exc: + print(f" [adzuna] Request error ({title}): {exc}") + break + + if resp.status_code == 401: + print(" [adzuna] Auth failed β€” check app_id and app_key in config/adzuna.yaml") + return results + if resp.status_code != 200: + print(f" [adzuna] HTTP {resp.status_code} for '{title}' page {page}") + break + + data = resp.json() + jobs = data.get("results", []) + if not jobs: + break + + for job in jobs: + job_id = str(job.get("id", "")) + if job_id in seen_ids: + continue + seen_ids.add(job_id) + + loc_display = job.get("location", {}).get("display_name", "") + redirect_url = job.get("redirect_url", "") + if not redirect_url: + continue + + results.append({ + "title": job.get("title", ""), + "company": job.get("company", {}).get("display_name", ""), + "url": redirect_url, + "source": "adzuna", + "location": loc_display, + "is_remote": is_remote_search or _is_remote(loc_display), + "salary": _salary_str(job), + "description": job.get("description", ""), + }) + + total = data.get("count", 0) + if len(results) >= total or len(jobs) < 50: + break # last page + + page += 1 + time.sleep(0.5) # polite pacing between pages + + time.sleep(0.5) # between titles + + return results[:results_wanted] diff --git a/scripts/custom_boards/craigslist.py b/scripts/custom_boards/craigslist.py new file mode 100644 index 0000000..30226ae --- /dev/null +++ b/scripts/custom_boards/craigslist.py @@ -0,0 +1,177 @@ +"""Craigslist job scraper β€” RSS-based. 
+ +Uses Craigslist's native RSS feed endpoint for discovery. +Full job description is populated by the scrape_url background task. +Company name and salary (not structured in Craigslist listings) are +extracted from the description body by the enrich_craigslist task. + +Config: config/craigslist.yaml (gitignored β€” metro list + location map) + config/craigslist.yaml.example (committed template) + +Returns a list of dicts compatible with scripts.db.insert_job(). +""" +from __future__ import annotations + +import time +import xml.etree.ElementTree as ET +from datetime import datetime, timezone +from email.utils import parsedate_to_datetime +from pathlib import Path +from urllib.parse import quote_plus + +import requests +import yaml + +_CONFIG_PATH = Path(__file__).parent.parent.parent / "config" / "craigslist.yaml" +_DEFAULT_CATEGORY = "jjj" +_HEADERS = { + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" + ) +} +_TIMEOUT = 15 +_SLEEP = 0.5 # seconds between requests β€” easy to make configurable later + + +def _load_config() -> dict: + if not _CONFIG_PATH.exists(): + raise FileNotFoundError( + f"Craigslist config not found: {_CONFIG_PATH}\n" + "Copy config/craigslist.yaml.example β†’ config/craigslist.yaml " + "and configure your target metros." + ) + cfg = yaml.safe_load(_CONFIG_PATH.read_text()) or {} + if not cfg.get("metros"): + raise ValueError( + "config/craigslist.yaml must contain at least one entry under 'metros'." 
+ ) + return cfg + + +def _rss_url(metro: str, category: str, query: str) -> str: + return ( + f"https://{metro}.craigslist.org/search/{category}" + f"?query={quote_plus(query)}&format=rss&sort=date" + ) + + +def _parse_pubdate(pubdate_str: str) -> datetime | None: + """Parse an RSS pubDate string to a timezone-aware datetime.""" + try: + return parsedate_to_datetime(pubdate_str) + except Exception: + return None + + +def _fetch_rss(url: str) -> list[dict]: + """Fetch and parse a Craigslist RSS feed. Returns list of raw item dicts.""" + resp = requests.get(url, headers=_HEADERS, timeout=_TIMEOUT) + resp.raise_for_status() + try: + root = ET.fromstring(resp.content) + except ET.ParseError as exc: + raise ValueError(f"Malformed RSS XML: {exc}") from exc + + items = [] + for item in root.findall(".//item"): + def _text(tag: str, _item=item) -> str: + el = _item.find(tag) + return (el.text or "").strip() if el is not None else "" + + items.append({ + "title": _text("title"), + "link": _text("link"), + "description": _text("description"), + "pubDate": _text("pubDate"), + }) + return items + + +def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]: + """Fetch jobs from Craigslist RSS for a single location. + + Args: + profile: Search profile dict from search_profiles.yaml. + location: Location string (e.g. "Remote" or "San Francisco Bay Area, CA"). + results_wanted: Maximum results to return across all metros and titles. + + Returns: + List of job dicts with keys: title, company, url, source, location, + is_remote, salary, description. + company/salary are empty β€” filled later by enrich_craigslist task. 
+ """ + try: + cfg = _load_config() + except (FileNotFoundError, ValueError) as exc: + print(f" [craigslist] Skipped β€” {exc}") + return [] + + metros_all: list[str] = cfg.get("metros", []) + location_map: dict[str, str] = cfg.get("location_map", {}) + category: str = cfg.get("category") or _DEFAULT_CATEGORY + + is_remote_search = location.lower() == "remote" + if is_remote_search: + metros = metros_all + else: + metro = location_map.get(location) + if not metro: + print(f" [craigslist] No metro mapping for '{location}' β€” skipping") + return [] + metros = [metro] + + titles: list[str] = profile.get("titles", []) + hours_old: int = profile.get("hours_old", 240) + cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600) + + seen_urls: set[str] = set() + results: list[dict] = [] + + for metro in metros: + if len(results) >= results_wanted: + break + + for title in titles: + if len(results) >= results_wanted: + break + + url = _rss_url(metro, category, title) + try: + items = _fetch_rss(url) + except requests.RequestException as exc: + print(f" [craigslist] HTTP error ({metro}/{title}): {exc}") + time.sleep(_SLEEP) + continue + except ValueError as exc: + print(f" [craigslist] Parse error ({metro}/{title}): {exc}") + time.sleep(_SLEEP) + continue + + for item in items: + if len(results) >= results_wanted: + break + + item_url = item.get("link", "") + if not item_url or item_url in seen_urls: + continue + + pub = _parse_pubdate(item.get("pubDate", "")) + if pub and pub.timestamp() < cutoff: + continue + + seen_urls.add(item_url) + results.append({ + "title": item.get("title", ""), + "company": "", + "url": item_url, + "source": "craigslist", + "location": f"{metro} (Craigslist)", + "is_remote": is_remote_search, + "salary": "", + "description": "", + }) + + time.sleep(_SLEEP) + + return results[:results_wanted] diff --git a/scripts/custom_boards/theladders.py b/scripts/custom_boards/theladders.py new file mode 100644 index 0000000..d7330af --- 
/dev/null +++ b/scripts/custom_boards/theladders.py @@ -0,0 +1,179 @@ +"""The Ladders scraper β€” Playwright-based (requires chromium installed). + +The Ladders is a client-side React app (no SSR __NEXT_DATA__). We use Playwright +to execute JS, wait for job cards to render, then extract from the DOM. + +Company names are hidden from guest (non-logged-in) users, but are encoded in +the job URL slug: /job/{title-slug}-{company-slug}-{location-slug}_{id} + +curl_cffi is no longer needed for this scraper; plain Playwright is sufficient. +playwright must be installed: `conda run -n job-seeker python -m playwright install chromium` + +Returns a list of dicts compatible with scripts.db.insert_job(). +""" +from __future__ import annotations + +import re +import time +from typing import Any + +_BASE = "https://www.theladders.com" +_SEARCH_PATH = "/jobs/searchjobs/{slug}" + +# Location slug in URLs for remote jobs +_REMOTE_SLUG = "virtual-travel" + + +def _company_from_url(href: str, title_slug: str) -> str: + """ + Extract company name from The Ladders job URL slug. + + URL format: /job/{title-slug}-{company-slug}-{location-slug}_{id}?ir=1 + Example: /job/customer-success-manager-gainsight-virtual-travel_85434789 + β†’ "Gainsight" + """ + # Strip path prefix and query + slug = href.split("/job/", 1)[-1].split("?")[0] + # Strip numeric ID suffix (e.g. 
_85434789) + slug = re.sub(r"_\d+$", "", slug) + # Strip known title prefix + if slug.startswith(title_slug + "-"): + slug = slug[len(title_slug) + 1:] + # Strip common location suffixes + for loc_suffix in [f"-{_REMOTE_SLUG}", "-new-york", "-los-angeles", + "-san-francisco", "-chicago", "-austin", "-seattle", + "-boston", "-atlanta", "-remote"]: + if slug.endswith(loc_suffix): + slug = slug[: -len(loc_suffix)] + break + # Convert kebab-case β†’ title case + return slug.replace("-", " ").title() if slug else "" + + +def _extract_jobs_js() -> str: + """JS to run in page context β€” extracts job data from rendered card elements.""" + return """() => { + const cards = document.querySelectorAll('[class*=job-card-container]'); + return Array.from(cards).map(card => { + const link = card.querySelector('p.job-link-wrapper a, a.clipped-text'); + const salary = card.querySelector('p.salary, .salary-info p'); + const locEl = card.querySelector('.remote-location-text, .location-info'); + const remoteEl = card.querySelector('.remote-flag-badge-remote'); + return { + title: link ? link.textContent.trim() : null, + href: link ? link.getAttribute('href') : null, + salary: salary ? salary.textContent.replace('*','').trim() : null, + location: locEl ? locEl.textContent.trim() : null, + is_remote: !!remoteEl, + }; + }).filter(j => j.title && j.href); + }""" + + +def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]: + """ + Scrape job listings from The Ladders using Playwright. + + Args: + profile: Search profile dict (uses 'titles'). + location: Location string (e.g. "Remote" or "San Francisco Bay Area, CA"). + results_wanted: Maximum results to return across all titles. + + Returns: + List of job dicts with keys: title, company, url, source, location, + is_remote, salary, description. 
+ """ + try: + from playwright.sync_api import sync_playwright + except ImportError: + print( + " [theladders] playwright not installed.\n" + " Install: conda run -n job-seeker pip install playwright && " + "conda run -n job-seeker python -m playwright install chromium" + ) + return [] + + is_remote_search = location.lower() == "remote" + results: list[dict] = [] + seen_urls: set[str] = set() + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + ctx = browser.new_context( + user_agent=( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" + ) + ) + page = ctx.new_page() + + for title in profile.get("titles", []): + if len(results) >= results_wanted: + break + + slug = title.lower().replace(" ", "-").replace("/", "-") + title_slug = slug # used for company extraction from URL + + params: dict[str, str] = {} + if is_remote_search: + params["remote"] = "true" + elif location: + params["location"] = location + + url = _BASE + _SEARCH_PATH.format(slug=slug) + if params: + query = "&".join(f"{k}={v}" for k, v in params.items()) + url = f"{url}?{query}" + + try: + page.goto(url, timeout=30_000) + page.wait_for_load_state("networkidle", timeout=20_000) + except Exception as exc: + print(f" [theladders] Page load error for '{title}': {exc}") + continue + + try: + raw_jobs: list[dict[str, Any]] = page.evaluate(_extract_jobs_js()) + except Exception as exc: + print(f" [theladders] JS extract error for '{title}': {exc}") + continue + + if not raw_jobs: + print(f" [theladders] No cards found for '{title}' β€” selector may need updating") + continue + + for job in raw_jobs: + href = job.get("href", "") + if not href: + continue + full_url = _BASE + href if href.startswith("/") else href + if full_url in seen_urls: + continue + seen_urls.add(full_url) + + company = _company_from_url(href, title_slug) + loc_text = (job.get("location") or "").replace("Remote", "").strip(", ") + if is_remote_search 
or job.get("is_remote"): + loc_display = "Remote" + (f" β€” {loc_text}" if loc_text and loc_text != "US-Anywhere" else "") + else: + loc_display = loc_text or location + + results.append({ + "title": job.get("title", ""), + "company": company, + "url": full_url, + "source": "theladders", + "location": loc_display, + "is_remote": bool(job.get("is_remote") or is_remote_search), + "salary": job.get("salary") or "", + "description": "", # not available in card view; scrape_url will fill in + }) + + if len(results) >= results_wanted: + break + + time.sleep(1) # polite pacing between titles + + browser.close() + + return results[:results_wanted] diff --git a/scripts/db.py b/scripts/db.py new file mode 100644 index 0000000..b2443a1 --- /dev/null +++ b/scripts/db.py @@ -0,0 +1,728 @@ +""" +SQLite staging layer for job listings. +Jobs flow: pending β†’ approved/rejected β†’ applied β†’ synced + applied β†’ phone_screen β†’ interviewing β†’ offer β†’ hired (or rejected) +""" +import sqlite3 +from datetime import datetime +from pathlib import Path +from typing import Optional + +DEFAULT_DB = Path(__file__).parent.parent / "staging.db" + +CREATE_JOBS = """ +CREATE TABLE IF NOT EXISTS jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT, + company TEXT, + url TEXT UNIQUE, + source TEXT, + location TEXT, + is_remote INTEGER DEFAULT 0, + salary TEXT, + description TEXT, + match_score REAL, + keyword_gaps TEXT, + date_found TEXT, + status TEXT DEFAULT 'pending', + notion_page_id TEXT, + cover_letter TEXT, + applied_at TEXT +); +""" + +CREATE_JOB_CONTACTS = """ +CREATE TABLE IF NOT EXISTS job_contacts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER NOT NULL, + direction TEXT DEFAULT 'inbound', + subject TEXT, + from_addr TEXT, + to_addr TEXT, + body TEXT, + received_at TEXT, + is_response_needed INTEGER DEFAULT 0, + responded_at TEXT, + message_id TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(id) +); +""" + +_CONTACT_MIGRATIONS = [ + ("message_id", "TEXT"), + 
("stage_signal", "TEXT"), + ("suggestion_dismissed", "INTEGER DEFAULT 0"), +] + +_RESEARCH_MIGRATIONS = [ + ("tech_brief", "TEXT"), + ("funding_brief", "TEXT"), + ("competitors_brief", "TEXT"), + ("red_flags", "TEXT"), + ("scrape_used", "INTEGER"), # 1 = SearXNG contributed data, 0 = LLM-only + ("accessibility_brief", "TEXT"), # Inclusion & Accessibility section +] + +CREATE_COMPANY_RESEARCH = """ +CREATE TABLE IF NOT EXISTS company_research ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER NOT NULL UNIQUE, + generated_at TEXT, + company_brief TEXT, + ceo_brief TEXT, + talking_points TEXT, + raw_output TEXT, + tech_brief TEXT, + funding_brief TEXT, + competitors_brief TEXT, + red_flags TEXT, + FOREIGN KEY (job_id) REFERENCES jobs(id) +); +""" + +CREATE_BACKGROUND_TASKS = """ +CREATE TABLE IF NOT EXISTS background_tasks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_type TEXT NOT NULL, + job_id INTEGER NOT NULL, + status TEXT NOT NULL DEFAULT 'queued', + error TEXT, + created_at DATETIME DEFAULT (datetime('now')), + started_at DATETIME, + finished_at DATETIME, + stage TEXT, + updated_at DATETIME +) +""" + +CREATE_SURVEY_RESPONSES = """ +CREATE TABLE IF NOT EXISTS survey_responses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER NOT NULL REFERENCES jobs(id), + survey_name TEXT, + received_at DATETIME, + source TEXT, + raw_input TEXT, + image_path TEXT, + mode TEXT, + llm_output TEXT, + reported_score TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP +); +""" + +_MIGRATIONS = [ + ("cover_letter", "TEXT"), + ("applied_at", "TEXT"), + ("interview_date", "TEXT"), + ("rejection_stage", "TEXT"), + ("phone_screen_at", "TEXT"), + ("interviewing_at", "TEXT"), + ("offer_at", "TEXT"), + ("hired_at", "TEXT"), + ("survey_at", "TEXT"), +] + + +def _migrate_db(db_path: Path) -> None: + """Add new columns to existing tables without breaking old data.""" + conn = sqlite3.connect(db_path) + for col, coltype in _MIGRATIONS: + try: + conn.execute(f"ALTER TABLE 
# Default staging database location.
DEFAULT_DB = Path(__file__).parent.parent / "staging.db"


def init_db(db_path: Path = DEFAULT_DB) -> None:
    """Create tables if they don't exist, then apply column migrations."""
    conn = sqlite3.connect(db_path)
    for ddl in (CREATE_JOBS, CREATE_JOB_CONTACTS, CREATE_COMPANY_RESEARCH,
                CREATE_BACKGROUND_TASKS, CREATE_SURVEY_RESPONSES):
        conn.execute(ddl)
    conn.commit()
    conn.close()
    _migrate_db(db_path)


def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
    """Insert a job row. Returns the new row id, or None when the URL is a
    duplicate (or *job* is None)."""
    if job is None:
        return None
    columns = ("title", "company", "url", "source", "location",
               "is_remote", "salary", "description", "date_found")
    values = [job.get(col, "") for col in columns]
    values[5] = int(bool(job.get("is_remote", False)))  # normalize to 0/1
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.execute(
            f"INSERT INTO jobs ({', '.join(columns)})"
            f" VALUES ({', '.join('?' * len(columns))})",
            values,
        )
        conn.commit()
        return cur.lastrowid
    except sqlite3.IntegrityError:
        return None  # duplicate URL (jobs.url is UNIQUE)
    finally:
        conn.close()


def get_job_by_id(db_path: Path = DEFAULT_DB, job_id: int = None) -> Optional[dict]:
    """Look up a single job by primary key; None when absent."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    row = conn.execute("SELECT * FROM jobs WHERE id=?", (job_id,)).fetchone()
    conn.close()
    return dict(row) if row else None
# Default staging database location.
DEFAULT_DB = Path(__file__).parent.parent / "staging.db"


def get_email_leads(db_path: Path = DEFAULT_DB) -> list[dict]:
    """Return pending jobs whose source is 'email', newest first."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    leads = [
        dict(r)
        for r in conn.execute(
            "SELECT * FROM jobs WHERE source = 'email' AND status = 'pending' "
            "ORDER BY date_found DESC, id DESC"
        ).fetchall()
    ]
    conn.close()
    return leads


def get_job_counts(db_path: Path = DEFAULT_DB) -> dict:
    """Map each job status to the number of rows in that status."""
    conn = sqlite3.connect(db_path)
    pairs = conn.execute(
        "SELECT status, COUNT(*) as n FROM jobs GROUP BY status"
    ).fetchall()
    conn.close()
    return dict(pairs)


def update_job_status(db_path: Path = DEFAULT_DB, ids: list[int] = None, status: str = "approved") -> None:
    """Set *status* on every job whose id is in *ids* (no-op for empty/None)."""
    if not ids:
        return
    placeholders = ",".join("?" * len(ids))
    conn = sqlite3.connect(db_path)
    conn.execute(
        f"UPDATE jobs SET status = ? WHERE id IN ({placeholders})",
        [status, *ids],
    )
    conn.commit()
    conn.close()


def get_existing_urls(db_path: Path = DEFAULT_DB) -> set[str]:
    """Return every URL already staged, regardless of status (dedup source)."""
    conn = sqlite3.connect(db_path)
    urls = {u for (u,) in conn.execute("SELECT url FROM jobs")}
    conn.close()
    return urls
WHERE id = ?", + (score, gaps, job_id), + ) + conn.commit() + conn.close() + + +def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: str = "") -> None: + """Persist a generated/edited cover letter for a job.""" + if job_id is None: + return + conn = sqlite3.connect(db_path) + conn.execute("UPDATE jobs SET cover_letter = ? WHERE id = ?", (text, job_id)) + conn.commit() + conn.close() + + +_UPDATABLE_JOB_COLS = { + "title", "company", "url", "source", "location", "is_remote", + "salary", "description", "match_score", "keyword_gaps", +} + + +def update_job_fields(db_path: Path = DEFAULT_DB, job_id: int = None, + fields: dict = None) -> None: + """Update arbitrary job columns. Unknown keys are silently ignored.""" + if job_id is None or not fields: + return + safe = {k: v for k, v in fields.items() if k in _UPDATABLE_JOB_COLS} + if not safe: + return + conn = sqlite3.connect(db_path) + sets = ", ".join(f"{col} = ?" for col in safe) + conn.execute( + f"UPDATE jobs SET {sets} WHERE id = ?", + (*safe.values(), job_id), + ) + conn.commit() + conn.close() + + +def mark_applied(db_path: Path = DEFAULT_DB, ids: list[int] = None) -> None: + """Set status='applied' and record today's date for a list of job IDs.""" + if not ids: + return + today = datetime.now().isoformat()[:10] + conn = sqlite3.connect(db_path) + conn.execute( + f"UPDATE jobs SET status = 'applied', applied_at = ? WHERE id IN ({','.join('?' * len(ids))})", + [today] + list(ids), + ) + conn.commit() + conn.close() + + +def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int: + """Mark all queued/running background tasks as failed. 
Returns count killed.""" + conn = sqlite3.connect(db_path) + count = conn.execute( + "UPDATE background_tasks SET status='failed', error='Killed by user'," + " finished_at=datetime('now') WHERE status IN ('queued','running')" + ).rowcount + conn.commit() + conn.close() + return count + + +def purge_email_data(db_path: Path = DEFAULT_DB) -> tuple[int, int]: + """Delete all job_contacts rows and email-sourced pending jobs. + Returns (contacts_deleted, jobs_deleted). + """ + conn = sqlite3.connect(db_path) + c1 = conn.execute("DELETE FROM job_contacts").rowcount + c2 = conn.execute("DELETE FROM jobs WHERE source='email'").rowcount + conn.commit() + conn.close() + return c1, c2 + + +def purge_jobs(db_path: Path = DEFAULT_DB, statuses: list[str] = None) -> int: + """Delete jobs matching given statuses. Returns number of rows deleted. + If statuses is None or empty, deletes ALL jobs (full reset). + """ + conn = sqlite3.connect(db_path) + if statuses: + placeholders = ",".join("?" * len(statuses)) + cur = conn.execute(f"DELETE FROM jobs WHERE status IN ({placeholders})", statuses) + else: + cur = conn.execute("DELETE FROM jobs") + count = cur.rowcount + conn.commit() + conn.close() + return count + + +def purge_non_remote(db_path: Path = DEFAULT_DB) -> int: + """Delete non-remote jobs that are not yet in the active pipeline. + Preserves applied, phone_screen, interviewing, offer, hired, and synced records. + Returns number of rows deleted. + """ + _safe = ("applied", "phone_screen", "interviewing", "offer", "hired", "synced") + placeholders = ",".join("?" * len(_safe)) + conn = sqlite3.connect(db_path) + count = conn.execute( + f"DELETE FROM jobs WHERE (is_remote = 0 OR is_remote IS NULL)" + f" AND status NOT IN ({placeholders})", + _safe, + ).rowcount + conn.commit() + conn.close() + return count + + +def archive_jobs(db_path: Path = DEFAULT_DB, statuses: list[str] = None) -> int: + """Set status='archived' for jobs matching given statuses. 
# ── Interview pipeline helpers ────────────────────────────────────────────────

# Default staging database location.
DEFAULT_DB = Path(__file__).parent.parent / "staging.db"

# Maps an interview stage to the jobs-table column holding its timestamp.
_STAGE_TS_COL = {
    "phone_screen": "phone_screen_at",
    "interviewing": "interviewing_at",
    "offer": "offer_at",
    "hired": "hired_at",
    "survey": "survey_at",
}


def get_interview_jobs(db_path: Path = DEFAULT_DB) -> dict[str, list[dict]]:
    """Return jobs grouped by post-application stage (applied … rejected)."""
    stages = ["applied", "survey", "phone_screen", "interviewing", "offer", "hired", "rejected"]
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    grouped = {
        stage: [
            dict(r)
            for r in conn.execute(
                "SELECT * FROM jobs WHERE status = ? ORDER BY applied_at DESC, id DESC",
                (stage,),
            ).fetchall()
        ]
        for stage in stages
    }
    conn.close()
    return grouped


def advance_to_stage(db_path: Path = DEFAULT_DB, job_id: int = None, stage: str = "") -> None:
    """Move a job to *stage*, stamping the stage's timestamp column if it has one."""
    now = datetime.now().isoformat()[:16]  # minute precision
    ts_col = _STAGE_TS_COL.get(stage)
    conn = sqlite3.connect(db_path)
    if ts_col is None:
        conn.execute("UPDATE jobs SET status = ? WHERE id = ?", (stage, job_id))
    else:
        # ts_col comes from _STAGE_TS_COL above, so the f-string is safe.
        conn.execute(
            f"UPDATE jobs SET status = ?, {ts_col} = ? WHERE id = ?",
            (stage, now, job_id),
        )
    conn.commit()
    conn.close()


def reject_at_stage(db_path: Path = DEFAULT_DB, job_id: int = None,
                    rejection_stage: str = "") -> None:
    """Mark a job rejected, remembering which stage the rejection came at."""
    conn = sqlite3.connect(db_path)
    conn.execute(
        "UPDATE jobs SET status = 'rejected', rejection_stage = ? WHERE id = ?",
        (rejection_stage, job_id),
    )
    conn.commit()
    conn.close()


def set_interview_date(db_path: Path = DEFAULT_DB, job_id: int = None,
                       date_str: str = "") -> None:
    """Persist an interview date string on a job row."""
    conn = sqlite3.connect(db_path)
    conn.execute("UPDATE jobs SET interview_date = ? WHERE id = ?", (date_str, job_id))
    conn.commit()
    conn.close()


# ── Contact log helpers ───────────────────────────────────────────────────────

def add_contact(db_path: Path = DEFAULT_DB, job_id: int = None,
                direction: str = "inbound", subject: str = "",
                from_addr: str = "", to_addr: str = "",
                body: str = "", received_at: str = "",
                message_id: str = "",
                stage_signal: str = "") -> int:
    """Log an email contact against a job. Returns the new row id."""
    stamp = received_at or datetime.now().isoformat()[:16]
    conn = sqlite3.connect(db_path)
    cur = conn.execute(
        """INSERT INTO job_contacts
               (job_id, direction, subject, from_addr, to_addr, body,
                received_at, message_id, stage_signal)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
        (job_id, direction, subject, from_addr, to_addr, body,
         stamp, message_id, stage_signal or None),
    )
    conn.commit()
    new_id = cur.lastrowid
    conn.close()
    return new_id
# Default staging database location.
DEFAULT_DB = Path(__file__).parent.parent / "staging.db"


def get_unread_stage_signals(db_path: Path = DEFAULT_DB,
                             job_id: int = None) -> list[dict]:
    """Inbound contacts carrying a stage signal that is neither 'neutral'
    nor already dismissed, oldest first."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    hits = conn.execute(
        """SELECT * FROM job_contacts
            WHERE job_id = ?
              AND direction = 'inbound'
              AND stage_signal IS NOT NULL
              AND stage_signal != 'neutral'
              AND (suggestion_dismissed IS NULL OR suggestion_dismissed = 0)
            ORDER BY received_at ASC""",
        (job_id,),
    ).fetchall()
    conn.close()
    return [dict(h) for h in hits]


def dismiss_stage_signal(db_path: Path = DEFAULT_DB,
                         contact_id: int = None) -> None:
    """Hide one stage-signal suggestion permanently."""
    conn = sqlite3.connect(db_path)
    conn.execute(
        "UPDATE job_contacts SET suggestion_dismissed = 1 WHERE id = ?",
        (contact_id,),
    )
    conn.commit()
    conn.close()


def get_all_message_ids(db_path: Path = DEFAULT_DB) -> set[str]:
    """Every non-empty email Message-ID seen across the contact log."""
    conn = sqlite3.connect(db_path)
    ids = {
        mid
        for (mid,) in conn.execute(
            "SELECT message_id FROM job_contacts WHERE message_id IS NOT NULL AND message_id != ''"
        )
    }
    conn.close()
    return ids


# ── Company research helpers ──────────────────────────────────────────────────

def save_research(db_path: Path = DEFAULT_DB, job_id: int = None,
                  company_brief: str = "", ceo_brief: str = "",
                  talking_points: str = "", raw_output: str = "",
                  tech_brief: str = "", funding_brief: str = "",
                  competitors_brief: str = "", red_flags: str = "",
                  accessibility_brief: str = "",
                  scrape_used: int = 0) -> None:
    """Upsert the company research record for a job (one row per job_id)."""
    stamp = datetime.now().isoformat()[:16]  # minute precision
    cols = ("job_id", "generated_at", "company_brief", "ceo_brief",
            "talking_points", "raw_output", "tech_brief", "funding_brief",
            "competitors_brief", "red_flags", "accessibility_brief",
            "scrape_used")
    # Every column except the conflict key is refreshed on re-generation.
    updates = ", ".join(f"{c} = excluded.{c}" for c in cols[1:])
    sql = (
        f"INSERT INTO company_research ({', '.join(cols)})"
        f" VALUES ({', '.join('?' * len(cols))})"
        f" ON CONFLICT(job_id) DO UPDATE SET {updates}"
    )
    conn = sqlite3.connect(db_path)
    conn.execute(sql, (job_id, stamp, company_brief, ceo_brief, talking_points,
                       raw_output, tech_brief, funding_brief, competitors_brief,
                       red_flags, accessibility_brief, scrape_used))
    conn.commit()
    conn.close()


def get_research(db_path: Path = DEFAULT_DB, job_id: int = None) -> Optional[dict]:
    """Company research record for *job_id*, or None if never generated."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    rec = conn.execute(
        "SELECT * FROM company_research WHERE job_id = ?", (job_id,)
    ).fetchone()
    conn.close()
    return dict(rec) if rec else None
# Default staging database location.
DEFAULT_DB = Path(__file__).parent.parent / "staging.db"


def get_survey_responses(db_path: Path = DEFAULT_DB, job_id: int = None) -> list[dict]:
    """All survey responses logged for a job, newest first."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    responses = [
        dict(r)
        for r in conn.execute(
            "SELECT * FROM survey_responses WHERE job_id = ? ORDER BY created_at DESC",
            (job_id,),
        ).fetchall()
    ]
    conn.close()
    return responses


# ── Background task helpers ───────────────────────────────────────────────────

def insert_task(db_path: Path = DEFAULT_DB, task_type: str = "",
                job_id: int = None) -> tuple[int, bool]:
    """Queue a background task, deduplicating against live ones.

    Returns (task_id, True) when a new row was created, or
    (existing_id, False) when a queued/running task for the same
    (task_type, job_id) pair already exists.
    """
    conn = sqlite3.connect(db_path)
    live = conn.execute(
        "SELECT id FROM background_tasks WHERE task_type=? AND job_id=? AND status IN ('queued','running')",
        (task_type, job_id),
    ).fetchone()
    if live is not None:
        conn.close()
        return live[0], False
    cur = conn.execute(
        "INSERT INTO background_tasks (task_type, job_id, status) VALUES (?, ?, 'queued')",
        (task_type, job_id),
    )
    new_id = cur.lastrowid
    conn.commit()
    conn.close()
    return new_id, True


def update_task_status(db_path: Path = DEFAULT_DB, task_id: int = None,
                       status: str = "", error: Optional[str] = None) -> None:
    """Advance a task's status and stamp the matching timestamp column."""
    stamp = datetime.now().isoformat()[:16]  # minute precision
    conn = sqlite3.connect(db_path)
    if status == "running":
        conn.execute(
            "UPDATE background_tasks SET status=?, started_at=?, updated_at=? WHERE id=?",
            (status, stamp, stamp, task_id),
        )
    elif status in ("completed", "failed"):
        conn.execute(
            "UPDATE background_tasks SET status=?, finished_at=?, updated_at=?, error=? WHERE id=?",
            (status, stamp, stamp, error, task_id),
        )
    else:
        # Any other status just refreshes the heartbeat column.
        conn.execute(
            "UPDATE background_tasks SET status=?, updated_at=? WHERE id=?",
            (status, stamp, task_id),
        )
    conn.commit()
    conn.close()
WHERE id=?", (stage, task_id)) + conn.commit() + conn.close() + + +def get_active_tasks(db_path: Path = DEFAULT_DB) -> list[dict]: + """Return all queued/running tasks with job title and company joined in.""" + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + rows = conn.execute(""" + SELECT bt.*, j.title, j.company + FROM background_tasks bt + LEFT JOIN jobs j ON j.id = bt.job_id + WHERE bt.status IN ('queued', 'running') + ORDER BY bt.created_at ASC + """).fetchall() + conn.close() + return [dict(r) for r in rows] + + +def get_task_for_job(db_path: Path = DEFAULT_DB, task_type: str = "", + job_id: int = None) -> Optional[dict]: + """Return the most recent task row for a (task_type, job_id) pair, or None.""" + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + row = conn.execute( + """SELECT * FROM background_tasks + WHERE task_type=? AND job_id=? + ORDER BY id DESC LIMIT 1""", + (task_type, job_id), + ).fetchone() + conn.close() + return dict(row) if row else None diff --git a/scripts/discover.py b/scripts/discover.py new file mode 100644 index 0000000..bd7530a --- /dev/null +++ b/scripts/discover.py @@ -0,0 +1,285 @@ +# scripts/discover.py +""" +JobSpy β†’ SQLite staging pipeline (default) or Notion (notion_push=True). 

Usage:
    conda run -n job-seeker python scripts/discover.py
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))  # allow `scripts.` imports when run directly

import yaml
from datetime import datetime

import pandas as pd
from jobspy import scrape_jobs
from notion_client import Client

from scripts.db import DEFAULT_DB, init_db, insert_job, get_existing_urls as db_existing_urls
from scripts.custom_boards import adzuna as _adzuna
from scripts.custom_boards import theladders as _theladders
from scripts.custom_boards import craigslist as _craigslist

CONFIG_DIR = Path(__file__).parent.parent / "config"
NOTION_CFG = CONFIG_DIR / "notion.yaml"
PROFILES_CFG = CONFIG_DIR / "search_profiles.yaml"
BLOCKLIST_CFG = CONFIG_DIR / "blocklist.yaml"

# Registry of custom board scrapers keyed by name used in search_profiles.yaml
CUSTOM_SCRAPERS: dict[str, object] = {
    "adzuna": _adzuna.scrape,
    "theladders": _theladders.scrape,
    "craigslist": _craigslist.scrape,
}


def load_config() -> tuple[dict, dict]:
    """Load (search profiles, Notion config) from the YAML files in config/."""
    profiles = yaml.safe_load(PROFILES_CFG.read_text())
    notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
    return profiles, notion_cfg


def load_blocklist() -> dict:
    """Load global blocklist config.

    Returns dict with companies, industries, locations lists.
    """
    if not BLOCKLIST_CFG.exists():
        return {"companies": [], "industries": [], "locations": []}
    raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
    # Lower-case once here so all matching in _is_blocklisted is case-insensitive.
    return {
        "companies": [c.lower() for c in raw.get("companies", []) if c],
        "industries": [i.lower() for i in raw.get("industries", []) if i],
        "locations": [loc.lower() for loc in raw.get("locations", []) if loc],
    }


def _is_blocklisted(job_row: dict, blocklist: dict) -> bool:
    """Return True if this job matches any global blocklist rule."""
    company_lower = (job_row.get("company") or "").lower()
    location_lower = (job_row.get("location") or "").lower()
    desc_lower = (job_row.get("description") or "").lower()
    # Industry terms are matched against company name AND description text.
    content_lower = f"{company_lower} {desc_lower}"

    if any(bl in company_lower for bl in blocklist["companies"]):
        return True
    if any(bl in content_lower for bl in blocklist["industries"]):
        return True
    if any(bl in location_lower for bl in blocklist["locations"]):
        return True
    return False


def get_existing_urls(notion: Client, db_id: str, url_field: str) -> set[str]:
    """Return the set of all job URLs already tracked in Notion (for notion_push mode)."""
    existing: set[str] = set()
    has_more = True
    start_cursor = None
    while has_more:  # paginate through the database 100 pages at a time
        kwargs: dict = {"database_id": db_id, "page_size": 100}
        if start_cursor:
            kwargs["start_cursor"] = start_cursor
        resp = notion.databases.query(**kwargs)
        for page in resp["results"]:
            url = page["properties"].get(url_field, {}).get("url")
            if url:
                existing.add(url)
        has_more = resp.get("has_more", False)
        start_cursor = resp.get("next_cursor")
    return existing


def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
    """Create a new page in the Notion jobs database for a single listing."""
    min_amt = job.get("min_amount")
    max_amt = job.get("max_amount")
    # Page title prefers a parsed salary range, then the raw salary source,
    # then falls back to the job title.
    if min_amt and max_amt and not (pd.isna(min_amt) or pd.isna(max_amt)):
        title_content = f"${int(min_amt):,} – ${int(max_amt):,}"
    elif job.get("salary_source") and str(job["salary_source"]) not in ("nan", "None", ""):
        title_content = str(job["salary_source"])
    else:
        title_content = str(job.get("title", "Unknown"))

    job_url = str(job.get("job_url", "") or "")
    if job_url in ("nan", "None"):  # pandas NaN stringified
        job_url = ""

    notion.pages.create(
        parent={"database_id": db_id},
        properties={
            fm["title_field"]: {"title": [{"text": {"content": title_content}}]},
            fm["job_title"]: {"rich_text": [{"text": {"content": str(job.get("title", "Unknown"))}}]},
            fm["company"]: {"rich_text": [{"text": {"content": str(job.get("company", "") or "")}}]},
            fm["url"]: {"url": job_url or None},
            fm["source"]: {"multi_select": [{"name": str(job.get("site", "unknown")).title()}]},
            fm["status"]: {"select": {"name": fm["status_new"]}},
            fm["remote"]: {"checkbox": bool(job.get("is_remote", False))},
            fm["date_found"]: {"date": {"start": datetime.now().isoformat()[:10]}},
        },
    )


def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> int:
    """Scrape all configured boards for every profile/location and stage new jobs.

    Returns the number of newly inserted listings.  (Return annotation fixed
    from ``None`` — the function ends with ``return new_count``.)
    """
    profiles_cfg, notion_cfg = load_config()
    fm = notion_cfg["field_map"]
    blocklist = load_blocklist()

    _bl_summary = {k: len(v) for k, v in blocklist.items() if v}
    if _bl_summary:
        print(f"[discover] Blocklist active: {_bl_summary}")

    # SQLite dedup — by URL and by (title, company) to catch cross-board reposts
    init_db(db_path)
    existing_urls = db_existing_urls(db_path)

    import sqlite3 as _sqlite3
    _conn = _sqlite3.connect(db_path)
    # Title truncated to 80 chars so minor suffix differences still dedup.
    existing_tc = {
        (r[0].lower().strip()[:80], r[1].lower().strip())
        for r in _conn.execute("SELECT title, company FROM jobs").fetchall()
    }
    _conn.close()

    # Notion dedup (only in notion_push mode)
    notion = None
    if notion_push:
        notion = Client(auth=notion_cfg["token"])
        existing_urls |= get_existing_urls(notion, notion_cfg["database_id"], fm["url"])

    print(f"[discover] {len(existing_urls)} existing listings in DB")
    new_count = 0

    def _s(val, default="") -> str:
        """Convert a value to str, treating pandas NaN/None as default."""
        if val is None:
            return default
        s = str(val)
        return default if s in ("nan", "None", "NaN") else s

    def _insert_if_new(job_row: dict, source_label: str) -> bool:
        """Dedup-check, blocklist-check, and insert a job dict. Returns True if inserted."""
        url = job_row.get("url", "")
        if not url or url in existing_urls:
            return False

        # Global blocklist — checked before anything else
        if _is_blocklisted(job_row, blocklist):
            return False

        # Per-profile exclude keywords (matched in title OR description).
        title_lower = job_row.get("title", "").lower()
        desc_lower = job_row.get("description", "").lower()
        exclude_kw = job_row.get("_exclude_kw", [])
        if any(kw in title_lower or kw in desc_lower for kw in exclude_kw):
            return False

        tc_key = (title_lower[:80], job_row.get("company", "").lower().strip())
        if tc_key in existing_tc:
            return False
        existing_tc.add(tc_key)

        insert_job(db_path, {
            "title": job_row.get("title", ""),
            "company": job_row.get("company", ""),
            "url": url,
            "source": job_row.get("source", source_label),
            "location": job_row.get("location", ""),
            "is_remote": bool(job_row.get("is_remote", False)),
            "salary": job_row.get("salary", ""),
            "description": job_row.get("description", ""),
            "date_found": datetime.now().isoformat()[:10],
        })
        existing_urls.add(url)
        return True

    for profile in profiles_cfg["profiles"]:
        print(f"\n[discover] ── Profile: {profile['name']} ──")
        boards = profile.get("boards", [])
        custom_boards = profile.get("custom_boards", [])
        exclude_kw = [kw.lower() for kw in profile.get("exclude_keywords", [])]
        results_per_board = profile.get("results_per_board", 25)

        for location in profile["locations"]:

            # ── JobSpy boards ──────────────────────────────────────────────────
            if boards:
                print(f"  [jobspy] {location} β€” boards: {', '.join(boards)}")
                try:
                    jobs: pd.DataFrame = scrape_jobs(
                        site_name=boards,
                        # quoted OR-search over all profile titles
                        search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
                        location=location,
                        results_wanted=results_per_board,
                        hours_old=profile.get("hours_old", 72),
                        linkedin_fetch_description=True,
                    )
                    print(f"  [jobspy] {len(jobs)} raw results")
                except Exception as exc:
                    # A failed board scrape should not abort the whole run.
                    print(f"  [jobspy] ERROR: {exc}")
                    jobs = pd.DataFrame()

                jobspy_new = 0
                for _, job in jobs.iterrows():
                    url = str(job.get("job_url", "") or "")
                    if not url or url in ("nan", "None"):
                        continue

                    job_dict = job.to_dict()

                    # Build salary string from JobSpy numeric fields
                    min_amt = job_dict.get("min_amount")
                    max_amt = job_dict.get("max_amount")
                    salary_str = ""
                    if min_amt and max_amt and not (pd.isna(min_amt) or pd.isna(max_amt)):
                        salary_str = f"${int(min_amt):,} – ${int(max_amt):,}"
                    elif job_dict.get("salary_source") and str(job_dict["salary_source"]) not in ("nan", "None", ""):
                        salary_str = str(job_dict["salary_source"])

                    row = {
                        "url": url,
                        "title": _s(job_dict.get("title")),
                        "company": _s(job_dict.get("company")),
                        "source": _s(job_dict.get("site")),
                        "location": _s(job_dict.get("location")),
                        "is_remote": bool(job_dict.get("is_remote", False)),
                        "salary": salary_str,
                        "description": _s(job_dict.get("description")),
                        "_exclude_kw": exclude_kw,
                    }
                    if _insert_if_new(row, _s(job_dict.get("site"))):
                        if notion_push:
                            push_to_notion(notion, notion_cfg["database_id"], job_dict, fm)
                        new_count += 1
                        jobspy_new += 1
                        print(f"    + {row['title']} @ {row['company']} [{row['source']}]")

                print(f"  [jobspy] {jobspy_new} new listings from {location}")

            # ── Custom boards ──────────────────────────────────────────────────
            for board_name in custom_boards:
                scraper_fn = CUSTOM_SCRAPERS.get(board_name)
                if scraper_fn is None:
                    print(f"  [{board_name}] Unknown scraper β€” skipping (not in CUSTOM_SCRAPERS registry)")
                    continue

                print(f"  [{board_name}] {location} β€” fetching up to {results_per_board} results …")
                try:
                    custom_jobs = scraper_fn(profile, location, results_wanted=results_per_board)
                except Exception as exc:
                    print(f"  [{board_name}] ERROR: {exc}")
                    custom_jobs = []

                print(f"  [{board_name}] {len(custom_jobs)} raw results")
                board_new = 0
                for job in custom_jobs:
                    row = {**job, "_exclude_kw": exclude_kw}
                    if _insert_if_new(row, board_name):
                        new_count += 1
                        board_new += 1
                        print(f"    + {job.get('title')} @ {job.get('company')} [{board_name}]")

                print(f"  [{board_name}] {board_new} new listings from {location}")

    print(f"\n[discover] Done β€” {new_count} new listings staged total.")
    return new_count


if __name__ == "__main__":
    run_discovery()
diff --git a/scripts/enrich_descriptions.py b/scripts/enrich_descriptions.py
new file mode 100644
index 0000000..dce1cae
--- /dev/null
+++ b/scripts/enrich_descriptions.py
@@ -0,0 +1,284 @@
# scripts/enrich_descriptions.py
"""
Post-discovery enrichment: retry Glassdoor job description fetches that
returned empty/null during the initial scrape (usually rate-limit 429s or
expired listings mid-batch).

Fetches descriptions one at a time with a configurable delay between
requests to stay under Glassdoor's rate limit.

Usage:
    conda run -n job-seeker python scripts/enrich_descriptions.py
    conda run -n job-seeker python scripts/enrich_descriptions.py --dry-run
    conda run -n job-seeker python scripts/enrich_descriptions.py --delay 2.0
"""
import re
import sqlite3
import sys
import time
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

from scripts.db import DEFAULT_DB, init_db

DELAY_SECS = 1.5  # seconds between description fetches

def _extract_job_id(url: str) -> str | None:
    """Pull the Glassdoor listing ID from a job URL (…?jl=1234567890)."""
    m = re.search(r"jl=(\d+)", url or "")
    return m.group(1) if m else None


def _setup_scraper():
    """
    Create a Glassdoor scraper instance initialised just enough to call
    _fetch_job_description() — skips the full job-search setup.
+ """ + from jobspy.glassdoor import Glassdoor + from jobspy.glassdoor.constant import fallback_token, headers + from jobspy.model import ScraperInput, Site + from jobspy.util import create_session + + scraper = Glassdoor() + scraper.base_url = "https://www.glassdoor.com/" + scraper.session = create_session(has_retry=True) + token = scraper._get_csrf_token() + headers["gd-csrf-token"] = token if token else fallback_token + scraper.scraper_input = ScraperInput(site_type=[Site.GLASSDOOR]) + return scraper + + +def enrich_glassdoor_descriptions( + db_path: Path = DEFAULT_DB, + dry_run: bool = False, + delay: float = DELAY_SECS, +) -> dict: + """ + Find Glassdoor jobs with missing descriptions and re-fetch them. + + Returns: + {"attempted": N, "succeeded": N, "failed": N, "errors": [...]} + """ + init_db(db_path) + + conn = sqlite3.connect(db_path) + rows = conn.execute( + """SELECT id, url, company, title FROM jobs + WHERE source = 'glassdoor' + AND (description IS NULL OR TRIM(description) = '') + ORDER BY id ASC""" + ).fetchall() + conn.close() + + result = {"attempted": len(rows), "succeeded": 0, "failed": 0, "errors": []} + + if not rows: + print("[enrich] No Glassdoor jobs missing descriptions.") + return result + + print(f"[enrich] {len(rows)} Glassdoor job(s) missing descriptions β€” fetching…") + + try: + scraper = _setup_scraper() + except Exception as e: + msg = f"Glassdoor scraper init failed: {e}" + result["errors"].append(msg) + result["failed"] = len(rows) + print(f"[enrich] ERROR β€” {msg}") + return result + + for db_id, url, company, title in rows: + job_id = _extract_job_id(url) + if not job_id: + msg = f"job #{db_id}: cannot extract listing ID from URL: {url}" + result["errors"].append(msg) + result["failed"] += 1 + print(f"[enrich] SKIP β€” {msg}") + continue + + try: + description = scraper._fetch_job_description(int(job_id)) + if description and description.strip(): + if not dry_run: + upd = sqlite3.connect(db_path) + upd.execute( + "UPDATE jobs 
SET description = ? WHERE id = ?", + (description, db_id), + ) + upd.commit() + upd.close() + tag = "[DRY-RUN] " if dry_run else "" + print(f"[enrich] {tag}{company} β€” {title}: {len(description)} chars") + result["succeeded"] += 1 + else: + print(f"[enrich] {company} β€” {title}: empty response (expired listing?)") + result["failed"] += 1 + except Exception as e: + msg = f"job #{db_id} ({company}): {e}" + result["errors"].append(msg) + result["failed"] += 1 + print(f"[enrich] ERROR β€” {msg}") + + if delay > 0: + time.sleep(delay) + + return result + + +def enrich_all_descriptions( + db_path: Path = DEFAULT_DB, + dry_run: bool = False, + delay: float = DELAY_SECS, +) -> dict: + """ + Find ALL jobs with missing/empty descriptions (any source) and re-fetch them. + + Uses scrape_job_url for every source β€” it handles LinkedIn, Indeed, Glassdoor, + Adzuna, The Ladders, and any generic URL via JSON-LD / og: tags. + + Returns: + {"attempted": N, "succeeded": N, "failed": N, "errors": [...]} + """ + from scripts.scrape_url import scrape_job_url + + init_db(db_path) + + conn = sqlite3.connect(db_path) + rows = conn.execute( + """SELECT id, url, company, title, source FROM jobs + WHERE (description IS NULL OR TRIM(description) = '') + AND url IS NOT NULL AND url != '' + ORDER BY source, id ASC""" + ).fetchall() + conn.close() + + result = {"attempted": len(rows), "succeeded": 0, "failed": 0, "errors": []} + + if not rows: + print("[enrich] No jobs with missing descriptions.") + return result + + print(f"[enrich] {len(rows)} job(s) missing descriptions β€” fetching…") + + for db_id, url, company, title, source in rows: + if not url.startswith("http"): + result["failed"] += 1 + continue + + tag = "[DRY-RUN] " if dry_run else "" + try: + fields = {} if dry_run else scrape_job_url(db_path, db_id) + if fields or dry_run: + desc_len = len(fields.get("description", "") or "") + print(f"[enrich] {tag}[{source}] {company} β€” {title}: {desc_len} chars") + result["succeeded"] += 1 
+ else: + print(f"[enrich] [{source}] {company} β€” {title}: no data returned") + result["failed"] += 1 + except Exception as e: + msg = f"job #{db_id} ({company}): {e}" + result["errors"].append(msg) + result["failed"] += 1 + print(f"[enrich] ERROR β€” {msg}") + + if delay > 0: + time.sleep(delay) + + return result + + +def enrich_craigslist_fields( + db_path: Path = DEFAULT_DB, + job_id: int = None, +) -> dict: + """ + Use LLM to extract company name and salary from a Craigslist job description. + + Called after scrape_url populates the description for a craigslist job. + Only runs when: source='craigslist', company='', description non-empty. + + Returns dict with keys 'company' and/or 'salary' (may be empty strings). + """ + import json + + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT id, description, company, source FROM jobs WHERE id=?", (job_id,) + ).fetchone() + conn.close() + + if not row: + return {} + if row["source"] != "craigslist": + return {} + if row["company"]: # already populated + return {} + if not (row["description"] or "").strip(): + return {} + + from scripts.llm_router import LLMRouter + + prompt = ( + "Extract the following from this job posting. 
" + "Return JSON only, no commentary.\n\n" + '{"company": "", ' + '"salary": ""}\n\n' + f"Posting:\n{row['description'][:3000]}" + ) + + try: + router = LLMRouter() + raw = router.complete(prompt) + except Exception as exc: + print(f"[enrich_craigslist] LLM error for job {job_id}: {exc}") + return {} + + try: + clean = re.sub(r"```(?:json)?|```", "", raw).strip() + fields = json.loads(clean) + except (json.JSONDecodeError, ValueError): + print(f"[enrich_craigslist] Could not parse LLM response for job {job_id}: {raw!r}") + return {} + + extracted = { + k: (fields.get(k) or "").strip() + for k in ("company", "salary") + if (fields.get(k) or "").strip() + } + + if extracted: + from scripts.db import update_job_fields + update_job_fields(db_path, job_id, extracted) + print(f"[enrich_craigslist] job {job_id}: " + f"company={extracted.get('company', 'β€”')} " + f"salary={extracted.get('salary', 'β€”')}") + + return extracted + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Re-fetch missing job descriptions (all sources)" + ) + parser.add_argument("--glassdoor-only", action="store_true", + help="Only re-fetch Glassdoor listings (legacy behaviour)") + parser.add_argument("--dry-run", action="store_true", + help="Show what would be fetched without saving") + parser.add_argument("--delay", type=float, default=DELAY_SECS, + help=f"Seconds between requests (default: {DELAY_SECS})") + args = parser.parse_args() + + if args.glassdoor_only: + r = enrich_glassdoor_descriptions(dry_run=args.dry_run, delay=args.delay) + else: + r = enrich_all_descriptions(dry_run=args.dry_run, delay=args.delay) + + print( + f"\n[enrich] Done β€” {r['succeeded']} fetched, {r['failed']} failed" + + (f", {len(r['errors'])} error(s)" if r["errors"] else "") + ) diff --git a/scripts/finetune_local.py b/scripts/finetune_local.py new file mode 100644 index 0000000..6dfa406 --- /dev/null +++ b/scripts/finetune_local.py @@ -0,0 +1,248 @@ +#!/usr/bin/env 
python3
# scripts/finetune_local.py
"""
Local LoRA fine-tune on Alex's cover letter corpus.
No HuggingFace account or internet required after the base model is cached.

Usage:
    conda run -n ogma python scripts/finetune_local.py
    conda run -n ogma python scripts/finetune_local.py --model unsloth/Llama-3.2-3B-Instruct
    conda run -n ogma python scripts/finetune_local.py --epochs 15 --rank 16

After training, follow the printed instructions to load the model into Ollama.
"""
import argparse
import json
import os
import sys
from pathlib import Path

# Limit CUDA to GPU 0. device_map={"":0} in FastLanguageModel.from_pretrained
# pins every layer to GPU 0, avoiding the accelerate None-device bug that
# occurs with device_map="auto" on multi-GPU machines with 4-bit quantisation.
# Do NOT set WORLD_SIZE/RANK — that triggers torch.distributed initialisation.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")

# ── Config ────────────────────────────────────────────────────────────────────
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct"  # safe on 8 GB VRAM
LETTERS_JSONL = Path("/Library/Documents/JobSearch/training_data/cover_letters.jsonl")
OUTPUT_DIR = Path("/Library/Documents/JobSearch/training_data/finetune_output")
GGUF_DIR = Path("/Library/Documents/JobSearch/training_data/gguf")
OLLAMA_NAME = "alex-cover-writer"

SYSTEM_PROMPT = (
    "You are Alex Rivera's personal cover letter writer. "
    "Write professional, warm, and results-focused cover letters in Alex's voice. "
    "Draw on her background in customer success, technical account management, "
    "and revenue operations. Be specific and avoid generic filler."
)

# ── Args ──────────────────────────────────────────────────────────────────────
parser = argparse.ArgumentParser()
parser.add_argument("--model", default=DEFAULT_MODEL, help="Base model (HF repo id or local path)")
parser.add_argument("--epochs", type=int, default=10, help="Training epochs (default: 10)")
parser.add_argument("--rank", type=int, default=16, help="LoRA rank (default: 16)")
parser.add_argument("--batch", type=int, default=2, help="Per-device batch size (default: 2)")
parser.add_argument("--no-gguf", action="store_true", help="Skip GGUF export")
parser.add_argument("--max-length", type=int, default=1024, help="Max token length (default: 1024)")
args = parser.parse_args()

print(f"\n{'='*60}")
print(f"  Alex Cover Letter Fine-Tuner")
print(f"  Base model : {args.model}")
print(f"  Epochs     : {args.epochs}")
print(f"  LoRA rank  : {args.rank}")
print(f"  Dataset    : {LETTERS_JSONL}")
print(f"{'='*60}\n")

# ── Load dataset ──────────────────────────────────────────────────────────────
if not LETTERS_JSONL.exists():
    sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
             "Run: conda run -n job-seeker python scripts/prepare_training_data.py")

records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
print(f"Loaded {len(records)} training examples.")

# Convert to chat format expected by SFTTrainer
def to_messages(rec: dict) -> dict:
    """Wrap one {instruction, output} record as a system/user/assistant chat."""
    return {"messages": [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": rec["instruction"]},
        {"role": "assistant", "content": rec["output"]},
    ]}

chat_data = [to_messages(r) for r in records]

# ── Load model with unsloth ───────────────────────────────────────────────────
try:
    from unsloth import FastLanguageModel
    USE_UNSLOTH = True
except ImportError:
    USE_UNSLOTH = False
    print("WARNING: unsloth not found β€” falling back to standard transformers + PEFT")
    print("         Install: pip install 'unsloth[cu121-torch230] @ git+https://github.com/unslothai/unsloth.git'")

import torch

if USE_UNSLOTH:
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name     = args.model,
        max_seq_length = args.max_length,
        load_in_4bit   = True,       # QLoRA — fits 7-9B in 8 GB VRAM
        dtype          = None,       # auto-detect
        device_map     = {"": 0},    # pin everything to GPU 0; avoids accelerate None-device bug
    )
    model = FastLanguageModel.get_peft_model(
        model,
        r              = args.rank,
        lora_alpha     = args.rank * 2,
        lora_dropout   = 0,          # 0 = full unsloth kernel patching (faster)
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj"],
        bias           = "none",
        use_gradient_checkpointing = "unsloth",
    )
else:
    # Fallback path: plain transformers 4-bit + PEFT LoRA.
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
    from peft import LoraConfig, get_peft_model, TaskType

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = AutoModelForCausalLM.from_pretrained(
        args.model,
        quantization_config=bnb_config,
        device_map="auto",
    )
    lora_config = LoraConfig(
        r=args.rank,
        lora_alpha=args.rank * 2,
        lora_dropout=0.05,
        task_type=TaskType.CAUSAL_LM,
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

# ── Build HF Dataset ──────────────────────────────────────────────────────────
from datasets import Dataset

raw = Dataset.from_list(chat_data)
split = raw.train_test_split(test_size=0.1, seed=42)  # fixed seed: reproducible split
train_ds = split["train"]
eval_ds = split["test"]
print(f"Train: {len(train_ds)}  Eval: {len(eval_ds)}")

# formatting_func must ALWAYS return a list of strings.
# Unsloth tests it with a single example dict; during training it gets batches.
# Gemma 2 has no "system" role — fold it into the first user turn.
def _apply_template(msgs):
    """Render one message list to a prompt string via the tokenizer's chat template."""
    msgs = list(msgs)
    if msgs and msgs[0]["role"] == "system":
        sys_text = msgs.pop(0)["content"]
        if msgs and msgs[0]["role"] == "user":
            msgs[0] = {"role": "user", "content": f"{sys_text}\n\n{msgs[0]['content']}"}
    return tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=False)

def formatting_func(example):
    """Format a single example or a batch into a list of template strings."""
    msgs_field = example["messages"]
    # Single example: messages is a list of role dicts {"role":..., "content":...}
    # Batched example: messages is a list of those lists
    if msgs_field and isinstance(msgs_field[0], dict):
        return [_apply_template(msgs_field)]
    return [_apply_template(m) for m in msgs_field]

# ── Train ─────────────────────────────────────────────────────────────────────
from trl import SFTTrainer, SFTConfig

OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    formatting_func=formatting_func,
    args=SFTConfig(
        output_dir                  = str(OUTPUT_DIR),
        num_train_epochs            = args.epochs,
        per_device_train_batch_size = args.batch,
        gradient_accumulation_steps = max(1, 8 // args.batch),  # effective batch ≈ 8
        learning_rate               = 2e-4,
        warmup_ratio                = 0.1,
        lr_scheduler_type           = "cosine",
        fp16                        = not torch.cuda.is_bf16_supported(),
        bf16                        = torch.cuda.is_bf16_supported(),
        logging_steps               = 5,
        eval_strategy               = "epoch",
        save_strategy               = "epoch",
        load_best_model_at_end      = True,
        max_length                  = args.max_length,
        report_to                   = "none",
        push_to_hub                 = False,  # local only
    ),
)

print("\nStarting training…")
trainer.train()
print("Training complete.")

# ── Save adapter ──────────────────────────────────────────────────────────────
adapter_path = OUTPUT_DIR / "adapter"
model.save_pretrained(str(adapter_path))
tokenizer.save_pretrained(str(adapter_path))
print(f"\nLoRA adapter saved to: {adapter_path}")

# ── GGUF export ───────────────────────────────────────────────────────────────
if not args.no_gguf and USE_UNSLOTH:
    GGUF_DIR.mkdir(parents=True, exist_ok=True)
    gguf_path = GGUF_DIR / f"{OLLAMA_NAME}.gguf"
    print(f"\nExporting GGUF β†’ {gguf_path} …")
    model.save_pretrained_gguf(
        str(GGUF_DIR / OLLAMA_NAME),
        tokenizer,
        quantization_method="q4_k_m",
    )
    # unsloth names the file automatically — find it
    gguf_files = list(GGUF_DIR.glob("*.gguf"))
    if gguf_files:
        gguf_path = gguf_files[0]
        print(f"GGUF written: {gguf_path}")
    else:
        print("GGUF export may have succeeded β€” check GGUF_DIR above.")
else:
    gguf_path = None

# ── Print next steps ──────────────────────────────────────────────────────────
print(f"\n{'='*60}")
print(" DONE β€” next steps to load into Ollama:")
print(f"{'='*60}")

if gguf_path and gguf_path.exists():
    modelfile = OUTPUT_DIR / "Modelfile"
    modelfile.write_text(f"""FROM {gguf_path}
SYSTEM \"\"\"
{SYSTEM_PROMPT}
\"\"\"
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER num_ctx 32768
""")
    print(f"\n1. Modelfile written to: {modelfile}")
    print(f"\n2. Create the Ollama model:")
    print(f"   ollama create {OLLAMA_NAME} -f {modelfile}")
    print(f"\n3. Test it:")
    print(f"   ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
    print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
    print(f"   then pick it in Settings β†’ LLM Backends β†’ Ollama β†’ Model.")
else:
    print(f"\n  Adapter only (no GGUF). To convert manually:")
    print(f"  1. Merge adapter:")
    print(f"     conda run -n ogma python -c \"")
    print(f"     from peft import AutoPeftModelForCausalLM")
    print(f"     m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
    print(f"     m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
    print(f"  2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
    print(f"  3. ollama create {OLLAMA_NAME} -f Modelfile")
print()
diff --git a/scripts/generate_cover_letter.py b/scripts/generate_cover_letter.py
new file mode 100644
index 0000000..071dd41
--- /dev/null
+++ b/scripts/generate_cover_letter.py
@@ -0,0 +1,224 @@
# scripts/generate_cover_letter.py
"""
Generate a cover letter in Alex's voice using few-shot examples from her corpus.

Usage:
    conda run -n job-seeker python scripts/generate_cover_letter.py \
        --title "Director of Customer Success" \
        --company "Acme Corp" \
        --description "We are looking for..."

    Or pass a staging DB job ID:
    conda run -n job-seeker python scripts/generate_cover_letter.py --job-id 42
"""
import argparse
import re
import sys
from pathlib import Path

LETTERS_DIR = Path("/Library/Documents/JobSearch")
LETTER_GLOB = "*Cover Letter*.md"

# Background injected into every prompt so the model has Alex's facts
SYSTEM_CONTEXT = """You are writing cover letters for Alex Rivera, a customer success leader.

Background:
- 6+ years in customer success, technical account management, and CS leadership
- Most recent role: led Americas Customer Success at UpGuard (cybersecurity SaaS), managing enterprise + Fortune 500 accounts, drove NPS consistently above 95
- Also founder of M3 Consulting, a CS advisory practice for SaaS startups
- Attended Texas State (2 yrs), CSU East Bay (1 yr); completed degree elsewhere
- Based in San Francisco Bay Area; open to remote/hybrid
- Pronouns: any

Voice guidelines:
- Warm, confident, and specific β€” never generic
- Opens with "I'm delighted/thrilled to apply for [role] at [company]."
- 3–4 focused paragraphs, ~250–350 words total
- Para 2: concrete experience (cite UpGuard and/or M3 Consulting with a specific metric)
- Para 3: genuine connection to THIS company's mission/product
- Closes with "Thank you for considering my application."
+ warm sign-off +- Never use: "I am writing to express my interest", "passionate about making a difference", + "I look forward to hearing from you", or any hollow filler phrases +""" + + +# ── Mission-alignment detection ─────────────────────────────────────────────── +# When a company/JD signals one of these preferred industries, the cover letter +# prompt injects a hint so Para 3 can reflect genuine personal connection. +# This does NOT disclose any personal disability or family information. + +_MISSION_SIGNALS: dict[str, list[str]] = { + "music": [ + "music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music", + "distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl", + "streaming", "artist", "label", "live nation", "ticketmaster", "aeg", + "songkick", "concert", "venue", "festival", "audio", "podcast", + "studio", "record", "musician", "playlist", + ], + "animal_welfare": [ + "animal", "shelter", "rescue", "humane society", "spca", "aspca", + "veterinary", "vet ", "wildlife", "pet ", "adoption", "foster", + "dog", "cat", "feline", "canine", "sanctuary", "zoo", + ], + "education": [ + "education", "school", "learning", "student", "edtech", "classroom", + "curriculum", "tutoring", "academic", "university", "kids", "children", + "youth", "literacy", "khan academy", "duolingo", "chegg", "coursera", + "instructure", "canvas lms", "clever", "district", "teacher", + "k-12", "k12", "grade", "pedagogy", + ], +} + +_MISSION_NOTES: dict[str, str] = { + "music": ( + "This company is in the music industry, which is one of Alex's genuinely " + "ideal work environments β€” she has a real personal passion for the music scene. " + "Para 3 should warmly and specifically reflect this authentic alignment, not as " + "a generic fan statement, but as an honest statement of where she'd love to apply " + "her CS skills." 
+ ), + "animal_welfare": ( + "This organization works in animal welfare/rescue β€” one of Alex's dream-job " + "domains and a genuine personal passion. Para 3 should reflect this authentic " + "connection warmly and specifically, tying her CS skills to this mission." + ), + "education": ( + "This company works in children's education or EdTech β€” one of Alex's ideal " + "work domains, reflecting genuine personal values around learning and young people. " + "Para 3 should reflect this authentic connection specifically and warmly." + ), +} + + +def detect_mission_alignment(company: str, description: str) -> str | None: + """Return a mission hint string if company/JD matches a preferred industry, else None.""" + text = f"{company} {description}".lower() + for industry, signals in _MISSION_SIGNALS.items(): + if any(sig in text for sig in signals): + return _MISSION_NOTES[industry] + return None + + +def load_corpus() -> list[dict]: + """Load all .md cover letters from LETTERS_DIR. Returns list of {path, company, text}.""" + corpus = [] + for path in sorted(LETTERS_DIR.glob(LETTER_GLOB)): + text = path.read_text(encoding="utf-8", errors="ignore").strip() + if not text: + continue + # Extract company from filename: "Tailscale Cover Letter.md" β†’ "Tailscale" + company = re.sub(r"\s*Cover Letter.*", "", path.stem, flags=re.IGNORECASE).strip() + corpus.append({"path": path, "company": company, "text": text}) + return corpus + + +def find_similar_letters(job_description: str, corpus: list[dict], top_k: int = 3) -> list[dict]: + """Return the top_k letters most similar to the job description by TF-IDF cosine sim.""" + from sklearn.feature_extraction.text import TfidfVectorizer + from sklearn.metrics.pairwise import cosine_similarity + + if not corpus: + return [] + + docs = [job_description] + [c["text"] for c in corpus] + vectorizer = TfidfVectorizer(stop_words="english", max_features=500) + tfidf = vectorizer.fit_transform(docs) + sims = cosine_similarity(tfidf[0:1], 
tfidf[1:])[0] + + ranked = sorted(zip(sims, corpus), key=lambda x: x[0], reverse=True) + return [entry for _, entry in ranked[:top_k]] + + +def build_prompt( + title: str, + company: str, + description: str, + examples: list[dict], + mission_hint: str | None = None, +) -> str: + parts = [SYSTEM_CONTEXT.strip(), ""] + if examples: + parts.append("=== STYLE EXAMPLES (Alex's past letters) ===\n") + for i, ex in enumerate(examples, 1): + parts.append(f"--- Example {i} ({ex['company']}) ---") + parts.append(ex["text"]) + parts.append("") + parts.append("=== END EXAMPLES ===\n") + + if mission_hint: + parts.append(f"⭐ Mission alignment note (for Para 3): {mission_hint}\n") + + parts.append(f"Now write a new cover letter for:") + parts.append(f" Role: {title}") + parts.append(f" Company: {company}") + if description: + snippet = description[:1500].strip() + parts.append(f"\nJob description excerpt:\n{snippet}") + parts.append("\nWrite the full cover letter now:") + return "\n".join(parts) + + +def generate(title: str, company: str, description: str = "", _router=None) -> str: + """Generate a cover letter and return it as a string. + + _router is an optional pre-built LLMRouter (used in tests to avoid real LLM calls). 
+ """ + corpus = load_corpus() + examples = find_similar_letters(description or f"{title} {company}", corpus) + mission_hint = detect_mission_alignment(company, description) + if mission_hint: + print(f"[cover-letter] Mission alignment detected for {company}", file=sys.stderr) + prompt = build_prompt(title, company, description, examples, mission_hint=mission_hint) + + if _router is None: + sys.path.insert(0, str(Path(__file__).parent.parent)) + from scripts.llm_router import LLMRouter + _router = LLMRouter() + + print(f"[cover-letter] Generating for: {title} @ {company}", file=sys.stderr) + print(f"[cover-letter] Style examples: {[e['company'] for e in examples]}", file=sys.stderr) + + result = _router.complete(prompt) + return result.strip() + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate a cover letter in Alex's voice") + parser.add_argument("--title", help="Job title") + parser.add_argument("--company", help="Company name") + parser.add_argument("--description", default="", help="Job description text") + parser.add_argument("--job-id", type=int, help="Load job from staging.db by ID") + parser.add_argument("--output", help="Write output to this file path") + args = parser.parse_args() + + title, company, description = args.title, args.company, args.description + + if args.job_id is not None: + from scripts.db import DEFAULT_DB + import sqlite3 + conn = sqlite3.connect(DEFAULT_DB) + conn.row_factory = sqlite3.Row + row = conn.execute("SELECT * FROM jobs WHERE id = ?", (args.job_id,)).fetchone() + conn.close() + if not row: + print(f"No job with id={args.job_id} in staging.db", file=sys.stderr) + sys.exit(1) + job = dict(row) + title = title or job.get("title", "") + company = company or job.get("company", "") + description = description or job.get("description", "") + + if not title or not company: + parser.error("--title and --company are required (or use --job-id)") + + letter = generate(title, company, description) + + if 
args.output: + Path(args.output).write_text(letter) + print(f"Saved to {args.output}", file=sys.stderr) + else: + print(letter) + + +if __name__ == "__main__": + main() diff --git a/scripts/imap_sync.py b/scripts/imap_sync.py new file mode 100644 index 0000000..220a54f --- /dev/null +++ b/scripts/imap_sync.py @@ -0,0 +1,906 @@ +# scripts/imap_sync.py +""" +IMAP email sync β€” associates recruitment emails with job applications. + +Safety / privacy design: + - Only imports emails that pass BOTH checks: + 1. Sender or subject contains the exact company name (or derived domain) + 2. Subject contains at least one recruitment keyword + - Fuzzy / partial company name matches are rejected + - Emails between known personal contacts are never imported + - Only the INBOX and Sent folders are touched; no other folders + - Credentials stored in config/email.yaml (gitignored) + +Config: config/email.yaml (see config/email.yaml.example) + +Usage: + conda run -n job-seeker python scripts/imap_sync.py + conda run -n job-seeker python scripts/imap_sync.py --job-id 42 + conda run -n job-seeker python scripts/imap_sync.py --dry-run +""" +import email +import imaplib +import re +import sys +from datetime import datetime, timedelta +from email.header import decode_header as _raw_decode_header +from pathlib import Path +from typing import Optional +from urllib.parse import urlparse + +import yaml + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from scripts.db import DEFAULT_DB, init_db, get_interview_jobs, add_contact, get_contacts +from scripts.llm_router import LLMRouter + +_CLASSIFIER_ROUTER = LLMRouter() + +_CLASSIFY_SYSTEM = ( + "You are an email classifier. 
Classify the recruitment email into exactly ONE of these categories:\n" + " interview_scheduled, offer_received, rejected, positive_response, survey_received, neutral\n\n" + "Rules:\n" + "- interview_scheduled: recruiter wants to book a call/interview\n" + "- offer_received: job offer is being extended\n" + "- rejected: explicitly not moving forward\n" + "- positive_response: interested/impressed but no interview booked yet\n" + "- survey_received: link or request to complete a survey, assessment, or questionnaire\n" + "- neutral: auto-confirmation, generic update, no clear signal\n\n" + "Respond with ONLY the category name. No explanation." +) + +_CLASSIFY_LABELS = [ + "interview_scheduled", "offer_received", "rejected", + "positive_response", "survey_received", "neutral", +] + +CONFIG_PATH = Path(__file__).parent.parent / "config" / "email.yaml" + +# ── Recruitment keyword filter ──────────────────────────────────────────────── +# An email must match at least one of these in its subject line to be imported. 
+RECRUITMENT_KEYWORDS = { + # Application lifecycle + "interview", "application", "applicant", "apply", "applied", + "position", "opportunity", "role", "opening", "vacancy", + "offer", "offer letter", "schedule", "scheduling", + "screening", "screen", "phone screen", "video call", + "assessment", "hiring", "hired", "recruiter", "recruitment", + "talent", "candidate", "recruiting", "next steps", "follow up", "follow-up", + "onboarding", "start date", "background check", "reference", + "congratulations", "unfortunately", "decision", "update", + # Job board / ATS notifications + "viewed your profile", "interested in your background", + "job alert", "new job", "job match", "job opportunity", + "your application", "application received", "application status", + "application update", "we received", "thank you for applying", + "thanks for applying", "moved forward", "moving forward", + "not moving forward", "decided to", "other candidates", + "keep your resume", "keep you in mind", + # Recruiter outreach + "reaching out", "i came across", "your experience", + "connect with you", "exciting opportunity", "great fit", + "perfect fit", "right fit", "strong fit", "ideal candidate", +} + +# ── Rejection / ATS-confirm phrase filter ───────────────────────────────────── +# Checked against subject + first 800 chars of body BEFORE calling any LLM. +# Covers the cases phi3:mini consistently mis-classifies as "neutral". 
+_REJECTION_PHRASES = [ + # Explicit rejection β€” safe to check subject + body + "not moving forward", "decided not to move forward", + "not selected", "not be moving forward", "will not be moving forward", + "unfortunately", "regret to inform", "regret to let you know", + "decided to go with other", "decided to pursue other", + "other candidates", "other applicants", "position has been filled", + "filled the position", "no longer moving forward", + "we have decided", "we've decided", "after careful consideration", + "at this time we", "at this point we", + "we will not", "we won't be", "we are not able", + "wish you the best", "best of luck in your", + "keep your resume on file", +] + +# ATS-confirm phrases β€” checked against SUBJECT ONLY. +# Do NOT check these in the body: recruiters often quote ATS thread history, +# so "thank you for applying" can appear in a genuine follow-up body. +_ATS_CONFIRM_SUBJECTS = [ + "application received", "application confirmation", + "thanks for applying", "thank you for applying", + "thank you for your application", + "we received your application", + "application has been received", + "has received your application", + "successfully submitted", + "your application for", + "you applied to", +] + +# Phrases that immediately identify a non-recruitment email (retail, spam, etc.) 
+_SPAM_PHRASES = [ + # Retail / commerce offers + "special offer", "private offer", "exclusive offer", "limited time offer", + "limited-time offer", "sent you a special offer", "sent you an offer", + "holiday offer", "seasonal offer", "membership offer", + "round trip from $", "bonus points", + "% off", "% discount", "save up to", "free shipping", + "unsubscribe", "view in browser", "view this email in", + "update your preferences", "email preferences", + # LinkedIn apply confirmations & digests (not new inbound leads) + "your application was sent to", + "your application was viewed by", + "application updates this week", + "don't forget to complete your application", + "view your application updates", + "you have new application updates", + # Indeed apply confirmations + "indeed application:", + # DocuSign / e-signature + "requests you to sign", + "has sent you a reminder", + "please sign", + # Security / MFA codes + "security code for your application", + "verification code", +] + +# Subject prefixes that identify non-job emails +_SPAM_SUBJECT_PREFIXES = [ + "@", # "@user sent you a special offer" β€” Depop / social commerce + "re: fw:", # forwarded chains unlikely to be first-contact recruitment + "accepted:", # Google Calendar accepted invite + "notification:", # Google Calendar notification + "[meeting reminder]", # Google Calendar meeting reminder + "updated invitation:", # Google Calendar update + "[updated]", # Google Calendar update + "reminder:", # Generic reminder (AAA digital interview reminders, etc.) 
+ "πŸ“„", # Newsletter/article emoji prefix + "invitation from", # Google Calendar invite forwarded by name +] + +# Unicode-safe "don't forget" variants (Gmail renders typographic apostrophes) +_DONT_FORGET_VARIANTS = [ + "don't forget to complete your application", # straight apostrophe + "don\u2019t forget to complete your application", # right single quotation mark ' + "don\u2018t forget to complete your application", # left single quotation mark ' +] + + +def _has_rejection_or_ats_signal(subject: str, body: str) -> bool: + """Return True if the email is a rejection, ATS auto-confirmation, or non-recruitment spam.""" + subject_lower = subject.lower().strip() + + # Fast subject-prefix checks (Depop "@user", etc.) + if any(subject_lower.startswith(p) for p in _SPAM_SUBJECT_PREFIXES): + return True + + # Fast subject-only check for ATS confirmations + if any(phrase in subject_lower for phrase in _ATS_CONFIRM_SUBJECTS): + return True + + # Check subject + opening body for rejection and spam phrases + haystack = subject_lower + " " + body[:1500].lower() + if any(phrase in haystack for phrase in _REJECTION_PHRASES + _SPAM_PHRASES): + return True + # Unicode-safe "don't forget" check (handles straight, right, and left apostrophes) + raw = (subject + " " + body[:1500]).lower() + return any(phrase in raw for phrase in _DONT_FORGET_VARIANTS) + + +# Legal entity suffixes to strip when normalising company names +_LEGAL_SUFFIXES = re.compile( + r",?\s*\b(Inc|LLC|Ltd|Limited|Corp|Corporation|Co|GmbH|AG|plc|PLC|SAS|SA|NV|BV|LP|LLP)\b\.?\s*$", + re.IGNORECASE, +) + +# Job-board SLDs that must never be used as company-match search terms. +# A LinkedIn job URL has domain "linkedin.com" β†’ SLD "linkedin", which would +# incorrectly match every LinkedIn notification email against every LinkedIn job. 
+_JOB_BOARD_SLDS = { + "linkedin", "indeed", "glassdoor", "ziprecruiter", "monster", + "careerbuilder", "dice", "simplyhired", "wellfound", "angellist", + "greenhouse", "lever", "workday", "taleo", "icims", "smartrecruiters", + "bamboohr", "ashby", "rippling", "jobvite", "workable", "gusto", + "paylocity", "paycom", "adp", "breezy", "recruitee", "jazz", +} + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _decode_str(value: Optional[str]) -> str: + """Decode an RFC2047-encoded header value to a plain Python string.""" + if not value: + return "" + parts = _raw_decode_header(value) + result = [] + for part, encoding in parts: + if isinstance(part, bytes): + result.append(part.decode(encoding or "utf-8", errors="replace")) + else: + result.append(str(part)) + return " ".join(result).strip() + + +def _extract_domain(url_or_email: str) -> str: + """ + Pull the bare domain from a URL (https://company.com/jobs/...) or + an email address (recruiter@company.com). Returns '' if none found. + """ + url_or_email = url_or_email.strip() + if "@" in url_or_email: + return url_or_email.split("@")[-1].split(">")[0].strip().lower() + try: + parsed = urlparse(url_or_email) + host = parsed.netloc or parsed.path + # strip www. + return re.sub(r"^www\.", "", host).lower() + except Exception: + return "" + + +def _normalise_company(company: str) -> str: + """Strip legal suffixes and extra whitespace from a company name.""" + return _LEGAL_SUFFIXES.sub("", company).strip() + + +def _company_search_terms(company: str, job_url: str = "") -> list[str]: + """ + Return a list of strings that must appear (case-insensitively) in the + email's from-address or subject for it to be considered a match. + + We are deliberately conservative: + - Use the full normalised company name (not just the first word) + - Also include the company domain derived from the job URL, but ONLY + when the domain belongs to the actual company (not a job board). 
+ LinkedIn jobs link to linkedin.com β€” if we used "linkedin" as a term + we'd match every LinkedIn notification email against every LinkedIn job. + """ + terms = [] + clean = _normalise_company(company) + if len(clean) >= 3: + terms.append(clean.lower()) + + domain = _extract_domain(job_url) + if domain and len(domain) > 4: + sld = domain.split(".")[0] + if len(sld) >= 3 and sld not in terms and sld not in _JOB_BOARD_SLDS: + terms.append(sld) + + return terms + + +def _has_recruitment_keyword(subject: str) -> bool: + """Return True if the subject contains at least one recruitment keyword.""" + subject_lower = subject.lower() + return any(kw in subject_lower for kw in RECRUITMENT_KEYWORDS) + + +def _email_is_relevant(from_addr: str, subject: str, search_terms: list[str]) -> bool: + """ + Two-gate filter: + Gate 1 β€” from-address OR subject must contain an exact company term + Gate 2 β€” subject must contain a recruitment keyword + + Both gates must pass. This prevents importing unrelated emails that + happen to mention a company name in passing. + """ + combined = (from_addr + " " + subject).lower() + + gate1 = any(term in combined for term in search_terms) + gate2 = _has_recruitment_keyword(subject) + + return gate1 and gate2 + + +def _get_existing_message_ids(job_id: int, db_path: Path) -> set[str]: + contacts = get_contacts(db_path, job_id=job_id) + return {c.get("message_id", "") for c in contacts if c.get("message_id")} + + +def classify_stage_signal(subject: str, body: str) -> Optional[str]: + """Classify an inbound email into a pipeline stage signal. + + Returns one of the 5 label strings, or None on failure. + Uses phi3:mini via Ollama (benchmarked 100% on 12-case test set). 
+ """ + try: + prompt = f"Subject: {subject}\n\nEmail: {body[:400]}" + raw = _CLASSIFIER_ROUTER.complete( + prompt, + system=_CLASSIFY_SYSTEM, + model_override="llama3.1:8b", + fallback_order=["ollama_research"], + ) + # Strip blocks (in case a reasoning model slips through) + text = re.sub(r".*?", "", raw, flags=re.DOTALL) + text = text.lower().strip() + for label in _CLASSIFY_LABELS: + if text.startswith(label) or label in text: + return label + return "neutral" + except Exception: + return None + + +_EXTRACT_SYSTEM = ( + "Extract the hiring company name and job title from this recruitment email, " + "but ONLY if it represents genuine new recruiter outreach β€” i.e. a recruiter " + "contacting you about an open role for the first time.\n\n" + "Return {\"company\": null, \"title\": null} if the email is any of:\n" + " - A rejection or 'not moving forward' notice\n" + " - An ATS auto-confirmation ('we received your application')\n" + " - A status update for an application already in progress\n" + " - A generic job-alert digest or newsletter\n" + " - A follow-up you sent, not a reply from a recruiter\n\n" + "Otherwise respond with ONLY valid JSON: " + '{"company": "Company Name", "title": "Job Title"}.' +) + + +def extract_lead_info(subject: str, body: str, + from_addr: str) -> tuple[Optional[str], Optional[str]]: + """Use LLM to extract (company, title) from an unmatched recruitment email. + + Returns (company, title) or (None, None) on failure / low confidence. 
+ """ + import json as _json + try: + prompt = ( + f"From: {from_addr}\n" + f"Subject: {subject}\n\n" + f"Email excerpt:\n{body[:600]}" + ) + raw = _CLASSIFIER_ROUTER.complete( + prompt, + system=_EXTRACT_SYSTEM, + fallback_order=["ollama_research"], + ) + text = re.sub(r".*?", "", raw, flags=re.DOTALL).strip() + m = re.search(r'\{.*\}', text, re.DOTALL) + if not m: + return None, None + data = _json.loads(m.group()) + company = data.get("company") or None + title = data.get("title") or None + return company, title + except Exception: + return None, None + + +# Keywords that indicate an email in a curated label needs attention. +# Intentionally separate from RECRUITMENT_KEYWORDS β€” these are action-oriented. +_TODO_LABEL_KEYWORDS = { + "action needed", "action required", + "please complete", "please submit", "please respond", "please reply", + "response needed", "response required", + "next steps", "next step", + "follow up", "follow-up", + "deadline", "by end of", + "your offer", "offer letter", + "background check", "reference check", + "onboarding", "start date", + "congrats", "congratulations", + "we'd like to", "we would like to", + "interview", "schedule", "scheduling", +} + + +def _has_todo_keyword(subject: str) -> bool: + """Return True if the subject contains a TODO-label action keyword.""" + subject_lower = subject.lower() + return any(kw in subject_lower for kw in _TODO_LABEL_KEYWORDS) + + +_LINKEDIN_ALERT_SENDER = "jobalerts-noreply@linkedin.com" + +# Social-proof / nav lines to skip when parsing alert blocks +_ALERT_SKIP_PHRASES = { + "school alumni", "apply with", "actively hiring", "manage alerts", + "view all jobs", "your job alert", "new jobs match", + "unsubscribe", "linkedin corporation", +} + + +def parse_linkedin_alert(body: str) -> list[dict]: + """ + Parse the plain-text body of a LinkedIn Job Alert digest email. + + Returns a list of dicts: {title, company, location, url}. 
+ URL is canonicalized to https://www.linkedin.com/jobs/view// + (tracking parameters stripped). + """ + jobs = [] + # Split on separator lines (10+ dashes) + blocks = re.split(r"\n\s*-{10,}\s*\n", body) + for block in blocks: + lines = [ln.strip() for ln in block.strip().splitlines() if ln.strip()] + + # Find "View job:" URL + url = None + for line in lines: + m = re.search(r"View job:\s*(https?://\S+)", line, re.IGNORECASE) + if m: + raw_url = m.group(1) + job_id_m = re.search(r"/jobs/view/(\d+)", raw_url) + if job_id_m: + url = f"https://www.linkedin.com/jobs/view/{job_id_m.group(1)}/" + break + if not url: + continue + + # Filter noise lines + content = [ + ln for ln in lines + if not any(p in ln.lower() for p in _ALERT_SKIP_PHRASES) + and not ln.lower().startswith("view job:") + and not ln.startswith("http") + ] + if len(content) < 2: + continue + + jobs.append({ + "title": content[0], + "company": content[1], + "location": content[2] if len(content) > 2 else "", + "url": url, + }) + return jobs + + +def _scan_todo_label(conn: imaplib.IMAP4, cfg: dict, db_path: Path, + active_jobs: list[dict], + known_message_ids: set) -> int: + """Scan the configured Gmail label for action emails, matching them to pipeline jobs. + + Two gates per email: + 1. Company name appears in from-address or subject (same as sync_job_emails) + 2. Subject contains a TODO-label action keyword + + Returns count of new contacts attached. + """ + label = cfg.get("todo_label", "").strip() + if not label: + return 0 + + lookback = int(cfg.get("lookback_days", 90)) + since = (datetime.now() - timedelta(days=lookback)).strftime("%d-%b-%Y") + + # Search the label folder for any emails (no keyword pre-filter β€” it's curated) + uids = _search_folder(conn, label, "ALL", since) + if not uids: + return 0 + + # Build a lookup: search_term β†’ [job, ...] 
for all active jobs + term_to_jobs: dict[str, list[dict]] = {} + for job in active_jobs: + for term in _company_search_terms(job.get("company", ""), job.get("url", "")): + term_to_jobs.setdefault(term, []).append(job) + + added = 0 + for uid in uids: + parsed = _parse_message(conn, uid) + if not parsed: + continue + mid = parsed["message_id"] + if mid in known_message_ids: + continue + + # Gate 1: company name match β€” from_addr + subject + first 300 chars of body + # Body fallback catches ATS emails (e.g. noreply@greenhouse.io) where the + # company name only appears in the email body, not the sender or subject. + combined = ( + parsed["from_addr"] + " " + + parsed["subject"] + " " + + parsed["body"][:300] + ).lower() + matched_jobs = [] + for term, jobs in term_to_jobs.items(): + if term in combined: + matched_jobs.extend(jobs) + # Deduplicate by job id + seen_ids: set[int] = set() + matched_jobs = [j for j in matched_jobs if not (j["id"] in seen_ids or seen_ids.add(j["id"]))] # type: ignore[func-returns-value] + if not matched_jobs: + continue + + # Gate 2: action keyword in subject + if not _has_todo_keyword(parsed["subject"]): + continue + + for job in matched_jobs: + contact_id = add_contact( + db_path, job_id=job["id"], direction="inbound", + subject=parsed["subject"], + from_addr=parsed["from_addr"], + to_addr=parsed["to_addr"], + body=parsed["body"], + received_at=parsed["date"][:16] if parsed["date"] else since, + message_id=mid, + ) + signal = classify_stage_signal(parsed["subject"], parsed["body"]) + if signal and signal != "neutral": + _update_contact_signal(db_path, contact_id, signal) + + known_message_ids.add(mid) + added += 1 + print(f"[imap] TODO label β†’ {matched_jobs[0].get('company')} β€” {parsed['subject'][:60]}") + + return added + + +def _scan_unmatched_leads(conn: imaplib.IMAP4, cfg: dict, + db_path: Path, + known_message_ids: set) -> int: + """Scan INBOX for recruitment emails not matched to any pipeline job. 
+ + Calls LLM to extract company/title; inserts qualifying emails as pending jobs. + Returns the count of new leads inserted. + """ + from scripts.db import get_existing_urls, insert_job, add_contact as _add_contact + + lookback = int(cfg.get("lookback_days", 90)) + since = (datetime.now() - timedelta(days=lookback)).strftime("%d-%b-%Y") + + broad_terms = ["interview", "opportunity", "offer letter", "job offer", "application", "recruiting"] + all_uids: set = set() + for term in broad_terms: + uids = _search_folder(conn, "INBOX", f'(SUBJECT "{term}")', since) + all_uids.update(uids) + + existing_urls = get_existing_urls(db_path) + new_leads = 0 + + for uid in all_uids: + parsed = _parse_message(conn, uid) + if not parsed: + continue + mid = parsed["message_id"] + if mid in known_message_ids: + continue + + # ── LinkedIn Job Alert digest β€” parse each card individually ────── + if _LINKEDIN_ALERT_SENDER in parsed["from_addr"].lower(): + cards = parse_linkedin_alert(parsed["body"]) + for card in cards: + if card["url"] in existing_urls: + continue + job_id = insert_job(db_path, { + "title": card["title"], + "company": card["company"], + "url": card["url"], + "source": "linkedin", + "location": card["location"], + "is_remote": 0, + "salary": "", + "description": "", + "date_found": datetime.now().isoformat()[:10], + }) + if job_id: + from scripts.task_runner import submit_task + submit_task(db_path, "scrape_url", job_id) + existing_urls.add(card["url"]) + new_leads += 1 + print(f"[imap] LinkedIn alert β†’ {card['company']} β€” {card['title']}") + known_message_ids.add(mid) + continue # skip normal LLM extraction path + + if not _has_recruitment_keyword(parsed["subject"]): + continue + + # Fast phrase-based rejection / ATS-confirm filter (catches what phi3 misses) + if _has_rejection_or_ats_signal(parsed["subject"], parsed["body"]): + continue + + # LLM classification as secondary gate β€” skip on rejection or classifier failure + signal = 
classify_stage_signal(parsed["subject"], parsed["body"]) + if signal is None or signal == "rejected": + continue + + company, title = extract_lead_info( + parsed["subject"], parsed["body"], parsed["from_addr"] + ) + if not company: + continue + + from_domain = _extract_domain(parsed["from_addr"]) or "unknown" + mid_hash = str(abs(hash(mid)))[:10] + synthetic_url = f"email://{from_domain}/{mid_hash}" + + if synthetic_url in existing_urls: + continue + + job_id = insert_job(db_path, { + "title": title or "(untitled)", + "company": company, + "url": synthetic_url, + "source": "email", + "location": "", + "is_remote": 0, + "salary": "", + "description": parsed["body"][:2000], + "date_found": datetime.now().isoformat()[:10], + }) + if job_id: + _add_contact(db_path, job_id=job_id, direction="inbound", + subject=parsed["subject"], + from_addr=parsed["from_addr"], + body=parsed["body"], + received_at=parsed["date"][:16] if parsed["date"] else "", + message_id=mid) + known_message_ids.add(mid) + existing_urls.add(synthetic_url) + new_leads += 1 + + return new_leads + + +# ── IMAP connection ─────────────────────────────────────────────────────────── + +def load_config() -> dict: + if not CONFIG_PATH.exists(): + raise FileNotFoundError( + f"Email config not found: {CONFIG_PATH}\n" + f"Copy config/email.yaml.example β†’ config/email.yaml and fill it in." 
+ ) + return yaml.safe_load(CONFIG_PATH.read_text()) or {} + + +def connect(cfg: dict) -> imaplib.IMAP4: + host = cfg.get("host", "imap.gmail.com") + port = int(cfg.get("port", 993)) + use_ssl = cfg.get("use_ssl", True) + conn = (imaplib.IMAP4_SSL if use_ssl else imaplib.IMAP4)(host, port) + conn.login(cfg["username"], cfg["password"]) + return conn + + +def _detect_sent_folder(conn: imaplib.IMAP4) -> str: + """Try to auto-detect the Sent folder name.""" + candidates = ["[Gmail]/Sent Mail", "Sent", "Sent Items", "Sent Messages", "INBOX.Sent"] + try: + _, folder_list = conn.list() + flat = " ".join(f.decode() for f in (folder_list or [])) + for candidate in candidates: + if candidate.lower() in flat.lower(): + return candidate + except Exception: + pass + return "Sent" + + +def _quote_folder(name: str) -> str: + """Quote an IMAP folder name if it contains spaces. + Escapes internal backslashes and double-quotes per RFC 3501. + e.g. 'TO DO JOBS' β†’ '"TO DO JOBS"', 'My "Jobs"' β†’ '"My \\"Jobs\\""' + """ + if " " in name: + escaped = name.replace("\\", "\\\\").replace('"', '\\"') + return f'"{escaped}"' + return name + + +def _search_folder(conn: imaplib.IMAP4, folder: str, criteria: str, + since: str) -> list[bytes]: + """SELECT a folder and return matching UID list (empty on any error).""" + try: + conn.select(_quote_folder(folder), readonly=True) + _, data = conn.search(None, f'(SINCE "{since}" {criteria})') + return data[0].split() if data and data[0] else [] + except Exception: + return [] + + +def _parse_message(conn: imaplib.IMAP4, uid: bytes) -> Optional[dict]: + """Fetch and parse one message. 
Returns None on failure.""" + try: + _, data = conn.fetch(uid, "(RFC822)") + if not data or not data[0]: + return None + msg = email.message_from_bytes(data[0][1]) + + body = "" + if msg.is_multipart(): + for part in msg.walk(): + if part.get_content_type() == "text/plain": + try: + body = part.get_payload(decode=True).decode("utf-8", errors="replace") + except Exception: + pass + break + else: + try: + body = msg.get_payload(decode=True).decode("utf-8", errors="replace") + except Exception: + pass + + mid = msg.get("Message-ID", "").strip() + if not mid: + return None # No Message-ID β†’ can't dedup; skip to avoid repeat inserts + + return { + "message_id": mid, + "subject": _decode_str(msg.get("Subject")), + "from_addr": _decode_str(msg.get("From")), + "to_addr": _decode_str(msg.get("To")), + "date": _decode_str(msg.get("Date")), + "body": body[:4000], + } + except Exception: + return None + + +# ── Per-job sync ────────────────────────────────────────────────────────────── + +def _update_contact_signal(db_path: Path, contact_id: int, signal: str) -> None: + """Write a stage signal onto an existing contact row.""" + import sqlite3 as _sqlite3 + conn = _sqlite3.connect(db_path) + conn.execute( + "UPDATE job_contacts SET stage_signal = ? WHERE id = ?", + (signal, contact_id), + ) + conn.commit() + conn.close() + + +def sync_job_emails(job: dict, conn: imaplib.IMAP4, cfg: dict, + db_path: Path, dry_run: bool = False) -> tuple[int, int]: + """ + Sync recruitment emails for one job. + Returns (inbound_added, outbound_added). 
+ """ + company = (job.get("company") or "").strip() + if not company: + return 0, 0 + + search_terms = _company_search_terms(company, job.get("url", "")) + if not search_terms: + return 0, 0 + + lookback = int(cfg.get("lookback_days", 90)) + since = (datetime.now() - timedelta(days=lookback)).strftime("%d-%b-%Y") + existing_ids = _get_existing_message_ids(job["id"], db_path) + + inbound = outbound = 0 + + for term in search_terms: + # ── INBOX β€” inbound ─────────────────────────────────────────────── + uids = _search_folder( + conn, "INBOX", + f'(OR FROM "{term}" SUBJECT "{term}")', + since, + ) + for uid in uids: + parsed = _parse_message(conn, uid) + if not parsed: + continue + if parsed["message_id"] in existing_ids: + continue + if not _email_is_relevant(parsed["from_addr"], parsed["subject"], search_terms): + continue + + if not dry_run: + contact_id = add_contact( + db_path, job_id=job["id"], direction="inbound", + subject=parsed["subject"], from_addr=parsed["from_addr"], + to_addr=parsed["to_addr"], body=parsed["body"], + received_at=parsed["date"][:16] if parsed["date"] else since, + message_id=parsed["message_id"], + ) + signal = classify_stage_signal(parsed["subject"], parsed["body"]) + if signal and signal != "neutral": + _update_contact_signal(db_path, contact_id, signal) + existing_ids.add(parsed["message_id"]) + inbound += 1 + + # ── Sent β€” outbound ─────────────────────────────────────────────── + sent_folder = cfg.get("sent_folder") or _detect_sent_folder(conn) + uids = _search_folder( + conn, sent_folder, + f'(OR TO "{term}" SUBJECT "{term}")', + since, + ) + for uid in uids: + parsed = _parse_message(conn, uid) + if not parsed: + continue + if parsed["message_id"] in existing_ids: + continue + if not _email_is_relevant(parsed["to_addr"], parsed["subject"], search_terms): + continue + + if not dry_run: + add_contact( + db_path, job_id=job["id"], direction="outbound", + subject=parsed["subject"], from_addr=parsed["from_addr"], + 
to_addr=parsed["to_addr"], body=parsed["body"], + received_at=parsed["date"][:16] if parsed["date"] else since, + message_id=parsed["message_id"], + ) + existing_ids.add(parsed["message_id"]) + outbound += 1 + + return inbound, outbound + + +# ── Main entry ──────────────────────────────────────────────────────────────── + +def sync_all(db_path: Path = DEFAULT_DB, + dry_run: bool = False, + job_ids: Optional[list[int]] = None, + on_stage=None) -> dict: + """ + Sync emails for all active pipeline jobs (or a specific subset). + + Returns a summary dict: + {"synced": N, "inbound": N, "outbound": N, "errors": [...]} + """ + def _stage(msg: str) -> None: + if on_stage: + on_stage(msg) + + cfg = load_config() + init_db(db_path) + + jobs_by_stage = get_interview_jobs(db_path) + active_stages = ["applied", "phone_screen", "interviewing", "offer", "hired"] + all_active = [j for stage in active_stages for j in jobs_by_stage.get(stage, [])] + + if job_ids: + all_active = [j for j in all_active if j["id"] in job_ids] + + if not all_active: + return {"synced": 0, "inbound": 0, "outbound": 0, "new_leads": 0, "todo_attached": 0, "errors": []} + + _stage("connecting") + print(f"[imap] Connecting to {cfg.get('host', 'imap.gmail.com')} …") + conn = connect(cfg) + summary = {"synced": 0, "inbound": 0, "outbound": 0, "new_leads": 0, "errors": []} + + try: + for i, job in enumerate(all_active, 1): + _stage(f"job {i}/{len(all_active)}") + try: + inb, out = sync_job_emails(job, conn, cfg, db_path, dry_run=dry_run) + label = "DRY-RUN " if dry_run else "" + print(f"[imap] {label}{job.get('company'):30s} +{inb} in +{out} out") + if inb + out > 0: + summary["synced"] += 1 + summary["inbound"] += inb + summary["outbound"] += out + except Exception as e: + msg = f"{job.get('company')}: {e}" + summary["errors"].append(msg) + print(f"[imap] ERROR β€” {msg}") + + _stage("scanning todo label") + from scripts.db import get_all_message_ids + known_mids = get_all_message_ids(db_path) + 
summary["todo_attached"] = _scan_todo_label(conn, cfg, db_path, all_active, known_mids) + + _stage("scanning leads") + summary["new_leads"] = _scan_unmatched_leads(conn, cfg, db_path, known_mids) + finally: + try: + conn.logout() + except Exception: + pass + + return summary + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Sync IMAP emails to job contacts") + parser.add_argument("--job-id", type=int, nargs="+", help="Sync only these job IDs") + parser.add_argument("--dry-run", action="store_true", help="Show matches without saving") + args = parser.parse_args() + + result = sync_all( + dry_run=args.dry_run, + job_ids=args.job_id, + ) + print(f"\n[imap] Done β€” {result['synced']} jobs updated, " + f"{result['inbound']} inbound, {result['outbound']} outbound" + + (f", {len(result['errors'])} errors" if result["errors"] else "")) diff --git a/scripts/llm_router.py b/scripts/llm_router.py new file mode 100644 index 0000000..d4eb237 --- /dev/null +++ b/scripts/llm_router.py @@ -0,0 +1,170 @@ +""" +LLM abstraction layer with priority fallback chain. +Reads config/llm.yaml. Tries backends in order; falls back on any error. +""" +import os +import yaml +import requests +from pathlib import Path +from openai import OpenAI + +CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml" + + +class LLMRouter: + def __init__(self, config_path: Path = CONFIG_PATH): + with open(config_path) as f: + self.config = yaml.safe_load(f) + + def _is_reachable(self, base_url: str) -> bool: + """Quick health-check ping. 
Returns True if backend is up.""" + health_url = base_url.rstrip("/").removesuffix("/v1") + "/health" + try: + resp = requests.get(health_url, timeout=2) + return resp.status_code < 500 + except Exception: + return False + + def _resolve_model(self, client: OpenAI, model: str) -> str: + """Resolve __auto__ to the first model served by vLLM.""" + if model != "__auto__": + return model + models = client.models.list() + return models.data[0].id + + def complete(self, prompt: str, system: str | None = None, + model_override: str | None = None, + fallback_order: list[str] | None = None, + images: list[str] | None = None) -> str: + """ + Generate a completion. Tries each backend in fallback_order. + + model_override: when set, replaces the configured model for + openai_compat backends (e.g. pass a research-specific ollama model). + fallback_order: when set, overrides config fallback_order for this + call (e.g. pass config["research_fallback_order"] for research tasks). + images: optional list of base64-encoded PNG/JPG strings. When provided, + backends without supports_images=true are skipped. vision_service backends + are only tried when images is provided. + Raises RuntimeError if all backends are exhausted. 
+ """ + order = fallback_order if fallback_order is not None else self.config["fallback_order"] + for name in order: + backend = self.config["backends"][name] + + if not backend.get("enabled", True): + print(f"[LLMRouter] {name}: disabled, skipping") + continue + + supports_images = backend.get("supports_images", False) + is_vision_service = backend["type"] == "vision_service" + + # vision_service only used when images provided + if is_vision_service and not images: + print(f"[LLMRouter] {name}: vision_service skipped (no images)") + continue + + # non-vision backends skipped when images provided and they don't support it + if images and not supports_images and not is_vision_service: + print(f"[LLMRouter] {name}: no image support, skipping") + continue + + if is_vision_service: + if not self._is_reachable(backend["base_url"]): + print(f"[LLMRouter] {name}: unreachable, skipping") + continue + try: + resp = requests.post( + backend["base_url"].rstrip("/") + "/analyze", + json={ + "prompt": prompt, + "image_base64": images[0] if images else "", + }, + timeout=60, + ) + resp.raise_for_status() + print(f"[LLMRouter] Used backend: {name} (vision_service)") + return resp.json()["text"] + except Exception as e: + print(f"[LLMRouter] {name}: error β€” {e}, trying next") + continue + + elif backend["type"] == "openai_compat": + if not self._is_reachable(backend["base_url"]): + print(f"[LLMRouter] {name}: unreachable, skipping") + continue + try: + client = OpenAI( + base_url=backend["base_url"], + api_key=backend.get("api_key") or "any", + ) + raw_model = model_override or backend["model"] + model = self._resolve_model(client, raw_model) + messages = [] + if system: + messages.append({"role": "system", "content": system}) + if images and supports_images: + content = [{"type": "text", "text": prompt}] + for img in images: + content.append({ + "type": "image_url", + "image_url": {"url": f"data:image/png;base64,{img}"}, + }) + messages.append({"role": "user", "content": 
content}) + else: + messages.append({"role": "user", "content": prompt}) + + resp = client.chat.completions.create( + model=model, messages=messages + ) + print(f"[LLMRouter] Used backend: {name} ({model})") + return resp.choices[0].message.content + + except Exception as e: + print(f"[LLMRouter] {name}: error β€” {e}, trying next") + continue + + elif backend["type"] == "anthropic": + api_key = os.environ.get(backend["api_key_env"], "") + if not api_key: + print(f"[LLMRouter] {name}: {backend['api_key_env']} not set, skipping") + continue + try: + import anthropic as _anthropic + client = _anthropic.Anthropic(api_key=api_key) + if images and supports_images: + content = [] + for img in images: + content.append({ + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": img}, + }) + content.append({"type": "text", "text": prompt}) + else: + content = prompt + kwargs: dict = { + "model": backend["model"], + "max_tokens": 4096, + "messages": [{"role": "user", "content": content}], + } + if system: + kwargs["system"] = system + msg = client.messages.create(**kwargs) + print(f"[LLMRouter] Used backend: {name}") + return msg.content[0].text + except Exception as e: + print(f"[LLMRouter] {name}: error β€” {e}, trying next") + continue + + raise RuntimeError("All LLM backends exhausted") + + +# Module-level singleton for convenience +_router: LLMRouter | None = None + + +def complete(prompt: str, system: str | None = None) -> str: + global _router + if _router is None: + _router = LLMRouter() + return _router.complete(prompt, system) diff --git a/scripts/manage-ui.sh b/scripts/manage-ui.sh new file mode 100755 index 0000000..55cadd9 --- /dev/null +++ b/scripts/manage-ui.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +# scripts/manage-ui.sh β€” manage the Streamlit job-seeker web UI +# Usage: bash scripts/manage-ui.sh [start|stop|restart|status|logs] + +set -euo pipefail + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +STREAMLIT_BIN="/devl/miniconda3/envs/job-seeker/bin/streamlit" +APP_ENTRY="$REPO_DIR/app/app.py" +PID_FILE="$REPO_DIR/.streamlit.pid" +LOG_FILE="$REPO_DIR/.streamlit.log" +PORT="${STREAMLIT_PORT:-8501}" + +start() { + if is_running; then + echo "Already running (PID $(cat "$PID_FILE")). Use 'restart' to reload." + return 0 + fi + + echo "Starting Streamlit on http://localhost:$PORT …" + "$STREAMLIT_BIN" run "$APP_ENTRY" \ + --server.port "$PORT" \ + --server.headless true \ + --server.fileWatcherType none \ + > "$LOG_FILE" 2>&1 & + echo $! > "$PID_FILE" + sleep 2 + + if is_running; then + echo "Started (PID $(cat "$PID_FILE")). Logs: $LOG_FILE" + else + echo "Failed to start. Check logs: $LOG_FILE" + tail -20 "$LOG_FILE" + exit 1 + fi +} + +stop() { + if ! is_running; then + echo "Not running." + rm -f "$PID_FILE" + return 0 + fi + + PID=$(cat "$PID_FILE") + echo "Stopping PID $PID …" + kill "$PID" 2>/dev/null || true + sleep 1 + if kill -0 "$PID" 2>/dev/null; then + kill -9 "$PID" 2>/dev/null || true + fi + rm -f "$PID_FILE" + echo "Stopped." +} + +restart() { + stop + sleep 1 + start +} + +status() { + if is_running; then + echo "Running (PID $(cat "$PID_FILE")) on http://localhost:$PORT" + else + echo "Not running." 
+ fi +} + +logs() { + if [[ -f "$LOG_FILE" ]]; then + tail -50 "$LOG_FILE" + else + echo "No log file found at $LOG_FILE" + fi +} + +is_running() { + if [[ -f "$PID_FILE" ]]; then + PID=$(cat "$PID_FILE") + if kill -0 "$PID" 2>/dev/null; then + return 0 + fi + fi + return 1 +} + +CMD="${1:-help}" +case "$CMD" in + start) start ;; + stop) stop ;; + restart) restart ;; + status) status ;; + logs) logs ;; + *) + echo "Usage: bash scripts/manage-ui.sh [start|stop|restart|status|logs]" + echo "" + echo " start Start the Streamlit UI (default port: $PORT)" + echo " stop Stop the running UI" + echo " restart Stop then start" + echo " status Show whether it's running" + echo " logs Tail the last 50 lines of the log" + echo "" + echo " STREAMLIT_PORT=8502 bash scripts/manage-ui.sh start (custom port)" + ;; +esac diff --git a/scripts/manage-vision.sh b/scripts/manage-vision.sh new file mode 100755 index 0000000..43b089c --- /dev/null +++ b/scripts/manage-vision.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +# scripts/manage-vision.sh β€” manage the moondream2 vision service +# Usage: bash scripts/manage-vision.sh start|stop|restart|status|logs +# +# First-time setup: +# conda env create -f scripts/vision_service/environment.yml +# +# On first start, moondream2 is downloaded from HuggingFace (~1.8GB). +# Model stays resident in memory between requests. + +set -euo pipefail + +CONDA_ENV="job-seeker-vision" +UVICORN_BIN="/devl/miniconda3/envs/${CONDA_ENV}/bin/uvicorn" +PID_FILE="/tmp/vision-service.pid" +LOG_FILE="/tmp/vision-service.log" +PORT=8002 +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(dirname "$SCRIPT_DIR")" + +is_running() { + if [[ -f "$PID_FILE" ]]; then + PID=$(cat "$PID_FILE") + if kill -0 "$PID" 2>/dev/null; then + return 0 + fi + fi + return 1 +} + +start() { + if is_running; then + echo "Already running (PID $(cat "$PID_FILE"))." + return 0 + fi + + if [[ ! -f "$UVICORN_BIN" ]]; then + echo "ERROR: conda env '$CONDA_ENV' not found." 
+ echo "Install with: conda env create -f scripts/vision_service/environment.yml" + exit 1 + fi + + echo "Starting vision service (moondream2) on port $PORT…" + cd "$REPO_ROOT" + PYTHONPATH="$REPO_ROOT" "$UVICORN_BIN" \ + scripts.vision_service.main:app \ + --host 0.0.0.0 \ + --port "$PORT" \ + > "$LOG_FILE" 2>&1 & + echo $! > "$PID_FILE" + sleep 2 + + if is_running; then + echo "Started (PID $(cat "$PID_FILE")). Logs: $LOG_FILE" + echo "Health: http://localhost:$PORT/health" + else + echo "Failed to start. Check logs: $LOG_FILE" + tail -20 "$LOG_FILE" + rm -f "$PID_FILE" + exit 1 + fi +} + +stop() { + if ! is_running; then + echo "Not running." + rm -f "$PID_FILE" + return 0 + fi + PID=$(cat "$PID_FILE") + echo "Stopping PID $PID…" + kill "$PID" 2>/dev/null || true + sleep 2 + if kill -0 "$PID" 2>/dev/null; then + kill -9 "$PID" 2>/dev/null || true + fi + rm -f "$PID_FILE" + echo "Stopped." +} + +restart() { stop; sleep 1; start; } + +status() { + if is_running; then + echo "Running (PID $(cat "$PID_FILE")) β€” http://localhost:$PORT" + curl -s "http://localhost:$PORT/health" | python3 -m json.tool 2>/dev/null || true + else + echo "Not running." + fi +} + +logs() { + if [[ -f "$LOG_FILE" ]]; then + tail -50 "$LOG_FILE" + else + echo "No log file at $LOG_FILE" + fi +} + +CMD="${1:-help}" +case "$CMD" in + start) start ;; + stop) stop ;; + restart) restart ;; + status) status ;; + logs) logs ;; + *) + echo "Usage: bash scripts/manage-vision.sh start|stop|restart|status|logs" + echo "" + echo " Manages the moondream2 vision service on port $PORT." 
+ echo " First-time setup: conda env create -f scripts/vision_service/environment.yml" + ;; +esac diff --git a/scripts/manage-vllm.sh b/scripts/manage-vllm.sh new file mode 100755 index 0000000..8386e20 --- /dev/null +++ b/scripts/manage-vllm.sh @@ -0,0 +1,160 @@ +#!/usr/bin/env bash +# scripts/manage-vllm.sh β€” manage the vLLM inference server +# Usage: bash scripts/manage-vllm.sh [start [model]|stop|restart [model]|status|logs|list] + +set -euo pipefail + +VLLM_BIN="/devl/miniconda3/envs/vllm/bin/python" +MODEL_DIR="/Library/Assets/LLM/vllm/models" +PID_FILE="/tmp/vllm-server.pid" +LOG_FILE="/tmp/vllm-server.log" +MODEL_FILE="/tmp/vllm-server.model" +PORT=8000 +GPU=1 + +_list_model_names() { + if [[ -d "$MODEL_DIR" ]]; then + find "$MODEL_DIR" -maxdepth 1 -mindepth 1 -type d -printf '%f\n' 2>/dev/null | sort + fi +} + +is_running() { + if [[ -f "$PID_FILE" ]]; then + PID=$(cat "$PID_FILE") + if kill -0 "$PID" 2>/dev/null; then + return 0 + fi + fi + return 1 +} + +start() { + local model_name="${1:-}" + + if [[ -z "$model_name" ]]; then + model_name=$(_list_model_names | head -1) + if [[ -z "$model_name" ]]; then + echo "No models found in $MODEL_DIR" + exit 1 + fi + fi + + local model_path + if [[ "$model_name" == /* ]]; then + model_path="$model_name" + model_name=$(basename "$model_path") + else + model_path="$MODEL_DIR/$model_name" + fi + + if [[ ! -d "$model_path" ]]; then + echo "Model not found: $model_path" + exit 1 + fi + + if is_running; then + echo "Already running (PID $(cat "$PID_FILE")). Use 'restart' to reload." + return 0 + fi + + echo "Starting vLLM with model: $model_name (GPU $GPU, port $PORT)…" + echo "$model_name" > "$MODEL_FILE" + + # Ouro LoopLM uses total_ut_steps=4 which multiplies KV cache by 4x vs a standard + # transformer. On 8 GiB GPUs: 1.4B models support ~4096 tokens; 2.6B only ~928. 
+ CUDA_VISIBLE_DEVICES="$GPU" "$VLLM_BIN" -m vllm.entrypoints.openai.api_server \ + --model "$model_path" \ + --trust-remote-code \ + --max-model-len 3072 \ + --gpu-memory-utilization 0.75 \ + --enforce-eager \ + --max-num-seqs 8 \ + --port "$PORT" \ + > "$LOG_FILE" 2>&1 & + echo $! > "$PID_FILE" + sleep 3 + + if is_running; then + echo "Started (PID $(cat "$PID_FILE")). Logs: $LOG_FILE" + else + echo "Failed to start. Check logs: $LOG_FILE" + tail -20 "$LOG_FILE" + rm -f "$PID_FILE" "$MODEL_FILE" + exit 1 + fi +} + +stop() { + if ! is_running; then + echo "Not running." + rm -f "$PID_FILE" + return 0 + fi + + PID=$(cat "$PID_FILE") + echo "Stopping PID $PID …" + kill "$PID" 2>/dev/null || true + sleep 2 + if kill -0 "$PID" 2>/dev/null; then + kill -9 "$PID" 2>/dev/null || true + fi + rm -f "$PID_FILE" "$MODEL_FILE" + echo "Stopped." +} + +restart() { + local model_name="${1:-}" + stop + sleep 1 + start "$model_name" +} + +status() { + if is_running; then + local model="" + if [[ -f "$MODEL_FILE" ]]; then + model=" β€” model: $(cat "$MODEL_FILE")" + fi + echo "Running (PID $(cat "$PID_FILE")) on http://localhost:$PORT$model" + else + echo "Not running." 
+ fi +} + +logs() { + if [[ -f "$LOG_FILE" ]]; then + tail -50 "$LOG_FILE" + else + echo "No log file found at $LOG_FILE" + fi +} + +list() { + echo "Available models in $MODEL_DIR:" + _list_model_names | while read -r name; do + echo " - $name" + done +} + +CMD="${1:-help}" +case "$CMD" in + start) start "${2:-}" ;; + stop) stop ;; + restart) restart "${2:-}" ;; + status) status ;; + logs) logs ;; + list) list ;; + *) + echo "Usage: bash scripts/manage-vllm.sh [start [model]|stop|restart [model]|status|logs|list]" + echo "" + echo " start [model] Start vLLM with the specified model (default: first in $MODEL_DIR)" + echo " stop Stop the running vLLM server" + echo " restart [model] Stop then start (pass a new model name to swap)" + echo " status Show whether it's running and which model is loaded" + echo " logs Tail the last 50 lines of the log" + echo " list List available models" + echo "" + echo " GPU: $GPU (CUDA_VISIBLE_DEVICES)" + echo " Port: $PORT" + ;; +esac diff --git a/scripts/match.py b/scripts/match.py new file mode 100644 index 0000000..af1d000 --- /dev/null +++ b/scripts/match.py @@ -0,0 +1,156 @@ +""" +Resume match scoring. + +Two modes: + 1. SQLite batch β€” score all unscored pending/approved jobs in staging.db + Usage: python scripts/match.py + + 2. 
Notion single β€” score one Notion page by URL/ID and write results back + Usage: python scripts/match.py +""" +import re +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import requests +import yaml +from bs4 import BeautifulSoup +from notion_client import Client + +CONFIG_DIR = Path(__file__).parent.parent / "config" +RESUME_PATH = Path("/Library/Documents/JobSearch/Alex_Rivera_Resume_02-19-2025.pdf") + + +def load_notion() -> tuple[Client, dict]: + cfg = yaml.safe_load((CONFIG_DIR / "notion.yaml").read_text()) + return Client(auth=cfg["token"]), cfg["field_map"] + + +def extract_page_id(url_or_id: str) -> str: + """Extract 32-char Notion page ID from a URL or return as-is.""" + clean = url_or_id.replace("-", "") + match = re.search(r"[0-9a-f]{32}", clean) + return match.group(0) if match else url_or_id.strip() + + +def get_job_url_from_notion(notion: Client, page_id: str, url_field: str) -> str: + page = notion.pages.retrieve(page_id) + return page["properties"][url_field]["url"] or "" + + +def extract_job_description(url: str) -> str: + """Fetch a job listing URL and return its visible text.""" + resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10) + resp.raise_for_status() + soup = BeautifulSoup(resp.text, "html.parser") + for tag in soup(["script", "style", "nav", "header", "footer"]): + tag.decompose() + return " ".join(soup.get_text(separator=" ").split()) + + +def read_resume_text() -> str: + """Extract text from the ATS-clean PDF resume.""" + import pypdf + reader = pypdf.PdfReader(str(RESUME_PATH)) + return " ".join(page.extract_text() or "" for page in reader.pages) + + +def match_score(resume_text: str, job_text: str) -> tuple[float, list[str]]: + """ + Score resume against job description using TF-IDF cosine similarity. + Returns (score 0–100, list of high-value job keywords missing from resume). 
+ """ + import numpy as np + from sklearn.feature_extraction.text import TfidfVectorizer + from sklearn.metrics.pairwise import cosine_similarity + + vectorizer = TfidfVectorizer(stop_words="english", max_features=200) + tfidf = vectorizer.fit_transform([resume_text, job_text]) + score = float(cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]) * 100 + + resume_terms = set(resume_text.lower().split()) + feature_names = vectorizer.get_feature_names_out() + job_tfidf = tfidf[1].toarray()[0] + top_indices = np.argsort(job_tfidf)[::-1][:30] + top_job_terms = [feature_names[i] for i in top_indices if job_tfidf[i] > 0] + gaps = [t for t in top_job_terms if t not in resume_terms and t == t][:10] # t==t drops NaN + + return round(score, 1), gaps + + +def write_match_to_notion(notion: Client, page_id: str, score: float, gaps: list[str], fm: dict) -> None: + notion.pages.update( + page_id=page_id, + properties={ + fm["match_score"]: {"number": score}, + fm["keyword_gaps"]: {"rich_text": [{"text": {"content": ", ".join(gaps)}}]}, + }, + ) + + +def run_match(page_url_or_id: str) -> None: + notion, fm = load_notion() + page_id = extract_page_id(page_url_or_id) + + print(f"[match] Page ID: {page_id}") + job_url = get_job_url_from_notion(notion, page_id, fm["url"]) + print(f"[match] Fetching job description from: {job_url}") + + job_text = extract_job_description(job_url) + resume_text = read_resume_text() + + score, gaps = match_score(resume_text, job_text) + print(f"[match] Score: {score}/100") + print(f"[match] Keyword gaps: {', '.join(gaps) or 'none'}") + + write_match_to_notion(notion, page_id, score, gaps, fm) + print("[match] Written to Notion.") + + +def score_pending_jobs(db_path: Path = None) -> int: + """ + Score all unscored jobs (any status) in SQLite using the description + already scraped during discovery. Writes match_score + keyword_gaps back. + Returns the number of jobs scored. 
+ """ + from scripts.db import DEFAULT_DB, write_match_scores + + if db_path is None: + db_path = DEFAULT_DB + + import sqlite3 + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT id, title, company, description FROM jobs " + "WHERE match_score IS NULL " + "AND description IS NOT NULL AND description != '' AND description != 'nan'" + ).fetchall() + conn.close() + + if not rows: + print("[match] No unscored jobs with descriptions found.") + return 0 + + resume_text = read_resume_text() + scored = 0 + for row in rows: + job_id, title, company, description = row["id"], row["title"], row["company"], row["description"] + try: + score, gaps = match_score(resume_text, description) + write_match_scores(db_path, job_id, score, ", ".join(gaps)) + print(f"[match] {title} @ {company}: {score}/100 gaps: {', '.join(gaps) or 'none'}") + scored += 1 + except Exception as e: + print(f"[match] Error scoring job {job_id}: {e}") + + print(f"[match] Done β€” {scored} jobs scored.") + return scored + + +if __name__ == "__main__": + if len(sys.argv) < 2: + score_pending_jobs() + else: + run_match(sys.argv[1]) diff --git a/scripts/prepare_training_data.py b/scripts/prepare_training_data.py new file mode 100644 index 0000000..5b2010b --- /dev/null +++ b/scripts/prepare_training_data.py @@ -0,0 +1,134 @@ +# scripts/prepare_training_data.py +""" +Extract training pairs from Alex's cover letter corpus for LoRA fine-tuning. 
+ +Outputs a JSONL file where each line is: + {"instruction": "Write a cover letter for the [role] position at [company].", + "output": ""} + +Usage: + conda run -n job-seeker python scripts/prepare_training_data.py + conda run -n job-seeker python scripts/prepare_training_data.py --output /path/to/out.jsonl +""" +import argparse +import json +import re +import sys +from pathlib import Path + +LETTERS_DIR = Path("/Library/Documents/JobSearch") +# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter") +LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"] +DEFAULT_OUTPUT = LETTERS_DIR / "training_data" / "cover_letters.jsonl" + +# Patterns that appear in opening sentences to extract role +ROLE_PATTERNS = [ + r"apply for (?:the )?(.+?) (?:position|role|opportunity) at", + r"apply for (?:the )?(.+?) (?:at|with)\b", +] + + +def extract_role_from_text(text: str) -> str: + """Try to extract the role title from the first ~500 chars of a cover letter.""" + # Search the opening of the letter, skipping past any greeting line + search_text = text[:600] + for pattern in ROLE_PATTERNS: + m = re.search(pattern, search_text, re.IGNORECASE) + if m: + role = m.group(1).strip().rstrip(".") + # Filter out noise β€” role should be ≀6 words + if 1 <= len(role.split()) <= 6: + return role + return "" + + +def extract_company_from_filename(stem: str) -> str: + """Extract company name from cover letter filename stem.""" + return re.sub(r"\s*Cover Letter.*", "", stem, flags=re.IGNORECASE).strip() + + +def strip_greeting(text: str) -> str: + """Remove the 'Dear X,' line so the output is just the letter body + sign-off.""" + lines = text.splitlines() + for i, line in enumerate(lines): + if line.strip().lower().startswith("dear "): + # Skip the greeting line and any following blank lines + rest = lines[i + 1:] + while rest and not rest[0].strip(): + rest = rest[1:] + return "\n".join(rest).strip() + return text.strip() + + +def build_records(letters_dir: Path = 
LETTERS_DIR) -> list[dict]: + """Parse all cover letters and return list of training records.""" + records = [] + seen: set[Path] = set() + all_paths = [] + for glob in LETTER_GLOBS: + for p in letters_dir.glob(glob): + if p not in seen: + seen.add(p) + all_paths.append(p) + for path in sorted(all_paths): + text = path.read_text(encoding="utf-8", errors="ignore").strip() + if not text or len(text) < 100: + continue + + company = extract_company_from_filename(path.stem) + role = extract_role_from_text(text) + body = strip_greeting(text) + + if not role: + # Use a generic instruction when role extraction fails + instruction = f"Write a cover letter for a position at {company}." + else: + instruction = f"Write a cover letter for the {role} position at {company}." + + records.append({ + "instruction": instruction, + "output": body, + "source_file": path.name, + }) + + return records + + +def write_jsonl(records: list[dict], output_path: Path) -> None: + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + for record in records: + f.write(json.dumps(record, ensure_ascii=False) + "\n") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Prepare LoRA training data from cover letter corpus") + parser.add_argument("--output", default=str(DEFAULT_OUTPUT), help="Output JSONL path") + parser.add_argument("--letters-dir", default=str(LETTERS_DIR), help="Directory of cover letters") + parser.add_argument("--stats", action="store_true", help="Print statistics and exit") + args = parser.parse_args() + + records = build_records(Path(args.letters_dir)) + + if args.stats: + print(f"Total letters: {len(records)}") + with_role = sum(1 for r in records if not r["instruction"].startswith("Write a cover letter for a position")) + print(f"Role extracted: {with_role}/{len(records)}") + avg_len = sum(len(r["output"]) for r in records) / max(len(records), 1) + print(f"Avg letter length: {avg_len:.0f} chars") + for r 
in records: + print(f" {r['source_file']!r:55s} β†’ {r['instruction'][:70]}") + return + + output_path = Path(args.output) + write_jsonl(records, output_path) + print(f"Wrote {len(records)} training records to {output_path}") + print() + print("Next step for LoRA fine-tuning:") + print(" 1. Download base model: huggingface-cli download meta-llama/Meta-Llama-3.1-8B-Instruct") + print(" 2. Fine-tune with TRL: see docs/plans/lora-finetune.md (to be created)") + print(" 3. Or use HuggingFace Jobs: bash scripts/manage-ui.sh β€” hugging-face-model-trainer skill") + + +if __name__ == "__main__": + main() diff --git a/scripts/scrape_url.py b/scripts/scrape_url.py new file mode 100644 index 0000000..e577fe6 --- /dev/null +++ b/scripts/scrape_url.py @@ -0,0 +1,228 @@ +# scripts/scrape_url.py +""" +Scrape a job listing from its URL and update the job record. + +Supports: + - LinkedIn (guest jobs API β€” no auth required) + - Indeed (HTML parse) + - Glassdoor (JobSpy internal scraper, same as enrich_descriptions.py) + - Generic (JSON-LD β†’ og:tags fallback) + +Usage (background task β€” called by task_runner): + from scripts.scrape_url import scrape_job_url + scrape_job_url(db_path, job_id) +""" +import json +import re +import sqlite3 +import sys +from pathlib import Path +from typing import Optional +from urllib.parse import urlparse, urlencode, parse_qsl + +import requests +from bs4 import BeautifulSoup + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from scripts.db import DEFAULT_DB, update_job_fields + +_STRIP_PARAMS = { + "utm_source", "utm_medium", "utm_campaign", "utm_content", "utm_term", + "trk", "trkEmail", "refId", "trackingId", "lipi", "midToken", "midSig", + "eid", "otpToken", "ssid", "fmid", +} + +_HEADERS = { + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" + ) +} +_TIMEOUT = 12 + + +def _detect_board(url: str) -> str: + """Return 'linkedin', 'indeed', 'glassdoor', or 
'generic'.""" + url_lower = url.lower() + if "linkedin.com" in url_lower: + return "linkedin" + if "indeed.com" in url_lower: + return "indeed" + if "glassdoor.com" in url_lower: + return "glassdoor" + return "generic" + + +def _extract_linkedin_job_id(url: str) -> Optional[str]: + """Extract numeric job ID from a LinkedIn job URL.""" + m = re.search(r"/jobs/view/(\d+)", url) + return m.group(1) if m else None + + +def canonicalize_url(url: str) -> str: + """ + Strip tracking parameters from a job URL and return a clean canonical form. + + LinkedIn: https://www.linkedin.com/jobs/view//?trk=... β†’ https://www.linkedin.com/jobs/view// + Others: strips utm_source/utm_medium/utm_campaign/trk/refId/trackingId + """ + url = url.strip() + if "linkedin.com" in url.lower(): + job_id = _extract_linkedin_job_id(url) + if job_id: + return f"https://www.linkedin.com/jobs/view/{job_id}/" + parsed = urlparse(url) + clean_qs = urlencode([(k, v) for k, v in parse_qsl(parsed.query) if k not in _STRIP_PARAMS]) + return parsed._replace(query=clean_qs).geturl() + + +def _scrape_linkedin(url: str) -> dict: + """Fetch via LinkedIn guest jobs API (no auth required).""" + job_id = _extract_linkedin_job_id(url) + if not job_id: + return {} + api_url = f"https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/{job_id}" + resp = requests.get(api_url, headers=_HEADERS, timeout=_TIMEOUT) + resp.raise_for_status() + soup = BeautifulSoup(resp.text, "html.parser") + + def _text(selector, **kwargs): + tag = soup.find(selector, **kwargs) + return tag.get_text(strip=True) if tag else "" + + title = _text("h2", class_="top-card-layout__title") + company = _text("a", class_="topcard__org-name-link") or _text("span", class_="topcard__org-name-link") + location = _text("span", class_="topcard__flavor--bullet") + desc_div = soup.find("div", class_="show-more-less-html__markup") + description = desc_div.get_text(separator="\n", strip=True) if desc_div else "" + + return {k: v for k, v in { + "title": 
def _scrape_glassdoor(url: str) -> dict:
    """Re-use JobSpy's Glassdoor scraper for description fetch.

    Extracts the numeric listing id from the ``jl=`` query parameter, then
    drives JobSpy's internal Glassdoor scraper directly to fetch only the
    description (no full search run).

    Returns:
        ``{"description": ...}`` on success; ``{}`` when the URL carries no
        ``jl=`` id or when anything in the JobSpy call chain fails.

    NOTE(review): this reaches into JobSpy private API (``_get_csrf_token``,
    ``_fetch_job_description``) and may break on a JobSpy upgrade — confirm
    the pinned version before bumping the dependency.
    """
    m = re.search(r"jl=(\d+)", url)
    if not m:
        return {}
    try:
        # Imports are function-local, so this module stays importable even
        # when jobspy is not installed — a failure degrades to {} below.
        from jobspy.glassdoor import Glassdoor
        from jobspy.glassdoor.constant import fallback_token, headers
        from jobspy.model import ScraperInput, Site
        from jobspy.util import create_session

        scraper = Glassdoor()
        scraper.base_url = "https://www.glassdoor.com/"
        scraper.session = create_session(has_retry=True)
        # Glassdoor requires a CSRF token; fall back to JobSpy's bundled
        # token when the live token fetch returns nothing.
        token = scraper._get_csrf_token()
        headers["gd-csrf-token"] = token if token else fallback_token
        scraper.scraper_input = ScraperInput(site_type=[Site.GLASSDOOR])
        description = scraper._fetch_job_description(int(m.group(1)))
        return {"description": description} if description else {}
    except Exception:
        # Best-effort by design: any failure means "no fields scraped".
        return {}
def _scrape_generic(url: str) -> dict:
    """Fetch an arbitrary job page and extract fields via JSON-LD / og: tags."""
    response = requests.get(url, headers=_HEADERS, timeout=_TIMEOUT)
    response.raise_for_status()
    parsed = _parse_json_ld_or_og(response.text)
    return parsed if parsed else {}
+ """ + if job_id is None: + return {} + + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + row = conn.execute("SELECT url FROM jobs WHERE id=?", (job_id,)).fetchone() + conn.close() + if not row: + return {} + + url = row["url"] or "" + if not url.startswith("http"): + return {} + + board = _detect_board(url) + try: + if board == "linkedin": + fields = _scrape_linkedin(url) + elif board == "indeed": + fields = _scrape_indeed(url) + elif board == "glassdoor": + fields = _scrape_glassdoor(url) + else: + fields = _scrape_generic(url) + except requests.RequestException as exc: + print(f"[scrape_url] HTTP error for job {job_id} ({url}): {exc}") + return {} + except Exception as exc: + print(f"[scrape_url] Error scraping job {job_id} ({url}): {exc}") + return {} + + if fields: + fields.pop("url", None) + update_job_fields(db_path, job_id, fields) + print(f"[scrape_url] job {job_id}: scraped '{fields.get('title', '?')}' @ {fields.get('company', '?')}") + + return fields diff --git a/scripts/sync.py b/scripts/sync.py new file mode 100644 index 0000000..ddb5634 --- /dev/null +++ b/scripts/sync.py @@ -0,0 +1,97 @@ +# scripts/sync.py +""" +Push approved jobs from SQLite staging to Notion. + +Usage: + conda run -n job-seeker python scripts/sync.py +""" +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import yaml +from datetime import datetime + +from notion_client import Client + +from scripts.db import DEFAULT_DB, get_jobs_by_status, update_job_status + +CONFIG_DIR = Path(__file__).parent.parent / "config" + + +def load_notion_config() -> dict: + return yaml.safe_load((CONFIG_DIR / "notion.yaml").read_text()) + + +def _build_properties(job: dict, fm: dict, include_optional: bool = True) -> dict: + """Build the Notion properties dict for a job. 
Optional fields (match_score, + keyword_gaps) are included by default but can be dropped for DBs that don't + have those columns yet.""" + props = { + fm["title_field"]: {"title": [{"text": {"content": job.get("salary") or job.get("title", "")}}]}, + fm["job_title"]: {"rich_text": [{"text": {"content": job.get("title", "")}}]}, + fm["company"]: {"rich_text": [{"text": {"content": job.get("company", "")}}]}, + fm["url"]: {"url": job.get("url") or None}, + fm["source"]: {"multi_select": [{"name": job.get("source", "unknown").title()}]}, + fm["status"]: {"select": {"name": fm["status_new"]}}, + fm["remote"]: {"checkbox": bool(job.get("is_remote", 0))}, + fm["date_found"]: {"date": {"start": job.get("date_found", datetime.now().isoformat()[:10])}}, + } + if include_optional: + score = job.get("match_score") + if score is not None and fm.get("match_score"): + props[fm["match_score"]] = {"number": score} + gaps = job.get("keyword_gaps") + if gaps and fm.get("keyword_gaps"): + props[fm["keyword_gaps"]] = {"rich_text": [{"text": {"content": gaps}}]} + return props + + +def sync_to_notion(db_path: Path = DEFAULT_DB) -> int: + """Push all approved and applied jobs to Notion. Returns count synced.""" + cfg = load_notion_config() + notion = Client(auth=cfg["token"]) + db_id = cfg["database_id"] + fm = cfg["field_map"] + + approved = get_jobs_by_status(db_path, "approved") + applied = get_jobs_by_status(db_path, "applied") + pending_sync = approved + applied + if not pending_sync: + print("[sync] No approved/applied jobs to sync.") + return 0 + + synced_ids = [] + for job in pending_sync: + try: + notion.pages.create( + parent={"database_id": db_id}, + properties=_build_properties(job, fm, include_optional=True), + ) + synced_ids.append(job["id"]) + print(f"[sync] + {job.get('title')} @ {job.get('company')}") + except Exception as e: + err = str(e) + # Notion returns 400 validation_error when a property column doesn't exist yet. 
+ # Fall back to core fields only and warn the user. + if "validation_error" in err or "Could not find property" in err: + try: + notion.pages.create( + parent={"database_id": db_id}, + properties=_build_properties(job, fm, include_optional=False), + ) + synced_ids.append(job["id"]) + print(f"[sync] + {job.get('title')} @ {job.get('company')} " + f"(skipped optional fields β€” add Match Score / Keyword Gaps columns to Notion DB)") + except Exception as e2: + print(f"[sync] Error syncing {job.get('url')}: {e2}") + else: + print(f"[sync] Error syncing {job.get('url')}: {e}") + + update_job_status(db_path, synced_ids, "synced") + print(f"[sync] Done β€” {len(synced_ids)} jobs synced to Notion.") + return len(synced_ids) + + +if __name__ == "__main__": + sync_to_notion() diff --git a/scripts/task_runner.py b/scripts/task_runner.py new file mode 100644 index 0000000..9e6cafd --- /dev/null +++ b/scripts/task_runner.py @@ -0,0 +1,155 @@ +# scripts/task_runner.py +""" +Background task runner for LLM generation tasks. + +Submitting a task inserts a row in background_tasks and spawns a daemon thread. +The thread calls the appropriate generator, writes results to existing tables, +and marks the task completed or failed. + +Deduplication: only one queued/running task per (task_type, job_id) is allowed. +Different task types for the same job run concurrently (e.g. cover letter + research). +""" +import sqlite3 +import threading +from pathlib import Path + +from scripts.db import ( + DEFAULT_DB, + insert_task, + update_task_status, + update_task_stage, + update_cover_letter, + save_research, +) + + +def submit_task(db_path: Path = DEFAULT_DB, task_type: str = "", + job_id: int = None) -> tuple[int, bool]: + """Submit a background LLM task. + + Returns (task_id, True) if a new task was queued and a thread spawned. + Returns (existing_id, False) if an identical task is already in-flight. 
def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int) -> None:
    """Thread body: run the generator for *task_type* and persist the result.

    Dispatches on ``task_type``, runs the matching generator (each imported
    lazily inside its branch, so one broken dependency only fails its own
    task type), and marks the background_tasks row completed/failed.

    NOTE: for several branches the ``error`` column doubles as a
    human-readable success summary — readers of background_tasks should not
    treat a non-empty ``error`` as failure on its own.
    """
    # job_id == 0 means a global task (e.g. discovery) with no associated job row.
    job: dict = {}
    if job_id:
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM jobs WHERE id=?", (job_id,)).fetchone()
        conn.close()
        if row is None:
            # Job was deleted between submit and run — fail fast.
            update_task_status(db_path, task_id, "failed", error=f"Job {job_id} not found")
            return
        job = dict(row)

    update_task_status(db_path, task_id, "running")

    try:
        if task_type == "discovery":
            from scripts.discover import run_discovery
            new_count = run_discovery(db_path)
            n = new_count or 0
            # Success summary goes through the error field (see docstring).
            update_task_status(
                db_path, task_id, "completed",
                error=f"{n} new listing{'s' if n != 1 else ''} added",
            )
            return

        elif task_type == "cover_letter":
            from scripts.generate_cover_letter import generate
            result = generate(
                job.get("title", ""),
                job.get("company", ""),
                job.get("description", ""),
            )
            update_cover_letter(db_path, job_id, result)
            # Falls through to the generic "completed" status at the bottom.

        elif task_type == "company_research":
            from scripts.company_research import research_company
            # on_stage lets the long-running research report progress into
            # the task row as it goes.
            result = research_company(
                job,
                on_stage=lambda s: update_task_stage(db_path, task_id, s),
            )
            save_research(db_path, job_id=job_id, **result)

        elif task_type == "enrich_descriptions":
            from scripts.enrich_descriptions import enrich_all_descriptions
            r = enrich_all_descriptions(db_path)
            errs = len(r.get("errors", []))
            msg = (
                f"{r['succeeded']} description(s) fetched, {r['failed']} failed"
                + (f", {errs} error(s)" if errs else "")
            )
            update_task_status(db_path, task_id, "completed", error=msg)
            return

        elif task_type == "scrape_url":
            from scripts.scrape_url import scrape_job_url
            fields = scrape_job_url(db_path, job_id)
            title = fields.get("title") or job.get("url", "?")
            company = fields.get("company", "")
            msg = f"{title}" + (f" @ {company}" if company else "")
            update_task_status(db_path, task_id, "completed", error=msg)
            # Auto-enrich company/salary for Craigslist jobs: re-read the row
            # because scrape_job_url may have just updated it.
            conn = sqlite3.connect(db_path)
            conn.row_factory = sqlite3.Row
            job_row = conn.execute(
                "SELECT source, company FROM jobs WHERE id=?", (job_id,)
            ).fetchone()
            conn.close()
            if job_row and job_row["source"] == "craigslist" and not job_row["company"]:
                # Chain a follow-up task; dedup in submit_task prevents doubles.
                submit_task(db_path, "enrich_craigslist", job_id)
            return

        elif task_type == "enrich_craigslist":
            from scripts.enrich_descriptions import enrich_craigslist_fields
            extracted = enrich_craigslist_fields(db_path, job_id)
            company = extracted.get("company", "")
            msg = f"company={company}" if company else "no company found"
            update_task_status(db_path, task_id, "completed", error=msg)
            return

        elif task_type == "email_sync":
            try:
                from scripts.imap_sync import sync_all
                result = sync_all(db_path,
                                  on_stage=lambda s: update_task_stage(db_path, task_id, s))
                leads = result.get("new_leads", 0)
                todo = result.get("todo_attached", 0)
                errs = len(result.get("errors", []))
                msg = (
                    f"{result['synced']} jobs updated, "
                    f"+{result['inbound']} in, +{result['outbound']} out"
                    + (f", {leads} new lead(s)" if leads else "")
                    + (f", {todo} todo attached" if todo else "")
                    + (f", {errs} error(s)" if errs else "")
                )
                update_task_status(db_path, task_id, "completed", error=msg)
                return
            except FileNotFoundError:
                # Raised when the email config file is absent — turn it into
                # an actionable message instead of a stack trace.
                update_task_status(db_path, task_id, "failed",
                                   error="Email not configured — go to Settings → Email")
                return

        else:
            raise ValueError(f"Unknown task_type: {task_type!r}")

        # Reached only by branches without an early return (cover_letter,
        # company_research).
        update_task_status(db_path, task_id, "completed")

    except BaseException as exc:
        # BaseException catches SystemExit (from companyScraper sys.exit calls)
        # in addition to regular exceptions.
        update_task_status(db_path, task_id, "failed", error=str(exc))
({text[:30]})" + except Exception as e: + return f"ERR: {e!s:.20}" + + +def _short(s: str, n: int = 55) -> str: + return s if len(s) <= n else s[:n - 1] + "…" + + +def _explain_block(subject: str, body: str) -> str: + """Return the first phrase/rule that triggered a block.""" + subject_lower = subject.lower().strip() + for p in _SPAM_SUBJECT_PREFIXES: + if subject_lower.startswith(p): + return f"subject prefix: {p!r}" + for p in _ATS_CONFIRM_SUBJECTS: + if p in subject_lower: + return f"ATS subject: {p!r}" + haystack = subject_lower + " " + body[:800].lower() + for p in _REJECTION_PHRASES + _SPAM_PHRASES: + if p in haystack: + return f"phrase: {p!r}" + return "unknown" + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--limit", type=int, default=20, help="Max emails to test") + parser.add_argument("--days", type=int, default=90) + parser.add_argument("--dry-run", action="store_true", + help="Skip LLM calls β€” show phrase filter only") + parser.add_argument("--verbose", action="store_true", + help="Show which phrase triggered each BLOCK") + args = parser.parse_args() + + cfg = load_config() + since = (datetime.now() - timedelta(days=args.days)).strftime("%d-%b-%Y") + + print(f"Connecting to {cfg.get('host')} …") + conn = connect(cfg) + + # Collect unique UIDs across broad terms + all_uids: dict[bytes, None] = {} + for term in BROAD_TERMS: + for uid in _search_folder(conn, "INBOX", f'(SUBJECT "{term}")', since): + all_uids[uid] = None + + sample = list(all_uids.keys())[: args.limit] + print(f"Fetched {len(all_uids)} matching UIDs, testing {len(sample)}\n") + + # Header + if args.dry_run: + print(f"{'Subject':<56} {'RK':3} {'Phrase':7}") + print("-" * 72) + else: + print(f"{'Subject':<56} {'RK':3} {'Phrase':7} {'phi3':<20} {'llama3':<20} {'vllm':<20}") + print("-" * 130) + + passed = skipped = 0 + rows = [] + + for uid in sample: + parsed = _parse_message(conn, uid) + if not parsed: + continue + subj = parsed["subject"] + body = 
parsed["body"] + + has_rk = _has_recruitment_keyword(subj) + phrase_block = _has_rejection_or_ats_signal(subj, body) + + if args.dry_run: + rk_mark = "βœ“" if has_rk else "βœ—" + pb_mark = "BLOCK" if phrase_block else "pass" + line = f"{_short(subj):<56} {rk_mark:3} {pb_mark:7}" + if phrase_block and args.verbose: + reason = _explain_block(subj, body) + line += f" [{reason}]" + print(line) + continue + + if phrase_block or not has_rk: + skipped += 1 + rk_mark = "βœ“" if has_rk else "βœ—" + pb_mark = "BLOCK" if phrase_block else "pass" + print(f"{_short(subj):<56} {rk_mark:3} {pb_mark:7} {'β€”':<20} {'β€”':<20} {'β€”':<20}") + continue + + passed += 1 + results = {} + for name, (model, fallback) in MODELS.items(): + results[name] = _classify(subj, body, model, fallback) + + pb_mark = "pass" + print(f"{_short(subj):<56} {'βœ“':3} {pb_mark:7} " + f"{results['phi3']:<20} {results['llama3']:<20} {results['vllm']:<20}") + + if not args.dry_run: + print(f"\nPhrase-blocked or no-keyword: {skipped} | Reached LLMs: {passed}") + + try: + conn.logout() + except Exception: + pass + + +if __name__ == "__main__": + main() diff --git a/scripts/vision_service/environment.yml b/scripts/vision_service/environment.yml new file mode 100644 index 0000000..bbbe697 --- /dev/null +++ b/scripts/vision_service/environment.yml @@ -0,0 +1,17 @@ +name: job-seeker-vision +channels: + - conda-forge + - defaults +dependencies: + - python=3.11 + - pip + - pip: + - torch>=2.0.0 + - torchvision>=0.15.0 + - transformers>=4.40.0 + - accelerate>=0.26.0 + - bitsandbytes>=0.43.0 + - einops>=0.7.0 + - Pillow>=10.0.0 + - fastapi>=0.110.0 + - "uvicorn[standard]>=0.27.0" diff --git a/scripts/vision_service/main.py b/scripts/vision_service/main.py new file mode 100644 index 0000000..0cdbf3d --- /dev/null +++ b/scripts/vision_service/main.py @@ -0,0 +1,98 @@ +""" +Vision service β€” moondream2 inference for survey screenshot analysis. 
def _load_model() -> None:
    """Load moondream2 into module-level state (idempotent; loads once).

    Sets the module globals ``_model``, ``_tokenizer``, ``_device`` and
    toggles ``_loading`` around the load so ``/health`` can report progress.
    On CUDA the model is loaded 4-bit quantized; on CPU it is loaded
    unquantized and moved explicitly.
    """
    global _model, _tokenizer, _device, _loading
    # Already resident — nothing to do.
    if _model is not None:
        return
    _loading = True
    print("[vision] Loading moondream2…")
    # Heavy imports kept function-local so the FastAPI app starts fast.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "vikhyatk/moondream2"
    # Pinned revision: trust_remote_code executes repo code, so pin it.
    revision = "2025-01-09"
    _device = "cuda" if torch.cuda.is_available() else "cpu"

    if _device == "cuda":
        from transformers import BitsAndBytesConfig
        # 4-bit quantization to keep the VRAM footprint small.
        bnb = BitsAndBytesConfig(load_in_4bit=True)
        _model = AutoModelForCausalLM.from_pretrained(
            model_id, revision=revision,
            quantization_config=bnb,
            trust_remote_code=True,
            device_map="auto",
        )
    else:
        _model = AutoModelForCausalLM.from_pretrained(
            model_id, revision=revision,
            trust_remote_code=True,
        )
        _model.to(_device)

    _tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
    # NOTE(review): if from_pretrained raises, _loading stays True and
    # /health reports "loading" forever — consider try/finally; confirm.
    _loading = False
    print(f"[vision] moondream2 ready on {_device}")
response_model=AnalyzeResponse) +def analyze(req: AnalyzeRequest): + from PIL import Image + import torch + + _load_model() + + try: + image_data = base64.b64decode(req.image_base64) + image = Image.open(io.BytesIO(image_data)).convert("RGB") + except Exception as e: + raise HTTPException(status_code=400, detail=f"Invalid image: {e}") + + with torch.no_grad(): + enc_image = _model.encode_image(image) + answer = _model.answer_question(enc_image, req.prompt, _tokenizer) + + return AnalyzeResponse(text=answer) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_company_research.py b/tests/test_company_research.py new file mode 100644 index 0000000..ea696dd --- /dev/null +++ b/tests/test_company_research.py @@ -0,0 +1,84 @@ +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from scripts.company_research import _score_experiences, _build_resume_context, _load_resume_and_keywords + + +RESUME = { + "experience_details": [ + { + "position": "Lead Technical Account Manager", + "company": "UpGuard", + "employment_period": "10/2022 - 05/2023", + "key_responsibilities": [ + {"r1": "Managed enterprise security accounts worth $2M ARR"}, + {"r2": "Led QBR cadence with C-suite stakeholders"}, + ], + }, + { + "position": "Founder and Principal Consultant", + "company": "M3 Consulting Services", + "employment_period": "07/2023 - Present", + "key_responsibilities": [ + {"r1": "Revenue operations consulting for SaaS clients"}, + {"r2": "Built customer success frameworks"}, + ], + }, + { + "position": "Customer Success Manager", + "company": "Generic Co", + "employment_period": "01/2020 - 09/2022", + "key_responsibilities": [ + {"r1": "Managed SMB portfolio"}, + ], + }, + ] +} + +KEYWORDS = ["ARR", "QBR", "enterprise", "security", "stakeholder"] +JD = "Looking for a TAM with enterprise ARR experience and QBR facilitation skills." 
+ + +def test_score_experiences_returns_sorted(): + """UpGuard entry should score highest β€” most keywords present in text and JD.""" + scored = _score_experiences(RESUME["experience_details"], KEYWORDS, JD) + assert scored[0]["company"] == "UpGuard" + + +def test_score_experiences_adds_score_key(): + """Each returned entry has a 'score' integer key.""" + scored = _score_experiences(RESUME["experience_details"], KEYWORDS, JD) + for e in scored: + assert isinstance(e["score"], int) + + +def test_build_resume_context_top2_in_full(): + """Top 2 experiences appear with full bullet detail.""" + ctx = _build_resume_context(RESUME, KEYWORDS, JD) + assert "Lead Technical Account Manager" in ctx + assert "Managed enterprise security accounts" in ctx + assert "Founder and Principal Consultant" in ctx + + +def test_build_resume_context_rest_condensed(): + """Remaining experiences appear as condensed one-liners, not full bullets.""" + ctx = _build_resume_context(RESUME, KEYWORDS, JD) + assert "Also in Alex" in ctx + assert "Generic Co" in ctx + # Generic Co bullets should NOT appear in full + assert "Managed SMB portfolio" not in ctx + + +def test_upguard_nda_low_score(): + """UpGuard name replaced with 'enterprise security vendor' when score < 3.""" + ctx = _build_resume_context(RESUME, ["python", "kubernetes"], "python kubernetes devops") + assert "enterprise security vendor" in ctx + + +def test_load_resume_and_keywords_returns_lists(): + """_load_resume_and_keywords returns a tuple of (dict, list[str]).""" + resume, keywords = _load_resume_and_keywords() + assert isinstance(resume, dict) + assert isinstance(keywords, list) + assert all(isinstance(k, str) for k in keywords) diff --git a/tests/test_cover_letter.py b/tests/test_cover_letter.py new file mode 100644 index 0000000..558d261 --- /dev/null +++ b/tests/test_cover_letter.py @@ -0,0 +1,120 @@ +# tests/test_cover_letter.py +import pytest +from pathlib import Path +from unittest.mock import patch, MagicMock + + +# ── 
def test_extract_role_from_text() -> None:
    """extract_role_from_text pulls the role title from the opening sentence."""
    # NOTE(review): imports are function-local in these tests — presumably so
    # pytest collection succeeds even when the module under test is broken;
    # confirm before hoisting them.
    from scripts.prepare_training_data import extract_role_from_text

    text = "Dear Tailscale Hiring Team,\n\nI'm delighted to apply for the Customer Support Manager position at Tailscale."
    assert extract_role_from_text(text) == "Customer Support Manager"


def test_extract_role_handles_missing() -> None:
    """extract_role_from_text returns empty string if no role found."""
    from scripts.prepare_training_data import extract_role_from_text

    assert extract_role_from_text("Dear Team,\n\nHello there.") == ""


def test_extract_company_from_filename() -> None:
    """extract_company_from_filename strips 'Cover Letter' suffix."""
    from scripts.prepare_training_data import extract_company_from_filename

    assert extract_company_from_filename("Tailscale Cover Letter") == "Tailscale"
    assert extract_company_from_filename("Dagster Labs Cover Letter.md") == "Dagster Labs"


def test_strip_greeting() -> None:
    """strip_greeting removes the 'Dear X,' line and returns the body."""
    from scripts.prepare_training_data import strip_greeting

    text = "Dear Hiring Team,\n\nI'm delighted to apply for the CSM role.\n\nBest regards,\nAlex"
    result = strip_greeting(text)
    assert result.startswith("I'm delighted")
    assert "Dear" not in result
" + "With six years of experience, I bring strong skills.\n\n" + "Best regards,\nAlex Rivera" + ) + + records = build_records(tmp_path) + assert len(records) == 1 + assert "Acme Corp" in records[0]["instruction"] + assert "Director of Customer Success" in records[0]["instruction"] + assert records[0]["output"].startswith("I'm delighted") + + +def test_build_records_skips_empty_files(tmp_path): + """build_records ignores empty or very short files.""" + from scripts.prepare_training_data import build_records + + (tmp_path / "Empty Cover Letter.md").write_text("") + (tmp_path / "Tiny Cover Letter.md").write_text("Hi") + + records = build_records(tmp_path) + assert len(records) == 0 + + +# ── generate_cover_letter tests ─────────────────────────────────────────────── + +def test_find_similar_letters_returns_top_k(): + """find_similar_letters returns at most top_k entries.""" + from scripts.generate_cover_letter import find_similar_letters + + corpus = [ + {"company": "Acme", "text": "customer success technical account management SaaS"}, + {"company": "Beta", "text": "software engineering backend python"}, + {"company": "Gamma", "text": "customer onboarding enterprise NPS"}, + {"company": "Delta", "text": "customer success manager renewal QBR"}, + ] + results = find_similar_letters("customer success manager enterprise SaaS", corpus, top_k=2) + assert len(results) == 2 + # Should prefer customer success companies over software engineering + companies = [r["company"] for r in results] + assert "Beta" not in companies + + +def test_load_corpus_returns_list(): + """load_corpus returns a list (may be empty if LETTERS_DIR absent, must not crash).""" + from scripts.generate_cover_letter import load_corpus, LETTERS_DIR + + if LETTERS_DIR.exists(): + corpus = load_corpus() + assert isinstance(corpus, list) + if corpus: + assert "company" in corpus[0] + assert "text" in corpus[0] + else: + pytest.skip("LETTERS_DIR not present in this environment") + + +def 
def test_generate_calls_llm_router():
    """generate() calls the router's complete() and returns its output."""
    from scripts.generate_cover_letter import generate

    fake_corpus = [
        {"company": "Acme", "text": "I'm delighted to apply for the CSM role at Acme."},
    ]
    mock_router = MagicMock()
    mock_router.complete.return_value = (
        "Dear Hiring Team,\n\nI'm delighted to apply.\n\nWarm regards,\nAlex Rivera"
    )

    with patch("scripts.generate_cover_letter.load_corpus", return_value=fake_corpus):
        result = generate("Customer Success Manager", "TestCo", "Looking for a CSM",
                          _router=mock_router)

    mock_router.complete.assert_called_once()
    assert "Alex Rivera" in result


# ═══ tests/test_craigslist.py (new file in this patch) ═══════════════════════

"""Tests for Craigslist RSS scraper."""
from datetime import datetime, timezone, timedelta
from email.utils import format_datetime
from unittest.mock import patch, MagicMock
import xml.etree.ElementTree as ET

import pytest
import requests


# ── RSS fixture helpers ──────────────────────────────────────────────────────

def _make_rss(items: list[dict]) -> bytes:
    """Build minimal Craigslist-style RSS XML from a list of item dicts."""
    channel = ET.Element("channel")
    for entry in items:
        item_el = ET.SubElement(channel, "item")
        for tag, text in entry.items():
            ET.SubElement(item_el, tag).text = text
    root = ET.Element("rss")
    root.append(channel)
    return ET.tostring(root, encoding="utf-8", xml_declaration=True)


def _pubdate(hours_ago: float = 1.0) -> str:
    """Return an RFC 2822 pubDate string for N hours ago."""
    moment = datetime.now(tz=timezone.utc) - timedelta(hours=hours_ago)
    return format_datetime(moment)


def _mock_resp(content: bytes, status_code: int = 200) -> MagicMock:
    """Fake a requests.Response carrying *content*; raise_for_status fires on >=400."""
    resp = MagicMock()
    resp.status_code = status_code
    resp.content = content
    resp.raise_for_status = MagicMock()
    if status_code >= 400:
        resp.raise_for_status.side_effect = requests.HTTPError(f"HTTP {status_code}")
    return resp


# ── Fixtures ─────────────────────────────────────────────────────────────────

_SAMPLE_RSS = _make_rss([{
    "title": "Customer Success Manager",
    "link": "https://sfbay.craigslist.org/jjj/d/csm-role/1234567890.html",
    "description": "Great CSM role at Acme Corp. Salary $120k.",
    "pubDate": _pubdate(1),
}])

_TWO_ITEM_RSS = _make_rss([
    {
        "title": "Customer Success Manager",
        "link": "https://sfbay.craigslist.org/jjj/d/csm-role/1111111111.html",
        "description": "CSM role 1.",
        "pubDate": _pubdate(1),
    },
    {
        "title": "Account Manager",
        "link": "https://sfbay.craigslist.org/jjj/d/am-role/2222222222.html",
        "description": "AM role.",
        "pubDate": _pubdate(2),
    },
])

_OLD_ITEM_RSS = _make_rss([{
    "title": "Old Job",
    "link": "https://sfbay.craigslist.org/jjj/d/old-job/9999999999.html",
    "description": "Very old posting.",
    "pubDate": _pubdate(hours_ago=500),
}])

_TWO_METRO_CONFIG = {
    "metros": ["sfbay", "newyork"],
    "location_map": {
        "San Francisco Bay Area, CA": "sfbay",
        "New York, NY": "newyork",
    },
    "category": "jjj",
}

_SINGLE_METRO_CONFIG = {
    "metros": ["sfbay"],
    "location_map": {"San Francisco Bay Area, CA": "sfbay"},
}

_PROFILE = {"titles": ["Customer Success Manager"], "hours_old": 240}


# ── Tests ────────────────────────────────────────────────────────────────────

def test_scrape_returns_empty_on_missing_config():
    """Missing craigslist.yaml → returns [] without raising."""
    from scripts.custom_boards import craigslist
    with patch("scripts.custom_boards.craigslist._load_config",
               side_effect=FileNotFoundError("config not found")):
        result = craigslist.scrape(_PROFILE, "San Francisco Bay Area, CA")
    assert result == []


def test_scrape_remote_hits_all_metros():
    """location='Remote' triggers one RSS fetch per configured metro."""
    from scripts.custom_boards import craigslist
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_TWO_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get",
               return_value=_mock_resp(_SAMPLE_RSS)) as mock_get:
        result = craigslist.scrape(_PROFILE, "Remote")

    assert mock_get.call_count == 2
    fetched = [call.args[0] for call in mock_get.call_args_list]
    assert any("sfbay" in url for url in fetched)
    assert any("newyork" in url for url in fetched)
    assert all(row["is_remote"] for row in result)


def test_scrape_location_map_resolves():
    """Known location string maps to exactly one metro."""
    from scripts.custom_boards import craigslist
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_TWO_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get",
               return_value=_mock_resp(_SAMPLE_RSS)) as mock_get:
        result = craigslist.scrape(_PROFILE, "San Francisco Bay Area, CA")

    assert mock_get.call_count == 1
    assert "sfbay" in mock_get.call_args.args[0]
    assert len(result) == 1
    assert result[0]["is_remote"] is False


def test_scrape_location_not_in_map_returns_empty():
    """Location not in location_map → [] without raising."""
    from scripts.custom_boards import craigslist
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_SINGLE_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get") as mock_get:
        result = craigslist.scrape(_PROFILE, "Portland, OR")

    assert result == []
    mock_get.assert_not_called()


def test_hours_old_filter():
    """Items older than hours_old are excluded."""
    from scripts.custom_boards import craigslist
    narrow_profile = {"titles": ["Customer Success Manager"], "hours_old": 48}
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_SINGLE_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get",
               return_value=_mock_resp(_OLD_ITEM_RSS)):
        result = craigslist.scrape(narrow_profile, "San Francisco Bay Area, CA")

    assert result == []


def test_dedup_within_run():
    """Same URL from two different metros is only returned once."""
    from scripts.custom_boards import craigslist
    same_url_rss = _make_rss([{
        "title": "CSM Role",
        "link": "https://sfbay.craigslist.org/jjj/d/csm/1234.html",
        "description": "Same job.",
        "pubDate": _pubdate(1),
    }])
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_TWO_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get",
               return_value=_mock_resp(same_url_rss)):
        result = craigslist.scrape(_PROFILE, "Remote")

    urls = [row["url"] for row in result]
    assert len(urls) == len(set(urls))


def test_http_error_graceful():
    """HTTP error → [] without raising."""
    from scripts.custom_boards import craigslist
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_SINGLE_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get",
               side_effect=requests.RequestException("timeout")):
        result = craigslist.scrape(_PROFILE, "San Francisco Bay Area, CA")

    assert result == []


def test_malformed_xml_graceful():
    """Malformed RSS XML → [] without raising."""
    from scripts.custom_boards import craigslist
    bad_resp = MagicMock()
    bad_resp.content = b"this is not xml <<<<"
    bad_resp.raise_for_status = MagicMock()
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_SINGLE_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get",
               return_value=bad_resp):
        result = craigslist.scrape(_PROFILE, "San Francisco Bay Area, CA")
    assert result == []


def test_results_wanted_cap():
    """Never returns more than results_wanted items."""
    from scripts.custom_boards import craigslist
    with patch("scripts.custom_boards.craigslist._load_config",
               return_value=_TWO_METRO_CONFIG), \
         patch("scripts.custom_boards.craigslist.requests.get",
               return_value=_mock_resp(_TWO_ITEM_RSS)):
        result = craigslist.scrape(_PROFILE, "Remote", results_wanted=1)

    assert len(result) <= 1


# ═══ tests/test_db.py (new file in this patch) ═══════════════════════════════

import pytest
import sqlite3
from pathlib import Path
from unittest.mock import patch


def _sample_job(url="https://example.com/1", **overrides):
    """Minimal job dict accepted by insert_job; per-test fields via overrides."""
    job = {
        "title": "CSM", "company": "Acme", "url": url,
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-20",
    }
    job.update(overrides)
    return job


def test_init_db_creates_jobs_table(tmp_path):
    """init_db creates a jobs table with correct schema."""
    from scripts.db import init_db
    db_path = tmp_path / "test.db"
    init_db(db_path)
    conn = sqlite3.connect(db_path)
    found = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='jobs'"
    ).fetchone()
    conn.close()
    assert found is not None


def test_insert_job_returns_id(tmp_path):
    """insert_job inserts a row and returns its id."""
    from scripts.db import init_db, insert_job
    db_path = tmp_path / "test.db"
    init_db(db_path)
    row_id = insert_job(db_path, _sample_job(salary="$100k", description="Great role"))
    assert isinstance(row_id, int)
    assert row_id > 0


def test_insert_job_skips_duplicate_url(tmp_path):
    """insert_job returns None if URL already exists."""
    from scripts.db import init_db, insert_job
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job = _sample_job()
    insert_job(db_path, job)
    assert insert_job(db_path, job) is None


def test_get_jobs_by_status(tmp_path):
    """get_jobs_by_status returns only jobs with matching status."""
    from scripts.db import init_db, insert_job, get_jobs_by_status, update_job_status
    db_path = tmp_path / "test.db"
    init_db(db_path)
    row_id = insert_job(db_path, _sample_job())
    update_job_status(db_path, [row_id], "approved")
    assert len(get_jobs_by_status(db_path, "approved")) == 1
    assert len(get_jobs_by_status(db_path, "pending")) == 0


def test_update_job_status_batch(tmp_path):
    """update_job_status updates multiple rows at once."""
    from scripts.db import init_db, insert_job, update_job_status, get_jobs_by_status
    db_path = tmp_path / "test.db"
    init_db(db_path)
    row_ids = [
        insert_job(db_path, _sample_job(
            url=f"https://example.com/{i}", title=f"Job {i}",
            company="Co", source="indeed",
        ))
        for i in range(3)
    ]
    update_job_status(db_path, row_ids, "rejected")
    assert len(get_jobs_by_status(db_path, "rejected")) == 3


def test_migrate_db_adds_columns_to_existing_db(tmp_path):
    """_migrate_db adds cover_letter and applied_at to a db created without them."""
    from scripts.db import _migrate_db
    db_path = tmp_path / "legacy.db"
    # Simulate a pre-migration schema lacking the two new columns.
    conn = sqlite3.connect(db_path)
    conn.execute("""CREATE TABLE jobs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT, company TEXT, url TEXT UNIQUE, status TEXT DEFAULT 'pending'
    )""")
    conn.commit()
    conn.close()
    _migrate_db(db_path)
    conn = sqlite3.connect(db_path)
    cols = {row[1] for row in conn.execute("PRAGMA table_info(jobs)").fetchall()}
    conn.close()
    assert "cover_letter" in cols
    assert "applied_at" in cols


def test_update_cover_letter(tmp_path):
    """update_cover_letter persists text to the DB."""
    from scripts.db import init_db, insert_job, update_cover_letter, get_jobs_by_status
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    update_cover_letter(db_path, job_id, "Dear Hiring Manager,\nGreat role!")
    rows = get_jobs_by_status(db_path, "pending")
    assert rows[0]["cover_letter"] == "Dear Hiring Manager,\nGreat role!"


def test_mark_applied_sets_status_and_date(tmp_path):
    """mark_applied sets status='applied' and populates applied_at."""
    from scripts.db import init_db, insert_job, mark_applied, get_jobs_by_status
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    mark_applied(db_path, [job_id])
    applied = get_jobs_by_status(db_path, "applied")
    assert len(applied) == 1
    assert applied[0]["status"] == "applied"
    assert applied[0]["applied_at"] is not None


# ── background_tasks tests ───────────────────────────────────────────────────

def test_init_db_creates_background_tasks_table(tmp_path):
    """init_db creates a background_tasks table."""
    from scripts.db import init_db
    db_path = tmp_path / "test.db"
    init_db(db_path)
    conn = sqlite3.connect(db_path)
    found = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='background_tasks'"
    ).fetchone()
    conn.close()
    assert found is not None


def test_insert_task_returns_id_and_true(tmp_path):
    """insert_task returns (task_id, True) for a new task."""
    from scripts.db import init_db, insert_job, insert_task
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    task_id, is_new = insert_task(db_path, "cover_letter", job_id)
    assert isinstance(task_id, int) and task_id > 0
    assert is_new is True


def test_insert_task_deduplicates_active_task(tmp_path):
    """insert_task returns (existing_id, False) if a queued/running task already exists."""
    from scripts.db import init_db, insert_job, insert_task
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    first_id, _ = insert_task(db_path, "cover_letter", job_id)
    second_id, is_new = insert_task(db_path, "cover_letter", job_id)
    assert second_id == first_id
    assert is_new is False


def test_insert_task_allows_different_types_same_job(tmp_path):
    """insert_task allows cover_letter and company_research for the same job concurrently."""
    from scripts.db import init_db, insert_job, insert_task
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    _, cl_new = insert_task(db_path, "cover_letter", job_id)
    _, res_new = insert_task(db_path, "company_research", job_id)
    assert cl_new is True
    assert res_new is True


def test_update_task_status_running(tmp_path):
    """update_task_status('running') sets started_at."""
    from scripts.db import init_db, insert_job, insert_task, update_task_status
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    task_id, _ = insert_task(db_path, "cover_letter", job_id)
    update_task_status(db_path, task_id, "running")
    conn = sqlite3.connect(db_path)
    row = conn.execute(
        "SELECT status, started_at FROM background_tasks WHERE id=?", (task_id,)
    ).fetchone()
    conn.close()
    assert row[0] == "running"
    assert row[1] is not None


def test_update_task_status_completed(tmp_path):
    """update_task_status('completed') sets finished_at."""
    from scripts.db import init_db, insert_job, insert_task, update_task_status
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    task_id, _ = insert_task(db_path, "cover_letter", job_id)
    update_task_status(db_path, task_id, "completed")
    conn = sqlite3.connect(db_path)
    row = conn.execute(
        "SELECT status, finished_at FROM background_tasks WHERE id=?", (task_id,)
    ).fetchone()
    conn.close()
    assert row[0] == "completed"
    assert row[1] is not None


def test_update_task_status_failed_stores_error(tmp_path):
    """update_task_status('failed') stores error message and sets finished_at."""
    from scripts.db import init_db, insert_job, insert_task, update_task_status
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    task_id, _ = insert_task(db_path, "cover_letter", job_id)
    update_task_status(db_path, task_id, "failed", error="LLM timeout")
    conn = sqlite3.connect(db_path)
    row = conn.execute(
        "SELECT status, error, finished_at FROM background_tasks WHERE id=?", (task_id,)
    ).fetchone()
    conn.close()
    assert row[0] == "failed"
    assert row[1] == "LLM timeout"
    assert row[2] is not None


def test_get_active_tasks_returns_only_active(tmp_path):
    """get_active_tasks returns only queued/running tasks with job info joined."""
    from scripts.db import init_db, insert_job, insert_task, update_task_status, get_active_tasks
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    active_id, _ = insert_task(db_path, "cover_letter", job_id)
    done_id, _ = insert_task(db_path, "company_research", job_id)
    update_task_status(db_path, done_id, "completed")

    tasks = get_active_tasks(db_path)
    assert len(tasks) == 1
    assert tasks[0]["id"] == active_id
    assert tasks[0]["company"] == "Acme"
    assert tasks[0]["title"] == "CSM"


def test_get_task_for_job_returns_latest(tmp_path):
    """get_task_for_job returns the most recent task for the given type+job."""
    from scripts.db import init_db, insert_job, insert_task, update_task_status, get_task_for_job
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    first_id, _ = insert_task(db_path, "cover_letter", job_id)
    update_task_status(db_path, first_id, "completed")
    second_id, _ = insert_task(db_path, "cover_letter", job_id)  # allowed since first is done

    task = get_task_for_job(db_path, "cover_letter", job_id)
    assert task is not None
    assert task["id"] == second_id


def test_get_task_for_job_returns_none_when_absent(tmp_path):
    """get_task_for_job returns None when no task exists for that job+type."""
    from scripts.db import init_db, insert_job, get_task_for_job
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, _sample_job(url="https://ex.com/1"))
    assert get_task_for_job(db_path, "cover_letter", job_id) is None


# ── company_research new-column tests ────────────────────────────────────────

def test_company_research_has_new_columns(tmp_path):
    """init_db creates company_research with the four extended columns."""
    from scripts.db import init_db
    db = tmp_path / "test.db"
    init_db(db)
    conn = sqlite3.connect(db)
    cols = [row[1] for row in conn.execute("PRAGMA table_info(company_research)").fetchall()]
    conn.close()
    assert "tech_brief" in cols
    assert "funding_brief" in cols
    assert "competitors_brief" in cols
    assert "red_flags" in cols


def test_save_and_get_research_new_fields(tmp_path):
    """save_research persists and get_research returns the four new brief fields."""
    from scripts.db import init_db, insert_job, save_research, get_research
    db = tmp_path / "test.db"
    init_db(db)
    job_id = insert_job(db, _sample_job(
        url="https://ex.com/1", title="TAM", date_found="2026-02-21",
    ))

    save_research(db, job_id=job_id,
                  company_brief="overview", ceo_brief="ceo",
                  talking_points="points", raw_output="raw",
                  tech_brief="tech stack", funding_brief="series B",
                  competitors_brief="vs competitors", red_flags="none")
    research = get_research(db, job_id=job_id)
    assert research["tech_brief"] == "tech stack"
    assert research["funding_brief"] == "series B"
    assert research["competitors_brief"] == "vs competitors"
    assert research["red_flags"] == "none"
# ── stage_signal / suggestion_dismissed tests ────────────────────────────────

def test_stage_signal_columns_exist(tmp_path):
    """init_db creates stage_signal and suggestion_dismissed columns on job_contacts."""
    from scripts.db import init_db
    db_path = tmp_path / "test.db"
    init_db(db_path)
    conn = sqlite3.connect(db_path)
    cols = {row[1] for row in conn.execute("PRAGMA table_info(job_contacts)").fetchall()}
    conn.close()
    assert "stage_signal" in cols
    assert "suggestion_dismissed" in cols


def test_add_contact_with_stage_signal(tmp_path):
    """add_contact stores stage_signal when provided."""
    from scripts.db import init_db, insert_job, add_contact, get_contacts
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, {
        "title": "CSM", "company": "Acme", "url": "https://ex.com/1",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-21",
    })
    add_contact(db_path, job_id=job_id, direction="inbound",
                subject="Interview invite", stage_signal="interview_scheduled")
    contacts = get_contacts(db_path, job_id=job_id)
    assert contacts[0]["stage_signal"] == "interview_scheduled"


def test_get_unread_stage_signals(tmp_path):
    """get_unread_stage_signals returns only non-neutral, non-dismissed signals."""
    from scripts.db import (init_db, insert_job, add_contact,
                            get_unread_stage_signals, dismiss_stage_signal)
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, {
        "title": "CSM", "company": "Acme", "url": "https://ex.com/1",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-21",
    })
    c1 = add_contact(db_path, job_id=job_id, direction="inbound",
                     subject="Interview invite", stage_signal="interview_scheduled")
    add_contact(db_path, job_id=job_id, direction="inbound",
                subject="Auto-confirm", stage_signal="neutral")
    signals = get_unread_stage_signals(db_path, job_id)
    assert len(signals) == 1
    assert signals[0]["stage_signal"] == "interview_scheduled"

    dismiss_stage_signal(db_path, c1)
    assert get_unread_stage_signals(db_path, job_id) == []


def test_get_email_leads(tmp_path):
    """get_email_leads returns only source='email' pending jobs."""
    from scripts.db import init_db, insert_job, get_email_leads
    db_path = tmp_path / "test.db"
    init_db(db_path)
    insert_job(db_path, {
        "title": "CSM", "company": "Acme", "url": "https://ex.com/1",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-21",
    })
    insert_job(db_path, {
        "title": "TAM", "company": "Wiz", "url": "email://wiz.com/abc123",
        "source": "email", "location": "", "is_remote": 0,
        "salary": "", "description": "Hi Alex…", "date_found": "2026-02-21",
    })
    leads = get_email_leads(db_path)
    assert len(leads) == 1
    assert leads[0]["company"] == "Wiz"
    assert leads[0]["source"] == "email"


def test_get_all_message_ids(tmp_path):
    """get_all_message_ids returns all message IDs across jobs."""
    from scripts.db import init_db, insert_job, add_contact, get_all_message_ids
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, {
        "title": "CSM", "company": "Acme", "url": "https://ex.com/1",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-21",
    })
    # FIX: the previous revision inserted two contacts with message_id="" and
    # asserted "" in mids twice, which proves nothing about multiple IDs.
    # Use two distinct RFC 5322-style Message-IDs and assert both round-trip.
    add_contact(db_path, job_id=job_id, message_id="<msg-1@example.com>")
    add_contact(db_path, job_id=job_id, message_id="<msg-2@example.com>")
    mids = get_all_message_ids(db_path)
    assert "<msg-1@example.com>" in mids
    assert "<msg-2@example.com>" in mids


# ── survey_responses tests ───────────────────────────────────────────────────

def test_survey_responses_table_created(tmp_path):
    """init_db creates survey_responses table."""
    from scripts.db import init_db
    db_path = tmp_path / "test.db"
    init_db(db_path)
    conn = sqlite3.connect(db_path)
    cur = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='survey_responses'"
    )
    assert cur.fetchone() is not None
    conn.close()


def test_survey_at_column_exists(tmp_path):
    """jobs table has survey_at column after init_db."""
    from scripts.db import init_db
    db_path = tmp_path / "test.db"
    init_db(db_path)
    conn = sqlite3.connect(db_path)
    cols = [row[1] for row in conn.execute("PRAGMA table_info(jobs)").fetchall()]
    assert "survey_at" in cols
    conn.close()


def test_insert_and_get_survey_response(tmp_path):
    """insert_survey_response inserts a row; get_survey_responses returns it."""
    from scripts.db import init_db, insert_job, insert_survey_response, get_survey_responses
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, {
        "title": "CSM", "company": "Acme", "url": "https://ex.com/1",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-23",
    })
    row_id = insert_survey_response(
        db_path, job_id=job_id, survey_name="Culture Fit",
        source="text_paste", raw_input="Q1: A B C", mode="quick",
        llm_output="1. B — collaborative", reported_score="82%",
    )
    assert isinstance(row_id, int)
    responses = get_survey_responses(db_path, job_id=job_id)
    assert len(responses) == 1
    assert responses[0]["survey_name"] == "Culture Fit"
    assert responses[0]["reported_score"] == "82%"


def test_get_interview_jobs_includes_survey(tmp_path):
    """get_interview_jobs returns survey-stage jobs."""
    from scripts.db import init_db, insert_job, update_job_status, get_interview_jobs
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, {
        "title": "CSM", "company": "Acme", "url": "https://ex.com/2",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-23",
    })
    update_job_status(db_path, [job_id], "survey")
    result = get_interview_jobs(db_path)
    assert any(j["id"] == job_id for j in result.get("survey", []))


def test_advance_to_survey_sets_survey_at(tmp_path):
    """advance_to_stage('survey') sets survey_at timestamp."""
    from scripts.db import init_db, insert_job, update_job_status, advance_to_stage, get_job_by_id
    db_path = tmp_path / "test.db"
    init_db(db_path)
    job_id = insert_job(db_path, {
        "title": "CSM", "company": "Acme", "url": "https://ex.com/3",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-02-23",
    })
    update_job_status(db_path, [job_id], "applied")
    advance_to_stage(db_path, job_id=job_id, stage="survey")
    job = get_job_by_id(db_path, job_id=job_id)
    assert job["status"] == "survey"
    assert job["survey_at"] is not None


def test_update_job_fields(tmp_path):
    """update_job_fields overwrites only the supplied columns on an existing job."""
    from scripts.db import init_db, insert_job, update_job_fields
    db = tmp_path / "test.db"
    init_db(db)
    job_id = insert_job(db, {
        "title": "Importing…", "company": "", "url": "https://example.com/job/1",
        "source": "manual", "location": "", "description": "", "date_found": "2026-02-24",
    })
    update_job_fields(db, job_id, {
        "title": "Customer Success Manager",
        "company": "Acme Corp",
        "location": "San Francisco, CA",
        "description": "Great role.",
        "salary": "$120k",
        "is_remote": 1,
    })
    conn = sqlite3.connect(db)
    conn.row_factory = sqlite3.Row
    row = dict(conn.execute("SELECT * FROM jobs WHERE id=?", (job_id,)).fetchone())
    conn.close()
    assert row["title"] == "Customer Success Manager"
    assert row["company"] == "Acme Corp"
    assert row["description"] == "Great role."
    assert row["is_remote"] == 1


def test_update_job_fields_ignores_unknown_columns(tmp_path):
    """update_job_fields silently drops keys that are not jobs-table columns."""
    from scripts.db import init_db, insert_job, update_job_fields
    db = tmp_path / "test.db"
    init_db(db)
    job_id = insert_job(db, {
        "title": "Importing…", "company": "", "url": "https://example.com/job/2",
        "source": "manual", "location": "", "description": "", "date_found": "2026-02-24",
    })
    # Should not raise even with an unknown column
    update_job_fields(db, job_id, {"title": "Real Title", "nonexistent_col": "ignored"})
    conn = sqlite3.connect(db)
    conn.row_factory = sqlite3.Row
    row = dict(conn.execute("SELECT * FROM jobs WHERE id=?", (job_id,)).fetchone())
    conn.close()
    assert row["title"] == "Real Title"


# ═══ tests/test_discover.py (new file in this patch) ═════════════════════════

# tests/test_discover.py
import pytest
from unittest.mock import patch, MagicMock
import pandas as pd
from pathlib import Path

SAMPLE_JOB = {
    "title": "Customer Success Manager",
    "company": "Acme Corp",
    "location": "Remote",
    "is_remote": True,
    "job_url": "https://linkedin.com/jobs/view/123456",
    "site": "linkedin",
    "min_amount": 90000,
    "max_amount": 120000,
    "salary_source": "$90,000 - $120,000",
    "description": "Great CS role",
}

SAMPLE_FM = {
    "title_field": "Salary", "job_title": "Job Title", "company": "Company Name",
    "url": "Role Link", "source": "Job Source", "status": "Status of Application",
    "status_new": "Application Submitted", "date_found": "Date Found",
    "remote": "Remote", "match_score": "Match Score",
    "keyword_gaps": "Keyword Gaps", "notes": "Notes", "job_description": "Job Description",
}

SAMPLE_NOTION_CFG = {"token": "secret_test", "database_id": "fake-db-id", "field_map": SAMPLE_FM}
SAMPLE_PROFILES_CFG = {
    "profiles": [{"name": "cs", "titles": ["Customer Success Manager"],
                  "locations": ["Remote"], "boards": ["linkedin"],
                  "results_per_board": 5, "hours_old": 72}]
}


def make_jobs_df(jobs=None):
    """Build the scrape_jobs-shaped DataFrame the discover module consumes."""
    return pd.DataFrame(jobs or [SAMPLE_JOB])


def test_discover_writes_to_sqlite(tmp_path):
    """run_discovery inserts new jobs into SQLite staging db."""
    from scripts.discover import run_discovery
    from scripts.db import get_jobs_by_status

    db_path = tmp_path / "test.db"
    with patch("scripts.discover.load_config", return_value=(SAMPLE_PROFILES_CFG, SAMPLE_NOTION_CFG)), \
         patch("scripts.discover.scrape_jobs", return_value=make_jobs_df()), \
         patch("scripts.discover.Client"):
        run_discovery(db_path=db_path)

    jobs = get_jobs_by_status(db_path, "pending")
    assert len(jobs) == 1
    assert jobs[0]["title"] == "Customer Success Manager"


def test_discover_skips_duplicate_urls(tmp_path):
    """run_discovery does not insert a job whose URL is already in SQLite."""
    from scripts.discover import run_discovery
    from scripts.db import init_db, insert_job, get_jobs_by_status

    db_path = tmp_path / "test.db"
    init_db(db_path)
    insert_job(db_path, {
        "title": "Old", "company": "X", "url": "https://linkedin.com/jobs/view/123456",
        "source": "linkedin", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-01-01",
    })

    with patch("scripts.discover.load_config", return_value=(SAMPLE_PROFILES_CFG, SAMPLE_NOTION_CFG)), \
         patch("scripts.discover.scrape_jobs", return_value=make_jobs_df()), \
         patch("scripts.discover.Client"):
        run_discovery(db_path=db_path)

    jobs = get_jobs_by_status(db_path, "pending")
    assert len(jobs) == 1  # only the pre-existing one, not a duplicate


def test_discover_pushes_new_jobs(tmp_path):
    """Legacy: discover still calls push_to_notion when notion_push=True."""
    from scripts.discover import run_discovery
    db_path = tmp_path / "test.db"
    with patch("scripts.discover.load_config", return_value=(SAMPLE_PROFILES_CFG, SAMPLE_NOTION_CFG)), \
         patch("scripts.discover.scrape_jobs", return_value=make_jobs_df()), \
         patch("scripts.discover.push_to_notion") as mock_push, \
         patch("scripts.discover.get_existing_urls", return_value=set()), \
         patch("scripts.discover.Client"):
        run_discovery(db_path=db_path, notion_push=True)
    assert mock_push.call_count == 1


def test_push_to_notion_sets_status_new():
    """push_to_notion always sets Status to the configured status_new value."""
    from scripts.discover import push_to_notion
    mock_notion = MagicMock()
    push_to_notion(mock_notion, "fake-db-id", SAMPLE_JOB, SAMPLE_FM)
    call_kwargs = mock_notion.pages.create.call_args[1]
    status = call_kwargs["properties"]["Status of Application"]["select"]["name"]
    assert status == "Application Submitted"


# ── Custom boards integration ────────────────────────────────────────────────

_PROFILE_WITH_CUSTOM = {
    "profiles": [{
        "name": "cs", "titles": ["Customer Success Manager"],
        "locations": ["Remote"], "boards": [],
        "custom_boards": ["adzuna"],
        "results_per_board": 5, "hours_old": 72,
    }]
}

_ADZUNA_JOB = {
    "title": "Customer Success Manager",
    "company": "TestCo",
    "url": "https://www.adzuna.com/jobs/details/999",
    "source": "adzuna",
    "location": "Remote",
    "is_remote": True,
    "salary": "$90,000 – $120,000",
    "description": "Great remote CSM role",
}


def test_discover_custom_board_inserts_jobs(tmp_path):
    """run_discovery dispatches custom_boards scrapers and inserts returned jobs."""
    from scripts.discover import run_discovery
    from scripts.db import get_jobs_by_status

    db_path = tmp_path / "test.db"
    with patch("scripts.discover.load_config", return_value=(_PROFILE_WITH_CUSTOM, SAMPLE_NOTION_CFG)), \
         patch("scripts.discover.scrape_jobs", return_value=pd.DataFrame()), \
         patch("scripts.discover.CUSTOM_SCRAPERS", {"adzuna": lambda *a, **kw: [_ADZUNA_JOB]}), \
         patch("scripts.discover.Client"):
        count = run_discovery(db_path=db_path)

    assert count == 1
    jobs = get_jobs_by_status(db_path, "pending")
    assert jobs[0]["title"] == "Customer Success Manager"
    assert jobs[0]["source"] == "adzuna"


def test_discover_custom_board_skips_unknown(tmp_path, capsys):
    """run_discovery logs and skips an unregistered custom board name."""
    from scripts.discover import run_discovery

    profile_unknown = {
        "profiles": [{
            "name": "cs", "titles": ["CSM"], "locations": ["Remote"],
            "boards": [], "custom_boards": ["nonexistent_board"],
            "results_per_board": 5, "hours_old": 72,
        }]
    }
    db_path = tmp_path / "test.db"
    with patch("scripts.discover.load_config", return_value=(profile_unknown, SAMPLE_NOTION_CFG)), \
         patch("scripts.discover.scrape_jobs", return_value=pd.DataFrame()), \
         patch("scripts.discover.Client"):
        run_discovery(db_path=db_path)

    captured = capsys.readouterr()
    assert "nonexistent_board" in captured.out
    assert "Unknown scraper" in captured.out


def test_discover_custom_board_deduplicates(tmp_path):
    """Custom board results are deduplicated by URL against pre-existing jobs."""
    from scripts.discover import run_discovery
    from scripts.db import init_db, insert_job, get_jobs_by_status

    db_path = tmp_path / "test.db"
    init_db(db_path)
    insert_job(db_path, {
        "title": "CSM", "company": "TestCo",
        "url": "https://www.adzuna.com/jobs/details/999",
        "source": "adzuna", "location": "Remote", "is_remote": True,
        "salary": "", "description": "", "date_found": "2026-01-01",
    })

    with patch("scripts.discover.load_config", return_value=(_PROFILE_WITH_CUSTOM, SAMPLE_NOTION_CFG)), \
         patch("scripts.discover.scrape_jobs", return_value=pd.DataFrame()), \
         patch("scripts.discover.CUSTOM_SCRAPERS", {"adzuna": lambda *a, **kw: [_ADZUNA_JOB]}), \
         patch("scripts.discover.Client"):
        count = run_discovery(db_path=db_path)

    assert count == 0  # duplicate skipped
    assert len(get_jobs_by_status(db_path, "pending")) == 1


# ═══ tests/test_enrich_descriptions.py (new file in this patch) ══════════════

# tests/test_enrich_descriptions.py
"""Tests for scripts/enrich_descriptions.py — enrich_craigslist_fields()."""
from unittest.mock import patch, MagicMock
import sqlite3


def test_enrich_craigslist_fields_skips_non_craigslist(tmp_path):
    """Non-craigslist source → returns {} without calling LLM."""
    from scripts.db import init_db, insert_job
    from scripts.enrich_descriptions import enrich_craigslist_fields
    db = tmp_path / "test.db"
    init_db(db)
    job_id = insert_job(db, {
        "title": "CSM", "company": "", "url": "https://example.com/1",
        "source": "linkedin", "location": "", "description": "Some company here.",
        "date_found": "2026-02-24",
    })
    with patch("scripts.llm_router.LLMRouter") as mock_llm:
        result = enrich_craigslist_fields(db, job_id)
    assert result == {}
    mock_llm.assert_not_called()


def test_enrich_craigslist_fields_skips_populated_company(tmp_path):
    """Company already set → returns {} without calling LLM."""
    from scripts.db import init_db, insert_job
    from scripts.enrich_descriptions import enrich_craigslist_fields
    db = tmp_path / "test.db"
    init_db(db)
    job_id = insert_job(db, {
        "title": "CSM", "company": "Acme Corp", "url": "https://sfbay.craigslist.org/jjj/d/1.html",
        "source": "craigslist", "location": "", "description": "Join Acme Corp today.",
        "date_found": "2026-02-24",
    })
with patch("scripts.llm_router.LLMRouter") as mock_llm: + result = enrich_craigslist_fields(db, job_id) + assert result == {} + mock_llm.assert_not_called() + + +def test_enrich_craigslist_fields_skips_empty_description(tmp_path): + """Empty description β†’ returns {} without calling LLM.""" + from scripts.db import init_db, insert_job + from scripts.enrich_descriptions import enrich_craigslist_fields + db = tmp_path / "test.db" + init_db(db) + job_id = insert_job(db, { + "title": "CSM", "company": "", "url": "https://sfbay.craigslist.org/jjj/d/2.html", + "source": "craigslist", "location": "", "description": "", + "date_found": "2026-02-24", + }) + with patch("scripts.llm_router.LLMRouter") as mock_llm: + result = enrich_craigslist_fields(db, job_id) + assert result == {} + mock_llm.assert_not_called() + + +def test_enrich_craigslist_fields_extracts_and_updates(tmp_path): + """Valid LLM response β†’ updates company/salary in DB, returns extracted dict.""" + from scripts.db import init_db, insert_job + from scripts.enrich_descriptions import enrich_craigslist_fields + db = tmp_path / "test.db" + init_db(db) + job_id = insert_job(db, { + "title": "CSM", "company": "", "url": "https://sfbay.craigslist.org/jjj/d/3.html", + "source": "craigslist", "location": "", "description": "Join Acme Corp. 
Pay: $120k/yr.", + "date_found": "2026-02-24", + }) + mock_router = MagicMock() + mock_router.complete.return_value = '{"company": "Acme Corp", "salary": "$120k/yr"}' + with patch("scripts.llm_router.LLMRouter", return_value=mock_router): + result = enrich_craigslist_fields(db, job_id) + assert result == {"company": "Acme Corp", "salary": "$120k/yr"} + conn = sqlite3.connect(db) + row = conn.execute("SELECT company, salary FROM jobs WHERE id=?", (job_id,)).fetchone() + conn.close() + assert row[0] == "Acme Corp" + assert row[1] == "$120k/yr" + + +def test_enrich_craigslist_fields_handles_bad_llm_json(tmp_path): + """Unparseable LLM response β†’ returns {} without raising.""" + from scripts.db import init_db, insert_job + from scripts.enrich_descriptions import enrich_craigslist_fields + db = tmp_path / "test.db" + init_db(db) + job_id = insert_job(db, { + "title": "CSM", "company": "", "url": "https://sfbay.craigslist.org/jjj/d/4.html", + "source": "craigslist", "location": "", "description": "Great opportunity.", + "date_found": "2026-02-24", + }) + mock_router = MagicMock() + mock_router.complete.return_value = "Sorry, I cannot extract that." 
+ with patch("scripts.llm_router.LLMRouter", return_value=mock_router): + result = enrich_craigslist_fields(db, job_id) + assert result == {} diff --git a/tests/test_imap_sync.py b/tests/test_imap_sync.py new file mode 100644 index 0000000..d6d057b --- /dev/null +++ b/tests/test_imap_sync.py @@ -0,0 +1,330 @@ +"""Tests for imap_sync helpers (no live IMAP connection required).""" +import pytest +from unittest.mock import patch, MagicMock + + +def test_classify_stage_signal_interview(): + """classify_stage_signal returns interview_scheduled for a call-scheduling email.""" + from scripts.imap_sync import classify_stage_signal + with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: + mock_router.complete.return_value = "interview_scheduled" + result = classify_stage_signal( + "Let's schedule a call", + "Hi Alex, we'd love to book a 30-min phone screen with you.", + ) + assert result == "interview_scheduled" + + +def test_classify_stage_signal_returns_none_on_error(): + """classify_stage_signal returns None when LLM call raises.""" + from scripts.imap_sync import classify_stage_signal + with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: + mock_router.complete.side_effect = RuntimeError("model not loaded") + result = classify_stage_signal("subject", "body") + assert result is None + + +def test_classify_stage_signal_strips_think_tags(): + """classify_stage_signal strips ... 
blocks before parsing.""" + from scripts.imap_sync import classify_stage_signal + with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: + mock_router.complete.return_value = "Let me think...\nrejected" + result = classify_stage_signal("Update on your application", "We went with another candidate.") + assert result == "rejected" + + +def test_normalise_company(): + """_normalise_company strips legal suffixes.""" + from scripts.imap_sync import _normalise_company + assert _normalise_company("DataStax, Inc.") == "DataStax" + assert _normalise_company("Wiz Ltd") == "Wiz" + assert _normalise_company("Crusoe Energy") == "Crusoe Energy" + + +def test_company_search_terms_excludes_job_board_sld(): + """Job-board domains like linkedin.com are never used as match terms.""" + from scripts.imap_sync import _company_search_terms + # LinkedIn-sourced job: SLD "linkedin" must not appear in the terms + terms = _company_search_terms("Bamboo Health", "https://www.linkedin.com/jobs/view/123") + assert "linkedin" not in terms + assert "bamboo health" in terms + + # Company with its own domain: SLD should be included + terms = _company_search_terms("Crusoe Energy", "https://crusoe.ai/jobs/456") + assert "crusoe" in terms + + # Indeed-sourced job: "indeed" excluded + terms = _company_search_terms("DoorDash", "https://www.indeed.com/viewjob?jk=abc") + assert "indeed" not in terms + assert "doordash" in terms + + +def test_has_recruitment_keyword(): + """_has_recruitment_keyword matches known keywords.""" + from scripts.imap_sync import _has_recruitment_keyword + assert _has_recruitment_keyword("Interview Invitation β€” Senior TAM") + assert _has_recruitment_keyword("Your application with DataStax") + assert not _has_recruitment_keyword("Team lunch tomorrow") + + +def test_extract_lead_info_returns_company_and_title(): + """extract_lead_info parses LLM JSON response into (company, title).""" + from scripts.imap_sync import extract_lead_info + with 
patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: + mock_router.complete.return_value = '{"company": "Wiz", "title": "Senior TAM"}' + result = extract_lead_info("Senior TAM at Wiz", "Hi Alex, we have a role…", "recruiter@wiz.com") + assert result == ("Wiz", "Senior TAM") + + +def test_extract_lead_info_returns_none_on_bad_json(): + """extract_lead_info returns (None, None) when LLM returns unparseable output.""" + from scripts.imap_sync import extract_lead_info + with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: + mock_router.complete.return_value = "I cannot determine the company." + result = extract_lead_info("Job opportunity", "blah", "noreply@example.com") + assert result == (None, None) + + +def test_classify_labels_includes_survey_received(): + """_CLASSIFY_LABELS includes survey_received.""" + from scripts.imap_sync import _CLASSIFY_LABELS + assert "survey_received" in _CLASSIFY_LABELS + + +def test_classify_stage_signal_returns_survey_received(): + """classify_stage_signal returns 'survey_received' when LLM outputs that label.""" + from unittest.mock import patch + from scripts.imap_sync import classify_stage_signal + + with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router: + mock_router.complete.return_value = "survey_received" + result = classify_stage_signal("Complete our culture survey", "Please fill out this form") + assert result == "survey_received" + + +def test_sync_job_emails_classifies_inbound(tmp_path): + """sync_job_emails classifies inbound emails and stores the stage_signal.""" + from scripts.db import init_db, insert_job, get_contacts + from scripts.imap_sync import sync_job_emails + + db_path = tmp_path / "test.db" + init_db(db_path) + job_id = insert_job(db_path, { + "title": "CSM", "company": "Acme", + "url": "https://acme.com/jobs/1", + "source": "linkedin", "location": "Remote", + "is_remote": True, "salary": "", "description": "", + "date_found": "2026-02-21", + }) + job = {"id": job_id, 
"company": "Acme", "url": "https://acme.com/jobs/1"} + + fake_msg_bytes = ( + b"From: recruiter@acme.com\r\n" + b"To: alex@example.com\r\n" + b"Subject: Interview Invitation\r\n" + b"Message-ID: \r\n" + b"\r\n" + b"Hi Alex, we'd like to schedule a phone screen." + ) + + conn_mock = MagicMock() + conn_mock.select.return_value = ("OK", [b"1"]) + conn_mock.search.return_value = ("OK", [b"1"]) + conn_mock.fetch.return_value = ("OK", [(b"1 (RFC822 {123})", fake_msg_bytes)]) + + with patch("scripts.imap_sync.classify_stage_signal", return_value="interview_scheduled"): + inb, out = sync_job_emails(job, conn_mock, {"lookback_days": 90}, db_path) + + assert inb == 1 + contacts = get_contacts(db_path, job_id=job_id) + assert contacts[0]["stage_signal"] == "interview_scheduled" + + +def test_parse_linkedin_alert_extracts_jobs(): + from scripts.imap_sync import parse_linkedin_alert + body = """\ +Your job alert for customer success manager in United States +New jobs match your preferences. +Manage alerts: https://www.linkedin.com/comm/jobs/alerts?... 
+ +Customer Success Manager +Reflow +California, United States +View job: https://www.linkedin.com/comm/jobs/view/4376518925/?trackingId=abc%3D%3D&refId=xyz + +--------------------------------------------------------- + +Customer Engagement Manager +Bitwarden +United States + +2 school alumni +Apply with resume & profile +View job: https://www.linkedin.com/comm/jobs/view/4359824983/?trackingId=def%3D%3D + +--------------------------------------------------------- + +""" + jobs = parse_linkedin_alert(body) + assert len(jobs) == 2 + assert jobs[0]["title"] == "Customer Success Manager" + assert jobs[0]["company"] == "Reflow" + assert jobs[0]["location"] == "California, United States" + assert jobs[0]["url"] == "https://www.linkedin.com/jobs/view/4376518925/" + assert jobs[1]["title"] == "Customer Engagement Manager" + assert jobs[1]["company"] == "Bitwarden" + assert jobs[1]["url"] == "https://www.linkedin.com/jobs/view/4359824983/" + + +def test_parse_linkedin_alert_skips_blocks_without_view_job(): + from scripts.imap_sync import parse_linkedin_alert + body = """\ +Customer Success Manager +Some Company +United States + +--------------------------------------------------------- + +Valid Job Title +Valid Company +Remote +View job: https://www.linkedin.com/comm/jobs/view/1111111/?x=y + +--------------------------------------------------------- +""" + jobs = parse_linkedin_alert(body) + assert len(jobs) == 1 + assert jobs[0]["title"] == "Valid Job Title" + + +def test_parse_linkedin_alert_empty_body(): + from scripts.imap_sync import parse_linkedin_alert + assert parse_linkedin_alert("") == [] + assert parse_linkedin_alert("No jobs here.") == [] + + +# ── _scan_unmatched_leads integration ───────────────────────────────────────── + +_ALERT_BODY = """\ +Your job alert for customer success manager in United States +New jobs match your preferences. 
+ +Customer Success Manager +Acme Corp +California, United States +View job: https://www.linkedin.com/comm/jobs/view/9999001/?trackingId=abc + +--------------------------------------------------------- + +Director of Customer Success +Beta Inc +Remote +View job: https://www.linkedin.com/comm/jobs/view/9999002/?trackingId=def + +--------------------------------------------------------- +""" + +_ALERT_EMAIL = { + "message_id": "", + "from_addr": "jobalerts-noreply@linkedin.com", + "to_addr": "alex@example.com", + "subject": "2 new jobs for customer success manager", + "body": _ALERT_BODY, + "date": "2026-02-24 12:00:00", +} + + +def test_scan_unmatched_leads_linkedin_alert_inserts_jobs(tmp_path): + """_scan_unmatched_leads detects a LinkedIn alert and inserts each job card.""" + import sqlite3 + from unittest.mock import patch, MagicMock + from scripts.db import init_db + + db_path = tmp_path / "test.db" + init_db(db_path) + + conn_mock = MagicMock() + + with patch("scripts.imap_sync._search_folder", return_value=[b"1"]), \ + patch("scripts.imap_sync._parse_message", return_value=_ALERT_EMAIL), \ + patch("scripts.task_runner.submit_task") as mock_submit: + + from scripts.imap_sync import _scan_unmatched_leads + known_ids: set = set() + new_leads = _scan_unmatched_leads(conn_mock, {"lookback_days": 90}, db_path, known_ids) + + assert new_leads == 2 + + # Message ID added so it won't be reprocessed + assert "" in known_ids + + # Both jobs inserted with correct fields + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + jobs = conn.execute("SELECT * FROM jobs ORDER BY id").fetchall() + conn.close() + + assert len(jobs) == 2 + assert jobs[0]["title"] == "Customer Success Manager" + assert jobs[0]["company"] == "Acme Corp" + assert jobs[0]["url"] == "https://www.linkedin.com/jobs/view/9999001/" + assert jobs[0]["source"] == "linkedin" + assert jobs[1]["title"] == "Director of Customer Success" + assert jobs[1]["url"] == 
"https://www.linkedin.com/jobs/view/9999002/" + + # scrape_url task submitted for each inserted job + assert mock_submit.call_count == 2 + task_types = [call.args[1] for call in mock_submit.call_args_list] + assert task_types == ["scrape_url", "scrape_url"] + + +def test_scan_unmatched_leads_linkedin_alert_skips_duplicates(tmp_path): + """URLs already in the DB are not re-inserted.""" + from unittest.mock import patch, MagicMock + from scripts.db import init_db, insert_job + + db_path = tmp_path / "test.db" + init_db(db_path) + + # Pre-insert one of the two URLs + insert_job(db_path, { + "title": "Customer Success Manager", "company": "Acme Corp", + "url": "https://www.linkedin.com/jobs/view/9999001/", + "source": "linkedin", "location": "", "is_remote": 0, + "salary": "", "description": "", "date_found": "2026-02-24", + }) + + conn_mock = MagicMock() + + with patch("scripts.imap_sync._search_folder", return_value=[b"1"]), \ + patch("scripts.imap_sync._parse_message", return_value=_ALERT_EMAIL), \ + patch("scripts.task_runner.submit_task") as mock_submit: + + from scripts.imap_sync import _scan_unmatched_leads + new_leads = _scan_unmatched_leads(conn_mock, {"lookback_days": 90}, db_path, set()) + + # Only one new job (the duplicate was skipped) + assert new_leads == 1 + assert mock_submit.call_count == 1 + + +def test_scan_unmatched_leads_linkedin_alert_skips_llm_path(tmp_path): + """After a LinkedIn alert email, the LLM extraction path is never reached.""" + from unittest.mock import patch, MagicMock + from scripts.db import init_db + + db_path = tmp_path / "test.db" + init_db(db_path) + + conn_mock = MagicMock() + + with patch("scripts.imap_sync._search_folder", return_value=[b"1"]), \ + patch("scripts.imap_sync._parse_message", return_value=_ALERT_EMAIL), \ + patch("scripts.task_runner.submit_task"), \ + patch("scripts.imap_sync.extract_lead_info") as mock_llm: + + from scripts.imap_sync import _scan_unmatched_leads + _scan_unmatched_leads(conn_mock, 
{"lookback_days": 90}, db_path, set()) + + # LLM extraction must never be called for alert emails + mock_llm.assert_not_called() diff --git a/tests/test_llm_router.py b/tests/test_llm_router.py new file mode 100644 index 0000000..0d5a897 --- /dev/null +++ b/tests/test_llm_router.py @@ -0,0 +1,135 @@ +import pytest +from unittest.mock import patch, MagicMock +from pathlib import Path +import yaml + +CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml" + + +def test_config_loads(): + """Config file is valid YAML with required keys.""" + cfg = yaml.safe_load(CONFIG_PATH.read_text()) + assert "fallback_order" in cfg + assert "backends" in cfg + assert len(cfg["fallback_order"]) >= 1 + + +def test_router_uses_first_reachable_backend(): + """Router skips unreachable backends and uses the first that responds.""" + from scripts.llm_router import LLMRouter + + router = LLMRouter(CONFIG_PATH) + + mock_response = MagicMock() + mock_response.choices[0].message.content = "hello" + + with patch.object(router, "_is_reachable", side_effect=[False, True, True, True, True]), \ + patch("scripts.llm_router.OpenAI") as MockOpenAI: + instance = MockOpenAI.return_value + instance.chat.completions.create.return_value = mock_response + mock_model = MagicMock() + mock_model.id = "test-model" + instance.models.list.return_value.data = [mock_model] + + result = router.complete("say hello") + + assert result == "hello" + + +def test_router_raises_when_all_backends_fail(): + """Router raises RuntimeError when every backend is unreachable or errors.""" + from scripts.llm_router import LLMRouter + + router = LLMRouter(CONFIG_PATH) + + with patch.object(router, "_is_reachable", return_value=False): + with pytest.raises(RuntimeError, match="All LLM backends exhausted"): + router.complete("say hello") + + +def test_is_reachable_returns_false_on_connection_error(): + """_is_reachable returns False when the health endpoint is unreachable.""" + from scripts.llm_router import LLMRouter + 
import requests + + router = LLMRouter(CONFIG_PATH) + + with patch("scripts.llm_router.requests.get", side_effect=requests.ConnectionError): + result = router._is_reachable("http://localhost:9999/v1") + + assert result is False + + +def test_complete_skips_backend_without_image_support(tmp_path): + """When images= is passed, backends without supports_images are skipped.""" + import yaml + from scripts.llm_router import LLMRouter + + cfg = { + "fallback_order": ["ollama", "vision_service"], + "backends": { + "ollama": { + "type": "openai_compat", + "base_url": "http://localhost:11434/v1", + "model": "llava", + "api_key": "ollama", + "enabled": True, + "supports_images": False, + }, + "vision_service": { + "type": "vision_service", + "base_url": "http://localhost:8002", + "enabled": True, + "supports_images": True, + }, + }, + } + cfg_file = tmp_path / "llm.yaml" + cfg_file.write_text(yaml.dump(cfg)) + + from unittest.mock import patch, MagicMock + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json.return_value = {"text": "B β€” collaborative"} + + with patch("scripts.llm_router.requests.get") as mock_get, \ + patch("scripts.llm_router.requests.post") as mock_post: + # health check returns ok for vision_service + mock_get.return_value = MagicMock(status_code=200) + mock_post.return_value = mock_resp + + router = LLMRouter(config_path=cfg_file) + result = router.complete("Which option?", images=["base64data"]) + + assert result == "B β€” collaborative" + # vision_service POST /analyze should have been called + assert mock_post.called + + +def test_complete_without_images_skips_vision_service(tmp_path): + """When images=None, vision_service backend is skipped.""" + import yaml + from scripts.llm_router import LLMRouter + from unittest.mock import patch, MagicMock + + cfg = { + "fallback_order": ["vision_service"], + "backends": { + "vision_service": { + "type": "vision_service", + "base_url": "http://localhost:8002", + "enabled": True, + 
"supports_images": True, + }, + }, + } + cfg_file = tmp_path / "llm.yaml" + cfg_file.write_text(yaml.dump(cfg)) + + router = LLMRouter(config_path=cfg_file) + with patch("scripts.llm_router.requests.post") as mock_post: + try: + router.complete("text only prompt") + except RuntimeError: + pass # all backends exhausted is expected + assert not mock_post.called diff --git a/tests/test_match.py b/tests/test_match.py new file mode 100644 index 0000000..25a823e --- /dev/null +++ b/tests/test_match.py @@ -0,0 +1,47 @@ +import pytest +from unittest.mock import patch, MagicMock + + +def test_extract_job_description_from_url(): + """extract_job_description fetches and returns visible text from a URL.""" + from scripts.match import extract_job_description + + with patch("scripts.match.requests.get") as mock_get: + mock_get.return_value.text = "

We need a CSM with Salesforce.

" + mock_get.return_value.raise_for_status = MagicMock() + result = extract_job_description("https://example.com/job/123") + + assert "CSM" in result + assert "Salesforce" in result + + +def test_score_is_between_0_and_100(): + """match_score returns a float in [0, 100] and a list of keyword gaps.""" + from scripts.match import match_score + + score, gaps = match_score( + resume_text="Customer Success Manager with Salesforce experience", + job_text="Looking for a Customer Success Manager who knows Salesforce and Gainsight", + ) + assert 0 <= score <= 100 + assert isinstance(gaps, list) + + +def test_write_score_to_notion(): + """write_match_to_notion updates the Notion page with score and gaps.""" + from scripts.match import write_match_to_notion + + mock_notion = MagicMock() + + SAMPLE_FM = { + "match_score": "Match Score", + "keyword_gaps": "Keyword Gaps", + } + + write_match_to_notion(mock_notion, "page-id-abc", 85.5, ["Gainsight", "Churnzero"], SAMPLE_FM) + + mock_notion.pages.update.assert_called_once() + call_kwargs = mock_notion.pages.update.call_args[1] + assert call_kwargs["page_id"] == "page-id-abc" + score_val = call_kwargs["properties"]["Match Score"]["number"] + assert score_val == 85.5 diff --git a/tests/test_scrape_url.py b/tests/test_scrape_url.py new file mode 100644 index 0000000..37eace4 --- /dev/null +++ b/tests/test_scrape_url.py @@ -0,0 +1,135 @@ +"""Tests for URL-based job scraping.""" +from unittest.mock import patch, MagicMock + + +def _make_db(tmp_path, url="https://www.linkedin.com/jobs/view/99999/"): + from scripts.db import init_db, insert_job + db = tmp_path / "test.db" + init_db(db) + job_id = insert_job(db, { + "title": "Importing…", "company": "", "url": url, + "source": "manual", "location": "", "description": "", "date_found": "2026-02-24", + }) + return db, job_id + + +def test_canonicalize_url_linkedin(): + from scripts.scrape_url import canonicalize_url + messy = ( + "https://www.linkedin.com/jobs/view/4376518925/" + 
"?trk=eml-email_job_alert&refId=abc%3D%3D&trackingId=xyz" + ) + assert canonicalize_url(messy) == "https://www.linkedin.com/jobs/view/4376518925/" + + +def test_canonicalize_url_linkedin_comm(): + from scripts.scrape_url import canonicalize_url + comm = "https://www.linkedin.com/comm/jobs/view/4376518925/?trackingId=abc" + assert canonicalize_url(comm) == "https://www.linkedin.com/jobs/view/4376518925/" + + +def test_canonicalize_url_generic_strips_utm(): + from scripts.scrape_url import canonicalize_url + url = "https://jobs.example.com/post/42?utm_source=linkedin&utm_medium=email&jk=real_param" + result = canonicalize_url(url) + assert "utm_source" not in result + assert "real_param" in result + + +def test_detect_board_linkedin(): + from scripts.scrape_url import _detect_board + assert _detect_board("https://www.linkedin.com/jobs/view/12345/") == "linkedin" + assert _detect_board("https://linkedin.com/jobs/view/12345/?tracking=abc") == "linkedin" + + +def test_detect_board_indeed(): + from scripts.scrape_url import _detect_board + assert _detect_board("https://www.indeed.com/viewjob?jk=abc123") == "indeed" + + +def test_detect_board_glassdoor(): + from scripts.scrape_url import _detect_board + assert _detect_board("https://www.glassdoor.com/job-listing/foo-bar-123.htm") == "glassdoor" + + +def test_detect_board_generic(): + from scripts.scrape_url import _detect_board + assert _detect_board("https://jobs.example.com/posting/42") == "generic" + + +def test_extract_linkedin_job_id(): + from scripts.scrape_url import _extract_linkedin_job_id + assert _extract_linkedin_job_id("https://www.linkedin.com/jobs/view/4376518925/") == "4376518925" + assert _extract_linkedin_job_id("https://www.linkedin.com/comm/jobs/view/4376518925/?tracking=x") == "4376518925" + assert _extract_linkedin_job_id("https://example.com/no-id") is None + + +def test_scrape_linkedin_updates_job(tmp_path): + db, job_id = _make_db(tmp_path) + + linkedin_html = """ +

Customer Success Manager

+ Acme Corp + San Francisco, CA +
Exciting CSM role with great benefits.
+ """ + + mock_resp = MagicMock() + mock_resp.text = linkedin_html + mock_resp.raise_for_status = MagicMock() + + with patch("scripts.scrape_url.requests.get", return_value=mock_resp): + from scripts.scrape_url import scrape_job_url + result = scrape_job_url(db, job_id) + + assert result.get("title") == "Customer Success Manager" + assert result.get("company") == "Acme Corp" + assert "CSM role" in result.get("description", "") + + import sqlite3 + conn = sqlite3.connect(db) + conn.row_factory = sqlite3.Row + row = dict(conn.execute("SELECT * FROM jobs WHERE id=?", (job_id,)).fetchone()) + conn.close() + assert row["title"] == "Customer Success Manager" + assert row["company"] == "Acme Corp" + + +def test_scrape_url_generic_json_ld(tmp_path): + db, job_id = _make_db(tmp_path, url="https://jobs.example.com/post/42") + + json_ld_html = """ + + """ + + mock_resp = MagicMock() + mock_resp.text = json_ld_html + mock_resp.raise_for_status = MagicMock() + + with patch("scripts.scrape_url.requests.get", return_value=mock_resp): + from scripts.scrape_url import scrape_job_url + result = scrape_job_url(db, job_id) + + assert result.get("title") == "TAM Role" + assert result.get("company") == "TechCo" + + +def test_scrape_url_graceful_on_http_error(tmp_path): + db, job_id = _make_db(tmp_path) + import requests as req + + with patch("scripts.scrape_url.requests.get", side_effect=req.RequestException("timeout")): + from scripts.scrape_url import scrape_job_url + result = scrape_job_url(db, job_id) + + # Should return empty dict and not raise; job row still exists + assert isinstance(result, dict) + import sqlite3 + conn = sqlite3.connect(db) + row = conn.execute("SELECT id FROM jobs WHERE id=?", (job_id,)).fetchone() + conn.close() + assert row is not None diff --git a/tests/test_sync.py b/tests/test_sync.py new file mode 100644 index 0000000..21c3eea --- /dev/null +++ b/tests/test_sync.py @@ -0,0 +1,88 @@ +# tests/test_sync.py +import pytest +from unittest.mock import patch, 
MagicMock +from pathlib import Path + + +SAMPLE_FM = { + "title_field": "Salary", "job_title": "Job Title", "company": "Company Name", + "url": "Role Link", "source": "Job Source", "status": "Status of Application", + "status_new": "Application Submitted", "date_found": "Date Found", + "remote": "Remote", "match_score": "Match Score", + "keyword_gaps": "Keyword Gaps", "notes": "Notes", "job_description": "Job Description", +} + +SAMPLE_NOTION_CFG = {"token": "secret_test", "database_id": "fake-db-id", "field_map": SAMPLE_FM} + + +def test_sync_pushes_approved_jobs(tmp_path): + """sync_to_notion pushes approved jobs and marks them synced.""" + from scripts.sync import sync_to_notion + from scripts.db import init_db, insert_job, get_jobs_by_status, update_job_status + + db_path = tmp_path / "test.db" + init_db(db_path) + row_id = insert_job(db_path, { + "title": "CSM", "company": "Acme", "url": "https://example.com/1", + "source": "linkedin", "location": "Remote", "is_remote": True, + "salary": "$100k", "description": "Good role", "date_found": "2026-02-20", + }) + update_job_status(db_path, [row_id], "approved") + + mock_notion = MagicMock() + mock_notion.pages.create.return_value = {"id": "notion-page-abc"} + + with patch("scripts.sync.load_notion_config", return_value=SAMPLE_NOTION_CFG), \ + patch("scripts.sync.Client", return_value=mock_notion): + count = sync_to_notion(db_path=db_path) + + assert count == 1 + mock_notion.pages.create.assert_called_once() + synced = get_jobs_by_status(db_path, "synced") + assert len(synced) == 1 + + +def test_sync_falls_back_to_core_fields_on_validation_error(tmp_path): + """When Notion returns a validation_error (missing column), sync retries without optional fields.""" + from scripts.sync import sync_to_notion + from scripts.db import init_db, insert_job, get_jobs_by_status, update_job_status + + db_path = tmp_path / "test.db" + init_db(db_path) + row_id = insert_job(db_path, { + "title": "CSM", "company": "Acme", "url": 
"https://example.com/2", + "source": "linkedin", "location": "Remote", "is_remote": True, + "salary": "", "description": "", "date_found": "2026-02-20", + }) + update_job_status(db_path, [row_id], "approved") + + mock_notion = MagicMock() + # First call raises validation_error; second call (fallback) succeeds + mock_notion.pages.create.side_effect = [ + Exception("validation_error: Could not find property with name: Match Score"), + {"id": "notion-page-fallback"}, + ] + + with patch("scripts.sync.load_notion_config", return_value=SAMPLE_NOTION_CFG), \ + patch("scripts.sync.Client", return_value=mock_notion): + count = sync_to_notion(db_path=db_path) + + assert count == 1 + assert mock_notion.pages.create.call_count == 2 + synced = get_jobs_by_status(db_path, "synced") + assert len(synced) == 1 + + +def test_sync_returns_zero_when_nothing_approved(tmp_path): + """sync_to_notion returns 0 when there are no approved jobs.""" + from scripts.sync import sync_to_notion + from scripts.db import init_db + + db_path = tmp_path / "test.db" + init_db(db_path) + + with patch("scripts.sync.load_notion_config", return_value=SAMPLE_NOTION_CFG), \ + patch("scripts.sync.Client"): + count = sync_to_notion(db_path=db_path) + + assert count == 0 diff --git a/tests/test_task_runner.py b/tests/test_task_runner.py new file mode 100644 index 0000000..3ea5090 --- /dev/null +++ b/tests/test_task_runner.py @@ -0,0 +1,210 @@ +import threading +import time +import pytest +from pathlib import Path +from unittest.mock import patch +import sqlite3 + + +def _make_db(tmp_path): + from scripts.db import init_db, insert_job + db = tmp_path / "test.db" + init_db(db) + job_id = insert_job(db, { + "title": "CSM", "company": "Acme", "url": "https://ex.com/1", + "source": "linkedin", "location": "Remote", "is_remote": True, + "salary": "", "description": "Great role.", "date_found": "2026-02-20", + }) + return db, job_id + + +def test_submit_task_returns_id_and_true(tmp_path): + """submit_task returns 
(task_id, True) and spawns a thread.""" + db, job_id = _make_db(tmp_path) + with patch("scripts.task_runner._run_task"): # don't actually call LLM + from scripts.task_runner import submit_task + task_id, is_new = submit_task(db, "cover_letter", job_id) + assert isinstance(task_id, int) and task_id > 0 + assert is_new is True + + +def test_submit_task_deduplicates(tmp_path): + """submit_task returns (existing_id, False) for a duplicate in-flight task.""" + db, job_id = _make_db(tmp_path) + with patch("scripts.task_runner._run_task"): + from scripts.task_runner import submit_task + first_id, _ = submit_task(db, "cover_letter", job_id) + second_id, is_new = submit_task(db, "cover_letter", job_id) + assert second_id == first_id + assert is_new is False + + +def test_run_task_cover_letter_success(tmp_path): + """_run_task marks running→completed and saves cover letter to DB.""" + db, job_id = _make_db(tmp_path) + from scripts.db import insert_task, get_task_for_job + task_id, _ = insert_task(db, "cover_letter", job_id) + + with patch("scripts.generate_cover_letter.generate", return_value="Dear Hiring Manager,\nGreat fit!"): + from scripts.task_runner import _run_task + _run_task(db, task_id, "cover_letter", job_id) + + task = get_task_for_job(db, "cover_letter", job_id) + assert task["status"] == "completed" + assert task["error"] is None + + conn = sqlite3.connect(db) + row = conn.execute("SELECT cover_letter FROM jobs WHERE id=?", (job_id,)).fetchone() + conn.close() + assert row[0] == "Dear Hiring Manager,\nGreat fit!" 
def test_run_task_company_research_success(tmp_path):
    """_run_task marks running→completed and saves research to DB."""
    db, job_id = _make_db(tmp_path)
    from scripts.db import insert_task, get_task_for_job, get_research

    task_id, _ = insert_task(db, "company_research", job_id)
    fake_result = {
        "raw_output": "raw", "company_brief": "brief",
        "ceo_brief": "ceo", "talking_points": "points",
    }
    with patch("scripts.company_research.research_company", return_value=fake_result):
        from scripts.task_runner import _run_task
        _run_task(db, task_id, "company_research", job_id)

    task = get_task_for_job(db, "company_research", job_id)
    assert task["status"] == "completed"

    research = get_research(db, job_id=job_id)
    assert research["company_brief"] == "brief"


def test_run_task_marks_failed_on_exception(tmp_path):
    """_run_task marks status=failed and stores error when generator raises."""
    db, job_id = _make_db(tmp_path)
    from scripts.db import insert_task, get_task_for_job

    task_id, _ = insert_task(db, "cover_letter", job_id)
    with patch("scripts.generate_cover_letter.generate",
               side_effect=RuntimeError("LLM timeout")):
        from scripts.task_runner import _run_task
        _run_task(db, task_id, "cover_letter", job_id)

    task = get_task_for_job(db, "cover_letter", job_id)
    assert task["status"] == "failed"
    assert "LLM timeout" in task["error"]


def test_run_task_discovery_success(tmp_path):
    """_run_task with task_type=discovery calls run_discovery and stores count in error field."""
    from scripts.db import init_db, insert_task, get_task_for_job

    db = tmp_path / "test.db"
    init_db(db)
    # Discovery is not tied to a job row; job_id 0 is the sentinel.
    task_id, _ = insert_task(db, "discovery", 0)

    with patch("scripts.discover.run_discovery", return_value=7):
        from scripts.task_runner import _run_task
        _run_task(db, task_id, "discovery", 0)

    task = get_task_for_job(db, "discovery", 0)
    assert task["status"] == "completed"
    # NOTE: informational tasks reuse the error column for their summary text.
    assert "7 new listings" in task["error"]


def test_run_task_email_sync_success(tmp_path):
    """email_sync task calls sync_all and marks completed with summary."""
    db, _ = _make_db(tmp_path)
    from scripts.db import insert_task, get_task_for_job

    task_id, _ = insert_task(db, "email_sync", 0)
    summary = {"synced": 3, "inbound": 5, "outbound": 2, "new_leads": 1, "errors": []}
    with patch("scripts.imap_sync.sync_all", return_value=summary):
        from scripts.task_runner import _run_task
        _run_task(db, task_id, "email_sync", 0)

    task = get_task_for_job(db, "email_sync", 0)
    assert task["status"] == "completed"
    assert "3 jobs" in task["error"]


def test_run_task_email_sync_file_not_found(tmp_path):
    """email_sync marks failed with helpful message when config is missing."""
    db, _ = _make_db(tmp_path)
    from scripts.db import insert_task, get_task_for_job

    task_id, _ = insert_task(db, "email_sync", 0)
    with patch("scripts.imap_sync.sync_all",
               side_effect=FileNotFoundError("config/email.yaml")):
        from scripts.task_runner import _run_task
        _run_task(db, task_id, "email_sync", 0)

    task = get_task_for_job(db, "email_sync", 0)
    assert task["status"] == "failed"
    assert "email" in task["error"].lower()


def test_submit_task_actually_completes(tmp_path):
    """Integration: submit_task spawns a thread that completes asynchronously."""
    db, job_id = _make_db(tmp_path)
    from scripts.db import get_task_for_job

    with patch("scripts.generate_cover_letter.generate", return_value="Cover letter text"):
        from scripts.task_runner import submit_task
        task_id, _ = submit_task(db, "cover_letter", job_id)
        # Poll with an explicit deadline (5 s budget, 0.1 s step) rather than
        # a fixed iteration count, so slow sleeps can't silently shorten it.
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline:
            task = get_task_for_job(db, "cover_letter", job_id)
            if task and task["status"] in ("completed", "failed"):
                break
            time.sleep(0.1)

    task = get_task_for_job(db, "cover_letter", job_id)
    assert task["status"] == "completed"


def test_run_task_enrich_craigslist_success(tmp_path):
    """enrich_craigslist task calls enrich_craigslist_fields and marks completed."""
    from scripts.db import init_db, insert_job, insert_task, get_task_for_job

    db = tmp_path / "test.db"
    init_db(db)
    job_id = insert_job(db, {
        "title": "CSM", "company": "", "url": "https://sfbay.craigslist.org/jjj/d/9.html",
        "source": "craigslist", "location": "", "description": "Join Acme Corp. Pay: $100k.",
        "date_found": "2026-02-24",
    })
    task_id, _ = insert_task(db, "enrich_craigslist", job_id)

    with patch("scripts.enrich_descriptions.enrich_craigslist_fields",
               return_value={"company": "Acme Corp", "salary": "$100k"}) as mock_enrich:
        from scripts.task_runner import _run_task
        _run_task(db, task_id, "enrich_craigslist", job_id)

    mock_enrich.assert_called_once_with(db, job_id)
    task = get_task_for_job(db, "enrich_craigslist", job_id)
    assert task["status"] == "completed"


def test_scrape_url_submits_enrich_craigslist_for_craigslist_job(tmp_path):
    """After scrape_url completes for a craigslist job with empty company, enrich_craigslist is queued."""
    from scripts.db import init_db, insert_job, insert_task

    db = tmp_path / "test.db"
    init_db(db)
    job_id = insert_job(db, {
        "title": "CSM", "company": "", "url": "https://sfbay.craigslist.org/jjj/d/10.html",
        "source": "craigslist", "location": "", "description": "",
        "date_found": "2026-02-24",
    })
    task_id, _ = insert_task(db, "scrape_url", job_id)

    with patch("scripts.scrape_url.scrape_job_url", return_value={"title": "CSM", "company": ""}):
        # A bare MagicMock records calls without calling through, so no
        # follow-up thread is spawned. (Previously passed wraps=None, which
        # is already the default and did nothing.)
        with patch("scripts.task_runner.submit_task") as mock_submit:
            mock_submit.return_value = (99, True)
            from scripts.task_runner import _run_task
            _run_task(db, task_id, "scrape_url", job_id)

    # submit_task should have been called with enrich_craigslist for this job.
    assert mock_submit.called
    call_args = mock_submit.call_args
    assert call_args[0][1] == "enrich_craigslist"
    assert call_args[0][2] == job_id