peregrine/app/Home.py
pyr0ball 60f067dd0d fix: Settings widget crash, stale setup banners, Docker service controls
- Settings → Search: add-title (+) and Import buttons crashed with
  StreamlitAPIException when writing to _sp_titles_multi after it was
  already instantiated. Fix: pending-key pattern (_sp_titles_pending /
  _sp_locs_pending) applied before widget renders on next pass.

- Home setup banners: fired for email/notion/keywords even when those
  features were already configured. Add 'done' condition callables
  (_email_configured, _notion_configured, _keywords_configured) to
  suppress banners automatically when config files are present.

- Services tab start/stop buttons: docker CLI was unavailable inside
  the container so _docker_available was False and buttons never showed.
  Bind-mount host /usr/bin/docker (ro) + /var/run/docker.sock into the
  app container so it can control sibling containers via DooD pattern.
2026-03-04 12:11:23 -08:00

567 lines
25 KiB
Python

# app/Home.py
"""
Job Seeker Dashboard — Home page.

Shows live job counts and the main pipeline actions (Run Discovery, Enrich
Descriptions, Score Listings, Sync to Notion, Sync Emails, Add Jobs by URL/CSV),
followed by the Danger Zone purge/archive tools and the setup banners.
"""
import subprocess
import sys
from pathlib import Path
import streamlit as st
import yaml
# Put the directory two levels above this file first on sys.path; presumably
# this is what lets the `scripts.*` imports below resolve — verify against the
# repository layout.
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.user_profile import UserProfile
# The profile is loaded only when config/user.yaml exists; without it the page
# falls back to a generic display name and the setup banners are skipped.
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
from scripts.db import DEFAULT_DB, init_db, get_job_counts, purge_jobs, purge_email_data, \
purge_non_remote, archive_jobs, kill_stuck_tasks, get_task_for_job, get_active_tasks, \
insert_job, get_existing_urls
from scripts.task_runner import submit_task
# Called on every script execution, before any query below touches the
# database. NOTE(review): presumably idempotent (creates missing tables only) —
# confirm in scripts.db.
init_db(DEFAULT_DB)
def _email_configured() -> bool:
_e = Path(__file__).parent.parent / "config" / "email.yaml"
if not _e.exists():
return False
import yaml as _yaml
_cfg = _yaml.safe_load(_e.read_text()) or {}
return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host"))
def _notion_configured() -> bool:
_n = Path(__file__).parent.parent / "config" / "notion.yaml"
if not _n.exists():
return False
import yaml as _yaml
_cfg = _yaml.safe_load(_n.read_text()) or {}
return bool(_cfg.get("token"))
def _keywords_configured() -> bool:
_k = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
if not _k.exists():
return False
import yaml as _yaml
_cfg = _yaml.safe_load(_k.read_text()) or {}
return bool(_cfg.get("keywords") or _cfg.get("required") or _cfg.get("preferred"))
# Optional setup prompts rendered at the bottom of the Home page once the
# profile's wizard is complete. Each entry is a dict with:
#   key        — stable id; compared against the profile's dismissed_banners
#                list and appended to it in user.yaml when the banner is dismissed
#   text       — message shown in the banner
#   link_label — label of the link to the Settings page
#   done       — optional zero-argument callable; the banner is hidden when it
#                returns True. Entries without "done" disappear only when
#                dismissed.
_SETUP_BANNERS = [
# NOTE(review): "connect_cloud" counts as done when Notion is configured —
# confirm Notion is the intended cloud service for this prompt.
{"key": "connect_cloud", "text": "Connect a cloud service for resume/cover letter storage",
"link_label": "Settings → Integrations",
"done": _notion_configured},
{"key": "setup_email", "text": "Set up email sync to catch recruiter outreach",
"link_label": "Settings → Email",
"done": _email_configured},
# Shares the email check with "setup_email": both banners hide together.
{"key": "setup_email_labels", "text": "Set up email label filters for auto-classification",
"link_label": "Settings → Email (label guide)",
"done": _email_configured},
{"key": "tune_mission", "text": "Tune your mission preferences for better cover letters",
"link_label": "Settings → My Profile"},
{"key": "configure_keywords", "text": "Configure keywords and blocklist for smarter search",
"link_label": "Settings → Search",
"done": _keywords_configured},
{"key": "upload_corpus", "text": "Upload your cover letter corpus for voice fine-tuning",
"link_label": "Settings → Fine-Tune"},
{"key": "configure_linkedin", "text": "Configure LinkedIn Easy Apply automation",
"link_label": "Settings → AIHawk"},
{"key": "setup_searxng", "text": "Set up company research with SearXNG",
"link_label": "Settings → Services"},
{"key": "target_companies", "text": "Build a target company list for focused outreach",
"link_label": "Settings → Search"},
{"key": "setup_notifications", "text": "Set up notifications for stage changes",
"link_label": "Settings → Integrations"},
{"key": "tune_model", "text": "Tune a custom cover letter model on your writing",
"link_label": "Settings → Fine-Tune"},
{"key": "review_training", "text": "Review and curate training data for model tuning",
"link_label": "Settings → Fine-Tune"},
{"key": "setup_calendar", "text": "Set up calendar sync to track interview dates",
"link_label": "Settings → Integrations"},
]
def _dismissible(key: str, status: str, msg: str) -> None:
    """Render a task-result message with a button that hides it for the session.

    Args:
        key: Identifier that must be unique per task result. It builds both
            the ``dismissed_<key>`` session-state flag and the ``dismiss_<key>``
            widget key.
        status: ``"completed"`` renders a success box; any other value renders
            an error box.
        msg: Text to display.
    """
    if st.session_state.get(f"dismissed_{key}"):
        return

    col_msg, col_x = st.columns([10, 1])
    with col_msg:
        render = st.success if status == "completed" else st.error
        render(msg)
    with col_x:
        st.write("")  # presumably a vertical spacer ahead of the button
        # An empty label renders a blank button; show a visible close glyph.
        if st.button("✕", key=f"dismiss_{key}", help="Dismiss"):
            st.session_state[f"dismissed_{key}"] = True
            st.rerun()
def _queue_url_imports(db_path: Path, urls: list) -> int:
    """Insert each new URL as a placeholder manual job and queue a ``scrape_url`` task.

    Every URL is stripped and canonicalized first. An entry is skipped when it
    does not start with ``http``, is already returned by
    ``get_existing_urls``, or repeats an earlier entry of the same batch.

    Args:
        db_path: Database path passed through to the db and task helpers.
        urls: Raw URL strings.

    Returns:
        Number of jobs that were inserted and had a scrape task submitted.
    """
    from datetime import datetime
    from scripts.scrape_url import canonicalize_url

    existing = get_existing_urls(db_path)
    # `existing` is a snapshot taken before the loop, so duplicates inside the
    # batch have to be tracked separately.
    seen = set()
    today = datetime.now().isoformat()[:10]  # YYYY-MM-DD
    queued = 0
    for raw in urls:
        url = canonicalize_url(raw.strip())
        if not url.startswith("http") or url in existing or url in seen:
            continue
        seen.add(url)
        job_id = insert_job(db_path, {
            "title": "Importing…",
            "company": "",
            "url": url,
            "source": "manual",
            "location": "",
            "description": "",
            "date_found": today,
        })
        # A falsy id means nothing was inserted, so there is nothing to scrape.
        if job_id:
            submit_task(db_path, "scrape_url", job_id)
            queued += 1
    return queued
st.title(f"🔍 {_name}'s Job Search")
st.caption("Discover → Review → Sync to Notion")
st.divider()


@st.fragment(run_every=10)
def _live_counts():
    """Show one metric per job status; the fragment is re-run on a 10-unit timer."""
    totals = get_job_counts(DEFAULT_DB)
    # (metric label, key in the counts mapping), in display order.
    layout = (
        ("Pending Review", "pending"),
        ("Approved", "approved"),
        ("Applied", "applied"),
        ("Synced to Notion", "synced"),
        ("Rejected", "rejected"),
    )
    for column, (label, status) in zip(st.columns(5), layout):
        column.metric(label, totals.get(status, 0))


_live_counts()
st.divider()
# Four action columns: discovery, description enrichment, scoring, Notion sync.
left, enrich_col, mid, right = st.columns(4)

with left:
    st.subheader("Find New Jobs")
    st.caption("Scrapes all configured boards and adds new listings to your review queue.")

    # Latest "discovery" task (job id 0); None when there is none.
    _disc_task = get_task_for_job(DEFAULT_DB, "discovery", 0)
    _disc_running = _disc_task and _disc_task["status"] in ("queued", "running")

    if st.button("🚀 Run Discovery", use_container_width=True, type="primary",
                 disabled=bool(_disc_running)):
        submit_task(DEFAULT_DB, "discovery", 0)
        st.rerun()

    if _disc_running:
        @st.fragment(run_every=4)
        def _disc_status():
            """Poll the discovery task; rerun the whole page once it stops running."""
            t = get_task_for_job(DEFAULT_DB, "discovery", 0)
            if t and t["status"] in ("queued", "running"):
                lbl = "Queued…" if t["status"] == "queued" else "Scraping job boards… this may take a minute"
                st.info(f"{lbl}")
            else:
                st.rerun()

        _disc_status()
    elif _disc_task and _disc_task["status"] == "completed":
        # The completed banner shows the task's "error" field as its summary.
        # `.get(key, default)` does not cover a key that is present with value
        # None, so fall back explicitly to avoid printing "None".
        _disc_summary = _disc_task.get("error") or ""
        _disc_detail = f" — {_disc_summary}" if _disc_summary else ""
        _dismissible(f"disc_{_disc_task['id']}", "completed",
                     f"✅ Discovery complete{_disc_detail}. Head to Job Review.")
    elif _disc_task and _disc_task["status"] == "failed":
        _dismissible(f"disc_{_disc_task['id']}", "failed",
                     f"Discovery failed: {_disc_task.get('error') or 'unknown error'}")
with enrich_col:
    st.subheader("Enrich Descriptions")
    st.caption("Re-fetch missing descriptions for any listing (LinkedIn, Indeed, Glassdoor, Adzuna, The Ladders, generic).")

    # Latest "enrich_descriptions" task (job id 0); None when there is none.
    _enrich_task = get_task_for_job(DEFAULT_DB, "enrich_descriptions", 0)
    _enrich_running = _enrich_task and _enrich_task["status"] in ("queued", "running")

    if st.button("🔍 Fill Missing Descriptions", use_container_width=True, type="primary",
                 disabled=bool(_enrich_running)):
        submit_task(DEFAULT_DB, "enrich_descriptions", 0)
        st.rerun()

    if _enrich_running:
        @st.fragment(run_every=4)
        def _enrich_status():
            """Poll the enrich task; rerun the whole page once it stops running."""
            t = get_task_for_job(DEFAULT_DB, "enrich_descriptions", 0)
            if t and t["status"] in ("queued", "running"):
                st.info("⏳ Fetching descriptions…")
            else:
                st.rerun()

        _enrich_status()
    elif _enrich_task and _enrich_task["status"] == "completed":
        # The "error" field doubles as the result summary here. Use `or` so a
        # present-but-None value falls back to "Done" instead of printing "None".
        _dismissible(f"enrich_{_enrich_task['id']}", "completed",
                     f"{_enrich_task.get('error') or 'Done'}")
    elif _enrich_task and _enrich_task["status"] == "failed":
        _dismissible(f"enrich_{_enrich_task['id']}", "failed",
                     f"Enrich failed: {_enrich_task.get('error') or 'unknown error'}")
with mid:
    # Plain import instead of the __import__(..., fromlist=[...]) expression:
    # same module, same function, readable and greppable.
    from scripts.db import get_jobs_by_status

    # Pending jobs that have a description but no match score yet.
    unscored = sum(
        1
        for j in get_jobs_by_status(DEFAULT_DB, "pending")
        if j.get("match_score") is None and j.get("description")
    )

    st.subheader("Score Listings")
    st.caption(f"Run TF-IDF match scoring against {_name}'s resume. {unscored} pending job{'s' if unscored != 1 else ''} unscored.")

    if st.button("📊 Score All Unscored Jobs", use_container_width=True, type="primary",
                 disabled=unscored == 0):
        with st.spinner("Scoring…"):
            # Runs scripts/match.py inside the "job-seeker" conda environment,
            # with the repository root as working directory.
            result = subprocess.run(
                ["conda", "run", "-n", "job-seeker", "python", "scripts/match.py"],
                capture_output=True, text=True,
                cwd=str(Path(__file__).parent.parent),
            )
        if result.returncode == 0:
            st.success("Scoring complete!")
            st.code(result.stdout)
        else:
            st.error("Scoring failed.")
            st.code(result.stderr)
        # NOTE(review): the rerun follows immediately, so the output above is
        # replaced as soon as the page re-executes — confirm that is intended.
        st.rerun()
with right:
    approved_count = get_job_counts(DEFAULT_DB).get("approved", 0)

    st.subheader("Send to Notion")
    st.caption("Push all approved jobs to your Notion tracking database.")

    # Nothing to sync: explain why instead of offering the button.
    if approved_count == 0:
        st.info("No approved jobs yet. Review and approve some listings first.")
    elif st.button(
        f"📤 Sync {approved_count} approved job{'s' if approved_count != 1 else ''} → Notion",
        use_container_width=True, type="primary",
    ):
        with st.spinner("Syncing to Notion…"):
            # Imported on demand, only when a sync is actually requested.
            from scripts.sync import sync_to_notion
            count = sync_to_notion(DEFAULT_DB)
        st.success(f"Synced {count} job{'s' if count != 1 else ''} to Notion!")
        st.rerun()

st.divider()
# ── Email Sync ────────────────────────────────────────────────────────────────
email_left, email_right = st.columns([3, 1])

with email_left:
    st.subheader("Sync Emails")
    st.caption("Pull inbound recruiter emails and match them to active applications. "
               "New recruiter outreach is added to your Job Review queue.")

with email_right:
    # Latest "email_sync" task (job id 0); None when there is none.
    _email_task = get_task_for_job(DEFAULT_DB, "email_sync", 0)
    _email_running = _email_task and _email_task["status"] in ("queued", "running")

    if st.button("📧 Sync Emails", use_container_width=True, type="primary",
                 disabled=bool(_email_running)):
        submit_task(DEFAULT_DB, "email_sync", 0)
        st.rerun()

    if _email_running:
        @st.fragment(run_every=4)
        def _email_status():
            """Poll the email sync task; rerun the whole page once it stops running."""
            t = get_task_for_job(DEFAULT_DB, "email_sync", 0)
            if t and t["status"] in ("queued", "running"):
                st.info("⏳ Syncing emails…")
            else:
                st.rerun()

        _email_status()
    elif _email_task and _email_task["status"] == "completed":
        # The "error" field doubles as the result summary here. Use `or` so a
        # present-but-None value falls back to "Done" instead of printing "None".
        _dismissible(f"email_{_email_task['id']}", "completed",
                     f"{_email_task.get('error') or 'Done'}")
    elif _email_task and _email_task["status"] == "failed":
        _dismissible(f"email_{_email_task['id']}", "failed",
                     f"Sync failed: {_email_task.get('error') or 'unknown error'}")

st.divider()
# ── Add Jobs by URL ───────────────────────────────────────────────────────────
add_left, _add_right = st.columns([3, 1])
with add_left:
    st.subheader("Add Jobs by URL")
    st.caption("Paste job listing URLs to import and scrape in the background. "
               "Supports LinkedIn, Indeed, Glassdoor, and most job boards.")

url_tab, csv_tab = st.tabs(["Paste URLs", "Upload CSV"])

with url_tab:
    url_text = st.text_area(
        "urls",
        placeholder="https://www.linkedin.com/jobs/view/1234567/\nhttps://www.indeed.com/viewjob?jk=abc",
        height=100,
        label_visibility="collapsed",
    )
    if st.button("📥 Add Jobs", key="add_urls_btn", use_container_width=True,
                 disabled=not (url_text or "").strip()):
        # One URL per line; lines that do not start with "http" are ignored.
        _urls = [u.strip() for u in url_text.strip().splitlines() if u.strip().startswith("http")]
        if _urls:
            _n = _queue_url_imports(DEFAULT_DB, _urls)
            if _n:
                st.success(f"Queued {_n} job{'s' if _n != 1 else ''} for import. Check Job Review shortly.")
            else:
                st.info("All URLs already in the database.")
            st.rerun()

with csv_tab:
    csv_file = st.file_uploader("CSV with a URL column", type=["csv"],
                                label_visibility="collapsed")
    if csv_file:
        import csv as _csv
        import io as _io

        reader = _csv.DictReader(_io.StringIO(csv_file.read().decode("utf-8", errors="replace")))
        _csv_urls = []
        for row in reader:
            # Take the first cell in the row that looks like a URL. DictReader
            # stores surplus fields of a ragged row as a *list* (and missing
            # ones as None), so only string cells may be stripped.
            _hit = next(
                (val.strip() for val in row.values()
                 if isinstance(val, str) and val.strip().startswith("http")),
                None,
            )
            if _hit:
                _csv_urls.append(_hit)

        if _csv_urls:
            st.caption(f"Found {len(_csv_urls)} URL(s) in CSV.")
            if st.button("📥 Import CSV Jobs", key="add_csv_btn", use_container_width=True):
                _n = _queue_url_imports(DEFAULT_DB, _csv_urls)
                st.success(f"Queued {_n} job{'s' if _n != 1 else ''} for import.")
                st.rerun()
        else:
            st.warning("No URLs found — CSV must have a column whose values start with http.")
@st.fragment(run_every=3)
def _scrape_status():
    """List the outcome of recent ``scrape_url`` tasks.

    Reads up to 20 ``scrape_url`` rows from ``background_tasks`` (joined to
    ``jobs``) that were updated within the last five minutes, newest first,
    and renders one line per running, completed, or failed task. Renders
    nothing when there are no such rows.
    """
    import sqlite3 as _sq

    conn = _sq.connect(DEFAULT_DB)
    try:
        conn.row_factory = _sq.Row
        rows = conn.execute(
            """SELECT bt.status, bt.error, j.title, j.company, j.url
               FROM background_tasks bt
               JOIN jobs j ON j.id = bt.job_id
               WHERE bt.task_type = 'scrape_url'
               AND bt.updated_at >= datetime('now', '-5 minutes')
               ORDER BY bt.updated_at DESC LIMIT 20"""
        ).fetchall()
    finally:
        # Release the handle even when the query raises.
        conn.close()

    if not rows:
        return

    st.caption("Recent URL imports:")
    for r in rows:
        status = r["status"]
        if status == "running":
            st.info(f"⏳ Scraping {r['url']}")
        elif status == "completed":
            # Fall back to the URL if the title is NULL.
            label = (r["title"] or r["url"]) + (f" @ {r['company']}" if r["company"] else "")
            st.success(f"{label}")
        elif status == "failed":
            # Separator keeps the URL and the error text from running together.
            st.error(f"{r['url']} — {r['error'] or 'scrape failed'}")


_scrape_status()
st.divider()
# ── Danger zone: purge + re-scrape ────────────────────────────────────────────
def _counted(operation, template, **kwargs):
    """Build a callback that runs ``operation(DEFAULT_DB, **kwargs)`` and formats the count it returns into *template*."""
    def _run():
        return template.format(operation(DEFAULT_DB, **kwargs))
    return _run


def _purge_email_action():
    """Purge email data and return the success message for it."""
    contacts, jobs = purge_email_data(DEFAULT_DB)
    return f"Purged {contacts} email contacts, {jobs} email jobs."


def _wipe_and_rescrape_action():
    """Purge pending/approved/rejected jobs and queue a discovery run; shows no message."""
    purge_jobs(DEFAULT_DB, statuses=["pending", "approved", "rejected"])
    submit_task(DEFAULT_DB, "discovery", 0)
    return None


def _confirm_panel(token, *, heading, caption, trigger, prompt, confirm, action, notice=None):
    """Render one Danger Zone action guarded by a two-step confirmation.

    Args:
        token: Value stored in ``st.session_state["confirm_purge"]`` while this
            action awaits confirmation; also used to build the widget keys.
        heading: Markdown heading for the action.
        caption: Caption under the heading.
        trigger: Label of the button that opens the confirmation.
        prompt: Text shown next to the confirm/cancel buttons.
        confirm: Label of the confirm button.
        action: Zero-argument callable run on confirm. It returns a success
            message to display, or None for no message.
        notice: Streamlit callable used to show *prompt*; defaults to
            ``st.warning``.
    """
    st.markdown(heading)
    st.caption(caption)
    if st.button(trigger, use_container_width=True):
        st.session_state["confirm_purge"] = token
    if st.session_state.get("confirm_purge") != token:
        return

    (notice or st.warning)(prompt)
    yes_col, no_col = st.columns(2)
    # Explicit keys: clicking a second trigger while another panel is open
    # renders two panels in the same run, and unkeyed buttons with the same
    # label would then collide on their auto-generated widget id.
    if yes_col.button(confirm, type="primary", use_container_width=True,
                      key=f"danger_{token}_confirm"):
        message = action()
        if message:
            st.success(message)
        st.session_state.pop("confirm_purge", None)
        st.rerun()
    if no_col.button("Cancel", use_container_width=True, key=f"danger_{token}_cancel"):
        st.session_state.pop("confirm_purge", None)
        st.rerun()


with st.expander("⚠️ Danger Zone", expanded=False):
    st.caption(
        "**Purge** permanently deletes jobs from the local database. "
        "Applied and synced jobs are never touched."
    )
    purge_col, rescrape_col, email_col, tasks_col = st.columns(4)

    with purge_col:
        _confirm_panel(
            "partial",
            heading="**Purge pending & rejected**",
            caption="Removes all _pending_ and _rejected_ listings so the next discovery starts fresh.",
            trigger="🗑 Purge Pending + Rejected",
            prompt="Are you sure? This cannot be undone.",
            confirm="Yes, purge",
            action=_counted(purge_jobs, "Purged {} jobs.", statuses=["pending", "rejected"]),
        )

    with email_col:
        _confirm_panel(
            "email",
            heading="**Purge email data**",
            caption="Clears all email thread logs and email-sourced pending jobs so the next sync starts fresh.",
            trigger="📧 Purge Email Data",
            prompt="This deletes all email contacts and email-sourced jobs. Cannot be undone.",
            confirm="Yes, purge emails",
            action=_purge_email_action,
        )

    with tasks_col:
        # No confirmation step here: the button acts immediately.
        _active = get_active_tasks(DEFAULT_DB)
        st.markdown("**Kill stuck tasks**")
        st.caption(f"Force-fail all queued/running background tasks. Currently **{len(_active)}** active.")
        if st.button("⏹ Kill All Tasks", use_container_width=True, disabled=len(_active) == 0):
            killed = kill_stuck_tasks(DEFAULT_DB)
            st.success(f"Killed {killed} task(s).")
            st.rerun()

    with rescrape_col:
        _confirm_panel(
            "full",
            heading="**Purge all & re-scrape**",
            caption="Wipes _all_ non-applied, non-synced jobs then immediately runs a fresh discovery.",
            trigger="🔄 Purge All + Re-scrape",
            prompt="This will delete ALL pending, approved, and rejected jobs, then re-scrape. Applied and synced records are kept.",
            confirm="Yes, wipe + scrape",
            action=_wipe_and_rescrape_action,
        )

    st.divider()
    pending_col, nonremote_col, approved_col, _ = st.columns(4)

    with pending_col:
        _confirm_panel(
            "pending_only",
            heading="**Purge pending review**",
            caption="Removes only _pending_ listings, keeping your rejected history intact.",
            trigger="🗑 Purge Pending Only",
            prompt="Deletes all pending jobs. Rejected jobs are kept. Cannot be undone.",
            confirm="Yes, purge pending",
            action=_counted(purge_jobs, "Purged {} pending jobs.", statuses=["pending"]),
        )

    with nonremote_col:
        _confirm_panel(
            "non_remote",
            heading="**Purge non-remote**",
            caption="Removes pending/approved/rejected jobs where remote is not set. Keeps anything already in the pipeline.",
            trigger="🏢 Purge On-site Jobs",
            prompt="Deletes all non-remote jobs not yet applied to. Cannot be undone.",
            confirm="Yes, purge on-site",
            action=_counted(purge_non_remote, "Purged {} non-remote jobs."),
        )

    with approved_col:
        _confirm_panel(
            "approved_only",
            heading="**Purge approved (unapplied)**",
            caption="Removes _approved_ jobs you haven't applied to yet — e.g. to reset after a review pass.",
            trigger="🗑 Purge Approved",
            prompt="Deletes all approved-but-not-applied jobs. Cannot be undone.",
            confirm="Yes, purge approved",
            action=_counted(purge_jobs, "Purged {} approved jobs.", statuses=["approved"]),
        )

    st.divider()
    archive_col1, archive_col2, _, _ = st.columns(4)

    with archive_col1:
        _confirm_panel(
            "archive_remaining",
            heading="**Archive remaining**",
            caption=(
                "Move all _pending_ and _rejected_ jobs to archived status. "
                "Archived jobs stay in the DB for dedup — they just won't appear in Job Review."
            ),
            trigger="📦 Archive Pending + Rejected",
            prompt="Jobs will be archived (not deleted) — URLs are kept for dedup.",
            confirm="Yes, archive",
            action=_counted(archive_jobs, "Archived {} jobs.", statuses=["pending", "rejected"]),
            notice=st.info,
        )

    with archive_col2:
        _confirm_panel(
            "archive_approved",
            heading="**Archive approved (unapplied)**",
            caption="Archive _approved_ listings you decided to skip — keeps history without cluttering the apply queue.",
            trigger="📦 Archive Approved",
            prompt="Approved jobs will be archived (not deleted).",
            confirm="Yes, archive approved",
            action=_counted(archive_jobs, "Archived {} approved jobs.", statuses=["approved"]),
            notice=st.info,
        )
# ── Setup banners ─────────────────────────────────────────────────────────────
# Shown only once the profile's wizard is complete. A banner is pending when it
# has not been dismissed and its optional "done" check does not return True.
if _profile and _profile.wizard_complete:
    _dismissed = set(_profile.dismissed_banners)
    _pending_banners = [
        b for b in _SETUP_BANNERS
        if b["key"] not in _dismissed and not b.get("done", lambda: False)()
    ]
    if _pending_banners:
        st.divider()
        st.markdown("#### Finish setting up Peregrine")
        for banner in _pending_banners:
            _bcol, _bdismiss = st.columns([10, 1])
            with _bcol:
                _ic, _lc = st.columns([3, 1])
                _ic.info(f"💡 {banner['text']}")
                with _lc:
                    st.write("")
                    st.page_link("pages/2_Settings.py", label=banner['link_label'], icon="⚙️")
            with _bdismiss:
                st.write("")
                # An empty label renders a blank button; show a visible close glyph.
                if st.button("✕", key=f"dismiss_banner_{banner['key']}", help="Dismiss"):
                    # Re-read user.yaml so the dismissal is appended to the
                    # file's current contents. safe_load returns None for an
                    # empty document, hence the `or {}`.
                    _data = (yaml.safe_load(_USER_YAML.read_text()) if _USER_YAML.exists() else None) or {}
                    # `dismissed_banners:` with no value also loads as None.
                    _dismissed_list = _data.get("dismissed_banners") or []
                    if banner["key"] not in _dismissed_list:
                        _dismissed_list.append(banner["key"])
                    _data["dismissed_banners"] = _dismissed_list
                    _USER_YAML.write_text(yaml.dump(_data, default_flow_style=False, allow_unicode=True))
                    st.rerun()