diff --git a/.env.example b/.env.example index 5f07e82..8f7b8fd 100644 --- a/.env.example +++ b/.env.example @@ -20,3 +20,10 @@ OLLAMA_DEFAULT_MODEL=llama3.2:3b ANTHROPIC_API_KEY= OPENAI_COMPAT_URL= OPENAI_COMPAT_KEY= + +# Feedback button — Forgejo issue filing +FORGEJO_API_TOKEN= +FORGEJO_REPO=pyr0ball/peregrine +FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1 +# GITHUB_TOKEN= # future — enable when public mirror is active +# GITHUB_REPO= # future diff --git a/PRIVACY.md b/PRIVACY.md new file mode 100644 index 0000000..afc7b9f --- /dev/null +++ b/PRIVACY.md @@ -0,0 +1,7 @@ +# Privacy Policy + +CircuitForge LLC's privacy policy applies to this product and is published at: + +**** + +Last reviewed: March 2026. diff --git a/app/Home.py b/app/Home.py index 45cda39..2e51e35 100644 --- a/app/Home.py +++ b/app/Home.py @@ -25,17 +25,45 @@ from scripts.task_runner import submit_task init_db(DEFAULT_DB) +def _email_configured() -> bool: + _e = Path(__file__).parent.parent / "config" / "email.yaml" + if not _e.exists(): + return False + import yaml as _yaml + _cfg = _yaml.safe_load(_e.read_text()) or {} + return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host")) + +def _notion_configured() -> bool: + _n = Path(__file__).parent.parent / "config" / "notion.yaml" + if not _n.exists(): + return False + import yaml as _yaml + _cfg = _yaml.safe_load(_n.read_text()) or {} + return bool(_cfg.get("token")) + +def _keywords_configured() -> bool: + _k = Path(__file__).parent.parent / "config" / "resume_keywords.yaml" + if not _k.exists(): + return False + import yaml as _yaml + _cfg = _yaml.safe_load(_k.read_text()) or {} + return bool(_cfg.get("keywords") or _cfg.get("required") or _cfg.get("preferred")) + _SETUP_BANNERS = [ {"key": "connect_cloud", "text": "Connect a cloud service for resume/cover letter storage", - "link_label": "Settings → Integrations"}, + "link_label": "Settings → Integrations", + "done": _notion_configured}, {"key": "setup_email", 
"text": "Set up email sync to catch recruiter outreach", - "link_label": "Settings → Email"}, + "link_label": "Settings → Email", + "done": _email_configured}, {"key": "setup_email_labels", "text": "Set up email label filters for auto-classification", - "link_label": "Settings → Email (label guide)"}, + "link_label": "Settings → Email (label guide)", + "done": _email_configured}, {"key": "tune_mission", "text": "Tune your mission preferences for better cover letters", "link_label": "Settings → My Profile"}, {"key": "configure_keywords", "text": "Configure keywords and blocklist for smarter search", - "link_label": "Settings → Search"}, + "link_label": "Settings → Search", + "done": _keywords_configured}, {"key": "upload_corpus", "text": "Upload your cover letter corpus for voice fine-tuning", "link_label": "Settings → Fine-Tune"}, {"key": "configure_linkedin", "text": "Configure LinkedIn Easy Apply automation", @@ -513,7 +541,10 @@ with st.expander("⚠️ Danger Zone", expanded=False): # ── Setup banners ───────────────────────────────────────────────────────────── if _profile and _profile.wizard_complete: _dismissed = set(_profile.dismissed_banners) - _pending_banners = [b for b in _SETUP_BANNERS if b["key"] not in _dismissed] + _pending_banners = [ + b for b in _SETUP_BANNERS + if b["key"] not in _dismissed and not b.get("done", lambda: False)() + ] if _pending_banners: st.divider() st.markdown("#### Finish setting up Peregrine") diff --git a/app/app.py b/app/app.py index c4558e5..4d47bd6 100644 --- a/app/app.py +++ b/app/app.py @@ -21,6 +21,7 @@ IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes") import streamlit as st from scripts.db import DEFAULT_DB, init_db, get_active_tasks +from app.feedback import inject_feedback_button import sqlite3 st.set_page_config( @@ -162,7 +163,27 @@ with st.sidebar: icon="🔒", ) _task_indicator() + + # Cloud LLM indicator — shown whenever any cloud backend is active + _llm_cfg_path = Path(__file__).parent.parent 
/ "config" / "llm.yaml" + try: + import yaml as _yaml + from scripts.byok_guard import cloud_backends as _cloud_backends + _active_cloud = _cloud_backends(_yaml.safe_load(_llm_cfg_path.read_text(encoding="utf-8")) or {}) + except Exception: + _active_cloud = [] + if _active_cloud: + _provider_names = ", ".join(b.replace("_", " ").title() for b in _active_cloud) + st.warning( + f"**Cloud LLM active**\n\n" + f"{_provider_names}\n\n" + "AI features send content to this provider. " + "[Change in Settings](2_Settings)", + icon="🔓", + ) + st.divider() st.caption(f"Peregrine {_get_version()}") + inject_feedback_button(page=pg.title) pg.run() diff --git a/app/components/paste_image.py b/app/components/paste_image.py new file mode 100644 index 0000000..9fdb46e --- /dev/null +++ b/app/components/paste_image.py @@ -0,0 +1,31 @@ +""" +Paste-from-clipboard / drag-and-drop image component. + +Uses st.components.v1.declare_component so JS can return image bytes to Python +(st.components.v1.html() is one-way only). No build step required — the +frontend is a single index.html file. +""" +from __future__ import annotations + +import base64 +from pathlib import Path + +import streamlit.components.v1 as components + +_FRONTEND = Path(__file__).parent / "paste_image_ui" + +_paste_image = components.declare_component("paste_image", path=str(_FRONTEND)) + + +def paste_image_component(key: str | None = None) -> bytes | None: + """ + Render the paste/drop zone. Returns PNG/JPEG bytes when an image is + pasted or dropped, or None if nothing has been submitted yet. + """ + result = _paste_image(key=key) + if result: + try: + return base64.b64decode(result) + except Exception: + return None + return None diff --git a/app/components/paste_image_ui/index.html b/app/components/paste_image_ui/index.html new file mode 100644 index 0000000..9fe83cb --- /dev/null +++ b/app/components/paste_image_ui/index.html @@ -0,0 +1,142 @@ + + + + + + + +
+ 📋 + Click here, then Ctrl+V to paste + or drag & drop an image file +
+
+ + + + diff --git a/app/feedback.py b/app/feedback.py new file mode 100644 index 0000000..e0e62f3 --- /dev/null +++ b/app/feedback.py @@ -0,0 +1,247 @@ +""" +Floating feedback button + dialog — thin Streamlit shell. +All business logic lives in scripts/feedback_api.py. +""" +from __future__ import annotations + +import os +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import streamlit as st + +# ── CSS: float the button to the bottom-right corner ───────────────────────── +# Targets the button by its aria-label (set via `help=` parameter). +_FLOAT_CSS = """ + +""" + + +@st.dialog("Send Feedback", width="large") +def _feedback_dialog(page: str) -> None: + """Two-step feedback dialog: form → consent/attachments → submit.""" + from scripts.feedback_api import ( + collect_context, collect_logs, collect_listings, + build_issue_body, create_forgejo_issue, upload_attachment, + ) + from scripts.db import DEFAULT_DB + + # ── Initialise step counter ─────────────────────────────────────────────── + if "fb_step" not in st.session_state: + st.session_state.fb_step = 1 + + # ═════════════════════════════════════════════════════════════════════════ + # STEP 1 — Form + # ═════════════════════════════════════════════════════════════════════════ + if st.session_state.fb_step == 1: + st.subheader("What's on your mind?") + + fb_type = st.selectbox( + "Type", ["Bug", "Feature Request", "Other"], key="fb_type" + ) + fb_title = st.text_input( + "Title", placeholder="Short summary of the issue or idea", key="fb_title" + ) + fb_desc = st.text_area( + "Description", + placeholder="Describe what happened or what you'd like to see...", + key="fb_desc", + ) + if fb_type == "Bug": + st.text_area( + "Reproduction steps", + placeholder="1. Go to...\n2. Click...\n3. 
See error", + key="fb_repro", + ) + + col_cancel, _, col_next = st.columns([1, 3, 1]) + with col_cancel: + if st.button("Cancel"): + _clear_feedback_state() + st.rerun() # intentionally closes the dialog + with col_next: + if st.button("Next →", type="primary"): + # Read widget values NOW (same rerun as the click — values are + # available here even on first click). Copy to non-widget keys + # so they survive step 2's render (Streamlit removes widget + # state for widgets that are no longer rendered). + title = fb_title.strip() + desc = fb_desc.strip() + if not title or not desc: + st.error("Please fill in both Title and Description.") + else: + st.session_state.fb_data_type = fb_type + st.session_state.fb_data_title = title + st.session_state.fb_data_desc = desc + st.session_state.fb_data_repro = st.session_state.get("fb_repro", "") + st.session_state.fb_step = 2 + + # ═════════════════════════════════════════════════════════════════════════ + # STEP 2 — Consent + attachments + # ═════════════════════════════════════════════════════════════════════════ + elif st.session_state.fb_step == 2: + st.subheader("Optional: attach diagnostic data") + + # ── Diagnostic data toggle + preview ───────────────────────────────── + include_diag = st.toggle( + "Include diagnostic data (logs + recent listings)", key="fb_diag" + ) + if include_diag: + with st.expander("Preview what will be sent", expanded=True): + st.caption("**App logs (last 100 lines, PII masked):**") + st.code(collect_logs(100), language=None) + st.caption("**Recent listings (title / company / URL only):**") + for j in collect_listings(DEFAULT_DB, 5): + st.write(f"- {j['title']} @ {j['company']} — {j['url']}") + + # ── Screenshot ──────────────────────────────────────────────────────── + st.divider() + st.caption("**Screenshot** (optional)") + + from app.components.paste_image import paste_image_component + + # Keyed so we can reset the component when the user removes the image + if "fb_paste_key" not in 
st.session_state: + st.session_state.fb_paste_key = 0 + + pasted = paste_image_component(key=f"fb_paste_{st.session_state.fb_paste_key}") + if pasted: + st.session_state.fb_screenshot = pasted + + st.caption("or upload a file:") + uploaded = st.file_uploader( + "Upload screenshot", + type=["png", "jpg", "jpeg"], + label_visibility="collapsed", + key="fb_upload", + ) + if uploaded: + st.session_state.fb_screenshot = uploaded.read() + + if st.session_state.get("fb_screenshot"): + st.image( + st.session_state["fb_screenshot"], + caption="Screenshot preview — this will be attached to the issue", + use_container_width=True, + ) + if st.button("🗑 Remove screenshot"): + st.session_state.pop("fb_screenshot", None) + st.session_state.fb_paste_key = st.session_state.get("fb_paste_key", 0) + 1 + # no st.rerun() — button click already re-renders the dialog + + # ── Attribution consent ─────────────────────────────────────────────── + st.divider() + submitter: str | None = None + try: + import yaml + _ROOT = Path(__file__).parent.parent + user = yaml.safe_load((_ROOT / "config" / "user.yaml").read_text()) or {} + name = (user.get("name") or "").strip() + email = (user.get("email") or "").strip() + if name or email: + label = f"Include my name & email in the report: **{name}** ({email})" + if st.checkbox(label, key="fb_attr"): + submitter = f"{name} <{email}>" + except Exception: + pass + + # ── Navigation ──────────────────────────────────────────────────────── + col_back, _, col_submit = st.columns([1, 3, 2]) + with col_back: + if st.button("← Back"): + st.session_state.fb_step = 1 + # no st.rerun() — button click already re-renders the dialog + + with col_submit: + if st.button("Submit Feedback", type="primary"): + _submit(page, include_diag, submitter, collect_context, + collect_logs, collect_listings, build_issue_body, + create_forgejo_issue, upload_attachment, DEFAULT_DB) + + +def _submit(page, include_diag, submitter, collect_context, collect_logs, + collect_listings, 
build_issue_body, create_forgejo_issue, + upload_attachment, db_path) -> None: + """Handle form submission: build body, file issue, upload screenshot.""" + with st.spinner("Filing issue…"): + context = collect_context(page) + attachments: dict = {} + if include_diag: + attachments["logs"] = collect_logs(100) + attachments["listings"] = collect_listings(db_path, 5) + if submitter: + attachments["submitter"] = submitter + + fb_type = st.session_state.get("fb_data_type", "Other") + type_key = {"Bug": "bug", "Feature Request": "feature", "Other": "other"}.get( + fb_type, "other" + ) + labels = ["beta-feedback", "needs-triage"] + labels.append( + {"bug": "bug", "feature": "feature-request"}.get(type_key, "question") + ) + + form = { + "type": type_key, + "description": st.session_state.get("fb_data_desc", ""), + "repro": st.session_state.get("fb_data_repro", "") if type_key == "bug" else "", + } + + body = build_issue_body(form, context, attachments) + + try: + result = create_forgejo_issue( + st.session_state.get("fb_data_title", "Feedback"), body, labels + ) + screenshot = st.session_state.get("fb_screenshot") + if screenshot: + upload_attachment(result["number"], screenshot) + + _clear_feedback_state() + st.success(f"Issue filed! [View on Forgejo]({result['url']})") + st.balloons() + + except Exception as exc: + st.error(f"Failed to file issue: {exc}") + + +def _clear_feedback_state() -> None: + for key in [ + "fb_step", + "fb_type", "fb_title", "fb_desc", "fb_repro", # widget keys + "fb_data_type", "fb_data_title", "fb_data_desc", "fb_data_repro", # saved data + "fb_diag", "fb_upload", "fb_attr", "fb_screenshot", "fb_paste_key", + ]: + st.session_state.pop(key, None) + + +def inject_feedback_button(page: str = "Unknown") -> None: + """ + Inject the floating feedback button. Call once per page render in app.py. + Hidden automatically in DEMO_MODE. 
+ """ + if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"): + return + if not os.environ.get("FORGEJO_API_TOKEN"): + return # silently skip if not configured + + st.markdown(_FLOAT_CSS, unsafe_allow_html=True) + if st.button( + "💬 Feedback", + key="__feedback_floating_btn__", + help="Send feedback or report a bug", + ): + _feedback_dialog(page) diff --git a/app/pages/2_Settings.py b/app/pages/2_Settings.py index 383918a..e50f40f 100644 --- a/app/pages/2_Settings.py +++ b/app/pages/2_Settings.py @@ -36,47 +36,18 @@ def save_yaml(path: Path, data: dict) -> None: path.write_text(yaml.dump(data, default_flow_style=False, allow_unicode=True)) -def _suggest_search_terms(current_titles: list[str], resume_path: Path) -> dict: - """Call LLM to suggest additional job titles and exclude keywords.""" - import json - import re - from scripts.llm_router import LLMRouter +from scripts.suggest_helpers import ( + suggest_search_terms as _suggest_search_terms_impl, + suggest_resume_keywords as _suggest_resume_keywords, +) - resume_context = "" - if resume_path.exists(): - resume = load_yaml(resume_path) - lines = [] - for exp in (resume.get("experience_details") or [])[:3]: - pos = exp.get("position", "") - co = exp.get("company", "") - skills = ", ".join((exp.get("skills_acquired") or [])[:5]) - lines.append(f"- {pos} at {co}: {skills}") - resume_context = "\n".join(lines) - - titles_str = "\n".join(f"- {t}" for t in current_titles) - prompt = f"""You are helping a job seeker optimize their search criteria. - -Their background (from resume): -{resume_context or "Customer success and technical account management leader"} - -Current job titles being searched: -{titles_str} - -Suggest: -1. 5-8 additional job titles they might be missing (alternative names, adjacent roles, senior variants) -2. 
3-5 keywords to add to the exclusion filter (to screen out irrelevant postings) - -Return ONLY valid JSON in this exact format: -{{"suggested_titles": ["Title 1", "Title 2"], "suggested_excludes": ["keyword 1", "keyword 2"]}}""" - - result = LLMRouter().complete(prompt).strip() - m = re.search(r"\{.*\}", result, re.DOTALL) - if m: - try: - return json.loads(m.group()) - except Exception: - pass - return {"suggested_titles": [], "suggested_excludes": []} +def _suggest_search_terms(current_titles, resume_path, blocklist=None, user_profile=None): + return _suggest_search_terms_impl( + current_titles, + resume_path, + blocklist or {}, + user_profile or {}, + ) _show_finetune = bool(_profile and _profile.inference_profile in ("single-gpu", "dual-gpu")) @@ -324,6 +295,18 @@ with tab_search: st.session_state["_sp_excludes"] = "\n".join(p.get("exclude_keywords", [])) st.session_state["_sp_hash"] = _sp_hash + # Apply any pending programmatic updates BEFORE widgets are instantiated. + # Streamlit forbids writing to a widget's key after it renders on the same pass; + # button handlers write to *_pending keys instead, consumed here on the next pass. 
+ for _pend, _wkey in [("_sp_titles_pending", "_sp_titles_multi"), + ("_sp_locs_pending", "_sp_locations_multi"), + ("_sp_new_title_pending", "_sp_new_title"), + ("_sp_paste_titles_pending", "_sp_paste_titles"), + ("_sp_new_loc_pending", "_sp_new_loc"), + ("_sp_paste_locs_pending", "_sp_paste_locs")]: + if _pend in st.session_state: + st.session_state[_wkey] = st.session_state.pop(_pend) + # ── Titles ──────────────────────────────────────────────────────────────── _title_row, _suggest_btn_col = st.columns([4, 1]) with _title_row: @@ -331,7 +314,7 @@ with tab_search: with _suggest_btn_col: st.write("") _run_suggest = st.button("✨ Suggest", key="sp_suggest_btn", - help="Ask the LLM to suggest additional titles and exclude keywords based on your resume") + help="Ask the LLM to suggest additional titles and smarter exclude keywords — using your blocklist, mission values, and career background.") st.multiselect( "Job titles", @@ -355,8 +338,8 @@ with tab_search: st.session_state["_sp_title_options"] = _opts if _t not in _sel: _sel.append(_t) - st.session_state["_sp_titles_multi"] = _sel - st.session_state["_sp_new_title"] = "" + st.session_state["_sp_titles_pending"] = _sel + st.session_state["_sp_new_title_pending"] = "" st.rerun() with st.expander("📋 Paste a list of titles"): st.text_area("One title per line", key="_sp_paste_titles", height=80, label_visibility="collapsed", @@ -371,23 +354,34 @@ with tab_search: if _t not in _sel: _sel.append(_t) st.session_state["_sp_title_options"] = _opts - st.session_state["_sp_titles_multi"] = _sel - st.session_state["_sp_paste_titles"] = "" + st.session_state["_sp_titles_pending"] = _sel + st.session_state["_sp_paste_titles_pending"] = "" st.rerun() # ── LLM suggestions panel ──────────────────────────────────────────────── if _run_suggest: _current_titles = list(st.session_state.get("_sp_titles_multi", [])) + _blocklist = load_yaml(BLOCKLIST_CFG) + _user_profile = load_yaml(USER_CFG) with st.spinner("Asking LLM for 
suggestions…"): - suggestions = _suggest_search_terms(_current_titles, RESUME_PATH) - # Add suggested titles to options list (not auto-selected — user picks from dropdown) - _opts = list(st.session_state.get("_sp_title_options", [])) - for _t in suggestions.get("suggested_titles", []): - if _t not in _opts: - _opts.append(_t) - st.session_state["_sp_title_options"] = _opts - st.session_state["_sp_suggestions"] = suggestions - st.rerun() + try: + suggestions = _suggest_search_terms(_current_titles, RESUME_PATH, _blocklist, _user_profile) + except RuntimeError as _e: + st.warning( + f"No LLM backend available: {_e}. " + "Check that Ollama is running and has GPU access, or enable a cloud backend in Settings → System → LLM.", + icon="⚠️", + ) + suggestions = None + if suggestions is not None: + # Add suggested titles to options list (not auto-selected — user picks from dropdown) + _opts = list(st.session_state.get("_sp_title_options", [])) + for _t in suggestions.get("suggested_titles", []): + if _t not in _opts: + _opts.append(_t) + st.session_state["_sp_title_options"] = _opts + st.session_state["_sp_suggestions"] = suggestions + st.rerun() if st.session_state.get("_sp_suggestions"): sugg = st.session_state["_sp_suggestions"] @@ -436,8 +430,8 @@ with tab_search: st.session_state["_sp_loc_options"] = _opts if _l not in _sel: _sel.append(_l) - st.session_state["_sp_locations_multi"] = _sel - st.session_state["_sp_new_loc"] = "" + st.session_state["_sp_locs_pending"] = _sel + st.session_state["_sp_new_loc_pending"] = "" st.rerun() with st.expander("📋 Paste a list of locations"): st.text_area("One location per line", key="_sp_paste_locs", height=80, label_visibility="collapsed", @@ -452,8 +446,8 @@ with tab_search: if _l not in _sel: _sel.append(_l) st.session_state["_sp_loc_options"] = _opts - st.session_state["_sp_locations_multi"] = _sel - st.session_state["_sp_paste_locs"] = "" + st.session_state["_sp_locs_pending"] = _sel + st.session_state["_sp_paste_locs_pending"] 
= "" st.rerun() st.subheader("Exclude Keywords") @@ -747,11 +741,33 @@ with tab_resume: st.balloons() st.divider() - st.subheader("🏷️ Skills & Keywords") - st.caption( - f"Matched against job descriptions to surface {_name}'s most relevant experience " - "and highlight keyword overlap in research briefs. Search the bundled list or add your own." - ) + _kw_header_col, _kw_btn_col = st.columns([5, 1]) + with _kw_header_col: + st.subheader("🏷️ Skills & Keywords") + st.caption( + f"Matched against job descriptions to surface {_name}'s most relevant experience " + "and highlight keyword overlap in research briefs. Search the bundled list or add your own." + ) + with _kw_btn_col: + st.write("") + st.write("") + _run_kw_suggest = st.button( + "✨ Suggest", key="kw_suggest_btn", + help="Ask the LLM to suggest skills, domains, and keywords based on your resume.", + ) + + if _run_kw_suggest: + _kw_current = load_yaml(KEYWORDS_CFG) if KEYWORDS_CFG.exists() else {} + with st.spinner("Asking LLM for keyword suggestions…"): + try: + _kw_sugg = _suggest_resume_keywords(RESUME_PATH, _kw_current) + st.session_state["_kw_suggestions"] = _kw_sugg + except RuntimeError as _e: + st.warning( + f"No LLM backend available: {_e}. 
" + "Check that Ollama is running and has GPU access, or enable a cloud backend in Settings → System → LLM.", + icon="⚠️", + ) from scripts.skills_utils import load_suggestions as _load_sugg, filter_tag as _filter_tag @@ -815,6 +831,33 @@ with tab_resume: save_yaml(KEYWORDS_CFG, kw_data) st.rerun() + # ── LLM keyword suggestion chips ────────────────────────────────────── + _kw_sugg_data = st.session_state.get("_kw_suggestions") + if _kw_sugg_data: + _KW_ICONS = {"skills": "🛠️", "domains": "🏢", "keywords": "🔑"} + _any_shown = False + for _cat, _icon in _KW_ICONS.items(): + _cat_sugg = [t for t in _kw_sugg_data.get(_cat, []) + if t not in kw_data.get(_cat, [])] + if not _cat_sugg: + continue + _any_shown = True + st.caption(f"**{_icon} {_cat.capitalize()} suggestions** — click to add:") + _chip_cols = st.columns(min(len(_cat_sugg), 4)) + for _i, _tag in enumerate(_cat_sugg): + with _chip_cols[_i % 4]: + if st.button(f"+ {_tag}", key=f"kw_sugg_{_cat}_{_i}"): + _new_list = list(kw_data.get(_cat, [])) + [_tag] + kw_data[_cat] = _new_list + save_yaml(KEYWORDS_CFG, kw_data) + _kw_sugg_data[_cat] = [t for t in _kw_sugg_data[_cat] if t != _tag] + st.session_state["_kw_suggestions"] = _kw_sugg_data + st.rerun() + if _any_shown: + if st.button("✕ Clear suggestions", key="kw_clear_sugg"): + st.session_state.pop("_kw_suggestions", None) + st.rerun() + # ── System tab ──────────────────────────────────────────────────────────────── with tab_system: st.caption("Infrastructure, LLM backends, integrations, and service connections.") @@ -1005,18 +1048,88 @@ with tab_system: f"{'✓' if llm_backends.get(n, {}).get('enabled', True) else '✗'} {n}" for n in llm_new_order )) - if st.button("💾 Save LLM settings", type="primary", key="sys_save_llm"): - save_yaml(LLM_CFG, {**llm_cfg, "backends": llm_updated_backends, "fallback_order": llm_new_order}) + # ── Cloud backend warning + acknowledgment ───────────────────────────── + from scripts.byok_guard import cloud_backends as _cloud_backends 
+ + _pending_cfg = {**llm_cfg, "backends": llm_updated_backends, "fallback_order": llm_new_order} + _pending_cloud = set(_cloud_backends(_pending_cfg)) + + _user_cfg_for_ack = yaml.safe_load(USER_CFG.read_text(encoding="utf-8")) or {} if USER_CFG.exists() else {} + _already_acked = set(_user_cfg_for_ack.get("byok_acknowledged_backends", [])) + # Intentional: once a backend is acknowledged, it stays acknowledged even if + # temporarily disabled and re-enabled. This avoids nagging returning users. + _unacknowledged = _pending_cloud - _already_acked + + def _do_save_llm(ack_backends: set) -> None: + """Write llm.yaml and update acknowledgment in user.yaml.""" + save_yaml(LLM_CFG, _pending_cfg) st.session_state.pop("_llm_order", None) st.session_state.pop("_llm_order_cfg_key", None) + if ack_backends: + # Re-read user.yaml at save time (not at render time) to avoid + # overwriting changes made by other processes between render and save. + _uy = yaml.safe_load(USER_CFG.read_text(encoding="utf-8")) or {} if USER_CFG.exists() else {} + _uy["byok_acknowledged_backends"] = sorted(_already_acked | ack_backends) + save_yaml(USER_CFG, _uy) st.success("LLM settings saved!") + if _unacknowledged: + _provider_labels = ", ".join(b.replace("_", " ").title() for b in sorted(_unacknowledged)) + _policy_links = [] + for _b in sorted(_unacknowledged): + if _b in ("anthropic", "claude_code"): + _policy_links.append("[Anthropic privacy policy](https://www.anthropic.com/privacy)") + elif _b == "openai": + _policy_links.append("[OpenAI privacy policy](https://openai.com/policies/privacy-policy)") + _policy_str = " · ".join(_policy_links) if _policy_links else "Review your provider's documentation." + + st.warning( + f"**Cloud LLM active — your data will leave this machine**\n\n" + f"Enabling **{_provider_labels}** means AI features will send content " + f"directly to that provider. 
CircuitForge does not receive or log it, " + f"but their privacy policy governs it — not ours.\n\n" + f"**What leaves your machine:**\n" + f"- Cover letter generation: your resume, job description, and profile\n" + f"- Keyword suggestions: your skills list and resume summary\n" + f"- Survey assistant: survey question text\n" + f"- Company research / Interview prep: company name and role only\n\n" + f"**What stays local always:** your jobs database, email credentials, " + f"license key, and Notion token.\n\n" + f"For sensitive data (disability, immigration, medical), a local model is " + f"strongly recommended. These tools assist with paperwork — they don't " + f"replace professional advice.\n\n" + f"{_policy_str} · " + f"[CircuitForge privacy policy](https://circuitforge.tech/privacy)", + icon="⚠️", + ) + + _ack = st.checkbox( + f"I understand — content will be sent to **{_provider_labels}** when I use AI features", + key="byok_ack_checkbox", + ) + _col_cancel, _col_save = st.columns(2) + if _col_cancel.button("Cancel", key="byok_cancel"): + st.session_state.pop("byok_ack_checkbox", None) + st.rerun() + if _col_save.button( + "💾 Save with cloud LLM", + type="primary", + key="sys_save_llm_cloud", + disabled=not _ack, + ): + _do_save_llm(_unacknowledged) + else: + if st.button("💾 Save LLM settings", type="primary", key="sys_save_llm"): + _do_save_llm(set()) + # ── Services ────────────────────────────────────────────────────────────── with st.expander("🔌 Services", expanded=True): import subprocess as _sp import shutil as _shutil + import os as _os TOKENS_CFG = CONFIG_DIR / "tokens.yaml" COMPOSE_DIR = str(Path(__file__).parent.parent.parent) + _compose_env = {**_os.environ, "COMPOSE_PROJECT_NAME": "peregrine"} _docker_available = bool(_shutil.which("docker")) _sys_profile_name = _profile.inference_profile if _profile else "remote" SYS_SERVICES = [ @@ -1108,7 +1221,7 @@ with tab_system: elif up: if st.button("⏹ Stop", key=f"sys_svc_stop_{svc['port']}", 
use_container_width=True): with st.spinner(f"Stopping {svc['name']}…"): - r = _sp.run(svc["stop"], capture_output=True, text=True, cwd=svc["cwd"]) + r = _sp.run(svc["stop"], capture_output=True, text=True, cwd=svc["cwd"], env=_compose_env) st.success("Stopped.") if r.returncode == 0 else st.error(r.stderr or r.stdout) st.rerun() else: @@ -1119,7 +1232,7 @@ with tab_system: _start_cmd.append(_sel) if st.button("▶ Start", key=f"sys_svc_start_{svc['port']}", use_container_width=True, type="primary"): with st.spinner(f"Starting {svc['name']}…"): - r = _sp.run(_start_cmd, capture_output=True, text=True, cwd=svc["cwd"]) + r = _sp.run(_start_cmd, capture_output=True, text=True, cwd=svc["cwd"], env=_compose_env) st.success("Started!") if r.returncode == 0 else st.error(r.stderr or r.stdout) st.rerun() diff --git a/compose.yml b/compose.yml index 4c4f732..186dd97 100644 --- a/compose.yml +++ b/compose.yml @@ -4,12 +4,20 @@ services: app: build: . + command: > + bash -c "streamlit run app/app.py + --server.port=8501 + --server.headless=true + --server.fileWatcherType=none + 2>&1 | tee /app/data/.streamlit.log" ports: - "${STREAMLIT_PORT:-8501}:8501" volumes: - ./config:/app/config - ./data:/app/data - ${DOCS_DIR:-~/Documents/JobSearch}:/docs + - /var/run/docker.sock:/var/run/docker.sock + - /usr/bin/docker:/usr/bin/docker:ro environment: - STAGING_DB=/app/data/staging.db - DOCS_DIR=/docs @@ -20,6 +28,9 @@ services: - PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-} - RECOMMENDED_PROFILE=${RECOMMENDED_PROFILE:-remote} - STREAMLIT_SERVER_BASE_URL_PATH=${STREAMLIT_BASE_URL_PATH:-} + - FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-} + - FORGEJO_REPO=${FORGEJO_REPO:-} + - FORGEJO_API_URL=${FORGEJO_API_URL:-} - PYTHONUNBUFFERED=1 - PYTHONLOGGING=WARNING depends_on: diff --git a/docs/plans/2026-03-03-feedback-button-design.md b/docs/plans/2026-03-03-feedback-button-design.md new file mode 100644 index 0000000..95bed8d --- /dev/null +++ b/docs/plans/2026-03-03-feedback-button-design.md @@ -0,0 
+1,185 @@ +# Feedback Button — Design + +**Date:** 2026-03-03 +**Status:** Approved +**Product:** Peregrine (`PRNG`) + +--- + +## Overview + +A floating feedback button visible on every Peregrine page that lets beta testers file +Forgejo issues directly from the UI. Supports optional attachment of diagnostic data +(logs, recent listings) and screenshots — all with explicit per-item user consent and +PII masking before anything leaves the app. + +The backend is intentionally decoupled from Streamlit so it can be wrapped in a +FastAPI route when Peregrine moves to a proper Vue/Nuxt frontend. + +--- + +## Goals + +- Zero-friction bug reporting for beta testers +- Privacy-first: nothing is sent without explicit consent + PII preview +- Future-proof: backend callable from Streamlit now, FastAPI/Vue later +- GitHub support as a config option once public mirrors are active + +--- + +## Architecture + +### Files + +| File | Role | +|---|---| +| `scripts/feedback_api.py` | Pure Python backend — no Streamlit imports | +| `app/feedback.py` | Thin Streamlit UI shell — floating button + dialog | +| `app/components/screenshot_capture.py` | Custom Streamlit component using `html2canvas` | +| `app/app.py` | One-line addition: inject feedback button in sidebar block | +| `.env` / `.env.example` | Add `FORGEJO_API_TOKEN`, `FORGEJO_REPO` | + +### Config additions (`.env`) + +``` +FORGEJO_API_TOKEN=... +FORGEJO_REPO=pyr0ball/peregrine +# GITHUB_TOKEN= # future — filed when public mirror is active +# GITHUB_REPO= # future +``` + +--- + +## Backend (`scripts/feedback_api.py`) + +Pure Python. No Streamlit dependency. All functions return plain dicts or bytes. 
+ +### Functions + +| Function | Signature | Purpose | +|---|---|---| +| `collect_context` | `(page: str) → dict` | Page name, app version (git describe), tier, LLM backend, OS, timestamp | +| `collect_logs` | `(n: int = 100) → str` | Tail of `.streamlit.log`; `mask_pii()` applied before return | +| `collect_listings` | `(n: int = 5) → list[dict]` | Recent jobs from DB — `title`, `company`, `url` only | +| `mask_pii` | `(text: str) → str` | Regex: emails → `[email redacted]`, phones → `[phone redacted]` | +| `build_issue_body` | `(form, context, attachments) → str` | Assembles final markdown issue body | +| `create_forgejo_issue` | `(title, body, labels) → dict` | POST to Forgejo API; returns `{number, url}` | +| `upload_attachment` | `(issue_number, image_bytes, filename) → str` | POST screenshot to issue assets; returns attachment URL | +| `screenshot_page` | `(port: int) → bytes` | Server-side Playwright fallback screenshot; returns PNG bytes | + +### Issue creation — two-step + +1. `create_forgejo_issue()` → issue number +2. `upload_attachment(issue_number, ...)` → attachment auto-linked by Forgejo + +### Labels + +Always applied: `beta-feedback`, `needs-triage` +Type-based: `bug` / `feature-request` / `question` + +### Future multi-destination + +`feedback_api.py` checks both `FORGEJO_API_TOKEN` and `GITHUB_TOKEN` (when present) +and files to whichever destinations are configured. No structural changes needed when +GitHub support is added. + +--- + +## UI Flow (`app/feedback.py`) + +### Floating button + +A real Streamlit button inside a keyed container. CSS injected via +`st.markdown(unsafe_allow_html=True)` applies `position: fixed; bottom: 2rem; +right: 2rem; z-index: 9999` to the container. Hidden entirely when `IS_DEMO=true`. 
+ +### Dialog — Step 1: Form + +- **Type selector:** Bug / Feature Request / Other +- **Title:** short text input +- **Description:** free-text area +- **Reproduction steps:** appears only when Bug is selected (adaptive) + +### Dialog — Step 2: Consent + Attachments + +``` +┌─ Include diagnostic data? ─────────────────────────────┐ +│ [toggle] │ +│ └─ if on → expandable preview of exactly what's sent │ +│ (logs tailed + masked, listings title/company/url) │ +├─ Screenshot ───────────────────────────────────────────┤ +│ [📸 Capture current view] → inline thumbnail preview │ +│ [📎 Upload screenshot] → inline thumbnail preview │ +├─ Attribution ──────────────────────────────────────────┤ +│ [ ] Include my name & email (shown from user.yaml) │ +└────────────────────────────────────────────────────────┘ +[Submit] +``` + +### Post-submit + +- Success: "Issue filed → [view on Forgejo]" with clickable link +- Error: friendly message + copy-to-clipboard fallback (issue body as text) + +--- + +## Screenshot Component (`app/components/screenshot_capture.py`) + +Uses `st.components.v1.html()` with `html2canvas` loaded from CDN (no build step). +On capture, JS renders the visible viewport to a canvas, encodes as base64 PNG, and +returns it to Python via the component value. + +Server-side Playwright (`screenshot_page()`) is the fallback when the JS component +can't return data (e.g., cross-origin iframe restrictions). It screenshots +`localhost:` from the server — captures layout/UI state but not user session +state. + +Both paths return `bytes`. The UI shows an inline thumbnail so the user can review +before submitting. + +--- + +## Privacy & PII Rules + +| Data | Included? 
| Condition |
+|---|---|---|
+| App logs | Optional | User toggles on + sees masked preview |
+| Job listings | Optional (title/company/url only) | User toggles on |
+| Cover letters / notes | Never | — |
+| Resume content | Never | — |
+| Name + email | Optional | User checks attribution checkbox |
+| Screenshots | Optional | User captures or uploads |
+
+`mask_pii()` is applied to all text before it appears in the preview and before
+submission. Users see exactly what will be sent.
+
+---
+
+## Future: FastAPI wrapper
+
+When Peregrine moves to Vue/Nuxt:
+
+```python
+# server.py (FastAPI)
+from scripts.feedback_api import build_issue_body, create_forgejo_issue, upload_attachment
+
+@app.post("/api/feedback")
+async def submit_feedback(payload: FeedbackPayload):
+    body = build_issue_body(payload.form, payload.context, payload.attachments)
+    result = create_forgejo_issue(payload.title, body, payload.labels)
+    if payload.screenshot:
+        upload_attachment(result["number"], payload.screenshot, "screenshot.png")
+    return {"url": result["url"]}
+```
+
+The Streamlit layer is replaced by a Vue `<FeedbackButton>` component that POSTs
+to this endpoint. Backend unchanged.
+
+---
+
+## Out of Scope
+
+- Rate limiting (beta testers are trusted; add later if abused)
+- Issue deduplication
+- In-app issue status tracking
+- Video / screen recording
diff --git a/docs/plans/2026-03-03-feedback-button-plan.md b/docs/plans/2026-03-03-feedback-button-plan.md
new file mode 100644
index 0000000..7c53195
--- /dev/null
+++ b/docs/plans/2026-03-03-feedback-button-plan.md
@@ -0,0 +1,1136 @@
+# Feedback Button — Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Add a floating feedback button to Peregrine that lets beta testers file Forgejo issues directly from the UI, with optional PII-masked diagnostic data and screenshot attachments.
+ +**Architecture:** Pure Python backend in `scripts/feedback_api.py` (no Streamlit dep, wrappable in FastAPI later) + thin Streamlit shell in `app/feedback.py`. Floating button uses CSS `position: fixed` targeting via `aria-label`. Screenshots via server-side Playwright (capture) and `st.file_uploader` (upload). + +**Tech Stack:** Python `requests`, `re`, `playwright` (optional), Streamlit 1.54 (`@st.dialog`), Forgejo REST API v1. + +--- + +## Task 1: Project setup — env config + Playwright dep + +**Files:** +- Modify: `.env.example` +- Modify: `requirements.txt` + +**Step 1: Add env vars to `.env.example`** + +Open `.env.example` and add after the existing API keys block: + +``` +# Feedback button — Forgejo issue filing +FORGEJO_API_TOKEN= +FORGEJO_REPO=pyr0ball/peregrine +FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1 +# GITHUB_TOKEN= # future — enable when public mirror is active +# GITHUB_REPO= # future +``` + +**Step 2: Add playwright to requirements.txt** + +Add to `requirements.txt`: + +``` +playwright>=1.40 +``` + +**Step 3: Install playwright and its browsers** + +```bash +conda run -n job-seeker pip install playwright +conda run -n job-seeker playwright install chromium --with-deps +``` + +Expected: chromium browser downloaded to playwright cache. 
+ +**Step 4: Add FORGEJO_API_TOKEN to your local `.env`** + +Open `.env` and add: +``` +FORGEJO_API_TOKEN=your-forgejo-api-token-here +FORGEJO_REPO=pyr0ball/peregrine +FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1 +``` + +**Step 5: Commit** + +```bash +git add requirements.txt .env.example +git commit -m "chore: add playwright dep and Forgejo env config for feedback button" +``` + +--- + +## Task 2: Backend — PII masking + context collection + +**Files:** +- Create: `scripts/feedback_api.py` +- Create: `tests/test_feedback_api.py` + +**Step 1: Write failing tests** + +Create `tests/test_feedback_api.py`: + +```python +"""Tests for the feedback API backend.""" +import pytest +from unittest.mock import patch, MagicMock +from pathlib import Path + + +# ── mask_pii ────────────────────────────────────────────────────────────────── + +def test_mask_pii_email(): + from scripts.feedback_api import mask_pii + assert mask_pii("contact foo@bar.com please") == "contact [email redacted] please" + + +def test_mask_pii_phone_dashes(): + from scripts.feedback_api import mask_pii + assert mask_pii("call 555-123-4567 now") == "call [phone redacted] now" + + +def test_mask_pii_phone_parens(): + from scripts.feedback_api import mask_pii + assert mask_pii("(555) 867-5309") == "[phone redacted]" + + +def test_mask_pii_clean_text(): + from scripts.feedback_api import mask_pii + assert mask_pii("no sensitive data here") == "no sensitive data here" + + +def test_mask_pii_multiple_emails(): + from scripts.feedback_api import mask_pii + result = mask_pii("a@b.com and c@d.com") + assert result == "[email redacted] and [email redacted]" + + +# ── collect_context ─────────────────────────────────────────────────────────── + +def test_collect_context_required_keys(): + from scripts.feedback_api import collect_context + ctx = collect_context("Home") + for key in ("page", "version", "tier", "llm_backend", "os", "timestamp"): + assert key in ctx, f"missing key: {key}" + + +def 
test_collect_context_page_value(): + from scripts.feedback_api import collect_context + ctx = collect_context("MyPage") + assert ctx["page"] == "MyPage" + + +def test_collect_context_timestamp_is_utc(): + from scripts.feedback_api import collect_context + ctx = collect_context("X") + assert ctx["timestamp"].endswith("Z") +``` + +**Step 2: Run to verify they fail** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -v 2>&1 | head -30 +``` + +Expected: `ModuleNotFoundError: No module named 'scripts.feedback_api'` + +**Step 3: Create `scripts/feedback_api.py` with mask_pii and collect_context** + +```python +""" +Feedback API — pure Python backend, no Streamlit imports. +Called directly from app/feedback.py now; wrappable in a FastAPI route later. +""" +from __future__ import annotations + +import os +import platform +import re +import subprocess +from datetime import datetime, timezone +from pathlib import Path + +import requests +import yaml + +_ROOT = Path(__file__).parent.parent +_EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}") +_PHONE_RE = re.compile(r"(\+?1[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}") + + +def mask_pii(text: str) -> str: + """Redact email addresses and phone numbers from text.""" + text = _EMAIL_RE.sub("[email redacted]", text) + text = _PHONE_RE.sub("[phone redacted]", text) + return text + + +def collect_context(page: str) -> dict: + """Collect app context: page, version, tier, LLM backend, OS, timestamp.""" + # App version from git + try: + version = subprocess.check_output( + ["git", "describe", "--tags", "--always"], + cwd=_ROOT, text=True, timeout=5, + ).strip() + except Exception: + version = "dev" + + # Tier from user.yaml + tier = "unknown" + try: + user = yaml.safe_load((_ROOT / "config" / "user.yaml").read_text()) or {} + tier = user.get("tier", "unknown") + except Exception: + pass + + # LLM backend from llm.yaml + llm_backend = "unknown" + try: + llm = yaml.safe_load((_ROOT / 
"config" / "llm.yaml").read_text()) or {} + llm_backend = llm.get("provider", "unknown") + except Exception: + pass + + return { + "page": page, + "version": version, + "tier": tier, + "llm_backend": llm_backend, + "os": platform.platform(), + "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } +``` + +**Step 4: Run tests to verify they pass** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py::test_mask_pii_email \ + tests/test_feedback_api.py::test_mask_pii_phone_dashes \ + tests/test_feedback_api.py::test_mask_pii_phone_parens \ + tests/test_feedback_api.py::test_mask_pii_clean_text \ + tests/test_feedback_api.py::test_mask_pii_multiple_emails \ + tests/test_feedback_api.py::test_collect_context_required_keys \ + tests/test_feedback_api.py::test_collect_context_page_value \ + tests/test_feedback_api.py::test_collect_context_timestamp_is_utc -v +``` + +Expected: 8 PASSED. + +**Step 5: Commit** + +```bash +git add scripts/feedback_api.py tests/test_feedback_api.py +git commit -m "feat: feedback_api — mask_pii + collect_context" +``` + +--- + +## Task 3: Backend — log + listing collection + +**Files:** +- Modify: `scripts/feedback_api.py` +- Modify: `tests/test_feedback_api.py` + +**Step 1: Write failing tests** + +Append to `tests/test_feedback_api.py`: + +```python +# ── collect_logs ────────────────────────────────────────────────────────────── + +def test_collect_logs_returns_string(tmp_path): + from scripts.feedback_api import collect_logs + log = tmp_path / ".streamlit.log" + log.write_text("line1\nline2\nline3\n") + result = collect_logs(log_path=log, n=10) + assert isinstance(result, str) + assert "line3" in result + + +def test_collect_logs_tails_n_lines(tmp_path): + from scripts.feedback_api import collect_logs + log = tmp_path / ".streamlit.log" + log.write_text("\n".join(f"line{i}" for i in range(200))) + result = collect_logs(log_path=log, n=10) + assert "line199" in result + assert "line0" not in result + 
+ +def test_collect_logs_masks_pii(tmp_path): + from scripts.feedback_api import collect_logs + log = tmp_path / "test.log" + log.write_text("user foo@bar.com connected\n") + result = collect_logs(log_path=log) + assert "foo@bar.com" not in result + assert "[email redacted]" in result + + +def test_collect_logs_missing_file(tmp_path): + from scripts.feedback_api import collect_logs + result = collect_logs(log_path=tmp_path / "nonexistent.log") + assert "no log file" in result.lower() + + +# ── collect_listings ────────────────────────────────────────────────────────── + +def test_collect_listings_safe_fields_only(tmp_path): + """Only title, company, url — no cover letters, notes, or emails.""" + from scripts.db import init_db, insert_job + from scripts.feedback_api import collect_listings + db = tmp_path / "test.db" + init_db(db) + insert_job(db, { + "title": "CSM", "company": "Acme", "url": "https://example.com/1", + "source": "linkedin", "location": "Remote", "is_remote": True, + "salary": "", "description": "great role", "date_found": "2026-03-01", + }) + results = collect_listings(db_path=db, n=5) + assert len(results) == 1 + assert set(results[0].keys()) == {"title", "company", "url"} + assert results[0]["title"] == "CSM" + + +def test_collect_listings_respects_n(tmp_path): + from scripts.db import init_db, insert_job + from scripts.feedback_api import collect_listings + db = tmp_path / "test.db" + init_db(db) + for i in range(10): + insert_job(db, { + "title": f"Job {i}", "company": "Acme", "url": f"https://example.com/{i}", + "source": "linkedin", "location": "Remote", "is_remote": False, + "salary": "", "description": "", "date_found": "2026-03-01", + }) + assert len(collect_listings(db_path=db, n=3)) == 3 +``` + +**Step 2: Run to verify they fail** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "collect_logs or collect_listings" -v 2>&1 | head -20 +``` + +Expected: all FAIL with `ImportError` or similar. 
+ +**Step 3: Add functions to `scripts/feedback_api.py`** + +Append after `collect_context`: + +```python +def collect_logs(n: int = 100, log_path: Path | None = None) -> str: + """Return last n lines of the Streamlit log, with PII masked.""" + path = log_path or (_ROOT / ".streamlit.log") + if not path.exists(): + return "(no log file found)" + lines = path.read_text(errors="replace").splitlines() + return mask_pii("\n".join(lines[-n:])) + + +def collect_listings(db_path: Path | None = None, n: int = 5) -> list[dict]: + """Return the n most-recent job listings — title, company, url only.""" + import sqlite3 + from scripts.db import DEFAULT_DB + path = db_path or DEFAULT_DB + conn = sqlite3.connect(path) + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT title, company, url FROM jobs ORDER BY id DESC LIMIT ?", (n,) + ).fetchall() + conn.close() + return [{"title": r["title"], "company": r["company"], "url": r["url"]} for r in rows] +``` + +**Step 4: Run tests to verify they pass** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "collect_logs or collect_listings" -v +``` + +Expected: 6 PASSED. 
+ +**Step 5: Commit** + +```bash +git add scripts/feedback_api.py tests/test_feedback_api.py +git commit -m "feat: feedback_api — collect_logs + collect_listings" +``` + +--- + +## Task 4: Backend — issue body builder + +**Files:** +- Modify: `scripts/feedback_api.py` +- Modify: `tests/test_feedback_api.py` + +**Step 1: Write failing tests** + +Append to `tests/test_feedback_api.py`: + +```python +# ── build_issue_body ────────────────────────────────────────────────────────── + +def test_build_issue_body_contains_description(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "Test", "description": "it broke", "repro": ""} + ctx = {"page": "Home", "version": "v1.0", "tier": "free", + "llm_backend": "ollama", "os": "Linux", "timestamp": "2026-03-03T00:00:00Z"} + body = build_issue_body(form, ctx, {}) + assert "it broke" in body + assert "Home" in body + assert "v1.0" in body + + +def test_build_issue_body_bug_includes_repro(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "desc", "repro": "step 1\nstep 2"} + body = build_issue_body(form, {}, {}) + assert "step 1" in body + assert "Reproduction" in body + + +def test_build_issue_body_no_repro_for_feature(): + from scripts.feedback_api import build_issue_body + form = {"type": "feature", "title": "X", "description": "add dark mode", "repro": "ignored"} + body = build_issue_body(form, {}, {}) + assert "Reproduction" not in body + + +def test_build_issue_body_logs_in_collapsible(): + from scripts.feedback_api import build_issue_body + form = {"type": "other", "title": "X", "description": "Y", "repro": ""} + body = build_issue_body(form, {}, {"logs": "log line 1\nlog line 2"}) + assert "
" in body + assert "log line 1" in body + + +def test_build_issue_body_omits_logs_when_not_provided(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "Y", "repro": ""} + body = build_issue_body(form, {}, {}) + assert "
" not in body + + +def test_build_issue_body_submitter_attribution(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "Y", "repro": ""} + body = build_issue_body(form, {}, {"submitter": "Jane Doe "}) + assert "Jane Doe" in body + + +def test_build_issue_body_listings_shown(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "Y", "repro": ""} + listings = [{"title": "CSM", "company": "Acme", "url": "https://example.com/1"}] + body = build_issue_body(form, {}, {"listings": listings}) + assert "CSM" in body + assert "Acme" in body +``` + +**Step 2: Run to verify they fail** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "build_issue_body" -v 2>&1 | head -20 +``` + +**Step 3: Add `build_issue_body` to `scripts/feedback_api.py`** + +Append after `collect_listings`: + +```python +def build_issue_body(form: dict, context: dict, attachments: dict) -> str: + """Assemble the Forgejo issue markdown body from form data, context, and attachments.""" + _TYPE_LABELS = {"bug": "🐛 Bug", "feature": "✨ Feature Request", "other": "💬 Other"} + lines: list[str] = [ + f"## {_TYPE_LABELS.get(form.get('type', 'other'), '💬 Other')}", + "", + form.get("description", ""), + "", + ] + + if form.get("type") == "bug" and form.get("repro"): + lines += ["### Reproduction Steps", "", form["repro"], ""] + + if context: + lines += ["### Context", ""] + for k, v in context.items(): + lines.append(f"- **{k}:** {v}") + lines.append("") + + if attachments.get("logs"): + lines += [ + "
", + "App Logs (last 100 lines)", + "", + "```", + attachments["logs"], + "```", + "
", + "", + ] + + if attachments.get("listings"): + lines += ["### Recent Listings", ""] + for j in attachments["listings"]: + lines.append(f"- [{j['title']} @ {j['company']}]({j['url']})") + lines.append("") + + if attachments.get("submitter"): + lines += ["---", f"*Submitted by: {attachments['submitter']}*"] + + return "\n".join(lines) +``` + +**Step 4: Run tests to verify they pass** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "build_issue_body" -v +``` + +Expected: 7 PASSED. + +**Step 5: Commit** + +```bash +git add scripts/feedback_api.py tests/test_feedback_api.py +git commit -m "feat: feedback_api — build_issue_body" +``` + +--- + +## Task 5: Backend — Forgejo API client + +**Files:** +- Modify: `scripts/feedback_api.py` +- Modify: `tests/test_feedback_api.py` + +**Step 1: Write failing tests** + +Append to `tests/test_feedback_api.py`: + +```python +# ── Forgejo API ─────────────────────────────────────────────────────────────── + +@patch("scripts.feedback_api.requests.get") +@patch("scripts.feedback_api.requests.post") +def test_ensure_labels_uses_existing(mock_post, mock_get): + from scripts.feedback_api import _ensure_labels + mock_get.return_value.ok = True + mock_get.return_value.json.return_value = [ + {"name": "beta-feedback", "id": 1}, + {"name": "bug", "id": 2}, + ] + ids = _ensure_labels( + ["beta-feedback", "bug"], + "https://example.com/api/v1", {"Authorization": "token x"}, "owner/repo" + ) + assert ids == [1, 2] + mock_post.assert_not_called() + + +@patch("scripts.feedback_api.requests.get") +@patch("scripts.feedback_api.requests.post") +def test_ensure_labels_creates_missing(mock_post, mock_get): + from scripts.feedback_api import _ensure_labels + mock_get.return_value.ok = True + mock_get.return_value.json.return_value = [] + mock_post.return_value.ok = True + mock_post.return_value.json.return_value = {"id": 99} + ids = _ensure_labels( + ["needs-triage"], + "https://example.com/api/v1", {"Authorization": "token 
x"}, "owner/repo" + ) + assert 99 in ids + + +@patch("scripts.feedback_api._ensure_labels", return_value=[1, 2]) +@patch("scripts.feedback_api.requests.post") +def test_create_forgejo_issue_success(mock_post, mock_labels, monkeypatch): + from scripts.feedback_api import create_forgejo_issue + monkeypatch.setenv("FORGEJO_API_TOKEN", "testtoken") + monkeypatch.setenv("FORGEJO_REPO", "owner/repo") + monkeypatch.setenv("FORGEJO_API_URL", "https://example.com/api/v1") + mock_post.return_value.status_code = 201 + mock_post.return_value.raise_for_status = lambda: None + mock_post.return_value.json.return_value = {"number": 42, "html_url": "https://example.com/issues/42"} + result = create_forgejo_issue("Test issue", "body text", ["beta-feedback", "bug"]) + assert result["number"] == 42 + assert "42" in result["url"] + + +@patch("scripts.feedback_api.requests.post") +def test_upload_attachment_returns_url(mock_post, monkeypatch): + from scripts.feedback_api import upload_attachment + monkeypatch.setenv("FORGEJO_API_TOKEN", "testtoken") + monkeypatch.setenv("FORGEJO_REPO", "owner/repo") + monkeypatch.setenv("FORGEJO_API_URL", "https://example.com/api/v1") + mock_post.return_value.status_code = 201 + mock_post.return_value.raise_for_status = lambda: None + mock_post.return_value.json.return_value = { + "uuid": "abc", "browser_download_url": "https://example.com/assets/abc" + } + url = upload_attachment(42, b"\x89PNG", "screenshot.png") + assert url == "https://example.com/assets/abc" +``` + +**Step 2: Run to verify they fail** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "label or issue or attach" -v 2>&1 | head -20 +``` + +**Step 3: Add Forgejo API functions to `scripts/feedback_api.py`** + +Append after `build_issue_body`: + +```python +def _ensure_labels( + label_names: list[str], base_url: str, headers: dict, repo: str +) -> list[int]: + """Look up or create Forgejo labels by name. 
Returns list of IDs.""" + _COLORS = { + "beta-feedback": "#0075ca", + "needs-triage": "#e4e669", + "bug": "#d73a4a", + "feature-request": "#a2eeef", + "question": "#d876e3", + } + resp = requests.get(f"{base_url}/repos/{repo}/labels", headers=headers, timeout=10) + existing = {lb["name"]: lb["id"] for lb in resp.json()} if resp.ok else {} + ids: list[int] = [] + for name in label_names: + if name in existing: + ids.append(existing[name]) + else: + r = requests.post( + f"{base_url}/repos/{repo}/labels", + headers=headers, + json={"name": name, "color": _COLORS.get(name, "#ededed")}, + timeout=10, + ) + if r.ok: + ids.append(r.json()["id"]) + return ids + + +def create_forgejo_issue(title: str, body: str, labels: list[str]) -> dict: + """Create a Forgejo issue. Returns {"number": int, "url": str}.""" + token = os.environ.get("FORGEJO_API_TOKEN", "") + repo = os.environ.get("FORGEJO_REPO", "pyr0ball/peregrine") + base = os.environ.get("FORGEJO_API_URL", "https://git.opensourcesolarpunk.com/api/v1") + headers = {"Authorization": f"token {token}", "Content-Type": "application/json"} + label_ids = _ensure_labels(labels, base, headers, repo) + resp = requests.post( + f"{base}/repos/{repo}/issues", + headers=headers, + json={"title": title, "body": body, "labels": label_ids}, + timeout=15, + ) + resp.raise_for_status() + data = resp.json() + return {"number": data["number"], "url": data["html_url"]} + + +def upload_attachment( + issue_number: int, image_bytes: bytes, filename: str = "screenshot.png" +) -> str: + """Upload a screenshot to an existing Forgejo issue. 
Returns attachment URL.""" + token = os.environ.get("FORGEJO_API_TOKEN", "") + repo = os.environ.get("FORGEJO_REPO", "pyr0ball/peregrine") + base = os.environ.get("FORGEJO_API_URL", "https://git.opensourcesolarpunk.com/api/v1") + headers = {"Authorization": f"token {token}"} + resp = requests.post( + f"{base}/repos/{repo}/issues/{issue_number}/assets", + headers=headers, + files={"attachment": (filename, image_bytes, "image/png")}, + timeout=15, + ) + resp.raise_for_status() + return resp.json().get("browser_download_url", "") +``` + +**Step 4: Run tests to verify they pass** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "label or issue or attach" -v +``` + +Expected: 4 PASSED. + +**Step 5: Run full test suite to check for regressions** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -v +``` + +Expected: all PASSED. + +**Step 6: Commit** + +```bash +git add scripts/feedback_api.py tests/test_feedback_api.py +git commit -m "feat: feedback_api — Forgejo label management + issue filing + attachment upload" +``` + +--- + +## Task 6: Backend — server-side screenshot capture + +**Files:** +- Modify: `scripts/feedback_api.py` +- Modify: `tests/test_feedback_api.py` + +**Step 1: Write failing tests** + +Append to `tests/test_feedback_api.py`: + +```python +# ── screenshot_page ─────────────────────────────────────────────────────────── + +def test_screenshot_page_returns_none_without_playwright(monkeypatch): + """If playwright is not installed, screenshot_page returns None gracefully.""" + import builtins + real_import = builtins.__import__ + def mock_import(name, *args, **kwargs): + if name == "playwright.sync_api": + raise ImportError("no playwright") + return real_import(name, *args, **kwargs) + monkeypatch.setattr(builtins, "__import__", mock_import) + from scripts.feedback_api import screenshot_page + result = screenshot_page(port=9999) + assert result is None + + +@patch("scripts.feedback_api.sync_playwright") +def 
test_screenshot_page_returns_bytes(mock_pw): + """screenshot_page returns PNG bytes when playwright is available.""" + from scripts.feedback_api import screenshot_page + fake_png = b"\x89PNG\r\n\x1a\n" + mock_context = MagicMock() + mock_pw.return_value.__enter__ = lambda s: mock_context + mock_pw.return_value.__exit__ = MagicMock(return_value=False) + mock_browser = mock_context.chromium.launch.return_value + mock_page = mock_browser.new_page.return_value + mock_page.screenshot.return_value = fake_png + result = screenshot_page(port=8502) + assert result == fake_png +``` + +**Step 2: Run to verify they fail** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "screenshot" -v 2>&1 | head -20 +``` + +**Step 3: Add `screenshot_page` to `scripts/feedback_api.py`** + +Append after `upload_attachment`. Note the `try/except ImportError` for graceful degradation: + +```python +def screenshot_page(port: int | None = None) -> bytes | None: + """ + Capture a screenshot of the running Peregrine UI using Playwright. + Returns PNG bytes, or None if Playwright is not installed. + """ + try: + from playwright.sync_api import sync_playwright + except ImportError: + return None + + if port is None: + port = int(os.environ.get("STREAMLIT_PORT", os.environ.get("STREAMLIT_SERVER_PORT", "8502"))) + + try: + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page(viewport={"width": 1280, "height": 800}) + page.goto(f"http://localhost:{port}", timeout=10_000) + page.wait_for_load_state("networkidle", timeout=10_000) + png = page.screenshot(full_page=False) + browser.close() + return png + except Exception: + return None +``` + +Also add the import at the top of the try block to satisfy the mock test. The import at the function level is correct — do NOT add it to the module level, because we want the graceful degradation path to work. 
+ +**Step 4: Run tests to verify they pass** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -k "screenshot" -v +``` + +Expected: 2 PASSED. + +**Step 5: Run full backend test suite** + +```bash +conda run -n job-seeker pytest tests/test_feedback_api.py -v +``` + +Expected: all PASSED. + +**Step 6: Commit** + +```bash +git add scripts/feedback_api.py tests/test_feedback_api.py +git commit -m "feat: feedback_api — screenshot_page with Playwright (graceful fallback)" +``` + +--- + +## Task 7: UI — floating button + feedback dialog + +**Files:** +- Create: `app/feedback.py` + +No pytest tests for Streamlit UI (too brittle for dialogs). Manual verification in Task 8. + +**Step 1: Create `app/feedback.py`** + +```python +""" +Floating feedback button + dialog — thin Streamlit shell. +All business logic lives in scripts/feedback_api.py. +""" +from __future__ import annotations + +import os +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import streamlit as st + +# ── CSS: float the button to the bottom-right corner ───────────────────────── +# Targets the button by its aria-label (set via `help=` parameter). 
+_FLOAT_CSS = """ + +""" + + +@st.dialog("Send Feedback", width="large") +def _feedback_dialog(page: str) -> None: + """Two-step feedback dialog: form → consent/attachments → submit.""" + from scripts.feedback_api import ( + collect_context, collect_logs, collect_listings, + build_issue_body, create_forgejo_issue, + upload_attachment, screenshot_page, + ) + from scripts.db import DEFAULT_DB + + # ── Initialise step counter ─────────────────────────────────────────────── + if "fb_step" not in st.session_state: + st.session_state.fb_step = 1 + + # ═════════════════════════════════════════════════════════════════════════ + # STEP 1 — Form + # ═════════════════════════════════════════════════════════════════════════ + if st.session_state.fb_step == 1: + st.subheader("What's on your mind?") + + fb_type = st.selectbox( + "Type", ["Bug", "Feature Request", "Other"], key="fb_type" + ) + fb_title = st.text_input( + "Title", placeholder="Short summary of the issue or idea", key="fb_title" + ) + fb_desc = st.text_area( + "Description", + placeholder="Describe what happened or what you'd like to see...", + key="fb_desc", + ) + if fb_type == "Bug": + st.text_area( + "Reproduction steps", + placeholder="1. Go to...\n2. Click...\n3. 
See error", + key="fb_repro", + ) + + col_cancel, _, col_next = st.columns([1, 3, 1]) + with col_cancel: + if st.button("Cancel"): + _clear_feedback_state() + st.rerun() + with col_next: + if st.button( + "Next →", + type="primary", + disabled=not st.session_state.get("fb_title", "").strip() + or not st.session_state.get("fb_desc", "").strip(), + ): + st.session_state.fb_step = 2 + st.rerun() + + # ═════════════════════════════════════════════════════════════════════════ + # STEP 2 — Consent + attachments + # ═════════════════════════════════════════════════════════════════════════ + elif st.session_state.fb_step == 2: + st.subheader("Optional: attach diagnostic data") + + # ── Diagnostic data toggle + preview ───────────────────────────────── + include_diag = st.toggle( + "Include diagnostic data (logs + recent listings)", key="fb_diag" + ) + if include_diag: + with st.expander("Preview what will be sent", expanded=True): + st.caption("**App logs (last 100 lines, PII masked):**") + st.code(collect_logs(100), language=None) + st.caption("**Recent listings (title / company / URL only):**") + for j in collect_listings(DEFAULT_DB, 5): + st.write(f"- {j['title']} @ {j['company']} — {j['url']}") + + # ── Screenshot ──────────────────────────────────────────────────────── + st.divider() + st.caption("**Screenshot** (optional)") + col_cap, col_up = st.columns(2) + + with col_cap: + if st.button("📸 Capture current view"): + with st.spinner("Capturing page…"): + png = screenshot_page() + if png: + st.session_state.fb_screenshot = png + else: + st.warning( + "Playwright not available — install it with " + "`playwright install chromium`, or upload a screenshot instead." 
+ ) + + with col_up: + uploaded = st.file_uploader( + "Upload screenshot", + type=["png", "jpg", "jpeg"], + label_visibility="collapsed", + key="fb_upload", + ) + if uploaded: + st.session_state.fb_screenshot = uploaded.read() + + if st.session_state.get("fb_screenshot"): + st.image( + st.session_state["fb_screenshot"], + caption="Screenshot preview — this will be attached to the issue", + use_container_width=True, + ) + if st.button("🗑 Remove screenshot"): + st.session_state.pop("fb_screenshot", None) + st.rerun() + + # ── Attribution consent ─────────────────────────────────────────────── + st.divider() + submitter: str | None = None + try: + import yaml + _ROOT = Path(__file__).parent.parent + user = yaml.safe_load((_ROOT / "config" / "user.yaml").read_text()) or {} + name = (user.get("name") or "").strip() + email = (user.get("email") or "").strip() + if name or email: + label = f"Include my name & email in the report: **{name}** ({email})" + if st.checkbox(label, key="fb_attr"): + submitter = f"{name} <{email}>" + except Exception: + pass + + # ── Navigation ──────────────────────────────────────────────────────── + col_back, _, col_submit = st.columns([1, 3, 2]) + with col_back: + if st.button("← Back"): + st.session_state.fb_step = 1 + st.rerun() + + with col_submit: + if st.button("Submit Feedback", type="primary"): + _submit(page, include_diag, submitter, collect_context, + collect_logs, collect_listings, build_issue_body, + create_forgejo_issue, upload_attachment, DEFAULT_DB) + + +def _submit(page, include_diag, submitter, collect_context, collect_logs, + collect_listings, build_issue_body, create_forgejo_issue, + upload_attachment, db_path) -> None: + """Handle form submission: build body, file issue, upload screenshot.""" + with st.spinner("Filing issue…"): + context = collect_context(page) + attachments: dict = {} + if include_diag: + attachments["logs"] = collect_logs(100) + attachments["listings"] = collect_listings(db_path, 5) + if submitter: + 
attachments["submitter"] = submitter + + fb_type = st.session_state.get("fb_type", "Other") + type_key = {"Bug": "bug", "Feature Request": "feature", "Other": "other"}.get( + fb_type, "other" + ) + labels = ["beta-feedback", "needs-triage"] + labels.append( + {"bug": "bug", "feature": "feature-request"}.get(type_key, "question") + ) + + form = { + "type": type_key, + "description": st.session_state.get("fb_desc", ""), + "repro": st.session_state.get("fb_repro", "") if type_key == "bug" else "", + } + + body = build_issue_body(form, context, attachments) + + try: + result = create_forgejo_issue( + st.session_state.get("fb_title", "Feedback"), body, labels + ) + screenshot = st.session_state.get("fb_screenshot") + if screenshot: + upload_attachment(result["number"], screenshot) + + _clear_feedback_state() + st.success(f"Issue filed! [View on Forgejo]({result['url']})") + st.balloons() + + except Exception as exc: + st.error(f"Failed to file issue: {exc}") + + +def _clear_feedback_state() -> None: + for key in [ + "fb_step", "fb_type", "fb_title", "fb_desc", "fb_repro", + "fb_diag", "fb_upload", "fb_attr", "fb_screenshot", + ]: + st.session_state.pop(key, None) + + +def inject_feedback_button(page: str = "Unknown") -> None: + """ + Inject the floating feedback button. Call once per page render in app.py. + Hidden automatically in DEMO_MODE. 
+    """
+    if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
+        return
+    if not os.environ.get("FORGEJO_API_TOKEN"):
+        return  # silently skip if not configured
+
+    st.markdown(_FLOAT_CSS, unsafe_allow_html=True)
+    if st.button(
+        "💬 Feedback",
+        key="__feedback_floating_btn__",
+        help="Send feedback or report a bug",
+    ):
+        _feedback_dialog(page)
+```
+
+**Step 2: Verify the file has no syntax errors**
+
+```bash
+conda run -n job-seeker python -c "import app.feedback; print('OK')"
+```
+
+Expected: `OK`
+
+**Step 3: Commit**
+
+```bash
+git add app/feedback.py
+git commit -m "feat: floating feedback button + two-step dialog (Streamlit shell)"
+```
+
+---
+
+## Task 8: Wire into app.py + manual verification
+
+**Files:**
+- Modify: `app/app.py`
+
+**Step 1: Add import and call to `app/app.py`**
+
+Find the `with st.sidebar:` block near the bottom of `app/app.py` (currently ends with `st.caption(f"Peregrine {_get_version()}")`).
+
+Add two lines — the import near the top of the file (after the existing imports), and the call in the sidebar block:
+
+At the top of `app/app.py`, after `from scripts.db import ...`:
+```python
+from app.feedback import inject_feedback_button
+```
+
+At the end of the `with st.sidebar:` block, after `st.caption(...)`:
+```python
+    inject_feedback_button(page=st.session_state.get("__current_page__", "Unknown"))
+```
+
+If you want the actual page name captured, use this variant **instead of** the call above (still placed once, at the end of the sidebar block — `inject_feedback_button` must be called exactly once per render, otherwise Streamlit raises a duplicate-widget-key error for `__feedback_floating_btn__`):
+```python
+    # Track current page for feedback context
+    try:
+        _page_name = pg.pages[st.session_state.get("page_index", 0)].title
+    except Exception:
+        _page_name = "Unknown"
+    inject_feedback_button(page=_page_name)
+```
+
+> **Note on page detection:** Streamlit's `st.navigation` doesn't expose the current page via a simple API. If `pg.pages[...]` doesn't resolve cleanly, simplify to `inject_feedback_button()` with no argument — the page context is a nice-to-have, not critical.
+ +**Step 2: Verify app starts without errors** + +```bash +bash /Library/Development/CircuitForge/peregrine/manage.sh restart +bash /Library/Development/CircuitForge/peregrine/manage.sh logs +``` + +Expected: no Python tracebacks in logs. + +**Step 3: Manual end-to-end verification checklist** + +Open http://localhost:8502 and verify: + +- [ ] A "💬 Feedback" pill button appears fixed in the bottom-right corner +- [ ] Button is visible on Home, Setup, and all other pages +- [ ] Button is NOT visible in DEMO_MODE (set `DEMO_MODE=1` in `.env`, restart, check) +- [ ] Clicking the button opens the two-step dialog +- [ ] Step 1: selecting "Bug" reveals the reproduction steps field; "Feature Request" hides it +- [ ] "Next →" is disabled until title + description are filled +- [ ] Step 2: toggling diagnostic data shows the masked preview (no real emails/phones) +- [ ] "📸 Capture current view" either shows a thumbnail or a warning about Playwright +- [ ] Uploading a PNG via file picker shows a thumbnail +- [ ] "🗑 Remove screenshot" clears the thumbnail +- [ ] Attribution checkbox shows the name/email from user.yaml +- [ ] Submitting files a real issue at https://git.opensourcesolarpunk.com/pyr0ball/peregrine/issues +- [ ] Issue has correct labels (beta-feedback, needs-triage, + type label) +- [ ] If screenshot provided, it appears as an attachment on the Forgejo issue +- [ ] Success message contains a clickable link to the issue + +**Step 4: Commit** + +```bash +git add app/app.py +git commit -m "feat: wire feedback button into app.py sidebar" +``` + +--- + +## Done + +All tasks complete. The feedback button is live. When moving to Vue/Nuxt, `scripts/feedback_api.py` is wrapped in a FastAPI route — no changes to the backend needed. 
+ +**Future tasks (not in scope now):** +- GitHub mirroring (add `GITHUB_TOKEN` + `GITHUB_REPO` env vars, add `create_github_issue()`) +- Rate limiting (if beta users abuse it) +- In-app issue status tracking diff --git a/docs/plans/2026-03-05-digest-parsers-design.md b/docs/plans/2026-03-05-digest-parsers-design.md new file mode 100644 index 0000000..c09926e --- /dev/null +++ b/docs/plans/2026-03-05-digest-parsers-design.md @@ -0,0 +1,242 @@ +# Digest Email Parsers — Design + +**Date:** 2026-03-05 +**Products:** Peregrine (primary), Avocet (bucket) +**Status:** Design approved, ready for implementation planning + +--- + +## Problem + +Peregrine's `imap_sync.py` can extract leads from digest emails, but only for LinkedIn — the +parser is hardcoded inline with no extension point. Adzuna and The Ladders digest emails are +unhandled. Additionally, any digest email from an unknown sender is silently dropped with no +way to collect samples for building new parsers. + +--- + +## Solution Overview + +Two complementary changes: + +1. **`peregrine/scripts/digest_parsers.py`** — a standalone parser module with a sender registry + and dispatcher. `imap_sync.py` calls a single function; the registry handles dispatch. + LinkedIn parser moves here; Adzuna and Ladders parsers are built against real IMAP samples. + +2. **Avocet digest bucket** — when a user labels an email as `digest` in the Avocet label UI, + the email is appended to `data/digest_samples.jsonl`. This file is the corpus for building + and testing new parsers for senders not yet in the registry. + +--- + +## Architecture + +### Production path (Peregrine) + +``` +imap_sync._scan_unmatched_leads() + │ + ├─ parse_digest(from_addr, body) + │ │ + │ ├─ None → unknown sender → fall through to LLM extraction (unchanged) + │ ├─ [] → known sender, nothing found → skip + │ └─ [...] 
→ jobs found → insert_job() + submit_task("scrape_url")
+  │
+  └─ continue (digest email consumed; does not reach LLM path)
+```
+
+### Sample collection path (Avocet)
+
+```
+Avocet label UI
+  │
+  └─ label == "digest"
+        │
+        └─ append to data/digest_samples.jsonl
+              │
+              └─ used as reference for building new parsers
+```
+
+---
+
+## Module: `peregrine/scripts/digest_parsers.py`
+
+### Parser interface
+
+Each parser function:
+
+```python
+def parse_<source>(body: str) -> list[dict]
+```
+
+Returns zero or more job dicts:
+
+```python
+{
+    "title": str,     # job title
+    "company": str,   # company name
+    "location": str,  # location string (may be empty)
+    "url": str,       # canonical URL, tracking params stripped
+    "source": str,    # "linkedin" | "adzuna" | "theladders"
+}
+```
+
+### Dispatcher
+
+```python
+DIGEST_PARSERS: dict[str, tuple[str, Callable[[str], list[dict]]]] = {
+    "jobalerts@linkedin.com": ("linkedin", parse_linkedin),
+    "noreply@adzuna.com": ("adzuna", parse_adzuna),
+    "noreply@theladders.com": ("theladders", parse_theladders),
+}
+
+def parse_digest(from_addr: str, body: str) -> list[dict] | None:
+    """
+    Dispatch to the appropriate parser based on sender address.
+
+    Returns:
+        None        — no parser matched (not a known digest sender)
+        []          — parser matched, no extractable jobs found
+        [dict, ...] — one dict per job card extracted
+    """
+    addr = from_addr.lower()
+    for sender, (source, parse_fn) in DIGEST_PARSERS.items():
+        if sender in addr:
+            return parse_fn(body)
+    return None
+```
+
+Sender matching is a substring check, tolerant of display-name wrappers
+(`"LinkedIn <jobalerts@linkedin.com>"` matches correctly).
+
+### Parsers
+
+**`parse_linkedin`** — moved verbatim from `imap_sync.parse_linkedin_alert()`, renamed.
+No behavior change.
+
+**`parse_adzuna`** — built against real Adzuna digest email bodies pulled from the
+configured IMAP account during implementation. Expected format: job blocks separated
+by consistent delimiters with title, company, location, and a trackable URL per block.
+ +**`parse_theladders`** — same approach. The Ladders already has a web scraper in +`scripts/custom_boards/theladders.py`; URL canonicalization patterns from there apply here. + +--- + +## Changes to `imap_sync.py` + +Replace the LinkedIn-specific block in `_scan_unmatched_leads()` (~lines 561–585): + +**Before:** +```python +if _LINKEDIN_ALERT_SENDER in parsed["from_addr"].lower(): + cards = parse_linkedin_alert(parsed["body"]) + for card in cards: + # ... LinkedIn-specific insert ... + known_message_ids.add(mid) + continue +``` + +**After:** +```python +from scripts.digest_parsers import parse_digest # top of file + +cards = parse_digest(parsed["from_addr"], parsed["body"]) +if cards is not None: + for card in cards: + if card["url"] in existing_urls: + continue + job_id = insert_job(db_path, { + "title": card["title"], + "company": card["company"], + "url": card["url"], + "source": card["source"], + "location": card["location"], + "is_remote": 0, + "salary": "", + "description": "", + "date_found": datetime.now().isoformat()[:10], + }) + if job_id: + submit_task(db_path, "scrape_url", job_id) + existing_urls.add(card["url"]) + new_leads += 1 + print(f"[imap] digest ({card['source']}) → {card['company']} — {card['title']}") + known_message_ids.add(mid) + continue +``` + +`parse_digest` returning `None` falls through to the existing LLM extraction path — all +non-digest recruitment emails are completely unaffected. + +--- + +## Avocet: Digest Bucket + +### File + +`avocet/data/digest_samples.jsonl` — gitignored. An `.example` entry is committed. + +Schema matches the existing label queue (JSONL on-disk schema): + +```json +{"subject": "...", "body": "...", "from_addr": "...", "date": "...", "account": "..."} +``` + +### Trigger + +In `app/label_tool.py` and `app/api.py`: when a `digest` label is applied, append the +email to `digest_samples.jsonl` alongside the normal write to `email_score.jsonl`. 
+ +No Peregrine dependency — if the file path doesn't exist the `data/` directory is created +automatically. Avocet remains fully standalone. + +### Usage + +When a new digest sender appears in the wild: +1. Label representative emails as `digest` in Avocet → samples land in `digest_samples.jsonl` +2. Inspect samples, write `parse_(body)` in `digest_parsers.py` +3. Add the sender string to `DIGEST_PARSERS` +4. Add fixture test in `peregrine/tests/test_digest_parsers.py` + +--- + +## Testing + +### `peregrine/tests/test_digest_parsers.py` + +- Fixture bodies sourced from real IMAP samples (anonymized company names / URLs acceptable) +- Each parser: valid body → expected cards returned +- Each parser: empty / malformed body → `[]`, no exception +- Dispatcher: known sender → correct parser invoked +- Dispatcher: unknown sender → `None` +- URL canonicalization: tracking params stripped, canonical form asserted +- Dedup within digest: same URL appearing twice in one email → one card + +### `avocet/tests/test_digest_bucket.py` + +- `digest` label → row appended to `digest_samples.jsonl` +- Any other label → `digest_samples.jsonl` not touched +- First write creates `data/` directory if absent + +--- + +## Files Changed / Created + +| File | Change | +|------|--------| +| `peregrine/scripts/digest_parsers.py` | **New** — parser module | +| `peregrine/scripts/imap_sync.py` | Replace inline LinkedIn block with `parse_digest()` call | +| `peregrine/tests/test_digest_parsers.py` | **New** — parser unit tests | +| `avocet/app/label_tool.py` | Append to `digest_samples.jsonl` on `digest` label | +| `avocet/app/api.py` | Same — digest bucket write in label endpoint | +| `avocet/tests/test_digest_bucket.py` | **New** — bucket write tests | +| `avocet/data/digest_samples.jsonl.example` | **New** — committed sample for reference | + +--- + +## Out of Scope + +- Avocet → Peregrine direct import trigger (deferred; bucket is sufficient for now) +- `background_tasks` integration for 
digest re-processing (not needed with bucket approach) +- HTML digest parsing (all three senders send plain-text alerts; revisit if needed) diff --git a/docs/plans/2026-03-05-digest-parsers-plan.md b/docs/plans/2026-03-05-digest-parsers-plan.md new file mode 100644 index 0000000..d4e5e8f --- /dev/null +++ b/docs/plans/2026-03-05-digest-parsers-plan.md @@ -0,0 +1,897 @@ +# Digest Email Parsers Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Extract job listings from LinkedIn, Adzuna, and The Ladders digest emails into Peregrine leads, with an Avocet bucket that collects digest samples for future parser development. + +**Architecture:** New `peregrine/scripts/digest_parsers.py` exposes a `parse_digest(from_addr, body)` dispatcher backed by a sender registry. `imap_sync.py` replaces its inline LinkedIn block with one dispatcher call. Avocet's two label paths (`label_tool.py` + `api.py`) append digest-labeled emails to `data/digest_samples.jsonl`. Adzuna and Ladders parsers are built from real IMAP samples fetched in Task 2. + +**Tech Stack:** Python stdlib only — `re`, `json`, `pathlib`. No new dependencies. + +--- + +### Task 1: Create `digest_parsers.py` with dispatcher + LinkedIn parser + +**Files:** +- Create: `peregrine/scripts/digest_parsers.py` +- Create: `peregrine/tests/test_digest_parsers.py` + +**Context:** +`parse_linkedin_alert()` currently lives inline in `imap_sync.py`. We move it here (renamed +`parse_linkedin`) and wrap it in a dispatcher. All other parsers plug into the same registry. 
+
+Run all tests with:
+```
+/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_parsers.py -v
+```
+
+---
+
+**Step 1: Write the failing tests**
+
+Create `peregrine/tests/test_digest_parsers.py`:
+
+```python
+"""Tests for digest email parser registry."""
+import pytest
+from scripts.digest_parsers import parse_digest, parse_linkedin
+
+# ── LinkedIn fixture ──────────────────────────────────────────────────────────
+# Mirrors the plain-text format LinkedIn Job Alert emails actually send.
+# Each job block is separated by a line of 10+ dashes.
+LINKEDIN_BODY = """\
+Software Engineer
+Acme Corp
+San Francisco, CA
+
+View job: https://www.linkedin.com/comm/jobs/view/1111111111/?refId=abc&trackingId=xyz
+
+--------------------------------------------------
+Senior Developer
+Widget Inc
+Remote
+
+View job: https://www.linkedin.com/comm/jobs/view/2222222222/?refId=def
+"""
+
+LINKEDIN_BODY_EMPTY = "No jobs matched your alert this week."
+
+LINKEDIN_BODY_NO_URL = """\
+Software Engineer
+Acme Corp
+San Francisco, CA
+
+--------------------------------------------------
+"""
+
+
+def test_dispatcher_linkedin_sender():
+    cards = parse_digest("LinkedIn <jobalerts@linkedin.com>", LINKEDIN_BODY)
+    assert cards is not None
+    assert len(cards) == 2
+
+
+def test_dispatcher_unknown_sender_returns_none():
+    result = parse_digest("noreply@randomboard.com", LINKEDIN_BODY)
+    assert result is None
+
+
+def test_dispatcher_case_insensitive_sender():
+    cards = parse_digest("JOBALERTS@LINKEDIN.COM", LINKEDIN_BODY)
+    assert cards is not None
+
+
+def test_parse_linkedin_returns_correct_fields():
+    cards = parse_linkedin(LINKEDIN_BODY)
+    assert cards[0]["title"] == "Software Engineer"
+    assert cards[0]["company"] == "Acme Corp"
+    assert cards[0]["location"] == "San Francisco, CA"
+    assert cards[0]["source"] == "linkedin"
+
+
+def test_parse_linkedin_url_canonicalized():
+    """Tracking params stripped; canonical jobs/view/<id>/ form."""
+    cards = parse_linkedin(LINKEDIN_BODY)
+    assert cards[0]["url"] ==
"https://www.linkedin.com/jobs/view/1111111111/" + assert "refId" not in cards[0]["url"] + assert "trackingId" not in cards[0]["url"] + + +def test_parse_linkedin_empty_body_returns_empty_list(): + assert parse_linkedin(LINKEDIN_BODY_EMPTY) == [] + + +def test_parse_linkedin_block_without_url_skipped(): + cards = parse_linkedin(LINKEDIN_BODY_NO_URL) + assert cards == [] +``` + +**Step 2: Run tests to verify they fail** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_parsers.py -v +``` +Expected: `ImportError: cannot import name 'parse_digest'` + +--- + +**Step 3: Write `digest_parsers.py`** + +Create `peregrine/scripts/digest_parsers.py`: + +```python +"""Digest email parser registry for Peregrine. + +Each parser extracts job listings from a known digest sender's plain-text body. +New parsers are added by decorating with @_register(sender_substring, source_name). + +Usage: + from scripts.digest_parsers import parse_digest + + cards = parse_digest(from_addr, body) + # None → unknown sender (fall through to LLM path) + # [] → known sender, nothing extractable + # [...] → list of {title, company, location, url, source} dicts +""" +from __future__ import annotations + +import re +from typing import Callable + +# ── Registry ────────────────────────────────────────────────────────────────── + +# Maps sender substring (lowercased) → (source_name, parse_fn) +DIGEST_PARSERS: dict[str, tuple[str, Callable[[str], list[dict]]]] = {} + + +def _register(sender: str, source: str): + """Decorator to register a parser for a given sender substring.""" + def decorator(fn: Callable[[str], list[dict]]): + DIGEST_PARSERS[sender.lower()] = (source, fn) + return fn + return decorator + + +def parse_digest(from_addr: str, body: str) -> list[dict] | None: + """Dispatch to the appropriate parser based on sender address. + + Returns: + None — no parser matched (caller should use LLM fallback) + [] — known sender, no extractable jobs + [dict, ...] 
— one dict per job card with keys: + title, company, location, url, source + """ + addr = from_addr.lower() + for sender, (source, parse_fn) in DIGEST_PARSERS.items(): + if sender in addr: + return parse_fn(body) + return None + + +# ── Shared helpers ───────────────────────────────────────────────────────────── + +_LINKEDIN_SKIP_PHRASES = { + "promoted", "easily apply", "apply now", "job alert", + "unsubscribe", "linkedin corporation", +} + + +# ── LinkedIn Job Alert ───────────────────────────────────────────────────────── + +@_register("jobalerts@linkedin.com", "linkedin") +def parse_linkedin(body: str) -> list[dict]: + """Parse LinkedIn Job Alert digest email body. + + Blocks are separated by lines of 10+ dashes. Each block contains: + Line 0: job title + Line 1: company + Line 2: location (optional) + 'View job: ' → canonicalized to /jobs/view// + """ + jobs = [] + blocks = re.split(r"\n\s*-{10,}\s*\n", body) + for block in blocks: + lines = [ln.strip() for ln in block.strip().splitlines() if ln.strip()] + + url = None + for line in lines: + m = re.search(r"View job:\s*(https?://\S+)", line, re.IGNORECASE) + if m: + raw_url = m.group(1) + job_id_m = re.search(r"/jobs/view/(\d+)", raw_url) + if job_id_m: + url = f"https://www.linkedin.com/jobs/view/{job_id_m.group(1)}/" + break + if not url: + continue + + content = [ + ln for ln in lines + if not any(p in ln.lower() for p in _LINKEDIN_SKIP_PHRASES) + and not ln.lower().startswith("view job:") + and not ln.startswith("http") + ] + if len(content) < 2: + continue + + jobs.append({ + "title": content[0], + "company": content[1], + "location": content[2] if len(content) > 2 else "", + "url": url, + "source": "linkedin", + }) + return jobs + + +# ── Adzuna Job Alert ─────────────────────────────────────────────────────────── + +@_register("noreply@adzuna.com", "adzuna") +def parse_adzuna(body: str) -> list[dict]: + """Parse Adzuna job alert digest email body. 
+ + TODO: implement after reviewing samples in avocet/data/digest_samples.jsonl + See Task 3 in docs/plans/2026-03-05-digest-parsers-plan.md + """ + return [] + + +# ── The Ladders Job Alert ────────────────────────────────────────────────────── + +@_register("noreply@theladders.com", "theladders") +def parse_theladders(body: str) -> list[dict]: + """Parse The Ladders job alert digest email body. + + TODO: implement after reviewing samples in avocet/data/digest_samples.jsonl + See Task 4 in docs/plans/2026-03-05-digest-parsers-plan.md + """ + return [] +``` + +**Step 4: Run tests to verify they pass** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_parsers.py -v +``` +Expected: all 8 tests PASS + +**Step 5: Commit** + +```bash +git add scripts/digest_parsers.py tests/test_digest_parsers.py +git commit -m "feat: digest parser registry + LinkedIn parser (moved from imap_sync)" +``` + +--- + +### Task 2: Fetch digest samples from IMAP + +**Files:** +- Create: `avocet/scripts/fetch_digest_samples.py` + +**Context:** +We need real Adzuna and Ladders email bodies to write parsers against. This one-off script +searches the configured IMAP account by sender domain and writes results to +`data/digest_samples.jsonl`. Run it once; the output file feeds Tasks 3 and 4. + +--- + +**Step 1: Create the fetch script** + +Create `avocet/scripts/fetch_digest_samples.py`: + +```python +#!/usr/bin/env python3 +"""Fetch digest email samples from IMAP into data/digest_samples.jsonl. + +Searches for emails from known digest sender domains, deduplicates against +any existing samples, and appends new ones. + +Usage: + conda run -n job-seeker python scripts/fetch_digest_samples.py + +Reads config/label_tool.yaml for IMAP credentials (first account used). 
+""" +from __future__ import annotations + +import imaplib +import json +import sys +from pathlib import Path + +import yaml + +ROOT = Path(__file__).parent.parent +CONFIG = ROOT / "config" / "label_tool.yaml" +OUTPUT = ROOT / "data" / "digest_samples.jsonl" + +# Sender domains to search — add new ones here as needed +DIGEST_SENDERS = [ + "adzuna.com", + "theladders.com", + "jobalerts@linkedin.com", +] + +# Import shared helpers from avocet +sys.path.insert(0, str(ROOT)) +from app.imap_fetch import _decode_str, _extract_body, entry_key # noqa: E402 + + +def _load_existing_keys() -> set[str]: + if not OUTPUT.exists(): + return set() + keys = set() + for line in OUTPUT.read_text().splitlines(): + try: + keys.add(entry_key(json.loads(line))) + except Exception: + pass + return keys + + +def main() -> None: + cfg = yaml.safe_load(CONFIG.read_text()) + accounts = cfg.get("accounts", []) + if not accounts: + print("No accounts configured in config/label_tool.yaml") + sys.exit(1) + + acc = accounts[0] + host = acc.get("host", "imap.gmail.com") + port = int(acc.get("port", 993)) + use_ssl = acc.get("use_ssl", True) + username = acc["username"] + password = acc["password"] + folder = acc.get("folder", "INBOX") + days_back = int(acc.get("days_back", 90)) + + from datetime import datetime, timedelta + import email as _email_lib + + since = (datetime.now() - timedelta(days=days_back)).strftime("%d-%b-%Y") + + conn = (imaplib.IMAP4_SSL if use_ssl else imaplib.IMAP4)(host, port) + conn.login(username, password) + conn.select(folder, readonly=True) + + known_keys = _load_existing_keys() + found: list[dict] = [] + seen_uids: dict[bytes, None] = {} + + for sender in DIGEST_SENDERS: + try: + _, data = conn.search(None, f'(FROM "{sender}" SINCE "{since}")') + for uid in (data[0] or b"").split(): + seen_uids[uid] = None + except Exception as exc: + print(f" search error for {sender!r}: {exc}") + + print(f"Found {len(seen_uids)} candidate UIDs across {len(DIGEST_SENDERS)} senders") + + 
for uid in seen_uids: + try: + _, raw_data = conn.fetch(uid, "(RFC822)") + if not raw_data or not raw_data[0]: + continue + msg = _email_lib.message_from_bytes(raw_data[0][1]) + entry = { + "subject": _decode_str(msg.get("Subject", "")), + "body": _extract_body(msg)[:2000], # larger cap for parser dev + "from_addr": _decode_str(msg.get("From", "")), + "date": _decode_str(msg.get("Date", "")), + "account": acc.get("name", username), + } + k = entry_key(entry) + if k not in known_keys: + known_keys.add(k) + found.append(entry) + except Exception as exc: + print(f" fetch error uid {uid}: {exc}") + + conn.logout() + + if not found: + print("No new digest samples found.") + return + + OUTPUT.parent.mkdir(exist_ok=True) + with OUTPUT.open("a", encoding="utf-8") as f: + for entry in found: + f.write(json.dumps(entry) + "\n") + + print(f"Wrote {len(found)} new samples to {OUTPUT}") + + +if __name__ == "__main__": + main() +``` + +**Step 2: Run the fetch script** + +``` +cd /Library/Development/CircuitForge/avocet +conda run -n job-seeker python scripts/fetch_digest_samples.py +``` + +Expected output: `Wrote N new samples to data/digest_samples.jsonl` + +**Step 3: Inspect the samples** + +``` +# View first few entries — look at from_addr and body for Adzuna and Ladders format +conda run -n job-seeker python -c " +import json +from pathlib import Path +for line in Path('data/digest_samples.jsonl').read_text().splitlines()[:10]: + e = json.loads(line) + print('FROM:', e['from_addr']) + print('SUBJECT:', e['subject']) + print('BODY[:500]:', e['body'][:500]) + print('---') +" +``` + +Note down: +- The exact sender addresses for Adzuna and Ladders (update `DIGEST_PARSERS` in `digest_parsers.py` if different from `noreply@adzuna.com` / `noreply@theladders.com`) +- The structure of each job block in the body (separator lines, field order, URL format) + +**Step 4: Commit** + +```bash +cd /Library/Development/CircuitForge/avocet +git add scripts/fetch_digest_samples.py +git commit 
-m "feat: fetch_digest_samples script for building new parsers" +``` + +--- + +### Task 3: Build and test Adzuna parser + +**Files:** +- Modify: `peregrine/scripts/digest_parsers.py` — implement `parse_adzuna` +- Modify: `peregrine/tests/test_digest_parsers.py` — add Adzuna fixtures + tests + +**Context:** +After running Task 2, you have real Adzuna email bodies in `avocet/data/digest_samples.jsonl`. +Inspect them (see Task 2 Step 3), identify the structure, then write the test fixture from +a real sample before implementing the parser. + +--- + +**Step 1: Write a failing Adzuna test** + +Inspect a real Adzuna sample from `data/digest_samples.jsonl` and identify: +- How job blocks are separated (blank lines? dashes? headers?) +- Field order (title first? company first?) +- Where the job URL appears and what format it uses +- Any noise lines to filter (unsubscribe, promo text, etc.) + +Add to `peregrine/tests/test_digest_parsers.py`: + +```python +from scripts.digest_parsers import parse_adzuna + +# Replace ADZUNA_BODY with a real excerpt from avocet/data/digest_samples.jsonl +# Copy 2-3 job blocks verbatim; replace real company names with "Test Co" etc. 
if desired +ADZUNA_BODY = """ + +""" + +def test_dispatcher_adzuna_sender(): + # Update sender string if real sender differs from noreply@adzuna.com + cards = parse_digest("noreply@adzuna.com", ADZUNA_BODY) + assert cards is not None + assert len(cards) >= 1 + +def test_parse_adzuna_fields(): + cards = parse_adzuna(ADZUNA_BODY) + assert cards[0]["title"] # non-empty + assert cards[0]["company"] # non-empty + assert cards[0]["url"].startswith("http") + assert cards[0]["source"] == "adzuna" + +def test_parse_adzuna_url_no_tracking(): + """Adzuna URLs often contain tracking params — strip them.""" + cards = parse_adzuna(ADZUNA_BODY) + # Adjust assertion to match actual URL format once you've seen real samples + for card in cards: + assert "utm_" not in card["url"] + +def test_parse_adzuna_empty_body(): + assert parse_adzuna("No jobs this week.") == [] +``` + +**Step 2: Run tests to verify they fail** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_parsers.py::test_parse_adzuna_fields -v +``` +Expected: FAIL (stub returns `[]`) + +**Step 3: Implement `parse_adzuna` in `digest_parsers.py`** + +Replace the stub body of `parse_adzuna` based on the actual email structure you observed. +Pattern to follow (adapt field positions to match Adzuna's actual format): + +```python +@_register("noreply@adzuna.com", "adzuna") # update sender if needed +def parse_adzuna(body: str) -> list[dict]: + jobs = [] + # Split on whatever delimiter Adzuna uses between blocks + # e.g.: blocks = re.split(r"\n\s*\n{2,}", body) # double blank line + # For each block, extract title, company, location, url + # Strip tracking params from URL: re.sub(r"\?.*", "", url) or parse with urllib + return jobs +``` + +If Adzuna sender differs from `noreply@adzuna.com`, update the `@_register` decorator +**and** the `DIGEST_PARSERS` key in the registry (they're set by the decorator — just change +the decorator argument). 
+ +**Step 4: Run all digest tests** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_parsers.py -v +``` +Expected: all tests PASS + +**Step 5: Commit** + +```bash +cd /Library/Development/CircuitForge/peregrine +git add scripts/digest_parsers.py tests/test_digest_parsers.py +git commit -m "feat: Adzuna digest email parser" +``` + +--- + +### Task 4: Build and test The Ladders parser + +**Files:** +- Modify: `peregrine/scripts/digest_parsers.py` — implement `parse_theladders` +- Modify: `peregrine/tests/test_digest_parsers.py` — add Ladders fixtures + tests + +**Context:** +Same approach as Task 3. The Ladders already has a web scraper in +`scripts/custom_boards/theladders.py` — check it for URL patterns that may apply here. + +--- + +**Step 1: Write failing Ladders tests** + +Inspect a real Ladders sample from `avocet/data/digest_samples.jsonl`. Add to test file: + +```python +from scripts.digest_parsers import parse_theladders + +# Replace with real Ladders body excerpt +LADDERS_BODY = """ + +""" + +def test_dispatcher_ladders_sender(): + cards = parse_digest("noreply@theladders.com", LADDERS_BODY) + assert cards is not None + assert len(cards) >= 1 + +def test_parse_theladders_fields(): + cards = parse_theladders(LADDERS_BODY) + assert cards[0]["title"] + assert cards[0]["company"] + assert cards[0]["url"].startswith("http") + assert cards[0]["source"] == "theladders" + +def test_parse_theladders_empty_body(): + assert parse_theladders("No new jobs.") == [] +``` + +**Step 2: Run tests to verify they fail** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_parsers.py::test_parse_theladders_fields -v +``` +Expected: FAIL + +**Step 3: Implement `parse_theladders`** + +Replace the stub. The Ladders URLs often use redirect wrappers — canonicalize to the +`theladders.com/job/` form if possible, otherwise just strip tracking params. 
+ +**Step 4: Run all digest tests** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_parsers.py -v +``` +Expected: all tests PASS + +**Step 5: Commit** + +```bash +git add scripts/digest_parsers.py tests/test_digest_parsers.py +git commit -m "feat: The Ladders digest email parser" +``` + +--- + +### Task 5: Update `imap_sync.py` to use the dispatcher + +**Files:** +- Modify: `peregrine/scripts/imap_sync.py` + +**Context:** +The LinkedIn-specific block in `_scan_unmatched_leads()` (search for +`_LINKEDIN_ALERT_SENDER`) gets replaced with a generic `parse_digest()` call. +The existing behavior is preserved — only the dispatch mechanism changes. + +--- + +**Step 1: Add the import** + +At the top of `imap_sync.py`, alongside other local imports, add: + +```python +from scripts.digest_parsers import parse_digest +``` + +**Step 2: Find the LinkedIn-specific block** + +Search for `_LINKEDIN_ALERT_SENDER` in `imap_sync.py`. The block looks like: + +```python +if _LINKEDIN_ALERT_SENDER in parsed["from_addr"].lower(): + cards = parse_linkedin_alert(parsed["body"]) + for card in cards: + ... 
+ known_message_ids.add(mid) + continue +``` + +**Step 3: Replace with the generic dispatcher** + +```python +# ── Digest email — dispatch to parser registry ──────────────────────── +cards = parse_digest(parsed["from_addr"], parsed["body"]) +if cards is not None: + for card in cards: + if card["url"] in existing_urls: + continue + job_id = insert_job(db_path, { + "title": card["title"], + "company": card["company"], + "url": card["url"], + "source": card["source"], + "location": card["location"], + "is_remote": 0, + "salary": "", + "description": "", + "date_found": datetime.now().isoformat()[:10], + }) + if job_id: + submit_task(db_path, "scrape_url", job_id) + existing_urls.add(card["url"]) + new_leads += 1 + print(f"[imap] digest ({card['source']}) → {card['company']} — {card['title']}") + known_message_ids.add(mid) + continue +``` + +**Step 4: Remove the now-unused `parse_linkedin_alert` import/definition** + +`parse_linkedin_alert` was defined in `imap_sync.py`. It's now `parse_linkedin` in +`digest_parsers.py`. Delete the old function from `imap_sync.py`. Also remove +`_LINKEDIN_ALERT_SENDER` constant if it's no longer referenced. + +**Step 5: Run the full test suite** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/ -v +``` +Expected: all existing tests still pass; no regressions + +**Step 6: Commit** + +```bash +git add scripts/imap_sync.py +git commit -m "refactor: imap_sync uses digest_parsers dispatcher; remove inline LinkedIn parser" +``` + +--- + +### Task 6: Avocet digest bucket + +**Files:** +- Modify: `avocet/app/label_tool.py` +- Modify: `avocet/app/api.py` +- Create: `avocet/tests/test_digest_bucket.py` +- Create: `avocet/data/digest_samples.jsonl.example` + +**Context:** +When either label path (`_do_label` in the Streamlit UI or `POST /api/label` in the FastAPI +app) assigns the `digest` label, the full email record is appended to +`data/digest_samples.jsonl`. This is the sample corpus for building future parsers. 
+ +--- + +**Step 1: Write failing tests** + +Create `avocet/tests/test_digest_bucket.py`: + +```python +"""Tests for digest sample bucket write behavior.""" +import json +import pytest +from pathlib import Path +from unittest.mock import patch, MagicMock + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _read_bucket(tmp_path: Path) -> list[dict]: + bucket = tmp_path / "data" / "digest_samples.jsonl" + if not bucket.exists(): + return [] + return [json.loads(line) for line in bucket.read_text().splitlines() if line.strip()] + + +SAMPLE_ENTRY = { + "subject": "10 new jobs for you", + "body": "Software Engineer\nAcme Corp\nRemote\nView job: https://example.com/123", + "from_addr": "noreply@adzuna.com", + "date": "Mon, 03 Mar 2026 09:00:00 +0000", + "account": "test@example.com", +} + + +# ── api.py bucket tests ─────────────────────────────────────────────────────── + +def test_api_digest_label_writes_to_bucket(tmp_path): + from app.api import _append_digest_sample + data_dir = tmp_path / "data" + _append_digest_sample(SAMPLE_ENTRY, data_dir=data_dir) + rows = _read_bucket(tmp_path) + assert len(rows) == 1 + assert rows[0]["from_addr"] == "noreply@adzuna.com" + + +def test_api_non_digest_label_does_not_write(tmp_path): + from app.api import _append_digest_sample + data_dir = tmp_path / "data" + # _append_digest_sample should only be called for digest; confirm it writes when called + # Confirm that callers gate on label == "digest" — tested via integration below + _append_digest_sample(SAMPLE_ENTRY, data_dir=data_dir) + rows = _read_bucket(tmp_path) + assert len(rows) == 1 # called directly, always writes + + +def test_api_digest_creates_data_dir(tmp_path): + from app.api import _append_digest_sample + data_dir = tmp_path / "nonexistent" / "data" + assert not data_dir.exists() + _append_digest_sample(SAMPLE_ENTRY, data_dir=data_dir) + assert data_dir.exists() + + +def test_api_digest_appends_multiple(tmp_path): + from app.api 
import _append_digest_sample + data_dir = tmp_path / "data" + _append_digest_sample(SAMPLE_ENTRY, data_dir=data_dir) + _append_digest_sample({**SAMPLE_ENTRY, "subject": "5 more jobs"}, data_dir=data_dir) + rows = _read_bucket(tmp_path) + assert len(rows) == 2 +``` + +**Step 2: Run tests to verify they fail** + +``` +/devl/miniconda3/envs/job-seeker/bin/pytest tests/test_digest_bucket.py -v +``` +Expected: `ImportError: cannot import name '_append_digest_sample'` + +--- + +**Step 3: Add `_append_digest_sample` to `api.py`** + +In `avocet/app/api.py`, add this helper (near the top, after the imports and `_DATA_DIR` +constant): + +```python +_DIGEST_SAMPLES_FILE = _DATA_DIR / "digest_samples.jsonl" + + +def _append_digest_sample(entry: dict, data_dir: Path | None = None) -> None: + """Append a digest-labeled email to the sample corpus.""" + target_dir = data_dir if data_dir is not None else _DATA_DIR + target_dir.mkdir(parents=True, exist_ok=True) + bucket = target_dir / "digest_samples.jsonl" + record = { + "subject": entry.get("subject", ""), + "body": entry.get("body", ""), + "from_addr": entry.get("from_addr", entry.get("from", "")), + "date": entry.get("date", ""), + "account": entry.get("account", entry.get("source", "")), + } + with bucket.open("a", encoding="utf-8") as f: + f.write(json.dumps(record) + "\n") +``` + +Then in `post_label()` (around line 127, after `_append_jsonl(_score_file(), record)`): + +```python + if req.label == "digest": + _append_digest_sample(match) +``` + +**Step 4: Add the same write to `label_tool.py`** + +In `avocet/app/label_tool.py`, add a module-level constant after `_SCORE_FILE`: + +```python +_DIGEST_SAMPLES_FILE = _ROOT / "data" / "digest_samples.jsonl" +``` + +In `_do_label()` (around line 728, after `_append_jsonl(_SCORE_FILE, row)`): + +```python + if label == "digest": + _append_jsonl( + _DIGEST_SAMPLES_FILE, + { + "subject": entry.get("subject", ""), + "body": (entry.get("body", ""))[:2000], + "from_addr": 
entry.get("from_addr", ""),
+                "date": entry.get("date", ""),
+                "account": entry.get("account", ""),
+            },
+        )
+```
+
+(`_append_jsonl` already exists in label_tool.py at line ~396 — reuse it.)
+
+**Step 5: Create the example file**
+
+Create `avocet/data/digest_samples.jsonl.example`:
+
+```json
+{"subject": "10 new Software Engineer jobs for you", "body": "Software Engineer\nAcme Corp\nSan Francisco, CA\n\nView job: https://www.linkedin.com/jobs/view/1234567890/\n", "from_addr": "LinkedIn <jobs-noreply@linkedin.com>", "date": "Mon, 03 Mar 2026 09:00:00 +0000", "account": "example@gmail.com"}
+```
+
+**Step 6: Update `.gitignore` in avocet**
+
+Verify `data/digest_samples.jsonl` is gitignored. Open `avocet/.gitignore` — it should
+already have `data/*.jsonl`. If not, add:
+
+```
+data/digest_samples.jsonl
+```
+
+**Step 7: Run all avocet tests**
+
+```
+/devl/miniconda3/envs/job-seeker/bin/pytest tests/ -v
+```
+Expected: all tests PASS
+
+**Step 8: Commit**
+
+```bash
+cd /Library/Development/CircuitForge/avocet
+git add app/api.py app/label_tool.py tests/test_digest_bucket.py data/digest_samples.jsonl.example
+git commit -m "feat: digest sample bucket — write digest-labeled emails to digest_samples.jsonl"
+```
+
+---
+
+## Summary
+
+| Task | Repo | Commit message |
+|------|------|----------------|
+| 1 | peregrine | `feat: digest parser registry + LinkedIn parser (moved from imap_sync)` |
+| 2 | avocet | `feat: fetch_digest_samples script for building new parsers` |
+| 3 | peregrine | `feat: Adzuna digest email parser` |
+| 4 | peregrine | `feat: The Ladders digest email parser` |
+| 5 | peregrine | `refactor: imap_sync uses digest_parsers dispatcher; remove inline LinkedIn parser` |
+| 6 | avocet | `feat: digest sample bucket — write digest-labeled emails to digest_samples.jsonl` |
+
+Tasks 1, 2, and 6 are independent and can be done in any order.
+Tasks 3 and 4 depend on Task 2 (samples needed before implementing parsers).
+Task 5 depends on Tasks 1, 3, and 4 (all parsers should be ready before switching imap_sync). diff --git a/requirements.txt b/requirements.txt index a63d778..81e8237 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ streamlit-paste-button>=0.1.0 # ── Job scraping ────────────────────────────────────────────────────────── python-jobspy>=1.1 -playwright +playwright>=1.40 selenium undetected-chromedriver webdriver-manager diff --git a/scripts/backup.py b/scripts/backup.py new file mode 100644 index 0000000..b20a465 --- /dev/null +++ b/scripts/backup.py @@ -0,0 +1,277 @@ +"""Config backup / restore / teleport for Peregrine. + +Creates a portable zip of all gitignored configs + optionally the staging DB. +Intended for: machine migrations, Docker volume transfers, and safe wizard testing. +Supports both the Peregrine Docker instance and the legacy /devl/job-seeker install. + +Usage (CLI): + conda run -n job-seeker python scripts/backup.py --create backup.zip + conda run -n job-seeker python scripts/backup.py --create backup.zip --no-db + conda run -n job-seeker python scripts/backup.py --create backup.zip --base-dir /devl/job-seeker + conda run -n job-seeker python scripts/backup.py --restore backup.zip + conda run -n job-seeker python scripts/backup.py --list backup.zip + +Usage (programmatic — called from Settings UI): + from scripts.backup import create_backup, restore_backup, list_backup_contents + zip_bytes = create_backup(base_dir, include_db=True) + info = list_backup_contents(zip_bytes) + result = restore_backup(zip_bytes, base_dir, include_db=True) +""" +from __future__ import annotations + +import io +import json +import zipfile +from datetime import datetime +from pathlib import Path + +# --------------------------------------------------------------------------- +# Files included in every backup (relative to repo root) +# --------------------------------------------------------------------------- + +# Gitignored config files that hold 
secrets / personal data +_SECRET_CONFIGS = [ + "config/notion.yaml", + "config/tokens.yaml", + "config/email.yaml", + "config/adzuna.yaml", + "config/craigslist.yaml", + "config/user.yaml", + "config/plain_text_resume.yaml", + "config/license.json", + "config/user.yaml.working", +] + +# Gitignored integration configs (glob pattern — each matching file is added) +_INTEGRATION_CONFIG_GLOB = "config/integrations/*.yaml" + +# Non-secret committed configs worth preserving for portability +# (also present in the legacy /devl/job-seeker instance) +_EXTRA_CONFIGS = [ + "config/llm.yaml", + "config/search_profiles.yaml", + "config/resume_keywords.yaml", # personal keyword list — present in both instances + "config/skills_suggestions.yaml", + "config/blocklist.yaml", + "config/server.yaml", # deployment config (base URL path, port) — Peregrine only +] + +# Candidate DB paths (first one that exists wins) +_DB_CANDIDATES = ["data/staging.db", "staging.db"] + +_MANIFEST_NAME = "backup-manifest.json" + + +# --------------------------------------------------------------------------- +# Source detection +# --------------------------------------------------------------------------- + +def _detect_source_label(base_dir: Path) -> str: + """Return a human-readable label for the instance being backed up. + + Uses the directory name — stable as long as the repo root isn't renamed, + which is the normal case for both the Docker install (peregrine/) and the + legacy Conda install (job-seeker/). + + Args: + base_dir: The root directory being backed up. + + Returns: + A short identifier string, e.g. "peregrine" or "job-seeker". + """ + return base_dir.name + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def create_backup( + base_dir: Path, + include_db: bool = True, + source_label: str | None = None, +) -> bytes: + """Return a zip archive as raw bytes. 
+ + Args: + base_dir: Repo root (parent of config/ and staging.db). + include_db: If True, include staging.db in the archive. + source_label: Human-readable instance name stored in the manifest + (e.g. "peregrine", "job-seeker"). Auto-detected if None. + """ + buf = io.BytesIO() + included: list[str] = [] + + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + # Gitignored secret configs + for rel in _SECRET_CONFIGS: + p = base_dir / rel + if p.exists(): + zf.write(p, rel) + included.append(rel) + + # Integration configs (glob) + for p in sorted((base_dir).glob(_INTEGRATION_CONFIG_GLOB)): + rel = str(p.relative_to(base_dir)) + zf.write(p, rel) + included.append(rel) + + # Extra non-secret configs + for rel in _EXTRA_CONFIGS: + p = base_dir / rel + if p.exists(): + zf.write(p, rel) + included.append(rel) + + # Staging DB + if include_db: + for candidate in _DB_CANDIDATES: + p = base_dir / candidate + if p.exists(): + zf.write(p, candidate) + included.append(candidate) + break + + # Manifest + manifest = { + "created_at": datetime.now().isoformat(), + "source": source_label or _detect_source_label(base_dir), + "source_path": str(base_dir.resolve()), + "peregrine_version": "1.0", + "files": included, + "includes_db": include_db and any(f.endswith(".db") for f in included), + } + zf.writestr(_MANIFEST_NAME, json.dumps(manifest, indent=2)) + + return buf.getvalue() + + +def list_backup_contents(zip_bytes: bytes) -> dict: + """Return manifest + file list from a backup zip (no extraction).""" + with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf: + names = [n for n in zf.namelist() if n != _MANIFEST_NAME] + manifest: dict = {} + if _MANIFEST_NAME in zf.namelist(): + manifest = json.loads(zf.read(_MANIFEST_NAME)) + sizes = {info.filename: info.file_size for info in zf.infolist()} + return { + "manifest": manifest, + "files": names, + "sizes": sizes, + "total_bytes": sum(sizes[n] for n in names if n in sizes), + } + + +def restore_backup( + zip_bytes: 
bytes, + base_dir: Path, + include_db: bool = True, + overwrite: bool = True, +) -> dict[str, list[str]]: + """Extract a backup zip into base_dir. + + Args: + zip_bytes: Raw bytes of the backup zip. + base_dir: Repo root to restore into. + include_db: If False, skip any .db files. + overwrite: If False, skip files that already exist. + + Returns: + {"restored": [...], "skipped": [...]} + """ + restored: list[str] = [] + skipped: list[str] = [] + + with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf: + for name in zf.namelist(): + if name == _MANIFEST_NAME: + continue + if not include_db and name.endswith(".db"): + skipped.append(name) + continue + dest = base_dir / name + if dest.exists() and not overwrite: + skipped.append(name) + continue + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_bytes(zf.read(name)) + restored.append(name) + + return {"restored": restored, "skipped": skipped} + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + +def main() -> None: + import argparse + import sys + + parser = argparse.ArgumentParser(description="Peregrine config backup / restore / teleport") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--create", metavar="OUT.zip", help="Create a backup zip") + group.add_argument("--restore", metavar="IN.zip", help="Restore from a backup zip") + group.add_argument("--list", metavar="IN.zip", help="List contents of a backup zip") + parser.add_argument("--no-db", action="store_true", help="Exclude staging.db (--create/--restore)") + parser.add_argument("--no-overwrite", action="store_true", + help="Skip files that already exist (--restore)") + parser.add_argument( + "--base-dir", metavar="PATH", + help="Root of the instance to back up/restore (default: this repo root). 
" + "Use /devl/job-seeker to target the legacy Conda install.", + ) + args = parser.parse_args() + + base_dir = Path(args.base_dir).resolve() if args.base_dir else Path(__file__).parent.parent + + if args.create: + out = Path(args.create) + data = create_backup(base_dir, include_db=not args.no_db) + out.write_bytes(data) + info = list_backup_contents(data) + m = info["manifest"] + print(f"Backup created: {out} ({len(data):,} bytes)") + print(f" Source: {m.get('source', '?')} ({base_dir})") + print(f" {len(info['files'])} files archived:") + for name in info["files"]: + size = info["sizes"].get(name, 0) + print(f" {name} ({size:,} bytes)") + + elif args.restore: + in_path = Path(args.restore) + if not in_path.exists(): + print(f"ERROR: {in_path} not found", file=sys.stderr) + sys.exit(1) + data = in_path.read_bytes() + result = restore_backup(data, base_dir, + include_db=not args.no_db, + overwrite=not args.no_overwrite) + print(f"Restored {len(result['restored'])} files:") + for name in result["restored"]: + print(f" ✓ {name}") + if result["skipped"]: + print(f"Skipped {len(result['skipped'])} files:") + for name in result["skipped"]: + print(f" - {name}") + + elif args.list: + in_path = Path(args.list) + if not in_path.exists(): + print(f"ERROR: {in_path} not found", file=sys.stderr) + sys.exit(1) + data = in_path.read_bytes() + info = list_backup_contents(data) + m = info["manifest"] + if m: + print(f"Created: {m.get('created_at', 'unknown')}") + print(f"Source: {m.get('source', '?')} ({m.get('source_path', '?')})") + print(f"Has DB: {m.get('includes_db', '?')}") + print(f"\n{len(info['files'])} files ({info['total_bytes']:,} bytes uncompressed):") + for name in info["files"]: + size = info["sizes"].get(name, 0) + print(f" {name} ({size:,} bytes)") + + +if __name__ == "__main__": + main() diff --git a/scripts/byok_guard.py b/scripts/byok_guard.py new file mode 100644 index 0000000..a3bb536 --- /dev/null +++ b/scripts/byok_guard.py @@ -0,0 +1,58 @@ +""" +BYOK 
cloud backend detection. + +Determines whether LLM backends in llm.yaml send data to third-party cloud +providers. Used by Settings (activation warning) and app.py (sidebar indicator). + +No Streamlit dependency — pure Python so it's unit-testable and reusable. +""" + +# 0.0.0.0 is a bind address (all interfaces), not a true loopback, but a backend +# configured to call it is talking to the local machine — treat as local. +LOCAL_URL_MARKERS = ("localhost", "127.0.0.1", "0.0.0.0") + + +def is_cloud_backend(name: str, cfg: dict) -> bool: + """Return True if this backend sends prompts to a third-party cloud provider. + + Classification rules (applied in order): + 1. local: true in cfg → always local (user override) + 2. vision_service type → always local + 3. anthropic or claude_code type → always cloud + 4. openai_compat with a localhost/loopback base_url → local + 5. openai_compat with any other base_url → cloud + 6. anything else → local (unknown types assumed safe) + """ + if cfg.get("local", False): + return False + + btype = cfg.get("type", "") + + if btype == "vision_service": + return False + + if btype in ("anthropic", "claude_code"): + return True + + if btype == "openai_compat": + url = cfg.get("base_url", "") + return not any(marker in url for marker in LOCAL_URL_MARKERS) + + return False + + +def cloud_backends(llm_cfg: dict) -> list[str]: + """Return names of enabled cloud backends from a parsed llm.yaml dict. + + Args: + llm_cfg: parsed contents of config/llm.yaml + + Returns: + List of backend names that are enabled and classified as cloud. + Empty list means fully local configuration. + """ + return [ + name + for name, cfg in llm_cfg.get("backends", {}).items() + if cfg.get("enabled", True) and is_cloud_backend(name, cfg) + ] diff --git a/scripts/db.py b/scripts/db.py index 6cf888f..a091a87 100644 --- a/scripts/db.py +++ b/scripts/db.py @@ -3,12 +3,13 @@ SQLite staging layer for job listings. 
Jobs flow: pending → approved/rejected → applied → synced applied → phone_screen → interviewing → offer → hired (or rejected) """ +import os import sqlite3 from datetime import datetime from pathlib import Path from typing import Optional -DEFAULT_DB = Path(__file__).parent.parent / "staging.db" +DEFAULT_DB = Path(os.environ.get("STAGING_DB", Path(__file__).parent.parent / "staging.db")) CREATE_JOBS = """ CREATE TABLE IF NOT EXISTS jobs ( diff --git a/scripts/feedback_api.py b/scripts/feedback_api.py new file mode 100644 index 0000000..0c8129a --- /dev/null +++ b/scripts/feedback_api.py @@ -0,0 +1,223 @@ +""" +Feedback API — pure Python backend, no Streamlit imports. +Called directly from app/feedback.py now; wrappable in a FastAPI route later. +""" +from __future__ import annotations + +import os +import platform +import re +import subprocess +from datetime import datetime, timezone +from pathlib import Path + +import requests +import yaml + +_ROOT = Path(__file__).parent.parent +_EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}") +_PHONE_RE = re.compile(r"(\+?1[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}") + + +def mask_pii(text: str) -> str: + """Redact email addresses and phone numbers from text.""" + text = _EMAIL_RE.sub("[email redacted]", text) + text = _PHONE_RE.sub("[phone redacted]", text) + return text + + +def collect_context(page: str) -> dict: + """Collect app context: page, version, tier, LLM backend, OS, timestamp.""" + # App version from git + try: + version = subprocess.check_output( + ["git", "describe", "--tags", "--always"], + cwd=_ROOT, text=True, timeout=5, + ).strip() + except Exception: + version = "dev" + + # Tier from user.yaml + tier = "unknown" + try: + user = yaml.safe_load((_ROOT / "config" / "user.yaml").read_text()) or {} + tier = user.get("tier", "unknown") + except Exception: + pass + + # LLM backend from llm.yaml — report first entry in fallback_order that's enabled + llm_backend = "unknown" + try: + 
llm = yaml.safe_load((_ROOT / "config" / "llm.yaml").read_text()) or {} + backends = llm.get("backends", {}) + for name in llm.get("fallback_order", []): + if backends.get(name, {}).get("enabled", False): + llm_backend = name + break + except Exception: + pass + + return { + "page": page, + "version": version, + "tier": tier, + "llm_backend": llm_backend, + "os": platform.platform(), + "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + + +def collect_logs(n: int = 100, log_path: Path | None = None) -> str: + """Return last n lines of the Streamlit log, with PII masked.""" + path = log_path or (_ROOT / "data" / ".streamlit.log") + if not path.exists(): + return "(no log file found)" + lines = path.read_text(errors="replace").splitlines() + return mask_pii("\n".join(lines[-n:])) + + +def collect_listings(db_path: Path | None = None, n: int = 5) -> list[dict]: + """Return the n most-recent job listings — title, company, url only.""" + import sqlite3 + from scripts.db import DEFAULT_DB + path = db_path or DEFAULT_DB + with sqlite3.connect(path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT title, company, url FROM jobs ORDER BY id DESC LIMIT ?", (n,) + ).fetchall() + return [{"title": r["title"], "company": r["company"], "url": r["url"]} for r in rows] + + +def build_issue_body(form: dict, context: dict, attachments: dict) -> str: + """Assemble the Forgejo issue markdown body from form data, context, and attachments.""" + _TYPE_LABELS = {"bug": "🐛 Bug", "feature": "✨ Feature Request", "other": "💬 Other"} + lines: list[str] = [ + f"## {_TYPE_LABELS.get(form.get('type', 'other'), '💬 Other')}", + "", + form.get("description", ""), + "", + ] + + if form.get("type") == "bug" and form.get("repro"): + lines += ["### Reproduction Steps", "", form["repro"], ""] + + if context: + lines += ["### Context", ""] + for k, v in context.items(): + lines.append(f"- **{k}:** {v}") + lines.append("") + + if 
attachments.get("logs"):
+        lines += [
+            "<details>",
+            "<summary>App Logs (last 100 lines)</summary>",
+            "",
+            "```",
+            attachments["logs"],
+            "```",
+            "</details>
", + "", + ] + + if attachments.get("listings"): + lines += ["### Recent Listings", ""] + for j in attachments["listings"]: + lines.append(f"- [{j['title']} @ {j['company']}]({j['url']})") + lines.append("") + + if attachments.get("submitter"): + lines += ["---", f"*Submitted by: {attachments['submitter']}*"] + + return "\n".join(lines) + + +def _ensure_labels( + label_names: list[str], base_url: str, headers: dict, repo: str +) -> list[int]: + """Look up or create Forgejo labels by name. Returns list of IDs.""" + _COLORS = { + "beta-feedback": "#0075ca", + "needs-triage": "#e4e669", + "bug": "#d73a4a", + "feature-request": "#a2eeef", + "question": "#d876e3", + } + resp = requests.get(f"{base_url}/repos/{repo}/labels", headers=headers, timeout=10) + existing = {lb["name"]: lb["id"] for lb in resp.json()} if resp.ok else {} + ids: list[int] = [] + for name in label_names: + if name in existing: + ids.append(existing[name]) + else: + r = requests.post( + f"{base_url}/repos/{repo}/labels", + headers=headers, + json={"name": name, "color": _COLORS.get(name, "#ededed")}, + timeout=10, + ) + if r.ok: + ids.append(r.json()["id"]) + return ids + + +def create_forgejo_issue(title: str, body: str, labels: list[str]) -> dict: + """Create a Forgejo issue. 
Returns {"number": int, "url": str}.""" + token = os.environ.get("FORGEJO_API_TOKEN", "") + repo = os.environ.get("FORGEJO_REPO", "pyr0ball/peregrine") + base = os.environ.get("FORGEJO_API_URL", "https://git.opensourcesolarpunk.com/api/v1") + headers = {"Authorization": f"token {token}", "Content-Type": "application/json"} + label_ids = _ensure_labels(labels, base, headers, repo) + resp = requests.post( + f"{base}/repos/{repo}/issues", + headers=headers, + json={"title": title, "body": body, "labels": label_ids}, + timeout=15, + ) + resp.raise_for_status() + data = resp.json() + return {"number": data["number"], "url": data["html_url"]} + + +def upload_attachment( + issue_number: int, image_bytes: bytes, filename: str = "screenshot.png" +) -> str: + """Upload a screenshot to an existing Forgejo issue. Returns attachment URL.""" + token = os.environ.get("FORGEJO_API_TOKEN", "") + repo = os.environ.get("FORGEJO_REPO", "pyr0ball/peregrine") + base = os.environ.get("FORGEJO_API_URL", "https://git.opensourcesolarpunk.com/api/v1") + headers = {"Authorization": f"token {token}"} + resp = requests.post( + f"{base}/repos/{repo}/issues/{issue_number}/assets", + headers=headers, + files={"attachment": (filename, image_bytes, "image/png")}, + timeout=15, + ) + resp.raise_for_status() + return resp.json().get("browser_download_url", "") + + +def screenshot_page(port: int | None = None) -> bytes | None: + """ + Capture a screenshot of the running Peregrine UI using Playwright. + Returns PNG bytes, or None if Playwright is not installed or capture fails. 
+ """ + try: + from playwright.sync_api import sync_playwright + except ImportError: + return None + + if port is None: + port = int(os.environ.get("STREAMLIT_PORT", os.environ.get("STREAMLIT_SERVER_PORT", "8502"))) + + try: + with sync_playwright() as p: + browser = p.chromium.launch() + page = browser.new_page(viewport={"width": 1280, "height": 800}) + page.goto(f"http://localhost:{port}", timeout=10_000) + page.wait_for_load_state("networkidle", timeout=10_000) + png = page.screenshot(full_page=False) + browser.close() + return png + except Exception: + return None diff --git a/scripts/suggest_helpers.py b/scripts/suggest_helpers.py new file mode 100644 index 0000000..6ac3475 --- /dev/null +++ b/scripts/suggest_helpers.py @@ -0,0 +1,160 @@ +""" +LLM-powered suggestion helpers for Settings UI. +Two functions, each makes one LLMRouter call: + - suggest_search_terms: enhanced title + three-angle exclude suggestions + - suggest_resume_keywords: skills/domains/keywords gap analysis +""" +import json +import re +from pathlib import Path +from typing import Any + +from scripts.llm_router import LLMRouter + + +def _load_resume_context(resume_path: Path) -> str: + """Extract 3 most recent positions from plain_text_resume.yaml as a short summary.""" + import yaml + if not resume_path.exists(): + return "" + resume = yaml.safe_load(resume_path.read_text()) or {} + lines = [] + for exp in (resume.get("experience_details") or [])[:3]: + pos = exp.get("position", "") + co = exp.get("company", "") + skills = ", ".join((exp.get("skills_acquired") or [])[:5]) + lines.append(f"- {pos} at {co}: {skills}") + return "\n".join(lines) + + +def _parse_json(text: str) -> dict[str, Any]: + """Extract the first JSON object from LLM output. 
Returns {} on failure.""" + m = re.search(r"\{.*\}", text, re.DOTALL) + if m: + try: + return json.loads(m.group()) + except Exception: + pass + return {} + + +def suggest_search_terms( + current_titles: list[str], + resume_path: Path, + blocklist: dict[str, Any], + user_profile: dict[str, Any], +) -> dict: + """ + Suggest additional job titles and exclude keywords. + + Three-angle exclude analysis: + A: Blocklist alias expansion (blocked companies/industries → keyword variants) + B: Values misalignment (mission preferences → industries/culture to avoid) + C: Role-type filter (career summary → role types that don't fit) + + Returns: {"suggested_titles": [...], "suggested_excludes": [...]} + """ + resume_context = _load_resume_context(resume_path) + titles_str = "\n".join(f"- {t}" for t in current_titles) or "(none yet)" + + bl_companies = ", ".join(blocklist.get("companies", [])) or "none" + bl_industries = ", ".join(blocklist.get("industries", [])) or "none" + nda = ", ".join(user_profile.get("nda_companies", [])) or "none" + career_summary = user_profile.get("career_summary", "") or "Not provided" + mission_raw = user_profile.get("mission_preferences", {}) or {} + # Three exclude angles are intentionally collapsed into one flat suggested_excludes list + mission_str = "\n".join( + f" - {k}: {v}" for k, v in mission_raw.items() if v and isinstance(v, str) and v.strip() + ) or " (none specified)" + + prompt = f"""You are helping a job seeker optimise their search configuration. + +--- RESUME BACKGROUND --- +{resume_context or "Not provided"} + +--- CAREER SUMMARY --- +{career_summary} + +--- CURRENT TITLES BEING SEARCHED --- +{titles_str} + +--- BLOCKED ENTITIES --- +Companies blocked: {bl_companies} +Industries blocked: {bl_industries} +NDA / confidential employers: {nda} + +--- MISSION & VALUES --- +{mission_str} + +Provide all four of the following: + +1. 
TITLE SUGGESTIONS + 5-8 additional job titles they may be missing: alternative names, adjacent roles, or senior variants of their current titles. + +2. EXCLUDE KEYWORDS — BLOCKLIST ALIASES + The user has blocked the companies/industries above. Suggest keyword variants that would also catch their aliases, subsidiaries, or related brands. + Example: blocking "Meta" → also exclude "facebook", "instagram", "metaverse", "oculus". + +3. EXCLUDE KEYWORDS — VALUES MISALIGNMENT + Based on the user's mission and values above, suggest industry or culture keywords to exclude. + Examples: "tobacco", "gambling", "fossil fuel", "defense contractor", "MLM", "commission-only", "pyramid". + +4. EXCLUDE KEYWORDS — ROLE TYPE FILTER + Based on the user's career background, suggest role-type terms that don't match their trajectory. + Examples for a CS/TAM leader: "cold calling", "door to door", "quota-driven", "SDR", "sales development rep". + +Return ONLY valid JSON in exactly this format (no extra text): +{{"suggested_titles": ["Title 1", "Title 2"], + "suggested_excludes": ["keyword 1", "keyword 2", "keyword 3"]}}""" + + raw = LLMRouter().complete(prompt).strip() + parsed = _parse_json(raw) + return { + "suggested_titles": parsed.get("suggested_titles", []), + "suggested_excludes": parsed.get("suggested_excludes", []), + } + + +def suggest_resume_keywords( + resume_path: Path, + current_kw: dict[str, list[str]], +) -> dict: + """ + Suggest skills, domains, and keywords not already in the user's resume_keywords.yaml. + + Returns: {"skills": [...], "domains": [...], "keywords": [...]} + """ + resume_context = _load_resume_context(resume_path) + + already_skills = ", ".join(current_kw.get("skills", [])) or "none" + already_domains = ", ".join(current_kw.get("domains", [])) or "none" + already_keywords = ", ".join(current_kw.get("keywords", [])) or "none" + + prompt = f"""You are helping a job seeker build a keyword profile used to score job description matches. 
+ +--- RESUME BACKGROUND --- +{resume_context or "Not provided"} + +--- ALREADY SELECTED (do not repeat these) --- +Skills: {already_skills} +Domains: {already_domains} +Keywords: {already_keywords} + +Suggest additional tags in each of the three categories below. Only suggest tags NOT already in the lists above. + +SKILLS — specific technical or soft skills (e.g. "Salesforce", "Executive Communication", "SQL", "Stakeholder Management") +DOMAINS — industry verticals, company types, or functional areas (e.g. "B2B SaaS", "EdTech", "Non-profit", "Series A-C") +KEYWORDS — specific terms, methodologies, metrics, or JD phrases (e.g. "NPS", "churn prevention", "QBR", "cross-functional") + +Return ONLY valid JSON in exactly this format (no extra text): +{{"skills": ["Skill A", "Skill B"], + "domains": ["Domain A"], + "keywords": ["Keyword A", "Keyword B"]}}""" + + raw = LLMRouter().complete(prompt).strip() + parsed = _parse_json(raw) + return { + "skills": parsed.get("skills", []), + "domains": parsed.get("domains", []), + "keywords": parsed.get("keywords", []), + } diff --git a/tests/test_backup.py b/tests/test_backup.py new file mode 100644 index 0000000..a96de42 --- /dev/null +++ b/tests/test_backup.py @@ -0,0 +1,231 @@ +"""Tests for scripts/backup.py — create, list, restore, and multi-instance support.""" +from __future__ import annotations + +import json +import zipfile +from pathlib import Path + +import pytest + +from scripts.backup import ( + _detect_source_label, + create_backup, + list_backup_contents, + restore_backup, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +def _make_instance(tmp_path: Path, name: str, *, root_db: bool = False) -> Path: + """Build a minimal fake instance directory for testing.""" + base = tmp_path / name + base.mkdir() + + # Secret configs + (base / "config").mkdir() + (base / "config" / 
"notion.yaml").write_text("token: secret") + (base / "config" / "email.yaml").write_text("user: test@example.com") + + # Extra config + (base / "config" / "llm.yaml").write_text("backend: ollama") + (base / "config" / "resume_keywords.yaml").write_text("keywords: [python]") + (base / "config" / "server.yaml").write_text("port: 8502") + + # DB — either at data/staging.db (Peregrine) or staging.db root (legacy) + if root_db: + (base / "staging.db").write_bytes(b"SQLite legacy") + else: + (base / "data").mkdir() + (base / "data" / "staging.db").write_bytes(b"SQLite peregrine") + + return base + + +# --------------------------------------------------------------------------- +# create_backup +# --------------------------------------------------------------------------- + +class TestCreateBackup: + def test_returns_valid_zip(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + assert zipfile.is_zipfile(__import__("io").BytesIO(data)) + + def test_includes_secret_configs(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + info = list_backup_contents(data) + assert "config/notion.yaml" in info["files"] + assert "config/email.yaml" in info["files"] + + def test_includes_extra_configs(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + info = list_backup_contents(data) + assert "config/llm.yaml" in info["files"] + assert "config/resume_keywords.yaml" in info["files"] + assert "config/server.yaml" in info["files"] + + def test_includes_db_by_default(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + info = list_backup_contents(data) + assert info["manifest"]["includes_db"] is True + assert any(f.endswith(".db") for f in info["files"]) + + def test_excludes_db_when_flag_false(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base, include_db=False) + info = 
list_backup_contents(data) + assert info["manifest"]["includes_db"] is False + assert not any(f.endswith(".db") for f in info["files"]) + + def test_silently_skips_missing_files(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + # tokens.yaml not created in fixture — should not raise + data = create_backup(base) + info = list_backup_contents(data) + assert "config/tokens.yaml" not in info["files"] + + def test_manifest_contains_source_label(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + info = list_backup_contents(data) + assert info["manifest"]["source"] == "peregrine" + + def test_source_label_override(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base, source_label="custom-label") + info = list_backup_contents(data) + assert info["manifest"]["source"] == "custom-label" + + +# --------------------------------------------------------------------------- +# Legacy instance (staging.db at repo root) +# --------------------------------------------------------------------------- + +class TestLegacyInstance: + def test_picks_up_root_db(self, tmp_path): + base = _make_instance(tmp_path, "job-seeker", root_db=True) + data = create_backup(base) + info = list_backup_contents(data) + assert "staging.db" in info["files"] + assert "data/staging.db" not in info["files"] + + def test_source_label_is_job_seeker(self, tmp_path): + base = _make_instance(tmp_path, "job-seeker", root_db=True) + data = create_backup(base) + info = list_backup_contents(data) + assert info["manifest"]["source"] == "job-seeker" + + def test_missing_peregrine_only_configs_skipped(self, tmp_path): + """Legacy doesn't have server.yaml, user.yaml, etc. 
— should not error.""" + base = _make_instance(tmp_path, "job-seeker", root_db=True) + # Remove server.yaml to simulate legacy (it won't exist there) + (base / "config" / "server.yaml").unlink() + data = create_backup(base) + info = list_backup_contents(data) + assert "config/server.yaml" not in info["files"] + assert "config/notion.yaml" in info["files"] + + +# --------------------------------------------------------------------------- +# list_backup_contents +# --------------------------------------------------------------------------- + +class TestListBackupContents: + def test_returns_manifest_and_files(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + info = list_backup_contents(data) + assert "manifest" in info + assert "files" in info + assert "sizes" in info + assert "total_bytes" in info + + def test_total_bytes_is_sum_of_file_sizes(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + info = list_backup_contents(data) + expected = sum(info["sizes"][f] for f in info["files"] if f in info["sizes"]) + assert info["total_bytes"] == expected + + def test_manifest_not_in_files_list(self, tmp_path): + base = _make_instance(tmp_path, "peregrine") + data = create_backup(base) + info = list_backup_contents(data) + assert "backup-manifest.json" not in info["files"] + + +# --------------------------------------------------------------------------- +# restore_backup +# --------------------------------------------------------------------------- + +class TestRestoreBackup: + def test_restores_all_files(self, tmp_path): + src = _make_instance(tmp_path, "peregrine") + dst = tmp_path / "restored" + dst.mkdir() + data = create_backup(src) + result = restore_backup(data, dst) + assert len(result["restored"]) > 0 + assert (dst / "config" / "notion.yaml").exists() + + def test_skips_db_when_flag_false(self, tmp_path): + src = _make_instance(tmp_path, "peregrine") + dst = tmp_path / "restored" 
+ dst.mkdir() + data = create_backup(src) + result = restore_backup(data, dst, include_db=False) + assert not any(f.endswith(".db") for f in result["restored"]) + assert any(f.endswith(".db") for f in result["skipped"]) + + def test_no_overwrite_skips_existing(self, tmp_path): + src = _make_instance(tmp_path, "peregrine") + dst = tmp_path / "restored" + dst.mkdir() + (dst / "config").mkdir() + existing = dst / "config" / "notion.yaml" + existing.write_text("original content") + data = create_backup(src) + result = restore_backup(data, dst, overwrite=False) + assert "config/notion.yaml" in result["skipped"] + assert existing.read_text() == "original content" + + def test_overwrite_replaces_existing(self, tmp_path): + src = _make_instance(tmp_path, "peregrine") + dst = tmp_path / "restored" + dst.mkdir() + (dst / "config").mkdir() + (dst / "config" / "notion.yaml").write_text("stale content") + data = create_backup(src) + restore_backup(data, dst, overwrite=True) + assert (dst / "config" / "notion.yaml").read_text() == "token: secret" + + def test_roundtrip_preserves_content(self, tmp_path): + src = _make_instance(tmp_path, "peregrine") + original = (src / "config" / "notion.yaml").read_text() + dst = tmp_path / "restored" + dst.mkdir() + data = create_backup(src) + restore_backup(data, dst) + assert (dst / "config" / "notion.yaml").read_text() == original + + +# --------------------------------------------------------------------------- +# _detect_source_label +# --------------------------------------------------------------------------- + +class TestDetectSourceLabel: + def test_returns_directory_name(self, tmp_path): + base = tmp_path / "peregrine" + base.mkdir() + assert _detect_source_label(base) == "peregrine" + + def test_legacy_label(self, tmp_path): + base = tmp_path / "job-seeker" + base.mkdir() + assert _detect_source_label(base) == "job-seeker" diff --git a/tests/test_byok_guard.py b/tests/test_byok_guard.py new file mode 100644 index 0000000..a662dd6 --- 
/dev/null +++ b/tests/test_byok_guard.py @@ -0,0 +1,101 @@ +"""Tests for BYOK cloud backend detection.""" +import pytest +from scripts.byok_guard import is_cloud_backend, cloud_backends + + +class TestIsCloudBackend: + def test_anthropic_type_is_always_cloud(self): + assert is_cloud_backend("anthropic", {"type": "anthropic", "enabled": True}) is True + + def test_claude_code_type_is_cloud(self): + assert is_cloud_backend("claude_code", {"type": "claude_code", "enabled": True}) is True + + def test_vision_service_is_always_local(self): + assert is_cloud_backend("vision", {"type": "vision_service"}) is False + + def test_openai_compat_localhost_is_local(self): + cfg = {"type": "openai_compat", "base_url": "http://localhost:11434/v1"} + assert is_cloud_backend("ollama", cfg) is False + + def test_openai_compat_127_is_local(self): + cfg = {"type": "openai_compat", "base_url": "http://127.0.0.1:8000/v1"} + assert is_cloud_backend("vllm", cfg) is False + + def test_openai_compat_0000_is_local(self): + cfg = {"type": "openai_compat", "base_url": "http://0.0.0.0:8000/v1"} + assert is_cloud_backend("vllm", cfg) is False + + def test_openai_compat_remote_url_is_cloud(self): + cfg = {"type": "openai_compat", "base_url": "https://api.openai.com/v1"} + assert is_cloud_backend("openai", cfg) is True + + def test_openai_compat_together_is_cloud(self): + cfg = {"type": "openai_compat", "base_url": "https://api.together.xyz/v1"} + assert is_cloud_backend("together", cfg) is True + + def test_local_override_suppresses_cloud_detection(self): + cfg = {"type": "openai_compat", "base_url": "http://192.168.1.100:11434/v1", "local": True} + assert is_cloud_backend("nas_ollama", cfg) is False + + def test_local_override_on_anthropic_suppresses_detection(self): + cfg = {"type": "anthropic", "local": True} + assert is_cloud_backend("anthropic", cfg) is False + + def test_openai_compat_missing_base_url_treated_as_cloud(self): + # No base_url → unknown destination → defensively treated as 
cloud + cfg = {"type": "openai_compat"} + assert is_cloud_backend("unknown", cfg) is True + + def test_unknown_type_without_url_is_local(self): + assert is_cloud_backend("mystery", {"type": "unknown_type"}) is False + + +class TestCloudBackends: + def test_empty_config_returns_empty(self): + assert cloud_backends({}) == [] + + def test_fully_local_config_returns_empty(self): + cfg = { + "backends": { + "ollama": {"type": "openai_compat", "base_url": "http://localhost:11434/v1", "enabled": True}, + "vision": {"type": "vision_service", "enabled": True}, + } + } + assert cloud_backends(cfg) == [] + + def test_cloud_backend_returned(self): + cfg = { + "backends": { + "anthropic": {"type": "anthropic", "enabled": True}, + } + } + assert cloud_backends(cfg) == ["anthropic"] + + def test_disabled_cloud_backend_excluded(self): + cfg = { + "backends": { + "anthropic": {"type": "anthropic", "enabled": False}, + } + } + assert cloud_backends(cfg) == [] + + def test_mix_returns_only_enabled_cloud(self): + cfg = { + "backends": { + "ollama": {"type": "openai_compat", "base_url": "http://localhost:11434/v1", "enabled": True}, + "anthropic": {"type": "anthropic", "enabled": True}, + "openai": {"type": "openai_compat", "base_url": "https://api.openai.com/v1", "enabled": False}, + } + } + result = cloud_backends(cfg) + assert result == ["anthropic"] + + def test_multiple_cloud_backends_all_returned(self): + cfg = { + "backends": { + "anthropic": {"type": "anthropic", "enabled": True}, + "openai": {"type": "openai_compat", "base_url": "https://api.openai.com/v1", "enabled": True}, + } + } + result = cloud_backends(cfg) + assert set(result) == {"anthropic", "openai"} diff --git a/tests/test_cover_letter_refinement.py b/tests/test_cover_letter_refinement.py index c2fb8fb..8fc5b88 100644 --- a/tests/test_cover_letter_refinement.py +++ b/tests/test_cover_letter_refinement.py @@ -21,7 +21,7 @@ class TestGenerateRefinement: """Call generate() with a mock router and return the captured 
prompt.""" captured = {} mock_router = MagicMock() - mock_router.complete.side_effect = lambda p: (captured.update({"prompt": p}), "result")[1] + mock_router.complete.side_effect = lambda p, **kwargs: (captured.update({"prompt": p}), "result")[1] with patch("scripts.generate_cover_letter.load_corpus", return_value=[]), \ patch("scripts.generate_cover_letter.find_similar_letters", return_value=[]): from scripts.generate_cover_letter import generate diff --git a/tests/test_feedback_api.py b/tests/test_feedback_api.py new file mode 100644 index 0000000..8c7260a --- /dev/null +++ b/tests/test_feedback_api.py @@ -0,0 +1,273 @@ +"""Tests for the feedback API backend.""" +import pytest +from unittest.mock import patch, MagicMock +from pathlib import Path + + +# ── mask_pii ────────────────────────────────────────────────────────────────── + +def test_mask_pii_email(): + from scripts.feedback_api import mask_pii + assert mask_pii("contact foo@bar.com please") == "contact [email redacted] please" + + +def test_mask_pii_phone_dashes(): + from scripts.feedback_api import mask_pii + assert mask_pii("call 555-123-4567 now") == "call [phone redacted] now" + + +def test_mask_pii_phone_parens(): + from scripts.feedback_api import mask_pii + assert mask_pii("(555) 867-5309") == "[phone redacted]" + + +def test_mask_pii_clean_text(): + from scripts.feedback_api import mask_pii + assert mask_pii("no sensitive data here") == "no sensitive data here" + + +def test_mask_pii_multiple_emails(): + from scripts.feedback_api import mask_pii + result = mask_pii("a@b.com and c@d.com") + assert result == "[email redacted] and [email redacted]" + + +# ── collect_context ─────────────────────────────────────────────────────────── + +def test_collect_context_required_keys(): + from scripts.feedback_api import collect_context + ctx = collect_context("Home") + for key in ("page", "version", "tier", "llm_backend", "os", "timestamp"): + assert key in ctx, f"missing key: {key}" + + +def 
test_collect_context_page_value(): + from scripts.feedback_api import collect_context + ctx = collect_context("MyPage") + assert ctx["page"] == "MyPage" + + +def test_collect_context_timestamp_is_utc(): + from scripts.feedback_api import collect_context + ctx = collect_context("X") + assert ctx["timestamp"].endswith("Z") + + +# ── collect_logs ────────────────────────────────────────────────────────────── + +def test_collect_logs_returns_string(tmp_path): + from scripts.feedback_api import collect_logs + log = tmp_path / ".streamlit.log" + log.write_text("line1\nline2\nline3\n") + result = collect_logs(log_path=log, n=10) + assert isinstance(result, str) + assert "line3" in result + + +def test_collect_logs_tails_n_lines(tmp_path): + from scripts.feedback_api import collect_logs + log = tmp_path / ".streamlit.log" + log.write_text("\n".join(f"line{i}" for i in range(200))) + result = collect_logs(log_path=log, n=10) + assert "line199" in result + assert "line0" not in result + + +def test_collect_logs_masks_pii(tmp_path): + from scripts.feedback_api import collect_logs + log = tmp_path / "test.log" + log.write_text("user foo@bar.com connected\n") + result = collect_logs(log_path=log) + assert "foo@bar.com" not in result + assert "[email redacted]" in result + + +def test_collect_logs_missing_file(tmp_path): + from scripts.feedback_api import collect_logs + result = collect_logs(log_path=tmp_path / "nonexistent.log") + assert "no log file" in result.lower() + + +# ── collect_listings ────────────────────────────────────────────────────────── + +def test_collect_listings_safe_fields_only(tmp_path): + """Only title, company, url — no cover letters, notes, or emails.""" + from scripts.db import init_db, insert_job + from scripts.feedback_api import collect_listings + db = tmp_path / "test.db" + init_db(db) + insert_job(db, { + "title": "CSM", "company": "Acme", "url": "https://example.com/1", + "source": "linkedin", "location": "Remote", "is_remote": True, + "salary": 
"", "description": "great role", "date_found": "2026-03-01", + }) + results = collect_listings(db_path=db, n=5) + assert len(results) == 1 + assert set(results[0].keys()) == {"title", "company", "url"} + assert results[0]["title"] == "CSM" + + +def test_collect_listings_respects_n(tmp_path): + from scripts.db import init_db, insert_job + from scripts.feedback_api import collect_listings + db = tmp_path / "test.db" + init_db(db) + for i in range(10): + insert_job(db, { + "title": f"Job {i}", "company": "Acme", "url": f"https://example.com/{i}", + "source": "linkedin", "location": "Remote", "is_remote": False, + "salary": "", "description": "", "date_found": "2026-03-01", + }) + assert len(collect_listings(db_path=db, n=3)) == 3 + + +# ── build_issue_body ────────────────────────────────────────────────────────── + +def test_build_issue_body_contains_description(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "Test", "description": "it broke", "repro": ""} + ctx = {"page": "Home", "version": "v1.0", "tier": "free", + "llm_backend": "ollama", "os": "Linux", "timestamp": "2026-03-03T00:00:00Z"} + body = build_issue_body(form, ctx, {}) + assert "it broke" in body + assert "Home" in body + assert "v1.0" in body + + +def test_build_issue_body_bug_includes_repro(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "desc", "repro": "step 1\nstep 2"} + body = build_issue_body(form, {}, {}) + assert "step 1" in body + assert "Reproduction" in body + + +def test_build_issue_body_no_repro_for_feature(): + from scripts.feedback_api import build_issue_body + form = {"type": "feature", "title": "X", "description": "add dark mode", "repro": "ignored"} + body = build_issue_body(form, {}, {}) + assert "Reproduction" not in body + + +def test_build_issue_body_logs_in_collapsible(): + from scripts.feedback_api import build_issue_body + form = {"type": "other", "title": "X", "description": 
"Y", "repro": ""} + body = build_issue_body(form, {}, {"logs": "log line 1\nlog line 2"}) + assert "
" in body + assert "log line 1" in body + + +def test_build_issue_body_omits_logs_when_not_provided(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "Y", "repro": ""} + body = build_issue_body(form, {}, {}) + assert "
" not in body + + +def test_build_issue_body_submitter_attribution(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "Y", "repro": ""} + body = build_issue_body(form, {}, {"submitter": "Jane Doe "}) + assert "Jane Doe" in body + + +def test_build_issue_body_listings_shown(): + from scripts.feedback_api import build_issue_body + form = {"type": "bug", "title": "X", "description": "Y", "repro": ""} + listings = [{"title": "CSM", "company": "Acme", "url": "https://example.com/1"}] + body = build_issue_body(form, {}, {"listings": listings}) + assert "CSM" in body + assert "Acme" in body + + +# ── Forgejo API ─────────────────────────────────────────────────────────────── + +@patch("scripts.feedback_api.requests.get") +@patch("scripts.feedback_api.requests.post") +def test_ensure_labels_uses_existing(mock_post, mock_get): + from scripts.feedback_api import _ensure_labels + mock_get.return_value.ok = True + mock_get.return_value.json.return_value = [ + {"name": "beta-feedback", "id": 1}, + {"name": "bug", "id": 2}, + ] + ids = _ensure_labels( + ["beta-feedback", "bug"], + "https://example.com/api/v1", {"Authorization": "token x"}, "owner/repo" + ) + assert ids == [1, 2] + mock_post.assert_not_called() + + +@patch("scripts.feedback_api.requests.get") +@patch("scripts.feedback_api.requests.post") +def test_ensure_labels_creates_missing(mock_post, mock_get): + from scripts.feedback_api import _ensure_labels + mock_get.return_value.ok = True + mock_get.return_value.json.return_value = [] + mock_post.return_value.ok = True + mock_post.return_value.json.return_value = {"id": 99} + ids = _ensure_labels( + ["needs-triage"], + "https://example.com/api/v1", {"Authorization": "token x"}, "owner/repo" + ) + assert 99 in ids + + +@patch("scripts.feedback_api._ensure_labels", return_value=[1, 2]) +@patch("scripts.feedback_api.requests.post") +def test_create_forgejo_issue_success(mock_post, mock_labels, monkeypatch): + from 
scripts.feedback_api import create_forgejo_issue + monkeypatch.setenv("FORGEJO_API_TOKEN", "testtoken") + monkeypatch.setenv("FORGEJO_REPO", "owner/repo") + monkeypatch.setenv("FORGEJO_API_URL", "https://example.com/api/v1") + mock_post.return_value.status_code = 201 + mock_post.return_value.raise_for_status = lambda: None + mock_post.return_value.json.return_value = {"number": 42, "html_url": "https://example.com/issues/42"} + result = create_forgejo_issue("Test issue", "body text", ["beta-feedback", "bug"]) + assert result["number"] == 42 + assert "42" in result["url"] + + +@patch("scripts.feedback_api.requests.post") +def test_upload_attachment_returns_url(mock_post, monkeypatch): + from scripts.feedback_api import upload_attachment + monkeypatch.setenv("FORGEJO_API_TOKEN", "testtoken") + monkeypatch.setenv("FORGEJO_REPO", "owner/repo") + monkeypatch.setenv("FORGEJO_API_URL", "https://example.com/api/v1") + mock_post.return_value.status_code = 201 + mock_post.return_value.raise_for_status = lambda: None + mock_post.return_value.json.return_value = { + "uuid": "abc", "browser_download_url": "https://example.com/assets/abc" + } + url = upload_attachment(42, b"\x89PNG", "screenshot.png") + assert url == "https://example.com/assets/abc" + + +# ── screenshot_page ─────────────────────────────────────────────────────────── + +def test_screenshot_page_returns_none_on_failure(monkeypatch): + """screenshot_page returns None gracefully when capture fails.""" + from scripts.feedback_api import screenshot_page + import playwright.sync_api as pw_api + original = pw_api.sync_playwright + def bad_playwright(): + raise RuntimeError("browser unavailable") + monkeypatch.setattr(pw_api, "sync_playwright", bad_playwright) + result = screenshot_page(port=9999) + assert result is None + + +@patch("playwright.sync_api.sync_playwright") +def test_screenshot_page_returns_bytes(mock_pw): + """screenshot_page returns PNG bytes when playwright is available.""" + from scripts.feedback_api 
import screenshot_page + fake_png = b"\x89PNG\r\n\x1a\n" + mock_context = MagicMock() + mock_pw.return_value.__enter__ = lambda s: mock_context + mock_pw.return_value.__exit__ = MagicMock(return_value=False) + mock_browser = mock_context.chromium.launch.return_value + mock_page = mock_browser.new_page.return_value + mock_page.screenshot.return_value = fake_png + result = screenshot_page(port=8502) + assert result == fake_png diff --git a/tests/test_imap_sync.py b/tests/test_imap_sync.py index 49c9be2..f9cc4e5 100644 --- a/tests/test_imap_sync.py +++ b/tests/test_imap_sync.py @@ -391,7 +391,7 @@ def test_rejection_uppercase_lowercased(): def test_rejection_phrase_in_quoted_thread_beyond_limit_not_blocked(): """Rejection phrase beyond 1500-char body window does not block the email.""" from scripts.imap_sync import _has_rejection_or_ats_signal - clean_intro = "Hi Alex, we'd love to schedule a call with you. " * 30 # ~1500 chars + clean_intro = "Hi Alex, we'd love to schedule a call with you. " * 32 # ~1500 chars quoted_footer = "\n\nOn Mon, Jan 1 wrote:\n> Unfortunately we went with another candidate." body = clean_intro + quoted_footer # The phrase lands after the 1500-char cutoff — should NOT be blocked diff --git a/tests/test_suggest_helpers.py b/tests/test_suggest_helpers.py new file mode 100644 index 0000000..2f071b5 --- /dev/null +++ b/tests/test_suggest_helpers.py @@ -0,0 +1,148 @@ +"""Tests for scripts/suggest_helpers.py.""" +import json +import pytest +from pathlib import Path +from unittest.mock import patch, MagicMock + +RESUME_PATH = Path(__file__).parent.parent / "config" / "plain_text_resume.yaml" + + +# ── _parse_json ─────────────────────────────────────────────────────────────── + +def test_parse_json_extracts_valid_object(): + from scripts.suggest_helpers import _parse_json + raw = 'Here is the result: {"a": [1, 2], "b": "hello"} done.' 
+ assert _parse_json(raw) == {"a": [1, 2], "b": "hello"} + + +def test_parse_json_returns_empty_on_invalid(): + from scripts.suggest_helpers import _parse_json + assert _parse_json("no json here") == {} + assert _parse_json('{"broken": ') == {} + + +# ── suggest_search_terms ────────────────────────────────────────────────────── + +BLOCKLIST = { + "companies": ["Meta", "Amazon"], + "industries": ["gambling"], + "locations": [], +} +USER_PROFILE = { + "career_summary": "Customer success leader with 10 years in B2B SaaS.", + "mission_preferences": { + "animal_welfare": "I volunteer at my local shelter.", + "education": "", + }, + "nda_companies": ["Acme Corp"], +} + + +def _mock_llm(response_dict: dict): + """Return a patcher that makes LLMRouter().complete() return a JSON string.""" + mock_router = MagicMock() + mock_router.complete.return_value = json.dumps(response_dict) + return patch("scripts.suggest_helpers.LLMRouter", return_value=mock_router) + + +def test_suggest_search_terms_returns_titles_and_excludes(): + from scripts.suggest_helpers import suggest_search_terms + payload = {"suggested_titles": ["VP Customer Success"], "suggested_excludes": ["cold calling"]} + with _mock_llm(payload): + result = suggest_search_terms(["Customer Success Manager"], RESUME_PATH, BLOCKLIST, USER_PROFILE) + assert result["suggested_titles"] == ["VP Customer Success"] + assert result["suggested_excludes"] == ["cold calling"] + + +def test_suggest_search_terms_prompt_contains_blocklist_companies(): + from scripts.suggest_helpers import suggest_search_terms + with _mock_llm({"suggested_titles": [], "suggested_excludes": []}) as mock_cls: + suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE) + prompt_sent = mock_cls.return_value.complete.call_args[0][0] + assert "Meta" in prompt_sent + assert "Amazon" in prompt_sent + + +def test_suggest_search_terms_prompt_contains_mission(): + from scripts.suggest_helpers import suggest_search_terms + with 
_mock_llm({"suggested_titles": [], "suggested_excludes": []}) as mock_cls: + suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE) + prompt_sent = mock_cls.return_value.complete.call_args[0][0] + assert "animal_welfare" in prompt_sent or "animal welfare" in prompt_sent.lower() + + +def test_suggest_search_terms_prompt_contains_career_summary(): + from scripts.suggest_helpers import suggest_search_terms + with _mock_llm({"suggested_titles": [], "suggested_excludes": []}) as mock_cls: + suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE) + prompt_sent = mock_cls.return_value.complete.call_args[0][0] + assert "Customer success leader" in prompt_sent + + +def test_suggest_search_terms_returns_empty_on_bad_json(): + from scripts.suggest_helpers import suggest_search_terms + mock_router = MagicMock() + mock_router.complete.return_value = "sorry, I cannot help with that" + with patch("scripts.suggest_helpers.LLMRouter", return_value=mock_router): + result = suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE) + assert result == {"suggested_titles": [], "suggested_excludes": []} + + +def test_suggest_search_terms_raises_on_llm_exhausted(): + from scripts.suggest_helpers import suggest_search_terms + mock_router = MagicMock() + mock_router.complete.side_effect = RuntimeError("All LLM backends exhausted") + with patch("scripts.suggest_helpers.LLMRouter", return_value=mock_router): + with pytest.raises(RuntimeError, match="All LLM backends exhausted"): + suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE) + + +# ── suggest_resume_keywords ─────────────────────────────────────────────────── + +CURRENT_KW = { + "skills": ["Customer Success", "SQL"], + "domains": ["B2B SaaS"], + "keywords": ["NPS"], +} + + +def test_suggest_resume_keywords_returns_all_three_categories(): + from scripts.suggest_helpers import suggest_resume_keywords + payload = { + "skills": ["Project Management"], + "domains": ["EdTech"], + "keywords": 
["churn prevention"], + } + with _mock_llm(payload): + result = suggest_resume_keywords(RESUME_PATH, CURRENT_KW) + assert "skills" in result + assert "domains" in result + assert "keywords" in result + + +def test_suggest_resume_keywords_excludes_already_selected(): + from scripts.suggest_helpers import suggest_resume_keywords + with _mock_llm({"skills": [], "domains": [], "keywords": []}) as mock_cls: + suggest_resume_keywords(RESUME_PATH, CURRENT_KW) + prompt_sent = mock_cls.return_value.complete.call_args[0][0] + # Already-selected tags should appear in the prompt so LLM knows to skip them + assert "Customer Success" in prompt_sent + assert "NPS" in prompt_sent + + +def test_suggest_resume_keywords_returns_empty_on_bad_json(): + from scripts.suggest_helpers import suggest_resume_keywords + mock_router = MagicMock() + mock_router.complete.return_value = "I cannot assist." + with patch("scripts.suggest_helpers.LLMRouter", return_value=mock_router): + result = suggest_resume_keywords(RESUME_PATH, CURRENT_KW) + assert result == {"skills": [], "domains": [], "keywords": []} + + +def test_suggest_resume_keywords_raises_on_llm_exhausted(): + from scripts.suggest_helpers import suggest_resume_keywords + mock_router = MagicMock() + mock_router.complete.side_effect = RuntimeError("All LLM backends exhausted") + with patch("scripts.suggest_helpers.LLMRouter", return_value=mock_router): + with pytest.raises(RuntimeError, match="All LLM backends exhausted"): + suggest_resume_keywords(RESUME_PATH, CURRENT_KW)