Compare commits
12 commits
95c5a12196
...
7d15980bdd
| Author | SHA1 | Date | |
|---|---|---|---|
| 7d15980bdd | |||
| 9603d591a3 | |||
| f3617abb6b | |||
| 6b59804d35 | |||
| 7b9e758861 | |||
| 070be6c2e9 | |||
| 083dff2ec8 | |||
| ac1db1ea7f | |||
| 260d186c86 | |||
| 04d0a66f21 | |||
| 32ed451933 | |||
| 6c61290218 |
18 changed files with 1312 additions and 78 deletions
|
|
@ -10,8 +10,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
# Install Python dependencies
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install Playwright browser (cached separately from Python deps so requirements
|
||||
# changes don't bust the ~600–900 MB Chromium layer and vice versa)
|
||||
RUN playwright install chromium && playwright install-deps chromium
|
||||
|
||||
# Bundle companyScraper (company research web scraper)
|
||||
COPY scrapers/ /app/scrapers/
|
||||
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ _SETUP_BANNERS = [
|
|||
{"key": "upload_corpus", "text": "Upload your cover letter corpus for voice fine-tuning",
|
||||
"link_label": "Settings → Fine-Tune"},
|
||||
{"key": "configure_linkedin", "text": "Configure LinkedIn Easy Apply automation",
|
||||
"link_label": "Settings → AIHawk"},
|
||||
"link_label": "Settings → Integrations"},
|
||||
{"key": "setup_searxng", "text": "Set up company research with SearXNG",
|
||||
"link_label": "Settings → Services"},
|
||||
{"key": "target_companies", "text": "Build a target company list for focused outreach",
|
||||
|
|
|
|||
|
|
@ -22,11 +22,11 @@ IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
|||
import streamlit as st
|
||||
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
||||
from app.feedback import inject_feedback_button
|
||||
from app.cloud_session import resolve_session, get_db_path
|
||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir
|
||||
import sqlite3
|
||||
|
||||
st.set_page_config(
|
||||
page_title="Job Seeker",
|
||||
page_title="Peregrine",
|
||||
page_icon="💼",
|
||||
layout="wide",
|
||||
)
|
||||
|
|
@ -80,7 +80,7 @@ except Exception:
|
|||
|
||||
# ── First-run wizard gate ───────────────────────────────────────────────────────
|
||||
from scripts.user_profile import UserProfile as _UserProfile
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_USER_YAML = get_config_dir() / "user.yaml"
|
||||
|
||||
_show_wizard = not IS_DEMO and (
|
||||
not _UserProfile.exists(_USER_YAML)
|
||||
|
|
|
|||
|
|
@ -112,13 +112,19 @@ def resolve_session(app: str = "peregrine") -> None:
|
|||
cookie_header = st.context.headers.get("x-cf-session", "")
|
||||
session_jwt = _extract_session_token(cookie_header)
|
||||
if not session_jwt:
|
||||
st.error("Session token missing. Please log in at circuitforge.tech.")
|
||||
st.components.v1.html(
|
||||
'<script>window.top.location.href = "https://circuitforge.tech/login";</script>',
|
||||
height=0,
|
||||
)
|
||||
st.stop()
|
||||
|
||||
try:
|
||||
user_id = validate_session_jwt(session_jwt)
|
||||
except Exception as exc:
|
||||
st.error(f"Invalid session — please log in again. ({exc})")
|
||||
except Exception:
|
||||
st.components.v1.html(
|
||||
'<script>window.top.location.href = "https://circuitforge.tech/login";</script>',
|
||||
height=0,
|
||||
)
|
||||
st.stop()
|
||||
|
||||
user_path = _user_data_path(user_id, app)
|
||||
|
|
@ -141,6 +147,19 @@ def get_db_path() -> Path:
|
|||
return st.session_state.get("db_path", DEFAULT_DB)
|
||||
|
||||
|
||||
def get_config_dir() -> Path:
|
||||
"""
|
||||
Return the config directory for this session.
|
||||
Cloud: per-user path (<data_root>/<user_id>/peregrine/config/) so each
|
||||
user's YAML files (user.yaml, plain_text_resume.yaml, etc.) are
|
||||
isolated and never shared across tenants.
|
||||
Local: repo-level config/ directory.
|
||||
"""
|
||||
if CLOUD_MODE and st.session_state.get("db_path"):
|
||||
return Path(st.session_state["db_path"]).parent / "config"
|
||||
return Path(__file__).parent.parent.parent / "config"
|
||||
|
||||
|
||||
def get_cloud_tier() -> str:
|
||||
"""
|
||||
Return the current user's cloud tier.
|
||||
|
|
|
|||
1
app/components/__init__.py
Normal file
1
app/components/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
# app/components/__init__.py
|
||||
185
app/components/linkedin_import.py
Normal file
185
app/components/linkedin_import.py
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
# app/components/linkedin_import.py
|
||||
"""
|
||||
Shared LinkedIn import widget.
|
||||
|
||||
Usage in a page:
|
||||
from app.components.linkedin_import import render_linkedin_tab
|
||||
|
||||
# At top of page render — check for pending import:
|
||||
_li_data = st.session_state.pop("_linkedin_extracted", None)
|
||||
if _li_data:
|
||||
st.session_state["_parsed_resume"] = _li_data
|
||||
st.rerun()
|
||||
|
||||
# Inside the LinkedIn tab:
|
||||
with tab_linkedin:
|
||||
render_linkedin_tab(config_dir=CONFIG_DIR, tier=tier)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import streamlit as st
|
||||
|
||||
_LINKEDIN_PROFILE_RE = re.compile(r"https?://(www\.)?linkedin\.com/in/", re.I)
|
||||
|
||||
|
||||
def _stage_path(config_dir: Path) -> Path:
|
||||
return config_dir / "linkedin_stage.json"
|
||||
|
||||
|
||||
def _load_stage(config_dir: Path) -> dict | None:
|
||||
path = _stage_path(config_dir)
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
return json.loads(path.read_text())
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _days_ago(iso_ts: str) -> str:
|
||||
try:
|
||||
dt = datetime.fromisoformat(iso_ts)
|
||||
delta = datetime.now(timezone.utc) - dt
|
||||
days = delta.days
|
||||
if days == 0:
|
||||
return "today"
|
||||
if days == 1:
|
||||
return "yesterday"
|
||||
return f"{days} days ago"
|
||||
except Exception:
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _do_scrape(url: str, config_dir: Path) -> None:
|
||||
"""Validate URL, run scrape, update state."""
|
||||
if not _LINKEDIN_PROFILE_RE.match(url):
|
||||
st.error("Please enter a LinkedIn profile URL (linkedin.com/in/…)")
|
||||
return
|
||||
|
||||
with st.spinner("Fetching LinkedIn profile… (10–20 seconds)"):
|
||||
try:
|
||||
from scripts.linkedin_scraper import scrape_profile
|
||||
scrape_profile(url, _stage_path(config_dir))
|
||||
st.success("Profile imported successfully.")
|
||||
st.rerun()
|
||||
except ValueError as e:
|
||||
st.error(str(e))
|
||||
except RuntimeError as e:
|
||||
st.warning(str(e))
|
||||
except Exception as e:
|
||||
st.error(f"Unexpected error: {e}")
|
||||
|
||||
|
||||
def render_linkedin_tab(config_dir: Path, tier: str) -> None:
|
||||
"""
|
||||
Render the LinkedIn import UI.
|
||||
|
||||
When the user clicks "Use this data", writes the extracted dict to
|
||||
st.session_state["_linkedin_extracted"] and calls st.rerun().
|
||||
|
||||
Caller reads: data = st.session_state.pop("_linkedin_extracted", None)
|
||||
"""
|
||||
stage = _load_stage(config_dir)
|
||||
|
||||
# ── Staged data status bar ────────────────────────────────────────────────
|
||||
if stage:
|
||||
scraped_at = stage.get("scraped_at", "")
|
||||
source_label = "LinkedIn export" if stage.get("source") == "export_zip" else "LinkedIn profile"
|
||||
col_info, col_refresh = st.columns([4, 1])
|
||||
col_info.caption(f"Last imported from {source_label}: {_days_ago(scraped_at)}")
|
||||
if col_refresh.button("🔄 Refresh", key="li_refresh"):
|
||||
url = stage.get("url")
|
||||
if url:
|
||||
_do_scrape(url, config_dir)
|
||||
else:
|
||||
st.info("Original URL not available — paste the URL below to re-import.")
|
||||
|
||||
# ── URL import ────────────────────────────────────────────────────────────
|
||||
st.markdown("**Import from LinkedIn profile URL**")
|
||||
url_input = st.text_input(
|
||||
"LinkedIn profile URL",
|
||||
placeholder="https://linkedin.com/in/your-name",
|
||||
label_visibility="collapsed",
|
||||
key="li_url_input",
|
||||
)
|
||||
if st.button("🔗 Import from LinkedIn", key="li_import_btn", type="primary"):
|
||||
if not url_input.strip():
|
||||
st.warning("Please enter your LinkedIn profile URL.")
|
||||
else:
|
||||
_do_scrape(url_input.strip(), config_dir)
|
||||
|
||||
st.caption(
|
||||
"Imports from your public LinkedIn profile. No login or credentials required. "
|
||||
"Scraping typically takes 10–20 seconds."
|
||||
)
|
||||
|
||||
# ── Section preview + use button ─────────────────────────────────────────
|
||||
if stage:
|
||||
from scripts.linkedin_parser import parse_stage
|
||||
extracted, err = parse_stage(_stage_path(config_dir))
|
||||
|
||||
if err:
|
||||
st.warning(f"Could not read staged data: {err}")
|
||||
else:
|
||||
st.divider()
|
||||
st.markdown("**Preview**")
|
||||
col1, col2, col3 = st.columns(3)
|
||||
col1.metric("Experience entries", len(extracted.get("experience", [])))
|
||||
col2.metric("Skills", len(extracted.get("skills", [])))
|
||||
col3.metric("Certifications", len(extracted.get("achievements", [])))
|
||||
|
||||
if extracted.get("career_summary"):
|
||||
with st.expander("Summary"):
|
||||
st.write(extracted["career_summary"])
|
||||
|
||||
if extracted.get("experience"):
|
||||
with st.expander(f"Experience ({len(extracted['experience'])} entries)"):
|
||||
for exp in extracted["experience"]:
|
||||
st.markdown(f"**{exp.get('title')}** @ {exp.get('company')} · {exp.get('date_range', '')}")
|
||||
|
||||
if extracted.get("education"):
|
||||
with st.expander("Education"):
|
||||
for edu in extracted["education"]:
|
||||
st.markdown(f"**{edu.get('school')}** — {edu.get('degree')} {edu.get('field', '')}".strip())
|
||||
|
||||
if extracted.get("skills"):
|
||||
with st.expander("Skills"):
|
||||
st.write(", ".join(extracted["skills"]))
|
||||
|
||||
st.divider()
|
||||
if st.button("✅ Use this data", key="li_use_btn", type="primary"):
|
||||
st.session_state["_linkedin_extracted"] = extracted
|
||||
st.rerun()
|
||||
|
||||
# ── Advanced: data export ─────────────────────────────────────────────────
|
||||
with st.expander("⬇️ Import from LinkedIn data export (advanced)", expanded=False):
|
||||
st.caption(
|
||||
"Download your LinkedIn data: **Settings & Privacy → Data Privacy → "
|
||||
"Get a copy of your data → Request archive → Fast file**. "
|
||||
"The Fast file is available immediately and contains your profile, "
|
||||
"experience, education, and skills."
|
||||
)
|
||||
zip_file = st.file_uploader(
|
||||
"Upload LinkedIn export zip", type=["zip"], key="li_zip_upload"
|
||||
)
|
||||
if zip_file is not None:
|
||||
if st.button("📦 Parse export", key="li_parse_zip"):
|
||||
with st.spinner("Parsing export archive…"):
|
||||
try:
|
||||
from scripts.linkedin_scraper import parse_export_zip
|
||||
extracted = parse_export_zip(
|
||||
zip_file.read(), _stage_path(config_dir)
|
||||
)
|
||||
st.success(
|
||||
f"Imported {len(extracted.get('experience', []))} experience entries, "
|
||||
f"{len(extracted.get('skills', []))} skills. "
|
||||
"Click 'Use this data' above to apply."
|
||||
)
|
||||
st.rerun()
|
||||
except Exception as e:
|
||||
st.error(f"Failed to parse export: {e}")
|
||||
|
|
@ -15,14 +15,14 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|||
import streamlit as st
|
||||
import yaml
|
||||
|
||||
from app.cloud_session import resolve_session, get_db_path
|
||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir
|
||||
resolve_session("peregrine")
|
||||
|
||||
_ROOT = Path(__file__).parent.parent.parent
|
||||
CONFIG_DIR = _ROOT / "config"
|
||||
CONFIG_DIR = get_config_dir() # per-user dir in cloud; repo config/ locally
|
||||
USER_YAML = CONFIG_DIR / "user.yaml"
|
||||
STEPS = 6 # mandatory steps
|
||||
STEP_LABELS = ["Hardware", "Tier", "Identity", "Resume", "Inference", "Search"]
|
||||
STEP_LABELS = ["Hardware", "Tier", "Resume", "Identity", "Inference", "Search"]
|
||||
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -179,6 +179,13 @@ st.divider()
|
|||
|
||||
# ── Step 1: Hardware ───────────────────────────────────────────────────────────
|
||||
if step == 1:
|
||||
from app.cloud_session import CLOUD_MODE as _CLOUD_MODE
|
||||
if _CLOUD_MODE:
|
||||
# Cloud deployment: always single-gpu (Heimdall), skip hardware selection
|
||||
_save_yaml({"inference_profile": "single-gpu", "wizard_step": 1})
|
||||
st.session_state.wizard_step = 2
|
||||
st.rerun()
|
||||
|
||||
from app.wizard.step_hardware import validate, PROFILES
|
||||
|
||||
st.subheader("Step 1 \u2014 Hardware Detection")
|
||||
|
|
@ -212,6 +219,14 @@ if step == 1:
|
|||
|
||||
# ── Step 2: Tier ───────────────────────────────────────────────────────────────
|
||||
elif step == 2:
|
||||
from app.cloud_session import CLOUD_MODE as _CLOUD_MODE
|
||||
if _CLOUD_MODE:
|
||||
# Cloud mode: tier already resolved from Heimdall at session init
|
||||
cloud_tier = st.session_state.get("cloud_tier", "free")
|
||||
_save_yaml({"tier": cloud_tier, "wizard_step": 2})
|
||||
st.session_state.wizard_step = 3
|
||||
st.rerun()
|
||||
|
||||
from app.wizard.step_tier import validate
|
||||
|
||||
st.subheader("Step 2 \u2014 Choose Your Plan")
|
||||
|
|
@ -248,63 +263,21 @@ elif step == 2:
|
|||
st.rerun()
|
||||
|
||||
|
||||
# ── Step 3: Identity ───────────────────────────────────────────────────────────
|
||||
# ── Step 3: Resume ─────────────────────────────────────────────────────────────
|
||||
elif step == 3:
|
||||
from app.wizard.step_identity import validate
|
||||
|
||||
st.subheader("Step 3 \u2014 Your Identity")
|
||||
st.caption("Used in cover letter PDFs, LLM prompts, and the app header.")
|
||||
|
||||
c1, c2 = st.columns(2)
|
||||
name = c1.text_input("Full Name *", saved_yaml.get("name", ""))
|
||||
email = c1.text_input("Email *", saved_yaml.get("email", ""))
|
||||
phone = c2.text_input("Phone", saved_yaml.get("phone", ""))
|
||||
linkedin = c2.text_input("LinkedIn URL", saved_yaml.get("linkedin", ""))
|
||||
|
||||
# Career summary with optional LLM generation
|
||||
summary_default = st.session_state.get("_gen_result_career_summary") or saved_yaml.get("career_summary", "")
|
||||
summary = st.text_area(
|
||||
"Career Summary *", value=summary_default, height=120,
|
||||
placeholder="Experienced professional with X years in [field]. Specialise in [skills].",
|
||||
help="Injected into cover letter and research prompts as your professional context.",
|
||||
)
|
||||
|
||||
gen_result = _generation_widget(
|
||||
section="career_summary",
|
||||
label="Generate from resume",
|
||||
tier=_tier,
|
||||
feature_key="llm_career_summary",
|
||||
input_data={"resume_text": saved_yaml.get("_raw_resume_text", "")},
|
||||
)
|
||||
if gen_result and gen_result != summary:
|
||||
st.info(f"\u2728 Suggested summary \u2014 paste it above if it looks good:\n\n{gen_result}")
|
||||
|
||||
col_back, col_next = st.columns([1, 4])
|
||||
if col_back.button("\u2190 Back", key="ident_back"):
|
||||
st.session_state.wizard_step = 2
|
||||
st.rerun()
|
||||
if col_next.button("Next \u2192", type="primary", key="ident_next"):
|
||||
errs = validate({"name": name, "email": email, "career_summary": summary})
|
||||
if errs:
|
||||
st.error("\n".join(errs))
|
||||
else:
|
||||
_save_yaml({
|
||||
"name": name, "email": email, "phone": phone,
|
||||
"linkedin": linkedin, "career_summary": summary,
|
||||
"wizard_complete": False, "wizard_step": 3,
|
||||
})
|
||||
st.session_state.wizard_step = 4
|
||||
st.rerun()
|
||||
|
||||
|
||||
# ── Step 4: Resume ─────────────────────────────────────────────────────────────
|
||||
elif step == 4:
|
||||
from app.wizard.step_resume import validate
|
||||
|
||||
st.subheader("Step 4 \u2014 Resume")
|
||||
st.subheader("Step 3 \u2014 Resume")
|
||||
st.caption("Upload your resume for fast parsing, or build it section by section.")
|
||||
|
||||
tab_upload, tab_builder = st.tabs(["\U0001f4ce Upload", "\U0001f4dd Build manually"])
|
||||
# Read LinkedIn import result before tabs render (spec: "at step render time")
|
||||
_li_data = st.session_state.pop("_linkedin_extracted", None)
|
||||
if _li_data:
|
||||
st.session_state["_parsed_resume"] = _li_data
|
||||
|
||||
tab_upload, tab_builder, tab_linkedin = st.tabs([
|
||||
"\U0001f4ce Upload", "\U0001f4dd Build Manually", "\U0001f517 LinkedIn"
|
||||
])
|
||||
|
||||
with tab_upload:
|
||||
uploaded = st.file_uploader("Upload PDF, DOCX, or ODT", type=["pdf", "docx", "odt"])
|
||||
|
|
@ -393,9 +366,13 @@ elif step == 4:
|
|||
input_data={"bullet_notes": all_bullets},
|
||||
)
|
||||
|
||||
with tab_linkedin:
|
||||
from app.components.linkedin_import import render_linkedin_tab
|
||||
render_linkedin_tab(config_dir=CONFIG_DIR, tier=_tier)
|
||||
|
||||
col_back, col_next = st.columns([1, 4])
|
||||
if col_back.button("\u2190 Back", key="resume_back"):
|
||||
st.session_state.wizard_step = 3
|
||||
st.session_state.wizard_step = 2
|
||||
st.rerun()
|
||||
if col_next.button("Next \u2192", type="primary", key="resume_next"):
|
||||
parsed = st.session_state.get("_parsed_resume", {})
|
||||
|
|
@ -407,19 +384,75 @@ elif step == 4:
|
|||
if errs:
|
||||
st.error("\n".join(errs))
|
||||
else:
|
||||
resume_yaml_path = _ROOT / "config" / "plain_text_resume.yaml"
|
||||
resume_yaml_path = CONFIG_DIR / "plain_text_resume.yaml"
|
||||
resume_yaml_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
resume_data = {**parsed, "experience": experience} if parsed else {"experience": experience}
|
||||
resume_yaml_path.write_text(
|
||||
yaml.dump(resume_data, default_flow_style=False, allow_unicode=True)
|
||||
)
|
||||
_save_yaml({"wizard_step": 4})
|
||||
_save_yaml({"wizard_step": 3})
|
||||
st.session_state.wizard_step = 4
|
||||
st.rerun()
|
||||
|
||||
|
||||
# ── Step 4: Identity ───────────────────────────────────────────────────────────
|
||||
elif step == 4:
|
||||
from app.wizard.step_identity import validate
|
||||
|
||||
st.subheader("Step 4 \u2014 Your Identity")
|
||||
st.caption("Used in cover letter PDFs, LLM prompts, and the app header.")
|
||||
|
||||
c1, c2 = st.columns(2)
|
||||
name = c1.text_input("Full Name *", saved_yaml.get("name", ""))
|
||||
email = c1.text_input("Email *", saved_yaml.get("email", ""))
|
||||
phone = c2.text_input("Phone", saved_yaml.get("phone", ""))
|
||||
linkedin = c2.text_input("LinkedIn URL", saved_yaml.get("linkedin", ""))
|
||||
|
||||
# Career summary with optional LLM generation — resume text available now (step 3 ran first)
|
||||
summary_default = st.session_state.get("_gen_result_career_summary") or saved_yaml.get("career_summary", "")
|
||||
summary = st.text_area(
|
||||
"Career Summary *", value=summary_default, height=120,
|
||||
placeholder="Experienced professional with X years in [field]. Specialise in [skills].",
|
||||
help="Injected into cover letter and research prompts as your professional context.",
|
||||
)
|
||||
|
||||
gen_result = _generation_widget(
|
||||
section="career_summary",
|
||||
label="Generate from resume",
|
||||
tier=_tier,
|
||||
feature_key="llm_career_summary",
|
||||
input_data={"resume_text": saved_yaml.get("_raw_resume_text", "")},
|
||||
)
|
||||
if gen_result and gen_result != summary:
|
||||
st.info(f"\u2728 Suggested summary \u2014 paste it above if it looks good:\n\n{gen_result}")
|
||||
|
||||
col_back, col_next = st.columns([1, 4])
|
||||
if col_back.button("\u2190 Back", key="ident_back"):
|
||||
st.session_state.wizard_step = 3
|
||||
st.rerun()
|
||||
if col_next.button("Next \u2192", type="primary", key="ident_next"):
|
||||
errs = validate({"name": name, "email": email, "career_summary": summary})
|
||||
if errs:
|
||||
st.error("\n".join(errs))
|
||||
else:
|
||||
_save_yaml({
|
||||
"name": name, "email": email, "phone": phone,
|
||||
"linkedin": linkedin, "career_summary": summary,
|
||||
"wizard_complete": False, "wizard_step": 4,
|
||||
})
|
||||
st.session_state.wizard_step = 5
|
||||
st.rerun()
|
||||
|
||||
|
||||
# ── Step 5: Inference ──────────────────────────────────────────────────────────
|
||||
elif step == 5:
|
||||
from app.cloud_session import CLOUD_MODE as _CLOUD_MODE
|
||||
if _CLOUD_MODE:
|
||||
# Cloud deployment: inference is managed server-side; skip this step
|
||||
_save_yaml({"wizard_step": 5})
|
||||
st.session_state.wizard_step = 6
|
||||
st.rerun()
|
||||
|
||||
from app.wizard.step_inference import validate
|
||||
|
||||
st.subheader("Step 5 \u2014 Inference & API Keys")
|
||||
|
|
|
|||
|
|
@ -12,23 +12,24 @@ import yaml
|
|||
import os as _os
|
||||
|
||||
from scripts.user_profile import UserProfile
|
||||
from app.cloud_session import resolve_session, get_db_path, CLOUD_MODE
|
||||
|
||||
_USER_YAML = Path(__file__).parent.parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
_name = _profile.name if _profile else "Job Seeker"
|
||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir, CLOUD_MODE
|
||||
|
||||
resolve_session("peregrine")
|
||||
st.title("⚙️ Settings")
|
||||
|
||||
CONFIG_DIR = Path(__file__).parent.parent.parent / "config"
|
||||
# Config paths — per-user directory in cloud mode, shared repo config/ locally
|
||||
CONFIG_DIR = get_config_dir()
|
||||
SEARCH_CFG = CONFIG_DIR / "search_profiles.yaml"
|
||||
BLOCKLIST_CFG = CONFIG_DIR / "blocklist.yaml"
|
||||
LLM_CFG = CONFIG_DIR / "llm.yaml"
|
||||
NOTION_CFG = CONFIG_DIR / "notion.yaml"
|
||||
RESUME_PATH = Path(__file__).parent.parent.parent / "config" / "plain_text_resume.yaml"
|
||||
RESUME_PATH = CONFIG_DIR / "plain_text_resume.yaml"
|
||||
KEYWORDS_CFG = CONFIG_DIR / "resume_keywords.yaml"
|
||||
|
||||
_USER_YAML = CONFIG_DIR / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
_name = _profile.name if _profile else "Peregrine User"
|
||||
|
||||
def load_yaml(path: Path) -> dict:
|
||||
if path.exists():
|
||||
return yaml.safe_load(path.read_text()) or {}
|
||||
|
|
@ -54,8 +55,9 @@ def _suggest_search_terms(current_titles, resume_path, blocklist=None, user_prof
|
|||
_show_finetune = bool(_profile and _profile.inference_profile in ("single-gpu", "dual-gpu"))
|
||||
|
||||
USER_CFG = CONFIG_DIR / "user.yaml"
|
||||
SERVER_CFG = CONFIG_DIR / "server.yaml"
|
||||
SERVER_CFG_EXAMPLE = CONFIG_DIR / "server.yaml.example"
|
||||
# Server config is always repo-level — it controls the container, not the user
|
||||
SERVER_CFG = Path(__file__).parent.parent.parent / "config" / "server.yaml"
|
||||
SERVER_CFG_EXAMPLE = Path(__file__).parent.parent.parent / "config" / "server.yaml.example"
|
||||
|
||||
_dev_mode = _os.getenv("DEV_MODE", "").lower() in ("true", "1", "yes")
|
||||
_u_for_dev = yaml.safe_load(USER_CFG.read_text()) or {} if USER_CFG.exists() else {}
|
||||
|
|
@ -587,6 +589,23 @@ def _upload_resume_widget(key_prefix: str) -> None:
|
|||
)
|
||||
|
||||
with tab_resume:
|
||||
# ── LinkedIn import ───────────────────────────────────────────────────────
|
||||
_li_data = st.session_state.pop("_linkedin_extracted", None)
|
||||
if _li_data:
|
||||
# Merge imported data into resume YAML — only bootstrap empty fields,
|
||||
# never overwrite existing detail with sparse LinkedIn data
|
||||
existing = load_yaml(RESUME_PATH)
|
||||
existing.update({k: v for k, v in _li_data.items() if v and not existing.get(k)})
|
||||
RESUME_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
save_yaml(RESUME_PATH, existing)
|
||||
st.success("LinkedIn data applied to resume profile.")
|
||||
st.rerun()
|
||||
|
||||
with st.expander("🔗 Import from LinkedIn", expanded=False):
|
||||
from app.components.linkedin_import import render_linkedin_tab
|
||||
_tab_tier = _profile.tier if _profile else "free"
|
||||
render_linkedin_tab(config_dir=CONFIG_DIR, tier=_tab_tier)
|
||||
|
||||
st.caption(
|
||||
f"Edit {_name}'s application profile. "
|
||||
"Bullets are used as paste-able shortcuts in the Apply Workspace."
|
||||
|
|
@ -867,6 +886,14 @@ with tab_resume:
|
|||
with tab_system:
|
||||
st.caption("Infrastructure, LLM backends, integrations, and service connections.")
|
||||
|
||||
if CLOUD_MODE:
|
||||
st.info(
|
||||
"**Your instance is managed by CircuitForge.**\n\n"
|
||||
"Infrastructure, LLM backends, and service settings are configured by the platform. "
|
||||
"To change your plan or billing, visit your [account page](https://circuitforge.tech/account)."
|
||||
)
|
||||
st.stop()
|
||||
|
||||
# ── File Paths & Inference ────────────────────────────────────────────────
|
||||
with st.expander("📁 File Paths & Inference Profile"):
|
||||
_su = _yaml_up.safe_load(USER_CFG.read_text()) or {} if USER_CFG.exists() else {}
|
||||
|
|
@ -1464,6 +1491,13 @@ with tab_finetune:
|
|||
with tab_license:
|
||||
st.subheader("🔑 License")
|
||||
|
||||
if CLOUD_MODE:
|
||||
_cloud_tier = st.session_state.get("cloud_tier", "free")
|
||||
st.success(f"**{_cloud_tier.title()} tier** — managed via your CircuitForge account")
|
||||
st.caption("Your plan is tied to your account and applied automatically.")
|
||||
st.page_link("https://circuitforge.tech/account", label="Manage plan →", icon="🔗")
|
||||
st.stop()
|
||||
|
||||
from scripts.license import (
|
||||
verify_local as _verify_local,
|
||||
activate as _activate,
|
||||
|
|
|
|||
|
|
@ -389,7 +389,7 @@ with col_tools:
|
|||
|
||||
st.markdown("---")
|
||||
else:
|
||||
st.warning("Resume YAML not found — check that AIHawk is cloned.")
|
||||
st.warning("Resume profile not found — complete setup or upload a resume in Settings → Resume Profile.")
|
||||
|
||||
# ── Application Q&A ───────────────────────────────────────────────────────
|
||||
with st.expander("💬 Answer Application Questions"):
|
||||
|
|
|
|||
|
|
@ -2,6 +2,52 @@
|
|||
|
||||
Unscheduled ideas and deferred features. Roughly grouped by area.
|
||||
|
||||
See also: `circuitforge-plans/shared/2026-03-07-launch-checklist.md` for pre-launch blockers
|
||||
(legal docs, Stripe live keys, website deployment, demo DB ownership fix).
|
||||
|
||||
---
|
||||
|
||||
## Launch Blockers (tracked in shared launch checklist)
|
||||
|
||||
- **ToS + Refund Policy** — required before live Stripe charges. Files go in `website/content/legal/`.
|
||||
- **Stripe live key rotation** — swap test keys to live in `website/.env` (zero code changes).
|
||||
- **Website deployment to bastion** — Caddy route for Nuxt frontend at `circuitforge.tech`.
|
||||
- **Demo DB ownership** — `demo/data/staging.db` is root-owned (Docker artifact); fix with `sudo chown alan:alan` then re-run `demo/seed_demo.py`.
|
||||
|
||||
---
|
||||
|
||||
## Post-Launch / Infrastructure
|
||||
|
||||
- **Accessibility Statement** — WCAG 2.1 conformance doc at `website/content/legal/accessibility.md`. High credibility value for ND audience.
|
||||
- **Data deletion request process** — published procedure at `website/content/legal/data-deletion.md` (GDPR/CCPA; references `privacy@circuitforge.tech`).
|
||||
- **Uptime Kuma monitors** — 6 monitors need to be added manually (website, Heimdall, demo, Directus, Forgejo, Peregrine container health).
|
||||
- **Directus admin password rotation** — change from `changeme-set-via-ui-on-first-run` before website goes public.
|
||||
|
||||
---
|
||||
|
||||
## Discovery — Community Scraper Plugin System
|
||||
|
||||
Design doc: `circuitforge-plans/peregrine/2026-03-07-community-scraper-plugin-design.md`
|
||||
|
||||
**Summary:** Add a `scripts/plugins/` directory with auto-discovery and a documented MIT-licensed
|
||||
plugin API. Separates CF-built custom scrapers (paid, BSL 1.1, in `scripts/custom_boards/`) from
|
||||
community-contributed and CF-freebie scrapers (free, MIT, in `scripts/plugins/`).
|
||||
|
||||
**Implementation tasks:**
|
||||
- [ ] Add `scripts/plugins/` with `__init__.py`, `README.md`, and `example_plugin.py`
|
||||
- [ ] Add `config/plugins/` directory with `.gitkeep`; gitignore `config/plugins/*.yaml` (not `.example`)
|
||||
- [ ] Update `discover.py`: `load_plugins()` auto-discovery + tier gate (`custom_boards` = paid, `plugins` = free)
|
||||
- [ ] Update `search_profiles.yaml` schema: add `plugins:` list + `plugin_config:` block
|
||||
- [ ] Migrate `scripts/custom_boards/craigslist.py` → `scripts/plugins/craigslist.py` (CF freebie)
|
||||
- [ ] Settings UI: render `CONFIG_SCHEMA` fields for installed plugins (Settings → Search)
|
||||
- [ ] Rewrite `docs/developer-guide/adding-scrapers.md` to document the plugin API
|
||||
- [ ] Add `scripts/plugins/LICENSE` (MIT) to make the dual-license split explicit
|
||||
|
||||
**CF freebie candidates** (future, after plugin system ships):
|
||||
- Dice.com (tech-focused, no API key)
|
||||
- We Work Remotely (remote-only, clean HTML)
|
||||
- Wellfound / AngelList (startup roles)
|
||||
|
||||
---
|
||||
|
||||
## Settings / Data Management
|
||||
|
|
|
|||
56
scripts/linkedin_parser.py
Normal file
56
scripts/linkedin_parser.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# scripts/linkedin_parser.py
|
||||
"""
|
||||
LinkedIn staging file reader.
|
||||
|
||||
parse_stage(stage_path) reads an existing staging file and returns
|
||||
a structured dict. For url_scrape sources it re-runs the HTML parser
|
||||
so improvements to linkedin_utils take effect without a new scrape.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from scripts.linkedin_utils import parse_html
|
||||
|
||||
|
||||
def parse_stage(stage_path: Path) -> tuple[dict, str]:
|
||||
"""
|
||||
Read and return the extracted profile data from a staging file.
|
||||
|
||||
For url_scrape sources: re-runs parse_html on stored raw_html so
|
||||
parser improvements are applied without re-scraping.
|
||||
|
||||
Returns (extracted_dict, error_string).
|
||||
On any failure returns ({}, error_message).
|
||||
"""
|
||||
if not stage_path.exists():
|
||||
return {}, f"No staged data found at {stage_path}"
|
||||
|
||||
try:
|
||||
data = json.loads(stage_path.read_text())
|
||||
except Exception as e:
|
||||
return {}, f"Could not read staging file: {e}"
|
||||
|
||||
source = data.get("source")
|
||||
raw_html = data.get("raw_html")
|
||||
|
||||
if source == "url_scrape" and raw_html:
|
||||
# Re-run the parser — picks up any selector improvements
|
||||
extracted = parse_html(raw_html)
|
||||
# Preserve linkedin URL — parse_html always returns "" for this field
|
||||
extracted["linkedin"] = extracted.get("linkedin") or data.get("url") or ""
|
||||
|
||||
# Write updated extracted back to staging file atomically
|
||||
data["extracted"] = extracted
|
||||
tmp = stage_path.with_suffix(".tmp")
|
||||
tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2))
|
||||
tmp.rename(stage_path)
|
||||
|
||||
return extracted, ""
|
||||
|
||||
extracted = data.get("extracted")
|
||||
if not extracted:
|
||||
return {}, "Staging file has no extracted data"
|
||||
|
||||
return extracted, ""
|
||||
169
scripts/linkedin_scraper.py
Normal file
169
scripts/linkedin_scraper.py
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
# scripts/linkedin_scraper.py
"""
LinkedIn profile scraper.

Two entry points:
    scrape_profile(url, stage_path)       — Playwright headless fetch
    parse_export_zip(zip_bytes, stage_path) — LinkedIn data archive CSV parse

Both write a staging file at stage_path and return the extracted dict.
"""
from __future__ import annotations

import csv
import io
import json
import re
import zipfile
from datetime import datetime, timezone
from pathlib import Path

from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout
from scripts.linkedin_utils import parse_html

# Accepts http/https and optional "www." profile URLs (linkedin.com/in/…).
# NOTE(review): country/mobile subdomains (e.g. de.linkedin.com, m.linkedin.com)
# are rejected by this pattern — confirm whether they should be accepted.
_LINKEDIN_PROFILE_RE = re.compile(r"https?://(www\.)?linkedin\.com/in/", re.I)

# Desktop Chrome user-agent sent with the headless page request —
# presumably to avoid the default headless UA being blocked; verify if
# LinkedIn changes its bot detection.
_CHROME_UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
)
|
||||
|
||||
|
||||
def _write_stage(stage_path: Path, payload: dict) -> None:
|
||||
"""Atomic write: write to .tmp then rename to avoid partial reads."""
|
||||
tmp = stage_path.with_suffix(".tmp")
|
||||
tmp.write_text(json.dumps(payload, ensure_ascii=False, indent=2))
|
||||
tmp.rename(stage_path)
|
||||
|
||||
|
||||
def scrape_profile(url: str, stage_path: Path) -> dict:
    """
    Fetch a public LinkedIn profile via Playwright headless Chromium.

    Parameters:
        url: a linkedin.com/in/… profile URL.
        stage_path: where the JSON staging file is written.

    Raises ValueError if url is not a linkedin.com/in/ URL.
    Raises RuntimeError on scrape failure (timeout, blocked, etc.).
    Returns the extracted dict and writes the staging file.
    """
    if not _LINKEDIN_PROFILE_RE.match(url):
        raise ValueError(
            f"Expected a LinkedIn profile URL (linkedin.com/in/…), got: {url}"
        )

    try:
        with sync_playwright() as pw:
            browser = pw.chromium.launch(headless=True)
            try:
                page = browser.new_page(user_agent=_CHROME_UA)
                page.goto(url, timeout=30_000)
                # Wait for any main content section — LinkedIn serves several
                # DOM variants to logged-out visitors.
                page.wait_for_selector(
                    "h1, section[data-section], #experience, #about",
                    timeout=20_000,
                )
                raw_html = page.content()
            finally:
                # Always release the browser, even when goto/wait raises —
                # otherwise Chromium lingers until the playwright context exits.
                browser.close()
    except PWTimeout as e:
        raise RuntimeError(
            "LinkedIn did not load in time — the request may have been blocked. "
            "Try the data export option instead."
        ) from e
    except Exception as e:
        raise RuntimeError(f"LinkedIn scrape failed: {e}") from e

    extracted = parse_html(raw_html)
    # parse_html always returns "" for linkedin; fill it from the request URL.
    extracted["linkedin"] = url

    _write_stage(stage_path, {
        "url": url,
        "scraped_at": datetime.now(timezone.utc).isoformat(),
        "source": "url_scrape",
        "raw_html": raw_html,
        "extracted": extracted,
    })
    return extracted
|
||||
|
||||
|
||||
def parse_export_zip(zip_bytes: bytes, stage_path: Path) -> dict:
    """
    Parse a LinkedIn data export archive.

    zip_bytes: raw zip bytes — callers do: zip_bytes = uploaded_file.read()
    Returns the extracted dict and writes the staging file.
    Missing CSV files are skipped silently.
    Raises ValueError if zip_bytes is not a valid zip archive.
    """
    extracted: dict = {
        "name": "", "email": "", "phone": "", "linkedin": "",
        "career_summary": "",
        "experience": [], "education": [], "skills": [], "achievements": [],
    }

    def _cell(row: dict, key: str) -> str:
        # csv.DictReader fills fields missing from a short row with None
        # (restval default) — row.get(key, "") would still return that None,
        # so guard before .strip() to avoid an AttributeError.
        return (row.get(key) or "").strip()

    try:
        with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
            # Case-insensitive filename lookup — export casing varies.
            names_in_zip = {n.lower(): n for n in zf.namelist()}

            def _read_csv(filename: str) -> list[dict]:
                """Return all rows of a CSV in the archive; [] if absent."""
                key = filename.lower()
                if key not in names_in_zip:
                    return []
                # utf-8-sig strips the BOM LinkedIn prepends to its CSVs.
                text = zf.read(names_in_zip[key]).decode("utf-8-sig", errors="replace")
                return list(csv.DictReader(io.StringIO(text)))

            for row in _read_csv("Profile.csv"):
                first = _cell(row, "First Name")
                last = _cell(row, "Last Name")
                extracted["name"] = f"{first} {last}".strip()
                extracted["email"] = _cell(row, "Email Address")
                extracted["career_summary"] = _cell(row, "Summary")
                break  # only the first profile row matters

            for row in _read_csv("Position.csv"):
                company = _cell(row, "Company Name")
                title = _cell(row, "Title")
                desc = _cell(row, "Description")
                start = _cell(row, "Started On")
                end = _cell(row, "Finished On")
                # An empty "Finished On" means the position is current.
                end_label = end if end else ("Present" if start else "")
                date_range = f"{start} – {end_label}".strip(" –") if (start or end) else ""
                bullets = [d.strip() for d in re.split(r"[.•\n]+", desc) if d.strip() and len(d.strip()) > 3]
                if company or title:
                    extracted["experience"].append({
                        "company": company,
                        "title": title,
                        "date_range": date_range,
                        "bullets": bullets,
                    })

            for row in _read_csv("Education.csv"):
                school = _cell(row, "School Name")
                degree = _cell(row, "Degree Name")
                field = _cell(row, "Field Of Study")
                start = _cell(row, "Start Date")
                end = _cell(row, "End Date")
                dates = f"{start} – {end}".strip(" –") if start or end else ""
                if school or degree:
                    extracted["education"].append({
                        "school": school,
                        "degree": degree,
                        "field": field,
                        "dates": dates,
                    })

            for row in _read_csv("Skills.csv"):
                skill = _cell(row, "Name")
                if skill:
                    extracted["skills"].append(skill)

            for row in _read_csv("Certifications.csv"):
                name = _cell(row, "Name")
                if name:
                    extracted["achievements"].append(name)

    except zipfile.BadZipFile as e:
        raise ValueError(f"Not a valid zip file: {e}") from e

    _write_stage(stage_path, {
        "url": None,
        "scraped_at": datetime.now(timezone.utc).isoformat(),
        "source": "export_zip",
        "raw_html": None,
        "extracted": extracted,
    })
    return extracted
|
||||
194
scripts/linkedin_utils.py
Normal file
194
scripts/linkedin_utils.py
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
# scripts/linkedin_utils.py
"""
LinkedIn profile HTML parser.

Extracts structured profile data from a raw LinkedIn public profile page.
No Playwright dependency — importable by both linkedin_scraper and linkedin_parser.

Selectors target the 2024-2025 LinkedIn public profile DOM.
When LinkedIn changes their markup, update the selector lists here only.
Each section uses ordered fallbacks — first matching selector wins.
"""
from __future__ import annotations
import re
from bs4 import BeautifulSoup


# ── Selector fallback lists ────────────────────────────────────────────────────
# Ordered most-specific → most-generic; consumed by _select_first/_select_all.

_NAME_SELECTORS = [
    "h1.top-card-layout__title",
    "h1[class*='title']",
    ".pv-top-card--list h1",
    "h1",  # last resort: any page heading
]

_SUMMARY_SELECTORS = [
    "section[data-section='about'] .show-more-less-text__text--less",
    "section[data-section='about'] p",
    "#about ~ * p.show-more-less-text__text--less",
    ".pv-about-section p",
]

_EXPERIENCE_ITEM_SELECTORS = [
    "section[data-section='experience'] li.experience-item",
    "section[data-section='experience'] li",
    "#experience-section li",
    "#experience ~ * li",
]

# Per-item selectors applied within one experience <li>.
_EXP_TITLE_SELECTORS = ["span.experience-item__title", "span[class*='title']", "h3"]
_EXP_COMPANY_SELECTORS = ["span.experience-item__subtitle", "span[class*='subtitle']", "p[class*='company']"]
_EXP_DATE_SELECTORS = ["span.date-range", "[class*='date-range']", "span[class*='duration']"]
_EXP_DESC_SELECTORS = [".show-more-less-text__text--less", "p[class*='description']", "p"]

_EDUCATION_ITEM_SELECTORS = [
    "section[data-section='education'] li.education__list-item",
    "section[data-section='education'] li",
    "#education ~ * li",
]

# Per-item selectors applied within one education <li>.
_EDU_SCHOOL_SELECTORS = ["h3.education__school-name", "h3[class*='school']", "h3"]
_EDU_DEGREE_SELECTORS = ["span.education__item--degree-name", "span[class*='degree']", "p[class*='degree']"]
_EDU_DATES_SELECTORS = ["span.education__item--duration", "span[class*='duration']", "time"]

_SKILLS_SELECTORS = [
    "section[data-section='skills'] span.mr1",
    "section[data-section='skills'] li span[class*='bold']",
    "section[data-section='skills'] li span",
    "#skills ~ * li span",
]

_CERT_ITEM_SELECTORS = [
    "section[data-section='certifications'] li",
    "#certifications ~ * li",
    "#licenses_and_certifications ~ * li",
]
_CERT_NAME_SELECTORS = ["h3.certifications__name", "h3[class*='name']", "h3", "span[class*='title']"]
|
||||
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def _select_first(soup, selectors):
|
||||
for sel in selectors:
|
||||
try:
|
||||
el = soup.select_one(sel)
|
||||
if el and el.get_text(strip=True):
|
||||
return el.get_text(strip=True)
|
||||
except Exception:
|
||||
continue
|
||||
return ""
|
||||
|
||||
|
||||
def _select_all(soup, selectors):
|
||||
for sel in selectors:
|
||||
try:
|
||||
els = soup.select(sel)
|
||||
if els:
|
||||
return els
|
||||
except Exception:
|
||||
continue
|
||||
return []
|
||||
|
||||
|
||||
def _split_bullets(text):
|
||||
parts = re.split(r"[•·]\s*|(?<=\s)–\s+|\n+", text)
|
||||
return [p.strip() for p in parts if p.strip() and len(p.strip()) > 3]
|
||||
|
||||
|
||||
def _date_range_text(item):
    """Extract a date-range string from one experience item.

    Prefers the <time> children of the first matching date element, joined
    with " – "; falls back to the element's own text. Returns "" when no
    selector matches. Selectors that raise are skipped.
    """
    for sel in _EXP_DATE_SELECTORS:
        try:
            node = item.select_one(sel)
            if not node:
                continue
            time_labels = [t.get_text(strip=True) for t in node.find_all("time")]
            if time_labels:
                return " – ".join(time_labels)
            fallback = node.get_text(strip=True)
            if fallback:
                return fallback
        except Exception:
            continue
    return ""
|
||||
|
||||
|
||||
# ── Public API ────────────────────────────────────────────────────────────────
|
||||
|
||||
def parse_html(raw_html: str) -> dict:
    """
    Extract structured profile data from a raw LinkedIn public profile HTML page.

    Returns a dict with keys: name, email, phone, linkedin, career_summary,
    experience[], education[], skills[], achievements[]

    email, phone and linkedin are always returned as "" — callers (the
    scraper / staging re-parse) fill them in from other sources.

    Never raises — returns empty values for sections that cannot be parsed.
    """
    soup = BeautifulSoup(raw_html, "lxml")

    name = _select_first(soup, _NAME_SELECTORS)
    career_summary = _select_first(soup, _SUMMARY_SELECTORS)

    # Experience: one entry per matched list item; items with neither a
    # title nor a company are dropped as selector noise.
    experience = []
    for item in _select_all(soup, _EXPERIENCE_ITEM_SELECTORS):
        title = _select_first(item, _EXP_TITLE_SELECTORS)
        company = _select_first(item, _EXP_COMPANY_SELECTORS)
        dates = _date_range_text(item)
        # First matching description *element* wins — same ordered-fallback
        # discipline as _select_first, but we need the node (for bullet
        # splitting), not its flattened text.
        desc_el = None
        for sel in _EXP_DESC_SELECTORS:
            try:
                desc_el = item.select_one(sel)
                if desc_el:
                    break
            except Exception:
                continue
        bullets = _split_bullets(desc_el.get_text(" ", strip=True)) if desc_el else []
        if title or company:
            experience.append({
                "company": company,
                "title": title,
                "date_range": dates,
                "bullets": bullets,
            })

    education = []
    for item in _select_all(soup, _EDUCATION_ITEM_SELECTORS):
        school = _select_first(item, _EDU_SCHOOL_SELECTORS)
        degree = _select_first(item, _EDU_DEGREE_SELECTORS)
        dates = ""
        for sel in _EDU_DATES_SELECTORS:
            try:
                el = item.select_one(sel)
                if el:
                    dates = el.get_text(strip=True)
                    break
            except Exception:
                continue
        if school or degree:
            education.append({
                "school": school,
                "degree": degree,
                # Field of study is not separable from the degree text in
                # the public-page DOM; the export-zip path fills it instead.
                "field": "",
                "dates": dates,
            })

    skills = [el.get_text(strip=True) for el in _select_all(soup, _SKILLS_SELECTORS)
              if el.get_text(strip=True)]
    # De-duplicate while preserving first-seen order.
    skills = list(dict.fromkeys(skills))

    achievements = []
    for item in _select_all(soup, _CERT_ITEM_SELECTORS):
        label = _select_first(item, _CERT_NAME_SELECTORS)
        if label:
            achievements.append(label)

    return {
        "name": name,
        "email": "",
        "phone": "",
        "linkedin": "",
        "career_summary": career_summary,
        "experience": experience,
        "education": education,
        "skills": skills,
        "achievements": achievements,
    }
|
||||
|
|
@ -83,10 +83,10 @@ def _extract_career_summary(source: Path) -> str:
|
|||
|
||||
|
||||
def _extract_personal_info(source: Path) -> dict:
|
||||
"""Extract personal info from aihawk resume yaml."""
|
||||
"""Extract personal info from resume yaml."""
|
||||
resume = source / "config" / "plain_text_resume.yaml"
|
||||
if not resume.exists():
|
||||
resume = source / "aihawk" / "data_folder" / "plain_text_resume.yaml"
|
||||
resume = source / "aihawk" / "data_folder" / "plain_text_resume.yaml" # legacy path
|
||||
if not resume.exists():
|
||||
return {}
|
||||
data = _load_yaml(resume)
|
||||
|
|
@ -196,7 +196,7 @@ def _copy_configs(source: Path, dest: Path, apply: bool) -> None:
|
|||
|
||||
|
||||
def _copy_aihawk_resume(source: Path, dest: Path, apply: bool) -> None:
|
||||
print("\n── Copying AIHawk resume profile")
|
||||
print("\n── Copying resume profile")
|
||||
src = source / "config" / "plain_text_resume.yaml"
|
||||
if not src.exists():
|
||||
src = source / "aihawk" / "data_folder" / "plain_text_resume.yaml"
|
||||
|
|
|
|||
110
tests/fixtures/linkedin_profile.html
vendored
Normal file
110
tests/fixtures/linkedin_profile.html
vendored
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
<!-- tests/fixtures/linkedin_profile.html -->
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head><title>Alan Weinstock | LinkedIn</title></head>
|
||||
<body>
|
||||
<!-- Name and headline -->
|
||||
<div class="top-card-layout__entity-info">
|
||||
<h1 class="top-card-layout__title">Alan Weinstock</h1>
|
||||
<h2 class="top-card-layout__headline">Staff Engineer · Open to Work</h2>
|
||||
</div>
|
||||
|
||||
<!-- About / Summary -->
|
||||
<section data-section="about">
|
||||
<div class="core-section-container__content">
|
||||
<p class="show-more-less-text__text--less">
|
||||
Experienced engineer with 10 years in embedded systems and DevOps.
|
||||
Passionate about open-source and accessibility tooling.
|
||||
</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Experience -->
|
||||
<section data-section="experience">
|
||||
<ul>
|
||||
<li class="experience-item">
|
||||
<div class="experience-item__info">
|
||||
<span class="experience-item__title">Staff Engineer</span>
|
||||
<span class="experience-item__subtitle">Acme Corp</span>
|
||||
<span class="experience-item__duration">
|
||||
<span class="date-range">
|
||||
<time>Jan 2022</time>
|
||||
<time>Present</time>
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
<div class="experience-item__description">
|
||||
<p class="show-more-less-text__text--less">
|
||||
Led migration of monolith to microservices. •
|
||||
Reduced p99 latency by 40%. •
|
||||
Mentored three junior engineers.
|
||||
</p>
|
||||
</div>
|
||||
</li>
|
||||
<li class="experience-item">
|
||||
<div class="experience-item__info">
|
||||
<span class="experience-item__title">Senior Engineer</span>
|
||||
<span class="experience-item__subtitle">Beta Industries</span>
|
||||
<span class="experience-item__duration">
|
||||
<span class="date-range">
|
||||
<time>Mar 2019</time>
|
||||
<time>Dec 2021</time>
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
<div class="experience-item__description">
|
||||
<p class="show-more-less-text__text--less">
|
||||
Designed CI/CD pipeline. • Maintained Kubernetes clusters.
|
||||
</p>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<!-- Education -->
|
||||
<section data-section="education">
|
||||
<ul>
|
||||
<li class="education__list-item">
|
||||
<div class="education__item--degree-info">
|
||||
<h3 class="education__school-name">State University</h3>
|
||||
<span class="education__item--degree-name">B.S. Computer Science</span>
|
||||
<span class="education__item--duration">2010 – 2014</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<!-- Skills -->
|
||||
<section data-section="skills">
|
||||
<ul>
|
||||
<li class="skills-section__list-item">
|
||||
<div class="skills-section__skill">
|
||||
<span class="mr1 t-bold">Python</span>
|
||||
</div>
|
||||
</li>
|
||||
<li class="skills-section__list-item">
|
||||
<div class="skills-section__skill">
|
||||
<span class="mr1 t-bold">Kubernetes</span>
|
||||
</div>
|
||||
</li>
|
||||
<li class="skills-section__list-item">
|
||||
<div class="skills-section__skill">
|
||||
<span class="mr1 t-bold">PostgreSQL</span>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<!-- Certifications -->
|
||||
<section data-section="certifications">
|
||||
<ul>
|
||||
<li class="certifications__list-item">
|
||||
<h3 class="certifications__name">AWS Solutions Architect – Associate</h3>
|
||||
</li>
|
||||
<li class="certifications__list-item">
|
||||
<h3 class="certifications__name">CKA: Certified Kubernetes Administrator</h3>
|
||||
</li>
|
||||
</ul>
|
||||
</section>
|
||||
</body>
|
||||
</html>
|
||||
96
tests/test_linkedin_parser.py
Normal file
96
tests/test_linkedin_parser.py
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
# tests/test_linkedin_parser.py
"""Tests for scripts.linkedin_parser.parse_stage staging-file handling."""
import json
import sys
import tempfile
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

# Real profile HTML fixture shared with the scraper/utils tests.
FIXTURE_HTML = (Path(__file__).parent / "fixtures" / "linkedin_profile.html").read_text()


def _write_url_stage(path: Path) -> None:
    """Write a minimal url_scrape staging file with intentionally stale extracted data."""
    path.write_text(json.dumps({
        "url": "https://linkedin.com/in/alanw",
        "scraped_at": "2026-03-12T14:30:00+00:00",
        "source": "url_scrape",
        "raw_html": FIXTURE_HTML,
        "extracted": {
            "name": "Alan Weinstock (stale)",  # stale — re-parse should update this
            "career_summary": "",
            "experience": [], "education": [], "skills": [], "achievements": [],
            "email": "", "phone": "", "linkedin": "",
        },
    }))


def _write_zip_stage(path: Path) -> None:
    """Write a minimal export_zip staging file (no raw_html)."""
    path.write_text(json.dumps({
        "url": None,
        "scraped_at": "2026-03-12T14:30:00+00:00",
        "source": "export_zip",
        "raw_html": None,
        "extracted": {
            "name": "Alan Weinstock",
            "career_summary": "Engineer",
            "experience": [{"company": "Acme", "title": "SE", "date_range": "", "bullets": []}],
            "education": [], "skills": ["Python"], "achievements": [],
            "email": "alan@example.com", "phone": "", "linkedin": "",
        },
    }))


def test_parse_stage_reruns_parser_on_url_scrape():
    """parse_stage re-runs parse_html from raw_html, ignoring stale extracted data."""
    from scripts.linkedin_parser import parse_stage
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        _write_url_stage(stage)
        result, err = parse_stage(stage)
        assert err == ""
        assert result["name"] == "Alan Weinstock"  # fresh parse, not "(stale)"
        assert len(result["experience"]) == 2


def test_parse_stage_returns_stored_data_for_zip():
    """parse_stage returns stored extracted dict for export_zip (no raw_html to re-parse)."""
    from scripts.linkedin_parser import parse_stage
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        _write_zip_stage(stage)
        result, err = parse_stage(stage)
        assert err == ""
        assert result["name"] == "Alan Weinstock"
        assert result["email"] == "alan@example.com"
        assert "Python" in result["skills"]


def test_parse_stage_missing_file_returns_error():
    # Nonexistent path → ({}, error) rather than an exception.
    from scripts.linkedin_parser import parse_stage
    result, err = parse_stage(Path("/nonexistent/stage.json"))
    assert result == {}
    assert err != ""


def test_parse_stage_corrupted_file_returns_error():
    # Invalid JSON → ({}, error) rather than an exception.
    from scripts.linkedin_parser import parse_stage
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        stage.write_text("not valid json {{{{")
        result, err = parse_stage(stage)
        assert result == {}
        assert err != ""


def test_parse_stage_updates_staging_file_after_reparse():
    """After re-parsing, the staging file's extracted dict is updated."""
    from scripts.linkedin_parser import parse_stage
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        _write_url_stage(stage)
        parse_stage(stage)
        updated = json.loads(stage.read_text())
        assert updated["extracted"]["name"] == "Alan Weinstock"
        assert len(updated["extracted"]["experience"]) == 2
|
||||
213
tests/test_linkedin_scraper.py
Normal file
213
tests/test_linkedin_scraper.py
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
# tests/test_linkedin_scraper.py
"""Tests for scripts.linkedin_scraper: URL scraping (mocked Playwright) and
data-export zip parsing."""
import io
import json
import sys
import zipfile
from pathlib import Path
from unittest.mock import MagicMock, patch
import tempfile

sys.path.insert(0, str(Path(__file__).parent.parent))

_FIXTURE_PATH = Path(__file__).parent / "fixtures" / "linkedin_profile.html"


def _scrape_fixture(stage: Path, url: str = "https://linkedin.com/in/alanw") -> dict:
    """Call scrape_profile with Playwright mocked out to serve the fixture HTML.

    The mock wiring (page → browser → playwright context) was previously
    duplicated verbatim in three tests; this helper is the single copy.
    """
    from scripts.linkedin_scraper import scrape_profile
    mock_page = MagicMock()
    mock_page.content.return_value = _FIXTURE_PATH.read_text()
    mock_browser = MagicMock()
    mock_browser.new_page.return_value = mock_page
    mock_playwright = MagicMock()
    mock_playwright.chromium.launch.return_value = mock_browser
    with patch("scripts.linkedin_scraper.sync_playwright") as mock_sync_pw:
        mock_sync_pw.return_value.__enter__ = MagicMock(return_value=mock_playwright)
        mock_sync_pw.return_value.__exit__ = MagicMock(return_value=False)
        return scrape_profile(url, stage)


def test_invalid_url_raises():
    # Company pages are rejected — only /in/ profile URLs are scrapable.
    from scripts.linkedin_scraper import scrape_profile
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        try:
            scrape_profile("https://linkedin.com/company/acme", stage)
            assert False, "should have raised"
        except ValueError as e:
            assert "linkedin.com/in/" in str(e)


def test_non_linkedin_url_raises():
    from scripts.linkedin_scraper import scrape_profile
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        try:
            scrape_profile("https://example.com/profile", stage)
            assert False, "should have raised"
        except ValueError:
            pass


def test_valid_linkedin_url_accepted():
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = _scrape_fixture(stage)
        assert result["name"] == "Alan Weinstock"
        assert stage.exists()


def test_scrape_profile_writes_staging_file():
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        _scrape_fixture(stage)
        data = json.loads(stage.read_text())
        assert data["source"] == "url_scrape"
        assert data["url"] == "https://linkedin.com/in/alanw"
        assert "raw_html" in data
        assert "extracted" in data
        assert data["extracted"]["name"] == "Alan Weinstock"


def _make_export_zip() -> bytes:
    """Build an in-memory LinkedIn data-export archive with the four CSVs."""
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr("Position.csv",
            "Company Name,Title,Description,Started On,Finished On\n"
            "Acme Corp,Staff Engineer,Led migration. Built CI/CD.,Jan 2022,\n"
            "Beta Industries,Senior Engineer,Maintained clusters.,Mar 2019,Dec 2021\n"
        )
        zf.writestr("Education.csv",
            "School Name,Degree Name,Field Of Study,Start Date,End Date\n"
            "State University,Bachelor of Science,Computer Science,2010,2014\n"
        )
        zf.writestr("Skills.csv",
            "Name,Description\n"
            "Python,\n"
            "Kubernetes,\n"
        )
        zf.writestr("Profile.csv",
            "First Name,Last Name,Headline,Summary,Email Address\n"
            "Alan,Weinstock,Staff Engineer,Experienced engineer.,alan@example.com\n"
        )
    return buf.getvalue()


def test_parse_export_zip_experience():
    from scripts.linkedin_scraper import parse_export_zip
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = parse_export_zip(_make_export_zip(), stage)
        assert len(result["experience"]) == 2
        assert result["experience"][0]["company"] == "Acme Corp"
        assert result["experience"][0]["title"] == "Staff Engineer"


def test_parse_export_zip_education():
    from scripts.linkedin_scraper import parse_export_zip
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = parse_export_zip(_make_export_zip(), stage)
        assert result["education"][0]["school"] == "State University"
        assert result["education"][0]["field"] == "Computer Science"


def test_parse_export_zip_skills():
    from scripts.linkedin_scraper import parse_export_zip
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = parse_export_zip(_make_export_zip(), stage)
        assert "Python" in result["skills"]


def test_parse_export_zip_name_and_email():
    from scripts.linkedin_scraper import parse_export_zip
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = parse_export_zip(_make_export_zip(), stage)
        assert result["name"] == "Alan Weinstock"
        assert result["email"] == "alan@example.com"


def test_parse_export_zip_missing_csv_does_not_raise():
    # Only Profile.csv present — other sections must default to empty.
    from scripts.linkedin_scraper import parse_export_zip
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr("Profile.csv",
            "First Name,Last Name,Headline,Summary,Email Address\n"
            "Alan,Weinstock,Engineer,Summary here.,alan@example.com\n"
        )
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = parse_export_zip(buf.getvalue(), stage)
        assert result["name"] == "Alan Weinstock"
        assert result["experience"] == []


def test_parse_export_zip_writes_staging_file():
    from scripts.linkedin_scraper import parse_export_zip
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        parse_export_zip(_make_export_zip(), stage)
        data = json.loads(stage.read_text())
        assert data["source"] == "export_zip"
        assert data["raw_html"] is None


def test_scrape_profile_sets_linkedin_url():
    # parse_html leaves "linkedin" empty; the scraper fills it from the URL.
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = _scrape_fixture(stage)
        assert result["linkedin"] == "https://linkedin.com/in/alanw"


def test_parse_export_zip_bad_zip_raises():
    from scripts.linkedin_scraper import parse_export_zip
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        try:
            parse_export_zip(b"not a zip file at all", stage)
            assert False, "should have raised"
        except ValueError as e:
            assert "zip" in str(e).lower()


def test_parse_export_zip_current_job_shows_present():
    """Empty Finished On renders as '– Present', not truncated."""
    from scripts.linkedin_scraper import parse_export_zip
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as zf:
        zf.writestr("Position.csv",
            "Company Name,Title,Description,Started On,Finished On\n"
            "Acme Corp,Staff Engineer,,Jan 2022,\n"
        )
        zf.writestr("Profile.csv",
            "First Name,Last Name,Headline,Summary,Email Address\n"
            "Alan,Weinstock,Engineer,,\n"
        )
    with tempfile.TemporaryDirectory() as tmp:
        stage = Path(tmp) / "stage.json"
        result = parse_export_zip(buf.getvalue(), stage)
        assert result["experience"][0]["date_range"] == "Jan 2022 – Present"
|
||||
73
tests/test_linkedin_utils.py
Normal file
73
tests/test_linkedin_utils.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
# tests/test_linkedin_utils.py
"""Tests for scripts.linkedin_utils.parse_html against the static HTML fixture."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))

# Fixture mirrors the 2024-2025 LinkedIn public-profile DOM variants.
FIXTURE = (Path(__file__).parent / "fixtures" / "linkedin_profile.html").read_text()


def test_parse_html_name():
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    assert result["name"] == "Alan Weinstock"


def test_parse_html_summary():
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    assert "embedded systems" in result["career_summary"]


def test_parse_html_experience_count():
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    assert len(result["experience"]) == 2


def test_parse_html_experience_fields():
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    first = result["experience"][0]
    assert first["company"] == "Acme Corp"
    assert first["title"] == "Staff Engineer"
    assert "Jan 2022" in first["date_range"]
    assert len(first["bullets"]) >= 2  # description split on "•"
    assert any("latency" in b for b in first["bullets"])


def test_parse_html_education():
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    assert len(result["education"]) == 1
    edu = result["education"][0]
    assert edu["school"] == "State University"
    assert "Computer Science" in edu["degree"]


def test_parse_html_skills():
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    assert "Python" in result["skills"]
    assert "Kubernetes" in result["skills"]


def test_parse_html_achievements():
    # Certifications are surfaced under "achievements".
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    assert any("AWS" in a for a in result["achievements"])


def test_parse_html_missing_section_returns_empty():
    """A profile with no skills section returns empty skills list, not an error."""
    from scripts.linkedin_utils import parse_html
    html_no_skills = FIXTURE.replace('data-section="skills"', 'data-section="hidden"')
    result = parse_html(html_no_skills)
    assert result["skills"] == []


def test_parse_html_returns_all_keys():
    # The return schema is a stable contract for downstream staging code.
    from scripts.linkedin_utils import parse_html
    result = parse_html(FIXTURE)
    for key in ("name", "email", "phone", "linkedin", "career_summary",
                "experience", "education", "skills", "achievements"):
        assert key in result, f"Missing key: {key}"
|
||||
Loading…
Reference in a new issue