Some checks failed
CI / test (push) Has been cancelled
LinkedIn's unauthenticated public profile only exposes name, summary (truncated), current employer name, and certifications. Past roles, education, and skills are blurred server-side behind a login wall — this is not a scraper limitation.
- Update selectors: data-section='summary' (was 'about'), .profile-section-card for certs, .visible-list for the current experience entry
- Strip login-wall noise injected into summary text after 'see more'
- Skip aria-hidden blurred placeholder experience items
- Add info callout in UI directing users to the data export zip for full history
192 lines
7.7 KiB
Python
192 lines
7.7 KiB
Python
# app/components/linkedin_import.py
|
||
"""
|
||
Shared LinkedIn import widget.
|
||
|
||
Usage in a page:
|
||
from app.components.linkedin_import import render_linkedin_tab
|
||
|
||
# At top of page render — check for pending import:
|
||
_li_data = st.session_state.pop("_linkedin_extracted", None)
|
||
if _li_data:
|
||
st.session_state["_parsed_resume"] = _li_data
|
||
st.rerun()
|
||
|
||
# Inside the LinkedIn tab:
|
||
with tab_linkedin:
|
||
render_linkedin_tab(config_dir=CONFIG_DIR, tier=tier)
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import re
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
|
||
import streamlit as st
|
||
|
||
# Prefix pattern for a LinkedIn personal-profile URL: "http" or "https", an
# optional "www." host prefix, then "linkedin.com/in/". Matching is
# case-insensitive. The pattern itself is unanchored; it is applied with
# .match(), which only tests the beginning of the string, so anything may
# follow the "/in/" segment.
_LINKEDIN_PROFILE_RE = re.compile(r"https?://(www\.)?linkedin\.com/in/", re.I)
|
||
|
||
|
||
def _stage_path(config_dir: Path) -> Path:
    """Return the path of the staged LinkedIn import file inside *config_dir*."""
    return config_dir.joinpath("linkedin_stage.json")
|
||
|
||
|
||
def _load_stage(config_dir: Path) -> dict | None:
    """Load the staged LinkedIn import, if a usable one exists.

    Args:
        config_dir: Directory that holds the stage file.

    Returns:
        The decoded JSON object, or ``None`` when the file is missing or
        unreadable, does not contain valid JSON, or its top-level value is
        not a JSON object.
    """
    try:
        # Read directly instead of checking exists() first: a missing file
        # surfaces as FileNotFoundError (an OSError), and there is no window
        # in which the file can vanish between the check and the read.
        payload = json.loads(_stage_path(config_dir).read_text())
    except (OSError, ValueError):
        # OSError: missing / unreadable file.
        # ValueError: malformed JSON (JSONDecodeError) or undecodable bytes
        # (UnicodeDecodeError). Either way, behave as if nothing is staged.
        return None

    # The rendering code calls .get() on the result, so a stage file whose
    # top-level value is a list, string or number is treated as unusable
    # rather than handed back to crash the caller.
    return payload if isinstance(payload, dict) else None
|
||
|
||
|
||
def _days_ago(iso_ts: str) -> str:
    """Describe how long ago an ISO-8601 timestamp was, in whole days.

    Args:
        iso_ts: Timestamp in ISO-8601 form. A trailing ``Z`` is accepted.
            A timestamp without an offset is assumed to be UTC, because it
            is compared against the current UTC time.

    Returns:
        ``"today"`` for anything less than one full day old (including
        timestamps slightly in the future, e.g. from clock skew),
        ``"yesterday"`` for one full day, ``"<n> days ago"`` otherwise, or
        ``"unknown"`` when the value is empty, not a string, or unparseable.
    """
    if not isinstance(iso_ts, str):
        return "unknown"

    text = iso_ts.strip()
    if not text:
        return "unknown"

    # datetime.fromisoformat() only understands a "Z" suffix on Python 3.11+;
    # rewrite it as an explicit offset so older interpreters accept it too.
    if text[-1] in "Zz":
        text = text[:-1] + "+00:00"

    try:
        stamp = datetime.fromisoformat(text)
    except ValueError:
        return "unknown"

    # Subtracting a naive datetime from an aware one raises TypeError, which
    # previously turned every offset-less timestamp into "unknown".
    if stamp.tzinfo is None:
        stamp = stamp.replace(tzinfo=timezone.utc)

    days = (datetime.now(timezone.utc) - stamp).days
    if days <= 0:
        # Negative values mean the timestamp is ahead of the local clock;
        # showing "-1 days ago" would be nonsense.
        return "today"
    if days == 1:
        return "yesterday"
    return f"{days} days ago"
|
||
|
||
|
||
def _do_scrape(url: str, config_dir: Path) -> None:
    """Check *url*, scrape it into the stage file, and report the outcome.

    A URL that does not start like a LinkedIn profile link is rejected with
    an error message and nothing is fetched. Otherwise the scraper is run
    under a spinner; on success a confirmation is shown and the script is
    rerun. A ``ValueError`` from the scraper is shown as an error, a
    ``RuntimeError`` as a warning, and any other exception as an
    "Unexpected error" message.
    """
    if _LINKEDIN_PROFILE_RE.match(url) is None:
        st.error("Please enter a LinkedIn profile URL (linkedin.com/in/…)")
        return

    with st.spinner("Fetching LinkedIn profile… (10–20 seconds)"):
        try:
            # Imported on demand so the scraper is only loaded when a scrape
            # is actually requested.
            from scripts.linkedin_scraper import scrape_profile

            destination = _stage_path(config_dir)
            scrape_profile(url, destination)
            st.success("Profile imported successfully.")
            st.rerun()
        except (ValueError, RuntimeError) as problem:
            # ValueError is surfaced as an error, RuntimeError as a warning.
            notify = st.error if isinstance(problem, ValueError) else st.warning
            notify(str(problem))
        except Exception as exc:
            st.error(f"Unexpected error: {exc}")
|
||
|
||
|
||
# Session-state slot that carries the export-import confirmation across the
# st.rerun() issued right after a successful parse. Anything rendered just
# before a rerun is discarded, so the message is stored and shown on the next
# run instead.
_EXPORT_FLASH_KEY = "_linkedin_export_flash"


def _experience_line(exp: dict) -> str:
    """Format one experience entry as Markdown, leaving out missing fields."""
    title = exp.get("title")
    company = exp.get("company")
    date_range = exp.get("date_range")

    line = f"**{title}**" if title else ""
    if company:
        line = f"{line} @ {company}" if line else str(company)
    if date_range:
        line = f"{line} · {date_range}" if line else str(date_range)
    return line


def _education_line(edu: dict) -> str:
    """Format one education entry as Markdown, leaving out missing fields."""
    school = edu.get("school")
    details = " ".join(
        str(part) for part in (edu.get("degree"), edu.get("field")) if part
    )
    heading = f"**{school}**" if school else ""
    if heading and details:
        return f"{heading} — {details}"
    return heading or details


def _render_preview(extracted: dict) -> None:
    """Show counts and per-section previews of *extracted*, plus the apply button."""
    experience = extracted.get("experience") or []
    education = extracted.get("education") or []
    skills = extracted.get("skills") or []

    st.divider()
    st.markdown("**Preview**")
    col1, col2, col3 = st.columns(3)
    col1.metric("Experience entries", len(experience))
    col2.metric("Skills", len(skills))
    # The "achievements" list is what the UI presents as certifications.
    col3.metric("Certifications", len(extracted.get("achievements") or []))

    if extracted.get("career_summary"):
        with st.expander("Summary"):
            st.write(extracted["career_summary"])

    if experience:
        with st.expander(f"Experience ({len(experience)} entries)"):
            for exp in experience:
                line = _experience_line(exp)
                if line:
                    st.markdown(line)

    if education:
        with st.expander("Education"):
            for edu in education:
                line = _education_line(edu)
                if line:
                    st.markdown(line)

    if skills:
        with st.expander("Skills"):
            st.write(", ".join(str(skill) for skill in skills))

    st.divider()
    if st.button("✅ Use this data", key="li_use_btn", type="primary"):
        # Hand the data to the hosting page, which pops this key on rerun.
        st.session_state["_linkedin_extracted"] = extracted
        st.rerun()


def _render_export_import(config_dir: Path) -> None:
    """Render the data-export (zip) upload section and handle parsing."""
    # Confirmation left behind by a successful parse on the previous run.
    # Shown directly above the expander so that "above" in the message still
    # points at the "Use this data" button.
    flash = st.session_state.pop(_EXPORT_FLASH_KEY, None)
    if flash:
        st.success(flash)

    with st.expander("⬇️ Import from LinkedIn data export (advanced)", expanded=False):
        st.caption(
            "Download your LinkedIn data: **Settings & Privacy → Data Privacy → "
            "Get a copy of your data → Request archive → Fast file**. "
            "The Fast file is available immediately and contains your profile, "
            "experience, education, and skills."
        )
        zip_file = st.file_uploader(
            "Upload LinkedIn export zip", type=["zip"], key="li_zip_upload"
        )
        if zip_file is None:
            return
        if not st.button("📦 Parse export", key="li_parse_zip"):
            return

        with st.spinner("Parsing export archive…"):
            try:
                from scripts.linkedin_scraper import parse_export_zip

                # getvalue() returns the whole upload regardless of the
                # stream position; read() would return b"" if the file had
                # already been consumed on an earlier run.
                extracted = parse_export_zip(
                    zip_file.getvalue(), _stage_path(config_dir)
                )
                message = (
                    f"Imported {len(extracted.get('experience', []))} experience entries, "
                    f"{len(extracted.get('skills', []))} skills. "
                    "Click 'Use this data' above to apply."
                )
            except Exception as exc:
                st.error(f"Failed to parse export: {exc}")
            else:
                st.session_state[_EXPORT_FLASH_KEY] = message
                st.rerun()


def render_linkedin_tab(config_dir: Path, tier: str) -> None:
    """
    Render the LinkedIn import UI.

    Sections, top to bottom: a status bar for previously staged data (with a
    refresh button), the profile-URL import form, a preview of the staged
    data, and the data-export (zip) upload.

    When the user clicks "Use this data", writes the extracted dict to
    st.session_state["_linkedin_extracted"] and calls st.rerun().

    Caller reads: data = st.session_state.pop("_linkedin_extracted", None)

    Args:
        config_dir: Directory holding the stage file that imports write to
            and the preview reads from.
        tier: Accepted for interface compatibility; not read by this function.
    """
    stage = _load_stage(config_dir)

    # ── Staged data status bar ────────────────────────────────────────────────
    if stage:
        scraped_at = stage.get("scraped_at", "")
        source_label = "LinkedIn export" if stage.get("source") == "export_zip" else "LinkedIn profile"
        col_info, col_refresh = st.columns([4, 1])
        col_info.caption(f"Last imported from {source_label}: {_days_ago(scraped_at)}")
        if col_refresh.button("🔄 Refresh", key="li_refresh"):
            url = stage.get("url")
            if url:
                _do_scrape(url, config_dir)
            else:
                st.info("Original URL not available — paste the URL below to re-import.")

    # ── URL import ────────────────────────────────────────────────────────────
    st.markdown("**Import from LinkedIn profile URL**")
    url_input = st.text_input(
        "LinkedIn profile URL",
        placeholder="https://linkedin.com/in/your-name",
        label_visibility="collapsed",
        key="li_url_input",
    )
    if st.button("🔗 Import from LinkedIn", key="li_import_btn", type="primary"):
        cleaned_url = url_input.strip()
        if not cleaned_url:
            st.warning("Please enter your LinkedIn profile URL.")
        else:
            _do_scrape(cleaned_url, config_dir)

    st.caption(
        "Imports from your public LinkedIn profile. No login or credentials required. "
        "Scraping typically takes 10–20 seconds."
    )
    st.info(
        "**LinkedIn limits public profile data.** Without logging in, LinkedIn only "
        "exposes your name, About summary, current employer, and certifications — "
        "past roles, education, and skills are hidden behind their login wall. "
        "For your full career history use the **data export zip** option below.",
        icon="ℹ️",
    )

    # ── Section preview + use button ─────────────────────────────────────────
    if stage:
        from scripts.linkedin_parser import parse_stage

        extracted, err = parse_stage(_stage_path(config_dir))
        if err:
            st.warning(f"Could not read staged data: {err}")
        else:
            _render_preview(extracted)

    # ── Advanced: data export ─────────────────────────────────────────────────
    _render_export_import(config_dir)
|