feat: extract hard-coded personal references from all scripts via UserProfile
Replace hard-coded paths (/Library/Documents/JobSearch), names (Alex Rivera), NDA sets (_NDA_COMPANIES), and the scraper path with UserProfile-driven lookups. Update tests to be profile-agnostic (no user.yaml in peregrine config dir).
This commit is contained in:
parent
7380deb021
commit
9dc0244546
7 changed files with 124 additions and 90 deletions
|
|
@ -3,13 +3,13 @@
|
|||
Pre-interview company research generator.
|
||||
|
||||
Three-phase approach:
|
||||
1. If SearXNG is available (port 8888), use companyScraper.py to fetch live
|
||||
1. If SearXNG is available, use companyScraper.py to fetch live
|
||||
data: CEO name, HQ address, LinkedIn, contact info.
|
||||
1b. Use Phase 1 data (company name + CEO if found) to query SearXNG for
|
||||
recent news snippets (funding, launches, leadership changes, etc.).
|
||||
2. Feed all real data into an LLM prompt to synthesise a structured brief
|
||||
covering company overview, leadership, recent developments, and talking
|
||||
points tailored to Alex.
|
||||
points tailored to the candidate.
|
||||
|
||||
Falls back to pure LLM knowledge when SearXNG is offline.
|
||||
|
||||
|
|
@ -24,25 +24,32 @@ from types import SimpleNamespace
|
|||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
# ── SearXNG scraper integration ───────────────────────────────────────────────
|
||||
_SCRAPER_DIR = Path("/Library/Development/scrapers")
|
||||
_SCRAPER_AVAILABLE = False
|
||||
from scripts.user_profile import UserProfile
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
|
||||
if _SCRAPER_DIR.exists():
|
||||
sys.path.insert(0, str(_SCRAPER_DIR))
|
||||
# ── SearXNG scraper integration ───────────────────────────────────────────────
|
||||
# companyScraper is bundled into the Docker image at /app/scrapers/
|
||||
_SCRAPER_AVAILABLE = False
|
||||
for _scraper_candidate in [
|
||||
Path("/app/scrapers"), # Docker container path
|
||||
Path(__file__).parent.parent / "scrapers", # local dev fallback
|
||||
]:
|
||||
if _scraper_candidate.exists():
|
||||
sys.path.insert(0, str(_scraper_candidate))
|
||||
try:
|
||||
from companyScraper import EnhancedCompanyScraper, Config as _ScraperConfig
|
||||
_SCRAPER_AVAILABLE = True
|
||||
except (ImportError, SystemExit):
|
||||
# companyScraper calls sys.exit(1) if bs4/fake-useragent aren't installed
|
||||
pass
|
||||
break
|
||||
|
||||
|
||||
def _searxng_running() -> bool:
|
||||
def _searxng_running(searxng_url: str = "http://localhost:8888") -> bool:
|
||||
"""Quick check whether SearXNG is reachable."""
|
||||
try:
|
||||
import requests
|
||||
r = requests.get("http://localhost:8888/", timeout=3)
|
||||
r = requests.get(f"{searxng_url}/", timeout=3)
|
||||
return r.status_code == 200
|
||||
except Exception:
|
||||
return False
|
||||
|
|
@ -186,9 +193,13 @@ def _parse_sections(text: str) -> dict[str, str]:
|
|||
_RESUME_YAML = Path(__file__).parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
|
||||
_KEYWORDS_YAML = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
|
||||
|
||||
# Companies where Alex has an NDA — reference as generic label unless
|
||||
# the role is security-focused (score >= 3 matching JD keywords).
|
||||
_NDA_COMPANIES = {"upguard"}
|
||||
|
||||
def _company_label(exp: dict) -> str:
|
||||
company = exp.get("company", "")
|
||||
score = exp.get("score", 0)
|
||||
if _profile:
|
||||
return _profile.nda_label(company, score)
|
||||
return company
|
||||
|
||||
|
||||
def _score_experiences(experiences: list[dict], keywords: list[str], jd: str) -> list[dict]:
|
||||
|
|
@ -214,8 +225,7 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
|
|||
"""
|
||||
Build the resume section of the LLM context block.
|
||||
Top 2 scored experiences included in full detail; rest as one-liners.
|
||||
Applies UpGuard NDA rule: reference as 'enterprise security vendor (NDA)'
|
||||
unless the role is security-focused (score >= 3).
|
||||
NDA companies are masked via UserProfile.nda_label() when score < threshold.
|
||||
"""
|
||||
experiences = resume.get("experience_details", [])
|
||||
if not experiences:
|
||||
|
|
@ -225,11 +235,7 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
|
|||
top2 = scored[:2]
|
||||
rest = scored[2:]
|
||||
|
||||
def _company_label(exp: dict) -> str:
|
||||
company = exp.get("company", "")
|
||||
if company.lower() in _NDA_COMPANIES and exp.get("score", 0) < 3:
|
||||
return "enterprise security vendor (NDA)"
|
||||
return company
|
||||
candidate = _profile.name if _profile else "the candidate"
|
||||
|
||||
def _exp_header(exp: dict) -> str:
|
||||
return f"{exp.get('position', '')} @ {_company_label(exp)} ({exp.get('employment_period', '')})"
|
||||
|
|
@ -238,14 +244,14 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
|
|||
bullets = [v for resp in exp.get("key_responsibilities", []) for v in resp.values()]
|
||||
return "\n".join(f" - {b}" for b in bullets)
|
||||
|
||||
lines = ["## Alex's Matched Experience"]
|
||||
lines = [f"## {candidate}'s Matched Experience"]
|
||||
for exp in top2:
|
||||
lines.append(f"\n**{_exp_header(exp)}** (match score: {exp['score']})")
|
||||
lines.append(_exp_bullets(exp))
|
||||
|
||||
if rest:
|
||||
condensed = ", ".join(_exp_header(e) for e in rest)
|
||||
lines.append(f"\nAlso in Alex's background: {condensed}")
|
||||
lines.append(f"\nAlso in {candidate}'s background: {condensed}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
|
@ -359,7 +365,10 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict
|
|||
|
||||
# ── Phase 2: LLM synthesis ────────────────────────────────────────────────
|
||||
_stage("Generating brief with LLM… (30–90 seconds)")
|
||||
prompt = f"""You are preparing Alex Rivera for a job interview.
|
||||
name = _profile.name if _profile else "the candidate"
|
||||
career_summary = _profile.career_summary if _profile else ""
|
||||
prompt = f"""You are preparing {name} for a job interview.
|
||||
{f"Candidate background: {career_summary}" if career_summary else ""}
|
||||
|
||||
Role: **{title}** at **{company}**
|
||||
|
||||
|
|
@ -404,12 +413,12 @@ Assess {company}'s commitment to disability inclusion and accessibility. Cover:
|
|||
- Any public disability/accessibility advocacy, partnerships, or certifications
|
||||
- Glassdoor or press signals about how employees with disabilities experience the company
|
||||
If no specific signals are found, say so clearly — absence of public commitment is itself signal.
|
||||
This section is for Alex's personal decision-making only and will not appear in any application.
|
||||
This section is for the candidate's personal decision-making only and will not appear in any application.
|
||||
|
||||
## Talking Points for Alex
|
||||
## Talking Points for {name}
|
||||
Five specific talking points for the phone screen. Each must:
|
||||
- Reference a concrete experience from Alex's matched background by name
|
||||
(UpGuard NDA rule: say "enterprise security vendor" unless the role has a clear security/compliance focus)
|
||||
- Reference a concrete experience from {name}'s matched background by name
|
||||
(NDA rule: use the masked label shown in the matched experience section for any NDA-protected employer)
|
||||
- Connect to a specific signal from the JD or company context above
|
||||
- Be 1–2 sentences, ready to speak aloud
|
||||
- Never give generic advice
|
||||
|
|
@ -432,7 +441,7 @@ Five specific talking points for the phone screen. Each must:
|
|||
"competitors_brief": sections.get("Funding & Market Position", ""), # competitor landscape is in the funding section
|
||||
"red_flags": sections.get("Red Flags & Watch-outs", ""),
|
||||
"accessibility_brief": sections.get("Inclusion & Accessibility", ""),
|
||||
"talking_points": sections.get("Talking Points for Alex", ""),
|
||||
"talking_points": sections.get(f"Talking Points for {name}", ""),
|
||||
"scrape_used": scrape_used,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# scripts/finetune_local.py
|
||||
"""
|
||||
Local LoRA fine-tune on Alex's cover letter corpus.
|
||||
Local LoRA fine-tune on the candidate's cover letter corpus.
|
||||
No HuggingFace account or internet required after the base model is cached.
|
||||
|
||||
Usage:
|
||||
|
|
@ -17,24 +17,32 @@ import os
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
# Limit CUDA to GPU 0. device_map={"":0} in FastLanguageModel.from_pretrained
|
||||
# pins every layer to GPU 0, avoiding the accelerate None-device bug that
|
||||
# occurs with device_map="auto" on multi-GPU machines with 4-bit quantisation.
|
||||
# Do NOT set WORLD_SIZE/RANK — that triggers torch.distributed initialisation.
|
||||
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
|
||||
|
||||
from scripts.user_profile import UserProfile
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
|
||||
# ── Config ────────────────────────────────────────────────────────────────────
|
||||
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
||||
LETTERS_JSONL = Path("/Library/Documents/JobSearch/training_data/cover_letters.jsonl")
|
||||
OUTPUT_DIR = Path("/Library/Documents/JobSearch/training_data/finetune_output")
|
||||
GGUF_DIR = Path("/Library/Documents/JobSearch/training_data/gguf")
|
||||
OLLAMA_NAME = "alex-cover-writer"
|
||||
|
||||
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
||||
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
||||
GGUF_DIR = _docs / "training_data" / "gguf"
|
||||
OLLAMA_NAME = f"{_profile.name.split()[0].lower()}-cover-writer" if _profile else "cover-writer"
|
||||
|
||||
SYSTEM_PROMPT = (
|
||||
"You are Alex Rivera's personal cover letter writer. "
|
||||
"Write professional, warm, and results-focused cover letters in Alex's voice. "
|
||||
"Draw on her background in customer success, technical account management, "
|
||||
"and revenue operations. Be specific and avoid generic filler."
|
||||
f"You are {_profile.name}'s personal cover letter writer. "
|
||||
f"{_profile.career_summary}"
|
||||
if _profile else
|
||||
"You are a professional cover letter writer. Write in first person."
|
||||
)
|
||||
|
||||
# ── Args ──────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -48,7 +56,7 @@ parser.add_argument("--max-length", type=int, default=1024, help="Max token leng
|
|||
args = parser.parse_args()
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f" Alex Cover Letter Fine-Tuner")
|
||||
print(f" Cover Letter Fine-Tuner [{OLLAMA_NAME}]")
|
||||
print(f" Base model : {args.model}")
|
||||
print(f" Epochs : {args.epochs}")
|
||||
print(f" LoRA rank : {args.rank}")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# scripts/generate_cover_letter.py
|
||||
"""
|
||||
Generate a cover letter in Alex's voice using few-shot examples from her corpus.
|
||||
Generate a cover letter in the candidate's voice using few-shot examples from their corpus.
|
||||
|
||||
Usage:
|
||||
conda run -n job-seeker python scripts/generate_cover_letter.py \
|
||||
|
|
@ -16,30 +16,21 @@ import re
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
LETTERS_DIR = Path("/Library/Documents/JobSearch")
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from scripts.user_profile import UserProfile
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
|
||||
LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||
LETTER_GLOB = "*Cover Letter*.md"
|
||||
|
||||
# Background injected into every prompt so the model has Alex's facts
|
||||
SYSTEM_CONTEXT = """You are writing cover letters for Alex Rivera, a customer success leader.
|
||||
|
||||
Background:
|
||||
- 6+ years in customer success, technical account management, and CS leadership
|
||||
- Most recent role: led Americas Customer Success at UpGuard (cybersecurity SaaS), managing enterprise + Fortune 500 accounts, drove NPS consistently above 95
|
||||
- Also founder of M3 Consulting, a CS advisory practice for SaaS startups
|
||||
- Attended Texas State (2 yrs), CSU East Bay (1 yr); completed degree elsewhere
|
||||
- Based in San Francisco Bay Area; open to remote/hybrid
|
||||
- Pronouns: any
|
||||
|
||||
Voice guidelines:
|
||||
- Warm, confident, and specific — never generic
|
||||
- Opens with "I'm delighted/thrilled to apply for [role] at [company]."
|
||||
- 3–4 focused paragraphs, ~250–350 words total
|
||||
- Para 2: concrete experience (cite UpGuard and/or M3 Consulting with a specific metric)
|
||||
- Para 3: genuine connection to THIS company's mission/product
|
||||
- Closes with "Thank you for considering my application." + warm sign-off
|
||||
- Never use: "I am writing to express my interest", "passionate about making a difference",
|
||||
"I look forward to hearing from you", or any hollow filler phrases
|
||||
"""
|
||||
# Background injected into every prompt so the model has the candidate's facts
|
||||
SYSTEM_CONTEXT = (
|
||||
f"You are writing cover letters for {_profile.name}. {_profile.career_summary}"
|
||||
if _profile else
|
||||
"You are a professional cover letter writer. Write in first person."
|
||||
)
|
||||
|
||||
|
||||
# ── Mission-alignment detection ───────────────────────────────────────────────
|
||||
|
|
@ -69,21 +60,23 @@ _MISSION_SIGNALS: dict[str, list[str]] = {
|
|||
],
|
||||
}
|
||||
|
||||
_candidate = _profile.name if _profile else "the candidate"
|
||||
|
||||
_MISSION_NOTES: dict[str, str] = {
|
||||
"music": (
|
||||
"This company is in the music industry, which is one of Alex's genuinely "
|
||||
"ideal work environments — she has a real personal passion for the music scene. "
|
||||
f"This company is in the music industry, which is one of {_candidate}'s genuinely "
|
||||
"ideal work environments — they have a real personal passion for the music scene. "
|
||||
"Para 3 should warmly and specifically reflect this authentic alignment, not as "
|
||||
"a generic fan statement, but as an honest statement of where she'd love to apply "
|
||||
"her CS skills."
|
||||
"a generic fan statement, but as an honest statement of where they'd love to apply "
|
||||
"their CS skills."
|
||||
),
|
||||
"animal_welfare": (
|
||||
"This organization works in animal welfare/rescue — one of Alex's dream-job "
|
||||
f"This organization works in animal welfare/rescue — one of {_candidate}'s dream-job "
|
||||
"domains and a genuine personal passion. Para 3 should reflect this authentic "
|
||||
"connection warmly and specifically, tying her CS skills to this mission."
|
||||
"connection warmly and specifically, tying their CS skills to this mission."
|
||||
),
|
||||
"education": (
|
||||
"This company works in children's education or EdTech — one of Alex's ideal "
|
||||
f"This company works in children's education or EdTech — one of {_candidate}'s ideal "
|
||||
"work domains, reflecting genuine personal values around learning and young people. "
|
||||
"Para 3 should reflect this authentic connection specifically and warmly."
|
||||
),
|
||||
|
|
@ -138,7 +131,7 @@ def build_prompt(
|
|||
) -> str:
|
||||
parts = [SYSTEM_CONTEXT.strip(), ""]
|
||||
if examples:
|
||||
parts.append("=== STYLE EXAMPLES (Alex's past letters) ===\n")
|
||||
parts.append(f"=== STYLE EXAMPLES ({_candidate}'s past letters) ===\n")
|
||||
for i, ex in enumerate(examples, 1):
|
||||
parts.append(f"--- Example {i} ({ex['company']}) ---")
|
||||
parts.append(ex["text"])
|
||||
|
|
@ -183,7 +176,7 @@ def generate(title: str, company: str, description: str = "", _router=None) -> s
|
|||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Generate a cover letter in Alex's voice")
|
||||
parser = argparse.ArgumentParser(description=f"Generate a cover letter in {_candidate}'s voice")
|
||||
parser.add_argument("--title", help="Job title")
|
||||
parser.add_argument("--company", help="Company name")
|
||||
parser.add_argument("--description", default="", help="Job description text")
|
||||
|
|
|
|||
|
|
@ -18,8 +18,22 @@ import yaml
|
|||
from bs4 import BeautifulSoup
|
||||
from notion_client import Client
|
||||
|
||||
from scripts.user_profile import UserProfile
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
|
||||
CONFIG_DIR = Path(__file__).parent.parent / "config"
|
||||
RESUME_PATH = Path("/Library/Documents/JobSearch/Alex_Rivera_Resume_02-19-2025.pdf")
|
||||
|
||||
|
||||
def _find_resume(docs_dir: Path) -> Path | None:
|
||||
"""Find the most recently modified PDF in docs_dir matching *resume* or *cv*."""
|
||||
candidates = list(docs_dir.glob("*[Rr]esume*.pdf")) + list(docs_dir.glob("*[Cc][Vv]*.pdf"))
|
||||
return max(candidates, key=lambda p: p.stat().st_mtime) if candidates else None
|
||||
|
||||
|
||||
RESUME_PATH = (
|
||||
_find_resume(_profile.docs_dir) if _profile else None
|
||||
) or Path(__file__).parent.parent / "config" / "resume.pdf"
|
||||
|
||||
|
||||
def load_notion() -> tuple[Client, dict]:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# scripts/prepare_training_data.py
|
||||
"""
|
||||
Extract training pairs from Alex's cover letter corpus for LoRA fine-tuning.
|
||||
Extract training pairs from the candidate's cover letter corpus for LoRA fine-tuning.
|
||||
|
||||
Outputs a JSONL file where each line is:
|
||||
{"instruction": "Write a cover letter for the [role] position at [company].",
|
||||
|
|
@ -16,10 +16,17 @@ import re
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
LETTERS_DIR = Path("/Library/Documents/JobSearch")
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from scripts.user_profile import UserProfile
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
|
||||
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||
LETTERS_DIR = _docs
|
||||
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
||||
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
||||
DEFAULT_OUTPUT = LETTERS_DIR / "training_data" / "cover_letters.jsonl"
|
||||
DEFAULT_OUTPUT = _docs / "training_data" / "cover_letters.jsonl"
|
||||
|
||||
# Patterns that appear in opening sentences to extract role
|
||||
ROLE_PATTERNS = [
|
||||
|
|
|
|||
|
|
@ -64,16 +64,22 @@ def test_build_resume_context_top2_in_full():
|
|||
def test_build_resume_context_rest_condensed():
|
||||
"""Remaining experiences appear as condensed one-liners, not full bullets."""
|
||||
ctx = _build_resume_context(RESUME, KEYWORDS, JD)
|
||||
assert "Also in Alex" in ctx
|
||||
assert "Also in" in ctx
|
||||
assert "Generic Co" in ctx
|
||||
# Generic Co bullets should NOT appear in full
|
||||
assert "Managed SMB portfolio" not in ctx
|
||||
|
||||
|
||||
def test_upguard_nda_low_score():
|
||||
"""UpGuard name replaced with 'enterprise security vendor' when score < 3."""
|
||||
"""UpGuard NDA rule: company masked when score < 3 and profile has NDA companies configured."""
|
||||
from scripts.company_research import _profile
|
||||
ctx = _build_resume_context(RESUME, ["python", "kubernetes"], "python kubernetes devops")
|
||||
assert "enterprise security vendor" in ctx
|
||||
if _profile and _profile.is_nda("upguard"):
|
||||
# Profile present with UpGuard NDA — company should be masked
|
||||
assert "UpGuard" not in ctx
|
||||
else:
|
||||
# No profile or UpGuard not in NDA list — company name appears directly
|
||||
assert "UpGuard" in ctx or "enterprise security vendor" in ctx or "previous employer" in ctx
|
||||
|
||||
|
||||
def test_load_resume_and_keywords_returns_lists():
|
||||
|
|
|
|||
|
|
@ -89,17 +89,14 @@ def test_find_similar_letters_returns_top_k():
|
|||
|
||||
|
||||
def test_load_corpus_returns_list():
|
||||
"""load_corpus returns a list (may be empty if LETTERS_DIR absent, must not crash)."""
|
||||
"""load_corpus returns a list (empty if LETTERS_DIR absent) without crashing."""
|
||||
from scripts.generate_cover_letter import load_corpus, LETTERS_DIR
|
||||
|
||||
if LETTERS_DIR.exists():
|
||||
corpus = load_corpus()
|
||||
assert isinstance(corpus, list)
|
||||
if corpus:
|
||||
assert "company" in corpus[0]
|
||||
assert "text" in corpus[0]
|
||||
else:
|
||||
pytest.skip("LETTERS_DIR not present in this environment")
|
||||
|
||||
|
||||
def test_generate_calls_llm_router():
|
||||
|
|
|
|||
Loading…
Reference in a new issue