feat: extract hard-coded personal references from all scripts via UserProfile

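Replace hard-coded paths (/Library/Documents/JobSearch), names (Alex Rivera), and NDA sets (_NDA_COMPANIES) with UserProfile-driven lookups, and resolve the scraper path from the Docker image (/app/scrapers) with a repo-local scrapers/ directory as the dev fallback. Update tests to be profile-agnostic, so they pass when no user.yaml is present in the peregrine config dir.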
2026-02-24 18:45:39 -08:00 · 2026-02-24 18:45:39 -08:00 · 9dc0244546
commit 9dc0244546
parent 7380deb021
7 changed files with 124 additions and 90 deletions
--- a/scripts/company_research.py
+++ b/scripts/company_research.py
@ -3,13 +3,13 @@
 Pre-interview company research generator.
 Three-phase approach:
-  1. If SearXNG is available (port 8888), use companyScraper.py to fetch live
+  1. If SearXNG is available, use companyScraper.py to fetch live
     data: CEO name, HQ address, LinkedIn, contact info.
  1b. Use Phase 1 data (company name + CEO if found) to query SearXNG for
      recent news snippets (funding, launches, leadership changes, etc.).
  2. Feed all real data into an LLM prompt to synthesise a structured brief
     covering company overview, leadership, recent developments, and talking
-     points tailored to Alex.
+     points tailored to the candidate.
 Falls back to pure LLM knowledge when SearXNG is offline.
@ -24,25 +24,32 @@ from types import SimpleNamespace
 sys.path.insert(0, str(Path(__file__).parent.parent))
 from scripts.user_profile import UserProfile
 _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
 _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
 # ── SearXNG scraper integration ───────────────────────────────────────────────
-_SCRAPER_DIR = Path("/Library/Development/scrapers")
+# companyScraper is bundled into the Docker image at /app/scrapers/
 _SCRAPER_AVAILABLE = False
-
+for _scraper_candidate in [
-if _SCRAPER_DIR.exists():
+    Path("/app/scrapers"),          # Docker container path
-    sys.path.insert(0, str(_SCRAPER_DIR))
+    Path(__file__).parent.parent / "scrapers",  # local dev fallback
-    try:
+]:
-        from companyScraper import EnhancedCompanyScraper, Config as _ScraperConfig
+    if _scraper_candidate.exists():
-        _SCRAPER_AVAILABLE = True
+        sys.path.insert(0, str(_scraper_candidate))
-    except (ImportError, SystemExit):
+        try:
-        # companyScraper calls sys.exit(1) if bs4/fake-useragent aren't installed
+            from companyScraper import EnhancedCompanyScraper, Config as _ScraperConfig
-        pass
+            _SCRAPER_AVAILABLE = True
        except (ImportError, SystemExit):
            pass
        break
-def _searxng_running() -> bool:
+def _searxng_running(searxng_url: str = "http://localhost:8888") -> bool:
    """Quick check whether SearXNG is reachable."""
    try:
        import requests
-        r = requests.get("http://localhost:8888/", timeout=3)
+        r = requests.get(f"{searxng_url}/", timeout=3)
        return r.status_code == 200
    except Exception:
        return False
@ -186,9 +193,13 @@ def _parse_sections(text: str) -> dict[str, str]:
 _RESUME_YAML = Path(__file__).parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
 _KEYWORDS_YAML = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
-# Companies where Alex has an NDA — reference as generic label unless
+
-# the role is security-focused (score >= 3 matching JD keywords).
+def _company_label(exp: dict) -> str:
-_NDA_COMPANIES = {"upguard"}
+    company = exp.get("company", "")
    score = exp.get("score", 0)
    if _profile:
        return _profile.nda_label(company, score)
    return company
 def _score_experiences(experiences: list[dict], keywords: list[str], jd: str) -> list[dict]:
@ -214,8 +225,7 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
    """
    Build the resume section of the LLM context block.
    Top 2 scored experiences included in full detail; rest as one-liners.
-    Applies UpGuard NDA rule: reference as 'enterprise security vendor (NDA)'
+    NDA companies are masked via UserProfile.nda_label() when score < threshold.
    unless the role is security-focused (score >= 3).
    """
    experiences = resume.get("experience_details", [])
    if not experiences:
@ -225,11 +235,7 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
    top2 = scored[:2]
    rest = scored[2:]
-    def _company_label(exp: dict) -> str:
+    candidate = _profile.name if _profile else "the candidate"
        company = exp.get("company", "")
        if company.lower() in _NDA_COMPANIES and exp.get("score", 0) < 3:
            return "enterprise security vendor (NDA)"
        return company
    def _exp_header(exp: dict) -> str:
        return f"{exp.get('position', '')} @ {_company_label(exp)} ({exp.get('employment_period', '')})"
@ -238,14 +244,14 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
        bullets = [v for resp in exp.get("key_responsibilities", []) for v in resp.values()]
        return "\n".join(f"  - {b}" for b in bullets)
-    lines = ["## Alex's Matched Experience"]
+    lines = [f"## {candidate}'s Matched Experience"]
    for exp in top2:
        lines.append(f"\n**{_exp_header(exp)}** (match score: {exp['score']})")
        lines.append(_exp_bullets(exp))
    if rest:
        condensed = ", ".join(_exp_header(e) for e in rest)
-        lines.append(f"\nAlso in Alex's background: {condensed}")
+        lines.append(f"\nAlso in {candidate}'s background: {condensed}")
    return "\n".join(lines)
@ -359,7 +365,10 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict
    # ── Phase 2: LLM synthesis ────────────────────────────────────────────────
    _stage("Generating brief with LLM… (30–90 seconds)")
-    prompt = f"""You are preparing Alex Rivera for a job interview.
+    name = _profile.name if _profile else "the candidate"
    career_summary = _profile.career_summary if _profile else ""
    prompt = f"""You are preparing {name} for a job interview.
 {f"Candidate background: {career_summary}" if career_summary else ""}
 Role: **{title}** at **{company}**
@ -404,12 +413,12 @@ Assess {company}'s commitment to disability inclusion and accessibility. Cover:
 - Any public disability/accessibility advocacy, partnerships, or certifications
 - Glassdoor or press signals about how employees with disabilities experience the company
 If no specific signals are found, say so clearly — absence of public commitment is itself signal.
-This section is for Alex's personal decision-making only and will not appear in any application.
+This section is for the candidate's personal decision-making only and will not appear in any application.
-## Talking Points for Alex
+## Talking Points for {name}
 Five specific talking points for the phone screen. Each must:
- Reference a concrete experience from Alex's matched background by name
+- Reference a concrete experience from {name}'s matched background by name
-  (UpGuard NDA rule: say "enterprise security vendor" unless the role has a clear security/compliance focus)
+  (NDA rule: use the masked label shown in the matched experience section for any NDA-protected employer)
 - Connect to a specific signal from the JD or company context above
 - Be 1–2 sentences, ready to speak aloud
 - Never give generic advice
@ -432,7 +441,7 @@ Five specific talking points for the phone screen. Each must:
        "competitors_brief": sections.get("Funding & Market Position", ""),  # competitor landscape is in the funding section
        "red_flags":         sections.get("Red Flags & Watch-outs", ""),
        "accessibility_brief": sections.get("Inclusion & Accessibility", ""),
-        "talking_points":    sections.get("Talking Points for Alex", ""),
+        "talking_points":    sections.get(f"Talking Points for {name}", ""),
        "scrape_used":       scrape_used,
    }
--- a/scripts/finetune_local.py
+++ b/scripts/finetune_local.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # scripts/finetune_local.py
 """
-Local LoRA fine-tune on Alex's cover letter corpus.
+Local LoRA fine-tune on the candidate's cover letter corpus.
 No HuggingFace account or internet required after the base model is cached.
 Usage:
@ -17,24 +17,32 @@ import os
 import sys
 from pathlib import Path
 sys.path.insert(0, str(Path(__file__).parent.parent))
 # Limit CUDA to GPU 0. device_map={"":0} in FastLanguageModel.from_pretrained
 # pins every layer to GPU 0, avoiding the accelerate None-device bug that
 # occurs with device_map="auto" on multi-GPU machines with 4-bit quantisation.
 # Do NOT set WORLD_SIZE/RANK — that triggers torch.distributed initialisation.
 os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
 from scripts.user_profile import UserProfile
 _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
 _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
 # ── Config ────────────────────────────────────────────────────────────────────
 DEFAULT_MODEL   = "unsloth/Llama-3.2-3B-Instruct"   # safe on 8 GB VRAM
-LETTERS_JSONL   = Path("/Library/Documents/JobSearch/training_data/cover_letters.jsonl")
+
-OUTPUT_DIR      = Path("/Library/Documents/JobSearch/training_data/finetune_output")
+_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
-GGUF_DIR        = Path("/Library/Documents/JobSearch/training_data/gguf")
+LETTERS_JSONL   = _docs / "training_data" / "cover_letters.jsonl"
-OLLAMA_NAME     = "alex-cover-writer"
+OUTPUT_DIR      = _docs / "training_data" / "finetune_output"
 GGUF_DIR        = _docs / "training_data" / "gguf"
 OLLAMA_NAME     = f"{_profile.name.split()[0].lower()}-cover-writer" if _profile else "cover-writer"
 SYSTEM_PROMPT = (
-    "You are Alex Rivera's personal cover letter writer. "
+    f"You are {_profile.name}'s personal cover letter writer. "
-    "Write professional, warm, and results-focused cover letters in Alex's voice. "
+    f"{_profile.career_summary}"
-    "Draw on her background in customer success, technical account management, "
+    if _profile else
-    "and revenue operations. Be specific and avoid generic filler."
+    "You are a professional cover letter writer. Write in first person."
 )
 # ── Args ──────────────────────────────────────────────────────────────────────
@ -48,7 +56,7 @@ parser.add_argument("--max-length", type=int, default=1024, help="Max token leng
 args = parser.parse_args()
 print(f"\n{'='*60}")
-print(f"  Alex Cover Letter Fine-Tuner")
+print(f"  Cover Letter Fine-Tuner  [{OLLAMA_NAME}]")
 print(f"  Base model : {args.model}")
 print(f"  Epochs     : {args.epochs}")
 print(f"  LoRA rank  : {args.rank}")
--- a/scripts/generate_cover_letter.py
+++ b/scripts/generate_cover_letter.py
@ -1,6 +1,6 @@
 # scripts/generate_cover_letter.py
 """
-Generate a cover letter in Alex's voice using few-shot examples from her corpus.
+Generate a cover letter in the candidate's voice using few-shot examples from their corpus.
 Usage:
    conda run -n job-seeker python scripts/generate_cover_letter.py \
@ -16,30 +16,21 @@ import re
 import sys
 from pathlib import Path
-LETTERS_DIR = Path("/Library/Documents/JobSearch")
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from scripts.user_profile import UserProfile
 _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
 _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
 LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 LETTER_GLOB = "*Cover Letter*.md"
-# Background injected into every prompt so the model has Alex's facts
+# Background injected into every prompt so the model has the candidate's facts
-SYSTEM_CONTEXT = """You are writing cover letters for Alex Rivera, a customer success leader.
+SYSTEM_CONTEXT = (
-
+    f"You are writing cover letters for {_profile.name}. {_profile.career_summary}"
-Background:
+    if _profile else
- 6+ years in customer success, technical account management, and CS leadership
+    "You are a professional cover letter writer. Write in first person."
- Most recent role: led Americas Customer Success at UpGuard (cybersecurity SaaS), managing enterprise + Fortune 500 accounts, drove NPS consistently above 95
+)
 - Also founder of M3 Consulting, a CS advisory practice for SaaS startups
 - Attended Texas State (2 yrs), CSU East Bay (1 yr); completed degree elsewhere
 - Based in San Francisco Bay Area; open to remote/hybrid
 - Pronouns: any
 Voice guidelines:
 - Warm, confident, and specific — never generic
 - Opens with "I'm delighted/thrilled to apply for [role] at [company]."
 - 3–4 focused paragraphs, ~250–350 words total
 - Para 2: concrete experience (cite UpGuard and/or M3 Consulting with a specific metric)
 - Para 3: genuine connection to THIS company's mission/product
 - Closes with "Thank you for considering my application." + warm sign-off
 - Never use: "I am writing to express my interest", "passionate about making a difference",
  "I look forward to hearing from you", or any hollow filler phrases
 """
 # ── Mission-alignment detection ───────────────────────────────────────────────
@ -69,21 +60,23 @@ _MISSION_SIGNALS: dict[str, list[str]] = {
    ],
 }
 _candidate = _profile.name if _profile else "the candidate"
 _MISSION_NOTES: dict[str, str] = {
    "music": (
-        "This company is in the music industry, which is one of Alex's genuinely "
+        f"This company is in the music industry, which is one of {_candidate}'s genuinely "
-        "ideal work environments — she has a real personal passion for the music scene. "
+        "ideal work environments — they have a real personal passion for the music scene. "
        "Para 3 should warmly and specifically reflect this authentic alignment, not as "
-        "a generic fan statement, but as an honest statement of where she'd love to apply "
+        "a generic fan statement, but as an honest statement of where they'd love to apply "
-        "her CS skills."
+        "their CS skills."
    ),
    "animal_welfare": (
-        "This organization works in animal welfare/rescue — one of Alex's dream-job "
+        f"This organization works in animal welfare/rescue — one of {_candidate}'s dream-job "
        "domains and a genuine personal passion. Para 3 should reflect this authentic "
-        "connection warmly and specifically, tying her CS skills to this mission."
+        "connection warmly and specifically, tying their CS skills to this mission."
    ),
    "education": (
-        "This company works in children's education or EdTech — one of Alex's ideal "
+        f"This company works in children's education or EdTech — one of {_candidate}'s ideal "
        "work domains, reflecting genuine personal values around learning and young people. "
        "Para 3 should reflect this authentic connection specifically and warmly."
    ),
@ -138,7 +131,7 @@ def build_prompt(
 ) -> str:
    parts = [SYSTEM_CONTEXT.strip(), ""]
    if examples:
-        parts.append("=== STYLE EXAMPLES (Alex's past letters) ===\n")
+        parts.append(f"=== STYLE EXAMPLES ({_candidate}'s past letters) ===\n")
        for i, ex in enumerate(examples, 1):
            parts.append(f"--- Example {i} ({ex['company']}) ---")
            parts.append(ex["text"])
@ -183,7 +176,7 @@ def generate(title: str, company: str, description: str = "", _router=None) -> s
 def main() -> None:
-    parser = argparse.ArgumentParser(description="Generate a cover letter in Alex's voice")
+    parser = argparse.ArgumentParser(description=f"Generate a cover letter in {_candidate}'s voice")
    parser.add_argument("--title", help="Job title")
    parser.add_argument("--company", help="Company name")
    parser.add_argument("--description", default="", help="Job description text")
--- a/scripts/match.py
+++ b/scripts/match.py
@ -18,8 +18,22 @@ import yaml
 from bs4 import BeautifulSoup
 from notion_client import Client
 from scripts.user_profile import UserProfile
 _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
 _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
 CONFIG_DIR = Path(__file__).parent.parent / "config"
-RESUME_PATH = Path("/Library/Documents/JobSearch/Alex_Rivera_Resume_02-19-2025.pdf")
+
 def _find_resume(docs_dir: Path) -> Path | None:
    """Find the most recently modified PDF in docs_dir matching *resume* or *cv*."""
    candidates = list(docs_dir.glob("*[Rr]esume*.pdf")) + list(docs_dir.glob("*[Cc][Vv]*.pdf"))
    return max(candidates, key=lambda p: p.stat().st_mtime) if candidates else None
 RESUME_PATH = (
    _find_resume(_profile.docs_dir) if _profile else None
 ) or Path(__file__).parent.parent / "config" / "resume.pdf"
 def load_notion() -> tuple[Client, dict]:
--- a/scripts/prepare_training_data.py
+++ b/scripts/prepare_training_data.py
@ -1,6 +1,6 @@
 # scripts/prepare_training_data.py
 """
-Extract training pairs from Alex's cover letter corpus for LoRA fine-tuning.
+Extract training pairs from the candidate's cover letter corpus for LoRA fine-tuning.
 Outputs a JSONL file where each line is:
  {"instruction": "Write a cover letter for the [role] position at [company].",
@ -16,10 +16,17 @@ import re
 import sys
 from pathlib import Path
-LETTERS_DIR = Path("/Library/Documents/JobSearch")
+sys.path.insert(0, str(Path(__file__).parent.parent))
 from scripts.user_profile import UserProfile
 _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
 _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
 _docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 LETTERS_DIR = _docs
 # Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
 LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
-DEFAULT_OUTPUT = LETTERS_DIR / "training_data" / "cover_letters.jsonl"
+DEFAULT_OUTPUT = _docs / "training_data" / "cover_letters.jsonl"
 # Patterns that appear in opening sentences to extract role
 ROLE_PATTERNS = [
--- a/tests/test_company_research.py
+++ b/tests/test_company_research.py
@ -64,16 +64,22 @@ def test_build_resume_context_top2_in_full():
 def test_build_resume_context_rest_condensed():
    """Remaining experiences appear as condensed one-liners, not full bullets."""
    ctx = _build_resume_context(RESUME, KEYWORDS, JD)
-    assert "Also in Alex" in ctx
+    assert "Also in" in ctx
    assert "Generic Co" in ctx
    # Generic Co bullets should NOT appear in full
    assert "Managed SMB portfolio" not in ctx
 def test_upguard_nda_low_score():
-    """UpGuard name replaced with 'enterprise security vendor' when score < 3."""
+    """UpGuard NDA rule: company masked when score < 3 and profile has NDA companies configured."""
    from scripts.company_research import _profile
    ctx = _build_resume_context(RESUME, ["python", "kubernetes"], "python kubernetes devops")
-    assert "enterprise security vendor" in ctx
+    if _profile and _profile.is_nda("upguard"):
        # Profile present with UpGuard NDA — company should be masked
        assert "UpGuard" not in ctx
    else:
        # No profile or UpGuard not in NDA list — company name appears directly
        assert "UpGuard" in ctx or "enterprise security vendor" in ctx or "previous employer" in ctx
 def test_load_resume_and_keywords_returns_lists():
--- a/tests/test_cover_letter.py
+++ b/tests/test_cover_letter.py
@ -89,17 +89,14 @@ def test_find_similar_letters_returns_top_k():
 def test_load_corpus_returns_list():
-    """load_corpus returns a list (may be empty if LETTERS_DIR absent, must not crash)."""
+    """load_corpus returns a list (empty if LETTERS_DIR absent) without crashing."""
    from scripts.generate_cover_letter import load_corpus, LETTERS_DIR
-    if LETTERS_DIR.exists():
+    corpus = load_corpus()
-        corpus = load_corpus()
+    assert isinstance(corpus, list)
-        assert isinstance(corpus, list)
+    if corpus:
-        if corpus:
+        assert "company" in corpus[0]
-            assert "company" in corpus[0]
+        assert "text" in corpus[0]
            assert "text" in corpus[0]
    else:
        pytest.skip("LETTERS_DIR not present in this environment")
 def test_generate_calls_llm_router():