feat: extract hard-coded personal references from all scripts via UserProfile
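Replace hard-coded paths (/Library/Documents/JobSearch), names (Alex Rivera), and NDA sets (_NDA_COMPANIES) with UserProfile-driven lookups, and replace the hard-coded scraper path with a search of the Docker image path followed by a local dev fallback. Update tests to be profile-agnostic (they no longer assume a user.yaml in the peregrine config dir).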
This commit is contained in:
parent
7380deb021
commit
9dc0244546
7 changed files with 124 additions and 90 deletions
|
|
@ -3,13 +3,13 @@
|
||||||
Pre-interview company research generator.
|
Pre-interview company research generator.
|
||||||
|
|
||||||
Three-phase approach:
|
Three-phase approach:
|
||||||
1. If SearXNG is available (port 8888), use companyScraper.py to fetch live
|
1. If SearXNG is available, use companyScraper.py to fetch live
|
||||||
data: CEO name, HQ address, LinkedIn, contact info.
|
data: CEO name, HQ address, LinkedIn, contact info.
|
||||||
1b. Use Phase 1 data (company name + CEO if found) to query SearXNG for
|
1b. Use Phase 1 data (company name + CEO if found) to query SearXNG for
|
||||||
recent news snippets (funding, launches, leadership changes, etc.).
|
recent news snippets (funding, launches, leadership changes, etc.).
|
||||||
2. Feed all real data into an LLM prompt to synthesise a structured brief
|
2. Feed all real data into an LLM prompt to synthesise a structured brief
|
||||||
covering company overview, leadership, recent developments, and talking
|
covering company overview, leadership, recent developments, and talking
|
||||||
points tailored to Alex.
|
points tailored to the candidate.
|
||||||
|
|
||||||
Falls back to pure LLM knowledge when SearXNG is offline.
|
Falls back to pure LLM knowledge when SearXNG is offline.
|
||||||
|
|
||||||
|
|
@ -24,25 +24,32 @@ from types import SimpleNamespace
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from scripts.user_profile import UserProfile
|
||||||
|
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||||
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
|
||||||
# ── SearXNG scraper integration ───────────────────────────────────────────────
|
# ── SearXNG scraper integration ───────────────────────────────────────────────
|
||||||
_SCRAPER_DIR = Path("/Library/Development/scrapers")
|
# companyScraper is bundled into the Docker image at /app/scrapers/
|
||||||
_SCRAPER_AVAILABLE = False
|
_SCRAPER_AVAILABLE = False
|
||||||
|
for _scraper_candidate in [
|
||||||
if _SCRAPER_DIR.exists():
|
Path("/app/scrapers"), # Docker container path
|
||||||
sys.path.insert(0, str(_SCRAPER_DIR))
|
Path(__file__).parent.parent / "scrapers", # local dev fallback
|
||||||
try:
|
]:
|
||||||
from companyScraper import EnhancedCompanyScraper, Config as _ScraperConfig
|
if _scraper_candidate.exists():
|
||||||
_SCRAPER_AVAILABLE = True
|
sys.path.insert(0, str(_scraper_candidate))
|
||||||
except (ImportError, SystemExit):
|
try:
|
||||||
# companyScraper calls sys.exit(1) if bs4/fake-useragent aren't installed
|
from companyScraper import EnhancedCompanyScraper, Config as _ScraperConfig
|
||||||
pass
|
_SCRAPER_AVAILABLE = True
|
||||||
|
except (ImportError, SystemExit):
|
||||||
|
pass
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
def _searxng_running() -> bool:
|
def _searxng_running(searxng_url: str = "http://localhost:8888") -> bool:
|
||||||
"""Quick check whether SearXNG is reachable."""
|
"""Quick check whether SearXNG is reachable."""
|
||||||
try:
|
try:
|
||||||
import requests
|
import requests
|
||||||
r = requests.get("http://localhost:8888/", timeout=3)
|
r = requests.get(f"{searxng_url}/", timeout=3)
|
||||||
return r.status_code == 200
|
return r.status_code == 200
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
@ -186,9 +193,13 @@ def _parse_sections(text: str) -> dict[str, str]:
|
||||||
_RESUME_YAML = Path(__file__).parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
|
_RESUME_YAML = Path(__file__).parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
|
||||||
_KEYWORDS_YAML = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
|
_KEYWORDS_YAML = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
|
||||||
|
|
||||||
# Companies where Alex has an NDA — reference as generic label unless
|
|
||||||
# the role is security-focused (score >= 3 matching JD keywords).
|
def _company_label(exp: dict) -> str:
|
||||||
_NDA_COMPANIES = {"upguard"}
|
company = exp.get("company", "")
|
||||||
|
score = exp.get("score", 0)
|
||||||
|
if _profile:
|
||||||
|
return _profile.nda_label(company, score)
|
||||||
|
return company
|
||||||
|
|
||||||
|
|
||||||
def _score_experiences(experiences: list[dict], keywords: list[str], jd: str) -> list[dict]:
|
def _score_experiences(experiences: list[dict], keywords: list[str], jd: str) -> list[dict]:
|
||||||
|
|
@ -214,8 +225,7 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
|
||||||
"""
|
"""
|
||||||
Build the resume section of the LLM context block.
|
Build the resume section of the LLM context block.
|
||||||
Top 2 scored experiences included in full detail; rest as one-liners.
|
Top 2 scored experiences included in full detail; rest as one-liners.
|
||||||
Applies UpGuard NDA rule: reference as 'enterprise security vendor (NDA)'
|
NDA companies are masked via UserProfile.nda_label() when score < threshold.
|
||||||
unless the role is security-focused (score >= 3).
|
|
||||||
"""
|
"""
|
||||||
experiences = resume.get("experience_details", [])
|
experiences = resume.get("experience_details", [])
|
||||||
if not experiences:
|
if not experiences:
|
||||||
|
|
@ -225,11 +235,7 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
|
||||||
top2 = scored[:2]
|
top2 = scored[:2]
|
||||||
rest = scored[2:]
|
rest = scored[2:]
|
||||||
|
|
||||||
def _company_label(exp: dict) -> str:
|
candidate = _profile.name if _profile else "the candidate"
|
||||||
company = exp.get("company", "")
|
|
||||||
if company.lower() in _NDA_COMPANIES and exp.get("score", 0) < 3:
|
|
||||||
return "enterprise security vendor (NDA)"
|
|
||||||
return company
|
|
||||||
|
|
||||||
def _exp_header(exp: dict) -> str:
|
def _exp_header(exp: dict) -> str:
|
||||||
return f"{exp.get('position', '')} @ {_company_label(exp)} ({exp.get('employment_period', '')})"
|
return f"{exp.get('position', '')} @ {_company_label(exp)} ({exp.get('employment_period', '')})"
|
||||||
|
|
@ -238,14 +244,14 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
|
||||||
bullets = [v for resp in exp.get("key_responsibilities", []) for v in resp.values()]
|
bullets = [v for resp in exp.get("key_responsibilities", []) for v in resp.values()]
|
||||||
return "\n".join(f" - {b}" for b in bullets)
|
return "\n".join(f" - {b}" for b in bullets)
|
||||||
|
|
||||||
lines = ["## Alex's Matched Experience"]
|
lines = [f"## {candidate}'s Matched Experience"]
|
||||||
for exp in top2:
|
for exp in top2:
|
||||||
lines.append(f"\n**{_exp_header(exp)}** (match score: {exp['score']})")
|
lines.append(f"\n**{_exp_header(exp)}** (match score: {exp['score']})")
|
||||||
lines.append(_exp_bullets(exp))
|
lines.append(_exp_bullets(exp))
|
||||||
|
|
||||||
if rest:
|
if rest:
|
||||||
condensed = ", ".join(_exp_header(e) for e in rest)
|
condensed = ", ".join(_exp_header(e) for e in rest)
|
||||||
lines.append(f"\nAlso in Alex's background: {condensed}")
|
lines.append(f"\nAlso in {candidate}'s background: {condensed}")
|
||||||
|
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
@ -359,7 +365,10 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict
|
||||||
|
|
||||||
# ── Phase 2: LLM synthesis ────────────────────────────────────────────────
|
# ── Phase 2: LLM synthesis ────────────────────────────────────────────────
|
||||||
_stage("Generating brief with LLM… (30–90 seconds)")
|
_stage("Generating brief with LLM… (30–90 seconds)")
|
||||||
prompt = f"""You are preparing Alex Rivera for a job interview.
|
name = _profile.name if _profile else "the candidate"
|
||||||
|
career_summary = _profile.career_summary if _profile else ""
|
||||||
|
prompt = f"""You are preparing {name} for a job interview.
|
||||||
|
{f"Candidate background: {career_summary}" if career_summary else ""}
|
||||||
|
|
||||||
Role: **{title}** at **{company}**
|
Role: **{title}** at **{company}**
|
||||||
|
|
||||||
|
|
@ -404,12 +413,12 @@ Assess {company}'s commitment to disability inclusion and accessibility. Cover:
|
||||||
- Any public disability/accessibility advocacy, partnerships, or certifications
|
- Any public disability/accessibility advocacy, partnerships, or certifications
|
||||||
- Glassdoor or press signals about how employees with disabilities experience the company
|
- Glassdoor or press signals about how employees with disabilities experience the company
|
||||||
If no specific signals are found, say so clearly — absence of public commitment is itself signal.
|
If no specific signals are found, say so clearly — absence of public commitment is itself signal.
|
||||||
This section is for Alex's personal decision-making only and will not appear in any application.
|
This section is for the candidate's personal decision-making only and will not appear in any application.
|
||||||
|
|
||||||
## Talking Points for Alex
|
## Talking Points for {name}
|
||||||
Five specific talking points for the phone screen. Each must:
|
Five specific talking points for the phone screen. Each must:
|
||||||
- Reference a concrete experience from Alex's matched background by name
|
- Reference a concrete experience from {name}'s matched background by name
|
||||||
(UpGuard NDA rule: say "enterprise security vendor" unless the role has a clear security/compliance focus)
|
(NDA rule: use the masked label shown in the matched experience section for any NDA-protected employer)
|
||||||
- Connect to a specific signal from the JD or company context above
|
- Connect to a specific signal from the JD or company context above
|
||||||
- Be 1–2 sentences, ready to speak aloud
|
- Be 1–2 sentences, ready to speak aloud
|
||||||
- Never give generic advice
|
- Never give generic advice
|
||||||
|
|
@ -432,7 +441,7 @@ Five specific talking points for the phone screen. Each must:
|
||||||
"competitors_brief": sections.get("Funding & Market Position", ""), # competitor landscape is in the funding section
|
"competitors_brief": sections.get("Funding & Market Position", ""), # competitor landscape is in the funding section
|
||||||
"red_flags": sections.get("Red Flags & Watch-outs", ""),
|
"red_flags": sections.get("Red Flags & Watch-outs", ""),
|
||||||
"accessibility_brief": sections.get("Inclusion & Accessibility", ""),
|
"accessibility_brief": sections.get("Inclusion & Accessibility", ""),
|
||||||
"talking_points": sections.get("Talking Points for Alex", ""),
|
"talking_points": sections.get(f"Talking Points for {name}", ""),
|
||||||
"scrape_used": scrape_used,
|
"scrape_used": scrape_used,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# scripts/finetune_local.py
|
# scripts/finetune_local.py
|
||||||
"""
|
"""
|
||||||
Local LoRA fine-tune on Alex's cover letter corpus.
|
Local LoRA fine-tune on the candidate's cover letter corpus.
|
||||||
No HuggingFace account or internet required after the base model is cached.
|
No HuggingFace account or internet required after the base model is cached.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
|
|
@ -17,24 +17,32 @@ import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
# Limit CUDA to GPU 0. device_map={"":0} in FastLanguageModel.from_pretrained
|
# Limit CUDA to GPU 0. device_map={"":0} in FastLanguageModel.from_pretrained
|
||||||
# pins every layer to GPU 0, avoiding the accelerate None-device bug that
|
# pins every layer to GPU 0, avoiding the accelerate None-device bug that
|
||||||
# occurs with device_map="auto" on multi-GPU machines with 4-bit quantisation.
|
# occurs with device_map="auto" on multi-GPU machines with 4-bit quantisation.
|
||||||
# Do NOT set WORLD_SIZE/RANK — that triggers torch.distributed initialisation.
|
# Do NOT set WORLD_SIZE/RANK — that triggers torch.distributed initialisation.
|
||||||
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
|
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
|
||||||
|
|
||||||
|
from scripts.user_profile import UserProfile
|
||||||
|
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||||
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
|
||||||
# ── Config ────────────────────────────────────────────────────────────────────
|
# ── Config ────────────────────────────────────────────────────────────────────
|
||||||
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
||||||
LETTERS_JSONL = Path("/Library/Documents/JobSearch/training_data/cover_letters.jsonl")
|
|
||||||
OUTPUT_DIR = Path("/Library/Documents/JobSearch/training_data/finetune_output")
|
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
GGUF_DIR = Path("/Library/Documents/JobSearch/training_data/gguf")
|
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
||||||
OLLAMA_NAME = "alex-cover-writer"
|
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
||||||
|
GGUF_DIR = _docs / "training_data" / "gguf"
|
||||||
|
# Ollama model tag: "<first name>-cover-writer" when a profile with a usable
# name is loaded, otherwise the generic "cover-writer".
# The name is split defensively: a blank or whitespace-only profile name would
# make `split()[0]` raise IndexError while this module is being imported.
OLLAMA_NAME = "cover-writer"
if _profile:
    _name_parts = (_profile.name or "").split()
    if _name_parts:
        OLLAMA_NAME = f"{_name_parts[0].lower()}-cover-writer"
|
||||||
|
|
||||||
SYSTEM_PROMPT = (
|
SYSTEM_PROMPT = (
|
||||||
"You are Alex Rivera's personal cover letter writer. "
|
f"You are {_profile.name}'s personal cover letter writer. "
|
||||||
"Write professional, warm, and results-focused cover letters in Alex's voice. "
|
f"{_profile.career_summary}"
|
||||||
"Draw on her background in customer success, technical account management, "
|
if _profile else
|
||||||
"and revenue operations. Be specific and avoid generic filler."
|
"You are a professional cover letter writer. Write in first person."
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── Args ──────────────────────────────────────────────────────────────────────
|
# ── Args ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -48,7 +56,7 @@ parser.add_argument("--max-length", type=int, default=1024, help="Max token leng
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
print(f"\n{'='*60}")
|
print(f"\n{'='*60}")
|
||||||
print(f" Alex Cover Letter Fine-Tuner")
|
print(f" Cover Letter Fine-Tuner [{OLLAMA_NAME}]")
|
||||||
print(f" Base model : {args.model}")
|
print(f" Base model : {args.model}")
|
||||||
print(f" Epochs : {args.epochs}")
|
print(f" Epochs : {args.epochs}")
|
||||||
print(f" LoRA rank : {args.rank}")
|
print(f" LoRA rank : {args.rank}")
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# scripts/generate_cover_letter.py
|
# scripts/generate_cover_letter.py
|
||||||
"""
|
"""
|
||||||
Generate a cover letter in Alex's voice using few-shot examples from her corpus.
|
Generate a cover letter in the candidate's voice using few-shot examples from their corpus.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
conda run -n job-seeker python scripts/generate_cover_letter.py \
|
conda run -n job-seeker python scripts/generate_cover_letter.py \
|
||||||
|
|
@ -16,30 +16,21 @@ import re
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
LETTERS_DIR = Path("/Library/Documents/JobSearch")
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from scripts.user_profile import UserProfile
|
||||||
|
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||||
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
|
||||||
|
LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
LETTER_GLOB = "*Cover Letter*.md"
|
LETTER_GLOB = "*Cover Letter*.md"
|
||||||
|
|
||||||
# Background injected into every prompt so the model has Alex's facts
|
# Background injected into every prompt so the model has the candidate's facts
|
||||||
SYSTEM_CONTEXT = """You are writing cover letters for Alex Rivera, a customer success leader.
|
SYSTEM_CONTEXT = (
|
||||||
|
f"You are writing cover letters for {_profile.name}. {_profile.career_summary}"
|
||||||
Background:
|
if _profile else
|
||||||
- 6+ years in customer success, technical account management, and CS leadership
|
"You are a professional cover letter writer. Write in first person."
|
||||||
- Most recent role: led Americas Customer Success at UpGuard (cybersecurity SaaS), managing enterprise + Fortune 500 accounts, drove NPS consistently above 95
|
)
|
||||||
- Also founder of M3 Consulting, a CS advisory practice for SaaS startups
|
|
||||||
- Attended Texas State (2 yrs), CSU East Bay (1 yr); completed degree elsewhere
|
|
||||||
- Based in San Francisco Bay Area; open to remote/hybrid
|
|
||||||
- Pronouns: any
|
|
||||||
|
|
||||||
Voice guidelines:
|
|
||||||
- Warm, confident, and specific — never generic
|
|
||||||
- Opens with "I'm delighted/thrilled to apply for [role] at [company]."
|
|
||||||
- 3–4 focused paragraphs, ~250–350 words total
|
|
||||||
- Para 2: concrete experience (cite UpGuard and/or M3 Consulting with a specific metric)
|
|
||||||
- Para 3: genuine connection to THIS company's mission/product
|
|
||||||
- Closes with "Thank you for considering my application." + warm sign-off
|
|
||||||
- Never use: "I am writing to express my interest", "passionate about making a difference",
|
|
||||||
"I look forward to hearing from you", or any hollow filler phrases
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
# ── Mission-alignment detection ───────────────────────────────────────────────
|
# ── Mission-alignment detection ───────────────────────────────────────────────
|
||||||
|
|
@ -69,21 +60,23 @@ _MISSION_SIGNALS: dict[str, list[str]] = {
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_candidate = _profile.name if _profile else "the candidate"
|
||||||
|
|
||||||
_MISSION_NOTES: dict[str, str] = {
|
_MISSION_NOTES: dict[str, str] = {
|
||||||
"music": (
|
"music": (
|
||||||
"This company is in the music industry, which is one of Alex's genuinely "
|
f"This company is in the music industry, which is one of {_candidate}'s genuinely "
|
||||||
"ideal work environments — she has a real personal passion for the music scene. "
|
"ideal work environments — they have a real personal passion for the music scene. "
|
||||||
"Para 3 should warmly and specifically reflect this authentic alignment, not as "
|
"Para 3 should warmly and specifically reflect this authentic alignment, not as "
|
||||||
"a generic fan statement, but as an honest statement of where she'd love to apply "
|
"a generic fan statement, but as an honest statement of where they'd love to apply "
|
||||||
"her CS skills."
|
"their CS skills."
|
||||||
),
|
),
|
||||||
"animal_welfare": (
|
"animal_welfare": (
|
||||||
"This organization works in animal welfare/rescue — one of Alex's dream-job "
|
f"This organization works in animal welfare/rescue — one of {_candidate}'s dream-job "
|
||||||
"domains and a genuine personal passion. Para 3 should reflect this authentic "
|
"domains and a genuine personal passion. Para 3 should reflect this authentic "
|
||||||
"connection warmly and specifically, tying her CS skills to this mission."
|
"connection warmly and specifically, tying their CS skills to this mission."
|
||||||
),
|
),
|
||||||
"education": (
|
"education": (
|
||||||
"This company works in children's education or EdTech — one of Alex's ideal "
|
f"This company works in children's education or EdTech — one of {_candidate}'s ideal "
|
||||||
"work domains, reflecting genuine personal values around learning and young people. "
|
"work domains, reflecting genuine personal values around learning and young people. "
|
||||||
"Para 3 should reflect this authentic connection specifically and warmly."
|
"Para 3 should reflect this authentic connection specifically and warmly."
|
||||||
),
|
),
|
||||||
|
|
@ -138,7 +131,7 @@ def build_prompt(
|
||||||
) -> str:
|
) -> str:
|
||||||
parts = [SYSTEM_CONTEXT.strip(), ""]
|
parts = [SYSTEM_CONTEXT.strip(), ""]
|
||||||
if examples:
|
if examples:
|
||||||
parts.append("=== STYLE EXAMPLES (Alex's past letters) ===\n")
|
parts.append(f"=== STYLE EXAMPLES ({_candidate}'s past letters) ===\n")
|
||||||
for i, ex in enumerate(examples, 1):
|
for i, ex in enumerate(examples, 1):
|
||||||
parts.append(f"--- Example {i} ({ex['company']}) ---")
|
parts.append(f"--- Example {i} ({ex['company']}) ---")
|
||||||
parts.append(ex["text"])
|
parts.append(ex["text"])
|
||||||
|
|
@ -183,7 +176,7 @@ def generate(title: str, company: str, description: str = "", _router=None) -> s
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
parser = argparse.ArgumentParser(description="Generate a cover letter in Alex's voice")
|
parser = argparse.ArgumentParser(description=f"Generate a cover letter in {_candidate}'s voice")
|
||||||
parser.add_argument("--title", help="Job title")
|
parser.add_argument("--title", help="Job title")
|
||||||
parser.add_argument("--company", help="Company name")
|
parser.add_argument("--company", help="Company name")
|
||||||
parser.add_argument("--description", default="", help="Job description text")
|
parser.add_argument("--description", default="", help="Job description text")
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,22 @@ import yaml
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from notion_client import Client
|
from notion_client import Client
|
||||||
|
|
||||||
|
from scripts.user_profile import UserProfile
|
||||||
|
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||||
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
|
||||||
CONFIG_DIR = Path(__file__).parent.parent / "config"
|
CONFIG_DIR = Path(__file__).parent.parent / "config"
|
||||||
RESUME_PATH = Path("/Library/Documents/JobSearch/Alex_Rivera_Resume_02-19-2025.pdf")
|
|
||||||
|
|
||||||
|
def _find_resume(docs_dir: Path) -> Path | None:
|
||||||
|
"""Find the most recently modified PDF in docs_dir matching *resume* or *cv*."""
|
||||||
|
candidates = list(docs_dir.glob("*[Rr]esume*.pdf")) + list(docs_dir.glob("*[Cc][Vv]*.pdf"))
|
||||||
|
return max(candidates, key=lambda p: p.stat().st_mtime) if candidates else None
|
||||||
|
|
||||||
|
|
||||||
|
# Resume to attach: the newest resume/CV PDF in the profile's documents
# directory when a profile is loaded and such a file exists, otherwise the
# conventional config/resume.pdf location next to the other config files.
_profile_resume = _find_resume(_profile.docs_dir) if _profile else None
RESUME_PATH = _profile_resume or CONFIG_DIR / "resume.pdf"
|
||||||
|
|
||||||
|
|
||||||
def load_notion() -> tuple[Client, dict]:
|
def load_notion() -> tuple[Client, dict]:
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# scripts/prepare_training_data.py
|
# scripts/prepare_training_data.py
|
||||||
"""
|
"""
|
||||||
Extract training pairs from Alex's cover letter corpus for LoRA fine-tuning.
|
Extract training pairs from the candidate's cover letter corpus for LoRA fine-tuning.
|
||||||
|
|
||||||
Outputs a JSONL file where each line is:
|
Outputs a JSONL file where each line is:
|
||||||
{"instruction": "Write a cover letter for the [role] position at [company].",
|
{"instruction": "Write a cover letter for the [role] position at [company].",
|
||||||
|
|
@ -16,10 +16,17 @@ import re
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
LETTERS_DIR = Path("/Library/Documents/JobSearch")
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from scripts.user_profile import UserProfile
|
||||||
|
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||||
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
|
||||||
|
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
|
LETTERS_DIR = _docs
|
||||||
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
||||||
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
||||||
DEFAULT_OUTPUT = LETTERS_DIR / "training_data" / "cover_letters.jsonl"
|
DEFAULT_OUTPUT = _docs / "training_data" / "cover_letters.jsonl"
|
||||||
|
|
||||||
# Patterns that appear in opening sentences to extract role
|
# Patterns that appear in opening sentences to extract role
|
||||||
ROLE_PATTERNS = [
|
ROLE_PATTERNS = [
|
||||||
|
|
|
||||||
|
|
@ -64,16 +64,22 @@ def test_build_resume_context_top2_in_full():
|
||||||
def test_build_resume_context_rest_condensed():
|
def test_build_resume_context_rest_condensed():
|
||||||
"""Remaining experiences appear as condensed one-liners, not full bullets."""
|
"""Remaining experiences appear as condensed one-liners, not full bullets."""
|
||||||
ctx = _build_resume_context(RESUME, KEYWORDS, JD)
|
ctx = _build_resume_context(RESUME, KEYWORDS, JD)
|
||||||
assert "Also in Alex" in ctx
|
assert "Also in" in ctx
|
||||||
assert "Generic Co" in ctx
|
assert "Generic Co" in ctx
|
||||||
# Generic Co bullets should NOT appear in full
|
# Generic Co bullets should NOT appear in full
|
||||||
assert "Managed SMB portfolio" not in ctx
|
assert "Managed SMB portfolio" not in ctx
|
||||||
|
|
||||||
|
|
||||||
def test_upguard_nda_low_score():
|
def test_upguard_nda_low_score():
|
||||||
"""UpGuard name replaced with 'enterprise security vendor' when score < 3."""
|
"""UpGuard NDA rule: company masked when score < 3 and profile has NDA companies configured."""
|
||||||
|
from scripts.company_research import _profile
|
||||||
ctx = _build_resume_context(RESUME, ["python", "kubernetes"], "python kubernetes devops")
|
ctx = _build_resume_context(RESUME, ["python", "kubernetes"], "python kubernetes devops")
|
||||||
assert "enterprise security vendor" in ctx
|
if _profile and _profile.is_nda("upguard"):
|
||||||
|
# Profile present with UpGuard NDA — company should be masked
|
||||||
|
assert "UpGuard" not in ctx
|
||||||
|
else:
|
||||||
|
# No profile or UpGuard not in NDA list — company name appears directly
|
||||||
|
assert "UpGuard" in ctx or "enterprise security vendor" in ctx or "previous employer" in ctx
|
||||||
|
|
||||||
|
|
||||||
def test_load_resume_and_keywords_returns_lists():
|
def test_load_resume_and_keywords_returns_lists():
|
||||||
|
|
|
||||||
|
|
@ -89,17 +89,14 @@ def test_find_similar_letters_returns_top_k():
|
||||||
|
|
||||||
|
|
||||||
def test_load_corpus_returns_list():
|
def test_load_corpus_returns_list():
|
||||||
"""load_corpus returns a list (may be empty if LETTERS_DIR absent, must not crash)."""
|
"""load_corpus returns a list (empty if LETTERS_DIR absent) without crashing."""
|
||||||
from scripts.generate_cover_letter import load_corpus, LETTERS_DIR
|
from scripts.generate_cover_letter import load_corpus, LETTERS_DIR
|
||||||
|
|
||||||
if LETTERS_DIR.exists():
|
corpus = load_corpus()
|
||||||
corpus = load_corpus()
|
assert isinstance(corpus, list)
|
||||||
assert isinstance(corpus, list)
|
if corpus:
|
||||||
if corpus:
|
assert "company" in corpus[0]
|
||||||
assert "company" in corpus[0]
|
assert "text" in corpus[0]
|
||||||
assert "text" in corpus[0]
|
|
||||||
else:
|
|
||||||
pytest.skip("LETTERS_DIR not present in this environment")
|
|
||||||
|
|
||||||
|
|
||||||
def test_generate_calls_llm_router():
|
def test_generate_calls_llm_router():
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue