peregrine/scripts/generate_cover_letter.py
pyr0ball b06d596d4c
Some checks failed
CI / test (pull_request) Failing after 1m16s
feat(vue): open Vue SPA to all tiers; fix cloud nav and feedback button
- Lower vue_ui_beta gate to "free" so all licensed users can access the
  new UI without a paid subscription
- Remove "Paid tier" wording from the Try New UI banner
- Fix Vue SPA navigation in cloud/demo deployments: add VITE_BASE_PATH
  build arg so Vite sets the correct subpath base, and pass
  import.meta.env.BASE_URL to createWebHistory() so router links
  emit /peregrine/... paths that Caddy can match
- Fix feedback button missing on cloud instance by passing
  FORGEJO_API_TOKEN through compose.cloud.yml
- Remove vLLM container from compose.yml (vLLM dropped from stack;
  cf-research service in cfcore covers the use case)
- Fix cloud config path in Apply page (use get_config_dir() so per-user
  cloud data roots resolve correctly for user.yaml and resume YAML)
- Refactor generate_cover_letter._build_system_context and
  _build_mission_notes to accept explicit profile arg (enables
  per-user cover letter generation in cloud multi-tenant mode)
- Add API proxy block to nginx.conf (Vue web container can now call
  /api/ directly without Vite dev proxy)
- Update .env.example: remove vLLM vars, add research model + tuning
  vars for external vLLM deployments
- Update llm.yaml: switch vllm base_url to host.docker.internal
  (vLLM now runs outside Docker stack)

Closes #63 (feedback button)
Related: #8 (Vue SPA), #50–#62 (parity milestone)
2026-04-02 17:41:35 -07:00

365 lines
16 KiB
Python

# scripts/generate_cover_letter.py
"""
Generate a cover letter in the candidate's voice using few-shot examples from their corpus.
Usage:
conda run -n job-seeker python scripts/generate_cover_letter.py \
--title "Director of Customer Success" \
--company "Acme Corp" \
--description "We are looking for..."
Or pass a staging DB job ID:
conda run -n job-seeker python scripts/generate_cover_letter.py --job-id 42
"""
import argparse
import re
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.user_profile import UserProfile
# Module-level (default) profile: read from <repo>/config/user.yaml once, at import
# time. _profile is None when UserProfile.exists() reports the file as absent.
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
# Directory that load_corpus() searches for past letters; without a profile it
# falls back to ~/Documents/JobSearch.
LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
# Filename glob that selects cover letters inside LETTERS_DIR.
LETTER_GLOB = "*Cover Letter*.md"
# Background injected into every prompt so the model has the candidate's facts
def _build_system_context(profile=None) -> str:
p = profile or _profile
if not p:
return "You are a professional cover letter writer. Write in first person."
parts = [f"You are writing cover letters for {p.name}. {p.career_summary}"]
if p.candidate_voice:
parts.append(
f"Voice and personality: {p.candidate_voice} "
"Write in a way that reflects these authentic traits — not as a checklist, "
"but as a natural expression of who this person is."
)
return " ".join(parts)
# Default system context, built once at import time from the module-level profile.
# build_prompt() falls back to it when no system_context argument is passed.
SYSTEM_CONTEXT = _build_system_context()
# ── Mission-alignment detection ───────────────────────────────────────────────
# When a company/JD signals one of these preferred industries, the cover letter
# prompt injects a hint so Para 3 can reflect genuine personal connection.
# This does NOT disclose any personal disability or family information.
#
# Maps industry key -> list of signal phrases. Signals are written in lowercase
# because detect_mission_alignment() lowercases the company/JD text before
# comparing. Each industry key is also looked up in the mission-notes dict
# (whose keys come from _MISSION_DEFAULTS), so the two dicts should stay in sync.
_MISSION_SIGNALS: dict[str, list[str]] = {
"music": [
"music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music",
"distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl",
"streaming", "artist", "label", "live nation", "ticketmaster", "aeg",
"songkick", "concert", "venue", "festival", "audio", "podcast",
"studio", "record", "musician", "playlist",
],
# NOTE(review): "vet " and "pet " carry a trailing space — presumably so they
# do not match inside longer words such as "veteran"; confirm intent.
"animal_welfare": [
"animal", "shelter", "rescue", "humane society", "spca", "aspca",
"veterinary", "vet ", "wildlife", "pet ", "adoption", "foster",
"dog", "cat", "feline", "canine", "sanctuary", "zoo",
],
"education": [
"education", "school", "learning", "student", "edtech", "classroom",
"curriculum", "tutoring", "academic", "university", "kids", "children",
"youth", "literacy", "khan academy", "duolingo", "chegg", "coursera",
"instructure", "canvas lms", "clever", "district", "teacher",
"k-12", "k12", "grade", "pedagogy",
],
"social_impact": [
"nonprofit", "non-profit", "501(c)", "social impact", "mission-driven",
"public benefit", "community", "underserved", "equity", "justice",
"humanitarian", "advocacy", "charity", "foundation", "ngo",
"social good", "civic", "public health", "mental health", "food security",
"housing", "homelessness", "poverty", "workforce development",
],
# Health is listed last — it's a genuine but lower-priority connection than
# music/animals/education/social_impact. detect_mission_alignment returns on first
# match, so dict order = preference order.
"health": [
"patient", "patients", "healthcare", "health tech", "healthtech",
"pharma", "pharmaceutical", "clinical", "medical",
"hospital", "clinic", "therapy", "therapist",
"rare disease", "life sciences", "life science",
"treatment", "prescription", "biotech", "biopharma", "medtech",
"behavioral health", "population health",
"care management", "care coordination", "oncology", "specialty pharmacy",
"provider network", "payer", "health plan", "benefits administration",
"ehr", "emr", "fhir", "hipaa",
],
}
# Display name used in prompts and in the CLI help text. Resolved once at import
# time from the module-level profile, with a generic fallback when none is loaded.
_candidate = _profile.name if _profile else "the candidate"
# Default Para 3 guidance per industry; _build_mission_notes() uses these for
# industries where the user supplied no custom note. The f-strings are rendered
# at import time, so the name embedded here is always the module-level _candidate.
_MISSION_DEFAULTS: dict[str, str] = {
"music": (
f"This company is in the music industry — an industry {_candidate} finds genuinely "
"compelling. Para 3 should warmly and specifically reflect this authentic alignment, "
"not as a generic fan statement, but as an honest statement of where they'd love to "
"apply their skills."
),
"animal_welfare": (
f"This organization works in animal welfare/rescue — a mission {_candidate} finds "
"genuinely meaningful. Para 3 should reflect this authentic connection warmly and "
"specifically, tying their skills to this mission."
),
"education": (
f"This company works in education or EdTech — a domain that resonates with "
f"{_candidate}'s values. Para 3 should reflect this authentic connection specifically "
"and warmly."
),
"social_impact": (
f"This organization is mission-driven / social impact focused — exactly the kind of "
f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine "
"desire to apply their skills to work that makes a real difference in people's lives."
),
# The health note deliberately steers the model away from "passion for the
# industry" framing and toward the people being served.
"health": (
f"This company works in healthcare, life sciences, or patient care. "
f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an "
"industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies "
"exist to serve — those navigating complex, often invisible, or unusual health journeys; "
"patients facing rare or poorly understood conditions; individuals whose situations don't "
"fit a clean category. The connection is to the humans behind the data, not the industry. "
"If the user has provided a personal note, use that to anchor Para 3 specifically."
),
}
def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]:
    """Merge the user's custom mission notes with the generic defaults.

    Args:
        profile: Profile whose ``mission_preferences`` mapping (industry key ->
            personal note) is consulted. Falls back to the module-level
            ``_profile`` when omitted.
        candidate_name: Name to mention in the notes. Falls back to the
            module-level ``_candidate``.

    Returns:
        One note per industry in ``_MISSION_DEFAULTS``. An industry with a
        non-blank custom note gets a note quoting it; every other industry
        gets the default note, re-pointed at ``candidate_name`` when that
        differs from the module-level name.
    """
    p = profile or _profile
    name = candidate_name or _candidate
    # Tolerate a profile whose mission_preferences is missing or None.
    prefs = (getattr(p, "mission_preferences", None) or {}) if p else {}

    # _MISSION_DEFAULTS is rendered at import time with the module-level
    # _candidate. For a per-call profile (multi-tenant use) the defaults would
    # otherwise mention the wrong person, so swap the name in. The guard on a
    # non-empty _candidate matters: str.replace("", x) would insert x between
    # every character.
    rename_defaults = bool(_candidate) and name != _candidate

    notes: dict[str, str] = {}
    for industry, default_note in _MISSION_DEFAULTS.items():
        custom = (prefs.get(industry) or "").strip()
        if custom:
            notes[industry] = (
                f"Mission alignment — {name} shared: \"{custom}\". "
                "Para 3 should warmly and specifically reflect this authentic connection."
            )
        elif rename_defaults:
            notes[industry] = default_note.replace(_candidate, name)
        else:
            notes[industry] = default_note
    return notes
# Mission notes for the module-level profile, built once at import time.
# detect_mission_alignment() uses them when no mission_notes argument is given.
_MISSION_NOTES = _build_mission_notes()
def detect_mission_alignment(
    company: str, description: str, mission_notes: dict | None = None
) -> str | None:
    """Return a mission hint if the company/JD matches a preferred industry, else None.

    Industries are tried in ``_MISSION_SIGNALS`` order and the first match wins.
    Signals are matched case-insensitively as whole words or phrases, with an
    optional plural ``s``/``es``. Plain substring matching is avoided because it
    produces false positives: "venue" inside "revenue", "cat" inside
    "education", "ngo" inside "ongoing".

    Args:
        company: Company name.
        description: Job description text; may be empty.
        mission_notes: Industry key -> hint mapping. Defaults to the
            module-level ``_MISSION_NOTES``.

    Returns:
        The hint for the first matching industry that has an entry in the
        notes mapping, or ``None`` when nothing matches.
    """
    notes = mission_notes if mission_notes is not None else _MISSION_NOTES
    text = f"{company or ''} {description or ''}".lower()
    for industry, signals in _MISSION_SIGNALS.items():
        # Some signals carry a trailing space as a poor-man's word boundary;
        # strip it because the regex enforces boundaries itself.
        phrases = [sig.strip() for sig in signals if sig.strip()]
        if not phrases:
            continue
        alternatives = "|".join(re.escape(phrase) for phrase in phrases)
        # (?<!\w) / (?!\w) are used rather than \b so that signals ending in
        # punctuation, e.g. "501(c)", still match.
        if re.search(rf"(?<!\w)(?:{alternatives})(?:e?s)?(?!\w)", text):
            note = notes.get(industry)
            if note is not None:
                return note
            # A caller-supplied notes dict without this industry: keep looking
            # at lower-priority industries rather than raising KeyError.
    return None
def load_corpus(letters_dir: Path | None = None) -> list[dict]:
    """Load every non-empty cover letter matching ``LETTER_GLOB`` from a directory.

    Args:
        letters_dir: Directory to search. Defaults to the module-level
            ``LETTERS_DIR``; pass an explicit directory to load a different
            user's letters.

    Returns:
        A list of ``{"path", "company", "text"}`` dicts, ordered by path.
        ``company`` is derived from the filename, e.g.
        "Tailscale Cover Letter.md" -> "Tailscale".
    """
    base = LETTERS_DIR if letters_dir is None else Path(letters_dir)
    corpus = []
    for path in sorted(base.glob(LETTER_GLOB)):
        # The glob can also match a directory whose name fits the pattern.
        if not path.is_file():
            continue
        text = path.read_text(encoding="utf-8", errors="ignore").strip()
        if not text:
            continue
        # Everything from "Cover Letter" onwards in the stem is dropped.
        company = re.sub(r"\s*Cover Letter.*", "", path.stem, flags=re.IGNORECASE).strip()
        corpus.append({"path": path, "company": company, "text": text})
    return corpus
def find_similar_letters(job_description: str, corpus: list[dict], top_k: int = 3) -> list[dict]:
    """Return the ``top_k`` letters most similar to the job description.

    Similarity is TF-IDF cosine similarity between the job description and
    each corpus entry's ``"text"``.

    Args:
        job_description: Text to compare the letters against.
        corpus: Entries as produced by ``load_corpus()``; each needs a
            ``"text"`` key.
        top_k: Maximum number of letters to return.

    Returns:
        Up to ``top_k`` corpus entries, most similar first. Empty when the
        corpus is empty or ``top_k`` is not positive.
    """
    # Cheap exits come first so an empty corpus never needs scikit-learn, and
    # a negative top_k cannot turn into an "all but the last" slice.
    if not corpus or top_k <= 0:
        return []

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # Row 0 is the job description; rows 1.. are the letters.
    docs = [job_description] + [c["text"] for c in corpus]
    vectorizer = TfidfVectorizer(stop_words="english", max_features=500)
    tfidf = vectorizer.fit_transform(docs)
    sims = cosine_similarity(tfidf[0:1], tfidf[1:])[0]
    # Sort on the score only; corpus dicts are not orderable.
    ranked = sorted(zip(sims, corpus), key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in ranked[:top_k]]
def build_prompt(
title: str,
company: str,
description: str,
examples: list[dict],
mission_hint: str | None = None,
is_jobgether: bool = False,
system_context: str | None = None,
candidate_name: str | None = None,
) -> str:
ctx = system_context if system_context is not None else SYSTEM_CONTEXT
name = candidate_name or _candidate
parts = [ctx.strip(), ""]
if examples:
parts.append(f"=== STYLE EXAMPLES ({name}'s past letters) ===\n")
for i, ex in enumerate(examples, 1):
parts.append(f"--- Example {i} ({ex['company']}) ---")
parts.append(ex["text"])
parts.append("")
parts.append("=== END EXAMPLES ===\n")
if mission_hint:
parts.append(f"⭐ Mission alignment note (for Para 3): {mission_hint}\n")
if is_jobgether:
if company and company.lower() != "jobgether":
recruiter_note = (
f"🤝 Recruiter context: This listing is posted by Jobgether on behalf of "
f"{company}. Address the cover letter to the Jobgether recruiter, not directly "
f"to the hiring company. Use framing like 'Your client at {company} will "
f"appreciate...' rather than addressing {company} directly. The role "
f"requirements are those of the actual employer."
)
else:
recruiter_note = (
"🤝 Recruiter context: This listing is posted by Jobgether on behalf of an "
"undisclosed employer. Address the cover letter to the Jobgether recruiter. "
"Use framing like 'Your client will appreciate...' rather than addressing "
"the company directly."
)
parts.append(f"{recruiter_note}\n")
parts.append(f"Now write a new cover letter for:")
parts.append(f" Role: {title}")
parts.append(f" Company: {company}")
if description:
snippet = description[:1500].strip()
parts.append(f"\nJob description excerpt:\n{snippet}")
parts.append("\nWrite the full cover letter now:")
return "\n".join(parts)
def _trim_to_letter_end(text: str, profile=None) -> str:
"""Remove repetitive hallucinated content after the first complete sign-off.
Fine-tuned models sometimes loop after completing the letter. This cuts at
the first closing + candidate name so only the intended letter is saved.
"""
p = profile or _profile
candidate_first = (p.name.split()[0] if p else "").strip()
pattern = (
r'(?:Warm regards|Sincerely|Best regards|Kind regards|Thank you)[,.]?\s*\n+\s*'
+ (re.escape(candidate_first) if candidate_first else r'\w+(?:\s+\w+)?')
+ r'\b'
)
m = re.search(pattern, text, re.IGNORECASE)
if m:
return text[:m.end()].strip()
return text.strip()
def generate(
    title: str,
    company: str,
    description: str = "",
    previous_result: str = "",
    feedback: str = "",
    is_jobgether: bool = False,
    _router=None,
    config_path: "Path | None" = None,
    user_yaml_path: "Path | None" = None,
) -> str:
    """Generate a cover letter and return it as a string.

    Pass previous_result + feedback for iterative refinement — the prior draft
    and requested changes are appended to the prompt so the LLM revises rather
    than starting from scratch.

    Args:
        title: Job title.
        company: Company name.
        description: Job description text; may be empty.
        previous_result: Earlier draft to revise.
        feedback: Requested changes to the earlier draft.
        is_jobgether: Whether the listing was posted by Jobgether.
        _router: Optional pre-built LLMRouter (used in tests to avoid real
            LLM calls).
        config_path: LLM router config to use when it exists; otherwise the
            router's default ``CONFIG_PATH`` is used.
        user_yaml_path: Overrides the module-level profile — required in cloud
            mode so each user's name/voice/mission prefs are used instead of
            the global default.

    Returns:
        The generated letter, trimmed at the first complete sign-off.
    """
    description = description or ""

    # Per-call profile override (cloud mode: each user has their own user.yaml)
    _prof = _profile
    if user_yaml_path:
        if Path(user_yaml_path).exists():
            _prof = UserProfile(Path(user_yaml_path))
        else:
            # Still best-effort, but no longer silent: falling back here means
            # the letter is written for the default profile, not the caller's.
            print(
                f"[cover-letter] user_yaml_path not found ({user_yaml_path}); "
                "falling back to the default profile",
                file=sys.stderr,
            )

    sys_ctx = _build_system_context(_prof)
    mission_notes = _build_mission_notes(_prof, candidate_name=(_prof.name if _prof else None))
    candidate_name = _prof.name if _prof else _candidate

    # NOTE(review): the style corpus is still read from the module-level
    # LETTERS_DIR even when user_yaml_path overrides the profile; confirm
    # whether per-user letters should be used here.
    corpus = load_corpus()
    examples = find_similar_letters(description or f"{title} {company}", corpus)
    mission_hint = detect_mission_alignment(company, description, mission_notes=mission_notes)
    if mission_hint:
        print(f"[cover-letter] Mission alignment detected for {company}", file=sys.stderr)

    prompt = build_prompt(title, company, description, examples,
                          mission_hint=mission_hint, is_jobgether=is_jobgether,
                          system_context=sys_ctx, candidate_name=candidate_name)
    if previous_result:
        prompt += f"\n\n---\nPrevious draft:\n{previous_result}"
    if feedback:
        prompt += f"\n\nUser feedback / requested changes:\n{feedback}\n\nPlease revise accordingly."

    if _router is None:
        # Make the repo root importable, but only once: an unconditional
        # insert would grow sys.path on every call.
        repo_root = str(Path(__file__).parent.parent)
        if repo_root not in sys.path:
            sys.path.insert(0, repo_root)
        from scripts.llm_router import LLMRouter, CONFIG_PATH
        resolved = config_path if (config_path and Path(config_path).exists()) else CONFIG_PATH
        _router = LLMRouter(resolved)

    print(f"[cover-letter] Generating for: {title} @ {company}", file=sys.stderr)
    print(f"[cover-letter] Style examples: {[e['company'] for e in examples]}", file=sys.stderr)
    if feedback:
        print("[cover-letter] Refinement mode: feedback provided", file=sys.stderr)

    # max_tokens=1200 caps generation at ~900 words — enough for any cover letter
    # and prevents fine-tuned models from looping into repetitive garbage output.
    result = _router.complete(prompt, max_tokens=1200)
    return _trim_to_letter_end(result, _prof)
def main() -> None:
    """Command-line entry point.

    Takes the job from ``--title``/``--company``/``--description``, or loads
    it from staging.db with ``--job-id`` (explicit flags win over DB values).
    The letter is written to ``--output`` when given, otherwise printed to
    stdout. Exits with status 1 when the job id is not found, and with an
    argparse error when title or company is missing.
    """
    parser = argparse.ArgumentParser(description=f"Generate a cover letter in {_candidate}'s voice")
    parser.add_argument("--title", help="Job title")
    parser.add_argument("--company", help="Company name")
    parser.add_argument("--description", default="", help="Job description text")
    parser.add_argument("--job-id", type=int, help="Load job from staging.db by ID")
    parser.add_argument("--output", help="Write output to this file path")
    args = parser.parse_args()

    title, company, description = args.title, args.company, args.description

    if args.job_id is not None:
        from scripts.db import DEFAULT_DB
        import sqlite3

        conn = sqlite3.connect(DEFAULT_DB)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute("SELECT * FROM jobs WHERE id = ?", (args.job_id,)).fetchone()
        finally:
            # Close even when the query raises (e.g. missing table).
            conn.close()
        if not row:
            print(f"No job with id={args.job_id} in staging.db", file=sys.stderr)
            sys.exit(1)
        job = dict(row)
        # "or ''" also covers columns that are present but NULL.
        title = title or job.get("title") or ""
        company = company or job.get("company") or ""
        description = description or job.get("description") or ""

    if not title or not company:
        parser.error("--title and --company are required (or use --job-id)")

    letter = generate(title, company, description)

    if args.output:
        # Explicit UTF-8: letters contain non-ASCII punctuation, and the
        # corpus is read back as UTF-8.
        Path(args.output).write_text(letter, encoding="utf-8")
        print(f"Saved to {args.output}", file=sys.stderr)
    else:
        print(letter)


if __name__ == "__main__":
    main()