Some checks failed
CI / test (pull_request) Failing after 1m16s
- Lower vue_ui_beta gate to "free" so all licensed users can access the new UI without a paid subscription - Remove "Paid tier" wording from the Try New UI banner - Fix Vue SPA navigation in cloud/demo deployments: add VITE_BASE_PATH build arg so Vite sets the correct subpath base, and pass import.meta.env.BASE_URL to createWebHistory() so router links emit /peregrine/... paths that Caddy can match - Fix feedback button missing on cloud instance by passing FORGEJO_API_TOKEN through compose.cloud.yml - Remove vLLM container from compose.yml (vLLM dropped from stack; cf-research service in cfcore covers the use case) - Fix cloud config path in Apply page (use get_config_dir() so per-user cloud data roots resolve correctly for user.yaml and resume YAML) - Refactor generate_cover_letter._build_system_context and _build_mission_notes to accept explicit profile arg (enables per-user cover letter generation in cloud multi-tenant mode) - Add API proxy block to nginx.conf (Vue web container can now call /api/ directly without Vite dev proxy) - Update .env.example: remove vLLM vars, add research model + tuning vars for external vLLM deployments - Update llm.yaml: switch vllm base_url to host.docker.internal (vLLM now runs outside Docker stack) Closes #63 (feedback button) Related: #8 (Vue SPA), #50–#62 (parity milestone)
365 lines
16 KiB
Python
365 lines
16 KiB
Python
# scripts/generate_cover_letter.py
|
|
"""
|
|
Generate a cover letter in the candidate's voice using few-shot examples from their corpus.
|
|
|
|
Usage:
|
|
conda run -n job-seeker python scripts/generate_cover_letter.py \
|
|
--title "Director of Customer Success" \
|
|
--company "Acme Corp" \
|
|
--description "We are looking for..."
|
|
|
|
Or pass a staging DB job ID:
|
|
conda run -n job-seeker python scripts/generate_cover_letter.py --job-id 42
|
|
"""
|
|
import argparse
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from scripts.user_profile import UserProfile
|
|
# Default profile location: config/user.yaml, two directory levels above this file.
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
# Loaded once at import time; None when UserProfile.exists() reports no file there.
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None

# Directory scanned for past cover letters: the profile's docs_dir, or
# ~/Documents/JobSearch when no profile was loaded.
LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
# Glob pattern (relative to LETTERS_DIR) that identifies a cover-letter file.
LETTER_GLOB = "*Cover Letter*.md"
|
|
|
|
# Background injected into every prompt so the model has the candidate's facts
|
|
def _build_system_context(profile=None) -> str:
    """Compose the system preamble that opens every cover-letter prompt.

    ``profile`` is used when it is truthy; otherwise the module-level
    ``_profile`` is consulted. When neither is available a generic
    first-person writer instruction is returned. The profile's ``name`` and
    ``career_summary`` always appear; the voice paragraph is appended only
    when ``candidate_voice`` is non-empty.
    """
    active = profile if profile else _profile
    if not active:
        return "You are a professional cover letter writer. Write in first person."

    intro = f"You are writing cover letters for {active.name}. {active.career_summary}"
    if not active.candidate_voice:
        return intro

    voice = (
        f"Voice and personality: {active.candidate_voice} "
        "Write in a way that reflects these authentic traits — not as a checklist, "
        "but as a natural expression of who this person is."
    )
    # Sections are separated by a single space, not a newline.
    return f"{intro} {voice}"
|
|
|
|
# Default system preamble, built once at import time from the module-level profile.
# build_prompt() falls back to this when no system_context is passed.
SYSTEM_CONTEXT = _build_system_context()


# ── Mission-alignment detection ───────────────────────────────────────────────
# When a company/JD signals one of these preferred industries, the cover letter
# prompt injects a hint so Para 3 can reflect genuine personal connection.
# This does NOT disclose any personal disability or family information.
#
# Keys are industry identifiers (the same keys as _MISSION_DEFAULTS); values are
# lowercase signal strings. detect_mission_alignment() lowercases the company +
# description text and tests each signal with plain substring containment, so a
# signal also matches inside longer words. Some signals ("vet ", "pet ") carry
# a trailing space — presumably to avoid hits like "veteran"/"petition";
# confirm before normalising them.

_MISSION_SIGNALS: dict[str, list[str]] = {
    "music": [
        "music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music",
        "distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl",
        "streaming", "artist", "label", "live nation", "ticketmaster", "aeg",
        "songkick", "concert", "venue", "festival", "audio", "podcast",
        "studio", "record", "musician", "playlist",
    ],
    "animal_welfare": [
        "animal", "shelter", "rescue", "humane society", "spca", "aspca",
        "veterinary", "vet ", "wildlife", "pet ", "adoption", "foster",
        "dog", "cat", "feline", "canine", "sanctuary", "zoo",
    ],
    "education": [
        "education", "school", "learning", "student", "edtech", "classroom",
        "curriculum", "tutoring", "academic", "university", "kids", "children",
        "youth", "literacy", "khan academy", "duolingo", "chegg", "coursera",
        "instructure", "canvas lms", "clever", "district", "teacher",
        "k-12", "k12", "grade", "pedagogy",
    ],
    "social_impact": [
        "nonprofit", "non-profit", "501(c)", "social impact", "mission-driven",
        "public benefit", "community", "underserved", "equity", "justice",
        "humanitarian", "advocacy", "charity", "foundation", "ngo",
        "social good", "civic", "public health", "mental health", "food security",
        "housing", "homelessness", "poverty", "workforce development",
    ],
    # Health is listed last — it's a genuine but lower-priority connection than
    # music/animals/education/social_impact. detect_mission_alignment returns on first
    # match, so dict order = preference order.
    "health": [
        "patient", "patients", "healthcare", "health tech", "healthtech",
        "pharma", "pharmaceutical", "clinical", "medical",
        "hospital", "clinic", "therapy", "therapist",
        "rare disease", "life sciences", "life science",
        "treatment", "prescription", "biotech", "biopharma", "medtech",
        "behavioral health", "population health",
        "care management", "care coordination", "oncology", "specialty pharmacy",
        "provider network", "payer", "health plan", "benefits administration",
        "ehr", "emr", "fhir", "hipaa",
    ],
}

# Display name interpolated into the default hints and used as the fallback
# candidate name elsewhere in this module; generic when no profile is loaded.
_candidate = _profile.name if _profile else "the candidate"

# Generic Para-3 hint per industry, used by _build_mission_notes() whenever the
# user has not supplied a custom note for that industry. The f-strings are
# evaluated once at import time, so they embed the module-level _candidate.
_MISSION_DEFAULTS: dict[str, str] = {
    "music": (
        f"This company is in the music industry — an industry {_candidate} finds genuinely "
        "compelling. Para 3 should warmly and specifically reflect this authentic alignment, "
        "not as a generic fan statement, but as an honest statement of where they'd love to "
        "apply their skills."
    ),
    "animal_welfare": (
        f"This organization works in animal welfare/rescue — a mission {_candidate} finds "
        "genuinely meaningful. Para 3 should reflect this authentic connection warmly and "
        "specifically, tying their skills to this mission."
    ),
    "education": (
        f"This company works in education or EdTech — a domain that resonates with "
        f"{_candidate}'s values. Para 3 should reflect this authentic connection specifically "
        "and warmly."
    ),
    "social_impact": (
        f"This organization is mission-driven / social impact focused — exactly the kind of "
        f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine "
        "desire to apply their skills to work that makes a real difference in people's lives."
    ),
    "health": (
        f"This company works in healthcare, life sciences, or patient care. "
        f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an "
        "industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies "
        "exist to serve — those navigating complex, often invisible, or unusual health journeys; "
        "patients facing rare or poorly understood conditions; individuals whose situations don't "
        "fit a clean category. The connection is to the humans behind the data, not the industry. "
        "If the user has provided a personal note, use that to anchor Para 3 specifically."
    ),
}
|
|
|
|
|
|
def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]:
    """Return one Para-3 hint per industry, preferring the user's own notes.

    For every industry in ``_MISSION_DEFAULTS`` the profile's
    ``mission_preferences`` entry is used when it is non-blank after
    stripping; otherwise the generic default hint is kept. ``profile`` and
    ``candidate_name`` fall back to the module-level ``_profile`` and
    ``_candidate`` when falsy.
    """
    active = profile or _profile
    display_name = candidate_name or _candidate
    custom_notes = active.mission_preferences if active else {}

    def _note_for(industry: str, fallback: str) -> str:
        # A missing, None or whitespace-only preference counts as "not provided".
        personal = (custom_notes.get(industry) or "").strip()
        if not personal:
            return fallback
        return (
            f"Mission alignment — {display_name} shared: \"{personal}\". "
            "Para 3 should warmly and specifically reflect this authentic connection."
        )

    return {
        industry: _note_for(industry, fallback)
        for industry, fallback in _MISSION_DEFAULTS.items()
    }
|
|
|
|
|
|
# Import-time default: mission notes for the module-level profile. Used by
# detect_mission_alignment() when the caller passes no mission_notes.
_MISSION_NOTES = _build_mission_notes()
|
|
|
|
|
|
def detect_mission_alignment(
    company: str, description: str, mission_notes: dict | None = None
) -> str | None:
    """Look up the mission hint for the first preferred industry the job matches.

    The company name and description are joined and lowercased, then each
    industry in ``_MISSION_SIGNALS`` is tried in dict order; an industry
    matches when any of its signals occurs as a substring. The hint comes
    from ``mission_notes`` when provided, else from the module-level
    ``_MISSION_NOTES``. Returns ``None`` when no industry matches.
    """
    haystack = f"{company} {description}".lower()
    lookup = _MISSION_NOTES if mission_notes is None else mission_notes

    # First match wins, so the ordering of _MISSION_SIGNALS is the priority order.
    matched = next(
        (
            industry
            for industry, signals in _MISSION_SIGNALS.items()
            if any(signal in haystack for signal in signals)
        ),
        None,
    )
    if matched is None:
        return None
    return lookup[matched]
|
|
|
|
|
|
def load_corpus() -> list[dict]:
|
|
"""Load all .md cover letters from LETTERS_DIR. Returns list of {path, company, text}."""
|
|
corpus = []
|
|
for path in sorted(LETTERS_DIR.glob(LETTER_GLOB)):
|
|
text = path.read_text(encoding="utf-8", errors="ignore").strip()
|
|
if not text:
|
|
continue
|
|
# Extract company from filename: "Tailscale Cover Letter.md" → "Tailscale"
|
|
company = re.sub(r"\s*Cover Letter.*", "", path.stem, flags=re.IGNORECASE).strip()
|
|
corpus.append({"path": path, "company": company, "text": text})
|
|
return corpus
|
|
|
|
|
|
def find_similar_letters(job_description: str, corpus: list[dict], top_k: int = 3) -> list[dict]:
    """Return the top_k letters most similar to the job description by TF-IDF cosine sim.

    Args:
        job_description: Text the letters are ranked against.
        corpus: Entries as produced by ``load_corpus`` (each needs a "text" key).
        top_k: Maximum number of entries to return.

    Returns:
        Up to ``top_k`` corpus entries, most similar first; ``[]`` for an
        empty corpus.
    """
    # Guard first so an empty corpus never pays for (or requires) scikit-learn.
    if not corpus:
        return []

    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # Row 0 is the job description; rows 1.. line up with `corpus`.
    docs = [job_description] + [c["text"] for c in corpus]
    vectorizer = TfidfVectorizer(stop_words="english", max_features=500)
    tfidf = vectorizer.fit_transform(docs)
    sims = cosine_similarity(tfidf[0:1], tfidf[1:])[0]

    # Sort on the score only: corpus entries are dicts and are not orderable.
    ranked = sorted(zip(sims, corpus), key=lambda x: x[0], reverse=True)
    return [entry for _, entry in ranked[:top_k]]
|
|
|
|
|
|
def build_prompt(
|
|
title: str,
|
|
company: str,
|
|
description: str,
|
|
examples: list[dict],
|
|
mission_hint: str | None = None,
|
|
is_jobgether: bool = False,
|
|
system_context: str | None = None,
|
|
candidate_name: str | None = None,
|
|
) -> str:
|
|
ctx = system_context if system_context is not None else SYSTEM_CONTEXT
|
|
name = candidate_name or _candidate
|
|
parts = [ctx.strip(), ""]
|
|
if examples:
|
|
parts.append(f"=== STYLE EXAMPLES ({name}'s past letters) ===\n")
|
|
for i, ex in enumerate(examples, 1):
|
|
parts.append(f"--- Example {i} ({ex['company']}) ---")
|
|
parts.append(ex["text"])
|
|
parts.append("")
|
|
parts.append("=== END EXAMPLES ===\n")
|
|
|
|
if mission_hint:
|
|
parts.append(f"⭐ Mission alignment note (for Para 3): {mission_hint}\n")
|
|
|
|
if is_jobgether:
|
|
if company and company.lower() != "jobgether":
|
|
recruiter_note = (
|
|
f"🤝 Recruiter context: This listing is posted by Jobgether on behalf of "
|
|
f"{company}. Address the cover letter to the Jobgether recruiter, not directly "
|
|
f"to the hiring company. Use framing like 'Your client at {company} will "
|
|
f"appreciate...' rather than addressing {company} directly. The role "
|
|
f"requirements are those of the actual employer."
|
|
)
|
|
else:
|
|
recruiter_note = (
|
|
"🤝 Recruiter context: This listing is posted by Jobgether on behalf of an "
|
|
"undisclosed employer. Address the cover letter to the Jobgether recruiter. "
|
|
"Use framing like 'Your client will appreciate...' rather than addressing "
|
|
"the company directly."
|
|
)
|
|
parts.append(f"{recruiter_note}\n")
|
|
|
|
parts.append(f"Now write a new cover letter for:")
|
|
parts.append(f" Role: {title}")
|
|
parts.append(f" Company: {company}")
|
|
if description:
|
|
snippet = description[:1500].strip()
|
|
parts.append(f"\nJob description excerpt:\n{snippet}")
|
|
parts.append("\nWrite the full cover letter now:")
|
|
return "\n".join(parts)
|
|
|
|
|
|
def _trim_to_letter_end(text: str, profile=None) -> str:
    """Remove repetitive hallucinated content after the first complete sign-off.

    Fine-tuned models sometimes loop after completing the letter. This cuts at
    the first closing + candidate name so only the intended letter is saved.

    Args:
        text: Raw model output.
        profile: Profile whose ``name`` identifies the signature; falls back
            to the module-level ``_profile`` when falsy.

    Returns:
        ``text`` truncated right after the first closing phrase followed, on a
        later line, by the signature, then stripped. With a usable profile
        name the signature is the candidate's first name, so the cut falls
        directly after it. Without one, the signature is one or two words on
        a single line. When no sign-off is found the stripped text is
        returned unchanged.
    """
    p = profile or _profile
    # A blank or whitespace-only name yields no words; fall back to the generic
    # signature pattern instead of indexing into an empty list.
    name_words = (p.name or "").split() if p else []
    candidate_first = name_words[0] if name_words else ""

    if candidate_first:
        signature = re.escape(candidate_first)
    else:
        # [ \t]+ keeps an optional surname on the SAME line; \s+ would also cross
        # blank lines and swallow the first word of whatever follows the sign-off.
        signature = r'\w+(?:[ \t]+\w+)?'

    pattern = (
        r'(?:Warm regards|Sincerely|Best regards|Kind regards|Thank you)[,.]?\s*\n+\s*'
        + signature
        + r'\b'
    )
    m = re.search(pattern, text, re.IGNORECASE)
    if m:
        return text[:m.end()].strip()
    return text.strip()
|
|
|
|
|
|
def generate(
    title: str,
    company: str,
    description: str = "",
    previous_result: str = "",
    feedback: str = "",
    is_jobgether: bool = False,
    _router=None,
    config_path: "Path | None" = None,
    user_yaml_path: "Path | None" = None,
) -> str:
    """Generate a cover letter and return it as a string.

    Pass previous_result + feedback for iterative refinement — the prior draft
    and requested changes are appended to the prompt so the LLM revises rather
    than starting from scratch.

    user_yaml_path overrides the module-level profile — required in cloud mode
    so each user's name/voice/mission prefs are used instead of the global default.
    It is ignored (module-level profile used) when the path does not exist.

    config_path selects the LLM router config; when it is missing or does not
    exist, the router module's CONFIG_PATH is used.

    _router is an optional pre-built LLMRouter (used in tests to avoid real LLM calls).

    Progress messages are written to stderr; the returned letter is trimmed at
    the first complete sign-off.
    """
    # Per-call profile override (cloud mode: each user has their own user.yaml)
    if user_yaml_path and Path(user_yaml_path).exists():
        _prof = UserProfile(Path(user_yaml_path))
    else:
        _prof = _profile

    sys_ctx = _build_system_context(_prof)
    mission_notes = _build_mission_notes(_prof, candidate_name=(_prof.name if _prof else None))
    candidate_name = _prof.name if _prof else _candidate

    # NOTE(review): load_corpus() reads the module-level LETTERS_DIR, so the
    # style examples are NOT per-user even when user_yaml_path is given —
    # confirm whether cloud mode should load from the per-call profile's docs_dir.
    corpus = load_corpus()
    # With no description, similarity falls back to the title + company text.
    examples = find_similar_letters(description or f"{title} {company}", corpus)
    mission_hint = detect_mission_alignment(company, description, mission_notes=mission_notes)
    if mission_hint:
        print(f"[cover-letter] Mission alignment detected for {company}", file=sys.stderr)
    prompt = build_prompt(title, company, description, examples,
                          mission_hint=mission_hint, is_jobgether=is_jobgether,
                          system_context=sys_ctx, candidate_name=candidate_name)

    if previous_result:
        prompt += f"\n\n---\nPrevious draft:\n{previous_result}"
    if feedback:
        prompt += f"\n\nUser feedback / requested changes:\n{feedback}\n\nPlease revise accordingly."

    if _router is None:
        # Make the repo root importable, but only once: inserting on every call
        # would grow sys.path without bound in a long-running process.
        repo_root = str(Path(__file__).parent.parent)
        if repo_root not in sys.path:
            sys.path.insert(0, repo_root)
        from scripts.llm_router import LLMRouter, CONFIG_PATH
        resolved = config_path if (config_path and Path(config_path).exists()) else CONFIG_PATH
        _router = LLMRouter(resolved)

    print(f"[cover-letter] Generating for: {title} @ {company}", file=sys.stderr)
    print(f"[cover-letter] Style examples: {[e['company'] for e in examples]}", file=sys.stderr)
    if feedback:
        print("[cover-letter] Refinement mode: feedback provided", file=sys.stderr)

    # max_tokens=1200 caps generation at ~900 words — enough for any cover letter
    # and prevents fine-tuned models from looping into repetitive garbage output.
    result = _router.complete(prompt, max_tokens=1200)
    return _trim_to_letter_end(result, _prof)
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point.

    Takes the job from --title/--company/--description, or loads it from the
    staging database with --job-id (explicit flags win over DB values). The
    generated letter is printed to stdout, or written to --output as UTF-8.
    Exits with status 1 when the job id is unknown, and with an argparse
    error when title or company is still missing.
    """
    parser = argparse.ArgumentParser(description=f"Generate a cover letter in {_candidate}'s voice")
    parser.add_argument("--title", help="Job title")
    parser.add_argument("--company", help="Company name")
    parser.add_argument("--description", default="", help="Job description text")
    parser.add_argument("--job-id", type=int, help="Load job from staging.db by ID")
    parser.add_argument("--output", help="Write output to this file path")
    args = parser.parse_args()

    title, company, description = args.title, args.company, args.description

    if args.job_id is not None:
        import sqlite3
        from contextlib import closing

        from scripts.db import DEFAULT_DB

        # closing() guarantees the connection is released even if the query raises.
        with closing(sqlite3.connect(DEFAULT_DB)) as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute("SELECT * FROM jobs WHERE id = ?", (args.job_id,)).fetchone()
        if not row:
            print(f"No job with id={args.job_id} in staging.db", file=sys.stderr)
            sys.exit(1)
        job = dict(row)
        # `or ""` also covers NULL columns, which come back as None.
        title = title or job.get("title") or ""
        company = company or job.get("company") or ""
        description = description or job.get("description") or ""

    if not title or not company:
        parser.error("--title and --company are required (or use --job-id)")

    letter = generate(title, company, description)

    if args.output:
        # Explicit UTF-8: matches how the corpus is read, and letters may contain
        # characters the platform's default encoding cannot represent.
        Path(args.output).write_text(letter, encoding="utf-8")
        print(f"Saved to {args.output}", file=sys.stderr)
    else:
        print(letter)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|