diff --git a/config/mission_domains.yaml b/config/mission_domains.yaml new file mode 100644 index 0000000..69293dc --- /dev/null +++ b/config/mission_domains.yaml @@ -0,0 +1,258 @@ +# Mission domain signal configuration for cover letter generation. +# +# When a job description or company name matches signals in a domain, +# the cover letter prompt injects a Para 3 hint to reflect genuine personal +# alignment. Dict order = match priority (first match wins). +# +# Users can add custom domains under `mission_preferences` in user.yaml. +# Any key in mission_preferences that is NOT listed here is treated as a +# user-defined domain: no signal detection, custom note only (skipped if +# the job description doesn't contain the key as a literal word). +# +# Schema per domain: +# signals: list[str] — lowercase keywords to scan for in "company + JD" +# default_note: str — hint injected when user has no custom note for domain + +domains: + music: + signals: + - music + - spotify + - tidal + - soundcloud + - bandcamp + - apple music + - distrokid + - cd baby + - landr + - beatport + - reverb + - vinyl + - streaming + - artist + - label + - live nation + - ticketmaster + - aeg + - songkick + - concert + - venue + - festival + - audio + - podcast + - studio + - record + - musician + - playlist + default_note: > + This company is in the music industry — an industry the candidate finds genuinely + compelling. Para 3 should warmly and specifically reflect this authentic alignment, + not as a generic fan statement, but as an honest statement of where they'd love to + apply their skills. + + animal_welfare: + signals: + - animal + - shelter + - rescue + - humane society + - spca + - aspca + - veterinary + - "vet " + - wildlife + - "pet " + - adoption + - foster + - dog + - cat + - feline + - canine + - sanctuary + - zoo + default_note: > + This organization works in animal welfare/rescue — a mission the candidate finds + genuinely meaningful. 
Para 3 should reflect this authentic connection warmly and + specifically, tying their skills to this mission. + + education: + signals: + - education + - school + - learning + - student + - edtech + - classroom + - curriculum + - tutoring + - academic + - university + - kids + - children + - youth + - literacy + - khan academy + - duolingo + - chegg + - coursera + - instructure + - canvas lms + - clever + - district + - teacher + - k-12 + - k12 + - grade + - pedagogy + default_note: > + This company works in education or EdTech — a domain that resonates with the + candidate's values. Para 3 should reflect this authentic connection specifically + and warmly. + + social_impact: + signals: + - nonprofit + - non-profit + - "501(c)" + - social impact + - mission-driven + - public benefit + - community + - underserved + - equity + - justice + - humanitarian + - advocacy + - charity + - foundation + - ngo + - social good + - civic + - public health + - mental health + - food security + - housing + - homelessness + - poverty + - workforce development + default_note: > + This organization is mission-driven / social impact focused — exactly the kind of + cause the candidate cares deeply about. Para 3 should warmly reflect their genuine + desire to apply their skills to work that makes a real difference in people's lives. + + # Health listed last — genuine but lower-priority connection. 
+ health: + signals: + - patient + - patients + - healthcare + - health tech + - healthtech + - pharma + - pharmaceutical + - clinical + - medical + - hospital + - clinic + - therapy + - therapist + - rare disease + - life sciences + - life science + - treatment + - prescription + - biotech + - biopharma + - medtech + - behavioral health + - population health + - care management + - care coordination + - oncology + - specialty pharmacy + - provider network + - payer + - health plan + - benefits administration + - ehr + - emr + - fhir + - hipaa + default_note: > + This company works in healthcare, life sciences, or patient care. + Do NOT write about the candidate's passion for pharmaceuticals or healthcare as an + industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies + exist to serve: those navigating complex, often invisible, or unusual health journeys; + patients facing rare or poorly understood conditions; individuals whose situations don't + fit a clean category. The connection is to the humans behind the data, not the industry. + If the user has provided a personal note, use that to anchor Para 3 specifically. + + # Extended domains — added 2026-04-12 + + privacy: + signals: + - privacy + - data rights + - surveillance + - gdpr + - ccpa + - anonymity + - end-to-end encryption + - open source + - decentralized + - self-hosted + - zero knowledge + - data sovereignty + - digital rights + - eff + - electronic frontier + default_note: > + This company operates in the privacy, data rights, or digital rights space — + a domain the candidate genuinely cares about. Para 3 should reflect their + authentic belief in user autonomy and data sovereignty, not as abstract principle + but as something that shapes how they approach their work. 
+ + accessibility: + signals: + - accessibility + - assistive technology + - a11y + - wcag + - screen reader + - adaptive technology + - disability + - neurodivergent + - neurodiversity + - adhd + - autism + - inclusive design + - universal design + - accommodations + - ada compliance + default_note: > + This company works in accessibility or assistive technology — a mission the + candidate feels genuine, personal alignment with. Para 3 should reflect authentic + investment in building tools and systems that work for everyone, especially those + whose needs are most often overlooked in mainstream product development. + + open_source: + signals: + - open source + - open-source + - linux foundation + - apache foundation + - free software + - gnu + - contributor + - maintainer + - upstream + - community-driven + - innersource + - copyleft + - mozilla + - wikimedia + default_note: > + This organization is rooted in open source culture — a community the candidate + actively participates in and believes in. Para 3 should reflect genuine investment + in the collaborative, transparent, and community-driven approach to building + software that lasts. diff --git a/scripts/generate_cover_letter.py b/scripts/generate_cover_letter.py index 3067bdb..5aa732c 100644 --- a/scripts/generate_cover_letter.py +++ b/scripts/generate_cover_letter.py @@ -16,6 +16,8 @@ import re import sys from pathlib import Path +import yaml + sys.path.insert(0, str(Path(__file__).parent.parent)) from scripts.user_profile import UserProfile @@ -43,104 +45,53 @@ SYSTEM_CONTEXT = _build_system_context() # ── Mission-alignment detection ─────────────────────────────────────────────── -# When a company/JD signals one of these preferred industries, the cover letter -# prompt injects a hint so Para 3 can reflect genuine personal connection. +# Domains and their keyword signals are loaded from config/mission_domains.yaml. # This does NOT disclose any personal disability or family information. 
+_MISSION_DOMAINS_PATH = Path(__file__).parent.parent / "config" / "mission_domains.yaml" + + +def load_mission_domains(path: Path | None = None) -> dict[str, dict]: + """Load mission domain config from YAML. Returns dict keyed by domain name.""" + p = path or _MISSION_DOMAINS_PATH + if not p.exists(): + return {} + with p.open(encoding="utf-8") as fh: + data = yaml.safe_load(fh) + return data.get("domains", {}) if data else {} + + +_MISSION_DOMAINS: dict[str, dict] = load_mission_domains() _MISSION_SIGNALS: dict[str, list[str]] = { - "music": [ - "music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music", - "distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl", - "streaming", "artist", "label", "live nation", "ticketmaster", "aeg", - "songkick", "concert", "venue", "festival", "audio", "podcast", - "studio", "record", "musician", "playlist", - ], - "animal_welfare": [ - "animal", "shelter", "rescue", "humane society", "spca", "aspca", - "veterinary", "vet ", "wildlife", "pet ", "adoption", "foster", - "dog", "cat", "feline", "canine", "sanctuary", "zoo", - ], - "education": [ - "education", "school", "learning", "student", "edtech", "classroom", - "curriculum", "tutoring", "academic", "university", "kids", "children", - "youth", "literacy", "khan academy", "duolingo", "chegg", "coursera", - "instructure", "canvas lms", "clever", "district", "teacher", - "k-12", "k12", "grade", "pedagogy", - ], - "social_impact": [ - "nonprofit", "non-profit", "501(c)", "social impact", "mission-driven", - "public benefit", "community", "underserved", "equity", "justice", - "humanitarian", "advocacy", "charity", "foundation", "ngo", - "social good", "civic", "public health", "mental health", "food security", - "housing", "homelessness", "poverty", "workforce development", - ], - # Health is listed last — it's a genuine but lower-priority connection than - # music/animals/education/social_impact. 
detect_mission_alignment returns on first - # match, so dict order = preference order. - "health": [ - "patient", "patients", "healthcare", "health tech", "healthtech", - "pharma", "pharmaceutical", "clinical", "medical", - "hospital", "clinic", "therapy", "therapist", - "rare disease", "life sciences", "life science", - "treatment", "prescription", "biotech", "biopharma", "medtech", - "behavioral health", "population health", - "care management", "care coordination", "oncology", "specialty pharmacy", - "provider network", "payer", "health plan", "benefits administration", - "ehr", "emr", "fhir", "hipaa", - ], -} - -_candidate = _profile.name if _profile else "the candidate" - -_MISSION_DEFAULTS: dict[str, str] = { - "music": ( - f"This company is in the music industry — an industry {_candidate} finds genuinely " - "compelling. Para 3 should warmly and specifically reflect this authentic alignment, " - "not as a generic fan statement, but as an honest statement of where they'd love to " - "apply their skills." - ), - "animal_welfare": ( - f"This organization works in animal welfare/rescue — a mission {_candidate} finds " - "genuinely meaningful. Para 3 should reflect this authentic connection warmly and " - "specifically, tying their skills to this mission." - ), - "education": ( - f"This company works in education or EdTech — a domain that resonates with " - f"{_candidate}'s values. Para 3 should reflect this authentic connection specifically " - "and warmly." - ), - "social_impact": ( - f"This organization is mission-driven / social impact focused — exactly the kind of " - f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine " - "desire to apply their skills to work that makes a real difference in people's lives." - ), - "health": ( - f"This company works in healthcare, life sciences, or patient care. " - f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an " - "industry. 
Instead, Para 3 should reflect genuine care for the PEOPLE these companies " - "exist to serve — those navigating complex, often invisible, or unusual health journeys; " - "patients facing rare or poorly understood conditions; individuals whose situations don't " - "fit a clean category. The connection is to the humans behind the data, not the industry. " - "If the user has provided a personal note, use that to anchor Para 3 specifically." - ), + domain: cfg.get("signals", []) for domain, cfg in _MISSION_DOMAINS.items() } def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]: - """Merge user's custom mission notes with generic defaults.""" + """Merge user's custom mission notes with YAML defaults. + + For domains defined in mission_domains.yaml the default_note is used when + the user has not provided a custom note in user.yaml mission_preferences. + + For user-defined domains (keys in mission_preferences that are NOT in the + YAML config), the custom note is used as-is; no signal detection applies. + """ p = profile or _profile - name = candidate_name or _candidate + name = candidate_name or (p.name if p else "the candidate") prefs = p.mission_preferences if p else {} - notes = {} - for industry, default_note in _MISSION_DEFAULTS.items(): - custom = (prefs.get(industry) or "").strip() + notes: dict[str, str] = {} + + for domain, cfg in _MISSION_DOMAINS.items(): + default_note = (cfg.get("default_note") or "").strip() + custom = (prefs.get(domain) or "").strip() if custom: - notes[industry] = ( + notes[domain] = ( f"Mission alignment — {name} shared: \"{custom}\". " "Para 3 should warmly and specifically reflect this authentic connection." 
) else: - notes[industry] = default_note + notes[domain] = default_note + return notes @@ -150,12 +101,15 @@ _MISSION_NOTES = _build_mission_notes() def detect_mission_alignment( company: str, description: str, mission_notes: dict | None = None ) -> str | None: - """Return a mission hint string if company/JD matches a preferred industry, else None.""" + """Return a mission hint string if company/JD matches a configured domain, else None. + + Checks domains in YAML file order (dict order = match priority). + """ notes = mission_notes if mission_notes is not None else _MISSION_NOTES text = f"{company} {description}".lower() - for industry, signals in _MISSION_SIGNALS.items(): + for domain, signals in _MISSION_SIGNALS.items(): if any(sig in text for sig in signals): - return notes[industry] + return notes.get(domain) return None diff --git a/tests/test_mission_domains.py b/tests/test_mission_domains.py new file mode 100644 index 0000000..6d6ace9 --- /dev/null +++ b/tests/test_mission_domains.py @@ -0,0 +1,161 @@ +# tests/test_mission_domains.py +"""Tests for YAML-driven mission domain configuration.""" +import sys +from pathlib import Path + +import pytest +import yaml + +sys.path.insert(0, str(Path(__file__).parent.parent)) + + +# ── load_mission_domains ────────────────────────────────────────────────────── + +def test_load_mission_domains_returns_dict(tmp_path: Path) -> None: + """load_mission_domains parses a valid YAML file into a dict.""" + cfg = tmp_path / "mission_domains.yaml" + cfg.write_text( + "domains:\n" + " music:\n" + " signals: [music, spotify]\n" + " default_note: A music note.\n" + ) + from scripts.generate_cover_letter import load_mission_domains + result = load_mission_domains(cfg) + assert "music" in result + assert result["music"]["signals"] == ["music", "spotify"] + assert result["music"]["default_note"] == "A music note." 
# tests/test_mission_domains.py
"""Tests for YAML-driven mission domain configuration.

Tests that exercise matching or note building install their own small domain
table via ``monkeypatch`` so they do not depend on the committed config; only
the "committed config" tests read the real ``mission_domains.yaml``.
"""
import sys
from pathlib import Path

import pytest
import yaml

sys.path.insert(0, str(Path(__file__).parent.parent))


# ── helpers / fixtures ───────────────────────────────────────────────────────

def _make_signals(domains: dict[str, dict]) -> dict[str, list[str]]:
    """Derive the domain -> signals table the module keeps in _MISSION_SIGNALS."""
    return {d: cfg.get("signals", []) for d, cfg in domains.items()}


def _write_config(path: Path, domains: dict[str, dict]) -> Path:
    """Write *domains* under a top-level ``domains`` key and return *path*."""
    path.write_text(
        yaml.safe_dump({"domains": domains}, sort_keys=False), encoding="utf-8"
    )
    return path


@pytest.fixture
def gcl():
    """The module under test, imported lazily so collection has no side effects."""
    import scripts.generate_cover_letter as module
    return module


@pytest.fixture
def install_domains(gcl, monkeypatch, tmp_path: Path):
    """Return a callable that swaps in a temporary domain table for one test.

    ``monkeypatch`` restores the module globals afterwards, even on failure.
    """
    def _install(domains: dict[str, dict]) -> dict[str, dict]:
        loaded = gcl.load_mission_domains(
            _write_config(tmp_path / "mission_domains.yaml", domains)
        )
        monkeypatch.setattr(gcl, "_MISSION_DOMAINS", loaded)
        monkeypatch.setattr(gcl, "_MISSION_SIGNALS", _make_signals(loaded))
        return loaded

    return _install


# ── load_mission_domains ─────────────────────────────────────────────────────

def test_load_mission_domains_returns_dict(gcl, tmp_path: Path) -> None:
    """load_mission_domains parses a valid YAML file into a dict."""
    cfg = _write_config(
        tmp_path / "mission_domains.yaml",
        {"music": {"signals": ["music", "spotify"], "default_note": "A music note."}},
    )
    result = gcl.load_mission_domains(cfg)
    assert "music" in result
    assert result["music"]["signals"] == ["music", "spotify"]
    assert result["music"]["default_note"] == "A music note."


def test_load_mission_domains_missing_file_returns_empty(gcl, tmp_path: Path) -> None:
    """load_mission_domains returns {} when the file does not exist."""
    assert gcl.load_mission_domains(tmp_path / "nonexistent.yaml") == {}


def test_load_mission_domains_empty_file_returns_empty(gcl, tmp_path: Path) -> None:
    """load_mission_domains returns {} for a blank file."""
    cfg = tmp_path / "mission_domains.yaml"
    cfg.write_text("", encoding="utf-8")
    assert gcl.load_mission_domains(cfg) == {}


# ── detect_mission_alignment ─────────────────────────────────────────────────

_DETECT_DOMAINS = {
    "music": {"signals": ["music", "spotify"], "default_note": "Music default."},
    "animal_welfare": {"signals": ["animal", "aspca"], "default_note": "Animal default."},
}


def test_detect_returns_note_on_signal_match(gcl, install_domains) -> None:
    """detect_mission_alignment returns the domain note when a signal is present."""
    install_domains(_DETECT_DOMAINS)
    notes = {"music": "Music note here."}
    result = gcl.detect_mission_alignment("Spotify", "We stream music worldwide.", notes)
    assert result == "Music note here."


def test_detect_returns_none_on_no_match(gcl, install_domains) -> None:
    """detect_mission_alignment returns None when no signal matches."""
    install_domains(_DETECT_DOMAINS)
    notes = {"music": "Music note."}
    assert gcl.detect_mission_alignment("Acme Corp", "We sell widgets.", notes) is None


def test_detect_is_case_insensitive(gcl, install_domains) -> None:
    """Signal matching is case-insensitive (text is lowercased before scan)."""
    install_domains(_DETECT_DOMAINS)
    notes = {"animal_welfare": "Animal note."}
    result = gcl.detect_mission_alignment("ASPCA", "We care for ANIMALS.", notes)
    assert result == "Animal note."


def test_detect_uses_default_mission_notes_when_none_passed(gcl) -> None:
    """detect_mission_alignment uses module-level _MISSION_NOTES when notes=None."""
    if "music" not in gcl._MISSION_DOMAINS:
        pytest.skip("music domain not present in loaded config")
    result = gcl.detect_mission_alignment("Spotify", "We build music streaming products.")
    assert result is not None
    assert len(result) > 10  # some non-empty hint


# ── _build_mission_notes ─────────────────────────────────────────────────────

def test_build_mission_notes_uses_default_when_no_custom(gcl, install_domains) -> None:
    """_build_mission_notes uses YAML default_note when user has no custom note."""
    install_domains({"music": {"signals": ["music"], "default_note": "Generic music note."}})

    class EmptyProfile:
        name = "Test User"
        mission_preferences: dict = {}

    notes = gcl._build_mission_notes(profile=EmptyProfile())
    assert notes["music"] == "Generic music note."


def test_build_mission_notes_uses_custom_note_when_provided(gcl, install_domains) -> None:
    """_build_mission_notes wraps user's custom note in a prompt hint."""
    install_domains({"music": {"signals": ["music"], "default_note": "Default."}})

    class FakeProfile:
        name = "Alex"
        mission_preferences = {"music": "I played guitar for 10 years."}

    notes = gcl._build_mission_notes(profile=FakeProfile())
    assert "I played guitar for 10 years." in notes["music"]
    assert "Alex" in notes["music"]


# ── committed config sanity checks ───────────────────────────────────────────

def test_committed_config_has_required_domains(gcl) -> None:
    """The committed mission_domains.yaml has the 5 original domains + 3 extended ones."""
    required = {"music", "animal_welfare", "education", "social_impact", "health",
                "privacy", "accessibility", "open_source"}
    missing = required - set(gcl._MISSION_DOMAINS)
    assert not missing, f"Missing domains in committed config: {missing}"


def test_committed_config_each_domain_has_signals_and_note(gcl) -> None:
    """Every domain in the committed config has a non-empty signals list and default_note."""
    for domain, cfg in gcl._MISSION_DOMAINS.items():
        assert cfg.get("signals"), f"Domain '{domain}' has no signals"
        # `or ""` keeps a null default_note an assertion failure, not a crash.
        assert (cfg.get("default_note") or "").strip(), \
            f"Domain '{domain}' has no default_note"