feat: add suggest_search_terms with three-angle exclude analysis
Replaces NotImplementedError stub with a real LLMRouter-backed implementation that builds a structured prompt covering blocklist alias expansion, values misalignment, and role-type filtering, then parses the JSON response into suggested_titles and suggested_excludes lists. Moves LLMRouter import to module level so tests can patch it at scripts.suggest_helpers.LLMRouter.
This commit is contained in:
parent
efe71150e3
commit
5f1c372c0a
2 changed files with 223 additions and 0 deletions
126
scripts/suggest_helpers.py
Normal file
126
scripts/suggest_helpers.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""
|
||||
LLM-powered suggestion helpers for Settings UI.
|
||||
Two functions, each makes one LLMRouter call:
|
||||
- suggest_search_terms: enhanced title + three-angle exclude suggestions
|
||||
- suggest_resume_keywords: skills/domains/keywords gap analysis
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from scripts.llm_router import LLMRouter
|
||||
|
||||
|
||||
def _load_resume_context(resume_path: Path) -> str:
|
||||
"""Extract 3 most recent positions from plain_text_resume.yaml as a short summary."""
|
||||
import yaml
|
||||
if not resume_path.exists():
|
||||
return ""
|
||||
resume = yaml.safe_load(resume_path.read_text()) or {}
|
||||
lines = []
|
||||
for exp in (resume.get("experience_details") or [])[:3]:
|
||||
pos = exp.get("position", "")
|
||||
co = exp.get("company", "")
|
||||
skills = ", ".join((exp.get("skills_acquired") or [])[:5])
|
||||
lines.append(f"- {pos} at {co}: {skills}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _parse_json(text: str) -> dict[str, Any]:
|
||||
"""Extract the first JSON object from LLM output. Returns {} on failure."""
|
||||
m = re.search(r"\{.*\}", text, re.DOTALL)
|
||||
if m:
|
||||
try:
|
||||
return json.loads(m.group())
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def _join_names(values: Any) -> str:
    """Render a list-like config value as a comma-separated string, or "none"."""
    if isinstance(values, str):
        values = [values]
    elif not isinstance(values, (list, tuple)):
        # Key present but null (or an unexpected type): treat as empty.
        values = []
    names = [str(v).strip() for v in values if v is not None and str(v).strip()]
    return ", ".join(names) or "none"


def _clean_suggestions(value: Any) -> list[str]:
    """Keep only the non-blank strings of an LLM-provided list; else return []."""
    if not isinstance(value, list):
        return []
    return [item.strip() for item in value if isinstance(item, str) and item.strip()]


def suggest_search_terms(
    current_titles: list[str],
    resume_path: Path,
    blocklist: dict[str, Any],
    user_profile: dict[str, Any],
) -> dict:
    """
    Suggest additional job titles and exclude keywords.

    Builds one prompt from the resume summary, the current titles, the
    blocklist and the user profile, sends it through a single
    ``LLMRouter().complete()`` call, and parses the JSON reply.

    Three-angle exclude analysis:
      A: Blocklist alias expansion (blocked companies/industries → keyword variants)
      B: Values misalignment (mission preferences → industries/culture to avoid)
      C: Role-type filter (career summary → role types that don't fit)

    Args:
        current_titles: Job titles already being searched.
        resume_path: Resume YAML used for the background section.
        blocklist: Reads the "companies" and "industries" keys.
        user_profile: Reads "nda_companies", "career_summary" and
            "mission_preferences" (a mapping of name → free text).

    Returns: {"suggested_titles": [...], "suggested_excludes": [...]}
        Both values are always lists of non-blank strings; they are empty
        when the reply holds no parseable JSON or the keys are missing or
        not lists.

    Exceptions raised by ``LLMRouter().complete()`` are not caught here and
    propagate to the caller.
    """
    blocklist = blocklist or {}
    user_profile = user_profile or {}

    resume_section = _load_resume_context(resume_path) or "Not provided"
    titles_str = "\n".join(f"- {t}" for t in current_titles or []) or "(none yet)"

    # Config values may be present-but-null or hold non-strings when loaded
    # from YAML; _join_names normalises all of those to text or "none".
    bl_companies = _join_names(blocklist.get("companies"))
    bl_industries = _join_names(blocklist.get("industries"))
    nda = _join_names(user_profile.get("nda_companies"))
    career_summary = user_profile.get("career_summary", "") or "Not provided"

    mission_raw = user_profile.get("mission_preferences")
    if not isinstance(mission_raw, dict):
        mission_raw = {}
    # Skip blank/empty preferences; str() keeps non-string values from
    # crashing on .strip().
    mission_str = "\n".join(
        f" - {k}: {v}" for k, v in mission_raw.items() if v and str(v).strip()
    ) or " (none specified)"

    prompt = f"""You are helping a job seeker optimise their search configuration.

--- RESUME BACKGROUND ---
{resume_section}

--- CAREER SUMMARY ---
{career_summary}

--- CURRENT TITLES BEING SEARCHED ---
{titles_str}

--- BLOCKED ENTITIES ---
Companies blocked: {bl_companies}
Industries blocked: {bl_industries}
NDA / confidential employers: {nda}

--- MISSION & VALUES ---
{mission_str}

Provide all four of the following:

1. TITLE SUGGESTIONS
5-8 additional job titles they may be missing: alternative names, adjacent roles, or senior variants of their current titles.

2. EXCLUDE KEYWORDS — BLOCKLIST ALIASES
The user has blocked the companies/industries above. Suggest keyword variants that would also catch their aliases, subsidiaries, or related brands.
Example: blocking "Meta" → also exclude "facebook", "instagram", "metaverse", "oculus".

3. EXCLUDE KEYWORDS — VALUES MISALIGNMENT
Based on the user's mission and values above, suggest industry or culture keywords to exclude.
Examples: "tobacco", "gambling", "fossil fuel", "defense contractor", "MLM", "commission-only", "pyramid".

4. EXCLUDE KEYWORDS — ROLE TYPE FILTER
Based on the user's career background, suggest role-type terms that don't match their trajectory.
Examples for a CS/TAM leader: "cold calling", "door to door", "quota-driven", "SDR", "sales development rep".

Return ONLY valid JSON in exactly this format (no extra text):
{{"suggested_titles": ["Title 1", "Title 2"],
"suggested_excludes": ["keyword 1", "keyword 2", "keyword 3"]}}"""

    raw = LLMRouter().complete(prompt) or ""
    parsed = _parse_json(raw.strip())
    # The model may return null, a bare string, or mixed-type lists for
    # these keys; callers are promised plain lists of strings.
    return {
        "suggested_titles": _clean_suggestions(parsed.get("suggested_titles")),
        "suggested_excludes": _clean_suggestions(parsed.get("suggested_excludes")),
    }
|
||||
|
||||
|
||||
def suggest_resume_keywords(
    resume_path: Path,
    current_kw: dict[str, list[str]],
) -> dict:
    """
    Propose skills, domains, and keywords missing from resume_keywords.yaml.

    Not implemented yet: calling this always raises NotImplementedError.

    Intended return shape: {"skills": [...], "domains": [...], "keywords": [...]}
    """
    raise NotImplementedError()
|
||||
97
tests/test_suggest_helpers.py
Normal file
97
tests/test_suggest_helpers.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
"""Tests for scripts/suggest_helpers.py."""
|
||||
import json
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
# Resume YAML path (two directories above this test file, under config/)
# handed to suggest_search_terms by the tests below.
RESUME_PATH = Path(__file__).parent.parent / "config" / "plain_text_resume.yaml"
|
||||
|
||||
|
||||
# ── _parse_json ───────────────────────────────────────────────────────────────
|
||||
|
||||
def test_parse_json_extracts_valid_object():
    """A JSON object embedded in surrounding prose is found and decoded."""
    from scripts.suggest_helpers import _parse_json

    llm_output = 'Here is the result: {"a": [1, 2], "b": "hello"} done.'
    expected = {"a": [1, 2], "b": "hello"}
    assert _parse_json(llm_output) == expected
|
||||
|
||||
|
||||
def test_parse_json_returns_empty_on_invalid():
    """Text without JSON, or with truncated JSON, yields an empty dict."""
    from scripts.suggest_helpers import _parse_json

    for unparseable in ("no json here", '{"broken": '):
        assert _parse_json(unparseable) == {}
|
||||
|
||||
|
||||
# ── suggest_search_terms ──────────────────────────────────────────────────────
|
||||
|
||||
# Shared fixture: blocklist with two companies, one industry, no locations.
BLOCKLIST = dict(
    companies=["Meta", "Amazon"],
    industries=["gambling"],
    locations=[],
)

# Shared fixture: profile with one filled and one blank mission preference.
USER_PROFILE = dict(
    career_summary="Customer success leader with 10 years in B2B SaaS.",
    mission_preferences=dict(
        animal_welfare="I volunteer at my local shelter.",
        education="",
    ),
    nda_companies=["Acme Corp"],
)
|
||||
|
||||
|
||||
def _mock_llm(response_dict: dict):
|
||||
"""Return a patcher that makes LLMRouter().complete() return a JSON string."""
|
||||
mock_router = MagicMock()
|
||||
mock_router.complete.return_value = json.dumps(response_dict)
|
||||
return patch("scripts.suggest_helpers.LLMRouter", return_value=mock_router)
|
||||
|
||||
|
||||
def test_suggest_search_terms_returns_titles_and_excludes():
    """Both lists from the LLM's JSON reply are passed back to the caller."""
    from scripts.suggest_helpers import suggest_search_terms

    llm_reply = {
        "suggested_titles": ["VP Customer Success"],
        "suggested_excludes": ["cold calling"],
    }
    with _mock_llm(llm_reply):
        outcome = suggest_search_terms(
            ["Customer Success Manager"], RESUME_PATH, BLOCKLIST, USER_PROFILE
        )

    assert outcome["suggested_titles"] == ["VP Customer Success"]
    assert outcome["suggested_excludes"] == ["cold calling"]
|
||||
|
||||
|
||||
def test_suggest_search_terms_prompt_contains_blocklist_companies():
    """Every blocked company name appears in the prompt sent to the LLM."""
    from scripts.suggest_helpers import suggest_search_terms

    empty_reply = {"suggested_titles": [], "suggested_excludes": []}
    with _mock_llm(empty_reply) as router_cls:
        suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE)
        sent_prompt = router_cls.return_value.complete.call_args.args[0]

    for company in ("Meta", "Amazon"):
        assert company in sent_prompt
|
||||
|
||||
|
||||
def test_suggest_search_terms_prompt_contains_mission():
    """The filled-in mission preference is mentioned in the prompt."""
    from scripts.suggest_helpers import suggest_search_terms

    empty_reply = {"suggested_titles": [], "suggested_excludes": []}
    with _mock_llm(empty_reply) as router_cls:
        suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE)
        sent_prompt = router_cls.return_value.complete.call_args.args[0]

    # Accept either the raw key or a humanised spelling of it.
    has_key = "animal_welfare" in sent_prompt
    has_phrase = "animal welfare" in sent_prompt.lower()
    assert has_key or has_phrase
|
||||
|
||||
|
||||
def test_suggest_search_terms_prompt_contains_career_summary():
    """The profile's career summary text is included in the prompt."""
    from scripts.suggest_helpers import suggest_search_terms

    empty_reply = {"suggested_titles": [], "suggested_excludes": []}
    with _mock_llm(empty_reply) as router_cls:
        suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE)
        sent_prompt = router_cls.return_value.complete.call_args.args[0]

    assert "Customer success leader" in sent_prompt
|
||||
|
||||
|
||||
def test_suggest_search_terms_returns_empty_on_bad_json():
    """A reply with no JSON in it produces two empty suggestion lists."""
    from scripts.suggest_helpers import suggest_search_terms

    refusing_router = MagicMock(
        **{"complete.return_value": "sorry, I cannot help with that"}
    )
    with patch("scripts.suggest_helpers.LLMRouter", return_value=refusing_router):
        outcome = suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE)

    expected = {"suggested_titles": [], "suggested_excludes": []}
    assert outcome == expected
|
||||
|
||||
|
||||
def test_suggest_search_terms_raises_on_llm_exhausted():
    """A RuntimeError from the router's complete() reaches the caller unchanged."""
    from scripts.suggest_helpers import suggest_search_terms

    failure = RuntimeError("All LLM backends exhausted")
    failing_router = MagicMock(**{"complete.side_effect": failure})
    router_patch = patch("scripts.suggest_helpers.LLMRouter", return_value=failing_router)

    with router_patch, pytest.raises(RuntimeError, match="All LLM backends exhausted"):
        suggest_search_terms(["CSM"], RESUME_PATH, BLOCKLIST, USER_PROFILE)
|
||||
Loading…
Reference in a new issue