feat(api): add job ranker and credential store scripts
- scripts/job_ranker.py: two-stage rank pipeline for /api/jobs/stack endpoint; scores pending jobs by match_score + seniority signals - scripts/credential_store.py: per-user credential management (BYOK API keys, email passwords); used by dev_api settings endpoints
This commit is contained in:
parent
ee66b6b235
commit
faa1807e96
2 changed files with 511 additions and 0 deletions
198
scripts/credential_store.py
Normal file
198
scripts/credential_store.py
Normal file
|
|
@ -0,0 +1,198 @@
|
||||||
|
"""
|
||||||
|
Credential store abstraction for Peregrine.
|
||||||
|
|
||||||
|
Backends (set via CREDENTIAL_BACKEND env var):
|
||||||
|
auto → try keyring, fall back to file (default)
|
||||||
|
keyring → python-keyring (OS Keychain / SecretService / libsecret)
|
||||||
|
file → Fernet-encrypted JSON in config/credentials/ (key at config/.credential_key)
|
||||||
|
|
||||||
|
Env var references:
|
||||||
|
Any stored value matching ${VAR_NAME} is resolved from os.environ at read time.
|
||||||
|
Users can store "${IMAP_PASSWORD}" as the credential value; it is never treated
|
||||||
|
as the actual secret — only the env var it points to is used.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_ENV_REF = re.compile(r'^\$\{([A-Z_][A-Z0-9_]*)\}$')
|
||||||
|
|
||||||
|
_PROJECT_ROOT = Path(__file__).parent.parent
|
||||||
|
CRED_DIR = _PROJECT_ROOT / "config" / "credentials"
|
||||||
|
KEY_PATH = _PROJECT_ROOT / "config" / ".credential_key"
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_env_ref(value: str) -> Optional[str]:
|
||||||
|
"""If value is ${VAR_NAME}, return os.environ[VAR_NAME]; otherwise return None."""
|
||||||
|
m = _ENV_REF.match(value)
|
||||||
|
if m:
|
||||||
|
resolved = os.environ.get(m.group(1))
|
||||||
|
if resolved is None:
|
||||||
|
logger.warning("Credential reference %s is set but env var is not defined", value)
|
||||||
|
return resolved
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_backend() -> str:
|
||||||
|
backend = os.environ.get("CREDENTIAL_BACKEND", "auto").lower()
|
||||||
|
if backend != "auto":
|
||||||
|
return backend
|
||||||
|
# Auto: try keyring, fall back to file
|
||||||
|
try:
|
||||||
|
import keyring
|
||||||
|
kr = keyring.get_keyring()
|
||||||
|
# Reject the null/fail keyring — it can't actually store anything
|
||||||
|
if "fail" in type(kr).__name__.lower() or "null" in type(kr).__name__.lower():
|
||||||
|
raise RuntimeError("No usable keyring backend found")
|
||||||
|
return "keyring"
|
||||||
|
except Exception:
|
||||||
|
return "file"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_fernet():
|
||||||
|
"""Return a Fernet instance, auto-generating the key on first use."""
|
||||||
|
try:
|
||||||
|
from cryptography.fernet import Fernet
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if KEY_PATH.exists():
|
||||||
|
key = KEY_PATH.read_bytes().strip()
|
||||||
|
else:
|
||||||
|
key = Fernet.generate_key()
|
||||||
|
KEY_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
fd = os.open(str(KEY_PATH), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
|
with os.fdopen(fd, "wb") as f:
|
||||||
|
f.write(key)
|
||||||
|
logger.info("Generated new credential encryption key at %s", KEY_PATH)
|
||||||
|
|
||||||
|
return Fernet(key)
|
||||||
|
|
||||||
|
|
||||||
|
def _file_read(service: str) -> dict:
|
||||||
|
"""Read the credentials file for a service, decrypting if possible."""
|
||||||
|
cred_file = CRED_DIR / f"{service}.json"
|
||||||
|
if not cred_file.exists():
|
||||||
|
return {}
|
||||||
|
raw = cred_file.read_bytes()
|
||||||
|
fernet = _get_fernet()
|
||||||
|
if fernet:
|
||||||
|
try:
|
||||||
|
return json.loads(fernet.decrypt(raw))
|
||||||
|
except Exception:
|
||||||
|
# May be an older plaintext file — try reading as text
|
||||||
|
try:
|
||||||
|
return json.loads(raw.decode())
|
||||||
|
except Exception:
|
||||||
|
logger.error("Failed to read credentials for service %s", service)
|
||||||
|
return {}
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
return json.loads(raw.decode())
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _file_write(service: str, data: dict) -> None:
|
||||||
|
"""Write the credentials file for a service, encrypting if possible."""
|
||||||
|
CRED_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
cred_file = CRED_DIR / f"{service}.json"
|
||||||
|
fernet = _get_fernet()
|
||||||
|
if fernet:
|
||||||
|
content = fernet.encrypt(json.dumps(data).encode())
|
||||||
|
fd = os.open(str(cred_file), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
|
with os.fdopen(fd, "wb") as f:
|
||||||
|
f.write(content)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"cryptography package not installed — storing credentials as plaintext with chmod 600. "
|
||||||
|
"Install with: pip install cryptography"
|
||||||
|
)
|
||||||
|
content = json.dumps(data).encode()
|
||||||
|
fd = os.open(str(cred_file), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||||
|
with os.fdopen(fd, "wb") as f:
|
||||||
|
f.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
def get_credential(service: str, key: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Retrieve a credential. If the stored value is an env var reference (${VAR}),
|
||||||
|
resolves it from os.environ at call time.
|
||||||
|
"""
|
||||||
|
backend = _get_backend()
|
||||||
|
raw: Optional[str] = None
|
||||||
|
|
||||||
|
if backend == "keyring":
|
||||||
|
try:
|
||||||
|
import keyring
|
||||||
|
raw = keyring.get_password(service, key)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("keyring get failed for %s/%s: %s", service, key, e)
|
||||||
|
else: # file
|
||||||
|
data = _file_read(service)
|
||||||
|
raw = data.get(key)
|
||||||
|
|
||||||
|
if raw is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Resolve env var references transparently
|
||||||
|
resolved = _resolve_env_ref(raw)
|
||||||
|
if resolved is not None:
|
||||||
|
return resolved
|
||||||
|
if _ENV_REF.match(raw):
|
||||||
|
return None # reference defined but env var not set
|
||||||
|
|
||||||
|
return raw
|
||||||
|
|
||||||
|
|
||||||
|
def set_credential(service: str, key: str, value: str) -> None:
|
||||||
|
"""
|
||||||
|
Store a credential. Value may be a literal secret or a ${VAR_NAME} reference.
|
||||||
|
Env var references are stored as-is and resolved at get time.
|
||||||
|
"""
|
||||||
|
if not value:
|
||||||
|
return
|
||||||
|
|
||||||
|
backend = _get_backend()
|
||||||
|
|
||||||
|
if backend == "keyring":
|
||||||
|
try:
|
||||||
|
import keyring
|
||||||
|
keyring.set_password(service, key, value)
|
||||||
|
return
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("keyring set failed for %s/%s: %s — falling back to file", service, key, e)
|
||||||
|
backend = "file"
|
||||||
|
|
||||||
|
# file backend
|
||||||
|
data = _file_read(service)
|
||||||
|
data[key] = value
|
||||||
|
_file_write(service, data)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_credential(service: str, key: str) -> None:
|
||||||
|
"""Remove a stored credential."""
|
||||||
|
backend = _get_backend()
|
||||||
|
|
||||||
|
if backend == "keyring":
|
||||||
|
try:
|
||||||
|
import keyring
|
||||||
|
keyring.delete_password(service, key)
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
backend = "file"
|
||||||
|
|
||||||
|
data = _file_read(service)
|
||||||
|
data.pop(key, None)
|
||||||
|
if data:
|
||||||
|
_file_write(service, data)
|
||||||
|
else:
|
||||||
|
cred_file = CRED_DIR / f"{service}.json"
|
||||||
|
if cred_file.exists():
|
||||||
|
cred_file.unlink()
|
||||||
313
scripts/job_ranker.py
Normal file
313
scripts/job_ranker.py
Normal file
|
|
@ -0,0 +1,313 @@
|
||||||
|
"""Job ranking engine — two-stage discovery → review pipeline.
|
||||||
|
|
||||||
|
Stage 1 (discover.py) scrapes a wide corpus and stores everything as 'pending'.
|
||||||
|
Stage 2 (this module) scores the corpus; GET /api/jobs/stack returns top-N best
|
||||||
|
matches for the user's current review session.
|
||||||
|
|
||||||
|
All signal functions return a float in [0, 1]. The final stack_score is 0–100.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from scripts.job_ranker import rank_jobs
|
||||||
|
ranked = rank_jobs(jobs, search_titles, salary_min, salary_max, user_level)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
|
# ── TUNING ─────────────────────────────────────────────────────────────────────
|
||||||
|
# Adjust these constants to change how jobs are ranked.
|
||||||
|
# All individual signal scores are normalised to [0, 1] before weighting.
|
||||||
|
# Weights should sum to ≤ 1.0; the remainder is unallocated slack.
|
||||||
|
|
||||||
|
W_RESUME_MATCH = 0.40 # TF-IDF cosine similarity stored as match_score (0–100 → 0–1)
|
||||||
|
W_TITLE_MATCH = 0.30 # seniority-aware title + domain keyword overlap
|
||||||
|
W_RECENCY = 0.15 # freshness — exponential decay from date_found
|
||||||
|
W_SALARY_FIT = 0.10 # salary range overlap vs user target (neutral when unknown)
|
||||||
|
W_DESC_QUALITY = 0.05 # posting completeness — penalises stub / ghost posts
|
||||||
|
|
||||||
|
# Keyword gap penalty: each missing keyword from the resume match costs points.
|
||||||
|
# Gaps are already partially captured by W_RESUME_MATCH (same TF-IDF source),
|
||||||
|
# so this is a soft nudge, not a hard filter.
|
||||||
|
GAP_PENALTY_PER_KEYWORD: float = 0.5 # points off per gap keyword (0–100 scale)
|
||||||
|
GAP_MAX_PENALTY: float = 5.0 # hard cap so a gap-heavy job can still rank
|
||||||
|
|
||||||
|
# Recency half-life: score halves every N days past date_found
|
||||||
|
RECENCY_HALF_LIFE: int = 7 # days
|
||||||
|
|
||||||
|
# Description word-count thresholds
|
||||||
|
DESC_MIN_WORDS: int = 50 # below this → scaled penalty
|
||||||
|
DESC_TARGET_WORDS: int = 200 # at or above → full quality score
|
||||||
|
# ── END TUNING ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
# ── Seniority level map ────────────────────────────────────────────────────────
|
||||||
|
# (level, [keyword substrings that identify that level])
|
||||||
|
# Matched on " <lower_title> " with a space-padded check to avoid false hits.
|
||||||
|
# Level 3 is the default (mid-level, no seniority modifier in title).
|
||||||
|
_SENIORITY_MAP: list[tuple[int, list[str]]] = [
|
||||||
|
(1, ["intern", "internship", "trainee", "apprentice", "co-op", "coop"]),
|
||||||
|
(2, ["entry level", "entry-level", "junior", "jr ", "jr.", "associate "]),
|
||||||
|
(3, ["mid level", "mid-level", "intermediate"]),
|
||||||
|
(4, ["senior ", "senior,", "sr ", "sr.", " lead ", "lead,", " ii ", " iii ",
|
||||||
|
"specialist", "experienced"]),
|
||||||
|
(5, ["staff ", "principal ", "architect ", "expert ", "distinguished"]),
|
||||||
|
(6, ["director", "head of ", "manager ", "vice president", " vp "]),
|
||||||
|
(7, ["chief", "cto", "cio", "cpo", "president", "founder"]),
|
||||||
|
]
|
||||||
|
|
||||||
|
# job_level − user_level → scoring multiplier
|
||||||
|
# Positive delta = job is more senior (stretch up = encouraged)
|
||||||
|
# Negative delta = job is below the user's level
|
||||||
|
_LEVEL_MULTIPLIER: dict[int, float] = {
|
||||||
|
-4: 0.05, -3: 0.10, -2: 0.25, -1: 0.65,
|
||||||
|
0: 1.00,
|
||||||
|
1: 0.90, 2: 0.65, 3: 0.25, 4: 0.05,
|
||||||
|
}
|
||||||
|
_DEFAULT_LEVEL_MULTIPLIER = 0.05
|
||||||
|
|
||||||
|
|
||||||
|
# ── Seniority helpers ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def infer_seniority(title: str) -> int:
|
||||||
|
"""Return seniority level 1–7 for a job or resume title. Defaults to 3."""
|
||||||
|
padded = f" {title.lower()} "
|
||||||
|
# Iterate highest → lowest so "Senior Lead" resolves to 4, not 6
|
||||||
|
for level, keywords in reversed(_SENIORITY_MAP):
|
||||||
|
for kw in keywords:
|
||||||
|
if kw in padded:
|
||||||
|
return level
|
||||||
|
return 3
|
||||||
|
|
||||||
|
|
||||||
|
def seniority_from_experience(titles: list[str]) -> int:
|
||||||
|
"""Estimate user's current level from their most recent experience titles.
|
||||||
|
|
||||||
|
Averages the levels of the top-3 most recent titles (first in the list).
|
||||||
|
Falls back to 3 (mid-level) if no titles are provided.
|
||||||
|
"""
|
||||||
|
if not titles:
|
||||||
|
return 3
|
||||||
|
sample = [t for t in titles if t.strip()][:3]
|
||||||
|
if not sample:
|
||||||
|
return 3
|
||||||
|
levels = [infer_seniority(t) for t in sample]
|
||||||
|
return round(sum(levels) / len(levels))
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_level_words(text: str) -> str:
|
||||||
|
"""Remove seniority/modifier words so domain keywords stand out."""
|
||||||
|
strip = {
|
||||||
|
"senior", "sr", "junior", "jr", "lead", "staff", "principal",
|
||||||
|
"associate", "entry", "mid", "intermediate", "experienced",
|
||||||
|
"director", "head", "manager", "architect", "chief", "intern",
|
||||||
|
"ii", "iii", "iv", "i",
|
||||||
|
}
|
||||||
|
return " ".join(w for w in text.lower().split() if w not in strip)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Signal functions ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def title_match_score(job_title: str, search_titles: list[str], user_level: int) -> float:
|
||||||
|
"""Seniority-aware title similarity in [0, 1].
|
||||||
|
|
||||||
|
Combines:
|
||||||
|
- Domain overlap: keyword intersection between job title and search titles
|
||||||
|
after stripping level modifiers (so "Senior Software Engineer" vs
|
||||||
|
"Software Engineer" compares only on "software engineer").
|
||||||
|
- Seniority multiplier: rewards same-level and +1 stretch; penalises
|
||||||
|
large downgrade or unreachable stretch.
|
||||||
|
"""
|
||||||
|
if not search_titles:
|
||||||
|
return 0.5 # neutral — user hasn't set title prefs yet
|
||||||
|
|
||||||
|
job_level = infer_seniority(job_title)
|
||||||
|
level_delta = job_level - user_level
|
||||||
|
seniority_factor = _LEVEL_MULTIPLIER.get(level_delta, _DEFAULT_LEVEL_MULTIPLIER)
|
||||||
|
|
||||||
|
job_core_words = {w for w in _strip_level_words(job_title).split() if len(w) > 2}
|
||||||
|
|
||||||
|
best_domain = 0.0
|
||||||
|
for st in search_titles:
|
||||||
|
st_core_words = {w for w in _strip_level_words(st).split() if len(w) > 2}
|
||||||
|
if not st_core_words:
|
||||||
|
continue
|
||||||
|
# Recall-biased overlap: what fraction of the search title keywords
|
||||||
|
# appear in the job title? (A job posting may use synonyms but we
|
||||||
|
# at least want the core nouns to match.)
|
||||||
|
overlap = len(st_core_words & job_core_words) / len(st_core_words)
|
||||||
|
best_domain = max(best_domain, overlap)
|
||||||
|
|
||||||
|
# Base score from domain match scaled by seniority appropriateness.
|
||||||
|
# A small seniority_factor bonus (×0.2) ensures that even a near-miss
|
||||||
|
# domain match still benefits from seniority alignment.
|
||||||
|
return min(1.0, best_domain * seniority_factor + seniority_factor * 0.15)
|
||||||
|
|
||||||
|
|
||||||
|
def recency_decay(date_found: str) -> float:
|
||||||
|
"""Exponential decay starting from date_found.
|
||||||
|
|
||||||
|
Returns 1.0 for today, 0.5 after RECENCY_HALF_LIFE days, ~0.0 after ~4×.
|
||||||
|
Returns 0.5 (neutral) if the date is unparseable.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Support both "YYYY-MM-DD" and "YYYY-MM-DD HH:MM:SS"
|
||||||
|
found = datetime.fromisoformat(date_found.split("T")[0].split(" ")[0])
|
||||||
|
found = found.replace(tzinfo=timezone.utc)
|
||||||
|
now = datetime.now(tz=timezone.utc)
|
||||||
|
days_old = max(0.0, (now - found).total_seconds() / 86400)
|
||||||
|
return math.exp(-math.log(2) * days_old / RECENCY_HALF_LIFE)
|
||||||
|
except Exception:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_salary_range(text: str | None) -> tuple[int | None, int | None]:
|
||||||
|
"""Extract (low, high) salary integers from free-text. Returns (None, None) on failure.
|
||||||
|
|
||||||
|
Handles: "$80k - $120k", "USD 80,000 - 120,000 per year", "£45,000",
|
||||||
|
"80000", "80K/yr", "80-120k", etc.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return None, None
|
||||||
|
normalized = re.sub(r"[$,£€₹¥\s]", "", text.lower())
|
||||||
|
# Match numbers optionally followed by 'k'
|
||||||
|
raw_nums = re.findall(r"(\d+(?:\.\d+)?)k?", normalized)
|
||||||
|
values = []
|
||||||
|
for n, full in zip(raw_nums, re.finditer(r"(\d+(?:\.\d+)?)(k?)", normalized)):
|
||||||
|
val = float(full.group(1))
|
||||||
|
if full.group(2): # ends with 'k'
|
||||||
|
val *= 1000
|
||||||
|
elif val < 1000: # bare numbers < 1000 are likely thousands (e.g., "80" in "80-120k")
|
||||||
|
val *= 1000
|
||||||
|
if val >= 10_000: # sanity: ignore clearly wrong values
|
||||||
|
values.append(int(val))
|
||||||
|
values = sorted(set(values))
|
||||||
|
if not values:
|
||||||
|
return None, None
|
||||||
|
return values[0], values[-1]
|
||||||
|
|
||||||
|
|
||||||
|
def salary_fit(
|
||||||
|
salary_text: str | None,
|
||||||
|
target_min: int | None,
|
||||||
|
target_max: int | None,
|
||||||
|
) -> float:
|
||||||
|
"""Salary range overlap score in [0, 1].
|
||||||
|
|
||||||
|
Returns 0.5 (neutral) when either range is unknown — a missing salary
|
||||||
|
line is not inherently negative.
|
||||||
|
"""
|
||||||
|
if not salary_text or (target_min is None and target_max is None):
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
job_low, job_high = _parse_salary_range(salary_text)
|
||||||
|
if job_low is None:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
t_min = target_min or 0
|
||||||
|
t_max = target_max or (int(target_min * 1.5) if target_min else job_high or job_low)
|
||||||
|
job_high = job_high or job_low
|
||||||
|
|
||||||
|
overlap_low = max(job_low, t_min)
|
||||||
|
overlap_high = min(job_high, t_max)
|
||||||
|
overlap = max(0, overlap_high - overlap_low)
|
||||||
|
target_span = max(1, t_max - t_min)
|
||||||
|
return min(1.0, overlap / target_span)
|
||||||
|
|
||||||
|
|
||||||
|
def description_quality(description: str | None) -> float:
|
||||||
|
"""Posting completeness score in [0, 1].
|
||||||
|
|
||||||
|
Stubs and ghost posts score near 0; well-written descriptions score 1.0.
|
||||||
|
"""
|
||||||
|
if not description:
|
||||||
|
return 0.0
|
||||||
|
words = len(description.split())
|
||||||
|
if words < DESC_MIN_WORDS:
|
||||||
|
return (words / DESC_MIN_WORDS) * 0.4 # steep penalty for stubs
|
||||||
|
if words >= DESC_TARGET_WORDS:
|
||||||
|
return 1.0
|
||||||
|
return 0.4 + 0.6 * (words - DESC_MIN_WORDS) / (DESC_TARGET_WORDS - DESC_MIN_WORDS)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Composite scorer ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def score_job(
|
||||||
|
job: dict,
|
||||||
|
search_titles: list[str],
|
||||||
|
target_salary_min: int | None,
|
||||||
|
target_salary_max: int | None,
|
||||||
|
user_level: int,
|
||||||
|
) -> float:
|
||||||
|
"""Compute composite stack_score (0–100) for a single job dict.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
job: Row dict from the jobs table (must have title, match_score,
|
||||||
|
date_found, salary, description, keyword_gaps).
|
||||||
|
search_titles: User's desired job titles (from search prefs).
|
||||||
|
target_salary_*: User's salary target from resume profile (or None).
|
||||||
|
user_level: Inferred seniority level 1–7.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A float 0–100. Higher = better match for this user's session.
|
||||||
|
"""
|
||||||
|
# ── Individual signals (all 0–1) ──────────────────────────────────────────
|
||||||
|
match_raw = job.get("match_score")
|
||||||
|
s_resume = (match_raw / 100.0) if match_raw is not None else 0.5
|
||||||
|
|
||||||
|
s_title = title_match_score(job.get("title", ""), search_titles, user_level)
|
||||||
|
s_recency = recency_decay(job.get("date_found", ""))
|
||||||
|
s_salary = salary_fit(job.get("salary"), target_salary_min, target_salary_max)
|
||||||
|
s_desc = description_quality(job.get("description"))
|
||||||
|
|
||||||
|
# ── Weighted sum ──────────────────────────────────────────────────────────
|
||||||
|
base = (
|
||||||
|
W_RESUME_MATCH * s_resume
|
||||||
|
+ W_TITLE_MATCH * s_title
|
||||||
|
+ W_RECENCY * s_recency
|
||||||
|
+ W_SALARY_FIT * s_salary
|
||||||
|
+ W_DESC_QUALITY * s_desc
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── Keyword gap penalty (applied on the 0–100 scale) ─────────────────────
|
||||||
|
gaps_raw = job.get("keyword_gaps") or ""
|
||||||
|
gap_count = len([g for g in gaps_raw.split(",") if g.strip()]) if gaps_raw else 0
|
||||||
|
gap_penalty = min(GAP_MAX_PENALTY, gap_count * GAP_PENALTY_PER_KEYWORD) / 100.0
|
||||||
|
|
||||||
|
return round(max(0.0, base - gap_penalty) * 100, 1)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Public API ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def rank_jobs(
|
||||||
|
jobs: list[dict],
|
||||||
|
search_titles: list[str],
|
||||||
|
target_salary_min: int | None = None,
|
||||||
|
target_salary_max: int | None = None,
|
||||||
|
user_level: int = 3,
|
||||||
|
limit: int = 10,
|
||||||
|
min_score: float = 20.0,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Score and rank pending jobs; return top-N above min_score.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
jobs: List of job dicts (from DB or any source).
|
||||||
|
search_titles: User's desired job titles from search prefs.
|
||||||
|
target_salary_*: User's salary target (from resume profile).
|
||||||
|
user_level: Seniority level 1–7 (use seniority_from_experience()).
|
||||||
|
limit: Stack size; pass 0 to return all qualifying jobs.
|
||||||
|
min_score: Minimum stack_score to include (0–100).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Sorted list (best first) with 'stack_score' key added to each dict.
|
||||||
|
"""
|
||||||
|
scored = []
|
||||||
|
for job in jobs:
|
||||||
|
s = score_job(job, search_titles, target_salary_min, target_salary_max, user_level)
|
||||||
|
if s >= min_score:
|
||||||
|
scored.append({**job, "stack_score": s})
|
||||||
|
|
||||||
|
scored.sort(key=lambda j: j["stack_score"], reverse=True)
|
||||||
|
return scored[:limit] if limit > 0 else scored
|
||||||
Loading…
Reference in a new issue