fix(discovery): per-user config dir in cloud mode; normalize job_titles key
Some checks failed
CI / test (push) Failing after 22s

- discover.py: run_discovery() accepts config_dir param; auto-derives it
  from db_path parent (per-user in cloud, falls back to /app/config)
- task_runner.py: passes db_path.parent/config as config_dir to run_discovery
- wizard (0_Setup.py): writes the 'titles' key instead of 'job_titles',
  matching what discover.py and all custom board scrapers read
- adzuna/theladders/craigslist: fall back to 'job_titles' for existing
  profiles written by older wizard versions
- Fixed Sheridan's live config in place (job_titles → titles)
This commit is contained in:
pyr0ball 2026-04-01 19:37:29 -07:00
parent 4700a2f6d6
commit 5b296b3e01
6 changed files with 26 additions and 16 deletions

View file

@ -631,7 +631,7 @@ elif step == 6:
)
default_profile = {
"name": "default",
"job_titles": titles,
"titles": titles,
"locations": locations,
"remote_only": False,
"boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],

View file

@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
print(f" [adzuna] Skipped — {exc}")
return []
titles = profile.get("titles", [])
titles = profile.get("titles") or profile.get("job_titles", [])
hours_old = profile.get("hours_old", 240)
max_days_old = max(1, hours_old // 24)
is_remote_search = location.lower() == "remote"

View file

@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
return []
metros = [metro]
titles: list[str] = profile.get("titles", [])
titles: list[str] = profile.get("titles") or profile.get("job_titles", [])
hours_old: int = profile.get("hours_old", 240)
cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)

View file

@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
)
page = ctx.new_page()
for title in profile.get("titles", []):
for title in (profile.get("titles") or profile.get("job_titles", [])):
if len(results) >= results_wanted:
break

View file

@ -34,17 +34,21 @@ CUSTOM_SCRAPERS: dict[str, object] = {
}
def load_config() -> tuple[dict, dict]:
profiles = yaml.safe_load(PROFILES_CFG.read_text())
notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
    """Load the search-profile and Notion configuration from a config directory.

    Args:
        config_dir: Directory containing ``search_profiles.yaml`` and,
            optionally, ``notion.yaml``. When omitted, the module-level
            ``CONFIG_DIR`` is used.

    Returns:
        A ``(profiles, notion_cfg)`` tuple of dicts. ``profiles`` is the parsed
        content of ``search_profiles.yaml`` (``{}`` if the file is empty).
        ``notion_cfg`` is the parsed content of ``notion.yaml``; when that file
        is missing or empty, a placeholder with an empty ``field_map`` and
        ``None`` for ``token`` and ``database_id`` is returned instead.

    Raises:
        OSError: If ``search_profiles.yaml`` cannot be read (e.g. it does not
            exist); unlike ``notion.yaml``, it is read unconditionally.
    """
    cfg = config_dir or CONFIG_DIR

    # safe_load yields None for an empty document; normalise to a dict so the
    # declared return type holds, mirroring the `or {}` guard in load_blocklist.
    profiles = yaml.safe_load((cfg / "search_profiles.yaml").read_text()) or {}

    notion_cfg = None
    notion_path = cfg / "notion.yaml"
    if notion_path.exists():
        notion_cfg = yaml.safe_load(notion_path.read_text())
    if not notion_cfg:
        # notion.yaml is optional: a missing or empty file falls back to a
        # placeholder so callers can still use dict access on the result.
        notion_cfg = {"field_map": {}, "token": None, "database_id": None}

    return profiles, notion_cfg
def load_blocklist() -> dict:
def load_blocklist(config_dir: Path | None = None) -> dict:
"""Load global blocklist config. Returns dict with companies, industries, locations lists."""
if not BLOCKLIST_CFG.exists():
blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml"
if not blocklist_path.exists():
return {"companies": [], "industries": [], "locations": []}
raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
raw = yaml.safe_load(blocklist_path.read_text()) or {}
return {
"companies": [c.lower() for c in raw.get("companies", []) if c],
"industries": [i.lower() for i in raw.get("industries", []) if i],
@ -117,10 +121,15 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
)
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None:
profiles_cfg, notion_cfg = load_config()
fm = notion_cfg["field_map"]
blocklist = load_blocklist()
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None:
# In cloud mode, config_dir is the per-user config directory derived from db_path.
# Falls back to the app-level /app/config for single-tenant deployments.
resolved_cfg = config_dir or Path(db_path).parent / "config"
if not resolved_cfg.exists():
resolved_cfg = CONFIG_DIR
profiles_cfg, notion_cfg = load_config(resolved_cfg)
fm = notion_cfg.get("field_map") or {}
blocklist = load_blocklist(resolved_cfg)
_bl_summary = {k: len(v) for k, v in blocklist.items() if v}
if _bl_summary:
@ -211,7 +220,7 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
try:
jobspy_kwargs: dict = dict(
site_name=boards,
search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
location=location,
results_wanted=results_per_board,
hours_old=profile.get("hours_old", 72),

View file

@ -166,7 +166,8 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
)
return
from scripts.discover import run_discovery
new_count = run_discovery(db_path)
from pathlib import Path as _Path
new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config")
n = new_count or 0
update_task_status(
db_path, task_id, "completed",