fix(discovery): per-user config dir in cloud mode; normalize job_titles key

- discover.py: run_discovery() accepts a config_dir param; when omitted, it is auto-derived from db_path's parent (per-user in cloud mode) and falls back to /app/config
- task_runner.py: passes db_path.parent/config as config_dir to run_discovery
- wizard (0_Setup.py): write the 'titles' key, not 'job_titles' — matches what discover.py and all custom board scrapers read
- adzuna/theladders/craigslist: fall back to 'job_titles' for existing profiles written by older wizard versions
- Fixed Sheridan's live config in place (job_titles → titles)
2026-04-01 19:37:29 -07:00 · 2026-04-01 19:37:29 -07:00 · 5b296b3e01
commit 5b296b3e01
parent 4700a2f6d6
6 changed files with 26 additions and 16 deletions
--- a/app/pages/0_Setup.py
+++ b/app/pages/0_Setup.py
@ -631,7 +631,7 @@ elif step == 6:
            )
            default_profile = {
                "name": "default",
-                "job_titles": titles,
+                "titles": titles,
                "locations": locations,
                "remote_only": False,
                "boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],
--- a/scripts/custom_boards/adzuna.py
+++ b/scripts/custom_boards/adzuna.py
@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
        print(f"    [adzuna] Skipped — {exc}")
        return []

-    titles = profile.get("titles", [])
+    titles = profile.get("titles") or profile.get("job_titles", [])
    hours_old = profile.get("hours_old", 240)
    max_days_old = max(1, hours_old // 24)
    is_remote_search = location.lower() == "remote"
--- a/scripts/custom_boards/craigslist.py
+++ b/scripts/custom_boards/craigslist.py
@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
            return []
        metros = [metro]

-    titles: list[str] = profile.get("titles", [])
+    titles: list[str] = profile.get("titles") or profile.get("job_titles", [])
    hours_old: int = profile.get("hours_old", 240)
    cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)

--- a/scripts/custom_boards/theladders.py
+++ b/scripts/custom_boards/theladders.py
@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
        )
        page = ctx.new_page()

-        for title in profile.get("titles", []):
+        for title in (profile.get("titles") or profile.get("job_titles", [])):
            if len(results) >= results_wanted:
                break

--- a/scripts/discover.py
+++ b/scripts/discover.py
@ -34,17 +34,21 @@ CUSTOM_SCRAPERS: dict[str, object] = {
 }


-def load_config() -> tuple[dict, dict]:
-    profiles = yaml.safe_load(PROFILES_CFG.read_text())
-    notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
+def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
+    cfg = config_dir or CONFIG_DIR
+    profiles_path = cfg / "search_profiles.yaml"
+    notion_path = cfg / "notion.yaml"
+    profiles = yaml.safe_load(profiles_path.read_text())
+    notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
    return profiles, notion_cfg


-def load_blocklist() -> dict:
+def load_blocklist(config_dir: Path | None = None) -> dict:
    """Load global blocklist config. Returns dict with companies, industries, locations lists."""
-    if not BLOCKLIST_CFG.exists():
+    blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml"
+    if not blocklist_path.exists():
        return {"companies": [], "industries": [], "locations": []}
-    raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
+    raw = yaml.safe_load(blocklist_path.read_text()) or {}
    return {
        "companies":  [c.lower() for c in raw.get("companies", []) if c],
        "industries": [i.lower() for i in raw.get("industries", []) if i],
@ -117,10 +121,15 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
    )


-def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None:
-    profiles_cfg, notion_cfg = load_config()
-    fm = notion_cfg["field_map"]
-    blocklist = load_blocklist()
+def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None:
+    # In cloud mode, config_dir is the per-user config directory derived from db_path.
+    # Falls back to the app-level /app/config for single-tenant deployments.
+    resolved_cfg = config_dir or Path(db_path).parent / "config"
+    if not resolved_cfg.exists():
+        resolved_cfg = CONFIG_DIR
+    profiles_cfg, notion_cfg = load_config(resolved_cfg)
+    fm = notion_cfg.get("field_map") or {}
+    blocklist = load_blocklist(resolved_cfg)

    _bl_summary = {k: len(v) for k, v in blocklist.items() if v}
    if _bl_summary:
@ -211,7 +220,7 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
                try:
                    jobspy_kwargs: dict = dict(
                        site_name=boards,
-                        search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
+                        search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
                        location=location,
                        results_wanted=results_per_board,
                        hours_old=profile.get("hours_old", 72),
--- a/scripts/task_runner.py
+++ b/scripts/task_runner.py
@ -166,7 +166,8 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
                )
                return
            from scripts.discover import run_discovery
-            new_count = run_discovery(db_path)
+            from pathlib import Path as _Path
+            new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config")
            n = new_count or 0
            update_task_status(
                db_path, task_id, "completed",