From 5b296b3e010ec7b85bfd8e16cf36b3dc29a1311c Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Wed, 1 Apr 2026 19:37:29 -0700
Subject: [PATCH] fix(discovery): per-user config dir in cloud mode; normalize
 job_titles key
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

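- discover.py: run_discovery() accepts an optional config_dir param; when
  omitted it is derived as <db_path parent>/config (per-user in cloud mode),
  and falls back to the app-level /app/config if that directory is missing
- discover.py: load_config()/load_blocklist() take the same config_dir; a
  missing notion.yaml now yields an empty field_map instead of raising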
- task_runner.py: passes db_path.parent/config as config_dir to run_discovery
- wizard (0_Setup.py): write 'titles' key not 'job_titles' — matches what
  discover.py and all custom board scrapers read
- adzuna/theladders/craigslist and discover.py's jobspy search term: fall
  back to 'job_titles' for existing profiles written by older wizard versions
- Fixed Sheridan's live config in place (job_titles → titles)
---
 app/pages/0_Setup.py                |  2 +-
 scripts/custom_boards/adzuna.py     |  2 +-
 scripts/custom_boards/craigslist.py |  2 +-
 scripts/custom_boards/theladders.py |  2 +-
 scripts/discover.py                 | 31 +++++++++++++++++++----------
 scripts/task_runner.py              |  3 ++-
 6 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/app/pages/0_Setup.py b/app/pages/0_Setup.py
index 3aed1af..fa28123 100644
--- a/app/pages/0_Setup.py
+++ b/app/pages/0_Setup.py
@@ -631,7 +631,7 @@ elif step == 6:
             )
             default_profile = {
                 "name": "default",
-                "job_titles": titles,
+                "titles": titles,
                 "locations": locations,
                 "remote_only": False,
                 "boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],
diff --git a/scripts/custom_boards/adzuna.py b/scripts/custom_boards/adzuna.py
index fa57bdc..2188d12 100644
--- a/scripts/custom_boards/adzuna.py
+++ b/scripts/custom_boards/adzuna.py
@@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
         print(f"    [adzuna] Skipped — {exc}")
         return []
 
-    titles = profile.get("titles", [])
+    titles = profile.get("titles") or profile.get("job_titles", [])
     hours_old = profile.get("hours_old", 240)
     max_days_old = max(1, hours_old // 24)
     is_remote_search = location.lower() == "remote"
diff --git a/scripts/custom_boards/craigslist.py b/scripts/custom_boards/craigslist.py
index 30226ae..92696d2 100644
--- a/scripts/custom_boards/craigslist.py
+++ b/scripts/custom_boards/craigslist.py
@@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
             return []
         metros = [metro]
 
-    titles: list[str] = profile.get("titles", [])
+    titles: list[str] = profile.get("titles") or profile.get("job_titles", [])
     hours_old: int = profile.get("hours_old", 240)
     cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)
 
diff --git a/scripts/custom_boards/theladders.py b/scripts/custom_boards/theladders.py
index d7330af..47fb462 100644
--- a/scripts/custom_boards/theladders.py
+++ b/scripts/custom_boards/theladders.py
@@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
         )
         page = ctx.new_page()
 
-        for title in profile.get("titles", []):
+        for title in (profile.get("titles") or profile.get("job_titles", [])):
             if len(results) >= results_wanted:
                 break
 
diff --git a/scripts/discover.py b/scripts/discover.py
index 77f8f9d..bc0e3f0 100644
--- a/scripts/discover.py
+++ b/scripts/discover.py
@@ -34,17 +34,21 @@ CUSTOM_SCRAPERS: dict[str, object] = {
 }
 
 
-def load_config() -> tuple[dict, dict]:
-    profiles = yaml.safe_load(PROFILES_CFG.read_text())
-    notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
+def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
+    cfg = config_dir or CONFIG_DIR
+    profiles_path = cfg / "search_profiles.yaml"
+    notion_path = cfg / "notion.yaml"
+    profiles = yaml.safe_load(profiles_path.read_text())
+    notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
     return profiles, notion_cfg
 
 
-def load_blocklist() -> dict:
+def load_blocklist(config_dir: Path | None = None) -> dict:
     """Load global blocklist config. Returns dict with companies, industries, locations lists."""
-    if not BLOCKLIST_CFG.exists():
+    blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml"
+    if not blocklist_path.exists():
         return {"companies": [], "industries": [], "locations": []}
-    raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
+    raw = yaml.safe_load(blocklist_path.read_text()) or {}
     return {
         "companies":  [c.lower() for c in raw.get("companies", []) if c],
         "industries": [i.lower() for i in raw.get("industries", []) if i],
@@ -117,10 +121,15 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
     )
 
 
-def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None:
-    profiles_cfg, notion_cfg = load_config()
-    fm = notion_cfg["field_map"]
-    blocklist = load_blocklist()
+def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None:
+    # In cloud mode, config_dir is the per-user config directory derived from db_path.
+    # Falls back to the app-level /app/config for single-tenant deployments.
+    resolved_cfg = config_dir or Path(db_path).parent / "config"
+    if not resolved_cfg.exists():
+        resolved_cfg = CONFIG_DIR
+    profiles_cfg, notion_cfg = load_config(resolved_cfg)
+    fm = notion_cfg.get("field_map") or {}
+    blocklist = load_blocklist(resolved_cfg)
 
     _bl_summary = {k: len(v) for k, v in blocklist.items() if v}
     if _bl_summary:
@@ -211,7 +220,7 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
                 try:
                     jobspy_kwargs: dict = dict(
                         site_name=boards,
-                        search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
+                        search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
                         location=location,
                         results_wanted=results_per_board,
                         hours_old=profile.get("hours_old", 72),
diff --git a/scripts/task_runner.py b/scripts/task_runner.py
index 6fa4dc5..ea2a652 100644
--- a/scripts/task_runner.py
+++ b/scripts/task_runner.py
@@ -166,7 +166,8 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
                 )
                 return
             from scripts.discover import run_discovery
-            new_count = run_discovery(db_path)
+            from pathlib import Path as _Path
+            new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config")
             n = new_count or 0
             update_task_status(
                 db_path, task_id, "completed",