From 5b296b3e010ec7b85bfd8e16cf36b3dc29a1311c Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 1 Apr 2026 19:37:29 -0700 Subject: [PATCH] fix(discovery): per-user config dir in cloud mode; normalize job_titles key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - discover.py: run_discovery() accepts config_dir param; auto-derives it from db_path parent (per-user in cloud, falls back to /app/config) - task_runner.py: passes db_path.parent/config as config_dir to run_discovery - wizard (0_Setup.py): write 'titles' key not 'job_titles' — matches what discover.py and all custom board scrapers read - adzuna/theladders/craigslist: fall back to 'job_titles' for existing profiles written by older wizard versions - Fixed Sheridan's live config in place (job_titles → titles) --- app/pages/0_Setup.py | 2 +- scripts/custom_boards/adzuna.py | 2 +- scripts/custom_boards/craigslist.py | 2 +- scripts/custom_boards/theladders.py | 2 +- scripts/discover.py | 31 +++++++++++++++++++---------- scripts/task_runner.py | 3 ++- 6 files changed, 26 insertions(+), 16 deletions(-) diff --git a/app/pages/0_Setup.py b/app/pages/0_Setup.py index 3aed1af..fa28123 100644 --- a/app/pages/0_Setup.py +++ b/app/pages/0_Setup.py @@ -631,7 +631,7 @@ elif step == 6: ) default_profile = { "name": "default", - "job_titles": titles, + "titles": titles, "locations": locations, "remote_only": False, "boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"], diff --git a/scripts/custom_boards/adzuna.py b/scripts/custom_boards/adzuna.py index fa57bdc..2188d12 100644 --- a/scripts/custom_boards/adzuna.py +++ b/scripts/custom_boards/adzuna.py @@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict] print(f" [adzuna] Skipped — {exc}") return [] - titles = profile.get("titles", []) + titles = profile.get("titles") or profile.get("job_titles", []) hours_old = profile.get("hours_old", 240) max_days_old = max(1, hours_old // 24) is_remote_search = location.lower() == "remote" diff --git a/scripts/custom_boards/craigslist.py b/scripts/custom_boards/craigslist.py index 30226ae..92696d2 100644 --- a/scripts/custom_boards/craigslist.py +++ b/scripts/custom_boards/craigslist.py @@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict] return [] metros = [metro] - titles: list[str] = profile.get("titles", []) + titles: list[str] = profile.get("titles") or profile.get("job_titles", []) hours_old: int = profile.get("hours_old", 240) cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600) diff --git a/scripts/custom_boards/theladders.py b/scripts/custom_boards/theladders.py index d7330af..47fb462 100644 --- a/scripts/custom_boards/theladders.py +++ b/scripts/custom_boards/theladders.py @@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict] ) page = ctx.new_page() - for title in profile.get("titles", []): + for title in (profile.get("titles") or profile.get("job_titles", [])): if len(results) >= results_wanted: break diff --git a/scripts/discover.py b/scripts/discover.py index 77f8f9d..bc0e3f0 100644 --- a/scripts/discover.py +++ b/scripts/discover.py @@ -34,17 +34,21 @@ CUSTOM_SCRAPERS: dict[str, object] = { } -def load_config() -> tuple[dict, dict]: - profiles = yaml.safe_load(PROFILES_CFG.read_text()) - notion_cfg = yaml.safe_load(NOTION_CFG.read_text()) +def load_config(config_dir: Path | None = None) -> tuple[dict, dict]: + cfg = config_dir or CONFIG_DIR + profiles_path = cfg / "search_profiles.yaml" + notion_path = cfg / "notion.yaml" + profiles = yaml.safe_load(profiles_path.read_text()) + notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None} return profiles, notion_cfg -def load_blocklist() -> dict: +def load_blocklist(config_dir: Path | None = None) -> dict: """Load global blocklist config. Returns dict with companies, industries, locations lists.""" - if not BLOCKLIST_CFG.exists(): + blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml" + if not blocklist_path.exists(): return {"companies": [], "industries": [], "locations": []} - raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {} + raw = yaml.safe_load(blocklist_path.read_text()) or {} return { "companies": [c.lower() for c in raw.get("companies", []) if c], "industries": [i.lower() for i in raw.get("industries", []) if i], @@ -117,10 +121,15 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None: ) -def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None: - profiles_cfg, notion_cfg = load_config() - fm = notion_cfg["field_map"] - blocklist = load_blocklist() +def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None: + # In cloud mode, config_dir is the per-user config directory derived from db_path. + # Falls back to the app-level /app/config for single-tenant deployments. + resolved_cfg = config_dir or Path(db_path).parent / "config" + if not resolved_cfg.exists(): + resolved_cfg = CONFIG_DIR + profiles_cfg, notion_cfg = load_config(resolved_cfg) + fm = notion_cfg.get("field_map") or {} + blocklist = load_blocklist(resolved_cfg) _bl_summary = {k: len(v) for k, v in blocklist.items() if v} if _bl_summary: @@ -211,7 +220,7 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None try: jobspy_kwargs: dict = dict( site_name=boards, - search_term=" OR ".join(f'"{t}"' for t in profile["titles"]), + search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))), location=location, results_wanted=results_per_board, hours_old=profile.get("hours_old", 72), diff --git a/scripts/task_runner.py b/scripts/task_runner.py index 6fa4dc5..ea2a652 100644 --- a/scripts/task_runner.py +++ b/scripts/task_runner.py @@ -166,7 +166,8 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int, ) return from scripts.discover import run_discovery - new_count = run_discovery(db_path) + from pathlib import Path as _Path + new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config") n = new_count or 0 update_task_status( db_path, task_id, "completed",