fix(discovery): per-user config dir in cloud mode; normalize job_titles key
Some checks failed
CI / test (push) Failing after 22s
Some checks failed
CI / test (push) Failing after 22s
- discover.py: run_discovery() accepts config_dir param; auto-derives it from db_path parent (per-user in cloud, falls back to /app/config)
- task_runner.py: passes db_path.parent/config as config_dir to run_discovery
- wizard (0_Setup.py): write 'titles' key not 'job_titles' — matches what discover.py and all custom board scrapers read
- adzuna/theladders/craigslist: fall back to 'job_titles' for existing profiles written by older wizard versions
- Fixed Sheridan's live config in place (job_titles → titles)
This commit is contained in:
parent
4700a2f6d6
commit
5b296b3e01
6 changed files with 26 additions and 16 deletions
|
|
@@ -631,7 +631,7 @@ elif step == 6:
|
|||
)
|
||||
default_profile = {
|
||||
"name": "default",
|
||||
"job_titles": titles,
|
||||
"titles": titles,
|
||||
"locations": locations,
|
||||
"remote_only": False,
|
||||
"boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],
|
||||
|
|
|
|||
|
|
@@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
|
|||
print(f" [adzuna] Skipped — {exc}")
|
||||
return []
|
||||
|
||||
titles = profile.get("titles", [])
|
||||
titles = profile.get("titles") or profile.get("job_titles", [])
|
||||
hours_old = profile.get("hours_old", 240)
|
||||
max_days_old = max(1, hours_old // 24)
|
||||
is_remote_search = location.lower() == "remote"
|
||||
|
|
|
|||
|
|
@@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
|
|||
return []
|
||||
metros = [metro]
|
||||
|
||||
titles: list[str] = profile.get("titles", [])
|
||||
titles: list[str] = profile.get("titles") or profile.get("job_titles", [])
|
||||
hours_old: int = profile.get("hours_old", 240)
|
||||
cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)
|
||||
|
||||
|
|
|
|||
|
|
@@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
|
|||
)
|
||||
page = ctx.new_page()
|
||||
|
||||
for title in profile.get("titles", []):
|
||||
for title in (profile.get("titles") or profile.get("job_titles", [])):
|
||||
if len(results) >= results_wanted:
|
||||
break
|
||||
|
||||
|
|
|
|||
|
|
@@ -34,17 +34,21 @@ CUSTOM_SCRAPERS: dict[str, object] = {
|
|||
}
|
||||
|
||||
|
||||
def load_config() -> tuple[dict, dict]:
|
||||
profiles = yaml.safe_load(PROFILES_CFG.read_text())
|
||||
notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
|
||||
def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
|
||||
cfg = config_dir or CONFIG_DIR
|
||||
profiles_path = cfg / "search_profiles.yaml"
|
||||
notion_path = cfg / "notion.yaml"
|
||||
profiles = yaml.safe_load(profiles_path.read_text())
|
||||
notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
|
||||
return profiles, notion_cfg
|
||||
|
||||
|
||||
def load_blocklist() -> dict:
|
||||
def load_blocklist(config_dir: Path | None = None) -> dict:
|
||||
"""Load global blocklist config. Returns dict with companies, industries, locations lists."""
|
||||
if not BLOCKLIST_CFG.exists():
|
||||
blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml"
|
||||
if not blocklist_path.exists():
|
||||
return {"companies": [], "industries": [], "locations": []}
|
||||
raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
|
||||
raw = yaml.safe_load(blocklist_path.read_text()) or {}
|
||||
return {
|
||||
"companies": [c.lower() for c in raw.get("companies", []) if c],
|
||||
"industries": [i.lower() for i in raw.get("industries", []) if i],
|
||||
|
|
@@ -117,10 +121,15 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
|
|||
)
|
||||
|
||||
|
||||
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None:
|
||||
profiles_cfg, notion_cfg = load_config()
|
||||
fm = notion_cfg["field_map"]
|
||||
blocklist = load_blocklist()
|
||||
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None:
|
||||
# In cloud mode, config_dir is the per-user config directory derived from db_path.
|
||||
# Falls back to the app-level /app/config for single-tenant deployments.
|
||||
resolved_cfg = config_dir or Path(db_path).parent / "config"
|
||||
if not resolved_cfg.exists():
|
||||
resolved_cfg = CONFIG_DIR
|
||||
profiles_cfg, notion_cfg = load_config(resolved_cfg)
|
||||
fm = notion_cfg.get("field_map") or {}
|
||||
blocklist = load_blocklist(resolved_cfg)
|
||||
|
||||
_bl_summary = {k: len(v) for k, v in blocklist.items() if v}
|
||||
if _bl_summary:
|
||||
|
|
@@ -211,7 +220,7 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
|
|||
try:
|
||||
jobspy_kwargs: dict = dict(
|
||||
site_name=boards,
|
||||
search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
|
||||
search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
|
||||
location=location,
|
||||
results_wanted=results_per_board,
|
||||
hours_old=profile.get("hours_old", 72),
|
||||
|
|
|
|||
|
|
@@ -166,7 +166,8 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
|||
)
|
||||
return
|
||||
from scripts.discover import run_discovery
|
||||
new_count = run_discovery(db_path)
|
||||
from pathlib import Path as _Path
|
||||
new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config")
|
||||
n = new_count or 0
|
||||
update_task_status(
|
||||
db_path, task_id, "completed",
|
||||
|
|
|
|||
Loading…
Reference in a new issue