diff --git a/scripts/manage-ui.sh b/scripts/manage-ui.sh index 55cadd9..b676a9f 100755 --- a/scripts/manage-ui.sh +++ b/scripts/manage-ui.sh @@ -5,7 +5,18 @@ set -euo pipefail REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -STREAMLIT_BIN="/devl/miniconda3/envs/job-seeker/bin/streamlit" +STREAMLIT_BIN="${STREAMLIT_BIN:-streamlit}" +# Resolve: conda env bin, system PATH, or explicit override +if [[ "$STREAMLIT_BIN" == "streamlit" ]]; then + for _candidate in \ + "$(conda run -n job-seeker which streamlit 2>/dev/null)" \ + "$(which streamlit 2>/dev/null)"; do + if [[ -n "$_candidate" && -x "$_candidate" ]]; then + STREAMLIT_BIN="$_candidate" + break + fi + done +fi APP_ENTRY="$REPO_DIR/app/app.py" PID_FILE="$REPO_DIR/.streamlit.pid" LOG_FILE="$REPO_DIR/.streamlit.log" diff --git a/scripts/migrate.py b/scripts/migrate.py new file mode 100644 index 0000000..d370fb6 --- /dev/null +++ b/scripts/migrate.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python3 +""" +Peregrine migration tool — import config and data from a legacy job-seeker repo. + +Usage: + python scripts/migrate.py # dry run (show what would change) + python scripts/migrate.py --apply # write files + python scripts/migrate.py --apply --copy-db # also copy staging.db + python scripts/migrate.py --source /path/to/repo # non-default source + +What it migrates: + - config/user.yaml (generated from source resume + scripts) + - config/notion.yaml (copied — contains live token) + - config/email.yaml (copied — contains IMAP credentials) + - config/adzuna.yaml (copied — API credentials) + - config/craigslist.yaml (copied — metro/location map) + - config/search_profiles.yaml (copied — user's job search targets) + - config/resume_keywords.yaml (copied) + - config/blocklist.yaml (copied) + - config/llm.yaml (merges fine-tuned model name from source) + - aihawk/data_folder/plain_text_resume.yaml (copied if aihawk present) + - staging.db (optional — copies current DB state) +""" +import argparse +import shutil +import sys +from pathlib import Path +from textwrap import dedent + +import yaml + +ROOT = Path(__file__).parent.parent + + +def _load_yaml(path: Path) -> dict: + if path.exists(): + return yaml.safe_load(path.read_text()) or {} + return {} + + +def _write_yaml(path: Path, data: dict, apply: bool) -> None: + text = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False) + if apply: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text) + print(f" ✓ wrote {path.relative_to(ROOT)}") + else: + print(f" (dry) would write {path.relative_to(ROOT)}") + + +def _copy_file(src: Path, dest: Path, apply: bool) -> bool: + if not src.exists(): + print(f" ✗ skip {dest.name} — not found at {src}") + return False + if apply: + dest.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dest) + print(f" ✓ copied {dest.relative_to(ROOT)}") + else: + print(f" (dry) would copy {src} → {dest.relative_to(ROOT)}") + return True + + +def _extract_career_summary(source: Path) -> str: + """Pull career summary from source generate_cover_letter.py SYSTEM_CONTEXT.""" + gcl = source / "scripts" / "generate_cover_letter.py" + if not gcl.exists(): + return "" + text = gcl.read_text() + start = text.find('SYSTEM_CONTEXT = """') + if start == -1: + start = text.find("SYSTEM_CONTEXT = '''") + if start == -1: + return "" + start = text.find('"""', start) + 3 + end = text.find('"""', start) + if end == -1: + return "" + block = text[start:end].strip() + # Extract just the Background lines (skip the role description preamble) + lines = [l.strip("- ").strip() for l in block.splitlines() if l.strip().startswith("-")] + return " ".join(lines[:4]) if lines else block[:300] + + +def _extract_personal_info(source: Path) -> dict: + """Extract personal info from aihawk resume yaml.""" + resume = source / "aihawk" / "data_folder" / "plain_text_resume.yaml" + if not resume.exists(): + resume = source / "config" / "plain_text_resume.yaml" + if not resume.exists(): + return {} + data = _load_yaml(resume) + info = data.get("personal_information", {}) + return { + "name": f"{info.get('name', '')} {info.get('surname', '')}".strip(), + "email": info.get("email", ""), + "phone": str(info.get("phone", "")), + "linkedin": info.get("linkedin", ""), + } + + +def _extract_docs_dir(source: Path) -> str: + """Try to find docs directory from source scripts.""" + gcl = source / "scripts" / "generate_cover_letter.py" + if gcl.exists(): + for line in gcl.read_text().splitlines(): + if "LETTERS_DIR" in line and "Path(" in line: + # e.g. LETTERS_DIR = Path("/Library/Documents/JobSearch") + start = line.find('"') + end = line.rfind('"') + if start != end: + return line[start + 1:end] + return "~/Documents/JobSearch" + + +def _build_user_yaml(source: Path, dest: Path, apply: bool) -> None: + print("\n── Generating config/user.yaml") + info = _extract_personal_info(source) + career_summary = _extract_career_summary(source) + docs_dir = _extract_docs_dir(source) + + # Mission preferences — extracted from source _MISSION_NOTES + gcl_text = (source / "scripts" / "generate_cover_letter.py").read_text() \ + if (source / "scripts" / "generate_cover_letter.py").exists() else "" + mission_prefs: dict = {} + # The original _MISSION_NOTES encoded personal alignment notes inline; + # we set sensible short personal notes for each industry. + if "music" in gcl_text and "personal passion" in gcl_text: + mission_prefs["music"] = ( + "I have a real personal passion for the music scene and would love " + "to apply my CS skills in this space." + ) + if "animal_welfare" in gcl_text or "animal" in gcl_text: + mission_prefs["animal_welfare"] = ( + "Animal welfare is a dream domain for me — a genuine personal passion " + "that deeply aligns with my values." + ) + if "education" in gcl_text and "EdTech" in gcl_text: + mission_prefs["education"] = ( + "Children's education and EdTech reflect genuine personal values around " + "learning and young people that I'd love to connect to my CS work." + ) + + data = { + "name": info.get("name", ""), + "email": info.get("email", ""), + "phone": info.get("phone", ""), + "linkedin": info.get("linkedin", ""), + "career_summary": career_summary, + "nda_companies": [], + "mission_preferences": mission_prefs, + "candidate_accessibility_focus": False, + "candidate_lgbtq_focus": False, + "docs_dir": docs_dir, + "ollama_models_dir": "~/models/ollama", + "vllm_models_dir": "~/models/vllm", + "inference_profile": "dual-gpu", + "services": { + "streamlit_port": 8501, + "ollama_host": "localhost", + "ollama_port": 11434, + "ollama_ssl": False, + "ollama_ssl_verify": True, + "vllm_host": "localhost", + "vllm_port": 8000, + "vllm_ssl": False, + "vllm_ssl_verify": True, + "searxng_host": "localhost", + "searxng_port": 8888, + "searxng_ssl": False, + "searxng_ssl_verify": True, + }, + } + _write_yaml(dest / "config" / "user.yaml", data, apply) + + if not apply: + print(f" name: {data['name'] or '(not found)'}") + print(f" email: {data['email'] or '(not found)'}") + print(f" docs: {data['docs_dir']}") + print(f" profile: {data['inference_profile']}") + + +def _copy_configs(source: Path, dest: Path, apply: bool) -> None: + print("\n── Copying config files") + files = [ + "config/notion.yaml", + "config/email.yaml", + "config/adzuna.yaml", + "config/craigslist.yaml", + "config/search_profiles.yaml", + "config/resume_keywords.yaml", + "config/blocklist.yaml", + ] + for rel in files: + _copy_file(source / rel, dest / rel, apply) + + +def _copy_aihawk_resume(source: Path, dest: Path, apply: bool) -> None: + print("\n── Copying AIHawk resume profile") + src = source / "aihawk" / "data_folder" / "plain_text_resume.yaml" + dst = dest / "aihawk" / "data_folder" / "plain_text_resume.yaml" + _copy_file(src, dst, apply) + + +def _merge_llm_yaml(source: Path, dest: Path, apply: bool) -> None: + """Copy the fine-tuned model name from source llm.yaml into dest llm.yaml.""" + print("\n── Merging llm.yaml (fine-tuned model name)") + src_cfg = _load_yaml(source / "config" / "llm.yaml") + dst_cfg = _load_yaml(dest / "config" / "llm.yaml") + + src_model = src_cfg.get("backends", {}).get("ollama", {}).get("model", "") + if src_model and src_model != "llama3.2:3b": + dst_cfg.setdefault("backends", {}).setdefault("ollama", {})["model"] = src_model + print(f" model: {src_model}") + _write_yaml(dest / "config" / "llm.yaml", dst_cfg, apply) + else: + print(f" no custom model in source — keeping {dst_cfg.get('backends', {}).get('ollama', {}).get('model', 'default')}") + + +def _copy_db(source: Path, dest: Path, apply: bool) -> None: + print("\n── Copying staging database") + _copy_file(source / "staging.db", dest / "staging.db", apply) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Migrate config from legacy job-seeker repo to Peregrine") + parser.add_argument("--source", default="/devl/job-seeker", + help="Path to legacy job-seeker repo (default: /devl/job-seeker)") + parser.add_argument("--dest", default=str(ROOT), + help="Path to Peregrine repo (default: this repo)") + parser.add_argument("--apply", action="store_true", + help="Actually write files (default is dry run)") + parser.add_argument("--copy-db", action="store_true", + help="Also copy staging.db") + args = parser.parse_args() + + source = Path(args.source).expanduser().resolve() + dest = Path(args.dest).expanduser().resolve() + + if not source.exists(): + print(f"Source repo not found: {source}", file=sys.stderr) + sys.exit(1) + + mode = "APPLY" if args.apply else "DRY RUN" + print(f"Peregrine migration [{mode}]") + print(f" source: {source}") + print(f" dest: {dest}") + + _build_user_yaml(source, dest, args.apply) + _copy_configs(source, dest, args.apply) + _copy_aihawk_resume(source, dest, args.apply) + _merge_llm_yaml(source, dest, args.apply) + + if args.copy_db: + _copy_db(source, dest, args.apply) + + print() + if args.apply: + print("Migration complete.") + print("Next: bash scripts/manage-ui.sh start") + else: + print("Dry run complete. Re-run with --apply to write files.") + if args.copy_db or True: + print("Add --copy-db to also migrate staging.db.") + + +if __name__ == "__main__": + main()