scripts/migrate.py: - dry-run by default; --apply writes files; --copy-db migrates staging.db - generates config/user.yaml from source repo's resume + cover letter scripts - copies gitignored configs (notion, email, adzuna, craigslist, search profiles, resume keywords, blocklist, aihawk resume) - merges fine-tuned model name from source llm.yaml into dest llm.yaml scripts/manage-ui.sh: - STREAMLIT_BIN no longer hardcoded; auto-resolves via conda env or PATH; override with STREAMLIT_BIN env var scripts/manage-vllm.sh: - VLLM_BIN and MODEL_DIR now read from env vars with portable defaults
268 lines
10 KiB
Python
268 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Peregrine migration tool — import config and data from a legacy job-seeker repo.
|
|
|
|
Usage:
|
|
python scripts/migrate.py # dry run (show what would change)
|
|
python scripts/migrate.py --apply # write files
|
|
python scripts/migrate.py --apply --copy-db # also copy staging.db
|
|
python scripts/migrate.py --source /path/to/repo # non-default source
|
|
|
|
What it migrates:
|
|
- config/user.yaml (generated from source resume + scripts)
|
|
- config/notion.yaml (copied — contains live token)
|
|
- config/email.yaml (copied — contains IMAP credentials)
|
|
- config/adzuna.yaml (copied — API credentials)
|
|
- config/craigslist.yaml (copied — metro/location map)
|
|
- config/search_profiles.yaml (copied — user's job search targets)
|
|
- config/resume_keywords.yaml (copied)
|
|
- config/blocklist.yaml (copied)
|
|
- config/llm.yaml (merges fine-tuned model name from source)
|
|
- aihawk/data_folder/plain_text_resume.yaml (copied if aihawk present)
|
|
- staging.db (optional — copies current DB state)
|
|
"""
|
|
import argparse
import re
import shutil
import sys
from pathlib import Path
from textwrap import dedent

import yaml
|
|
|
|
# Repository root, two directory levels above this file.
# Resolved the same way main() resolves --dest, so that Path.relative_to(ROOT)
# on destination paths also works when __file__ is relative or symlinked.
ROOT = Path(__file__).parent.parent.resolve()
|
|
|
|
|
|
def _load_yaml(path: Path) -> dict:
    """Load a YAML mapping from *path*, tolerating missing or empty files.

    Args:
        path: Location of the YAML file to read.

    Returns:
        The parsed top-level mapping, or an empty dict when the file does not
        exist, is empty, or its top-level value is not a mapping.
    """
    try:
        # Decode explicitly as UTF-8 so parsing does not depend on the locale.
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return {}
    data = yaml.safe_load(text)
    # Callers chain .get() on the result, so anything that is not a mapping
    # (empty document, bare list, scalar) is treated as "no config".
    return data if isinstance(data, dict) else {}
|
|
|
|
|
|
def _write_yaml(path: Path, data: dict, apply: bool) -> None:
    """Serialise *data* as YAML to *path*, or report what would be written.

    Args:
        path: Destination file.
        data: Mapping to dump; key order is preserved (``sort_keys=False``).
        apply: When true the file is written (creating missing parent
            directories); otherwise only a dry-run message is printed.
    """
    # Dump before branching so serialisation problems surface in dry runs too.
    text = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)

    # --dest may point outside ROOT, in which case relative_to() raises
    # ValueError; fall back to the full path for display.
    try:
        shown = path.relative_to(ROOT)
    except ValueError:
        shown = path

    if apply:
        path.parent.mkdir(parents=True, exist_ok=True)
        # allow_unicode=True emits raw non-ASCII characters, so pin the
        # encoding instead of relying on the locale default.
        path.write_text(text, encoding="utf-8")
        print(f" ✓ wrote {shown}")
    else:
        print(f" (dry) would write {shown}")
|
|
|
|
|
|
def _copy_file(src: Path, dest: Path, apply: bool) -> bool:
    """Copy *src* to *dest* (with metadata), or report what would be copied.

    Args:
        src: File to copy from.
        dest: File to copy to; missing parent directories are created.
        apply: When false nothing is written and a dry-run line is printed.

    Returns:
        False when *src* does not exist (a skip message is printed),
        True otherwise.
    """
    if not src.exists():
        print(f" ✗ skip {dest.name} — not found at {src}")
        return False

    # --dest may point outside ROOT, in which case relative_to() raises
    # ValueError; fall back to the full path for display.
    try:
        shown = dest.relative_to(ROOT)
    except ValueError:
        shown = dest

    if apply:
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)
        print(f" ✓ copied {shown}")
    else:
        print(f" (dry) would copy {src} → {shown}")
    return True
|
|
|
|
|
|
def _extract_career_summary(source: Path) -> str:
    """Pull a short career summary out of the legacy cover-letter script.

    Looks for a triple-quoted ``SYSTEM_CONTEXT`` assignment in
    ``scripts/generate_cover_letter.py`` and condenses its bullet lines.

    Args:
        source: Root of the legacy repo.

    Returns:
        The first four bullet lines joined with spaces; the first 300
        characters of the block when it contains no bullet lines; ``""`` when
        the script, the assignment, or its closing delimiter cannot be found.
    """
    script = source / "scripts" / "generate_cover_letter.py"
    if not script.exists():
        return ""
    text = script.read_text(encoding="utf-8")

    # Accept either triple-quote style, preferring double quotes, and close
    # the block with the SAME delimiter that opened it.
    for quote in ('"""', "'''"):
        marker = f"SYSTEM_CONTEXT = {quote}"
        found = text.find(marker)
        if found != -1:
            break
    else:
        return ""

    start = found + len(marker)
    end = text.find(quote, start)
    if end == -1:
        return ""
    block = text[start:end].strip()

    # Keep just the bullet ("- ...") lines, skipping the role-description
    # preamble, and drop the bullet marker itself.
    bullets = [
        ln.strip().strip("- ").strip()
        for ln in block.splitlines()
        if ln.strip().startswith("-")
    ]
    return " ".join(bullets[:4]) if bullets else block[:300]
|
|
|
|
|
|
def _extract_personal_info(source: Path) -> dict:
    """Extract contact details from the legacy AIHawk resume YAML.

    The resume is looked up at ``aihawk/data_folder/plain_text_resume.yaml``
    first and at ``config/plain_text_resume.yaml`` second.

    Args:
        source: Root of the legacy repo.

    Returns:
        A dict with ``name`` (name and surname joined), ``email``, ``phone``
        and ``linkedin`` as strings, or an empty dict when neither resume
        file exists.
    """
    candidates = (
        source / "aihawk" / "data_folder" / "plain_text_resume.yaml",
        source / "config" / "plain_text_resume.yaml",
    )
    resume = next((p for p in candidates if p.exists()), None)
    if resume is None:
        return {}

    # A section that is present but empty parses to None; treat it (and any
    # other non-mapping value) as an empty mapping instead of crashing.
    info = _load_yaml(resume).get("personal_information")
    if not isinstance(info, dict):
        info = {}

    def field(key: str) -> str:
        # Null or absent values become "" rather than the literal text "None".
        value = info.get(key)
        return "" if value is None else str(value)

    return {
        "name": f"{field('name')} {field('surname')}".strip(),
        "email": field("email"),
        "phone": field("phone"),
        "linkedin": field("linkedin"),
    }
|
|
|
|
|
|
def _extract_docs_dir(source: Path) -> str:
    """Find the documents directory configured in the legacy cover-letter script.

    Scans ``scripts/generate_cover_letter.py`` for the first line that
    mentions ``LETTERS_DIR`` and passes a string literal to ``Path(...)``,
    e.g. ``LETTERS_DIR = Path("/Library/Documents/JobSearch")``.

    Args:
        source: Root of the legacy repo.

    Returns:
        The literal path from that line, or ``"~/Documents/JobSearch"`` when
        the script or a matching line is not found.
    """
    script = source / "scripts" / "generate_cover_letter.py"
    if script.exists():
        for line in script.read_text(encoding="utf-8").splitlines():
            if "LETTERS_DIR" not in line:
                continue
            # Capture only the first literal argument of Path(...), in either
            # quote style, so trailing comments or extra literals on the same
            # line cannot leak into the result.
            match = re.search(r"""Path\(\s*(['"])(.*?)\1""", line)
            if match:
                return match.group(2)
    return "~/Documents/JobSearch"
|
|
|
|
|
|
# Mission-alignment notes written to user.yaml. Each rule is
# (industry key, keywords that must ALL occur in the legacy script, note).
# The legacy _MISSION_NOTES encoded personal alignment notes inline; here a
# short personal note is set for each industry the script mentions.
_MISSION_NOTE_RULES = (
    (
        "music",
        ("music", "personal passion"),
        "I have a real personal passion for the music scene and would love "
        "to apply my CS skills in this space.",
    ),
    (
        "animal_welfare",
        # "animal" also covers "animal_welfare", so one keyword is enough.
        ("animal",),
        "Animal welfare is a dream domain for me — a genuine personal passion "
        "that deeply aligns with my values.",
    ),
    (
        "education",
        ("education", "EdTech"),
        "Children's education and EdTech reflect genuine personal values around "
        "learning and young people that I'd love to connect to my CS work.",
    ),
)


def _detect_mission_preferences(script_text: str) -> dict:
    """Return the mission notes whose keywords all occur in *script_text*."""
    return {
        industry: note
        for industry, keywords, note in _MISSION_NOTE_RULES
        if all(word in script_text for word in keywords)
    }


def _build_user_yaml(source: Path, dest: Path, apply: bool) -> None:
    """Generate ``config/user.yaml`` in *dest* from data found in *source*.

    Personal details, career summary and documents directory are extracted
    from the legacy repo; the remaining settings are written with fixed
    default values.

    Args:
        source: Root of the legacy repo.
        dest: Root of the destination repo.
        apply: When false nothing is written; a preview of the key extracted
            values is printed instead.
    """
    print("\n── Generating config/user.yaml")
    info = _extract_personal_info(source)
    career_summary = _extract_career_summary(source)
    docs_dir = _extract_docs_dir(source)

    # Mission preferences are inferred from keywords in the legacy script.
    script = source / "scripts" / "generate_cover_letter.py"
    gcl_text = script.read_text(encoding="utf-8") if script.exists() else ""
    mission_prefs = _detect_mission_preferences(gcl_text)

    data = {
        "name": info.get("name", ""),
        "email": info.get("email", ""),
        "phone": info.get("phone", ""),
        "linkedin": info.get("linkedin", ""),
        "career_summary": career_summary,
        "nda_companies": [],
        "mission_preferences": mission_prefs,
        "candidate_accessibility_focus": False,
        "candidate_lgbtq_focus": False,
        "docs_dir": docs_dir,
        "ollama_models_dir": "~/models/ollama",
        "vllm_models_dir": "~/models/vllm",
        "inference_profile": "dual-gpu",
        "services": {
            "streamlit_port": 8501,
            "ollama_host": "localhost",
            "ollama_port": 11434,
            "ollama_ssl": False,
            "ollama_ssl_verify": True,
            "vllm_host": "localhost",
            "vllm_port": 8000,
            "vllm_ssl": False,
            "vllm_ssl_verify": True,
            "searxng_host": "localhost",
            "searxng_port": 8888,
            "searxng_ssl": False,
            "searxng_ssl_verify": True,
        },
    }
    _write_yaml(dest / "config" / "user.yaml", data, apply)

    if not apply:
        # Dry run: show the values that were actually extracted.
        print(f" name: {data['name'] or '(not found)'}")
        print(f" email: {data['email'] or '(not found)'}")
        print(f" docs: {data['docs_dir']}")
        print(f" profile: {data['inference_profile']}")
|
|
|
|
|
|
def _copy_configs(source: Path, dest: Path, apply: bool) -> None:
    """Copy each known per-user config file from *source* into *dest*.

    Files missing in *source* are skipped by ``_copy_file``; *apply* is
    passed through unchanged.
    """
    print("\n── Copying config files")
    config_names = (
        "notion",
        "email",
        "adzuna",
        "craigslist",
        "search_profiles",
        "resume_keywords",
        "blocklist",
    )
    for name in config_names:
        relative = f"config/{name}.yaml"
        _copy_file(source / relative, dest / relative, apply)
|
|
|
|
|
|
def _copy_aihawk_resume(source: Path, dest: Path, apply: bool) -> None:
    """Copy the AIHawk plain-text resume to the same relative path in *dest*."""
    print("\n── Copying AIHawk resume profile")
    resume_rel = Path("aihawk") / "data_folder" / "plain_text_resume.yaml"
    _copy_file(source / resume_rel, dest / resume_rel, apply)
|
|
|
|
|
|
def _merge_llm_yaml(source: Path, dest: Path, apply: bool) -> None:
    """Copy the fine-tuned model name from source llm.yaml into dest llm.yaml.

    Only ``backends.ollama.model`` is transferred, and only when the source
    value is set and differs from ``llama3.2:3b``; all other destination
    settings are kept.

    Args:
        source: Root of the legacy repo.
        dest: Root of the destination repo.
        apply: Passed to ``_write_yaml``; when false nothing is written.
    """
    print("\n── Merging llm.yaml (fine-tuned model name)")
    dest_path = dest / "config" / "llm.yaml"
    src_cfg = _load_yaml(source / "config" / "llm.yaml")
    dst_cfg = _load_yaml(dest_path)

    def ollama_model(cfg: dict):
        # Sections that are present but null (or otherwise not mappings)
        # are treated as absent instead of crashing the chained lookup.
        backends = cfg.get("backends")
        ollama = backends.get("ollama") if isinstance(backends, dict) else None
        return ollama.get("model") if isinstance(ollama, dict) else None

    src_model = ollama_model(src_cfg)
    if src_model and src_model != "llama3.2:3b":
        # Create (or repair) the nested sections before setting the model.
        backends = dst_cfg.get("backends")
        if not isinstance(backends, dict):
            backends = dst_cfg["backends"] = {}
        ollama = backends.get("ollama")
        if not isinstance(ollama, dict):
            ollama = backends["ollama"] = {}
        ollama["model"] = src_model
        print(f" model: {src_model}")
        _write_yaml(dest_path, dst_cfg, apply)
    else:
        print(f" no custom model in source — keeping {ollama_model(dst_cfg) or 'default'}")
|
|
|
|
|
|
def _copy_db(source: Path, dest: Path, apply: bool) -> None:
    """Copy ``staging.db`` from the root of *source* to the root of *dest*."""
    print("\n── Copying staging database")
    db_name = "staging.db"
    _copy_file(source / db_name, dest / db_name, apply)
|
|
|
|
|
|
def main() -> None:
    """Parse command-line options and run the migration steps in order.

    Options:
        --source   Path to the legacy repo (default: /devl/job-seeker).
        --dest     Path to the Peregrine repo (default: this repo).
        --apply    Write files; without it the run is a dry run.
        --copy-db  Also copy staging.db.

    Exits with status 1 when the source path does not exist.
    """
    parser = argparse.ArgumentParser(description="Migrate config from legacy job-seeker repo to Peregrine")
    parser.add_argument("--source", default="/devl/job-seeker",
                        help="Path to legacy job-seeker repo (default: /devl/job-seeker)")
    parser.add_argument("--dest", default=str(ROOT),
                        help="Path to Peregrine repo (default: this repo)")
    parser.add_argument("--apply", action="store_true",
                        help="Actually write files (default is dry run)")
    parser.add_argument("--copy-db", action="store_true",
                        help="Also copy staging.db")
    args = parser.parse_args()

    source = Path(args.source).expanduser().resolve()
    dest = Path(args.dest).expanduser().resolve()

    if not source.exists():
        print(f"Source repo not found: {source}", file=sys.stderr)
        sys.exit(1)

    mode = "APPLY" if args.apply else "DRY RUN"
    print(f"Peregrine migration [{mode}]")
    print(f" source: {source}")
    print(f" dest: {dest}")

    _build_user_yaml(source, dest, args.apply)
    _copy_configs(source, dest, args.apply)
    _copy_aihawk_resume(source, dest, args.apply)
    _merge_llm_yaml(source, dest, args.apply)

    if args.copy_db:
        _copy_db(source, dest, args.apply)

    print()
    if args.apply:
        print("Migration complete.")
        print("Next: bash scripts/manage-ui.sh start")
    else:
        print("Dry run complete. Re-run with --apply to write files.")
        # Only suggest --copy-db when the user has not already passed it.
        if not args.copy_db:
            print("Add --copy-db to also migrate staging.db.")
|
|
|
|
|
|
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|