feat: migration tool + portable startup scripts

scripts/migrate.py:
- dry-run by default; --apply writes files; --copy-db migrates staging.db
- generates config/user.yaml from source repo's resume + cover letter scripts
- copies gitignored configs (notion, email, adzuna, craigslist, search profiles,
  resume keywords, blocklist, aihawk resume)
- merges fine-tuned model name from source llm.yaml into dest llm.yaml

scripts/manage-ui.sh:
- STREAMLIT_BIN no longer hardcoded; auto-resolves via conda env or PATH;
  override with STREAMLIT_BIN env var

scripts/manage-vllm.sh:
- VLLM_BIN and MODEL_DIR now read from env vars with portable defaults
This commit is contained in:
pyr0ball 2026-02-24 20:25:54 -08:00
parent 2d1c48e7af
commit 4841b211ea
2 changed files with 280 additions and 1 deletions

View file

@ -5,7 +5,18 @@
set -euo pipefail set -euo pipefail
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
STREAMLIT_BIN="/devl/miniconda3/envs/job-seeker/bin/streamlit" STREAMLIT_BIN="${STREAMLIT_BIN:-streamlit}"
# Resolve: conda env bin, system PATH, or explicit override
if [[ "$STREAMLIT_BIN" == "streamlit" ]]; then
for _candidate in \
"$(conda run -n job-seeker which streamlit 2>/dev/null)" \
"$(which streamlit 2>/dev/null)"; do
if [[ -n "$_candidate" && -x "$_candidate" ]]; then
STREAMLIT_BIN="$_candidate"
break
fi
done
fi
APP_ENTRY="$REPO_DIR/app/app.py" APP_ENTRY="$REPO_DIR/app/app.py"
PID_FILE="$REPO_DIR/.streamlit.pid" PID_FILE="$REPO_DIR/.streamlit.pid"
LOG_FILE="$REPO_DIR/.streamlit.log" LOG_FILE="$REPO_DIR/.streamlit.log"

268
scripts/migrate.py Normal file
View file

@ -0,0 +1,268 @@
#!/usr/bin/env python3
"""
Peregrine migration tool import config and data from a legacy job-seeker repo.
Usage:
python scripts/migrate.py # dry run (show what would change)
python scripts/migrate.py --apply # write files
python scripts/migrate.py --apply --copy-db # also copy staging.db
python scripts/migrate.py --source /path/to/repo # non-default source
What it migrates:
- config/user.yaml (generated from source resume + scripts)
- config/notion.yaml (copied contains live token)
- config/email.yaml (copied contains IMAP credentials)
- config/adzuna.yaml (copied API credentials)
- config/craigslist.yaml (copied metro/location map)
- config/search_profiles.yaml (copied user's job search targets)
- config/resume_keywords.yaml (copied)
- config/blocklist.yaml (copied)
- config/llm.yaml (merges fine-tuned model name from source)
- aihawk/data_folder/plain_text_resume.yaml (copied if aihawk present)
- staging.db (optional copies current DB state)
"""
import argparse
import shutil
import sys
from pathlib import Path
from textwrap import dedent
import yaml
ROOT = Path(__file__).parent.parent
def _load_yaml(path: Path) -> dict:
if path.exists():
return yaml.safe_load(path.read_text()) or {}
return {}
def _write_yaml(path: Path, data: dict, apply: bool) -> None:
text = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
if apply:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(text)
print(f" ✓ wrote {path.relative_to(ROOT)}")
else:
print(f" (dry) would write {path.relative_to(ROOT)}")
def _copy_file(src: Path, dest: Path, apply: bool) -> bool:
if not src.exists():
print(f" ✗ skip {dest.name} — not found at {src}")
return False
if apply:
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dest)
print(f" ✓ copied {dest.relative_to(ROOT)}")
else:
print(f" (dry) would copy {src}{dest.relative_to(ROOT)}")
return True
def _extract_career_summary(source: Path) -> str:
"""Pull career summary from source generate_cover_letter.py SYSTEM_CONTEXT."""
gcl = source / "scripts" / "generate_cover_letter.py"
if not gcl.exists():
return ""
text = gcl.read_text()
start = text.find('SYSTEM_CONTEXT = """')
if start == -1:
start = text.find("SYSTEM_CONTEXT = '''")
if start == -1:
return ""
start = text.find('"""', start) + 3
end = text.find('"""', start)
if end == -1:
return ""
block = text[start:end].strip()
# Extract just the Background lines (skip the role description preamble)
lines = [l.strip("- ").strip() for l in block.splitlines() if l.strip().startswith("-")]
return " ".join(lines[:4]) if lines else block[:300]
def _extract_personal_info(source: Path) -> dict:
"""Extract personal info from aihawk resume yaml."""
resume = source / "aihawk" / "data_folder" / "plain_text_resume.yaml"
if not resume.exists():
resume = source / "config" / "plain_text_resume.yaml"
if not resume.exists():
return {}
data = _load_yaml(resume)
info = data.get("personal_information", {})
return {
"name": f"{info.get('name', '')} {info.get('surname', '')}".strip(),
"email": info.get("email", ""),
"phone": str(info.get("phone", "")),
"linkedin": info.get("linkedin", ""),
}
def _extract_docs_dir(source: Path) -> str:
"""Try to find docs directory from source scripts."""
gcl = source / "scripts" / "generate_cover_letter.py"
if gcl.exists():
for line in gcl.read_text().splitlines():
if "LETTERS_DIR" in line and "Path(" in line:
# e.g. LETTERS_DIR = Path("/Library/Documents/JobSearch")
start = line.find('"')
end = line.rfind('"')
if start != end:
return line[start + 1:end]
return "~/Documents/JobSearch"
def _build_user_yaml(source: Path, dest: Path, apply: bool) -> None:
print("\n── Generating config/user.yaml")
info = _extract_personal_info(source)
career_summary = _extract_career_summary(source)
docs_dir = _extract_docs_dir(source)
# Mission preferences — extracted from source _MISSION_NOTES
gcl_text = (source / "scripts" / "generate_cover_letter.py").read_text() \
if (source / "scripts" / "generate_cover_letter.py").exists() else ""
mission_prefs: dict = {}
# The original _MISSION_NOTES encoded personal alignment notes inline;
# we set sensible short personal notes for each industry.
if "music" in gcl_text and "personal passion" in gcl_text:
mission_prefs["music"] = (
"I have a real personal passion for the music scene and would love "
"to apply my CS skills in this space."
)
if "animal_welfare" in gcl_text or "animal" in gcl_text:
mission_prefs["animal_welfare"] = (
"Animal welfare is a dream domain for me — a genuine personal passion "
"that deeply aligns with my values."
)
if "education" in gcl_text and "EdTech" in gcl_text:
mission_prefs["education"] = (
"Children's education and EdTech reflect genuine personal values around "
"learning and young people that I'd love to connect to my CS work."
)
data = {
"name": info.get("name", ""),
"email": info.get("email", ""),
"phone": info.get("phone", ""),
"linkedin": info.get("linkedin", ""),
"career_summary": career_summary,
"nda_companies": [],
"mission_preferences": mission_prefs,
"candidate_accessibility_focus": False,
"candidate_lgbtq_focus": False,
"docs_dir": docs_dir,
"ollama_models_dir": "~/models/ollama",
"vllm_models_dir": "~/models/vllm",
"inference_profile": "dual-gpu",
"services": {
"streamlit_port": 8501,
"ollama_host": "localhost",
"ollama_port": 11434,
"ollama_ssl": False,
"ollama_ssl_verify": True,
"vllm_host": "localhost",
"vllm_port": 8000,
"vllm_ssl": False,
"vllm_ssl_verify": True,
"searxng_host": "localhost",
"searxng_port": 8888,
"searxng_ssl": False,
"searxng_ssl_verify": True,
},
}
_write_yaml(dest / "config" / "user.yaml", data, apply)
if not apply:
print(f" name: {data['name'] or '(not found)'}")
print(f" email: {data['email'] or '(not found)'}")
print(f" docs: {data['docs_dir']}")
print(f" profile: {data['inference_profile']}")
def _copy_configs(source: Path, dest: Path, apply: bool) -> None:
print("\n── Copying config files")
files = [
"config/notion.yaml",
"config/email.yaml",
"config/adzuna.yaml",
"config/craigslist.yaml",
"config/search_profiles.yaml",
"config/resume_keywords.yaml",
"config/blocklist.yaml",
]
for rel in files:
_copy_file(source / rel, dest / rel, apply)
def _copy_aihawk_resume(source: Path, dest: Path, apply: bool) -> None:
print("\n── Copying AIHawk resume profile")
src = source / "aihawk" / "data_folder" / "plain_text_resume.yaml"
dst = dest / "aihawk" / "data_folder" / "plain_text_resume.yaml"
_copy_file(src, dst, apply)
def _merge_llm_yaml(source: Path, dest: Path, apply: bool) -> None:
"""Copy the fine-tuned model name from source llm.yaml into dest llm.yaml."""
print("\n── Merging llm.yaml (fine-tuned model name)")
src_cfg = _load_yaml(source / "config" / "llm.yaml")
dst_cfg = _load_yaml(dest / "config" / "llm.yaml")
src_model = src_cfg.get("backends", {}).get("ollama", {}).get("model", "")
if src_model and src_model != "llama3.2:3b":
dst_cfg.setdefault("backends", {}).setdefault("ollama", {})["model"] = src_model
print(f" model: {src_model}")
_write_yaml(dest / "config" / "llm.yaml", dst_cfg, apply)
else:
print(f" no custom model in source — keeping {dst_cfg.get('backends', {}).get('ollama', {}).get('model', 'default')}")
def _copy_db(source: Path, dest: Path, apply: bool) -> None:
print("\n── Copying staging database")
_copy_file(source / "staging.db", dest / "staging.db", apply)
def main() -> None:
parser = argparse.ArgumentParser(description="Migrate config from legacy job-seeker repo to Peregrine")
parser.add_argument("--source", default="/devl/job-seeker",
help="Path to legacy job-seeker repo (default: /devl/job-seeker)")
parser.add_argument("--dest", default=str(ROOT),
help="Path to Peregrine repo (default: this repo)")
parser.add_argument("--apply", action="store_true",
help="Actually write files (default is dry run)")
parser.add_argument("--copy-db", action="store_true",
help="Also copy staging.db")
args = parser.parse_args()
source = Path(args.source).expanduser().resolve()
dest = Path(args.dest).expanduser().resolve()
if not source.exists():
print(f"Source repo not found: {source}", file=sys.stderr)
sys.exit(1)
mode = "APPLY" if args.apply else "DRY RUN"
print(f"Peregrine migration [{mode}]")
print(f" source: {source}")
print(f" dest: {dest}")
_build_user_yaml(source, dest, args.apply)
_copy_configs(source, dest, args.apply)
_copy_aihawk_resume(source, dest, args.apply)
_merge_llm_yaml(source, dest, args.apply)
if args.copy_db:
_copy_db(source, dest, args.apply)
print()
if args.apply:
print("Migration complete.")
print("Next: bash scripts/manage-ui.sh start")
else:
print("Dry run complete. Re-run with --apply to write files.")
if args.copy_db or True:
print("Add --copy-db to also migrate staging.db.")
if __name__ == "__main__":
main()