From 54de37e5fa378bb3e93dd6e9b75a74d37d15afc7 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 25 Feb 2026 16:22:48 -0800 Subject: [PATCH] feat: containerize fine-tune pipeline (Dockerfile.finetune + make finetune) - Dockerfile.finetune: PyTorch 2.3/CUDA 12.1 base + unsloth + training stack - finetune_local.py: auto-register model via Ollama HTTP API after GGUF export; path-translate between finetune container mount and Ollama's view; update config/llm.yaml automatically; DOCS_DIR env override for Docker - prepare_training_data.py: DOCS_DIR env override so make prepare-training works correctly inside the app container - compose.yml: add finetune service (cpu/single-gpu/dual-gpu profiles); DOCS_DIR=/docs injected into app + finetune containers - compose.podman-gpu.yml: CDI device override for finetune service - Makefile: make prepare-training + make finetune targets --- Dockerfile.finetune | 38 +++++++++ Makefile | 11 ++- compose.podman-gpu.yml | 8 ++ compose.yml | 20 +++++ scripts/finetune_local.py | 134 ++++++++++++++++++++++++------- scripts/prepare_training_data.py | 6 +- 6 files changed, 183 insertions(+), 34 deletions(-) create mode 100644 Dockerfile.finetune diff --git a/Dockerfile.finetune b/Dockerfile.finetune new file mode 100644 index 0000000..bf3a70e --- /dev/null +++ b/Dockerfile.finetune @@ -0,0 +1,38 @@ +# Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth) +# Large image (~12-15 GB after build). Built once, cached on rebuilds. +# GPU strongly recommended. CPU fallback works but training is very slow. +# +# Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime +# If your GPU requires a different CUDA version, change the FROM line and +# reinstall bitsandbytes for the matching CUDA (e.g. bitsandbytes-cuda121). 
+FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime + +WORKDIR /app + +# Build tools needed by bitsandbytes CUDA kernels and unsloth +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc g++ git libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +# Install training stack. +# unsloth detects CUDA version automatically from the base image. +RUN pip install --no-cache-dir \ + "unsloth @ git+https://github.com/unslothai/unsloth.git" \ + "datasets>=2.18" "trl>=0.8" peft transformers \ + "bitsandbytes>=0.43.0" accelerate sentencepiece \ + requests pyyaml + +COPY scripts/ /app/scripts/ +COPY config/ /app/config/ + +ENV PYTHONUNBUFFERED=1 +# Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES= +ENV CUDA_VISIBLE_DEVICES=0 + +# Runtime env vars injected by compose.yml: +# OLLAMA_URL — Ollama API base (default: http://ollama:11434) +# OLLAMA_MODELS_MOUNT — finetune container's mount path for ollama models volume +# OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume +# DOCS_DIR — cover letters + training data root (default: /docs) + +ENTRYPOINT ["python", "scripts/finetune_local.py"] diff --git a/Makefile b/Makefile index 1e5a1f7..4576ebf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Makefile — Peregrine convenience targets # Usage: make -.PHONY: setup preflight start stop restart logs test clean help +.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help PROFILE ?= remote PYTHON ?= python3 @@ -43,7 +43,14 @@ logs: ## Tail app logs $(COMPOSE) logs -f app test: ## Run the test suite - $(PYTHON) -m pytest tests/ -v + @$(PYTHON) -m pytest tests/ -v + +prepare-training: ## Scan docs_dir for cover letters and build training JSONL + $(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py + +finetune: ## Fine-tune your personal cover letter model (run prepare-training first) + @echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..." 
+ $(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune clean: ## Remove containers, images, and data volumes (DESTRUCTIVE) @echo "WARNING: This will delete all Peregrine containers and data." diff --git a/compose.podman-gpu.yml b/compose.podman-gpu.yml index 46d5465..e812287 100644 --- a/compose.podman-gpu.yml +++ b/compose.podman-gpu.yml @@ -33,3 +33,11 @@ services: resources: reservations: devices: [] + + finetune: + devices: + - nvidia.com/gpu=0 + deploy: + resources: + reservations: + devices: [] diff --git a/compose.yml b/compose.yml index 79d8ba2..46b9bff 100644 --- a/compose.yml +++ b/compose.yml @@ -12,6 +12,7 @@ services: - ${DOCS_DIR:-~/Documents/JobSearch}:/docs environment: - STAGING_DB=/app/data/staging.db + - DOCS_DIR=/docs - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} - OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-} - OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-} @@ -101,3 +102,22 @@ services: capabilities: [gpu] profiles: [dual-gpu] restart: unless-stopped + + finetune: + build: + context: . 
+ dockerfile: Dockerfile.finetune + volumes: + - ${DOCS_DIR:-~/Documents/JobSearch}:/docs + - ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models + - ./config:/app/config + environment: + - DOCS_DIR=/docs + - OLLAMA_URL=http://ollama:11434 + - OLLAMA_MODELS_MOUNT=/ollama-models + - OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama + depends_on: + ollama: + condition: service_started + profiles: [cpu, single-gpu, dual-gpu] + restart: "no" diff --git a/scripts/finetune_local.py b/scripts/finetune_local.py index bfbf199..c096e33 100644 --- a/scripts/finetune_local.py +++ b/scripts/finetune_local.py @@ -32,7 +32,12 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None # ── Config ──────────────────────────────────────────────────────────────────── DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM -_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch" +# DOCS_DIR env var overrides user_profile when running inside Docker +_docs_env = os.environ.get("DOCS_DIR", "") +_docs = Path(_docs_env) if _docs_env else ( + _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch" +) + LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl" OUTPUT_DIR = _docs / "training_data" / "finetune_output" GGUF_DIR = _docs / "training_data" / "gguf" @@ -66,7 +71,7 @@ print(f"{'='*60}\n") # ── Load dataset ────────────────────────────────────────────────────────────── if not LETTERS_JSONL.exists(): sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n" - "Run: conda run -n job-seeker python scripts/prepare_training_data.py") + "Run: make prepare-training (or: python scripts/prepare_training_data.py)") records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()] print(f"Loaded {len(records)} training examples.") @@ -222,35 +227,102 @@ if not args.no_gguf and USE_UNSLOTH: else: gguf_path = None -# ── Print next steps ────────────────────────────────────────────────────────── 
# ── Register with Ollama (auto) ────────────────────────────────────────────────

def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
    """
    Copy GGUF into the shared Ollama models volume and register via the API.

    Works in two modes:
      Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
                      translate the container path into Ollama's view of the file.
      Local         — gguf_path is an absolute path Ollama can read directly.

    Args:
        gguf_path:     GGUF file produced by the export step.
        model_name:    Name to register the model under in Ollama.
        system_prompt: Text embedded in the Modelfile's SYSTEM block.

    Returns:
        True when Ollama answered HTTP 200 and streamed no error object;
        False otherwise (a manual `ollama create` hint is printed).

    Side effects:
        May copy the GGUF into the shared volume, writes OUTPUT_DIR/Modelfile,
        and on success rewrites config/llm.yaml (backends.ollama.model).
    """
    import json as _json
    import shutil

    import requests

    # Strip trailing slashes so the joins below never produce "//".
    ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434").rstrip("/")
    models_mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")
    ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "").rstrip("/")

    # ── Place GGUF where Ollama can read it ───────────────────────────────────
    if models_mount and ollama_models_dir:
        # Containerised: write into the shared volume; Ollama reads from its own mount.
        dest_dir = Path(models_mount) / "custom"
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / gguf_path.name
        # Compare resolved paths: copying a file onto itself raises SameFileError.
        if dest.resolve() != gguf_path.resolve():
            print(f"Copying GGUF → shared volume: {dest}")
            shutil.copy2(gguf_path, dest)
        ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
    else:
        # Local: pass the absolute path directly.
        ollama_gguf = str(gguf_path.resolve())

    modelfile_text = (
        f"FROM {ollama_gguf}\n"
        f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
        f"PARAMETER temperature 0.7\n"
        f"PARAMETER top_p 0.9\n"
        f"PARAMETER num_ctx 32768\n"
    )

    # Write Modelfile to disk as a reference (useful for debugging). This copy is
    # only a convenience, so a write failure must not abort the registration.
    modelfile_path = OUTPUT_DIR / "Modelfile"
    try:
        modelfile_path.write_text(modelfile_text, encoding="utf-8")
    except OSError as exc:
        print(f"  WARNING: could not write {modelfile_path}: {exc}")

    # ── Create via Ollama API ─────────────────────────────────────────────────
    # NOTE(review): newer Ollama releases appear to have changed the /api/create
    # request schema (model/from/files instead of name/modelfile) — confirm
    # against the Ollama version this stack deploys.
    print(f"\nRegistering '{model_name}' with Ollama at {ollama_url} …")
    failure = ""
    try:
        # Context manager releases the streamed connection even on early exit.
        with requests.post(
            f"{ollama_url}/api/create",
            json={"name": model_name, "modelfile": modelfile_text},
            timeout=300,
            stream=True,
        ) as r:
            for line in r.iter_lines():
                if not line:
                    continue
                try:
                    payload = _json.loads(line)
                except ValueError:
                    payload = {"status": line.decode(errors="replace")}
                if not isinstance(payload, dict):
                    payload = {"status": str(payload)}
                # Ollama reports mid-stream failures as {"error": "..."} lines;
                # these must not be mistaken for a successful create.
                if payload.get("error"):
                    failure = str(payload["error"])
                    print(f"  ERROR: {failure}")
                elif payload.get("status"):
                    print(f"  {payload['status']}")
            if r.status_code != 200:
                failure = failure or f"HTTP {r.status_code}"
                print(f"  WARNING: Ollama returned HTTP {r.status_code}")
    except (requests.RequestException, OSError) as exc:
        failure = str(exc)
        print(f"  Ollama registration failed: {exc}")

    if failure:
        print(f"  Run manually: ollama create {model_name} -f {modelfile_path}")
        return False

    # ── Update config/llm.yaml ────────────────────────────────────────────────
    llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
    if llm_yaml.exists():
        try:
            import yaml as _yaml
            cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
            if "backends" in cfg and "ollama" in cfg["backends"]:
                cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
                # sort_keys=False keeps the user's key order instead of
                # alphabetising the whole config on every run.
                llm_yaml.write_text(
                    _yaml.dump(
                        cfg,
                        default_flow_style=False,
                        allow_unicode=True,
                        sort_keys=False,
                    )
                )
                print(f"  llm.yaml updated → ollama.model = {model_name}:latest")
        except Exception as exc:
            # Deliberately best-effort: the model is already registered, so a
            # config problem is reported rather than raised.
            print(f"  Could not update llm.yaml automatically: {exc}")

    print(f"\n{'='*60}")
    print(f"  Model ready: {model_name}:latest")
    print(f"  Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
    print(f"{'='*60}\n")
    return True
= OUTPUT_DIR / "Modelfile" - modelfile.write_text(f"""FROM {gguf_path} -SYSTEM \"\"\" -{SYSTEM_PROMPT} -\"\"\" -PARAMETER temperature 0.7 -PARAMETER top_p 0.9 -PARAMETER num_ctx 32768 -""") - print(f"\n1. Modelfile written to: {modelfile}") - print(f"\n2. Create the Ollama model:") - print(f" ollama create {OLLAMA_NAME} -f {modelfile}") - print(f"\n3. Test it:") - print(f" ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'") - print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,") - print(f" then pick it in Settings → LLM Backends → Ollama → Model.") + _auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT) else: - print(f"\n Adapter only (no GGUF). To convert manually:") - print(f" 1. Merge adapter:") - print(f" conda run -n ogma python -c \"") - print(f" from peft import AutoPeftModelForCausalLM") - print(f" m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')") - print(f" m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"") - print(f" 2. Convert to GGUF using textgen env's convert_hf_to_gguf.py") - print(f" 3. 
ollama create {OLLAMA_NAME} -f Modelfile") -print() + print(f"\n{'='*60}") + print(" Adapter saved (no GGUF produced).") + print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.") + print(f" Adapter path: {adapter_path}") + print(f"{'='*60}\n") diff --git a/scripts/prepare_training_data.py b/scripts/prepare_training_data.py index 9b7441c..e0bc046 100644 --- a/scripts/prepare_training_data.py +++ b/scripts/prepare_training_data.py @@ -12,6 +12,7 @@ Usage: """ import argparse import json +import os import re import sys from pathlib import Path @@ -22,7 +23,10 @@ from scripts.user_profile import UserProfile _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml" _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None -_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch" +_docs_env = os.environ.get("DOCS_DIR", "") +_docs = Path(_docs_env) if _docs_env else ( + _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch" +) LETTERS_DIR = _docs # Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter") LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]