feat: containerize fine-tune pipeline (Dockerfile.finetune + make finetune)
- Dockerfile.finetune: PyTorch 2.3/CUDA 12.1 base + unsloth + training stack
- finetune_local.py: auto-register model via Ollama HTTP API after GGUF export; path-translate between finetune container mount and Ollama's view; update config/llm.yaml automatically; DOCS_DIR env override for Docker
- prepare_training_data.py: DOCS_DIR env override so make prepare-training works correctly inside the app container
- compose.yml: add finetune service (cpu/single-gpu/dual-gpu profiles); DOCS_DIR=/docs injected into app + finetune containers
- compose.podman-gpu.yml: CDI device override for finetune service
- Makefile: make prepare-training + make finetune targets
This commit is contained in:
parent
6c895b5a9b
commit
4d66c04d1e
6 changed files with 183 additions and 34 deletions
38
Dockerfile.finetune
Normal file
38
Dockerfile.finetune
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
# Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth)
|
||||||
|
# Large image (~12-15 GB after build). Built once, cached on rebuilds.
|
||||||
|
# GPU strongly recommended. CPU fallback works but training is very slow.
|
||||||
|
#
|
||||||
|
# Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
|
||||||
|
# If your GPU requires a different CUDA version, change the FROM line and
|
||||||
|
# make sure the installed bitsandbytes (>=0.43.0, see below) supports that CUDA version.
|
||||||
|
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Build tools needed by bitsandbytes CUDA kernels and unsloth
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
gcc g++ git libgomp1 \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install training stack.
|
||||||
|
# unsloth detects CUDA version automatically from the base image.
|
||||||
|
RUN pip install --no-cache-dir \
|
||||||
|
"unsloth @ git+https://github.com/unslothai/unsloth.git" \
|
||||||
|
"datasets>=2.18" "trl>=0.8" peft transformers \
|
||||||
|
"bitsandbytes>=0.43.0" accelerate sentencepiece \
|
||||||
|
requests pyyaml
|
||||||
|
|
||||||
|
COPY scripts/ /app/scripts/
|
||||||
|
COPY config/ /app/config/
|
||||||
|
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
# Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES=
|
||||||
|
ENV CUDA_VISIBLE_DEVICES=0
|
||||||
|
|
||||||
|
# Runtime env vars injected by compose.yml:
|
||||||
|
# OLLAMA_URL — Ollama API base (default: http://ollama:11434)
|
||||||
|
# OLLAMA_MODELS_MOUNT — finetune container's mount path for ollama models volume
|
||||||
|
# OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume
|
||||||
|
# DOCS_DIR — cover letters + training data root (default: /docs)
|
||||||
|
|
||||||
|
ENTRYPOINT ["python", "scripts/finetune_local.py"]
|
||||||
11
Makefile
11
Makefile
|
|
@ -1,7 +1,7 @@
|
||||||
# Makefile — Peregrine convenience targets
|
# Makefile — Peregrine convenience targets
|
||||||
# Usage: make <target>
|
# Usage: make <target>
|
||||||
|
|
||||||
.PHONY: setup preflight start stop restart logs test clean help
|
.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help
|
||||||
|
|
||||||
PROFILE ?= remote
|
PROFILE ?= remote
|
||||||
PYTHON ?= python3
|
PYTHON ?= python3
|
||||||
|
|
@ -43,7 +43,14 @@ logs: ## Tail app logs
|
||||||
$(COMPOSE) logs -f app
|
$(COMPOSE) logs -f app
|
||||||
|
|
||||||
test: ## Run the test suite
|
test: ## Run the test suite
|
||||||
$(PYTHON) -m pytest tests/ -v
|
@$(PYTHON) -m pytest tests/ -v
|
||||||
|
|
||||||
|
prepare-training: ## Scan docs_dir for cover letters and build training JSONL
|
||||||
|
$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py
|
||||||
|
|
||||||
|
finetune: ## Fine-tune your personal cover letter model (run prepare-training first)
|
||||||
|
@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
|
||||||
|
$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
|
||||||
|
|
||||||
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
|
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
|
||||||
@echo "WARNING: This will delete all Peregrine containers and data."
|
@echo "WARNING: This will delete all Peregrine containers and data."
|
||||||
|
|
|
||||||
|
|
@ -33,3 +33,11 @@ services:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
devices: []
|
devices: []
|
||||||
|
|
||||||
|
finetune:
|
||||||
|
devices:
|
||||||
|
- nvidia.com/gpu=0
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices: []
|
||||||
|
|
|
||||||
20
compose.yml
20
compose.yml
|
|
@ -12,6 +12,7 @@ services:
|
||||||
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
||||||
environment:
|
environment:
|
||||||
- STAGING_DB=/app/data/staging.db
|
- STAGING_DB=/app/data/staging.db
|
||||||
|
- DOCS_DIR=/docs
|
||||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||||
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
|
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
|
||||||
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
||||||
|
|
@ -101,3 +102,22 @@ services:
|
||||||
capabilities: [gpu]
|
capabilities: [gpu]
|
||||||
profiles: [dual-gpu]
|
profiles: [dual-gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
finetune:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile.finetune
|
||||||
|
volumes:
|
||||||
|
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
||||||
|
- ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
|
||||||
|
- ./config:/app/config
|
||||||
|
environment:
|
||||||
|
- DOCS_DIR=/docs
|
||||||
|
- OLLAMA_URL=http://ollama:11434
|
||||||
|
- OLLAMA_MODELS_MOUNT=/ollama-models
|
||||||
|
- OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
|
||||||
|
depends_on:
|
||||||
|
ollama:
|
||||||
|
condition: service_started
|
||||||
|
profiles: [cpu, single-gpu, dual-gpu]
|
||||||
|
restart: "no"
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,12 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
# ── Config ────────────────────────────────────────────────────────────────────
|
# ── Config ────────────────────────────────────────────────────────────────────
|
||||||
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
|
||||||
|
|
||||||
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
# DOCS_DIR env var overrides user_profile when running inside Docker
|
||||||
|
_docs_env = os.environ.get("DOCS_DIR", "")
|
||||||
|
_docs = Path(_docs_env) if _docs_env else (
|
||||||
|
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
|
)
|
||||||
|
|
||||||
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
||||||
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
||||||
GGUF_DIR = _docs / "training_data" / "gguf"
|
GGUF_DIR = _docs / "training_data" / "gguf"
|
||||||
|
|
@ -66,7 +71,7 @@ print(f"{'='*60}\n")
|
||||||
# ── Load dataset ──────────────────────────────────────────────────────────────
|
# ── Load dataset ──────────────────────────────────────────────────────────────
|
||||||
if not LETTERS_JSONL.exists():
|
if not LETTERS_JSONL.exists():
|
||||||
sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
|
sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
|
||||||
"Run: conda run -n job-seeker python scripts/prepare_training_data.py")
|
"Run: make prepare-training (or: python scripts/prepare_training_data.py)")
|
||||||
|
|
||||||
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
|
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
|
||||||
print(f"Loaded {len(records)} training examples.")
|
print(f"Loaded {len(records)} training examples.")
|
||||||
|
|
@ -222,35 +227,102 @@ if not args.no_gguf and USE_UNSLOTH:
|
||||||
else:
|
else:
|
||||||
gguf_path = None
|
gguf_path = None
|
||||||
|
|
||||||
# ── Print next steps ──────────────────────────────────────────────────────────
|
# ── Register with Ollama (auto) ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
    """
    Copy GGUF into the shared Ollama models volume and register via the API.

    Works in two modes:
      Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
                      translate the container path into Ollama's view of the file.
      Local         — gguf_path is an absolute path Ollama can read directly.

    Args:
        gguf_path:     Path to the exported GGUF file as seen by this process.
        model_name:    Name to register the model under in Ollama.
        system_prompt: Text embedded in the Modelfile's SYSTEM block.

    Returns:
        True when Ollama accepted the model (HTTP 200 and no ``error`` object in
        the streamed response); False on any registration failure. A failure to
        update config/llm.yaml afterwards is reported but does not change the
        return value.
    """
    import json as _json
    import shutil
    import requests

    ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
    models_mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")
    ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")

    # ── Place GGUF where Ollama can read it ───────────────────────────────────
    if models_mount and ollama_models_dir:
        # Containerised: write into the shared volume; Ollama reads from its own mount.
        dest_dir = Path(models_mount) / "custom"
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / gguf_path.name
        if dest != gguf_path:
            print(f"Copying GGUF → shared volume: {dest}")
            shutil.copy2(gguf_path, dest)
        ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
    else:
        # Local: pass the absolute path directly.
        ollama_gguf = str(gguf_path.resolve())

    modelfile_text = (
        f"FROM {ollama_gguf}\n"
        f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
        "PARAMETER temperature 0.7\n"
        "PARAMETER top_p 0.9\n"
        "PARAMETER num_ctx 32768\n"
    )

    # Write Modelfile to disk as a reference (useful for debugging)
    modelfile_path = OUTPUT_DIR / "Modelfile"
    modelfile_path.write_text(modelfile_text)

    # Shown on every failure path so the user always knows the manual fallback.
    manual_hint = f" Run manually: ollama create {model_name} -f {modelfile_path}"

    # ── Create via Ollama API ─────────────────────────────────────────────────
    print(f"\nRegistering '{model_name}' with Ollama at {ollama_url} …")
    try:
        # Context manager releases the streamed connection on every exit path.
        with requests.post(
            f"{ollama_url}/api/create",
            json={"name": model_name, "modelfile": modelfile_text},
            timeout=300,
            stream=True,
        ) as r:
            # Check the status before consuming the stream so an HTTP-level
            # failure is never mistaken for progress output.
            if r.status_code != 200:
                print(f" WARNING: Ollama returned HTTP {r.status_code}")
                detail = r.text.strip()
                if detail:
                    print(f" {detail}")
                print(manual_hint)
                return False

            for line in r.iter_lines():
                if not line:
                    continue
                try:
                    payload = _json.loads(line)
                except ValueError:
                    payload = None

                if isinstance(payload, dict):
                    # Once streaming has started the HTTP status is already 200,
                    # so failures arrive in-band as {"error": "..."} objects.
                    if payload.get("error"):
                        print(f" Ollama registration failed: {payload['error']}")
                        print(manual_hint)
                        return False
                    msg = payload.get("status", "")
                else:
                    # Not a JSON object: echo the raw line.
                    msg = line.decode(errors="replace")

                if msg:
                    print(f" {msg}")
    except Exception as exc:
        print(f" Ollama registration failed: {exc}")
        print(manual_hint)
        return False

    # ── Update config/llm.yaml ────────────────────────────────────────────────
    llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
    if llm_yaml.exists():
        # Best effort: the model is already registered, so a config problem is
        # reported but does not turn the whole run into a failure.
        try:
            import yaml as _yaml
            cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
            if "backends" in cfg and "ollama" in cfg["backends"]:
                cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
                llm_yaml.write_text(
                    _yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
                )
                print(f" llm.yaml updated → ollama.model = {model_name}:latest")
        except Exception as exc:
            print(f" Could not update llm.yaml automatically: {exc}")

    print(f"\n{'='*60}")
    print(f" Model ready: {model_name}:latest")
    print(f" Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
    print(f"{'='*60}\n")
    return True
|
||||||
|
|
||||||
|
|
||||||
if gguf_path and gguf_path.exists():
|
if gguf_path and gguf_path.exists():
|
||||||
modelfile = OUTPUT_DIR / "Modelfile"
|
_auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT)
|
||||||
modelfile.write_text(f"""FROM {gguf_path}
|
|
||||||
SYSTEM \"\"\"
|
|
||||||
{SYSTEM_PROMPT}
|
|
||||||
\"\"\"
|
|
||||||
PARAMETER temperature 0.7
|
|
||||||
PARAMETER top_p 0.9
|
|
||||||
PARAMETER num_ctx 32768
|
|
||||||
""")
|
|
||||||
print(f"\n1. Modelfile written to: {modelfile}")
|
|
||||||
print(f"\n2. Create the Ollama model:")
|
|
||||||
print(f" ollama create {OLLAMA_NAME} -f {modelfile}")
|
|
||||||
print(f"\n3. Test it:")
|
|
||||||
print(f" ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
|
|
||||||
print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
|
|
||||||
print(f" then pick it in Settings → LLM Backends → Ollama → Model.")
|
|
||||||
else:
|
else:
|
||||||
print(f"\n Adapter only (no GGUF). To convert manually:")
|
print(f"\n{'='*60}")
|
||||||
print(f" 1. Merge adapter:")
|
print(" Adapter saved (no GGUF produced).")
|
||||||
print(f" conda run -n ogma python -c \"")
|
print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
|
||||||
print(f" from peft import AutoPeftModelForCausalLM")
|
print(f" Adapter path: {adapter_path}")
|
||||||
print(f" m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
|
print(f"{'='*60}\n")
|
||||||
print(f" m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
|
|
||||||
print(f" 2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
|
|
||||||
print(f" 3. ollama create {OLLAMA_NAME} -f Modelfile")
|
|
||||||
print()
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ Usage:
|
||||||
"""
|
"""
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -22,7 +23,10 @@ from scripts.user_profile import UserProfile
|
||||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
|
||||||
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
_docs_env = os.environ.get("DOCS_DIR", "")
|
||||||
|
_docs = Path(_docs_env) if _docs_env else (
|
||||||
|
_profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
|
)
|
||||||
LETTERS_DIR = _docs
|
LETTERS_DIR = _docs
|
||||||
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
|
||||||
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue