feat: containerize fine-tune pipeline (Dockerfile.finetune + make finetune)

- Dockerfile.finetune: PyTorch 2.3/CUDA 12.1 base + unsloth + training stack
- finetune_local.py: auto-register model via Ollama HTTP API after GGUF
  export; path-translate between finetune container mount and Ollama's view;
  update config/llm.yaml automatically; DOCS_DIR env override for Docker
- prepare_training_data.py: DOCS_DIR env override so make prepare-training
  works correctly inside the app container
- compose.yml: add finetune service (cpu/single-gpu/dual-gpu profiles);
  DOCS_DIR=/docs injected into app + finetune containers
- compose.podman-gpu.yml: CDI device override for finetune service
- Makefile: make prepare-training + make finetune targets
This commit is contained in:
pyr0ball 2026-02-25 16:22:48 -08:00
parent 6c895b5a9b
commit 4d66c04d1e
6 changed files with 183 additions and 34 deletions

38
Dockerfile.finetune Normal file
View file

@ -0,0 +1,38 @@
# Dockerfile.finetune — builds the QLoRA cover-letter fine-tuning image (unsloth).
#
# Heavy image (roughly 12-15 GB once built); layers are cached, so rebuilds are cheap.
# A GPU is strongly recommended — the CPU fallback works but training is very slow.
#
# Known-good base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
# For a different CUDA generation, swap the FROM tag and reinstall a matching
# bitsandbytes build (e.g. bitsandbytes-cuda121).
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime

WORKDIR /app

# Toolchain required to compile bitsandbytes' CUDA kernels and by unsloth.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc g++ git libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Training stack. unsloth detects the CUDA version from the base image on its own.
RUN pip install --no-cache-dir \
        "unsloth @ git+https://github.com/unslothai/unsloth.git" \
        "datasets>=2.18" "trl>=0.8" peft transformers \
        "bitsandbytes>=0.43.0" accelerate sentencepiece \
        requests pyyaml

COPY scripts/ /app/scripts/
COPY config/ /app/config/

# Stream Python output immediately; default to GPU 0 (override at runtime
# with --env CUDA_VISIBLE_DEVICES=...).
ENV PYTHONUNBUFFERED=1 \
    CUDA_VISIBLE_DEVICES=0

# Runtime env vars injected by compose.yml:
#   OLLAMA_URL                — Ollama API base (default: http://ollama:11434)
#   OLLAMA_MODELS_MOUNT       — this container's mount path for the ollama models volume
#   OLLAMA_MODELS_OLLAMA_PATH — the Ollama container's mount path for the same volume
#   DOCS_DIR                  — cover letters + training data root (default: /docs)
ENTRYPOINT ["python", "scripts/finetune_local.py"]

View file

@ -1,7 +1,7 @@
# Makefile — Peregrine convenience targets
# Usage: make <target>
.PHONY: setup preflight start stop restart logs test clean help
.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help
PROFILE ?= remote
PYTHON ?= python3
@ -43,7 +43,14 @@ logs: ## Tail app logs
$(COMPOSE) logs -f app
test: ## Run the test suite
$(PYTHON) -m pytest tests/ -v
@$(PYTHON) -m pytest tests/ -v
# Runs inside the app container so the /docs mount and DOCS_DIR env var match
# what the training scripts expect (see compose.yml).
prepare-training: ## Scan docs_dir for cover letters and build training JSONL
	$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py

# One-shot training job (restart: "no" in compose.yml); requires the dataset
# from `make prepare-training`. PROFILE picks cpu / single-gpu / dual-gpu.
finetune: ## Fine-tune your personal cover letter model (run prepare-training first)
	@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
	$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
@echo "WARNING: This will delete all Peregrine containers and data."

View file

@ -33,3 +33,11 @@ services:
resources:
reservations:
devices: []
  # Podman CDI GPU passthrough for the fine-tune job: expose GPU 0 via the
  # nvidia.com/gpu CDI device and clear the Docker-style device reservations,
  # which Podman does not honour.
  finetune:
    devices:
      - nvidia.com/gpu=0
    deploy:
      resources:
        reservations:
          devices: []

View file

@ -12,6 +12,7 @@ services:
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
environment:
- STAGING_DB=/app/data/staging.db
- DOCS_DIR=/docs
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
@ -101,3 +102,22 @@ services:
capabilities: [gpu]
profiles: [dual-gpu]
restart: unless-stopped
  # One-shot fine-tuning job (not a long-running service): trains the LoRA,
  # exports a GGUF, then registers the model with Ollama through the shared
  # models volume. Invoked via `make finetune`.
  finetune:
    build:
      context: .
      dockerfile: Dockerfile.finetune
    volumes:
      # Cover letters + training data root (same mount the app container uses).
      - ${DOCS_DIR:-~/Documents/JobSearch}:/docs
      # Shared with the ollama service so the exported GGUF is visible to it.
      - ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
      # Mounted read-write so finetune_local.py can update config/llm.yaml.
      - ./config:/app/config
    environment:
      - DOCS_DIR=/docs
      - OLLAMA_URL=http://ollama:11434
      # Both paths name the same volume; finetune_local.py uses the pair to
      # translate its file paths into Ollama's view of the volume.
      - OLLAMA_MODELS_MOUNT=/ollama-models
      - OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
    depends_on:
      ollama:
        condition: service_started
    profiles: [cpu, single-gpu, dual-gpu]
    # Runs to completion and exits; never auto-restart a training job.
    restart: "no"

View file

@ -32,7 +32,12 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
# ── Config ────────────────────────────────────────────────────────────────────
DEFAULT_MODEL = "unsloth/Llama-3.2-3B-Instruct" # safe on 8 GB VRAM
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
# DOCS_DIR env var overrides user_profile when running inside Docker
# (compose.yml injects DOCS_DIR=/docs; outside Docker the env var is unset
# and the user-profile / home-directory fallback applies).
_docs_env = os.environ.get("DOCS_DIR", "")
_docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
)
# All training artefacts live under <docs>/training_data/.
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"  # input dataset
OUTPUT_DIR = _docs / "training_data" / "finetune_output"  # adapter + Modelfile
GGUF_DIR = _docs / "training_data" / "gguf"  # exported GGUF files
@ -66,7 +71,7 @@ print(f"{'='*60}\n")
# ── Load dataset ──────────────────────────────────────────────────────────────
if not LETTERS_JSONL.exists():
sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
"Run: conda run -n job-seeker python scripts/prepare_training_data.py")
"Run: make prepare-training (or: python scripts/prepare_training_data.py)")
records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
print(f"Loaded {len(records)} training examples.")
@ -222,35 +227,102 @@ if not args.no_gguf and USE_UNSLOTH:
else:
gguf_path = None
# ── Print next steps ──────────────────────────────────────────────────────────
# ── Register with Ollama (auto) ────────────────────────────────────────────────
def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
    """
    Copy the exported GGUF somewhere Ollama can read it, register the model via
    Ollama's /api/create endpoint, and point config/llm.yaml at the new model.

    Works in two modes:
      * Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
        translate this container's path into Ollama's view of the shared volume.
      * Local — gguf_path is an absolute path Ollama can read directly.

    Returns True on success, False on failure (a Modelfile is still written to
    OUTPUT_DIR so the user can run `ollama create` manually).
    """
    # Hoisted out of the streaming loop — the original re-imported json on
    # every response line.
    import json as _json
    import shutil
    import requests

    ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
    models_mount = os.environ.get("OLLAMA_MODELS_MOUNT", "")
    ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")

    # ── Place GGUF where Ollama can read it ───────────────────────────────────
    if models_mount and ollama_models_dir:
        # Containerised: write into the shared volume; Ollama reads from its own mount.
        dest_dir = Path(models_mount) / "custom"
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / gguf_path.name
        if dest != gguf_path:
            print(f"Copying GGUF → shared volume: {dest}")
            shutil.copy2(gguf_path, dest)
        ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
    else:
        # Local: pass the absolute path directly.
        ollama_gguf = str(gguf_path.resolve())

    modelfile_text = (
        f"FROM {ollama_gguf}\n"
        f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
        f"PARAMETER temperature 0.7\n"
        f"PARAMETER top_p 0.9\n"
        f"PARAMETER num_ctx 32768\n"
    )
    # Write Modelfile to disk as a reference (useful for debugging, and it is
    # the manual-recovery path printed below on failure).
    (OUTPUT_DIR / "Modelfile").write_text(modelfile_text)

    # ── Create via Ollama API ─────────────────────────────────────────────────
    # NOTE(review): the "modelfile" field of /api/create is deprecated in newer
    # Ollama releases — confirm the deployed server version still accepts it.
    print(f"\nRegistering '{model_name}' with Ollama at {ollama_url}")
    try:
        # `with` ensures the streamed connection is closed (original leaked it).
        with requests.post(
            f"{ollama_url}/api/create",
            json={"name": model_name, "modelfile": modelfile_text},
            timeout=300,
            stream=True,
        ) as r:
            # With stream=True the status line is available immediately — fail
            # fast instead of consuming the whole stream before checking it.
            if r.status_code != 200:
                print(f" WARNING: Ollama returned HTTP {r.status_code}")
                return False
            for line in r.iter_lines():
                if not line:
                    continue
                try:
                    msg = _json.loads(line).get("status", "")
                except Exception:
                    # Non-JSON line (e.g. plain-text error body): show it raw.
                    msg = line.decode()
                if msg:
                    print(f" {msg}")
    except Exception as exc:
        print(f" Ollama registration failed: {exc}")
        print(f" Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
        return False

    # ── Update config/llm.yaml ────────────────────────────────────────────────
    llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
    if llm_yaml.exists():
        try:
            import yaml as _yaml
            cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
            # Only rewrite the file when the expected key path exists.
            if "backends" in cfg and "ollama" in cfg["backends"]:
                cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
                llm_yaml.write_text(
                    _yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
                )
                print(f" llm.yaml updated → ollama.model = {model_name}:latest")
        except Exception as exc:
            # Best-effort: a broken llm.yaml update must not fail the whole run.
            print(f" Could not update llm.yaml automatically: {exc}")

    print(f"\n{'='*60}")
    # Fixed stale banner: registration is automatic now, there are no manual
    # "next steps to load into Ollama" any more.
    print(" DONE — model registered with Ollama:")
    print(f"{'='*60}")
    print(f" Model ready: {model_name}:latest")
    print(f" Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
    print(f"{'='*60}\n")
    return True
if gguf_path and gguf_path.exists():
modelfile = OUTPUT_DIR / "Modelfile"
modelfile.write_text(f"""FROM {gguf_path}
SYSTEM \"\"\"
{SYSTEM_PROMPT}
\"\"\"
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER num_ctx 32768
""")
print(f"\n1. Modelfile written to: {modelfile}")
print(f"\n2. Create the Ollama model:")
print(f" ollama create {OLLAMA_NAME} -f {modelfile}")
print(f"\n3. Test it:")
print(f" ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
print(f" then pick it in Settings → LLM Backends → Ollama → Model.")
_auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT)
else:
print(f"\n Adapter only (no GGUF). To convert manually:")
print(f" 1. Merge adapter:")
print(f" conda run -n ogma python -c \"")
print(f" from peft import AutoPeftModelForCausalLM")
print(f" m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
print(f" m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
print(f" 2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
print(f" 3. ollama create {OLLAMA_NAME} -f Modelfile")
print()
print(f"\n{'='*60}")
print(" Adapter saved (no GGUF produced).")
print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
print(f" Adapter path: {adapter_path}")
print(f"{'='*60}\n")

View file

@ -12,6 +12,7 @@ Usage:
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
@ -22,7 +23,10 @@ from scripts.user_profile import UserProfile
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
# DOCS_DIR env var (injected by compose.yml) overrides the user profile when
# running inside Docker; otherwise fall back to user.yaml / the home default.
_docs_env = os.environ.get("DOCS_DIR", "")
_docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
)
# Cover letters are discovered directly under the docs root.
LETTERS_DIR = _docs
# Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]