feat: containerize fine-tune pipeline (Dockerfile.finetune + make finetune)

- Dockerfile.finetune: PyTorch 2.3/CUDA 12.1 base + unsloth + training stack
- finetune_local.py: auto-register model via Ollama HTTP API after GGUF export; path-translate between finetune container mount and Ollama's view; update config/llm.yaml automatically; DOCS_DIR env override for Docker
- prepare_training_data.py: DOCS_DIR env override so make prepare-training works correctly inside the app container
- compose.yml: add finetune service (cpu/single-gpu/dual-gpu profiles); DOCS_DIR=/docs injected into app + finetune containers
- compose.podman-gpu.yml: CDI device override for finetune service
- Makefile: make prepare-training + make finetune targets
2026-02-25 16:22:48 -08:00 · 2026-02-25 16:22:48 -08:00 · 4d66c04d1e
commit 4d66c04d1e
parent 6c895b5a9b
6 changed files with 183 additions and 34 deletions
--- a/Dockerfile.finetune
+++ b/Dockerfile.finetune
@ -0,0 +1,38 @@
 # Dockerfile.finetune — Cover letter LoRA fine-tuner (QLoRA via unsloth)
 # Large image (~12-15 GB after build). Built once, cached on rebuilds.
 # GPU strongly recommended. CPU fallback works but training is very slow.
 #
 # Tested base: pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
 # If your GPU requires a different CUDA version, change the FROM line and
 # reinstall bitsandbytes for the matching CUDA (e.g. bitsandbytes-cuda121).
 FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
 # All later relative paths (COPY destinations aside, the ENTRYPOINT script path) resolve from here.
 WORKDIR /app
 # Build tools needed by bitsandbytes CUDA kernels and unsloth
 RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc g++ git libgomp1 \
    && rm -rf /var/lib/apt/lists/*
 # Install training stack.
 # unsloth detects CUDA version automatically from the base image.
 # NOTE(review): unsloth is installed from the repository's default branch with no
 # commit or tag pinned, and most other packages have only lower bounds or no
 # version at all, so a rebuild can pull different code than the tested image.
 RUN pip install --no-cache-dir \
    "unsloth @ git+https://github.com/unslothai/unsloth.git" \
    "datasets>=2.18" "trl>=0.8" peft transformers \
    "bitsandbytes>=0.43.0" accelerate sentencepiece \
    requests pyyaml
 COPY scripts/ /app/scripts/
 # compose.yml bind-mounts ./config over /app/config at runtime, so this baked-in
 # copy is only what the script sees when the image runs without that mount.
 COPY config/  /app/config/
 ENV PYTHONUNBUFFERED=1
 # Pin to GPU 0; overridable at runtime with --env CUDA_VISIBLE_DEVICES=
 ENV CUDA_VISIBLE_DEVICES=0
 # Runtime env vars injected by compose.yml. The values in parentheses are what
 # compose.yml sets; finetune_local.py applies its own fallbacks when a variable
 # is unset (e.g. OLLAMA_URL falls back to http://localhost:11434).
 #   OLLAMA_URL              — Ollama API base (compose: http://ollama:11434)
 #   OLLAMA_MODELS_MOUNT     — finetune container's mount path for ollama models volume
 #   OLLAMA_MODELS_OLLAMA_PATH — Ollama container's mount path for same volume
 #   DOCS_DIR                — cover letters + training data root (compose: /docs)
 # Exec-form entrypoint: any arguments given to `docker run` / `compose run`
 # are appended to this command and reach the script as CLI arguments.
 ENTRYPOINT ["python", "scripts/finetune_local.py"]
--- a/Makefile
+++ b/Makefile
@ -1,7 +1,7 @@
 # Makefile — Peregrine convenience targets
 # Usage: make <target>
-.PHONY: setup preflight start stop restart logs test clean help
+.PHONY: setup preflight start stop restart logs test prepare-training finetune clean help
 PROFILE ?= remote
 PYTHON  ?= python3
@ -43,7 +43,14 @@ logs:           ## Tail app logs
 	$(COMPOSE) logs -f app
 test:           ## Run the test suite
-	$(PYTHON) -m pytest tests/ -v
+	@$(PYTHON) -m pytest tests/ -v
 # Runs the data-preparation script in a throwaway `app` container. compose.yml sets
 # DOCS_DIR=/docs for that service, and the script prefers DOCS_DIR over user.yaml,
 # so it scans the bind-mounted documents directory.
 prepare-training: ## Scan docs_dir for cover letters and build training JSONL
 	$(COMPOSE) $(COMPOSE_FILES) run --rm app python scripts/prepare_training_data.py
 # Runs the one-shot `finetune` compose service under the profile named by PROFILE.
 # The service is declared only for the cpu, single-gpu and dual-gpu profiles,
 # whereas PROFILE defaults to "remote" at the top of this file.
 # NOTE(review): confirm `make finetune` works with the default profile (the
 # service's `ollama` dependency may not be enabled); otherwise invoke it as
 # `make finetune PROFILE=cpu` (or single-gpu / dual-gpu).
 finetune:       ## Fine-tune your personal cover letter model (run prepare-training first)
 	@echo "Starting fine-tune (30-90 min on GPU, much longer on CPU)..."
 	$(COMPOSE) $(COMPOSE_FILES) --profile $(PROFILE) run --rm finetune
 clean:          ## Remove containers, images, and data volumes (DESTRUCTIVE)
 	@echo "WARNING: This will delete all Peregrine containers and data."
--- a/compose.podman-gpu.yml
+++ b/compose.podman-gpu.yml
@ -33,3 +33,11 @@ services:
      resources:
        reservations:
          devices: []
  # Podman override for the finetune service: attach GPU 0 by its CDI device name.
  # NOTE(review): the empty `devices` list is presumably meant to clear GPU
  # reservations declared for this service in another compose file — verify how
  # the compose implementation in use merges lists before relying on that.
  finetune:
    devices:
      - nvidia.com/gpu=0
    deploy:
      resources:
        reservations:
          devices: []
--- a/compose.yml
+++ b/compose.yml
@ -12,6 +12,7 @@ services:
      - ${DOCS_DIR:-~/Documents/JobSearch}:/docs
    environment:
      - STAGING_DB=/app/data/staging.db
      - DOCS_DIR=/docs
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
      - OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
@ -101,3 +102,22 @@ services:
              capabilities: [gpu]
    profiles: [dual-gpu]
    restart: unless-stopped
  # One-shot fine-tune job built from Dockerfile.finetune; its entrypoint is
  # scripts/finetune_local.py. Started on demand with `make finetune`.
  # NOTE(review): no GPU device reservation is declared here — confirm that an
  # overlay compose file attaches the GPU for the single-gpu/dual-gpu profiles.
  finetune:
    build:
      context: .
      dockerfile: Dockerfile.finetune
    volumes:
      # Cover letters and training data; the script works under /docs/training_data.
      - ${DOCS_DIR:-~/Documents/JobSearch}:/docs
      # Host directory for Ollama models; the exported GGUF is copied into its custom/ subdirectory.
      - ${OLLAMA_MODELS_DIR:-~/models/ollama}:/ollama-models
      # Bind mount so the script's edit of config/llm.yaml lands in the host checkout.
      - ./config:/app/config
    environment:
      - DOCS_DIR=/docs
      - OLLAMA_URL=http://ollama:11434
      # The next two let the script translate the GGUF path from this container's
      # mount (/ollama-models) to the path the Ollama container sees.
      # NOTE(review): assumes the ollama service mounts the same host directory
      # at /root/.ollama — confirm against the ollama service definition.
      - OLLAMA_MODELS_MOUNT=/ollama-models
      - OLLAMA_MODELS_OLLAMA_PATH=/root/.ollama
    depends_on:
      ollama:
        condition: service_started
    profiles: [cpu, single-gpu, dual-gpu]
    # A finished or failed training run must not be restarted automatically.
    restart: "no"
--- a/scripts/finetune_local.py
+++ b/scripts/finetune_local.py
@ -32,7 +32,12 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
 # ── Config ────────────────────────────────────────────────────────────────────
 DEFAULT_MODEL   = "unsloth/Llama-3.2-3B-Instruct"   # safe on 8 GB VRAM
-_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
+# DOCS_DIR env var overrides user_profile when running inside Docker
 _docs_env = os.environ.get("DOCS_DIR", "")
 _docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 )
 LETTERS_JSONL   = _docs / "training_data" / "cover_letters.jsonl"
 OUTPUT_DIR      = _docs / "training_data" / "finetune_output"
 GGUF_DIR        = _docs / "training_data" / "gguf"
@ -66,7 +71,7 @@ print(f"{'='*60}\n")
 # ── Load dataset ──────────────────────────────────────────────────────────────
 if not LETTERS_JSONL.exists():
    sys.exit(f"ERROR: Dataset not found at {LETTERS_JSONL}\n"
-             "Run: conda run -n job-seeker python scripts/prepare_training_data.py")
+             "Run: make prepare-training  (or: python scripts/prepare_training_data.py)")
 records = [json.loads(l) for l in LETTERS_JSONL.read_text().splitlines() if l.strip()]
 print(f"Loaded {len(records)} training examples.")
@ -222,35 +227,102 @@ if not args.no_gguf and USE_UNSLOTH:
 else:
    gguf_path = None
-# ── Print next steps ──────────────────────────────────────────────────────────
+# ── Register with Ollama (auto) ────────────────────────────────────────────────
 def _auto_register_ollama(gguf_path: Path, model_name: str, system_prompt: str) -> bool:
    """
    Copy GGUF into the shared Ollama models volume and register via the API.
    Works in two modes:
      Containerised — OLLAMA_MODELS_MOUNT + OLLAMA_MODELS_OLLAMA_PATH env vars
                      translate the container path into Ollama's view of the file.
      Local         — gguf_path is an absolute path Ollama can read directly.
    """
    import shutil
    import requests
    ollama_url        = os.environ.get("OLLAMA_URL", "http://localhost:11434")
    models_mount      = os.environ.get("OLLAMA_MODELS_MOUNT", "")
    ollama_models_dir = os.environ.get("OLLAMA_MODELS_OLLAMA_PATH", "")
    # ── Place GGUF where Ollama can read it ───────────────────────────────────
    if models_mount and ollama_models_dir:
        # Containerised: write into the shared volume; Ollama reads from its own mount.
        dest_dir = Path(models_mount) / "custom"
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / gguf_path.name
        if dest != gguf_path:
            print(f"Copying GGUF → shared volume: {dest}")
            shutil.copy2(gguf_path, dest)
        ollama_gguf = f"{ollama_models_dir}/custom/{gguf_path.name}"
    else:
        # Local: pass the absolute path directly.
        ollama_gguf = str(gguf_path.resolve())
    modelfile_text = (
        f"FROM {ollama_gguf}\n"
        f"SYSTEM \"\"\"\n{system_prompt}\n\"\"\"\n"
        f"PARAMETER temperature 0.7\n"
        f"PARAMETER top_p 0.9\n"
        f"PARAMETER num_ctx 32768\n"
    )
    # Write Modelfile to disk as a reference (useful for debugging)
    (OUTPUT_DIR / "Modelfile").write_text(modelfile_text)
    # ── Create via Ollama API ─────────────────────────────────────────────────
    print(f"\nRegistering '{model_name}' with Ollama at {ollama_url} …")
    try:
        r = requests.post(
            f"{ollama_url}/api/create",
            json={"name": model_name, "modelfile": modelfile_text},
            timeout=300,
            stream=True,
        )
        for line in r.iter_lines():
            if line:
                import json as _json
                try:
                    msg = _json.loads(line).get("status", "")
                except Exception:
                    msg = line.decode()
                if msg:
                    print(f"  {msg}")
        if r.status_code != 200:
            print(f"  WARNING: Ollama returned HTTP {r.status_code}")
            return False
    except Exception as exc:
        print(f"  Ollama registration failed: {exc}")
        print(f"  Run manually: ollama create {model_name} -f {OUTPUT_DIR / 'Modelfile'}")
        return False
    # ── Update config/llm.yaml ────────────────────────────────────────────────
    llm_yaml = Path(__file__).parent.parent / "config" / "llm.yaml"
    if llm_yaml.exists():
        try:
            import yaml as _yaml
            cfg = _yaml.safe_load(llm_yaml.read_text()) or {}
            if "backends" in cfg and "ollama" in cfg["backends"]:
                cfg["backends"]["ollama"]["model"] = f"{model_name}:latest"
                llm_yaml.write_text(
                    _yaml.dump(cfg, default_flow_style=False, allow_unicode=True)
                )
                print(f"  llm.yaml updated → ollama.model = {model_name}:latest")
        except Exception as exc:
            print(f"  Could not update llm.yaml automatically: {exc}")
    print(f"\n{'='*60}")
-print("  DONE — next steps to load into Ollama:")
+    print(f"  Model ready: {model_name}:latest")
-print(f"{'='*60}")
+    print(f"  Test: ollama run {model_name} 'Write a cover letter for a Senior Engineer role at Acme Corp.'")
    print(f"{'='*60}\n")
    return True
 if gguf_path and gguf_path.exists():
-    modelfile = OUTPUT_DIR / "Modelfile"
+    _auto_register_ollama(gguf_path, OLLAMA_NAME, SYSTEM_PROMPT)
    modelfile.write_text(f"""FROM {gguf_path}
 SYSTEM \"\"\"
 {SYSTEM_PROMPT}
 \"\"\"
 PARAMETER temperature 0.7
 PARAMETER top_p 0.9
 PARAMETER num_ctx 32768
 """)
    print(f"\n1. Modelfile written to: {modelfile}")
    print(f"\n2. Create the Ollama model:")
    print(f"     ollama create {OLLAMA_NAME} -f {modelfile}")
    print(f"\n3. Test it:")
    print(f"     ollama run {OLLAMA_NAME} 'Write a cover letter for a Senior Customer Success Manager position at Acme Corp.'")
    print(f"\n4. Update llm.yaml to use '{OLLAMA_NAME}:latest' as the ollama model,")
    print(f"   then pick it in Settings → LLM Backends → Ollama → Model.")
 else:
-    print(f"\n  Adapter only (no GGUF). To convert manually:")
+    print(f"\n{'='*60}")
-    print(f"  1. Merge adapter:")
+    print("  Adapter saved (no GGUF produced).")
-    print(f"       conda run -n ogma python -c \"")
+    print(f"  Re-run without --no-gguf to generate a GGUF for Ollama registration.")
-    print(f"         from peft import AutoPeftModelForCausalLM")
+    print(f"  Adapter path: {adapter_path}")
-    print(f"         m = AutoPeftModelForCausalLM.from_pretrained('{adapter_path}')")
+    print(f"{'='*60}\n")
    print(f"         m.merge_and_unload().save_pretrained('{OUTPUT_DIR}/merged')\"")
    print(f"  2. Convert to GGUF using textgen env's convert_hf_to_gguf.py")
    print(f"  3. ollama create {OLLAMA_NAME} -f Modelfile")
 print()
--- a/scripts/prepare_training_data.py
+++ b/scripts/prepare_training_data.py
@ -12,6 +12,7 @@ Usage:
 """
 import argparse
 import json
 import os
 import re
 import sys
 from pathlib import Path
@ -22,7 +23,10 @@ from scripts.user_profile import UserProfile
 _USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
 _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
-_docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
+_docs_env = os.environ.get("DOCS_DIR", "")
 _docs = Path(_docs_env) if _docs_env else (
    _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
 )
 LETTERS_DIR = _docs
 # Use two globs to handle mixed capitalisation ("Cover Letter" vs "cover letter")
 LETTER_GLOBS = ["*Cover Letter*.md", "*cover letter*.md"]