peregrine/compose.gpu.yml
pyr0ball 775d54d605 fix: fix dual-gpu port conflict + move GPU config to overlay files
- Remove ollama-gpu service (was colliding with ollama on port 11434)
- Strip inline deploy.resources GPU blocks from vision and vllm
- Add compose.gpu.yml: Docker NVIDIA overlay for ollama (GPU 0),
  vision (GPU 0), vllm (GPU 1), finetune (GPU 0)
- Fix compose.podman-gpu.yml: rename ollama-gpu → ollama to match
  service name after removal of ollama-gpu
- Update Makefile: apply compose.gpu.yml for Docker + GPU profiles
  (was only applying podman-gpu.yml for Podman + GPU profiles)
2026-02-25 16:44:59 -08:00

46 lines
1.1 KiB
YAML

# compose.gpu.yml — Docker NVIDIA GPU overlay
#
# Adds NVIDIA GPU reservations to Peregrine services.
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
#
# Prerequisites (needs the NVIDIA Container Toolkit, which provides `nvidia-ctk`):
#   sudo nvidia-ctk runtime configure --runtime=docker
#   sudo systemctl restart docker
#
services:
  # Each entry adds an NVIDIA device reservation to the service of the same
  # name. This file is meant to be layered on top of compose.yml
  # (`-f compose.yml -f compose.gpu.yml`), so only the GPU-related keys appear.
  #
  # device_ids entries stay quoted so they are read as strings, not integers.
  # ollama, vision and finetune all reference device "0"; vllm is the only
  # service that references device "1".

  # ollama: NVIDIA device "0".
  ollama:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # vision: NVIDIA device "0".
  vision:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # vllm: NVIDIA device "1".
  vllm:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]

  # finetune: NVIDIA device "0".
  finetune:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]