- Remove ollama-gpu service (was colliding with ollama on port 11434)
- Strip inline deploy.resources GPU blocks from vision and vllm
- Add compose.gpu.yml: Docker NVIDIA overlay for ollama (GPU 0), vision (GPU 0), vllm (GPU 1), finetune (GPU 0)
- Fix compose.podman-gpu.yml: rename ollama-gpu → ollama to match service name after removal of ollama-gpu
- Update Makefile: apply compose.gpu.yml for Docker + GPU profiles (was only applying podman-gpu.yml for Podman + GPU profiles)
46 lines
1.1 KiB
YAML
# compose.gpu.yml — Docker NVIDIA GPU overlay
#
# Adds NVIDIA GPU reservations to Peregrine services.
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
#
# Prerequisites:
#   sudo nvidia-ctk runtime configure --runtime=docker
#   sudo systemctl restart docker
#
services:
  # LLM inference — pinned to GPU 0.
  ollama:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # Vision service — shares GPU 0 with ollama.
  vision:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # vLLM — isolated on GPU 1 (dual-gpu profile).
  vllm:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]

  # Fine-tuning jobs — shares GPU 0.
  finetune:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]