fix: fix dual-gpu port conflict + move GPU config to overlay files
- Remove ollama-gpu service (was colliding with ollama on port 11434)
- Strip inline deploy.resources GPU blocks from vision and vllm
- Add compose.gpu.yml: Docker NVIDIA overlay for ollama (GPU 0), vision (GPU 0), vllm (GPU 1), finetune (GPU 0)
- Fix compose.podman-gpu.yml: rename ollama-gpu → ollama to match service name after removal of ollama-gpu
- Update Makefile: apply compose.gpu.yml for Docker + GPU profiles (was only applying podman-gpu.yml for Podman + GPU profiles)
This commit is contained in:
parent
dc4a08c063
commit
006738f7b3
4 changed files with 55 additions and 30 deletions
9
Makefile
9
Makefile
|
|
@ -15,13 +15,18 @@ COMPOSE ?= $(shell \
|
||||||
&& echo "podman compose" \
|
&& echo "podman compose" \
|
||||||
|| echo "podman-compose"))
|
|| echo "podman-compose"))
|
||||||
|
|
||||||
# GPU profiles on Podman require a CDI override (rootless Podman can't use driver: nvidia)
|
# GPU profiles require an overlay for NVIDIA device reservations.
|
||||||
# Generate CDI spec first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
|
||||||
|
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||||
COMPOSE_FILES := -f compose.yml
|
COMPOSE_FILES := -f compose.yml
|
||||||
ifneq (,$(findstring podman,$(COMPOSE)))
|
ifneq (,$(findstring podman,$(COMPOSE)))
|
||||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||||
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
|
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
|
||||||
endif
|
endif
|
||||||
|
else
|
||||||
|
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||||
|
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
|
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
|
||||||
|
|
|
||||||
46
compose.gpu.yml
Normal file
46
compose.gpu.yml
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
# compose.gpu.yml — Docker NVIDIA GPU overlay
|
||||||
|
#
|
||||||
|
# Adds NVIDIA GPU reservations to Peregrine services.
|
||||||
|
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
|
||||||
|
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
|
||||||
|
#
|
||||||
|
# Prerequisites:
|
||||||
|
# sudo nvidia-ctk runtime configure --runtime=docker
|
||||||
|
# sudo systemctl restart docker
|
||||||
|
#
|
||||||
|
services:
|
||||||
|
ollama:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
device_ids: ["0"]
|
||||||
|
capabilities: [gpu]
|
||||||
|
|
||||||
|
vision:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
device_ids: ["0"]
|
||||||
|
capabilities: [gpu]
|
||||||
|
|
||||||
|
vllm:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
device_ids: ["1"]
|
||||||
|
capabilities: [gpu]
|
||||||
|
|
||||||
|
finetune:
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
device_ids: ["0"]
|
||||||
|
capabilities: [gpu]
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
# compose.podman-gpu.yml — Podman GPU override
|
# compose.podman-gpu.yml — Podman GPU override
|
||||||
#
|
#
|
||||||
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
|
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
|
||||||
# for rootless Podman. Apply automatically via `make start PROFILE=single-gpu`
|
# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
|
||||||
# when podman/podman-compose is detected, or manually:
|
# when podman/podman-compose is detected, or manually:
|
||||||
# podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
|
# podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
|
||||||
#
|
#
|
||||||
|
|
@ -10,7 +10,7 @@
|
||||||
# (requires nvidia-container-toolkit >= 1.14)
|
# (requires nvidia-container-toolkit >= 1.14)
|
||||||
#
|
#
|
||||||
services:
|
services:
|
||||||
ollama-gpu:
|
ollama:
|
||||||
devices:
|
devices:
|
||||||
- nvidia.com/gpu=0
|
- nvidia.com/gpu=0
|
||||||
deploy:
|
deploy:
|
||||||
|
|
|
||||||
26
compose.yml
26
compose.yml
|
|
@ -48,18 +48,6 @@ services:
|
||||||
profiles: [cpu, single-gpu, dual-gpu]
|
profiles: [cpu, single-gpu, dual-gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
ollama-gpu:
|
|
||||||
extends:
|
|
||||||
service: ollama
|
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
device_ids: ["0"]
|
|
||||||
capabilities: [gpu]
|
|
||||||
profiles: [single-gpu, dual-gpu]
|
|
||||||
|
|
||||||
vision:
|
vision:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
|
|
@ -69,13 +57,6 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
|
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
|
||||||
- VISION_REVISION=${VISION_REVISION:-2025-01-09}
|
- VISION_REVISION=${VISION_REVISION:-2025-01-09}
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
device_ids: ["0"]
|
|
||||||
capabilities: [gpu]
|
|
||||||
profiles: [single-gpu, dual-gpu]
|
profiles: [single-gpu, dual-gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
|
@ -93,13 +74,6 @@ services:
|
||||||
--enforce-eager
|
--enforce-eager
|
||||||
--max-num-seqs 8
|
--max-num-seqs 8
|
||||||
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
|
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
device_ids: ["1"]
|
|
||||||
capabilities: [gpu]
|
|
||||||
profiles: [dual-gpu]
|
profiles: [dual-gpu]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue