fix: fix dual-gpu port conflict + move GPU config to overlay files

- Remove ollama-gpu service (was colliding with ollama on port 11434)
- Strip inline deploy.resources GPU blocks from vision and vllm
- Add compose.gpu.yml: Docker NVIDIA overlay for ollama (GPU 0),
  vision (GPU 0), vllm (GPU 1), finetune (GPU 0)
- Fix compose.podman-gpu.yml: rename ollama-gpu → ollama to match
  service name after removal of ollama-gpu
- Update Makefile: apply compose.gpu.yml for Docker + GPU profiles
  (was only applying podman-gpu.yml for Podman + GPU profiles)
This commit is contained in:
pyr0ball 2026-02-25 16:44:59 -08:00
parent f38f0c2007
commit cc01f67b04
4 changed files with 55 additions and 30 deletions

View file

@ -15,13 +15,18 @@ COMPOSE ?= $(shell \
&& echo "podman compose" \
|| echo "podman-compose"))
# GPU profiles on Podman require a CDI override (rootless Podman can't use driver: nvidia)
# Generate CDI spec first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
# GPU profiles require an overlay for NVIDIA device reservations.
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
# Base case: only the main compose file (CPU profiles need no GPU overlay).
COMPOSE_FILES := -f compose.yml
# $(COMPOSE) contains "podman" for both `podman compose` and `podman-compose`,
# so findstring matches either Podman front-end.
ifneq (,$(findstring podman,$(COMPOSE)))
# Any GPU profile name ("single-gpu", "dual-gpu") contains "gpu":
# layer the CDI-based Podman overlay on top of the base file.
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
endif
else
# Docker path: layer the deploy.resources NVIDIA overlay for GPU profiles.
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
endif
endif
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)

46
compose.gpu.yml Normal file
View file

@ -0,0 +1,46 @@
# compose.gpu.yml — Docker NVIDIA GPU overlay
#
# Adds NVIDIA GPU reservations to Peregrine services.
# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected.
# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d
#
# Prerequisites:
#   sudo nvidia-ctk runtime configure --runtime=docker
#   sudo systemctl restart docker
#
# device_ids are quoted strings — Compose requires string IDs, and quoting
# avoids YAML implicit-typing surprises.
---
services:
  # LLM server — pinned to GPU 0.
  ollama:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # Vision model service — shares GPU 0 with ollama/finetune.
  vision:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # vLLM inference — pinned to GPU 1 (dual-gpu layouts; see commit message).
  vllm:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]

  # Fine-tuning jobs — GPU 0.
  finetune:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

View file

@ -1,7 +1,7 @@
# compose.podman-gpu.yml — Podman GPU override
#
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
# for rootless Podman. Apply automatically via `make start PROFILE=single-gpu`
# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
# when podman/podman-compose is detected, or manually:
# podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
#
@ -10,7 +10,7 @@
# (requires nvidia-container-toolkit >= 1.14)
#
services:
ollama-gpu:
ollama:
devices:
- nvidia.com/gpu=0
deploy:

View file

@ -48,18 +48,6 @@ services:
profiles: [cpu, single-gpu, dual-gpu]
restart: unless-stopped
ollama-gpu:
extends:
service: ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
profiles: [single-gpu, dual-gpu]
vision:
build:
context: .
@ -69,13 +57,6 @@ services:
environment:
- VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2}
- VISION_REVISION=${VISION_REVISION:-2025-01-09}
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"]
capabilities: [gpu]
profiles: [single-gpu, dual-gpu]
restart: unless-stopped
@ -93,13 +74,6 @@ services:
--enforce-eager
--max-num-seqs 8
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
capabilities: [gpu]
profiles: [dual-gpu]
restart: unless-stopped