From 006738f7b3401576a42a945e251d1b26c5b4878b Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 25 Feb 2026 16:44:59 -0800 Subject: [PATCH] fix: fix dual-gpu port conflict + move GPU config to overlay files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove ollama-gpu service (was colliding with ollama on port 11434) - Strip inline deploy.resources GPU blocks from vision and vllm - Add compose.gpu.yml: Docker NVIDIA overlay for ollama (GPU 0), vision (GPU 0), vllm (GPU 1), finetune (GPU 0) - Fix compose.podman-gpu.yml: rename ollama-gpu → ollama to match service name after removal of ollama-gpu - Update Makefile: apply compose.gpu.yml for Docker + GPU profiles (was only applying podman-gpu.yml for Podman + GPU profiles) --- Makefile | 9 +++++++-- compose.gpu.yml | 46 ++++++++++++++++++++++++++++++++++++++++++ compose.podman-gpu.yml | 4 ++-- compose.yml | 26 ------------------------ 4 files changed, 55 insertions(+), 30 deletions(-) create mode 100644 compose.gpu.yml diff --git a/Makefile b/Makefile index 4576ebf..dcb770a 100644 --- a/Makefile +++ b/Makefile @@ -15,13 +15,18 @@ COMPOSE ?= $(shell \ && echo "podman compose" \ || echo "podman-compose")) -# GPU profiles on Podman require a CDI override (rootless Podman can't use driver: nvidia) -# Generate CDI spec first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml +# GPU profiles require an overlay for NVIDIA device reservations. +# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml). +# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml COMPOSE_FILES := -f compose.yml ifneq (,$(findstring podman,$(COMPOSE))) ifneq (,$(findstring gpu,$(PROFILE))) COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml endif +else + ifneq (,$(findstring gpu,$(PROFILE))) + COMPOSE_FILES := -f compose.yml -f compose.gpu.yml + endif endif setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit) diff --git a/compose.gpu.yml b/compose.gpu.yml new file mode 100644 index 0000000..f453134 --- /dev/null +++ b/compose.gpu.yml @@ -0,0 +1,46 @@ +# compose.gpu.yml — Docker NVIDIA GPU overlay +# +# Adds NVIDIA GPU reservations to Peregrine services. +# Applied automatically by `make start PROFILE=single-gpu|dual-gpu` when Docker is detected. +# Manual: docker compose -f compose.yml -f compose.gpu.yml --profile single-gpu up -d +# +# Prerequisites: +# sudo nvidia-ctk runtime configure --runtime=docker +# sudo systemctl restart docker +# +services: + ollama: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] + capabilities: [gpu] + + vision: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] + capabilities: [gpu] + + vllm: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["1"] + capabilities: [gpu] + + finetune: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] + capabilities: [gpu] diff --git a/compose.podman-gpu.yml b/compose.podman-gpu.yml index e812287..688653f 100644 --- a/compose.podman-gpu.yml +++ b/compose.podman-gpu.yml @@ -1,7 +1,7 @@ # compose.podman-gpu.yml — Podman GPU override # # Replaces Docker-specific `driver: nvidia` reservations with CDI device specs -# for rootless Podman. Apply automatically via `make start PROFILE=single-gpu` +# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu` # when podman/podman-compose is detected, or manually: # podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d # @@ -10,7 +10,7 @@ # (requires nvidia-container-toolkit >= 1.14) # services: - ollama-gpu: + ollama: devices: - nvidia.com/gpu=0 deploy: diff --git a/compose.yml b/compose.yml index 46b9bff..739ffd9 100644 --- a/compose.yml +++ b/compose.yml @@ -48,18 +48,6 @@ services: profiles: [cpu, single-gpu, dual-gpu] restart: unless-stopped - ollama-gpu: - extends: - service: ollama - deploy: - resources: - reservations: - devices: - - driver: nvidia - device_ids: ["0"] - capabilities: [gpu] - profiles: [single-gpu, dual-gpu] - vision: build: context: . @@ -69,13 +57,6 @@ services: environment: - VISION_MODEL=${VISION_MODEL:-vikhyatk/moondream2} - VISION_REVISION=${VISION_REVISION:-2025-01-09} - deploy: - resources: - reservations: - devices: - - driver: nvidia - device_ids: ["0"] - capabilities: [gpu] profiles: [single-gpu, dual-gpu] restart: unless-stopped @@ -93,13 +74,6 @@ services: --enforce-eager --max-num-seqs 8 --cpu-offload-gb ${CPU_OFFLOAD_GB:-0} - deploy: - resources: - reservations: - devices: - - driver: nvidia - device_ids: ["1"] - capabilities: [gpu] profiles: [dual-gpu] restart: unless-stopped