# Change notes (from commit message):
# - Remove ollama-gpu service (was colliding with ollama on port 11434)
# - Strip inline deploy.resources GPU blocks from vision and vllm
# - Add compose.gpu.yml: Docker NVIDIA overlay for ollama (GPU 0), vision (GPU 0),
#   vllm (GPU 1), finetune (GPU 0)
# - Fix compose.podman-gpu.yml: rename ollama-gpu -> ollama to match service name
#   after removal of ollama-gpu
# - Update Makefile: apply compose.gpu.yml for Docker + GPU profiles (was only
#   applying podman-gpu.yml for Podman + GPU profiles)
# compose.podman-gpu.yml — Podman GPU override
#
# Replaces Docker-specific `driver: nvidia` reservations with CDI device specs
# for rootless Podman. Applied automatically via `make start PROFILE=single-gpu|dual-gpu`
# when podman/podman-compose is detected, or manually:
#
#   podman-compose -f compose.yml -f compose.podman-gpu.yml --profile single-gpu up -d
#
# Prerequisites:
#   sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
#   (requires nvidia-container-toolkit >= 1.14)
#
services:
  # Each service requests its GPU through a CDI device name
  # (nvidia.com/gpu=<index>) and empties the Docker-style
  # deploy.resources.reservations.devices list.
  # NOTE(review): compose override merge semantics vary by version — an empty
  # list may not clear a base-file reservation on all implementations; newer
  # Compose supports the `!reset` tag for that. Confirm against the compose
  # tool actually in use.
  ollama:
    devices:
      - nvidia.com/gpu=0  # GPU 0
    deploy:
      resources:
        reservations:
          devices: []  # clear Docker `driver: nvidia` reservation

  vision:
    devices:
      - nvidia.com/gpu=0  # GPU 0 (shared with ollama)
    deploy:
      resources:
        reservations:
          devices: []

  vllm:
    devices:
      - nvidia.com/gpu=1  # GPU 1 — dual-gpu profile
    deploy:
      resources:
        reservations:
          devices: []

  finetune:
    devices:
      - nvidia.com/gpu=0  # GPU 0
    deploy:
      resources:
        reservations:
          devices: []