From c2ac55259dcc80b94956aa51440f1af3b215669f Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Tue, 26 May 2026 15:07:30 -0700
Subject: [PATCH] fix(video): enforce PCI_BUS_ID order + force
 CUDA_VISIBLE_DEVICES assignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CUDA defaults to FASTEST_FIRST device ordering, which does not match
nvidia-smi's PCI bus order on multi-GPU nodes. On Muninn, the RTX 3090
is cuda:0 and the Quadro RTX 4000 is cuda:1 — the opposite of nvidia-smi.

Two fixes:
1. Set CUDA_DEVICE_ORDER=PCI_BUS_ID so --gpu-id always matches nvidia-smi
   and the muninn.yaml profile GPU index assignments.
2. Use direct assignment (os.environ[...] = ...) instead of setdefault —
   setdefault silently no-ops if CUDA_VISIBLE_DEVICES is already present
   in the environment (conda activation, prior run, system default).
---
 circuitforge_core/video/app.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/circuitforge_core/video/app.py b/circuitforge_core/video/app.py
index 00a3ee3..b2a3e64 100644
--- a/circuitforge_core/video/app.py
+++ b/circuitforge_core/video/app.py
@@ -171,10 +171,12 @@ if __name__ == "__main__":
     )
     args = _parse_args()
 
-    # cf-orch sets CUDA_VISIBLE_DEVICES before spawning; only set it here when
-    # running the service manually (--gpu-id flag) without cf-orch.
+    # Pin GPU selection unconditionally — --gpu-id is authoritative.
+    # Force PCI_BUS_ID ordering so --gpu-id matches nvidia-smi (not CUDA's
+    # default FASTEST_FIRST, which can swap indices on multi-GPU nodes).
     if args.device == "cuda" and not args.mock:
-        os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(args.gpu_id))
+        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
 
     mock = args.mock or args.model == "mock"
     device = "cpu" if mock else args.device