From 6ca5893b1c6fd3ed864f9104bf9a2b0a87f725b3 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Fri, 27 Feb 2026 00:17:00 -0800 Subject: [PATCH] feat: add DUAL_GPU_MODE default, VRAM warning, and download size report to preflight - Add _mixed_mode_vram_warning() to flag low VRAM on GPU 1 in mixed mode - Wire download size report block into main() before closing border line - Wire mixed-mode VRAM warning into report if triggered - Write DUAL_GPU_MODE=ollama default to .env for new 2-GPU setups (no override if already set) - Promote import os to top-level (was local import inside get_cpu_cores) --- scripts/preflight.py | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/scripts/preflight.py b/scripts/preflight.py index 621b2c0..b840dda 100644 --- a/scripts/preflight.py +++ b/scripts/preflight.py @@ -23,6 +23,7 @@ Exit codes: 1 — manual action required (unresolvable port conflict on external service) """ import argparse +import os import platform import socket import subprocess @@ -112,7 +113,6 @@ def get_ram_gb() -> tuple[float, float]: def get_cpu_cores() -> int: - import os return os.cpu_count() or 1 @@ -454,6 +454,38 @@ def main() -> None: info = ports[name] print(f"║ {name} :{info['resolved']} → app will use host.docker.internal:{info['resolved']}") + # ── Download size warning ────────────────────────────────────────────── + dual_gpu_mode = os.environ.get("DUAL_GPU_MODE", "ollama") + sizes = _download_size_mb(profile, dual_gpu_mode) + total_mb = sum(sizes.values()) + print("║") + print("║ Download sizes (first-run estimates)") + print("║ Docker images") + print(f"║ app (Python build) ~{sizes.get('app', 0):,} MB") + if "searxng" in sizes: + print(f"║ searxng/searxng ~{sizes['searxng']:,} MB") + if "ollama" in sizes: + shared_note = " (shared by ollama + ollama_research)" if profile == "dual-gpu" and dual_gpu_mode in ("ollama", "mixed") else "" + print(f"║ ollama/ollama ~{sizes['ollama']:,} MB{shared_note}")
+ if "vision_image" in sizes: + print(f"║ vision service ~{sizes['vision_image']:,} MB (torch + moondream)") + if "vllm_image" in sizes: + print(f"║ vllm/vllm-openai ~{sizes['vllm_image']:,} MB") + print("║ Model weights (lazy-loaded on first use)") + if "llama3_2_3b" in sizes: + print(f"║ llama3.2:3b ~{sizes['llama3_2_3b']:,} MB → OLLAMA_MODELS_DIR") + if "moondream2" in sizes: + print(f"║ moondream2 ~{sizes['moondream2']:,} MB → vision container cache") + if profile == "dual-gpu" and dual_gpu_mode in ("ollama", "mixed"): + print("║ Note: ollama + ollama_research share model dir — no double download") + print(f"║ ⚠ Total first-run: ~{total_mb / 1024:.1f} GB (models persist between restarts)") + + # ── Mixed-mode VRAM warning ──────────────────────────────────────────── + vram_warn = _mixed_mode_vram_warning(gpus, dual_gpu_mode) + if vram_warn: + print("║") + print(f"║ {vram_warn}") + print("╚════════════════════════════════════════════════════╝") if not args.check_only: @@ -466,6 +498,16 @@ def main() -> None: # GPU info for the app container (which lacks nvidia-smi access) env_updates["PEREGRINE_GPU_COUNT"] = str(len(gpus)) env_updates["PEREGRINE_GPU_NAMES"] = ",".join(g["name"] for g in gpus) + # Write DUAL_GPU_MODE default for new 2-GPU setups (don't override user's choice) + if len(gpus) >= 2: + existing_env: dict[str, str] = {} + if ENV_FILE.exists(): + for line in ENV_FILE.read_text().splitlines(): + if "=" in line and not line.startswith("#"): + k, _, v = line.partition("=") + existing_env[k.strip()] = v.strip() + if "DUAL_GPU_MODE" not in existing_env: + env_updates["DUAL_GPU_MODE"] = "ollama" write_env(env_updates) update_llm_yaml(ports) write_compose_override(ports)