From d600fb665161162794dc318eb4373a175108fe7e Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Thu, 2 Apr 2026 11:21:20 -0700
Subject: [PATCH] refactor(orch): hoist service_max_mb lookup; freeze _Scored;
 clarify warm-fallback comments

---
 .../resources/coordinator/node_selector.py    | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/circuitforge_core/resources/coordinator/node_selector.py b/circuitforge_core/resources/coordinator/node_selector.py
index 665cbb5..9cdb9f4 100644
--- a/circuitforge_core/resources/coordinator/node_selector.py
+++ b/circuitforge_core/resources/coordinator/node_selector.py
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
 _WARM_BONUS_MB = 1000
 
 
-@dataclass
+@dataclass(frozen=True)
 class _Scored:
     node_id: str
     gpu_id: int
@@ -31,13 +31,14 @@ def select_node(
     Warm nodes (service already running) get priority, then sorted by free VRAM.
     Returns None if no suitable node exists.
     """
+    service_max_mb = _find_service_max_mb(service, profile_registry)
+    if service_max_mb is None:
+        return None  # service not in any profile
+
     candidates: list[_Scored] = []
     for node_id, record in agents.items():
         if not record.online:
             continue
-        service_max_mb = _find_service_max_mb(service, profile_registry)
-        if service_max_mb is None:
-            continue
         for gpu in record.gpus:
             warm = f"{node_id}:{service}" in resident_keys
             effective = gpu.vram_free_mb + (_WARM_BONUS_MB if warm else 0)
@@ -52,12 +53,15 @@ def select_node(
             ))
     if not candidates:
         return None
-    # Warm nodes are always eligible (they already have the service resident).
-    # Cold nodes must pass the can_fit threshold. If no node passes either
-    # criterion, fall back to the full candidate set.
+    # Prefer: (1) warm nodes (model already resident — no cold start)
+    #         (2) cold nodes that can fit the service (free >= half of max_mb)
+    # Fallback: best-effort node when nothing fits and nothing is warm
+    #   (coordinator will attempt to start the service anyway; it may evict or fail)
+    # Note: resident_keys are per-node, not per-GPU. On multi-GPU nodes, the warm
+    #   bonus applies to all GPUs on the node. This is a known coarseness —
+    #   per-GPU resident tracking requires a resident_key format change.
     preferred = [c for c in candidates if c.warm or c.can_fit]
     pool = preferred if preferred else candidates
-    # Warm nodes take priority; within the same warmth tier, prefer more free VRAM.
     best = max(pool, key=lambda c: (c.warm, c.effective_free_mb))
     return best.node_id, best.gpu_id