fix(scheduler): join batch worker threads in shutdown()

Previously, shutdown() only joined the scheduler loop thread. Batch worker threads (which decrement _reserved_vram in their finally block) could still be running when shutdown() returned, leaving stale VRAM accounting. shutdown() now snapshots the active workers while holding the lock and then joins each of them. The snapshot-then-join pattern avoids holding the lock across the blocking join calls, which would deadlock because workers acquire the same lock on exit.
2026-04-01 11:21:30 -07:00 · 2026-04-01 11:21:30 -07:00 · aa51794f45
commit aa51794f45
parent 6b8e421eb2
1 changed files with 11 additions and 1 deletions
--- a/circuitforge_core/tasks/scheduler.py
+++ b/circuitforge_core/tasks/scheduler.py
@ -203,11 +203,21 @@ class TaskScheduler:
                self._wake.set()
    def shutdown(self, timeout: float = 5.0) -> None:
        """Signal the scheduler to stop and wait for its threads to exit.

        Sets the stop and wake events, then joins the scheduler loop thread
        followed by every batch worker thread registered in ``_active`` at
        that moment.

        Args:
            timeout: Upper bound, in seconds, on the *total* time spent
                waiting across all joins (one shared deadline, not a fresh
                allowance per thread). ``None`` waits indefinitely.

        Note:
            A join that times out leaves its thread running, so state that
            workers settle on exit (e.g. ``_reserved_vram == 0``) can only
            be relied on if every thread finished within ``timeout``.
            The calling thread is never joined, so this method may be
            invoked from the scheduler loop or from a batch worker.
        """
        # Function-scope imports keep this method self-contained regardless
        # of how the module imports these names at file level.
        import threading
        import time

        self._stop.set()
        self._wake.set()

        # A single deadline keeps the whole call bounded by `timeout`;
        # passing `timeout` to each join would let N workers stretch the
        # wait to (N + 1) * timeout.
        deadline = None if timeout is None else time.monotonic() + timeout

        def _remaining():
            """Return the seconds left before the deadline (None = no limit)."""
            if deadline is None:
                return None
            return max(0.0, deadline - time.monotonic())

        # Thread.join() raises RuntimeError when asked to join the caller.
        current = threading.current_thread()

        loop_thread = self._thread
        if loop_thread and loop_thread.is_alive() and loop_thread is not current:
            loop_thread.join(timeout=_remaining())

        # Snapshot under the lock, join outside it: holding the lock across
        # a blocking join would stall any worker that needs the same lock
        # in order to finish.
        with self._lock:
            workers = list(self._active.values())
        for worker in workers:
            if worker is not current:
                worker.join(timeout=_remaining())
    def _scheduler_loop(self) -> None:
        while not self._stop.is_set():