From 753f8f5def1fa4fe2c152bb09c7df8251e0480eb Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Sun, 15 Mar 2026 17:23:40 -0700
Subject: [PATCH] fix(avocet): use_reentrant=False for gradient checkpointing

Reentrant gradient checkpointing (the default) conflicts with
Accelerate's gradient accumulation context manager -- causes 'backward
through graph a second time' on the first training step.
use_reentrant=False uses the non-reentrant autograd hook path which is
compatible with Accelerate >= 0.27.
---
 scripts/finetune_classifier.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/finetune_classifier.py b/scripts/finetune_classifier.py
index c70929e..e936466 100644
--- a/scripts/finetune_classifier.py
+++ b/scripts/finetune_classifier.py
@@ -310,7 +310,12 @@ def run_finetune(model_key: str, epochs: int = 5, score_files: list[Path] | None
         label2id=label2id,
     )
     if config["gradient_checkpointing"]:
-        model.gradient_checkpointing_enable()
+        # use_reentrant=False avoids "backward through graph a second time" errors
+        # when Accelerate's gradient accumulation context is layered on top.
+        # Reentrant checkpointing (the default) conflicts with Accelerate ≥ 0.27.
+        model.gradient_checkpointing_enable(
+            gradient_checkpointing_kwargs={"use_reentrant": False}
+        )
 
     # --- TrainingArguments ---
     training_args = TrainingArguments(