fix(avocet): use_reentrant=False for gradient checkpointing
Reentrant gradient checkpointing (the default) conflicts with Accelerate's gradient accumulation context manager -- causes 'backward through graph a second time' on the first training step. use_reentrant=False uses the non-reentrant autograd hook path which is compatible with Accelerate >= 0.27.
This commit is contained in:
parent
5dee23f53c
commit
753f8f5def
1 changed file with 6 additions and 1 deletion
|
|
@ -310,7 +310,12 @@ def run_finetune(model_key: str, epochs: int = 5, score_files: list[Path] | None
|
||||||
label2id=label2id,
|
label2id=label2id,
|
||||||
)
|
)
|
||||||
if config["gradient_checkpointing"]:
|
if config["gradient_checkpointing"]:
|
||||||
model.gradient_checkpointing_enable()
|
# use_reentrant=False avoids "backward through graph a second time" errors
|
||||||
|
# when Accelerate's gradient accumulation context is layered on top.
|
||||||
|
# Reentrant checkpointing (the default) conflicts with Accelerate ≥ 0.27.
|
||||||
|
model.gradient_checkpointing_enable(
|
||||||
|
gradient_checkpointing_kwargs={"use_reentrant": False}
|
||||||
|
)
|
||||||
|
|
||||||
# --- TrainingArguments ---
|
# --- TrainingArguments ---
|
||||||
training_args = TrainingArguments(
|
training_args = TrainingArguments(
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue