This repository was archived by the owner on Dec 14, 2023. It is now read-only.

Commit 7640ce3 (parent: da46f92)

Make gradient checkpointing work properly

1 file changed: train.py (6 additions, 1 deletion)
@@ -722,7 +722,12 @@ def finetune_unet(batch, train_encoder=False):
             )
         cast_to_gpu_and_type([text_encoder], accelerator, torch.float32)
 
-
+        # Fixes gradient checkpointing training.
+        # See: https://github.com/prigoyal/pytorch_memonger/blob/master/tutorial/Checkpointing_for_PyTorch_models.ipynb
+        if gradient_checkpointing or text_encoder_gradient_checkpointing:
+            unet.eval()
+            text_encoder.eval()
+
         # Encode text embeddings
         token_ids = batch['prompt_ids']
         encoder_hidden_states = text_encoder(token_ids)[0]
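For context on why the commit switches the modules to eval() when checkpointing is enabled: torch.utils.checkpoint discards intermediate activations during the forward pass and recomputes the checkpointed segment during backward, so train-mode layers with side effects or randomness (BatchNorm running statistics, Dropout masks) execute more than once; the linked memonger tutorial discusses these pitfalls. Importantly, eval() only changes layer behavior, it does not freeze parameters, so gradients still flow. The sketch below is a minimal, self-contained illustration of that point and is not taken from this repository's train.py; Block, model, and x are made-up names, and the use_reentrant argument assumes a reasonably recent PyTorch.

import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint


class Block(nn.Module):
    """Toy module with a Dropout layer, standing in for the UNet or text encoder."""

    def __init__(self, dim=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, dim),
            nn.Dropout(p=0.1),
            nn.Linear(dim, dim),
        )

    def forward(self, x):
        return self.net(x)


model = Block()
model.eval()  # deterministic forward: Dropout becomes a no-op, mirroring the commit's eval() calls

x = torch.randn(8, 64, requires_grad=True)       # grad must flow into the checkpointed segment
y = checkpoint(model, x, use_reentrant=False)    # activations are dropped and recomputed in backward
y.sum().backward()

print(model.net[0].weight.grad is not None)      # True: eval() does not disable gradient computation

A side effect of this approach is that dropout regularization is turned off during fine-tuning, which is presumably why the commit gates the eval() calls on the gradient-checkpointing flags rather than applying them unconditionally.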

0 commit comments
