From 65819c1d08768d484ca20a020da8428be99e4396 Mon Sep 17 00:00:00 2001 From: Adrian Corduneanu Date: Tue, 25 Feb 2025 23:31:46 -0800 Subject: [PATCH] Update text2video.py to reduce GPU memory by emptying cache If offload_model is set, empty_cache() must be called after the model is moved to CPU to actually free the GPU. I verified on an RTX 4090 that without calling empty_cache the model remains in memory and the subsequent VAE decoding never finishes. --- wan/text2video.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wan/text2video.py b/wan/text2video.py index 96cfa78..7e9d4e7 100644 --- a/wan/text2video.py +++ b/wan/text2video.py @@ -252,6 +252,7 @@ class WanT2V: x0 = latents if offload_model: self.model.cpu() + torch.cuda.empty_cache() if self.rank == 0: videos = self.vae.decode(x0) @@ -260,6 +261,7 @@ class WanT2V: if offload_model: gc.collect() torch.cuda.synchronize() + torch.cuda.empty_cache() if dist.is_initialized(): dist.barrier()