Update text2video.py to reduce GPU memory by emptying cache

If offload_model is set, empty_cache() must be called after the model is moved to the CPU to actually free the GPU memory. I verified on an RTX 4090 that without calling empty_cache the model remains in GPU memory and the subsequent VAE decoding never finishes.
This commit is contained in:
Adrian Corduneanu 2025-02-25 23:31:46 -08:00 committed by GitHub
parent 73648654c5
commit 65819c1d08
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -252,6 +252,7 @@ class WanT2V:
             x0 = latents
             if offload_model:
                 self.model.cpu()
+                torch.cuda.empty_cache()
             if self.rank == 0:
                 videos = self.vae.decode(x0)
@@ -260,6 +261,7 @@ class WanT2V:
         if offload_model:
             gc.collect()
             torch.cuda.synchronize()
+            torch.cuda.empty_cache()
         if dist.is_initialized():
             dist.barrier()