from abc import ABC, abstractmethod
from typing import Tuple

import torch
from diffusers.configuration_utils import ConfigMixin
from einops import rearrange
from torch import Tensor


class Patchifier(ConfigMixin, ABC):
    def __init__(self, patch_size: int):
        super().__init__()
        self._patch_size = (1, patch_size, patch_size)

    @abstractmethod
    def patchify(self, latents: Tensor) -> Tuple[Tensor, Tensor]:
        raise NotImplementedError("Patchify method not implemented")

    @abstractmethod
    def unpatchify(
        self,
        latents: Tensor,
        output_height: int,
        output_width: int,
        out_channels: int,
    ) -> Tensor:
        pass

    @property
    def patch_size(self):
        return self._patch_size

    def get_latent_coords(
        self, latent_num_frames, latent_height, latent_width, batch_size, device
    ):
        """
        Return a tensor of shape [batch_size, 3, num_patches] containing the
        top-left corner latent coordinates of each latent patch.
        The tensor is repeated for each batch element.
        """
        latent_sample_coords = torch.meshgrid(
            torch.arange(0, latent_num_frames, self._patch_size[0], device=device),
            torch.arange(0, latent_height, self._patch_size[1], device=device),
            torch.arange(0, latent_width, self._patch_size[2], device=device),
        )
        latent_sample_coords = torch.stack(latent_sample_coords, dim=0)
        latent_coords = latent_sample_coords.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
        latent_coords = rearrange(
            latent_coords, "b c f h w -> b c (f h w)", b=batch_size
        )
        return latent_coords


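# Illustrative example: with patch_size=2 the temporal stride is 1 and the
# spatial stride is 2, so a 2 x 4 x 4 latent grid (frames x height x width)
# yields 2 * 2 * 2 = 8 patches, and get_latent_coords(2, 4, 4, 1, "cpu") on a
# SymmetricPatchifier(patch_size=2) returns a tensor of shape [1, 3, 8] whose
# three rows hold the (frame, height, width) top-left index of each patch.

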
class SymmetricPatchifier(Patchifier):
    def patchify(self, latents: Tensor) -> Tuple[Tensor, Tensor]:
        b, _, f, h, w = latents.shape
        latent_coords = self.get_latent_coords(f, h, w, b, latents.device)
        # Fold each (p1, p2, p3) patch into the channel axis and flatten the
        # remaining frame/height/width grid into a single token axis.
        latents = rearrange(
            latents,
            "b c (f p1) (h p2) (w p3) -> b (f h w) (c p1 p2 p3)",
            p1=self._patch_size[0],
            p2=self._patch_size[1],
            p3=self._patch_size[2],
        )
        return latents, latent_coords

    def unpatchify(
        self,
        latents: Tensor,
        output_height: int,
        output_width: int,
        out_channels: int,
    ) -> Tensor:
        # Convert the latent height/width into the number of patches per axis,
        # then unfold the patch contents back into the spatial grid.
        output_height = output_height // self._patch_size[1]
        output_width = output_width // self._patch_size[2]
        latents = rearrange(
            latents,
            "b (f h w) (c p q) -> b c f (h p) (w q)",
            h=output_height,
            w=output_width,
            p=self._patch_size[1],
            q=self._patch_size[2],
        )
        return latents
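

if __name__ == "__main__":
    # Minimal usage sketch (illustrative values; the channel count and latent
    # sizes below are placeholders, not tied to any particular model).
    patchifier = SymmetricPatchifier(patch_size=2)
    latents = torch.randn(1, 16, 4, 8, 8)  # [batch, channels, frames, height, width]

    tokens, coords = patchifier.patchify(latents)
    print(tokens.shape)  # torch.Size([1, 64, 64]) -> 4*4*4 tokens, 16*1*2*2 features each
    print(coords.shape)  # torch.Size([1, 3, 64])

    # unpatchify takes the original latent height/width and folds the tokens
    # back into the original latent layout.
    restored = patchifier.unpatchify(
        tokens, output_height=8, output_width=8, out_channels=16
    )
    print(restored.shape)  # torch.Size([1, 16, 4, 8, 8])
    assert torch.equal(restored, latents)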