import torch
import torch.nn as nn


# https://github.com/facebookresearch/DiT
class TimestepEmbedder(nn.Module):
    """
    Embeds scalar timesteps into vector representations.
    """

    def __init__(self, dim, frequency_embedding_size, max_period):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(frequency_embedding_size, dim),
            nn.SiLU(),
            nn.Linear(dim, dim),
        )
        self.dim = dim
        self.max_period = max_period
        # The sinusoid concatenates cos and sin halves, so it is the frequency
        # embedding size (not dim) that must be even.
        assert frequency_embedding_size % 2 == 0, \
            'frequency_embedding_size must be even.'

        # Precompute the inverse frequencies 10000^(-2i/F) in float32, with
        # autocast disabled so the buffer is never built in reduced precision.
        with torch.autocast('cuda', enabled=False):
            self.freqs = nn.Buffer(
                1.0 / (10000**(torch.arange(0, frequency_embedding_size, 2, dtype=torch.float32) /
                               frequency_embedding_size)),
                persistent=False)
            # Rescale the whole frequency band when max_period differs from
            # the default base period of 10000.
            freq_scale = 10000 / max_period
            self.freqs = freq_scale * self.freqs
    def timestep_embedding(self, t):
        """
        Create sinusoidal timestep embeddings.

        :param t: a 1-D Tensor of N indices, one per batch element.
                  These may be fractional.
        :return: an (N, frequency_embedding_size) Tensor of positional
                 embeddings. The output width and base period come from the
                 module's frequency_embedding_size and max_period attributes.
        """
        # https://github.com/openai/glide-text2im/blob/main/glide_text2im/nn.py
        args = t[:, None].float() * self.freqs[None]
        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        return embedding

    def forward(self, t):
        # Compute the fp32 sinusoid, cast back to the input dtype, then
        # project through the MLP.
        t_freq = self.timestep_embedding(t).to(t.dtype)
        t_emb = self.mlp(t_freq)
        return t_emb
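

# A minimal usage sketch, assuming illustrative sizes rather than values taken
# from any Wan2.1 config: embed a batch of fractional diffusion timesteps and
# project them to the model width.
if __name__ == '__main__':
    embedder = TimestepEmbedder(
        dim=256, frequency_embedding_size=128, max_period=10000)
    t = torch.rand(4) * 1000  # four fractional timesteps in [0, 1000)
    t_emb = embedder(t)
    print(t_emb.shape)  # torch.Size([4, 256])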