mirror of
https://github.com/Wan-Video/Wan2.1.git
synced 2025-11-04 14:16:57 +00:00
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import math
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
|
|
|
|
class CAResBlock(nn.Module):
|
|
def __init__(self, in_dim: int, out_dim: int, residual: bool = True):
|
|
super().__init__()
|
|
self.residual = residual
|
|
self.conv1 = nn.Conv2d(in_dim, out_dim, kernel_size=3, padding=1)
|
|
self.conv2 = nn.Conv2d(out_dim, out_dim, kernel_size=3, padding=1)
|
|
|
|
t = int((abs(math.log2(out_dim)) + 1) // 2)
|
|
k = t if t % 2 else t + 1
|
|
self.pool = nn.AdaptiveAvgPool2d(1)
|
|
self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2, bias=False)
|
|
|
|
if self.residual:
|
|
if in_dim == out_dim:
|
|
self.downsample = nn.Identity()
|
|
else:
|
|
self.downsample = nn.Conv2d(in_dim, out_dim, kernel_size=1)
|
|
|
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
r = x
|
|
x = self.conv1(F.relu(x))
|
|
x = self.conv2(F.relu(x))
|
|
|
|
b, c = x.shape[:2]
|
|
w = self.pool(x).view(b, 1, c)
|
|
w = self.conv(w).transpose(-1, -2).unsqueeze(-1).sigmoid() # B*C*1*1
|
|
|
|
if self.residual:
|
|
x = x * w + self.downsample(r)
|
|
else:
|
|
x = x * w
|
|
|
|
return x
|