mirror of
https://github.com/Wan-Video/Wan2.1.git
synced 2025-11-05 06:29:14 +00:00
46 lines
1.4 KiB
Python
46 lines
1.4 KiB
Python
import bisect
|
|
|
|
import torch
|
|
from torch.utils.data.dataset import Dataset
|
|
|
|
|
|
# modified from https://pytorch.org/docs/stable/_modules/torch/utils/data/dataset.html#ConcatDataset
|
|
class MultiModalDataset(Dataset):
|
|
datasets: list[Dataset]
|
|
cumulative_sizes: list[int]
|
|
|
|
@staticmethod
|
|
def cumsum(sequence):
|
|
r, s = [], 0
|
|
for e in sequence:
|
|
l = len(e)
|
|
r.append(l + s)
|
|
s += l
|
|
return r
|
|
|
|
def __init__(self, video_datasets: list[Dataset], audio_datasets: list[Dataset]):
|
|
super().__init__()
|
|
self.video_datasets = list(video_datasets)
|
|
self.audio_datasets = list(audio_datasets)
|
|
self.datasets = self.video_datasets + self.audio_datasets
|
|
|
|
self.cumulative_sizes = self.cumsum(self.datasets)
|
|
|
|
def __len__(self):
|
|
return self.cumulative_sizes[-1]
|
|
|
|
def __getitem__(self, idx):
|
|
if idx < 0:
|
|
if -idx > len(self):
|
|
raise ValueError("absolute value of index should not exceed dataset length")
|
|
idx = len(self) + idx
|
|
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
|
|
if dataset_idx == 0:
|
|
sample_idx = idx
|
|
else:
|
|
sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
|
|
return self.datasets[dataset_idx][sample_idx]
|
|
|
|
def compute_latent_stats(self) -> tuple[torch.Tensor, torch.Tensor]:
|
|
return self.video_datasets[0].compute_latent_stats()
|