diff --git a/hyvideo/hunyuan.py b/hyvideo/hunyuan.py
index e909e2f..19a147e 100644
--- a/hyvideo/hunyuan.py
+++ b/hyvideo/hunyuan.py
@@ -867,6 +867,9 @@ class HunyuanVideoSampler(Inference):
         if self.avatar:
             w, h = input_ref_images.size
             target_height, target_width = calculate_new_dimensions(target_height, target_width, h, w, fit_into_canvas)
+            if target_width != w or target_height != h:
+                input_ref_images = input_ref_images.resize((target_width,target_height), resample=Image.Resampling.LANCZOS)
+
             concat_dict = {'mode': 'timecat', 'bias': -1}
             freqs_cos, freqs_sin = self.get_rotary_pos_embed_new(129, target_height, target_width, concat_dict)
         else:
diff --git a/wgp.py b/wgp.py
index 5679a68..427444e 100644
--- a/wgp.py
+++ b/wgp.py
@@ -3263,13 +3263,13 @@ def generate_video(
             if exp > 0:
                 from rife.inference import temporal_interpolation
                 if sliding_window and window_no > 1:
-                    sample = torch.cat([previous_before_last_frame, sample], dim=1)
-                    previous_before_last_frame = sample[:, -2:-1].clone()
+                    sample = torch.cat([previous_last_frame, sample], dim=1)
+                    previous_last_frame = sample[:, -1:].clone()
                     sample = temporal_interpolation(
                         os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
                     sample = sample[:, 1:]
                 else:
                     sample = temporal_interpolation(
                         os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
-                    previous_before_last_frame = sample[:, -2:-1].clone()
+                    previous_last_frame = sample[:, -1:].clone()
                 output_fps = output_fps * 2**exp
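Context for the `hyvideo/hunyuan.py` hunk: `calculate_new_dimensions` may return a target size that differs from the reference image's actual size, while `get_rotary_pos_embed_new` builds the rotary embeddings for exactly `(target_height, target_width)`; the added resize keeps the avatar reference image in sync with those dimensions. Below is a minimal sketch of the idea, with a hypothetical `fit_to_canvas` standing in for `calculate_new_dimensions`, whose body is not part of this patch:

```python
from PIL import Image

def fit_to_canvas(h, w, canvas_h, canvas_w, block=16):
    # Hypothetical stand-in: scale (h, w) to fit the canvas while keeping
    # the aspect ratio, snapped to a multiple of the model's block size.
    scale = min(canvas_h / h, canvas_w / w)
    return (max(block, round(h * scale / block) * block),
            max(block, round(w * scale / block) * block))

ref = Image.open("ref.png").convert("RGB")  # avatar reference image
w, h = ref.size
target_height, target_width = fit_to_canvas(h, w, 720, 1280)
# The rotary embeddings are built for (target_height, target_width), so the
# reference image must be brought to exactly that size as well.
if (target_width, target_height) != (w, h):
    ref = ref.resize((target_width, target_height), resample=Image.Resampling.LANCZOS)
```

LANCZOS is a common choice here simply because it resamples with less detail loss than bilinear or bicubic filtering.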
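Context for the `wgp.py` hunk: when RIFE frame doubling runs per sliding window, each new window must be bridged to the one before it. The old code carried the second-to-last frame (`sample[:, -2:-1]`), and in the first window that slice is taken after interpolation, where it lands on a synthesized in-between frame rather than a real one; either way the bridge was built from the wrong frame, so every window seam stuttered. Carrying the true last frame (`sample[:, -1:]`), prepending it, interpolating, and then dropping it with `sample[:, 1:]` produces a seamless join. A runnable sketch of that stitching logic, under two assumptions: `sample` is a `[C, T, H, W]` tensor, and `fake_interp` is a linear stand-in for `rife.inference.temporal_interpolation`:

```python
import torch

def fake_interp(sample: torch.Tensor, exp: int) -> torch.Tensor:
    # Linear stand-in for RIFE: inserts 2**exp - 1 blended frames
    # between every pair of neighbouring frames.
    frames = [sample[:, :1]]
    for t in range(sample.shape[1] - 1):
        a, b = sample[:, t:t + 1], sample[:, t + 1:t + 2]
        frames += [(1 - k / 2**exp) * a + (k / 2**exp) * b for k in range(1, 2**exp)]
        frames.append(b)
    return torch.cat(frames, dim=1)

exp, previous_last_frame, out = 1, None, []
windows = [torch.rand(3, 4, 8, 8) for _ in range(3)]  # three [C, T, H, W] chunks
for window_no, sample in enumerate(windows, start=1):
    if window_no > 1:
        # Prepend the true last frame of the previous window so the
        # interpolator can synthesize the frames that bridge the seam ...
        sample = torch.cat([previous_last_frame, sample], dim=1)
        previous_last_frame = sample[:, -1:].clone()
        sample = fake_interp(sample, exp)
        # ... then drop that frame again: it was already emitted last window.
        sample = sample[:, 1:]
    else:
        sample = fake_interp(sample, exp)
        previous_last_frame = sample[:, -1:].clone()
    out.append(sample)
video = torch.cat(out, dim=1)  # 23 frames here, with no duplicate at either seam
```

With this stand-in, the stitched result matches interpolating all twelve raw frames in a single pass (23 frames at exp=1), which is the behavior the fix restores at window boundaries.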