diff --git a/hyvideo/hunyuan.py b/hyvideo/hunyuan.py
index e909e2f..19a147e 100644
--- a/hyvideo/hunyuan.py
+++ b/hyvideo/hunyuan.py
@@ -867,6 +867,9 @@ class HunyuanVideoSampler(Inference):
         if self.avatar:
             w, h = input_ref_images.size
             target_height, target_width = calculate_new_dimensions(target_height, target_width, h, w, fit_into_canvas)
+            if target_width != w or target_height != h:
+                input_ref_images = input_ref_images.resize((target_width,target_height), resample=Image.Resampling.LANCZOS)
+
             concat_dict = {'mode': 'timecat', 'bias': -1}
             freqs_cos, freqs_sin = self.get_rotary_pos_embed_new(129, target_height, target_width, concat_dict)
         else:
diff --git a/wgp.py b/wgp.py
index 5679a68..427444e 100644
--- a/wgp.py
+++ b/wgp.py
@@ -3263,13 +3263,13 @@ def generate_video(
             if exp > 0:
                 from rife.inference import temporal_interpolation
                 if sliding_window and window_no > 1:
-                    sample = torch.cat([previous_before_last_frame, sample], dim=1)
-                    previous_before_last_frame = sample[:, -2:-1].clone()
+                    sample = torch.cat([previous_last_frame, sample], dim=1)
+                    previous_last_frame = sample[:, -1:].clone()
                     sample = temporal_interpolation(
                         os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
                     sample = sample[:, 1:]
                 else:
                     sample = temporal_interpolation(
                         os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
-                    previous_before_last_frame = sample[:, -2:-1].clone()
+                    previous_last_frame = sample[:, -1:].clone()
                 output_fps = output_fps * 2**exp
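Context for the `hyvideo/hunyuan.py` hunk: `calculate_new_dimensions` may return a target size that differs from the reference image's actual size, while `get_rotary_pos_embed_new` builds the rotary embeddings for exactly `(target_height, target_width)`; the added resize keeps the avatar reference image in sync with those dimensions. Below is a minimal sketch of the idea, with a hypothetical `fit_to_canvas` standing in for `calculate_new_dimensions`, whose body is not part of this patch:

```python
from PIL import Image

def fit_to_canvas(h, w, canvas_h, canvas_w, block=16):
    # Hypothetical stand-in: scale (h, w) to fit the canvas while keeping
    # the aspect ratio, snapped to a multiple of the model's block size.
    scale = min(canvas_h / h, canvas_w / w)
    return (max(block, round(h * scale / block) * block),
            max(block, round(w * scale / block) * block))

ref = Image.open("ref.png").convert("RGB")  # avatar reference image
w, h = ref.size
target_height, target_width = fit_to_canvas(h, w, 720, 1280)
# The rotary embeddings are built for (target_height, target_width), so the
# reference image must be brought to exactly that size as well.
if (target_width, target_height) != (w, h):
    ref = ref.resize((target_width, target_height), resample=Image.Resampling.LANCZOS)
```

LANCZOS is a common choice here simply because it resamples with less detail loss than bilinear or bicubic filtering.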
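Context for the `wgp.py` hunk: when RIFE frame doubling runs per sliding window, each new window must be bridged to the one before it. The old code carried the second-to-last frame (`sample[:, -2:-1]`), and in the first window that slice is taken after interpolation, where it lands on a synthesized in-between frame rather than a real one; either way the bridge was built from the wrong frame, so every window seam stuttered. Carrying the true last frame (`sample[:, -1:]`), prepending it, interpolating, and then dropping it with `sample[:, 1:]` produces a seamless join. A runnable sketch of that stitching logic, under two assumptions: `sample` is a `[C, T, H, W]` tensor, and `fake_interp` is a linear stand-in for `rife.inference.temporal_interpolation`:

```python
import torch

def fake_interp(sample: torch.Tensor, exp: int) -> torch.Tensor:
    # Linear stand-in for RIFE: inserts 2**exp - 1 blended frames
    # between every pair of neighbouring frames.
    frames = [sample[:, :1]]
    for t in range(sample.shape[1] - 1):
        a, b = sample[:, t:t + 1], sample[:, t + 1:t + 2]
        frames += [(1 - k / 2**exp) * a + (k / 2**exp) * b for k in range(1, 2**exp)]
        frames.append(b)
    return torch.cat(frames, dim=1)

exp, previous_last_frame, out = 1, None, []
windows = [torch.rand(3, 4, 8, 8) for _ in range(3)]  # three [C, T, H, W] chunks
for window_no, sample in enumerate(windows, start=1):
    if window_no > 1:
        # Prepend the true last frame of the previous window so the
        # interpolator can synthesize the frames that bridge the seam ...
        sample = torch.cat([previous_last_frame, sample], dim=1)
        previous_last_frame = sample[:, -1:].clone()
        sample = fake_interp(sample, exp)
        # ... then drop that frame again: it was already emitted last window.
        sample = sample[:, 1:]
    else:
        sample = fake_interp(sample, exp)
        previous_last_frame = sample[:, -1:].clone()
    out.append(sample)
video = torch.cat(out, dim=1)  # 23 frames here, with no duplicate at either seam
```

With this stand-in, the stitched result matches interpolating all twelve raw frames in a single pass (23 frames at exp=1), which is the behavior the fix restores at window boundaries.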