diff --git a/wan/text2video.py b/wan/text2video.py
index af699b7..3d3d404 100644
--- a/wan/text2video.py
+++ b/wan/text2video.py
@@ -470,14 +470,14 @@ class WanT2V:
                 latent_noise_factor = t / 1000
                 for zz, zz_r, ll in zip(z, z_reactive, [latents]):
                     pass
-                    # zz[0:16, ref_images_count:overlapped_latents_size + ref_images_count] = zz_r[:, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(zz_r[:, ref_images_count:] ) * overlap_noise_factor
-                    # ll[:, 0:overlapped_latents_size + ref_images_count] = zz_r * (1.0 - latent_noise_factor) + torch.randn_like(zz_r ) * latent_noise_factor
+                    zz[0:16, ref_images_count:overlapped_latents_size + ref_images_count] = zz_r[:, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(zz_r[:, ref_images_count:] ) * overlap_noise_factor
+                    ll[:, 0:overlapped_latents_size + ref_images_count] = zz_r * (1.0 - latent_noise_factor) + torch.randn_like(zz_r ) * latent_noise_factor
 
             if conditioning_latents_size > 0 and overlap_noise > 0:
                 pass
                 overlap_noise_factor = overlap_noise / 1000
-                latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
-                #timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(len(timesteps) - conditioning_latents_size - ref_images_count))]
+                # latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
+                # timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(target_shape[1] - conditioning_latents_size - ref_images_count))]
 
             if target_camera != None:
                 latent_model_input = torch.cat([latents, source_latents], dim=1)
diff --git a/wgp.py b/wgp.py
index b5ba519..5679a68 100644
--- a/wgp.py
+++ b/wgp.py
@@ -42,8 +42,8 @@ global_queue_ref = []
 AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10
 
-target_mmgp_version = "3.4.7"
-WanGP_version = "5.4"
+target_mmgp_version = "3.4.8"
+WanGP_version = "5.41"
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
 
 from importlib.metadata import version
@@ -3263,11 +3263,13 @@ def generate_video(
                 if exp > 0:
                     from rife.inference import temporal_interpolation
                     if sliding_window and window_no > 1:
-                        sample = torch.cat([frames_already_processed[:, -2:-1], sample], dim=1)
+                        sample = torch.cat([previous_before_last_frame, sample], dim=1)
+                        previous_before_last_frame = sample[:, -2:-1].clone()
                         sample = temporal_interpolation( os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
                         sample = sample[:, 1:]
                     else:
                         sample = temporal_interpolation( os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
+                        previous_before_last_frame = sample[:, -2:-1].clone()
 
                     output_fps = output_fps * 2**exp
 
@@ -4843,8 +4845,8 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                         temporal_upsampling = gr.Dropdown(
                             choices=[
                                 ("Disabled", ""),
-                                ("Rife x2 (32 frames/s)", "rife2"),
-                                ("Rife x4 (64 frames/s)", "rife4"),
+                                ("Rife x2 frames/s", "rife2"),
+                                ("Rife x4 frames/s", "rife4"),
                             ],
                             value=ui_defaults.get("temporal_upsampling", ""),
                             visible=True,
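
For context, a minimal standalone sketch of the re-noising pattern the newly enabled lines in wan/text2video.py apply to the overlapped latents (blend the existing latents with fresh Gaussian noise in proportion to a factor on a 0-1000 scale). The function name, tensor shape, and noise level below are illustrative placeholders, not identifiers or values from the repo.

    # Illustrative sketch only, not part of the diff.
    import torch

    def renoise(latents: torch.Tensor, noise_level: float) -> torch.Tensor:
        # noise_level is assumed to be on the same 0-1000 scale as
        # overlap_noise / the timestep t used in the diff above.
        factor = noise_level / 1000
        return latents * (1.0 - factor) + torch.randn_like(latents) * factor

    # Hypothetical usage with a placeholder shape (channels, latent frames, h, w),
    # re-noising everything past the first two latent frames:
    latents = torch.randn(16, 21, 30, 52)
    latents[:, 2:] = renoise(latents[:, 2:], noise_level=150)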