From 8859e816d0a9969b4a1207f3eaf7333597cf1b26 Mon Sep 17 00:00:00 2001
From: DeepBeepMeep
Date: Fri, 5 Sep 2025 14:17:21 +0200
Subject: [PATCH] Take me to outer space

---
 README.md                         |  17 ++
 models/hyvideo/hunyuan_handler.py |   4 +
 models/ltx_video/ltxv_handler.py  |   1 +
 models/wan/any2video.py           | 113 ++++++-------
 models/wan/df_handler.py          |  12 ++
 models/wan/wan_handler.py         |  54 +++++++
 wgp.py                            | 256 ++++++++++++++----------------
 7 files changed, 254 insertions(+), 203 deletions(-)

diff --git a/README.md b/README.md
index 77f7fa8..eb497f1 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,23 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTV Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
 ## 🔥 Latest Updates :
+### September 5 2025: WanGP v8.4 - Take me to Outer Space
+You have probably seen these short AI-generated movies created using *Nano Banana* and the *First Frame - Last Frame* feature of *Kling 2.0*. The idea is to generate an image, modify part of it with Nano Banana, and give these two images to Kling, which will generate the Video between them. Then use the previous Last Frame as the new First Frame, rinse and repeat, and you get a full movie.
+
+I have made it easier to do just that with *Qwen Edit* and *Wan*:
+- **End Frames can now be combined with Continue a Video** (and not just with a Start Frame)
+- **Multiple End Frames can be provided**, and each End Frame will be used for a different Sliding Window
+
+You can plan all your shots in advance (one shot = one Sliding Window): I recommend using Wan 2.2 Image to Image with multiple End Frames (one for each shot / Sliding Window) and a different Text Prompt for each shot / Sliding Window (remember to enable *Sliding Windows/Text Prompts Will be used for a new Sliding Window of the same Video Generation*).
+
+The results can be quite impressive. However, Wan 2.1 & 2.2 Image 2 Image are restricted to a single overlap frame when using Sliding Windows, which means only one frame is reused for the motion. This may be insufficient if you are trying to connect two shots with fast movement.
+
+This is where *InfiniteTalk* comes into play. Besides being one of the best models for generating animated audio-driven avatars, InfiniteTalk internally uses more than one motion frame, so it is quite good at maintaining motion between two shots. I have tweaked InfiniteTalk so that **its motion engine can be used even if no audio is provided**.
+So here is how to use InfiniteTalk: enable *Sliding Windows/Text Prompts Will be used for a new Sliding Window of the same Video Generation*, and if you continue an existing Video, *Misc/Override Frames per Second* should be set to *Source Video*. Each Reference Frame provided will play the same role as an End Frame, except it won't be exactly an End Frame (it will correspond more to a middle frame; the actual End Frame will differ but will be close).
+
+You will find below a 33s movie I created using these two methods. Quality could be much better as I haven't tuned the settings at all (I couldn't be bothered; I used 10-step generation without Loras Accelerators for most of the gens).
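+
+For readers who prefer pseudocode, the chaining idea above boils down to the minimal sketch below. It is only an illustration: `generate_clip` is a hypothetical stand-in for whatever First Frame / Last Frame generation you run (in WanGP, one Sliding Window per shot), not an actual WanGP function.
+
+```python
+from typing import Callable, List
+
+def chain_shots(keyframes: List[str], prompts: List[str],
+                generate_clip: Callable[[str, str, str], List[str]]) -> List[str]:
+    """generate_clip(start_image, end_image, prompt) returns the frames of one shot (hypothetical)."""
+    movie: List[str] = []
+    start = keyframes[0]
+    for end, prompt in zip(keyframes[1:], prompts):
+        clip = generate_clip(start, end, prompt)   # one shot = one Sliding Window
+        movie += clip if not movie else clip[1:]   # drop the frame shared with the previous shot
+        start = clip[-1]                           # the last frame becomes the next start frame
+    return movie
+```
+
+In WanGP you do not write this loop yourself: providing one End Frame (or one Reference Frame with InfiniteTalk) and one Text Prompt per Sliding Window performs the same chaining inside a single Video Generation.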
+ ### September 2 2025: WanGP v8.31 - At last the pain stops - This single new feature should give you the strength to face all the potential bugs of this new release: diff --git a/models/hyvideo/hunyuan_handler.py b/models/hyvideo/hunyuan_handler.py index da60f72..9cbaea7 100644 --- a/models/hyvideo/hunyuan_handler.py +++ b/models/hyvideo/hunyuan_handler.py @@ -56,6 +56,10 @@ class family_handler(): if base_model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar"]: extra_model_def["one_image_ref_needed"] = True + + if base_model_type in ["hunyuan_i2v"]: + extra_model_def["image_prompt_types_allowed"] = "S" + return extra_model_def @staticmethod diff --git a/models/ltx_video/ltxv_handler.py b/models/ltx_video/ltxv_handler.py index d35bcd4..c89b69a 100644 --- a/models/ltx_video/ltxv_handler.py +++ b/models/ltx_video/ltxv_handler.py @@ -24,6 +24,7 @@ class family_handler(): extra_model_def["frames_minimum"] = 17 extra_model_def["frames_steps"] = 8 extra_model_def["sliding_window"] = True + extra_model_def["image_prompt_types_allowed"] = "TSEV" return extra_model_def diff --git a/models/wan/any2video.py b/models/wan/any2video.py index e7d54ef..bb91dc6 100644 --- a/models/wan/any2video.py +++ b/models/wan/any2video.py @@ -537,7 +537,6 @@ class WanAny2V: image_ref = input_frames[:, 0] if input_video is None: input_video = input_frames[:, 0:1] new_shot = "Q" in video_prompt_type - denoising_strength = 0.5 else: if pre_video_frame is None: new_shot = True @@ -556,74 +555,59 @@ class WanAny2V: _ , preframes_count, height, width = input_video.shape input_video = input_video.to(device=self.device).to(dtype= self.VAE_dtype) if infinitetalk: - image_for_clip = image_ref.to(input_video) + image_start = image_ref.to(input_video) control_pre_frames_count = 1 - control_video = image_for_clip.unsqueeze(1) + control_video = image_start.unsqueeze(1) else: - image_for_clip = input_video[:, -1] + image_start = input_video[:, -1] control_pre_frames_count = preframes_count control_video = input_video - lat_h, lat_w = height // self.vae_stride[1], width // self.vae_stride[2] - if hasattr(self, "clip"): - clip_image_size = self.clip.model.image_size - clip_image = resize_lanczos(image_for_clip, clip_image_size, clip_image_size)[:, None, :, :] - clip_context = self.clip.visual([clip_image]) if model_type != "flf2v_720p" else self.clip.visual([clip_image , clip_image ]) - clip_image = None - else: - clip_context = None - enc = torch.concat( [control_video, torch.zeros( (3, frame_num-control_pre_frames_count, height, width), - device=self.device, dtype= self.VAE_dtype)], - dim = 1).to(self.device) - color_reference_frame = image_for_clip.unsqueeze(1).clone() + + color_reference_frame = image_start.unsqueeze(1).clone() else: - preframes_count = control_pre_frames_count = 1 - any_end_frame = image_end is not None - add_frames_for_end_image = any_end_frame and model_type == "i2v" - if any_end_frame: - if add_frames_for_end_image: - frame_num +=1 - lat_frames = int((frame_num - 2) // self.vae_stride[0] + 2) - trim_frames = 1 - + preframes_count = control_pre_frames_count = 1 height, width = image_start.shape[1:] + control_video = image_start.unsqueeze(1).to(self.device) + color_reference_frame = control_video.clone() - lat_h = round( - height // self.vae_stride[1] // - self.patch_size[1] * self.patch_size[1]) - lat_w = round( - width // self.vae_stride[2] // - self.patch_size[2] * self.patch_size[2]) - height = lat_h * self.vae_stride[1] - width = lat_w * self.vae_stride[2] - 
image_start_frame = image_start.unsqueeze(1).to(self.device) - color_reference_frame = image_start_frame.clone() - if image_end is not None: - img_end_frame = image_end.unsqueeze(1).to(self.device) + any_end_frame = image_end is not None + add_frames_for_end_image = any_end_frame and model_type == "i2v" + if any_end_frame: + color_correction_strength = 0 #disable color correction as transition frames between shots may have a complete different color level than the colors of the new shot + if add_frames_for_end_image: + frame_num +=1 + lat_frames = int((frame_num - 2) // self.vae_stride[0] + 2) + trim_frames = 1 - if hasattr(self, "clip"): - clip_image_size = self.clip.model.image_size - image_start = resize_lanczos(image_start, clip_image_size, clip_image_size) - if image_end is not None: image_end = resize_lanczos(image_end, clip_image_size, clip_image_size) - if model_type == "flf2v_720p": - clip_context = self.clip.visual([image_start[:, None, :, :], image_end[:, None, :, :] if image_end is not None else image_start[:, None, :, :]]) - else: - clip_context = self.clip.visual([image_start[:, None, :, :]]) + lat_h, lat_w = height // self.vae_stride[1], width // self.vae_stride[2] + + if image_end is not None: + img_end_frame = image_end.unsqueeze(1).to(self.device) + + if hasattr(self, "clip"): + clip_image_size = self.clip.model.image_size + image_start = resize_lanczos(image_start, clip_image_size, clip_image_size) + image_end = resize_lanczos(image_end, clip_image_size, clip_image_size) if image_end is not None else image_start + if model_type == "flf2v_720p": + clip_context = self.clip.visual([image_start[:, None, :, :], image_end[:, None, :, :] if image_end is not None else image_start[:, None, :, :]]) else: - clip_context = None + clip_context = self.clip.visual([image_start[:, None, :, :]]) + else: + clip_context = None - if any_end_frame: - enc= torch.concat([ - image_start_frame, - torch.zeros( (3, frame_num-2, height, width), device=self.device, dtype= self.VAE_dtype), - img_end_frame, - ], dim=1).to(self.device) - else: - enc= torch.concat([ - image_start_frame, - torch.zeros( (3, frame_num-1, height, width), device=self.device, dtype= self.VAE_dtype) - ], dim=1).to(self.device) + if any_end_frame: + enc= torch.concat([ + control_video, + torch.zeros( (3, frame_num-control_pre_frames_count-1, height, width), device=self.device, dtype= self.VAE_dtype), + img_end_frame, + ], dim=1).to(self.device) + else: + enc= torch.concat([ + control_video, + torch.zeros( (3, frame_num-control_pre_frames_count, height, width), device=self.device, dtype= self.VAE_dtype) + ], dim=1).to(self.device) - image_start = image_end = image_start_frame = img_end_frame = image_for_clip = image_ref = None + image_start = image_end = img_end_frame = image_ref = control_video = None msk = torch.ones(1, frame_num, lat_h, lat_w, device=self.device) if any_end_frame: @@ -657,12 +641,11 @@ class WanAny2V: # Recam Master if recam: - # should be be in fact in input_frames since it is control video not a video to be extended target_camera = model_mode - height,width = input_video.shape[-2:] - input_video = input_video.to(dtype=self.dtype , device=self.device) - source_latents = self.vae.encode([input_video])[0].unsqueeze(0) #.to(dtype=self.dtype, device=self.device) - del input_video + height,width = input_frames.shape[-2:] + input_frames = input_frames.to(dtype=self.dtype , device=self.device) + source_latents = self.vae.encode([input_frames])[0].unsqueeze(0) #.to(dtype=self.dtype, device=self.device) + del 
input_frames # Process target camera (recammaster) from shared.utils.cammmaster_tools import get_camera_embedding cam_emb = get_camera_embedding(target_camera)
@@ -754,7 +737,9 @@ class WanAny2V: else: target_shape = (self.vae.model.z_dim, lat_frames + ref_images_count, height // self.vae_stride[1], width // self.vae_stride[2]) - if multitalk and audio_proj != None: + if multitalk: + if audio_proj is None: + audio_proj = [ torch.zeros( (1, 1, 5, 12, 768 ), dtype=self.dtype, device=self.device), torch.zeros( (1, (frame_num - 1) // 4, 8, 12, 768 ), dtype=self.dtype, device=self.device) ] from .multitalk.multitalk import get_target_masks audio_proj = [audio.to(self.dtype) for audio in audio_proj] human_no = len(audio_proj[0])
diff --git a/models/wan/df_handler.py b/models/wan/df_handler.py index bc79e2e..82e704a 100644 --- a/models/wan/df_handler.py +++ b/models/wan/df_handler.py
@@ -26,6 +26,18 @@ class family_handler(): extra_model_def["tea_cache"] = True extra_model_def["guidance_max_phases"] = 1 + extra_model_def["model_modes"] = { + "choices": [ + ("Synchronous", 0), + ("Asynchronous (better quality but around 50% extra steps added)", 5), + ], + "default": 0, + "label" : "Generation Type" + } + + extra_model_def["image_prompt_types_allowed"] = "TSEV" + + return extra_model_def @staticmethod
diff --git a/models/wan/wan_handler.py b/models/wan/wan_handler.py index 9adc3a8..3a5dd64 100644 --- a/models/wan/wan_handler.py +++ b/models/wan/wan_handler.py
@@ -5,6 +5,9 @@ import gradio as gr def test_class_i2v(base_model_type): return base_model_type in ["i2v", "i2v_2_2", "fun_inp_1.3B", "fun_inp", "flf2v_720p", "fantasy", "multitalk", "infinitetalk", "i2v_2_2_multitalk" ] +def text_oneframe_overlap(base_model_type): + return test_class_i2v(base_model_type) and not test_multitalk(base_model_type) + def test_class_1_3B(base_model_type): return base_model_type in [ "vace_1.3B", "t2v_1.3B", "recam_1.3B","phantom_1.3B","fun_inp_1.3B"]
@@ -120,6 +123,37 @@ class family_handler(): if base_model_type in ["standin"] or vace_class: extra_model_def["lock_image_refs_ratios"] = True + if base_model_type in ["recam_1.3B"]: + extra_model_def["keep_frames_video_guide_not_supported"] = True + extra_model_def["model_modes"] = { + "choices": [ + ("Pan Right", 1), + ("Pan Left", 2), + ("Tilt Up", 3), + ("Tilt Down", 4), + ("Zoom In", 5), + ("Zoom Out", 6), + ("Translate Up (with rotation)", 7), + ("Translate Down (with rotation)", 8), + ("Arc Left (with rotation)", 9), + ("Arc Right (with rotation)", 10), + ], + "default": 1, + "label" : "Camera Movement Type" + } + if vace_class or base_model_type in ["infinitetalk"]: + image_prompt_types_allowed = "TVL" + elif base_model_type in ["ti2v_2_2"]: + image_prompt_types_allowed = "TSEVL" + elif i2v: + image_prompt_types_allowed = "SEVL" + else: + image_prompt_types_allowed = "" + extra_model_def["image_prompt_types_allowed"] = image_prompt_types_allowed + + if text_oneframe_overlap(base_model_type): + extra_model_def["sliding_window_defaults"] = { "overlap_min" : 1, "overlap_max" : 1, "overlap_step": 0, "overlap_default": 1} + # if base_model_type in ["phantom_1.3B", "phantom_14B"]: # extra_model_def["one_image_ref_needed"] = True
@@ -251,6 +285,17 @@ class family_handler(): video_prompt_type = video_prompt_type.replace("U", "RU") ui_defaults["video_prompt_type"] = video_prompt_type + if settings_version < 2.31: + if base_model_type in ["recam_1.3B"]: + video_prompt_type = ui_defaults.get("video_prompt_type", "") + if not "V" in video_prompt_type: + video_prompt_type += "UV" + ui_defaults["video_prompt_type"] = video_prompt_type + ui_defaults["image_prompt_type"] = "" + + if text_oneframe_overlap(base_model_type): + ui_defaults["sliding_window_overlap"] = 1 + @staticmethod def update_default_settings(base_model_type, model_def, ui_defaults): ui_defaults.update({
@@ -309,6 +354,15 @@ class family_handler(): "image_prompt_type": "T", }) + if base_model_type in ["recam_1.3B"]: + ui_defaults.update({ + "video_prompt_type": "UV", + }) + + if text_oneframe_overlap(base_model_type): + ui_defaults["sliding_window_overlap"] = 1 + ui_defaults["color_correction_strength"] = 0 + if test_multitalk(base_model_type): ui_defaults["audio_guidance_scale"] = 4
diff --git a/wgp.py b/wgp.py index 653683c..396e273 100644 --- a/wgp.py +++ b/wgp.py
@@ -60,8 +60,8 @@ AUTOSAVE_FILENAME = "queue.zip" PROMPT_VARS_MAX = 10 target_mmgp_version = "3.6.0" -WanGP_version = "8.34" -settings_version = 2.29 +WanGP_version = "8.4" +settings_version = 2.31 max_source_video_frames = 3000 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
@@ -347,7 +347,7 @@ def process_prompt_and_add_tasks(state, model_choice): model_switch_phase = inputs["model_switch_phase"] switch_threshold = inputs["switch_threshold"] switch_threshold2 = inputs["switch_threshold2"] - + multi_prompts_gen_type = inputs["multi_prompts_gen_type"] if len(loras_multipliers) > 0: _, _, errors = parse_loras_multipliers(loras_multipliers, len(activated_loras), num_inference_steps, nb_phases= guidance_phases)
@@ -445,7 +445,7 @@ def process_prompt_and_add_tasks(state, model_choice): if "I" in video_prompt_type: if image_refs == None or len(image_refs) == 0: - gr.Info("You must provide at least one Refererence Image") + gr.Info("You must provide at least one Reference Image") return image_refs = clean_image_list(image_refs) if image_refs == None :
@@ -511,9 +511,14 @@ def process_prompt_and_add_tasks(state, model_choice): if image_start == None : gr.Info("Start Image should be an Image") return + if multi_prompts_gen_type == 1 and len(image_start) > 1: + gr.Info("Only one Start Image is supported") + return else: image_start = None + if not any_letters(image_prompt_type, "SVL"): + image_prompt_type = image_prompt_type.replace("E", "") if "E" in image_prompt_type: if image_end == None or isinstance(image_end, list) and len(image_end) == 0: gr.Info("You must provide an End Image")
@@ -522,32 +527,35 @@ def process_prompt_and_add_tasks(state, model_choice): if image_end == None : gr.Info("End Image should be an Image") return - if len(image_start) != len(image_end): - gr.Info("The number of Start and End Images should be the same ") - return + if multi_prompts_gen_type == 0: + if video_source is not None: + if len(image_end) > 1: + gr.Info("If a Video is to be continued and the option 'Each Text Prompt Will create a new generated Video' is set, there can be only one End Image") + return + elif len(image_start or []) != len(image_end or []): + gr.Info("The number of Start and End Images should be the same when the option 'Each Text Prompt Will create a new generated Video' is selected") + return else: image_end = None if test_any_sliding_window(model_type) and image_mode == 0: if video_length > sliding_window_size: - full_video_length = video_length if video_source is None else video_length + sliding_window_overlap + full_video_length = video_length if video_source is None else video_length + sliding_window_overlap -1 extra = "" 
if full_video_length == video_length else f" including {sliding_window_overlap} added for Video Continuation" no_windows = compute_sliding_window_no(full_video_length, sliding_window_size, sliding_window_discard_last_frames, sliding_window_overlap) gr.Info(f"The Number of Frames to generate ({video_length}{extra}) is greater than the Sliding Window Size ({sliding_window_size}), {no_windows} Windows will be generated") if "recam" in model_filename: - if video_source == None: - gr.Info("You must provide a Source Video") + if video_guide == None: + gr.Info("You must provide a Control Video") return - - frames = get_resampled_video(video_source, 0, 81, get_computed_fps(force_fps, model_type , video_guide, video_source )) + computed_fps = get_computed_fps(force_fps, model_type , video_guide, video_source ) + frames = get_resampled_video(video_guide, 0, 81, computed_fps) if len(frames)<81: - gr.Info("Recammaster source video should be at least 81 frames once the resampling at 16 fps has been done") + gr.Info(f"Recammaster Control video should be at least 81 frames once the resampling at {computed_fps} fps has been done") return - - if "hunyuan_custom_custom_edit" in model_filename: if len(keep_frames_video_guide) > 0: gr.Info("Filtering Frames with this model is not supported") @@ -558,13 +566,13 @@ def process_prompt_and_add_tasks(state, model_choice): gr.Info("Only one Start Image must be provided if multiple prompts are used for different windows") return - if image_end != None and len(image_end) > 1: - gr.Info("Only one End Image must be provided if multiple prompts are used for different windows") - return + # if image_end != None and len(image_end) > 1: + # gr.Info("Only one End Image must be provided if multiple prompts are used for different windows") + # return override_inputs = { "image_start": image_start[0] if image_start !=None and len(image_start) > 0 else None, - "image_end": image_end[0] if image_end !=None and len(image_end) > 0 else None, + "image_end": image_end, #[0] if image_end !=None and len(image_end) > 0 else None, "image_refs": image_refs, "audio_guide": audio_guide, "audio_guide2": audio_guide2, @@ -640,19 +648,21 @@ def process_prompt_and_add_tasks(state, model_choice): override_inputs["prompt"] = single_prompt inputs.update(override_inputs) add_video_task(**inputs) + new_prompts_count = len(prompts) else: + new_prompts_count = 1 override_inputs["prompt"] = "\n".join(prompts) inputs.update(override_inputs) add_video_task(**inputs) - gen["prompts_max"] = len(prompts) + gen.get("prompts_max",0) + gen["prompts_max"] = new_prompts_count + gen.get("prompts_max",0) state["validate_success"] = 1 queue= gen.get("queue", []) return update_queue_data(queue) def get_preview_images(inputs): - inputs_to_query = ["image_start", "image_end", "video_source", "video_guide", "image_guide", "video_mask", "image_mask", "image_refs" ] - labels = ["Start Image", "End Image", "Video Source", "Video Guide", "Image Guide", "Video Mask", "Image Mask", "Image Reference"] + inputs_to_query = ["image_start", "video_source", "image_end", "video_guide", "image_guide", "video_mask", "image_mask", "image_refs" ] + labels = ["Start Image", "Video Source", "End Image", "Video Guide", "Image Guide", "Video Mask", "Image Mask", "Image Reference"] start_image_data = None start_image_labels = [] end_image_data = None @@ -3454,6 +3464,8 @@ def convert_image(image): from PIL import ImageOps from typing import cast + if isinstance(image, str): + image = Image.open(image) image = image.convert('RGB') return 
cast(Image, ImageOps.exif_transpose(image)) @@ -4506,7 +4518,7 @@ def generate_video( if test_any_sliding_window(model_type) : if video_source is not None: - current_video_length += sliding_window_overlap + current_video_length += sliding_window_overlap - 1 sliding_window = current_video_length > sliding_window_size reuse_frames = min(sliding_window_size - 4, sliding_window_overlap) else: @@ -4690,8 +4702,7 @@ def generate_video( while not abort: enable_RIFLEx = RIFLEx_setting == 0 and current_video_length > (6* get_model_fps(base_model_type)+1) or RIFLEx_setting == 1 - if sliding_window: - prompt = prompts[window_no] if window_no < len(prompts) else prompts[-1] + prompt = prompts[window_no] if window_no < len(prompts) else prompts[-1] new_extra_windows = gen.get("extra_windows",0) gen["extra_windows"] = 0 extra_windows += new_extra_windows @@ -4722,15 +4733,13 @@ def generate_video( image_start_tensor = image_start.resize((new_width, new_height), resample=Image.Resampling.LANCZOS) image_start_tensor = convert_image_to_tensor(image_start_tensor) pre_video_guide = prefix_video = image_start_tensor.unsqueeze(1) - if image_end is not None: - image_end_tensor = image_end.resize((new_width, new_height), resample=Image.Resampling.LANCZOS) - image_end_tensor = convert_image_to_tensor(image_end_tensor) else: if "L" in image_prompt_type: refresh_preview["video_source"] = get_video_frame(video_source, 0) prefix_video = preprocess_video(width=width, height=height,video_in=video_source, max_frames= parsed_keep_frames_video_source , start_frame = 0, fit_canvas= sample_fit_canvas, target_fps = fps, block_size = block_size ) prefix_video = prefix_video.permute(3, 0, 1, 2) prefix_video = prefix_video.float().div_(127.5).sub_(1.) # c, f, h, w + new_height, new_width = prefix_video.shape[-2:] pre_video_guide = prefix_video[:, -reuse_frames:] pre_video_frame = convert_tensor_to_image(prefix_video[:, -1]) source_video_overlap_frames_count = pre_video_guide.shape[1] @@ -4739,7 +4748,14 @@ def generate_video( image_size = pre_video_guide.shape[-2:] sample_fit_canvas = None guide_start_frame = prefix_video.shape[1] - + if image_end is not None: + image_end_list= image_end if isinstance(image_end, list) else [image_end] + if len(image_end_list) >= window_no: + new_height, new_width = image_size + image_end_tensor =image_end_list[window_no-1].resize((new_width, new_height), resample=Image.Resampling.LANCZOS) + image_end_tensor = convert_image_to_tensor(image_end_tensor) + image_end_list= None + window_start_frame = guide_start_frame - (reuse_frames if window_no > 1 else source_video_overlap_frames_count) guide_end_frame = guide_start_frame + current_video_length - (source_video_overlap_frames_count if window_no == 1 else reuse_frames) alignment_shift = source_video_frames_count if reset_control_aligment else 0 @@ -4797,7 +4813,7 @@ def generate_video( image_size = src_video.shape[-2:] sample_fit_canvas = None - elif "G" in video_prompt_type: # video to video + else: # video to video video_guide_processed = preprocess_video(width = image_size[1], height=image_size[0], video_in=video_guide, max_frames= len(keep_frames_parsed), start_frame = aligned_guide_start_frame, fit_canvas= sample_fit_canvas, target_fps = fps) if video_guide_processed is None: src_video = pre_video_guide @@ -5298,7 +5314,7 @@ def process_tasks(state): while True: with gen_lock: process_status = gen.get("process_status", None) - if process_status is None: + if process_status is None or process_status == "process:main": gen["process_status"] = 
"process:main" break time.sleep(1) @@ -6570,11 +6586,13 @@ def any_letters(source_str, letters): return True return False -def filter_letters(source_str, letters): +def filter_letters(source_str, letters, default= ""): ret = "" for letter in letters: if letter in source_str: ret += letter + if len(ret) == 0: + return default return ret def add_to_sequence(source_str, letters): @@ -6601,9 +6619,18 @@ def refresh_audio_prompt_type_sources(state, audio_prompt_type, audio_prompt_typ audio_prompt_type = add_to_sequence(audio_prompt_type, audio_prompt_type_sources) return audio_prompt_type, gr.update(visible = "A" in audio_prompt_type), gr.update(visible = "B" in audio_prompt_type), gr.update(visible = ("B" in audio_prompt_type or "X" in audio_prompt_type)) -def refresh_image_prompt_type(state, image_prompt_type): - any_video_source = len(filter_letters(image_prompt_type, "VLG"))>0 - return gr.update(visible = "S" in image_prompt_type ), gr.update(visible = "E" in image_prompt_type ), gr.update(visible = "V" in image_prompt_type) , gr.update(visible = any_video_source) +def refresh_image_prompt_type_radio(state, image_prompt_type, image_prompt_type_radio): + image_prompt_type = del_in_sequence(image_prompt_type, "VLTS") + image_prompt_type = add_to_sequence(image_prompt_type, image_prompt_type_radio) + any_video_source = len(filter_letters(image_prompt_type, "VL"))>0 + end_visible = any_letters(image_prompt_type, "SVL") + return image_prompt_type, gr.update(visible = "S" in image_prompt_type ), gr.update(visible = end_visible and ("E" in image_prompt_type) ), gr.update(visible = "V" in image_prompt_type) , gr.update(visible = any_video_source), gr.update(visible = end_visible) + +def refresh_image_prompt_type_endcheckbox(state, image_prompt_type, image_prompt_type_radio, end_checkbox): + image_prompt_type = del_in_sequence(image_prompt_type, "E") + if end_checkbox: image_prompt_type += "E" + image_prompt_type = add_to_sequence(image_prompt_type, image_prompt_type_radio) + return image_prompt_type, gr.update(visible = "E" in image_prompt_type ) def refresh_video_prompt_type_image_refs(state, video_prompt_type, video_prompt_type_image_refs): model_type = state["model_type"] @@ -6680,9 +6707,12 @@ def get_prompt_labels(multi_prompts_gen_type, image_outputs = False): new_line_text = "each new line of prompt will be used for a window" if multi_prompts_gen_type != 0 else "each new line of prompt will generate " + ("a new image" if image_outputs else "a new video") return "Prompts (" + new_line_text + ", # lines = comments, ! 
lines = macros)", "Prompts (" + new_line_text + ", # lines = comments)" +def get_image_end_label(multi_prompts_gen_type): + return "Images as ending points for new Videos in the Generation Queue" if multi_prompts_gen_type == 0 else "Images as ending points for each new Window of the same Video Generation" + def refresh_prompt_labels(multi_prompts_gen_type, image_mode): prompt_label, wizard_prompt_label = get_prompt_labels(multi_prompts_gen_type, image_mode == 1) - return gr.update(label=prompt_label), gr.update(label = wizard_prompt_label) + return gr.update(label=prompt_label), gr.update(label = wizard_prompt_label), gr.update(label=get_image_end_label(multi_prompts_gen_type)) def show_preview_column_modal(state, column_no): column_no = int(column_no) @@ -7054,101 +7084,46 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non with gr.Tab("Text to Image", id = "t2i", elem_classes="compact_tab"): pass - with gr.Column(visible= test_class_i2v(model_type) or hunyuan_i2v or diffusion_forcing or ltxv or recammaster or vace or ti2v_2_2) as image_prompt_column: - if vace or infinitetalk: - image_prompt_type_value= ui_defaults.get("image_prompt_type","") - image_prompt_type_value = "" if image_prompt_type_value == "S" else image_prompt_type_value - image_prompt_type = gr.Radio( [("New Video", ""),("Continue Video File", "V"),("Continue Last Video", "L")], value =image_prompt_type_value, label="Source Video", show_label= False, visible= not image_outputs , scale= 3) - - image_start_row, image_start, image_start_extra = get_image_gallery(visible = False ) - image_end_row, image_end, image_end_extra = get_image_gallery(visible = False ) - video_source = gr.Video(label= "Video Source", height = gallery_height, visible = "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None)) - model_mode = gr.Dropdown(visible = False) - keep_frames_video_source = gr.Text(value=ui_defaults.get("keep_frames_video_source","") , visible= len(filter_letters(image_prompt_type_value, "VLG"))>0 , scale = 2, label= "Truncate Video beyond this number of resampled Frames (empty=Keep All, negative truncates from End)" ) + image_prompt_types_allowed = model_def.get("image_prompt_types_allowed", "") + model_mode_choices = model_def.get("model_modes", None) + with gr.Column(visible= len(image_prompt_types_allowed)> 0 or model_mode_choices is not None) as image_prompt_column: + image_prompt_type_value= ui_defaults.get("image_prompt_type","") + image_prompt_type = gr.Text(value= image_prompt_type_value, visible= False) + image_prompt_type_choices = [] + if "T" in image_prompt_types_allowed: + image_prompt_type_choices += [("Text Prompt Only", "")] + any_start_image = True + if "S" in image_prompt_types_allowed: + image_prompt_type_choices += [("Start Video with Image", "S")] + any_start_image = True + if "V" in image_prompt_types_allowed: any_video_source = True - - elif diffusion_forcing or ltxv or ti2v_2_2: - image_prompt_type_value= ui_defaults.get("image_prompt_type","T") - # image_prompt_type = gr.Radio( [("Start Video with Image", "S"),("Start and End Video with Images", "SE"), ("Continue Video", "V"),("Text Prompt Only", "T")], value =image_prompt_type_value, label="Location", show_label= False, visible= True, scale= 3) - image_prompt_type_choices = [("Text Prompt Only", "T"),("Start Video with Image", "S")] - if ltxv: - image_prompt_type_choices += [("Use both a Start and an End Image", "SE")] - if sliding_window_enabled: - any_video_source = True - image_prompt_type_choices 
+= [("Continue Video", "V")] - image_prompt_type = gr.Radio( image_prompt_type_choices, value =image_prompt_type_value, label="Location", show_label= False, visible= True , scale= 3) - - image_start_row, image_start, image_start_extra = get_image_gallery(label= "Images as starting points for new videos", value = ui_defaults.get("image_start", None), visible= "S" in image_prompt_type_value ) - image_end_row, image_end, image_end_extra = get_image_gallery(label= "Images as ending points for new videos", value = ui_defaults.get("image_end", None), visible= "E" in image_prompt_type_value ) - video_source = gr.Video(label= "Video to Continue", height = gallery_height, visible= "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None),) - if not diffusion_forcing: - model_mode = gr.Dropdown( - choices=[ - ], value=None, - visible= False - ) - else: - model_mode = gr.Dropdown( - choices=[ - ("Synchronous", 0), - ("Asynchronous (better quality but around 50% extra steps added)", 5), - ], - value=ui_defaults.get("model_mode", 0), - label="Generation Type", scale = 3, - visible= True - ) - keep_frames_video_source = gr.Text(value=ui_defaults.get("keep_frames_video_source","") , visible= "V" in image_prompt_type_value, scale = 2, label= "Truncate Video beyond this number of Frames of Video (empty=Keep All)" ) - elif recammaster: - image_prompt_type = gr.Radio(choices=[("Source Video", "V")], value="V") - image_start_row, image_start, image_start_extra = get_image_gallery(visible = False ) - image_end_row, image_end, image_end_extra = get_image_gallery(visible = False ) - video_source = gr.Video(label= "Video Source", height = gallery_height, visible = True, value= ui_defaults.get("video_source", None),) - model_mode = gr.Dropdown( - choices=[ - ("Pan Right", 1), - ("Pan Left", 2), - ("Tilt Up", 3), - ("Tilt Down", 4), - ("Zoom In", 5), - ("Zoom Out", 6), - ("Translate Up (with rotation)", 7), - ("Translate Down (with rotation)", 8), - ("Arc Left (with rotation)", 9), - ("Arc Right (with rotation)", 10), - ], - value=ui_defaults.get("model_mode", 1), - label="Camera Movement Type", scale = 3, - visible= True - ) - keep_frames_video_source = gr.Text(visible=False) - else: - if test_class_i2v(model_type) or hunyuan_i2v: - # image_prompt_type_value= ui_defaults.get("image_prompt_type","SE" if flf2v else "S" ) - image_prompt_type_value= ui_defaults.get("image_prompt_type","S" ) - image_prompt_type_choices = [("Start Video with Image", "S")] - image_prompt_type_choices += [("Use both a Start and an End Image", "SE")] - if not hunyuan_i2v: - any_video_source = True - image_prompt_type_choices += [("Continue Video", "V")] - - image_prompt_type = gr.Radio( image_prompt_type_choices, value =image_prompt_type_value, label="Location", show_label= False, visible= not hunyuan_i2v, scale= 3) - any_start_image = True - any_end_image = True - image_start_row, image_start, image_start_extra = get_image_gallery(label= "Images as starting points for new videos", value = ui_defaults.get("image_start", None), visible= "S" in image_prompt_type_value ) - image_end_row, image_end, image_end_extra = get_image_gallery(label= "Images as ending points for new videos", value = ui_defaults.get("image_end", None), visible= "E" in image_prompt_type_value ) - if hunyuan_i2v: - video_source = gr.Video(value=None, visible=False) + image_prompt_type_choices += [("Continue Video", "V")] + if "L" in image_prompt_types_allowed: + any_video_source = True + image_prompt_type_choices += [("Continue Last Video", "L")] + with 
gr.Group(visible= len(image_prompt_types_allowed)>1) as image_prompt_type_group: + with gr.Row(): + image_prompt_type_radio_allowed_values= filter_letters(image_prompt_types_allowed, "SVL") + if len(image_prompt_type_choices) > 0: + image_prompt_type_radio = gr.Radio( image_prompt_type_choices, value =filter_letters(image_prompt_type_value, image_prompt_type_radio_allowed_values, image_prompt_type_choices[0][1]), label="Location", show_label= False, visible= len(image_prompt_types_allowed)>1, scale= 3) else: - video_source = gr.Video(label= "Video to Continue", height = gallery_height, visible= "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None),) - else: - image_prompt_type = gr.Radio(choices=[("", "")], value="") - image_start_row, image_start, image_start_extra = get_image_gallery(visible = False ) - image_end_row, image_end, image_end_extra = get_image_gallery(visible = False ) - video_source = gr.Video(value=None, visible=False) + image_prompt_type_radio = gr.Radio(choices=[("", "")], value="", visible= False) + if "E" in image_prompt_types_allowed: + image_prompt_type_endcheckbox = gr.Checkbox( value ="E" in image_prompt_type_value, label="End Image(s)", show_label= False, visible= any_letters(image_prompt_type_value, "SVL") and not image_outputs , scale= 1) + any_end_image = True + else: + image_prompt_type_endcheckbox = gr.Checkbox( value =False, show_label= False, visible= False , scale= 1) + image_start_row, image_start, image_start_extra = get_image_gallery(label= "Images as starting points for new Videos in the Generation Queue", value = ui_defaults.get("image_start", None), visible= "S" in image_prompt_type_value ) + video_source = gr.Video(label= "Video to Continue", height = gallery_height, visible= "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None),) + image_end_row, image_end, image_end_extra = get_image_gallery(label= get_image_end_label(ui_defaults.get("multi_prompts_gen_type", 0)), value = ui_defaults.get("image_end", None), visible= any_letters(image_prompt_type_value, "SVL") and ("E" in image_prompt_type_value) ) + if model_mode_choices is None: model_mode = gr.Dropdown(value=None, visible=False) - keep_frames_video_source = gr.Text(visible=False) + else: + model_mode = gr.Dropdown(choices=model_mode_choices["choices"], value=ui_defaults.get("model_mode", model_mode_choices["default"]), label=model_mode_choices["label"], visible=True) + keep_frames_video_source = gr.Text(value=ui_defaults.get("keep_frames_video_source","") , visible= len(filter_letters(image_prompt_type_value, "VL"))>0 , scale = 2, label= "Truncate Video beyond this number of resampled Frames (empty=Keep All, negative truncates from End)" ) - with gr.Column(visible= vace or phantom or hunyuan_video_custom or hunyuan_video_avatar or hunyuan_video_custom_edit or t2v or standin or ltxv or infinitetalk or flux and model_reference_image or qwen and model_reference_image) as video_prompt_column: + with gr.Column(visible= vace or phantom or hunyuan_video_custom or hunyuan_video_avatar or hunyuan_video_custom_edit or t2v or standin or ltxv or infinitetalk or recammaster or flux and model_reference_image or qwen and model_reference_image) as video_prompt_column: video_prompt_type_value= ui_defaults.get("video_prompt_type","") video_prompt_type = gr.Text(value= video_prompt_type_value, visible= False) any_control_video = True @@ -7208,12 +7183,12 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non 
value=filter_letters(video_prompt_type_value, "PDSLCMUV"), label="Image to Image" if image_outputs else "Video to Video", scale = 3, visible= True, show_label= True, ) - elif infinitetalk: - video_prompt_type_video_guide = gr.Dropdown(value="", choices = [("","")], visible=False) + elif recammaster: + video_prompt_type_video_guide = gr.Dropdown(value="UV", choices = [("Control Video","UV")], visible=False) else: any_control_video = False any_control_image = False - video_prompt_type_video_guide = gr.Dropdown(visible= False) + video_prompt_type_video_guide = gr.Dropdown(value="", choices = [("","")], visible=False) if infinitetalk: video_prompt_type_video_guide_alt = gr.Dropdown( @@ -7228,6 +7203,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non value=filter_letters(video_prompt_type_value, "RGUVQKI"), label="Video to Video", scale = 3, visible= True, show_label= False, ) + any_control_video = any_control_image = True else: video_prompt_type_video_guide_alt = gr.Dropdown(value="", choices = [("","")], visible=False) @@ -7761,7 +7737,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, visible = False) elif ltxv: sliding_window_size = gr.Slider(41, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=8, label="Sliding Window Size") - sliding_window_overlap = gr.Slider(9, 97, value=ui_defaults.get("sliding_window_overlap",9), step=8, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)") + sliding_window_overlap = gr.Slider(1, 97, value=ui_defaults.get("sliding_window_overlap",9), step=8, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)") sliding_window_color_correction_strength = gr.Slider(0, 1, visible=False, value =0) sliding_window_overlap_noise = gr.Slider(0, 100, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect", visible = False) sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=8, label="Discard Last Frames of a Window (that may have bad quality)", visible = True) @@ -7772,8 +7748,9 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non sliding_window_overlap_noise = gr.Slider(0, 150, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect", visible = False) sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, label="Discard Last Frames of a Window (that may have bad quality)", visible = True) else: # Vace, Multitalk + sliding_window_defaults = model_def.get("sliding_window_defaults", {}) sliding_window_size = gr.Slider(5, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=4, label="Sliding Window Size") - sliding_window_overlap = gr.Slider(1, 97, value=ui_defaults.get("sliding_window_overlap",5), step=4, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)") + sliding_window_overlap = gr.Slider(sliding_window_defaults.get("overlap_min", 1), sliding_window_defaults.get("overlap_max", 97), 
value=ui_defaults.get("sliding_window_overlap",sliding_window_defaults.get("overlap_default", 5)), step=sliding_window_defaults.get("overlap_step", 4), label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)") sliding_window_color_correction_strength = gr.Slider(0, 1, value=ui_defaults.get("sliding_window_color_correction_strength",1), step=0.01, label="Color Correction Strength (match colors of new window with previous one, 0 = disabled)") sliding_window_overlap_noise = gr.Slider(0, 150, value=ui_defaults.get("sliding_window_overlap_noise",20 if vace else 0), step=1, label="Noise to be added to overlapped frames to reduce blur effect" , visible = vace) sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, label="Discard Last Frames of a Window (that may have bad quality)", visible = True) @@ -7790,13 +7767,13 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non multi_prompts_gen_type = gr.Dropdown( choices=[ - ("Will create new generated Video", 0), + ("Will create a new generated Video added to the Generation Queue", 0), ("Will be used for a new Sliding Window of the same Video Generation", 1), ], value=ui_defaults.get("multi_prompts_gen_type",0), visible=True, scale = 1, - label="Text Prompts separated by a Carriage Return" + label="Images & Text Prompts separated by a Carriage Return" if (any_start_image or any_end_image) else "Text Prompts separated by a Carriage Return" ) with gr.Tab("Misc.", visible = True) as misc_tab: @@ -7962,7 +7939,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non hidden_countdown_state = gr.Number(value=-1, visible=False, elem_id="hidden_countdown_state_num") single_hidden_trigger_btn = gr.Button("trigger_countdown", visible=False, elem_id="trigger_info_single_btn") - extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column, + extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column, image_prompt_type_group, image_prompt_type_radio, image_prompt_type_endcheckbox, prompt_column_advanced, prompt_column_wizard_vars, prompt_column_wizard, lset_name, save_lset_prompt_drop, advanced_row, speed_tab, audio_tab, mmaudio_col, quality_tab, sliding_window_tab, misc_tab, prompt_enhancer_row, inference_steps_row, skip_layer_guidance_row, audio_guide_row, RIFLEx_setting_col, video_prompt_type_video_guide, video_prompt_type_video_guide_alt, video_prompt_type_video_mask, video_prompt_type_image_refs, apg_col, audio_prompt_type_sources, audio_prompt_type_remux, audio_prompt_type_remux_row, @@ -7981,23 +7958,24 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non target_settings = gr.Text(value = "settings", interactive= False, visible= False) last_choice = gr.Number(value =-1, interactive= False, visible= False) - resolution_group.input(fn=change_resolution_group, inputs=[state, resolution_group], outputs=[resolution]) + resolution_group.input(fn=change_resolution_group, inputs=[state, resolution_group], outputs=[resolution], show_progress="hidden") resolution.change(fn=record_last_resolution, inputs=[state, resolution]) # video_length.release(fn=refresh_video_length_label, inputs=[state, video_length ], outputs = video_length, trigger_mode="always_last" ) - 
gr.on(triggers=[video_length.release, force_fps.change, video_guide.change, video_source.change], fn=refresh_video_length_label, inputs=[state, video_length, force_fps, video_guide, video_source] , outputs = video_length, trigger_mode="always_last" ) + gr.on(triggers=[video_length.release, force_fps.change, video_guide.change, video_source.change], fn=refresh_video_length_label, inputs=[state, video_length, force_fps, video_guide, video_source] , outputs = video_length, trigger_mode="always_last", show_progress="hidden" ) guidance_phases.change(fn=change_guidance_phases, inputs= [state, guidance_phases], outputs =[model_switch_phase, guidance_phases_row, switch_threshold, switch_threshold2, guidance2_scale, guidance3_scale ]) audio_prompt_type_remux.change(fn=refresh_audio_prompt_type_remux, inputs=[state, audio_prompt_type, audio_prompt_type_remux], outputs=[audio_prompt_type]) audio_prompt_type_sources.change(fn=refresh_audio_prompt_type_sources, inputs=[state, audio_prompt_type, audio_prompt_type_sources], outputs=[audio_prompt_type, audio_guide, audio_guide2, speakers_locations_row]) - image_prompt_type.change(fn=refresh_image_prompt_type, inputs=[state, image_prompt_type], outputs=[image_start_row, image_end_row, video_source, keep_frames_video_source] ) + image_prompt_type_radio.change(fn=refresh_image_prompt_type_radio, inputs=[state, image_prompt_type, image_prompt_type_radio], outputs=[image_prompt_type, image_start_row, image_end_row, video_source, keep_frames_video_source, image_prompt_type_endcheckbox], show_progress="hidden" ) + image_prompt_type_endcheckbox.change(fn=refresh_image_prompt_type_endcheckbox, inputs=[state, image_prompt_type, image_prompt_type_radio, image_prompt_type_endcheckbox], outputs=[image_prompt_type, image_end_row] ) # video_prompt_video_guide_trigger.change(fn=refresh_video_prompt_video_guide_trigger, inputs=[state, video_prompt_type, video_prompt_video_guide_trigger], outputs=[video_prompt_type, video_prompt_type_video_guide, video_guide, keep_frames_video_guide, denoising_strength, video_guide_outpainting_col, video_prompt_type_video_mask, video_mask, mask_expand]) video_prompt_type_image_refs.input(fn=refresh_video_prompt_type_image_refs, inputs = [state, video_prompt_type, video_prompt_type_image_refs], outputs = [video_prompt_type, image_refs_row, remove_background_images_ref, image_refs_relative_size, frames_positions,video_guide_outpainting_col]) video_prompt_type_video_guide.input(fn=refresh_video_prompt_type_video_guide, inputs = [state, video_prompt_type, video_prompt_type_video_guide, image_mode], outputs = [video_prompt_type, video_guide, image_guide, keep_frames_video_guide, denoising_strength, video_guide_outpainting_col, video_prompt_type_video_mask, video_mask, image_mask, mask_expand]) video_prompt_type_video_guide_alt.input(fn=refresh_video_prompt_type_video_guide_alt, inputs = [state, video_prompt_type, video_prompt_type_video_guide_alt], outputs = [video_prompt_type, video_guide, image_refs_row, denoising_strength ]) video_prompt_type_video_mask.input(fn=refresh_video_prompt_type_video_mask, inputs = [state, video_prompt_type, video_prompt_type_video_mask, image_mode], outputs = [video_prompt_type, video_mask, image_mask, mask_expand]) video_prompt_type_alignment.input(fn=refresh_video_prompt_type_alignment, inputs = [state, video_prompt_type, video_prompt_type_alignment], outputs = [video_prompt_type]) - multi_prompts_gen_type.select(fn=refresh_prompt_labels, inputs=[multi_prompts_gen_type, image_mode], outputs=[prompt, 
wizard_prompt]) + multi_prompts_gen_type.select(fn=refresh_prompt_labels, inputs=[multi_prompts_gen_type, image_mode], outputs=[prompt, wizard_prompt, image_end], show_progress="hidden") video_guide_outpainting_top.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_top, gr.State(0)], outputs = [video_guide_outpainting], trigger_mode="multiple" ) video_guide_outpainting_bottom.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_bottom,gr.State(1)], outputs = [video_guide_outpainting], trigger_mode="multiple" ) video_guide_outpainting_left.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_left,gr.State(2)], outputs = [video_guide_outpainting], trigger_mode="multiple" )