Take me to outer space

This commit is contained in:
DeepBeepMeep 2025-09-05 14:17:21 +02:00
parent 13b001d4ea
commit 8859e816d0
7 changed files with 254 additions and 203 deletions

View File

@@ -20,6 +20,23 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
**Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
## 🔥 Latest Updates:
### September 5 2025: WanGP v8.4 - Take me to Outer Space
You have probably seen those short AI-generated movies created with *Nano Banana* and the *First Frame - Last Frame* feature of *Kling 2.0*. The idea is to generate an image, modify part of it with Nano Banana, and give these two images to Kling, which generates the video between them. Then use the previous Last Frame as the new First Frame, rinse and repeat, and you get a full movie.
I have made it easier to do just that with *Qwen Edit* and *Wan*:
- **End Frames can now be combined with Continue a Video** (and not just with a Start Frame)
- **Multiple End Frames can now be provided**; each End Frame will be used for a different Sliding Window
You can plan all your shots in advance (one shot = one Sliding Window): I recommend using Wan 2.2 Image to Image with multiple End Frames (one for each shot / Sliding Window) and a different Text Prompt for each shot / Sliding Window (remember to enable *Sliding Windows/Text Prompts Will be used for a new Sliding Window of the same Video Generation*).
The results can be quite impressive. However, Wan 2.1 & 2.2 Image to Image are restricted to a single overlap frame when using Sliding Windows, which means only one frame is reused to carry the motion. This may be insufficient if you are trying to connect two shots with fast movement.
This is where *InfiniteTalk* comes into play. Besides being one of the best models for generating animated audio-driven avatars, InfiniteTalk internally uses more than one motion frame, which makes it quite good at maintaining motion between two shots. I have tweaked InfiniteTalk so that **its motion engine can be used even if no audio is provided**.
So here is how to use InfiniteTalk: enable *Sliding Windows/Text Prompts Will be used for a new Sliding Window of the same Video Generation*, and if you continue an existing Video, set *Misc/Override Frames per Second* to *Source Video*. Each Reference Frame provided will play the same role as an End Frame, except it won't be exactly an End Frame (it corresponds more to a middle frame; the actual End Frame will differ but will be close).
You will find below a 33s movie I created using these two methods. The quality could be much better as I haven't tuned the settings at all (I couldn't be bothered: I used 10-step generation without Lora Accelerators for most of the gens).
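Below is a minimal, purely illustrative sketch (not WanGP's actual code) of the shot-planning idea described above: one Text Prompt and one End Frame per Sliding Window; the fallback rule for extra windows is an assumption of the sketch, not necessarily WanGP's behaviour.

```python
# Illustrative only: pair one End Frame and one Text Prompt with each
# sliding window / shot. Names and the fallback rule are hypothetical.
from typing import List, Optional

def plan_shots(prompts: List[str], end_frames: List[str]) -> List[dict]:
    """Build one shot description per sliding window."""
    shots = []
    for window_no, prompt in enumerate(prompts, start=1):
        # Each window gets its own End Frame; extra windows reuse the last one.
        end_frame: Optional[str] = (
            end_frames[min(window_no, len(end_frames)) - 1] if end_frames else None
        )
        shots.append({"window": window_no, "prompt": prompt, "end_frame": end_frame})
    return shots

if __name__ == "__main__":
    prompts = ["Shot 1: lift-off", "Shot 2: orbit", "Shot 3: landing"]
    end_frames = ["shot1_end.png", "shot2_end.png", "shot3_end.png"]
    for shot in plan_shots(prompts, end_frames):
        print(shot)
```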
### September 2 2025: WanGP v8.31 - At last the pain stops
- This single new feature should give you the strength to face all the potential bugs of this new release:

View File

@@ -56,6 +56,10 @@ class family_handler():
         if base_model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar"]:
             extra_model_def["one_image_ref_needed"] = True
+        if base_model_type in ["hunyuan_i2v"]:
+            extra_model_def["image_prompt_types_allowed"] = "S"
         return extra_model_def

     @staticmethod

View File

@@ -24,6 +24,7 @@ class family_handler():
         extra_model_def["frames_minimum"] = 17
         extra_model_def["frames_steps"] = 8
         extra_model_def["sliding_window"] = True
+        extra_model_def["image_prompt_types_allowed"] = "TSEV"
         return extra_model_def
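The `image_prompt_types_allowed` string introduced across these handlers is a set of single-letter flags that wgp.py later expands into the available prompt-source choices (see the generate_video_tab changes further down). A minimal sketch of that mapping, with labels taken from the wgp.py hunk; the helper itself is illustrative, not part of WanGP:

```python
# Illustrative helper: expand an image_prompt_types_allowed string such as
# "TSEV" into the UI options it enables. The letter meanings mirror the
# choices built in wgp.py's generate_video_tab.
LETTER_MEANINGS = {
    "T": "Text Prompt Only",
    "S": "Start Video with Image",
    "E": "End Image(s) allowed",
    "V": "Continue Video",
    "L": "Continue Last Video",
}

def describe_allowed(image_prompt_types_allowed: str) -> list:
    return [LETTER_MEANINGS[letter]
            for letter in image_prompt_types_allowed
            if letter in LETTER_MEANINGS]

print(describe_allowed("TSEV"))
# ['Text Prompt Only', 'Start Video with Image', 'End Image(s) allowed', 'Continue Video']
```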

View File

@@ -537,7 +537,6 @@ class WanAny2V:
                 image_ref = input_frames[:, 0]
                 if input_video is None: input_video = input_frames[:, 0:1]
                 new_shot = "Q" in video_prompt_type
-                denoising_strength = 0.5
             else:
                 if pre_video_frame is None:
                     new_shot = True
@@ -556,74 +555,59 @@ class WanAny2V:
             _ , preframes_count, height, width = input_video.shape
             input_video = input_video.to(device=self.device).to(dtype= self.VAE_dtype)
             if infinitetalk:
-                image_for_clip = image_ref.to(input_video)
+                image_start = image_ref.to(input_video)
                 control_pre_frames_count = 1
-                control_video = image_for_clip.unsqueeze(1)
+                control_video = image_start.unsqueeze(1)
             else:
-                image_for_clip = input_video[:, -1]
+                image_start = input_video[:, -1]
                 control_pre_frames_count = preframes_count
                 control_video = input_video
-            lat_h, lat_w = height // self.vae_stride[1], width // self.vae_stride[2]
-            if hasattr(self, "clip"):
-                clip_image_size = self.clip.model.image_size
-                clip_image = resize_lanczos(image_for_clip, clip_image_size, clip_image_size)[:, None, :, :]
-                clip_context = self.clip.visual([clip_image]) if model_type != "flf2v_720p" else self.clip.visual([clip_image , clip_image ])
-                clip_image = None
-            else:
-                clip_context = None
-            enc = torch.concat( [control_video, torch.zeros( (3, frame_num-control_pre_frames_count, height, width),
-                device=self.device, dtype= self.VAE_dtype)],
-                dim = 1).to(self.device)
-            color_reference_frame = image_for_clip.unsqueeze(1).clone()
+            color_reference_frame = image_start.unsqueeze(1).clone()
         else:
             preframes_count = control_pre_frames_count = 1
-            any_end_frame = image_end is not None
-            add_frames_for_end_image = any_end_frame and model_type == "i2v"
-            if any_end_frame:
-                if add_frames_for_end_image:
-                    frame_num +=1
-                    lat_frames = int((frame_num - 2) // self.vae_stride[0] + 2)
-                    trim_frames = 1
             height, width = image_start.shape[1:]
-            lat_h = round(
-                height // self.vae_stride[1] //
-                self.patch_size[1] * self.patch_size[1])
-            lat_w = round(
-                width // self.vae_stride[2] //
-                self.patch_size[2] * self.patch_size[2])
-            height = lat_h * self.vae_stride[1]
-            width = lat_w * self.vae_stride[2]
-            image_start_frame = image_start.unsqueeze(1).to(self.device)
-            color_reference_frame = image_start_frame.clone()
-            if image_end is not None:
-                img_end_frame = image_end.unsqueeze(1).to(self.device)
-            if hasattr(self, "clip"):
-                clip_image_size = self.clip.model.image_size
-                image_start = resize_lanczos(image_start, clip_image_size, clip_image_size)
-                if image_end is not None: image_end = resize_lanczos(image_end, clip_image_size, clip_image_size)
-                if model_type == "flf2v_720p":
-                    clip_context = self.clip.visual([image_start[:, None, :, :], image_end[:, None, :, :] if image_end is not None else image_start[:, None, :, :]])
-                else:
-                    clip_context = self.clip.visual([image_start[:, None, :, :]])
-            else:
-                clip_context = None
-            if any_end_frame:
-                enc= torch.concat([
-                    image_start_frame,
-                    torch.zeros( (3, frame_num-2, height, width), device=self.device, dtype= self.VAE_dtype),
-                    img_end_frame,
-                ], dim=1).to(self.device)
-            else:
-                enc= torch.concat([
-                    image_start_frame,
-                    torch.zeros( (3, frame_num-1, height, width), device=self.device, dtype= self.VAE_dtype)
-                ], dim=1).to(self.device)
-            image_start = image_end = image_start_frame = img_end_frame = image_for_clip = image_ref = None
+            control_video = image_start.unsqueeze(1).to(self.device)
+            color_reference_frame = control_video.clone()
+            any_end_frame = image_end is not None
+            add_frames_for_end_image = any_end_frame and model_type == "i2v"
+            if any_end_frame:
+                color_correction_strength = 0 #disable color correction as transition frames between shots may have a complete different color level than the colors of the new shot
+                if add_frames_for_end_image:
+                    frame_num +=1
+                    lat_frames = int((frame_num - 2) // self.vae_stride[0] + 2)
+                    trim_frames = 1
+        lat_h, lat_w = height // self.vae_stride[1], width // self.vae_stride[2]
+        if image_end is not None:
+            img_end_frame = image_end.unsqueeze(1).to(self.device)
+        if hasattr(self, "clip"):
+            clip_image_size = self.clip.model.image_size
+            image_start = resize_lanczos(image_start, clip_image_size, clip_image_size)
+            image_end = resize_lanczos(image_end, clip_image_size, clip_image_size) if image_end is not None else image_start
+            if model_type == "flf2v_720p":
+                clip_context = self.clip.visual([image_start[:, None, :, :], image_end[:, None, :, :] if image_end is not None else image_start[:, None, :, :]])
+            else:
+                clip_context = self.clip.visual([image_start[:, None, :, :]])
+        else:
+            clip_context = None
+        if any_end_frame:
+            enc= torch.concat([
+                control_video,
+                torch.zeros( (3, frame_num-control_pre_frames_count-1, height, width), device=self.device, dtype= self.VAE_dtype),
+                img_end_frame,
+            ], dim=1).to(self.device)
+        else:
+            enc= torch.concat([
+                control_video,
+                torch.zeros( (3, frame_num-control_pre_frames_count, height, width), device=self.device, dtype= self.VAE_dtype)
+            ], dim=1).to(self.device)
+        image_start = image_end = img_end_frame = image_ref = control_video = None
         msk = torch.ones(1, frame_num, lat_h, lat_w, device=self.device)
         if any_end_frame:
@@ -657,12 +641,11 @@ class WanAny2V:
         # Recam Master
         if recam:
+            # should in fact be in input_frames since it is a control video, not a video to be extended
             target_camera = model_mode
-            height,width = input_video.shape[-2:]
-            input_video = input_video.to(dtype=self.dtype , device=self.device)
-            source_latents = self.vae.encode([input_video])[0].unsqueeze(0) #.to(dtype=self.dtype, device=self.device)
-            del input_video
+            height,width = input_frames.shape[-2:]
+            input_frames = input_frames.to(dtype=self.dtype , device=self.device)
+            source_latents = self.vae.encode([input_frames])[0].unsqueeze(0) #.to(dtype=self.dtype, device=self.device)
+            del input_frames
             # Process target camera (recammaster)
             from shared.utils.cammmaster_tools import get_camera_embedding
             cam_emb = get_camera_embedding(target_camera)
@@ -754,7 +737,9 @@ class WanAny2V:
         else:
             target_shape = (self.vae.model.z_dim, lat_frames + ref_images_count, height // self.vae_stride[1], width // self.vae_stride[2])

-        if multitalk and audio_proj != None:
+        if multitalk:
+            if audio_proj is None:
+                audio_proj = [ torch.zeros( (1, 1, 5, 12, 768 ), dtype=self.dtype, device=self.device), torch.zeros( (1, (frame_num - 1) // 4, 8, 12, 768 ), dtype=self.dtype, device=self.device) ]
             from .multitalk.multitalk import get_target_masks
             audio_proj = [audio.to(self.dtype) for audio in audio_proj]
             human_no = len(audio_proj[0])
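The zero-filled `audio_proj` placeholder above is what lets the InfiniteTalk/Multitalk motion engine run when no audio track is supplied. A self-contained sketch of the same shape logic (the function name is hypothetical; the tensor shapes are copied from the diff):

```python
import torch

def make_silent_audio_proj(frame_num: int, dtype=torch.float16, device="cpu"):
    # Shapes match the zero placeholder added in this commit for the
    # "no audio provided" case.
    return [
        torch.zeros((1, 1, 5, 12, 768), dtype=dtype, device=device),
        torch.zeros((1, (frame_num - 1) // 4, 8, 12, 768), dtype=dtype, device=device),
    ]

proj = make_silent_audio_proj(frame_num=81)
print([tuple(t.shape) for t in proj])
# [(1, 1, 5, 12, 768), (1, 20, 8, 12, 768)]
```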

View File

@@ -26,6 +26,18 @@ class family_handler():
         extra_model_def["tea_cache"] = True
         extra_model_def["guidance_max_phases"] = 1
+        extra_model_def["model_modes"] = {
+            "choices": [
+                ("Synchronous", 0),
+                ("Asynchronous (better quality but around 50% extra steps added)", 5),
+            ],
+            "default": 0,
+            "label" : "Generation Type"
+        }
+        extra_model_def["image_prompt_types_allowed"] = "TSEV"
         return extra_model_def

     @staticmethod

View File

@@ -5,6 +5,9 @@ import gradio as gr
 def test_class_i2v(base_model_type):
     return base_model_type in ["i2v", "i2v_2_2", "fun_inp_1.3B", "fun_inp", "flf2v_720p", "fantasy", "multitalk", "infinitetalk", "i2v_2_2_multitalk" ]

+def text_oneframe_overlap(base_model_type):
+    return test_class_i2v(base_model_type) and not test_multitalk(base_model_type)
+
 def test_class_1_3B(base_model_type):
     return base_model_type in [ "vace_1.3B", "t2v_1.3B", "recam_1.3B","phantom_1.3B","fun_inp_1.3B"]
@@ -120,6 +123,37 @@ class family_handler():
         if base_model_type in ["standin"] or vace_class:
             extra_model_def["lock_image_refs_ratios"] = True

+        if base_model_type in ["recam_1.3B"]:
+            extra_model_def["keep_frames_video_guide_not_supported"] = True
+            extra_model_def["model_modes"] = {
+                "choices": [
+                    ("Pan Right", 1),
+                    ("Pan Left", 2),
+                    ("Tilt Up", 3),
+                    ("Tilt Down", 4),
+                    ("Zoom In", 5),
+                    ("Zoom Out", 6),
+                    ("Translate Up (with rotation)", 7),
+                    ("Translate Down (with rotation)", 8),
+                    ("Arc Left (with rotation)", 9),
+                    ("Arc Right (with rotation)", 10),
+                ],
+                "default": 1,
+                "label" : "Camera Movement Type"
+            }
+
+        if vace_class or base_model_type in ["infinitetalk"]:
+            image_prompt_types_allowed = "TVL"
+        elif base_model_type in ["ti2v_2_2"]:
+            image_prompt_types_allowed = "TSEVL"
+        elif i2v:
+            image_prompt_types_allowed = "SEVL"
+        else:
+            image_prompt_types_allowed = ""
+        extra_model_def["image_prompt_types_allowed"] = image_prompt_types_allowed
+
+        if text_oneframe_overlap(base_model_type):
+            extra_model_def["sliding_window_defaults"] = { "overlap_min" : 1, "overlap_max" : 1, "overlap_step": 0, "overlap_default": 1}
+
         # if base_model_type in ["phantom_1.3B", "phantom_14B"]:
         #     extra_model_def["one_image_ref_needed"] = True
@@ -251,6 +285,17 @@ class family_handler():
             video_prompt_type = video_prompt_type.replace("U", "RU")
             ui_defaults["video_prompt_type"] = video_prompt_type

+        if settings_version < 2.31:
+            if base_model_type in "recam_1.3B":
+                video_prompt_type = ui_defaults.get("video_prompt_type", "")
+                if not "V" in video_prompt_type:
+                    video_prompt_type += "UV"
+                ui_defaults["video_prompt_type"] = video_prompt_type
+                ui_defaults["image_prompt_type"] = ""
+
+            if text_oneframe_overlap(base_model_type):
+                ui_defaults["sliding_window_overlap"] = 1
+
     @staticmethod
     def update_default_settings(base_model_type, model_def, ui_defaults):
         ui_defaults.update({
@@ -309,6 +354,15 @@ class family_handler():
                 "image_prompt_type": "T",
             })

+        if base_model_type in ["recam_1.3B"]:
+            ui_defaults.update({
+                "video_prompt_type": "UV",
+            })
+
+        if text_oneframe_overlap(base_model_type):
+            ui_defaults["sliding_window_overlap"] = 1
+            ui_defaults["color_correction_strength"] = 0
+
         if test_multitalk(base_model_type):
             ui_defaults["audio_guidance_scale"] = 4

wgp.py
View File

@@ -60,8 +60,8 @@ AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10

 target_mmgp_version = "3.6.0"
-WanGP_version = "8.34"
-settings_version = 2.29
+WanGP_version = "8.4"
+settings_version = 2.31
 max_source_video_frames = 3000
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
@@ -347,7 +347,7 @@ def process_prompt_and_add_tasks(state, model_choice):
         model_switch_phase = inputs["model_switch_phase"]
         switch_threshold = inputs["switch_threshold"]
         switch_threshold2 = inputs["switch_threshold2"]
+        multi_prompts_gen_type = inputs["multi_prompts_gen_type"]

         if len(loras_multipliers) > 0:
             _, _, errors = parse_loras_multipliers(loras_multipliers, len(activated_loras), num_inference_steps, nb_phases= guidance_phases)
@@ -445,7 +445,7 @@ def process_prompt_and_add_tasks(state, model_choice):
     if "I" in video_prompt_type:
         if image_refs == None or len(image_refs) == 0:
-            gr.Info("You must provide at least one Refererence Image")
+            gr.Info("You must provide at least one Reference Image")
             return
         image_refs = clean_image_list(image_refs)
         if image_refs == None :
@@ -511,9 +511,14 @@ def process_prompt_and_add_tasks(state, model_choice):
             if image_start == None :
                 gr.Info("Start Image should be an Image")
                 return
+            if multi_prompts_gen_type == 1 and len(image_start) > 1:
+                gr.Info("Only one Start Image is supported")
+                return
         else:
             image_start = None

+        if not any_letters(image_prompt_type, "SVL"):
+            image_prompt_type = image_prompt_type.replace("E", "")
         if "E" in image_prompt_type:
             if image_end == None or isinstance(image_end, list) and len(image_end) == 0:
                 gr.Info("You must provide an End Image")
@@ -522,32 +527,35 @@ def process_prompt_and_add_tasks(state, model_choice):
             if image_end == None :
                 gr.Info("End Image should be an Image")
                 return
-            if len(image_start) != len(image_end):
-                gr.Info("The number of Start and End Images should be the same ")
-                return
+            if multi_prompts_gen_type == 0:
+                if video_source is not None:
+                    if len(image_end)> 1:
+                        gr.Info("If a Video is to be continued and the option 'Each Text Prompt Will create a new generated Video' is set, there can be only one End Image")
+                        return
+                elif len(image_start or []) != len(image_end or []):
+                    gr.Info("The number of Start and End Images should be the same when the option 'Each Text Prompt Will create a new generated Video'")
+                    return
         else:
             image_end = None

         if test_any_sliding_window(model_type) and image_mode == 0:
             if video_length > sliding_window_size:
-                full_video_length = video_length if video_source is None else video_length + sliding_window_overlap
+                full_video_length = video_length if video_source is None else video_length + sliding_window_overlap -1
                 extra = "" if full_video_length == video_length else f" including {sliding_window_overlap} added for Video Continuation"
                 no_windows = compute_sliding_window_no(full_video_length, sliding_window_size, sliding_window_discard_last_frames, sliding_window_overlap)
                 gr.Info(f"The Number of Frames to generate ({video_length}{extra}) is greater than the Sliding Window Size ({sliding_window_size}), {no_windows} Windows will be generated")

         if "recam" in model_filename:
-            if video_source == None:
-                gr.Info("You must provide a Source Video")
+            if video_guide == None:
+                gr.Info("You must provide a Control Video")
                 return
-            frames = get_resampled_video(video_source, 0, 81, get_computed_fps(force_fps, model_type , video_guide, video_source ))
+            computed_fps = get_computed_fps(force_fps, model_type , video_guide, video_source )
+            frames = get_resampled_video(video_guide, 0, 81, computed_fps)
             if len(frames)<81:
-                gr.Info("Recammaster source video should be at least 81 frames once the resampling at 16 fps has been done")
+                gr.Info(f"Recammaster Control video should be at least 81 frames once the resampling at {computed_fps} fps has been done")
                 return

         if "hunyuan_custom_custom_edit" in model_filename:
             if len(keep_frames_video_guide) > 0:
                 gr.Info("Filtering Frames with this model is not supported")
@@ -558,13 +566,13 @@ def process_prompt_and_add_tasks(state, model_choice):
                 gr.Info("Only one Start Image must be provided if multiple prompts are used for different windows")
                 return

-            if image_end != None and len(image_end) > 1:
-                gr.Info("Only one End Image must be provided if multiple prompts are used for different windows")
-                return
+            # if image_end != None and len(image_end) > 1:
+            #     gr.Info("Only one End Image must be provided if multiple prompts are used for different windows")
+            #     return

     override_inputs = {
         "image_start": image_start[0] if image_start !=None and len(image_start) > 0 else None,
-        "image_end": image_end[0] if image_end !=None and len(image_end) > 0 else None,
+        "image_end": image_end, #[0] if image_end !=None and len(image_end) > 0 else None,
         "image_refs": image_refs,
         "audio_guide": audio_guide,
         "audio_guide2": audio_guide2,
@@ -640,19 +648,21 @@ def process_prompt_and_add_tasks(state, model_choice):
             override_inputs["prompt"] = single_prompt
             inputs.update(override_inputs)
             add_video_task(**inputs)
+        new_prompts_count = len(prompts)
     else:
+        new_prompts_count = 1
         override_inputs["prompt"] = "\n".join(prompts)
         inputs.update(override_inputs)
         add_video_task(**inputs)

-    gen["prompts_max"] = len(prompts) + gen.get("prompts_max",0)
+    gen["prompts_max"] = new_prompts_count + gen.get("prompts_max",0)
     state["validate_success"] = 1
     queue= gen.get("queue", [])
     return update_queue_data(queue)

 def get_preview_images(inputs):
-    inputs_to_query = ["image_start", "image_end", "video_source", "video_guide", "image_guide", "video_mask", "image_mask", "image_refs" ]
-    labels = ["Start Image", "End Image", "Video Source", "Video Guide", "Image Guide", "Video Mask", "Image Mask", "Image Reference"]
+    inputs_to_query = ["image_start", "video_source", "image_end", "video_guide", "image_guide", "video_mask", "image_mask", "image_refs" ]
+    labels = ["Start Image", "Video Source", "End Image", "Video Guide", "Image Guide", "Video Mask", "Image Mask", "Image Reference"]
     start_image_data = None
     start_image_labels = []
     end_image_data = None
@@ -3454,6 +3464,8 @@ def convert_image(image):

     from PIL import ImageOps
     from typing import cast
+    if isinstance(image, str):
+        image = Image.open(image)
     image = image.convert('RGB')
     return cast(Image, ImageOps.exif_transpose(image))
@@ -4506,7 +4518,7 @@ def generate_video(
     if test_any_sliding_window(model_type) :
         if video_source is not None:
-            current_video_length += sliding_window_overlap
+            current_video_length += sliding_window_overlap - 1
         sliding_window = current_video_length > sliding_window_size
         reuse_frames = min(sliding_window_size - 4, sliding_window_overlap)
     else:
@@ -4690,8 +4702,7 @@ def generate_video(
     while not abort:
         enable_RIFLEx = RIFLEx_setting == 0 and current_video_length > (6* get_model_fps(base_model_type)+1) or RIFLEx_setting == 1
-        if sliding_window:
-            prompt = prompts[window_no] if window_no < len(prompts) else prompts[-1]
+        prompt = prompts[window_no] if window_no < len(prompts) else prompts[-1]
         new_extra_windows = gen.get("extra_windows",0)
         gen["extra_windows"] = 0
         extra_windows += new_extra_windows
@@ -4722,15 +4733,13 @@ def generate_video(
                     image_start_tensor = image_start.resize((new_width, new_height), resample=Image.Resampling.LANCZOS)
                     image_start_tensor = convert_image_to_tensor(image_start_tensor)
                     pre_video_guide = prefix_video = image_start_tensor.unsqueeze(1)
-                    if image_end is not None:
-                        image_end_tensor = image_end.resize((new_width, new_height), resample=Image.Resampling.LANCZOS)
-                        image_end_tensor = convert_image_to_tensor(image_end_tensor)
                 else:
                     if "L" in image_prompt_type:
                         refresh_preview["video_source"] = get_video_frame(video_source, 0)
                     prefix_video = preprocess_video(width=width, height=height,video_in=video_source, max_frames= parsed_keep_frames_video_source , start_frame = 0, fit_canvas= sample_fit_canvas, target_fps = fps, block_size = block_size )
                     prefix_video = prefix_video.permute(3, 0, 1, 2)
                     prefix_video = prefix_video.float().div_(127.5).sub_(1.) # c, f, h, w
+                    new_height, new_width = prefix_video.shape[-2:]
                     pre_video_guide = prefix_video[:, -reuse_frames:]
                     pre_video_frame = convert_tensor_to_image(prefix_video[:, -1])
                     source_video_overlap_frames_count = pre_video_guide.shape[1]
@@ -4739,7 +4748,14 @@ def generate_video(
                 image_size = pre_video_guide.shape[-2:]
                 sample_fit_canvas = None
             guide_start_frame = prefix_video.shape[1]
+            if image_end is not None:
+                image_end_list= image_end if isinstance(image_end, list) else [image_end]
+                if len(image_end_list) >= window_no:
+                    new_height, new_width = image_size
+                    image_end_tensor =image_end_list[window_no-1].resize((new_width, new_height), resample=Image.Resampling.LANCZOS)
+                    image_end_tensor = convert_image_to_tensor(image_end_tensor)
+                image_end_list= None

         window_start_frame = guide_start_frame - (reuse_frames if window_no > 1 else source_video_overlap_frames_count)
         guide_end_frame = guide_start_frame + current_video_length - (source_video_overlap_frames_count if window_no == 1 else reuse_frames)
         alignment_shift = source_video_frames_count if reset_control_aligment else 0
@@ -4797,7 +4813,7 @@ def generate_video(
                     image_size = src_video.shape[-2:]
                     sample_fit_canvas = None
-            elif "G" in video_prompt_type: # video to video
+            else: # video to video
                 video_guide_processed = preprocess_video(width = image_size[1], height=image_size[0], video_in=video_guide, max_frames= len(keep_frames_parsed), start_frame = aligned_guide_start_frame, fit_canvas= sample_fit_canvas, target_fps = fps)
                 if video_guide_processed is None:
                     src_video = pre_video_guide
@@ -5298,7 +5314,7 @@ def process_tasks(state):
     while True:
         with gen_lock:
             process_status = gen.get("process_status", None)
-            if process_status is None:
+            if process_status is None or process_status == "process:main":
                 gen["process_status"] = "process:main"
                 break
         time.sleep(1)
@@ -6570,11 +6586,13 @@ def any_letters(source_str, letters):
             return True
     return False

-def filter_letters(source_str, letters):
+def filter_letters(source_str, letters, default= ""):
     ret = ""
     for letter in letters:
         if letter in source_str:
             ret += letter
+    if len(ret) == 0:
+        return default
     return ret

 def add_to_sequence(source_str, letters):
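For clarity, a quick self-contained usage sketch of the new `default` argument of `filter_letters` (the function body is copied from the hunk above so the example runs on its own):

```python
def filter_letters(source_str, letters, default=""):
    ret = ""
    for letter in letters:
        if letter in source_str:
            ret += letter
    if len(ret) == 0:
        return default
    return ret

print(filter_letters("SE", "SVL"))       # "S"  -> unchanged behaviour
print(filter_letters("T",  "SVL"))       # ""   -> previously the only possible fallback
print(filter_letters("T",  "SVL", "S"))  # "S"  -> new: fall back to a default choice
```

The UI code further down uses this to pick an initial value for the new image-prompt radio when the saved `image_prompt_type` contains none of the allowed letters.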
@@ -6601,9 +6619,18 @@ def refresh_audio_prompt_type_sources(state, audio_prompt_type, audio_prompt_typ
     audio_prompt_type = add_to_sequence(audio_prompt_type, audio_prompt_type_sources)
     return audio_prompt_type, gr.update(visible = "A" in audio_prompt_type), gr.update(visible = "B" in audio_prompt_type), gr.update(visible = ("B" in audio_prompt_type or "X" in audio_prompt_type))

-def refresh_image_prompt_type(state, image_prompt_type):
-    any_video_source = len(filter_letters(image_prompt_type, "VLG"))>0
-    return gr.update(visible = "S" in image_prompt_type ), gr.update(visible = "E" in image_prompt_type ), gr.update(visible = "V" in image_prompt_type) , gr.update(visible = any_video_source)
+def refresh_image_prompt_type_radio(state, image_prompt_type, image_prompt_type_radio):
+    image_prompt_type = del_in_sequence(image_prompt_type, "VLTS")
+    image_prompt_type = add_to_sequence(image_prompt_type, image_prompt_type_radio)
+    any_video_source = len(filter_letters(image_prompt_type, "VL"))>0
+    end_visible = any_letters(image_prompt_type, "SVL")
+    return image_prompt_type, gr.update(visible = "S" in image_prompt_type ), gr.update(visible = end_visible and ("E" in image_prompt_type) ), gr.update(visible = "V" in image_prompt_type) , gr.update(visible = any_video_source), gr.update(visible = end_visible)
+
+def refresh_image_prompt_type_endcheckbox(state, image_prompt_type, image_prompt_type_radio, end_checkbox):
+    image_prompt_type = del_in_sequence(image_prompt_type, "E")
+    if end_checkbox: image_prompt_type += "E"
+    image_prompt_type = add_to_sequence(image_prompt_type, image_prompt_type_radio)
+    return image_prompt_type, gr.update(visible = "E" in image_prompt_type )

 def refresh_video_prompt_type_image_refs(state, video_prompt_type, video_prompt_type_image_refs):
     model_type = state["model_type"]
@@ -6680,9 +6707,12 @@ def get_prompt_labels(multi_prompts_gen_type, image_outputs = False):
     new_line_text = "each new line of prompt will be used for a window" if multi_prompts_gen_type != 0 else "each new line of prompt will generate " + ("a new image" if image_outputs else "a new video")
     return "Prompts (" + new_line_text + ", # lines = comments, ! lines = macros)", "Prompts (" + new_line_text + ", # lines = comments)"

+def get_image_end_label(multi_prompts_gen_type):
+    return "Images as ending points for new Videos in the Generation Queue" if multi_prompts_gen_type == 0 else "Images as ending points for each new Window of the same Video Generation"
+
 def refresh_prompt_labels(multi_prompts_gen_type, image_mode):
     prompt_label, wizard_prompt_label = get_prompt_labels(multi_prompts_gen_type, image_mode == 1)
-    return gr.update(label=prompt_label), gr.update(label = wizard_prompt_label)
+    return gr.update(label=prompt_label), gr.update(label = wizard_prompt_label), gr.update(label=get_image_end_label(multi_prompts_gen_type))

 def show_preview_column_modal(state, column_no):
     column_no = int(column_no)
@ -7054,101 +7084,46 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
with gr.Tab("Text to Image", id = "t2i", elem_classes="compact_tab"): with gr.Tab("Text to Image", id = "t2i", elem_classes="compact_tab"):
pass pass
with gr.Column(visible= test_class_i2v(model_type) or hunyuan_i2v or diffusion_forcing or ltxv or recammaster or vace or ti2v_2_2) as image_prompt_column: image_prompt_types_allowed = model_def.get("image_prompt_types_allowed", "")
if vace or infinitetalk: model_mode_choices = model_def.get("model_modes", None)
image_prompt_type_value= ui_defaults.get("image_prompt_type","") with gr.Column(visible= len(image_prompt_types_allowed)> 0 or model_mode_choices is not None) as image_prompt_column:
image_prompt_type_value = "" if image_prompt_type_value == "S" else image_prompt_type_value image_prompt_type_value= ui_defaults.get("image_prompt_type","")
image_prompt_type = gr.Radio( [("New Video", ""),("Continue Video File", "V"),("Continue Last Video", "L")], value =image_prompt_type_value, label="Source Video", show_label= False, visible= not image_outputs , scale= 3) image_prompt_type = gr.Text(value= image_prompt_type_value, visible= False)
image_prompt_type_choices = []
image_start_row, image_start, image_start_extra = get_image_gallery(visible = False ) if "T" in image_prompt_types_allowed:
image_end_row, image_end, image_end_extra = get_image_gallery(visible = False ) image_prompt_type_choices += [("Text Prompt Only", "")]
video_source = gr.Video(label= "Video Source", height = gallery_height, visible = "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None)) any_start_image = True
model_mode = gr.Dropdown(visible = False) if "S" in image_prompt_types_allowed:
keep_frames_video_source = gr.Text(value=ui_defaults.get("keep_frames_video_source","") , visible= len(filter_letters(image_prompt_type_value, "VLG"))>0 , scale = 2, label= "Truncate Video beyond this number of resampled Frames (empty=Keep All, negative truncates from End)" ) image_prompt_type_choices += [("Start Video with Image", "S")]
any_start_image = True
if "V" in image_prompt_types_allowed:
any_video_source = True any_video_source = True
image_prompt_type_choices += [("Continue Video", "V")]
elif diffusion_forcing or ltxv or ti2v_2_2: if "L" in image_prompt_types_allowed:
image_prompt_type_value= ui_defaults.get("image_prompt_type","T") any_video_source = True
# image_prompt_type = gr.Radio( [("Start Video with Image", "S"),("Start and End Video with Images", "SE"), ("Continue Video", "V"),("Text Prompt Only", "T")], value =image_prompt_type_value, label="Location", show_label= False, visible= True, scale= 3) image_prompt_type_choices += [("Continue Last Video", "L")]
image_prompt_type_choices = [("Text Prompt Only", "T"),("Start Video with Image", "S")] with gr.Group(visible= len(image_prompt_types_allowed)>1) as image_prompt_type_group:
if ltxv: with gr.Row():
image_prompt_type_choices += [("Use both a Start and an End Image", "SE")] image_prompt_type_radio_allowed_values= filter_letters(image_prompt_types_allowed, "SVL")
if sliding_window_enabled: if len(image_prompt_type_choices) > 0:
any_video_source = True image_prompt_type_radio = gr.Radio( image_prompt_type_choices, value =filter_letters(image_prompt_type_value, image_prompt_type_radio_allowed_values, image_prompt_type_choices[0][1]), label="Location", show_label= False, visible= len(image_prompt_types_allowed)>1, scale= 3)
image_prompt_type_choices += [("Continue Video", "V")]
image_prompt_type = gr.Radio( image_prompt_type_choices, value =image_prompt_type_value, label="Location", show_label= False, visible= True , scale= 3)
image_start_row, image_start, image_start_extra = get_image_gallery(label= "Images as starting points for new videos", value = ui_defaults.get("image_start", None), visible= "S" in image_prompt_type_value )
image_end_row, image_end, image_end_extra = get_image_gallery(label= "Images as ending points for new videos", value = ui_defaults.get("image_end", None), visible= "E" in image_prompt_type_value )
video_source = gr.Video(label= "Video to Continue", height = gallery_height, visible= "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None),)
if not diffusion_forcing:
model_mode = gr.Dropdown(
choices=[
], value=None,
visible= False
)
else:
model_mode = gr.Dropdown(
choices=[
("Synchronous", 0),
("Asynchronous (better quality but around 50% extra steps added)", 5),
],
value=ui_defaults.get("model_mode", 0),
label="Generation Type", scale = 3,
visible= True
)
keep_frames_video_source = gr.Text(value=ui_defaults.get("keep_frames_video_source","") , visible= "V" in image_prompt_type_value, scale = 2, label= "Truncate Video beyond this number of Frames of Video (empty=Keep All)" )
elif recammaster:
image_prompt_type = gr.Radio(choices=[("Source Video", "V")], value="V")
image_start_row, image_start, image_start_extra = get_image_gallery(visible = False )
image_end_row, image_end, image_end_extra = get_image_gallery(visible = False )
video_source = gr.Video(label= "Video Source", height = gallery_height, visible = True, value= ui_defaults.get("video_source", None),)
model_mode = gr.Dropdown(
choices=[
("Pan Right", 1),
("Pan Left", 2),
("Tilt Up", 3),
("Tilt Down", 4),
("Zoom In", 5),
("Zoom Out", 6),
("Translate Up (with rotation)", 7),
("Translate Down (with rotation)", 8),
("Arc Left (with rotation)", 9),
("Arc Right (with rotation)", 10),
],
value=ui_defaults.get("model_mode", 1),
label="Camera Movement Type", scale = 3,
visible= True
)
keep_frames_video_source = gr.Text(visible=False)
else:
if test_class_i2v(model_type) or hunyuan_i2v:
# image_prompt_type_value= ui_defaults.get("image_prompt_type","SE" if flf2v else "S" )
image_prompt_type_value= ui_defaults.get("image_prompt_type","S" )
image_prompt_type_choices = [("Start Video with Image", "S")]
image_prompt_type_choices += [("Use both a Start and an End Image", "SE")]
if not hunyuan_i2v:
any_video_source = True
image_prompt_type_choices += [("Continue Video", "V")]
image_prompt_type = gr.Radio( image_prompt_type_choices, value =image_prompt_type_value, label="Location", show_label= False, visible= not hunyuan_i2v, scale= 3)
any_start_image = True
any_end_image = True
image_start_row, image_start, image_start_extra = get_image_gallery(label= "Images as starting points for new videos", value = ui_defaults.get("image_start", None), visible= "S" in image_prompt_type_value )
image_end_row, image_end, image_end_extra = get_image_gallery(label= "Images as ending points for new videos", value = ui_defaults.get("image_end", None), visible= "E" in image_prompt_type_value )
if hunyuan_i2v:
video_source = gr.Video(value=None, visible=False)
else: else:
video_source = gr.Video(label= "Video to Continue", height = gallery_height, visible= "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None),) image_prompt_type_radio = gr.Radio(choices=[("", "")], value="", visible= False)
else: if "E" in image_prompt_types_allowed:
image_prompt_type = gr.Radio(choices=[("", "")], value="") image_prompt_type_endcheckbox = gr.Checkbox( value ="E" in image_prompt_type_value, label="End Image(s)", show_label= False, visible= any_letters(image_prompt_type_value, "SVL") and not image_outputs , scale= 1)
image_start_row, image_start, image_start_extra = get_image_gallery(visible = False ) any_end_image = True
image_end_row, image_end, image_end_extra = get_image_gallery(visible = False ) else:
video_source = gr.Video(value=None, visible=False) image_prompt_type_endcheckbox = gr.Checkbox( value =False, show_label= False, visible= False , scale= 1)
image_start_row, image_start, image_start_extra = get_image_gallery(label= "Images as starting points for new Videos in the Generation Queue", value = ui_defaults.get("image_start", None), visible= "S" in image_prompt_type_value )
video_source = gr.Video(label= "Video to Continue", height = gallery_height, visible= "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None),)
image_end_row, image_end, image_end_extra = get_image_gallery(label= get_image_end_label(ui_defaults.get("multi_prompts_gen_type", 0)), value = ui_defaults.get("image_end", None), visible= any_letters(image_prompt_type_value, "SVL") and ("E" in image_prompt_type_value) )
if model_mode_choices is None:
model_mode = gr.Dropdown(value=None, visible=False) model_mode = gr.Dropdown(value=None, visible=False)
keep_frames_video_source = gr.Text(visible=False) else:
model_mode = gr.Dropdown(choices=model_mode_choices["choices"], value=ui_defaults.get("model_mode", model_mode_choices["default"]), label=model_mode_choices["label"], visible=True)
keep_frames_video_source = gr.Text(value=ui_defaults.get("keep_frames_video_source","") , visible= len(filter_letters(image_prompt_type_value, "VL"))>0 , scale = 2, label= "Truncate Video beyond this number of resampled Frames (empty=Keep All, negative truncates from End)" )
with gr.Column(visible= vace or phantom or hunyuan_video_custom or hunyuan_video_avatar or hunyuan_video_custom_edit or t2v or standin or ltxv or infinitetalk or flux and model_reference_image or qwen and model_reference_image) as video_prompt_column: with gr.Column(visible= vace or phantom or hunyuan_video_custom or hunyuan_video_avatar or hunyuan_video_custom_edit or t2v or standin or ltxv or infinitetalk or recammaster or flux and model_reference_image or qwen and model_reference_image) as video_prompt_column:
video_prompt_type_value= ui_defaults.get("video_prompt_type","") video_prompt_type_value= ui_defaults.get("video_prompt_type","")
video_prompt_type = gr.Text(value= video_prompt_type_value, visible= False) video_prompt_type = gr.Text(value= video_prompt_type_value, visible= False)
any_control_video = True any_control_video = True
@@ -7208,12 +7183,12 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                         value=filter_letters(video_prompt_type_value, "PDSLCMUV"),
                         label="Image to Image" if image_outputs else "Video to Video", scale = 3, visible= True, show_label= True,
                     )
-                elif infinitetalk:
-                    video_prompt_type_video_guide = gr.Dropdown(value="", choices = [("","")], visible=False)
+                elif recammaster:
+                    video_prompt_type_video_guide = gr.Dropdown(value="UV", choices = [("Control Video","UV")], visible=False)
                 else:
                     any_control_video = False
                     any_control_image = False
-                    video_prompt_type_video_guide = gr.Dropdown(visible= False)
+                    video_prompt_type_video_guide = gr.Dropdown(value="", choices = [("","")], visible=False)

                 if infinitetalk:
                     video_prompt_type_video_guide_alt = gr.Dropdown(
@@ -7228,6 +7203,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                         value=filter_letters(video_prompt_type_value, "RGUVQKI"),
                         label="Video to Video", scale = 3, visible= True, show_label= False,
                     )
+                    any_control_video = any_control_image = True
                 else:
                     video_prompt_type_video_guide_alt = gr.Dropdown(value="", choices = [("","")], visible=False)
@@ -7761,7 +7737,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                             sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, visible = False)
                         elif ltxv:
                             sliding_window_size = gr.Slider(41, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=8, label="Sliding Window Size")
-                            sliding_window_overlap = gr.Slider(9, 97, value=ui_defaults.get("sliding_window_overlap",9), step=8, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
+                            sliding_window_overlap = gr.Slider(1, 97, value=ui_defaults.get("sliding_window_overlap",9), step=8, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
                             sliding_window_color_correction_strength = gr.Slider(0, 1, visible=False, value =0)
                             sliding_window_overlap_noise = gr.Slider(0, 100, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect", visible = False)
                             sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=8, label="Discard Last Frames of a Window (that may have bad quality)", visible = True)
@@ -7772,8 +7748,9 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                             sliding_window_overlap_noise = gr.Slider(0, 150, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect", visible = False)
                             sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, label="Discard Last Frames of a Window (that may have bad quality)", visible = True)
                         else: # Vace, Multitalk
+                            sliding_window_defaults = model_def.get("sliding_window_defaults", {})
                             sliding_window_size = gr.Slider(5, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=4, label="Sliding Window Size")
-                            sliding_window_overlap = gr.Slider(1, 97, value=ui_defaults.get("sliding_window_overlap",5), step=4, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
+                            sliding_window_overlap = gr.Slider(sliding_window_defaults.get("overlap_min", 1), sliding_window_defaults.get("overlap_max", 97), value=ui_defaults.get("sliding_window_overlap",sliding_window_defaults.get("overlap_default", 5)), step=sliding_window_defaults.get("overlap_step", 4), label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
                             sliding_window_color_correction_strength = gr.Slider(0, 1, value=ui_defaults.get("sliding_window_color_correction_strength",1), step=0.01, label="Color Correction Strength (match colors of new window with previous one, 0 = disabled)")
                             sliding_window_overlap_noise = gr.Slider(0, 150, value=ui_defaults.get("sliding_window_overlap_noise",20 if vace else 0), step=1, label="Noise to be added to overlapped frames to reduce blur effect" , visible = vace)
                             sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, label="Discard Last Frames of a Window (that may have bad quality)", visible = True)
@@ -7790,13 +7767,13 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                     multi_prompts_gen_type = gr.Dropdown(
                         choices=[
-                            ("Will create new generated Video", 0),
+                            ("Will create a new generated Video added to the Generation Queue", 0),
                             ("Will be used for a new Sliding Window of the same Video Generation", 1),
                         ],
                         value=ui_defaults.get("multi_prompts_gen_type",0),
                         visible=True,
                         scale = 1,
-                        label="Text Prompts separated by a Carriage Return"
+                        label="Images & Text Prompts separated by a Carriage Return" if (any_start_image or any_end_image) else "Text Prompts separated by a Carriage Return"
                     )

             with gr.Tab("Misc.", visible = True) as misc_tab:
@@ -7962,7 +7939,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
     hidden_countdown_state = gr.Number(value=-1, visible=False, elem_id="hidden_countdown_state_num")
     single_hidden_trigger_btn = gr.Button("trigger_countdown", visible=False, elem_id="trigger_info_single_btn")

-    extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column,
+    extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column, image_prompt_type_group, image_prompt_type_radio, image_prompt_type_endcheckbox,
                     prompt_column_advanced, prompt_column_wizard_vars, prompt_column_wizard, lset_name, save_lset_prompt_drop, advanced_row, speed_tab, audio_tab, mmaudio_col, quality_tab,
                     sliding_window_tab, misc_tab, prompt_enhancer_row, inference_steps_row, skip_layer_guidance_row, audio_guide_row, RIFLEx_setting_col,
                     video_prompt_type_video_guide, video_prompt_type_video_guide_alt, video_prompt_type_video_mask, video_prompt_type_image_refs, apg_col, audio_prompt_type_sources, audio_prompt_type_remux, audio_prompt_type_remux_row,
@@ -7981,23 +7958,24 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
     target_settings = gr.Text(value = "settings", interactive= False, visible= False)
     last_choice = gr.Number(value =-1, interactive= False, visible= False)

-    resolution_group.input(fn=change_resolution_group, inputs=[state, resolution_group], outputs=[resolution])
+    resolution_group.input(fn=change_resolution_group, inputs=[state, resolution_group], outputs=[resolution], show_progress="hidden")
     resolution.change(fn=record_last_resolution, inputs=[state, resolution])

     # video_length.release(fn=refresh_video_length_label, inputs=[state, video_length ], outputs = video_length, trigger_mode="always_last" )
-    gr.on(triggers=[video_length.release, force_fps.change, video_guide.change, video_source.change], fn=refresh_video_length_label, inputs=[state, video_length, force_fps, video_guide, video_source] , outputs = video_length, trigger_mode="always_last" )
+    gr.on(triggers=[video_length.release, force_fps.change, video_guide.change, video_source.change], fn=refresh_video_length_label, inputs=[state, video_length, force_fps, video_guide, video_source] , outputs = video_length, trigger_mode="always_last", show_progress="hidden" )
     guidance_phases.change(fn=change_guidance_phases, inputs= [state, guidance_phases], outputs =[model_switch_phase, guidance_phases_row, switch_threshold, switch_threshold2, guidance2_scale, guidance3_scale ])
     audio_prompt_type_remux.change(fn=refresh_audio_prompt_type_remux, inputs=[state, audio_prompt_type, audio_prompt_type_remux], outputs=[audio_prompt_type])
     audio_prompt_type_sources.change(fn=refresh_audio_prompt_type_sources, inputs=[state, audio_prompt_type, audio_prompt_type_sources], outputs=[audio_prompt_type, audio_guide, audio_guide2, speakers_locations_row])
-    image_prompt_type.change(fn=refresh_image_prompt_type, inputs=[state, image_prompt_type], outputs=[image_start_row, image_end_row, video_source, keep_frames_video_source] )
+    image_prompt_type_radio.change(fn=refresh_image_prompt_type_radio, inputs=[state, image_prompt_type, image_prompt_type_radio], outputs=[image_prompt_type, image_start_row, image_end_row, video_source, keep_frames_video_source, image_prompt_type_endcheckbox], show_progress="hidden" )
+    image_prompt_type_endcheckbox.change(fn=refresh_image_prompt_type_endcheckbox, inputs=[state, image_prompt_type, image_prompt_type_radio, image_prompt_type_endcheckbox], outputs=[image_prompt_type, image_end_row] )
     # video_prompt_video_guide_trigger.change(fn=refresh_video_prompt_video_guide_trigger, inputs=[state, video_prompt_type, video_prompt_video_guide_trigger], outputs=[video_prompt_type, video_prompt_type_video_guide, video_guide, keep_frames_video_guide, denoising_strength, video_guide_outpainting_col, video_prompt_type_video_mask, video_mask, mask_expand])
     video_prompt_type_image_refs.input(fn=refresh_video_prompt_type_image_refs, inputs = [state, video_prompt_type, video_prompt_type_image_refs], outputs = [video_prompt_type, image_refs_row, remove_background_images_ref, image_refs_relative_size, frames_positions,video_guide_outpainting_col])
     video_prompt_type_video_guide.input(fn=refresh_video_prompt_type_video_guide, inputs = [state, video_prompt_type, video_prompt_type_video_guide, image_mode], outputs = [video_prompt_type, video_guide, image_guide, keep_frames_video_guide, denoising_strength, video_guide_outpainting_col, video_prompt_type_video_mask, video_mask, image_mask, mask_expand])
     video_prompt_type_video_guide_alt.input(fn=refresh_video_prompt_type_video_guide_alt, inputs = [state, video_prompt_type, video_prompt_type_video_guide_alt], outputs = [video_prompt_type, video_guide, image_refs_row, denoising_strength ])
     video_prompt_type_video_mask.input(fn=refresh_video_prompt_type_video_mask, inputs = [state, video_prompt_type, video_prompt_type_video_mask, image_mode], outputs = [video_prompt_type, video_mask, image_mask, mask_expand])
     video_prompt_type_alignment.input(fn=refresh_video_prompt_type_alignment, inputs = [state, video_prompt_type, video_prompt_type_alignment], outputs = [video_prompt_type])
-    multi_prompts_gen_type.select(fn=refresh_prompt_labels, inputs=[multi_prompts_gen_type, image_mode], outputs=[prompt, wizard_prompt])
+    multi_prompts_gen_type.select(fn=refresh_prompt_labels, inputs=[multi_prompts_gen_type, image_mode], outputs=[prompt, wizard_prompt, image_end], show_progress="hidden")
     video_guide_outpainting_top.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_top, gr.State(0)], outputs = [video_guide_outpainting], trigger_mode="multiple" )
     video_guide_outpainting_bottom.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_bottom,gr.State(1)], outputs = [video_guide_outpainting], trigger_mode="multiple" )
     video_guide_outpainting_left.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_left,gr.State(2)], outputs = [video_guide_outpainting], trigger_mode="multiple" )