import os
os.environ["GRADIO_LANG"] = "en"
# os.environ.pop("TORCH_LOGS", None)  # make sure no env var is suppressing/overriding
# os.environ["TORCH_LOGS"]= "recompiles"
import torch._logging as tlog
# tlog.set_logs(recompiles=True, guards=True, graph_breaks=True)
import time
import sys
import threading
import argparse
from mmgp import offload, safetensors2, profile_type
try:
import triton
except ImportError:
pass
from pathlib import Path
from datetime import datetime
import gradio as gr
import random
import json
import numpy as np
import importlib
from shared.utils import notification_sound
from shared.utils.loras_mutipliers import preparse_loras_multipliers, parse_loras_multipliers
from shared.utils.utils import convert_tensor_to_image, save_image, get_video_info, get_file_creation_date, convert_image_to_video, calculate_new_dimensions, convert_image_to_tensor, calculate_dimensions_and_resize_image, rescale_and_crop, get_video_frame, resize_and_remove_background, rgb_bw_to_rgba_mask
from shared.utils.utils import calculate_new_dimensions, get_outpainting_frame_location, get_outpainting_full_area_dimensions
from shared.utils.utils import has_video_file_extension, has_image_file_extension
from shared.utils.audio_video import extract_audio_tracks, combine_video_with_audio_tracks, combine_and_concatenate_video_with_audio_tracks, cleanup_temp_audio_files, save_video, save_image
from shared.utils.audio_video import save_image_metadata, read_image_metadata
from shared.match_archi import match_nvidia_architecture
from shared.attention import get_attention_modes, get_supported_attention_modes
from huggingface_hub import hf_hub_download, snapshot_download
import torch
import gc
import traceback
import math
import typing
import asyncio
import inspect
from shared.utils import prompt_parser
import base64
import io
from PIL import Image
import zipfile
import tempfile
import atexit
import shutil
import glob
import cv2
from transformers.utils import logging
logging.set_verbosity_error()
from preprocessing.matanyone import app as matanyone_app
from tqdm import tqdm
import requests
from shared.gradio.gallery import AdvancedMediaGallery
# import torch._dynamo as dynamo
# dynamo.config.recompile_limit = 2000 # default is 256
# dynamo.config.accumulated_recompile_limit = 2000 # or whatever limit you want
global_queue_ref = []
AUTOSAVE_FILENAME = "queue.zip"
PROMPT_VARS_MAX = 10
target_mmgp_version = "3.6.0"
WanGP_version = "8.73"
settings_version = 2.36
max_source_video_frames = 3000
prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
from importlib.metadata import version
mmgp_version = version("mmgp")
if mmgp_version != target_mmgp_version:
print(f"Incorrect version of mmgp ({mmgp_version}), version {target_mmgp_version} is needed. Please upgrade with the command 'pip install -r requirements.txt'")
exit()
lock = threading.Lock()
current_task_id = None
task_id = 0
vmc_event_handler = matanyone_app.get_vmc_event_handler()
unique_id = 0
unique_id_lock = threading.Lock()
gen_lock = threading.Lock()
offloadobj = enhancer_offloadobj = wan_model = None
reload_needed = True
def clear_gen_cache():
if "_cache" in offload.shared_state:
del offload.shared_state["_cache"]
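# Frees the currently loaded generation model: drops the reference to wan_model,
# clears the shared generation cache, releases the mmgp offload object and empties
# the CUDA (and, when available, pinned host) caches so another model can be loaded.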
def release_model():
global wan_model, offloadobj, reload_needed
wan_model = None
clear_gen_cache()
if offloadobj is not None:
offloadobj.release()
offloadobj = None
torch.cuda.empty_cache()
gc.collect()
try:
torch._C._host_emptyCache()
except:
pass
reload_needed = True
else:
gc.collect()
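# Returns a process-wide unique identifier string, built from the current timestamp
# plus a lock-protected counter so that two calls within the same clock tick still differ.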
def get_unique_id():
global unique_id
with unique_id_lock:
unique_id += 1
return str(time.time()+unique_id)
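# Windows-only helper: fetches the latest "essentials" build from the GyanD/codexffmpeg
# GitHub releases and extracts ffmpeg.exe / ffprobe.exe / ffplay.exe into the current
# working directory when they are not already present.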
def download_ffmpeg():
if os.name != 'nt': return
exes = ['ffmpeg.exe', 'ffprobe.exe', 'ffplay.exe']
if all(os.path.exists(e) for e in exes): return
api_url = 'https://api.github.com/repos/GyanD/codexffmpeg/releases/latest'
r = requests.get(api_url, headers={'Accept': 'application/vnd.github+json'})
assets = r.json().get('assets', [])
zip_asset = next((a for a in assets if 'essentials_build.zip' in a['name']), None)
if not zip_asset: return
zip_url = zip_asset['browser_download_url']
zip_name = zip_asset['name']
with requests.get(zip_url, stream=True) as resp:
total = int(resp.headers.get('Content-Length', 0))
with open(zip_name, 'wb') as f, tqdm(total=total, unit='B', unit_scale=True) as pbar:
for chunk in resp.iter_content(chunk_size=8192):
f.write(chunk)
pbar.update(len(chunk))
with zipfile.ZipFile(zip_name) as z:
for f in z.namelist():
if f.endswith(tuple(exes)) and '/bin/' in f:
z.extract(f)
os.rename(f, os.path.basename(f))
os.remove(zip_name)
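# Formats a duration in seconds as "Xh MMm SSs", "Xm SSs" or "X.Xs" depending on length.
# Illustrative examples: format_time(3725) -> "1h 02m 05s", format_time(75) -> "1m 15s",
# format_time(12.34) -> "12.3s".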
def format_time(seconds):
hours = int(seconds // 3600)
minutes = int((seconds % 3600) // 60)
secs = int(seconds % 60)
if hours > 0:
return f"{hours}h {minutes:02d}m {secs:02d}s"
elif seconds >= 60:
return f"{minutes}m {secs:02d}s"
else:
return f"{seconds:.1f}s"
def pil_to_base64_uri(pil_image, format="png", quality=75):
if pil_image is None:
return None
if isinstance(pil_image, str):
from shared.utils.utils import get_video_frame
pil_image = get_video_frame(pil_image, 0)
buffer = io.BytesIO()
try:
img_to_save = pil_image
if format.lower() == 'jpeg' and pil_image.mode == 'RGBA':
img_to_save = pil_image.convert('RGB')
elif format.lower() == 'png' and pil_image.mode not in ['RGB', 'RGBA', 'L', 'P']:
img_to_save = pil_image.convert('RGBA')
elif pil_image.mode == 'P':
img_to_save = pil_image.convert('RGBA' if 'transparency' in pil_image.info else 'RGB')
if format.lower() == 'jpeg':
img_to_save.save(buffer, format=format, quality=quality)
else:
img_to_save.save(buffer, format=format)
img_bytes = buffer.getvalue()
encoded_string = base64.b64encode(img_bytes).decode("utf-8")
return f"data:image/{format.lower()};base64,{encoded_string}"
except Exception as e:
print(f"Error converting PIL to base64: {e}")
return None
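# True if the value can be parsed as a float with no fractional part.
# Illustrative examples: is_integer("3") -> True, is_integer("3.5") -> False, is_integer("abc") -> False.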
def is_integer(n):
try:
float(n)
except ValueError:
return False
else:
return float(n).is_integer()
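# Number of sliding windows needed to cover current_video_length frames: the first window
# yields (sliding_window_size - discard_last_frames) frames, each following window adds
# (sliding_window_size - discard_last_frames - reuse_frames) new frames.
# Illustrative example: 145 frames, window 81, discard 0, reuse 17 -> 1 + ceil(64/64) = 2 windows.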
def compute_sliding_window_no(current_video_length, sliding_window_size, discard_last_frames, reuse_frames):
left_after_first_window = current_video_length - sliding_window_size + discard_last_frames
return 1 + math.ceil(left_after_first_window / (sliding_window_size - discard_last_frames - reuse_frames))
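# Normalizes a Gradio gallery value into a flat list of PIL images: unwraps (image, caption)
# tuples, opens file paths, and returns None if any entry is not an image or lacks an
# image file extension.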
def clean_image_list(gradio_list):
if not isinstance(gradio_list, list): gradio_list = [gradio_list]
gradio_list = [ tup[0] if isinstance(tup, tuple) else tup for tup in gradio_list ]
if any( not isinstance(image, (Image.Image, str)) for image in gradio_list): return None
if any( isinstance(image, str) and not has_image_file_extension(image) for image in gradio_list): return None
gradio_list = [ convert_image( Image.open(img) if isinstance(img, str) else img ) for img in gradio_list ]
return gradio_list
def cancel_edit(state):
state["editing_task_index"] = None
gr.Info("Edit cancelled.")
return gr.Tabs(selected="video_gen")
def edit_task_in_queue(
lset_name,
image_mode,
prompt,
negative_prompt,
resolution,
video_length,
batch_size,
seed,
force_fps,
num_inference_steps,
guidance_scale,
guidance2_scale,
guidance3_scale,
switch_threshold,
switch_threshold2,
guidance_phases,
model_switch_phase,
audio_guidance_scale,
flow_shift,
sample_solver,
embedded_guidance_scale,
repeat_generation,
multi_prompts_gen_type,
multi_images_gen_type,
skip_steps_cache_type,
skip_steps_multiplier,
skip_steps_start_step_perc,
loras_choices,
loras_multipliers,
image_prompt_type,
image_start,
image_end,
model_mode,
video_source,
keep_frames_video_source,
video_guide_outpainting,
video_prompt_type,
image_refs,
frames_positions,
video_guide,
image_guide,
keep_frames_video_guide,
denoising_strength,
video_mask,
image_mask,
control_net_weight,
control_net_weight2,
mask_expand,
audio_guide,
audio_guide2,
audio_source,
audio_prompt_type,
speakers_locations,
sliding_window_size,
sliding_window_overlap,
sliding_window_color_correction_strength,
sliding_window_overlap_noise,
sliding_window_discard_last_frames,
image_refs_relative_size,
remove_background_images_ref,
temporal_upsampling,
spatial_upsampling,
film_grain_intensity,
film_grain_saturation,
MMAudio_setting,
MMAudio_prompt,
MMAudio_neg_prompt,
RIFLEx_setting,
NAG_scale,
NAG_tau,
NAG_alpha,
slg_switch,
slg_layers,
slg_start_perc,
slg_end_perc,
apg_switch,
cfg_star_switch,
cfg_zero_step,
prompt_enhancer,
min_frames_if_references,
override_profile,
mode,
state,
):
new_inputs = get_function_arguments(edit_task_in_queue, locals())
new_inputs.pop('lset_name', None)
if 'loras_choices' in new_inputs:
all_loras = state.get("loras", [])
lora_indices = new_inputs.pop('loras_choices')
activated_lora_filenames = [Path(all_loras[int(index)]).name for index in lora_indices]
new_inputs['activated_loras'] = activated_lora_filenames
gen = get_gen_info(state)
queue = gen.get("queue", [])
editing_task_index = state.get("editing_task_index", None)
if editing_task_index is None:
gr.Warning("No task selected for editing.")
return update_queue_data(queue), gr.Tabs(selected="video_gen")
task_to_edit_index = editing_task_index + 1
if task_to_edit_index >= len(queue):
gr.Warning("Task index out of bounds. Cannot edit.")
return update_queue_data(queue), gr.Tabs(selected="video_gen")
task_to_edit = queue[task_to_edit_index]
original_params = task_to_edit['params'].copy()
media_keys = [
"image_start", "image_end", "image_refs", "video_source",
"video_guide", "image_guide", "video_mask", "image_mask",
"audio_guide", "audio_guide2", "audio_source"
]
multi_image_keys = ["image_refs"]
single_image_keys = ["image_start", "image_end"]
for key, new_value in new_inputs.items():
if key in media_keys:
if new_value is not None:
if key in multi_image_keys:
cleaned_value = clean_image_list(new_value)
original_params[key] = cleaned_value
elif key in single_image_keys:
cleaned_list = clean_image_list(new_value)
original_params[key] = cleaned_list[0] if cleaned_list else None
else:
original_params[key] = new_value
else:
original_params[key] = new_value
task_to_edit['params'] = original_params
task_to_edit['prompt'] = new_inputs.get('prompt')
task_to_edit['length'] = new_inputs.get('video_length')
task_to_edit['steps'] = new_inputs.get('num_inference_steps')
update_task_thumbnails(task_to_edit, original_params)
gr.Info(f"Task ID {task_to_edit['id']} has been updated successfully.")
state["editing_task_index"] = None
return update_queue_data(queue), gr.Tabs(selected="video_gen")
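# Validates the full web-form state for the selected model (prompts, start/end images,
# control videos, audio, sliding-window settings, ...) and, when everything is consistent,
# expands multi-prompt / multi-image combinations into individual tasks appended to the
# generation queue. Returns Gradio updates for the queue table and the queue accordion.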
def process_prompt_and_add_tasks(state, model_choice):
def ret():
return gr.update(), gr.update()
if state.get("validate_success",0) != 1:
ret()
state["validate_success"] = 0
model_filename = state["model_filename"]
model_type = state["model_type"]
inputs = get_model_settings(state, model_type)
if model_choice != model_type or inputs is None:
    raise gr.Error("The webform can no longer be used as the App has been restarted since it was displayed. Please refresh the page")
inputs["state"] = state
gen = get_gen_info(state)
inputs["model_type"] = model_type
inputs.pop("lset_name")
if inputs == None:
gr.Warning("Internal state error: Could not retrieve inputs for the model.")
queue = gen.get("queue", [])
return ret()
model_def = get_model_def(model_type)
model_handler = get_model_handler(model_type)
image_outputs = inputs["image_mode"] > 0
any_steps_skipping = model_def.get("tea_cache", False) or model_def.get("mag_cache", False)
model_type = get_base_model_type(model_type)
inputs["model_filename"] = model_filename
mode = inputs["mode"]
if mode.startswith("edit_"):
edit_video_source =gen.get("edit_video_source", None)
edit_overrides =gen.get("edit_overrides", None)
_ , _ , _, frames_count = get_video_info(edit_video_source)
if frames_count > max_source_video_frames:
gr.Info(f"Post processing is not supported on videos longer than {max_source_video_frames} frames. Output Video will be truncated")
# return
for k in ["image_start", "image_end", "image_refs", "video_guide", "audio_guide", "audio_guide2", "audio_source" , "video_mask", "image_mask"]:
inputs[k] = None
inputs.update(edit_overrides)
del gen["edit_video_source"], gen["edit_overrides"]
inputs["video_source"]= edit_video_source
prompt = []
spatial_upsampling = inputs.get("spatial_upsampling","")
if len(spatial_upsampling) >0: prompt += ["Spatial Upsampling"]
temporal_upsampling = inputs.get("temporal_upsampling","")
if len(temporal_upsampling) >0: prompt += ["Temporal Upsampling"]
if has_image_file_extension(edit_video_source) and len(temporal_upsampling) > 0:
gr.Info("Temporal Upsampling can not be used with an Image")
return ret()
film_grain_intensity = inputs.get("film_grain_intensity",0)
film_grain_saturation = inputs.get("film_grain_saturation",0.5)
# if film_grain_intensity >0: prompt += [f"Film Grain: intensity={film_grain_intensity}, saturation={film_grain_saturation}"]
if film_grain_intensity >0: prompt += ["Film Grain"]
MMAudio_setting = inputs.get("MMAudio_setting",0)
repeat_generation= inputs.get("repeat_generation",1)
if mode =="edit_remux":
audio_source = inputs["audio_source"]
if MMAudio_setting== 1:
prompt += ["MMAudio"]
audio_source = None
inputs["audio_source"] = audio_source
else:
if audio_source is None:
gr.Info("You must provide a custom Audio")
return ret()
prompt += ["Custom Audio"]
repeat_generation = 1
seed = inputs.get("seed",None)
if len(prompt) == 0:
if mode=="edit_remux":
gr.Info("You must choose at least one Remux Method")
else:
gr.Info("You must choose at least one Post Processing Method")
return ret()
inputs["prompt"] = ", ".join(prompt)
add_video_task(**inputs)
gen["prompts_max"] = 1 + gen.get("prompts_max",0)
state["validate_success"] = 1
queue= gen.get("queue", [])
return ret()
if hasattr(model_handler, "validate_generative_settings"):
error = model_handler.validate_generative_settings(model_type, model_def, inputs)
if error is not None and len(error) > 0:
gr.Info(error)
return ret()
if inputs.get("cfg_star_switch", 0) != 0 and inputs.get("apg_switch", 0) != 0:
gr.Info("Adaptive Progressive Guidance and Classifier Free Guidance Star can not be set at the same time")
return ret()
prompt = inputs["prompt"]
if len(prompt) ==0:
gr.Info("Prompt cannot be empty.")
gen = get_gen_info(state)
queue = gen.get("queue", [])
return ret()
prompt, errors = prompt_parser.process_template(prompt)
if len(errors) > 0:
gr.Info("Error processing prompt template: " + errors)
return ret()
model_filename = get_model_filename(model_type)
prompts = prompt.replace("\r", "").split("\n")
prompts = [prompt.strip() for prompt in prompts if len(prompt.strip())>0 and not prompt.startswith("#")]
if len(prompts) == 0:
gr.Info("Prompt cannot be empty.")
gen = get_gen_info(state)
queue = gen.get("queue", [])
return ret()
resolution = inputs["resolution"]
width, height = resolution.split("x")
width, height = int(width), int(height)
image_start = inputs["image_start"]
image_end = inputs["image_end"]
image_refs = inputs["image_refs"]
image_prompt_type = inputs["image_prompt_type"]
audio_prompt_type = inputs["audio_prompt_type"]
if image_prompt_type == None: image_prompt_type = ""
video_prompt_type = inputs["video_prompt_type"]
if video_prompt_type == None: video_prompt_type = ""
force_fps = inputs["force_fps"]
audio_guide = inputs["audio_guide"]
audio_guide2 = inputs["audio_guide2"]
audio_source = inputs["audio_source"]
video_guide = inputs["video_guide"]
image_guide = inputs["image_guide"]
video_mask = inputs["video_mask"]
image_mask = inputs["image_mask"]
speakers_locations = inputs["speakers_locations"]
video_source = inputs["video_source"]
frames_positions = inputs["frames_positions"]
keep_frames_video_guide= inputs["keep_frames_video_guide"]
keep_frames_video_source = inputs["keep_frames_video_source"]
denoising_strength= inputs["denoising_strength"]
sliding_window_size = inputs["sliding_window_size"]
sliding_window_overlap = inputs["sliding_window_overlap"]
sliding_window_discard_last_frames = inputs["sliding_window_discard_last_frames"]
video_length = inputs["video_length"]
num_inference_steps= inputs["num_inference_steps"]
skip_steps_cache_type= inputs["skip_steps_cache_type"]
MMAudio_setting = inputs["MMAudio_setting"]
image_mode = inputs["image_mode"]
switch_threshold = inputs["switch_threshold"]
loras_multipliers = inputs["loras_multipliers"]
activated_loras = inputs["activated_loras"]
guidance_phases= inputs["guidance_phases"]
model_switch_phase = inputs["model_switch_phase"]
switch_threshold = inputs["switch_threshold"]
switch_threshold2 = inputs["switch_threshold2"]
multi_prompts_gen_type = inputs["multi_prompts_gen_type"]
video_guide_outpainting = inputs["video_guide_outpainting"]
outpainting_dims = get_outpainting_dims(video_guide_outpainting)
if server_config.get("fit_canvas", 0) == 2 and outpainting_dims is not None and any_letters(video_prompt_type, "VKF"):
gr.Info("Output Resolution Cropping will be not used for this Generation as it is not compatible with Video Outpainting")
if len(loras_multipliers) > 0:
_, _, errors = parse_loras_multipliers(loras_multipliers, len(activated_loras), num_inference_steps, nb_phases= guidance_phases)
if len(errors) > 0:
gr.Info(f"Error parsing Loras Multipliers: {errors}")
return ret()
if guidance_phases == 3:
if switch_threshold < switch_threshold2:
gr.Info(f"Phase 1-2 Switch Noise Level ({switch_threshold}) should be Greater than Phase 2-3 Switch Noise Level ({switch_threshold2}). As a reminder, noise will gradually go down from 1000 to 0.")
return ret()
else:
model_switch_phase = 1
if not any_steps_skipping: skip_steps_cache_type = ""
if not model_def.get("lock_inference_steps", False) and model_type in ["ltxv_13B"] and num_inference_steps < 20:
gr.Info("The minimum number of steps should be 20")
return ret()
if skip_steps_cache_type == "mag":
if num_inference_steps > 50:
gr.Info("Mag Cache maximum number of steps is 50")
return ret()
if image_mode > 0:
audio_prompt_type = ""
if "B" in audio_prompt_type or "X" in audio_prompt_type:
from models.wan.multitalk.multitalk import parse_speakers_locations
speakers_bboxes, error = parse_speakers_locations(speakers_locations)
if len(error) > 0:
gr.Info(error)
return ret()
if MMAudio_setting != 0 and server_config.get("mmaudio_enabled", 0) != 0 and video_length <16: #should depend on the architecture
gr.Info("MMAudio can generate an Audio track only if the Video is at least 1s long")
if "F" in video_prompt_type:
if len(frames_positions.strip()) > 0:
positions = frames_positions.replace(","," ").split(" ")
for pos_str in positions:
if not pos_str in ["L", "l"] and len(pos_str)>0:
if not is_integer(pos_str):
gr.Info(f"Invalid Frame Position '{pos_str}'")
return ret()
pos = int(pos_str)
if pos <1 or pos > max_source_video_frames:
gr.Info(f"Invalid Frame Position Value'{pos_str}'")
return ret()
else:
frames_positions = None
if audio_source is not None and MMAudio_setting != 0:
gr.Info("MMAudio and Custom Audio Soundtrack can't not be used at the same time")
return ret()
if len(filter_letters(image_prompt_type, "VLG")) > 0 and len(keep_frames_video_source) > 0:
if not is_integer(keep_frames_video_source) or int(keep_frames_video_source) == 0:
gr.Info("The number of frames to keep must be a non null integer")
return ret()
else:
keep_frames_video_source = ""
if image_outputs:
image_prompt_type = image_prompt_type.replace("V", "").replace("L", "")
if "V" in image_prompt_type:
if video_source == None:
gr.Info("You must provide a Source Video file to continue")
return ret()
else:
video_source = None
if "A" in audio_prompt_type:
if audio_guide == None:
gr.Info("You must provide an Audio Source")
return ret()
if "B" in audio_prompt_type:
if audio_guide2 == None:
gr.Info("You must provide a second Audio Source")
return ret()
else:
audio_guide2 = None
else:
audio_guide = None
audio_guide2 = None
if model_type in ["vace_multitalk_14B"] and ("B" in audio_prompt_type or "X" in audio_prompt_type):
if not "I" in video_prompt_type and not not "V" in video_prompt_type:
gr.Info("To get good results with Multitalk and two people speaking, it is recommended to set a Reference Frame or a Control Video (potentially truncated) that contains the two people one on each side")
if model_def.get("one_image_ref_needed", False):
if image_refs == None :
gr.Info("You must provide an Image Reference")
return ret()
if len(image_refs) > 1:
gr.Info("Only one Image Reference (a person) is supported for the moment by this model")
return ret()
if model_def.get("at_least_one_image_ref_needed", False):
if image_refs == None :
gr.Info("You must provide at least one Image Reference")
return ret()
if "I" in video_prompt_type:
if image_refs == None or len(image_refs) == 0:
gr.Info("You must provide at least one Reference Image")
return ret()
image_refs = clean_image_list(image_refs)
if image_refs == None :
gr.Info("A Reference Image should be an Image")
return ret()
else:
image_refs = None
if "V" in video_prompt_type:
if image_outputs:
if image_guide is None:
gr.Info("You must provide a Control Image")
return ret()
else:
if video_guide is None:
gr.Info("You must provide a Control Video")
return ret()
if "A" in video_prompt_type and not "U" in video_prompt_type:
if image_outputs:
if image_mask is None:
gr.Info("You must provide a Image Mask")
return ret()
else:
if video_mask is None:
gr.Info("You must provide a Video Mask")
return ret()
else:
video_mask = None
image_mask = None
if "G" in video_prompt_type:
if denoising_strength < 1.:
gr.Info(f"With Denoising Strength {denoising_strength:.1f}, denoising will start at Step no {int(round(num_inference_steps * (1. - denoising_strength),4))} ")
else:
denoising_strength = 1.0
if len(keep_frames_video_guide) > 0 and model_type in ["ltxv_13B"]:
gr.Info("Keep Frames for Control Video is not supported with LTX Video")
return ret()
_, error = parse_keep_frames_video_guide(keep_frames_video_guide, video_length)
if len(error) > 0:
gr.Info(f"Invalid Keep Frames property: {error}")
return ret()
else:
video_guide = None
image_guide = None
video_mask = None
image_mask = None
keep_frames_video_guide = ""
denoising_strength = 1.0
if image_outputs:
video_guide = None
video_mask = None
else:
image_guide = None
image_mask = None
if "S" in image_prompt_type:
if image_start == None or isinstance(image_start, list) and len(image_start) == 0:
gr.Info("You must provide a Start Image")
return ret()
image_start = clean_image_list(image_start)
if image_start == None :
gr.Info("Start Image should be an Image")
return ret()
if multi_prompts_gen_type == 1 and len(image_start) > 1:
gr.Info("Only one Start Image is supported")
return ret()
else:
image_start = None
if not any_letters(image_prompt_type, "SVL"):
image_prompt_type = image_prompt_type.replace("E", "")
if "E" in image_prompt_type:
if image_end == None or isinstance(image_end, list) and len(image_end) == 0:
gr.Info("You must provide an End Image")
return ret()
image_end = clean_image_list(image_end)
if image_end == None :
gr.Info("End Image should be an Image")
return ret()
if multi_prompts_gen_type == 0:
if video_source is not None:
if len(image_end)> 1:
gr.Info("If a Video is to be continued and the option 'Each Text Prompt Will create a new generated Video' is set, there can be only one End Image")
return ret()
elif len(image_start or []) != len(image_end or []):
gr.Info("The number of Start and End Images should be the same when the option 'Each Text Prompt Will create a new generated Video'")
return ret()
else:
image_end = None
if test_any_sliding_window(model_type) and image_mode == 0:
if video_length > sliding_window_size:
if model_type in ["t2v"] and not "G" in video_prompt_type :
gr.Info(f"You have requested to Generate Sliding Windows with a Text to Video model. Unless you use the Video to Video feature this is useless as a t2v model doesn't see past frames and it will generate the same video in each new window.")
return ret()
full_video_length = video_length if video_source is None else video_length + sliding_window_overlap -1
extra = "" if full_video_length == video_length else f" including {sliding_window_overlap} added for Video Continuation"
no_windows = compute_sliding_window_no(full_video_length, sliding_window_size, sliding_window_discard_last_frames, sliding_window_overlap)
gr.Info(f"The Number of Frames to generate ({video_length}{extra}) is greater than the Sliding Window Size ({sliding_window_size}), {no_windows} Windows will be generated")
if "recam" in model_filename:
if video_guide == None:
gr.Info("You must provide a Control Video")
return ret()
computed_fps = get_computed_fps(force_fps, model_type , video_guide, video_source )
frames = get_resampled_video(video_guide, 0, 81, computed_fps)
if len(frames)<81:
gr.Info(f"Recammaster Control video should be at least 81 frames once the resampling at {computed_fps} fps has been done")
return ret()
if "hunyuan_custom_custom_edit" in model_filename:
if len(keep_frames_video_guide) > 0:
gr.Info("Filtering Frames with this model is not supported")
return ret()
if inputs["multi_prompts_gen_type"] != 0:
if image_start != None and len(image_start) > 1:
gr.Info("Only one Start Image must be provided if multiple prompts are used for different windows")
return ret()
# if image_end != None and len(image_end) > 1:
# gr.Info("Only one End Image must be provided if multiple prompts are used for different windows")
# return
override_inputs = {
"image_start": image_start[0] if image_start !=None and len(image_start) > 0 else None,
"image_end": image_end, #[0] if image_end !=None and len(image_end) > 0 else None,
"image_refs": image_refs,
"audio_guide": audio_guide,
"audio_guide2": audio_guide2,
"audio_source": audio_source,
"video_guide": video_guide,
"image_guide": image_guide,
"video_mask": video_mask,
"image_mask": image_mask,
"video_source": video_source,
"frames_positions": frames_positions,
"keep_frames_video_source": keep_frames_video_source,
"keep_frames_video_guide": keep_frames_video_guide,
"denoising_strength": denoising_strength,
"image_prompt_type": image_prompt_type,
"video_prompt_type": video_prompt_type,
"audio_prompt_type": audio_prompt_type,
"skip_steps_cache_type": skip_steps_cache_type,
"model_switch_phase": model_switch_phase,
}
if inputs["multi_prompts_gen_type"] == 0:
if image_start != None and len(image_start) > 0:
if inputs["multi_images_gen_type"] == 0:
new_prompts = []
new_image_start = []
new_image_end = []
for i in range(len(prompts) * len(image_start) ):
new_prompts.append( prompts[ i % len(prompts)] )
new_image_start.append(image_start[i // len(prompts)] )
if image_end != None:
new_image_end.append(image_end[i // len(prompts)] )
prompts = new_prompts
image_start = new_image_start
if image_end != None:
image_end = new_image_end
else:
if len(prompts) >= len(image_start):
if len(prompts) % len(image_start) != 0:
gr.Info("If there are more text prompts than input images the number of text prompts should be dividable by the number of images")
return ret()
rep = len(prompts) // len(image_start)
new_image_start = []
new_image_end = []
for i, _ in enumerate(prompts):
new_image_start.append(image_start[i//rep] )
if image_end != None:
new_image_end.append(image_end[i//rep] )
image_start = new_image_start
if image_end != None:
image_end = new_image_end
else:
if len(image_start) % len(prompts) !=0:
gr.Info("If there are more input images than text prompts the number of images should be dividable by the number of text prompts")
return ret()
rep = len(image_start) // len(prompts)
new_prompts = []
for i, _ in enumerate(image_start):
new_prompts.append( prompts[ i//rep] )
prompts = new_prompts
if image_end == None or len(image_end) == 0:
image_end = [None] * len(prompts)
for single_prompt, start, end in zip(prompts, image_start, image_end) :
override_inputs.update({
"prompt" : single_prompt,
"image_start": start,
"image_end" : end,
})
inputs.update(override_inputs)
add_video_task(**inputs)
else:
for single_prompt in prompts :
override_inputs["prompt"] = single_prompt
inputs.update(override_inputs)
add_video_task(**inputs)
new_prompts_count = len(prompts)
else:
new_prompts_count = 1
override_inputs["prompt"] = "\n".join(prompts)
inputs.update(override_inputs)
add_video_task(**inputs)
new_prompts_count += gen.get("prompts_max",0)
gen["prompts_max"] = new_prompts_count
state["validate_success"] = 1
queue= gen.get("queue", [])
first_time_in_queue = state.get("first_time_in_queue", True)
state["first_time_in_queue"] = True
return update_queue_data(queue, first_time_in_queue), gr.update(open=True) if new_prompts_count > 1 else gr.update()
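# Collects the media attached to a task into two thumbnail groups for the queue display:
# the first media input found provides the "start" thumbnails, remaining inputs the "end"
# thumbnails (with a fallback split when only one group is found), each paired with a
# human-readable label.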
def get_preview_images(inputs):
inputs_to_query = ["image_start", "video_source", "image_end", "video_guide", "image_guide", "video_mask", "image_mask", "image_refs" ]
labels = ["Start Image", "Video Source", "End Image", "Video Guide", "Image Guide", "Video Mask", "Image Mask", "Image Reference"]
start_image_data = None
start_image_labels = []
end_image_data = None
end_image_labels = []
for label, name in zip(labels,inputs_to_query):
image= inputs.get(name, None)
if image is not None:
image= [image] if not isinstance(image, list) else image.copy()
if start_image_data == None:
start_image_data = image
start_image_labels += [label] * len(image)
else:
if end_image_data == None:
end_image_data = image
else:
end_image_data += image
end_image_labels += [label] * len(image)
if start_image_data != None and len(start_image_data) > 1 and end_image_data == None:
end_image_data = start_image_data [1:]
end_image_labels = start_image_labels [1:]
start_image_data = start_image_data [:1]
start_image_labels = start_image_labels [:1]
return start_image_data, end_image_data, start_image_labels, end_image_labels
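# Appends a fully validated task to the in-memory generation queue, storing a copy of its
# parameters plus pre-rendered base64 thumbnails so the queue table can be displayed
# without re-reading the media files.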
def add_video_task(**inputs):
global task_id
state = inputs["state"]
gen = get_gen_info(state)
queue = gen["queue"]
task_id += 1
current_task_id = task_id
start_image_data, end_image_data, start_image_labels, end_image_labels = get_preview_images(inputs)
queue.append({
"id": current_task_id,
"params": inputs.copy(),
"repeats": inputs["repeat_generation"],
"length": inputs["video_length"], # !!!
"steps": inputs["num_inference_steps"],
"prompt": inputs["prompt"],
"start_image_labels": start_image_labels,
"end_image_labels": end_image_labels,
"start_image_data": start_image_data,
"end_image_data": end_image_data,
"start_image_data_base64": [pil_to_base64_uri(img, format="jpeg", quality=70) for img in start_image_data] if start_image_data != None else None,
"end_image_data_base64": [pil_to_base64_uri(img, format="jpeg", quality=70) for img in end_image_data] if end_image_data != None else None
})
def update_task_thumbnails(task, inputs):
start_image_data, end_image_data, start_labels, end_labels = get_preview_images(inputs)
task.update({
"start_image_labels": start_labels,
"end_image_labels": end_labels,
"start_image_data_base64": [pil_to_base64_uri(img, format="jpeg", quality=70) for img in start_image_data] if start_image_data != None else None,
"end_image_data_base64": [pil_to_base64_uri(img, format="jpeg", quality=70) for img in end_image_data] if end_image_data != None else None
})
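# Moves a pending task to a new position in the queue. Indices coming from the UI are
# 0-based over the pending tasks only, hence the +1 offset: queue[0] is the task currently
# being generated and is never moved.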
def move_task(queue, old_index_str, new_index_str):
try:
old_idx = int(old_index_str)
new_idx = int(new_index_str)
except (ValueError, IndexError):
return update_queue_data(queue)
with lock:
old_idx += 1
new_idx += 1
if not (0 < old_idx < len(queue)):
return update_queue_data(queue)
item_to_move = queue.pop(old_idx)
if old_idx < new_idx:
new_idx -= 1
clamped_new_idx = max(1, min(new_idx, len(queue)))
queue.insert(clamped_new_idx, item_to_move)
return update_queue_data(queue)
def remove_task(queue, selected_indices):
if not selected_indices or len(selected_indices) == 0:
return update_queue_data(queue)
idx = selected_indices[0]
if isinstance(idx, list):
idx = idx[0]
idx = int(idx) + 1
with lock:
if idx < len(queue):
if idx == 0:
wan_model._interrupt = True
del queue[idx]
return update_queue_data(queue)
def update_global_queue_ref(queue):
global global_queue_ref
with lock:
global_queue_ref = queue[:]
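# Serializes the pending queue into an in-memory zip: task parameters go into a queue.json
# manifest, referenced images are saved as PNG and videos/audio files are copied alongside
# it, and the whole archive is returned as a base64-encoded string.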
def save_queue_action(state):
gen = get_gen_info(state)
queue = gen.get("queue", [])
if not queue or len(queue) <=1 :
gr.Info("Queue is empty. Nothing to save.")
return ""
zip_buffer = io.BytesIO()
with tempfile.TemporaryDirectory() as tmpdir:
queue_manifest = []
file_paths_in_zip = {}
for task_index, task in enumerate(queue):
if task is None or not isinstance(task, dict) or task.get('id') is None: continue
params_copy = task.get('params', {}).copy()
task_id_s = task.get('id', f"task_{task_index}")
image_keys = ["image_start", "image_end", "image_refs", "image_guide", "image_mask"]
video_keys = ["video_guide", "video_mask", "video_source", "audio_guide", "audio_guide2", "audio_source"]
for key in image_keys:
images_pil = params_copy.get(key)
if images_pil is None:
continue
is_originally_list = isinstance(images_pil, list)
if not is_originally_list:
images_pil = [images_pil]
image_filenames_for_json = []
for img_index, pil_image in enumerate(images_pil):
if not isinstance(pil_image, Image.Image):
print(f"Warning: Expected PIL Image for key '{key}' in task {task_id_s}, got {type(pil_image)}. Skipping image.")
continue
img_id = id(pil_image)
if img_id in file_paths_in_zip:
image_filenames_for_json.append(file_paths_in_zip[img_id])
continue
img_filename_in_zip = f"task{task_id_s}_{key}_{img_index}.png"
img_save_path = os.path.join(tmpdir, img_filename_in_zip)
try:
pil_image.save(img_save_path, "PNG")
image_filenames_for_json.append(img_filename_in_zip)
file_paths_in_zip[img_id] = img_filename_in_zip
print(f"Saved image: {img_filename_in_zip}")
except Exception as e:
print(f"Error saving image {img_filename_in_zip} for task {task_id_s}: {e}")
if image_filenames_for_json:
params_copy[key] = image_filenames_for_json if is_originally_list else image_filenames_for_json[0]
else:
pass
# params_copy.pop(key, None) #cant pop otherwise crash during reload
for key in video_keys:
video_path_orig = params_copy.get(key)
if video_path_orig is None or not isinstance(video_path_orig, str):
continue
if video_path_orig in file_paths_in_zip:
params_copy[key] = file_paths_in_zip[video_path_orig]
continue
if not os.path.isfile(video_path_orig):
print(f"Warning: Video file not found for key '{key}' in task {task_id_s}: {video_path_orig}. Skipping video.")
params_copy.pop(key, None)
continue
_, extension = os.path.splitext(video_path_orig)
vid_filename_in_zip = f"task{task_id_s}_{key}{extension if extension else '.mp4'}"
vid_save_path = os.path.join(tmpdir, vid_filename_in_zip)
try:
shutil.copy2(video_path_orig, vid_save_path)
params_copy[key] = vid_filename_in_zip
file_paths_in_zip[video_path_orig] = vid_filename_in_zip
print(f"Copied video: {video_path_orig} -> {vid_filename_in_zip}")
except Exception as e:
print(f"Error copying video {video_path_orig} to {vid_filename_in_zip} for task {task_id_s}: {e}")
params_copy.pop(key, None)
params_copy.pop('state', None)
params_copy.pop('start_image_labels', None)
params_copy.pop('end_image_labels', None)
params_copy.pop('start_image_data_base64', None)
params_copy.pop('end_image_data_base64', None)
params_copy.pop('start_image_data', None)
params_copy.pop('end_image_data', None)
task.pop('start_image_data', None)
task.pop('end_image_data', None)
manifest_entry = {
"id": task.get('id'),
"params": params_copy,
}
manifest_entry = {k: v for k, v in manifest_entry.items() if v is not None}
queue_manifest.append(manifest_entry)
manifest_path = os.path.join(tmpdir, "queue.json")
try:
with open(manifest_path, 'w', encoding='utf-8') as f:
json.dump(queue_manifest, f, indent=4)
except Exception as e:
print(f"Error writing queue.json: {e}")
gr.Warning("Failed to create queue manifest.")
return None
try:
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zf:
zf.write(manifest_path, arcname="queue.json")
for file_id, saved_file_rel_path in file_paths_in_zip.items():
saved_file_abs_path = os.path.join(tmpdir, saved_file_rel_path)
if os.path.exists(saved_file_abs_path):
zf.write(saved_file_abs_path, arcname=saved_file_rel_path)
print(f"Adding to zip: {saved_file_rel_path}")
else:
print(f"Warning: File {saved_file_rel_path} (ID: {file_id}) not found during zipping.")
zip_buffer.seek(0)
zip_binary_content = zip_buffer.getvalue()
zip_base64 = base64.b64encode(zip_binary_content).decode('utf-8')
print(f"Queue successfully prepared as base64 string ({len(zip_base64)} chars).")
return zip_base64
except Exception as e:
print(f"Error creating zip file in memory: {e}")
gr.Warning("Failed to create zip data for download.")
return None
finally:
zip_buffer.close()
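# Restores a queue from a zip produced by save_queue_action (or from the autosave file when
# triggered without an event target): re-opens the bundled images, copies videos into a
# persistent cache directory, rebuilds thumbnails and bumps the global task_id so new tasks
# do not collide with the loaded ones.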
def load_queue_action(filepath, state, evt:gr.EventData):
global task_id
gen = get_gen_info(state)
original_queue = gen.get("queue", [])
delete_autoqueue_file = False
if evt.target == None:
if original_queue or not Path(AUTOSAVE_FILENAME).is_file():
return
print(f"Autoloading queue from {AUTOSAVE_FILENAME}...")
filename = AUTOSAVE_FILENAME
delete_autoqueue_file = True
else:
if not filepath or not hasattr(filepath, 'name') or not Path(filepath.name).is_file():
print("[load_queue_action] Warning: No valid file selected or file not found.")
return update_queue_data(original_queue)
filename = filepath.name
save_path_base = server_config.get("save_path", "outputs")
loaded_cache_dir = os.path.join(save_path_base, "_loaded_queue_cache")
newly_loaded_queue = []
max_id_in_file = 0
error_message = ""
local_queue_copy_for_global_ref = None
try:
print(f"[load_queue_action] Attempting to load queue from: {filename}")
os.makedirs(loaded_cache_dir, exist_ok=True)
print(f"[load_queue_action] Using cache directory: {loaded_cache_dir}")
with tempfile.TemporaryDirectory() as tmpdir:
with zipfile.ZipFile(filename, 'r') as zf:
if "queue.json" not in zf.namelist(): raise ValueError("queue.json not found in zip file")
print(f"[load_queue_action] Extracting {filename} to {tmpdir}")
zf.extractall(tmpdir)
print(f"[load_queue_action] Extraction complete.")
manifest_path = os.path.join(tmpdir, "queue.json")
print(f"[load_queue_action] Reading manifest: {manifest_path}")
with open(manifest_path, 'r', encoding='utf-8') as f:
loaded_manifest = json.load(f)
print(f"[load_queue_action] Manifest loaded. Processing {len(loaded_manifest)} tasks.")
for task_index, task_data in enumerate(loaded_manifest):
if task_data is None or not isinstance(task_data, dict):
print(f"[load_queue_action] Skipping invalid task data at index {task_index}")
continue
params = task_data.get('params', {})
task_id_loaded = task_data.get('id', 0)
max_id_in_file = max(max_id_in_file, task_id_loaded)
params['state'] = state
image_keys = ["image_start", "image_end", "image_refs", "image_guide", "image_mask"]
video_keys = ["video_guide", "video_mask", "video_source", "audio_guide", "audio_guide2", "audio_source"]
loaded_pil_images = {}
loaded_video_paths = {}
for key in image_keys:
image_filenames = params.get(key)
if image_filenames is None: continue
is_list = isinstance(image_filenames, list)
if not is_list: image_filenames = [image_filenames]
loaded_pils = []
for img_filename_in_zip in image_filenames:
if not isinstance(img_filename_in_zip, str):
print(f"[load_queue_action] Warning: Non-string filename found for image key '{key}'. Skipping.")
continue
img_load_path = os.path.join(tmpdir, img_filename_in_zip)
if not os.path.exists(img_load_path):
print(f"[load_queue_action] Image file not found in extracted data: {img_load_path}. Skipping.")
continue
try:
pil_image = Image.open(img_load_path)
pil_image.load()
converted_image = convert_image(pil_image)
loaded_pils.append(converted_image)
pil_image.close()
print(f"Loaded image: {img_filename_in_zip} for key {key}")
except Exception as img_e:
print(f"[load_queue_action] Error loading image {img_filename_in_zip}: {img_e}")
if loaded_pils:
params[key] = loaded_pils if is_list else loaded_pils[0]
loaded_pil_images[key] = params[key]
else:
params.pop(key, None)
for key in video_keys:
video_filename_in_zip = params.get(key)
if video_filename_in_zip is None or not isinstance(video_filename_in_zip, str):
continue
video_load_path = os.path.join(tmpdir, video_filename_in_zip)
if not os.path.exists(video_load_path):
print(f"[load_queue_action] Video file not found in extracted data: {video_load_path}. Skipping.")
params.pop(key, None)
continue
persistent_video_path = os.path.join(loaded_cache_dir, video_filename_in_zip)
try:
shutil.copy2(video_load_path, persistent_video_path)
params[key] = persistent_video_path
loaded_video_paths[key] = persistent_video_path
print(f"Loaded video: {video_filename_in_zip} -> {persistent_video_path}")
except Exception as vid_e:
print(f"[load_queue_action] Error copying video {video_filename_in_zip} to cache: {vid_e}")
params.pop(key, None)
primary_preview_pil_list, secondary_preview_pil_list, primary_preview_pil_labels, secondary_preview_pil_labels = get_preview_images(params)
start_b64 = [pil_to_base64_uri(primary_preview_pil_list[0], format="jpeg", quality=70)] if isinstance(primary_preview_pil_list, list) and primary_preview_pil_list else None
end_b64 = [pil_to_base64_uri(secondary_preview_pil_list[0], format="jpeg", quality=70)] if isinstance(secondary_preview_pil_list, list) and secondary_preview_pil_list else None
top_level_start_image = params.get("image_start") or params.get("image_refs")
top_level_end_image = params.get("image_end")
runtime_task = {
"id": task_id_loaded,
"params": params.copy(),
"repeats": params.get('repeat_generation', 1),
"length": params.get('video_length'),
"steps": params.get('num_inference_steps'),
"prompt": params.get('prompt'),
"start_image_labels": primary_preview_pil_labels,
"end_image_labels": secondary_preview_pil_labels,
"start_image_data": top_level_start_image,
"end_image_data": top_level_end_image,
"start_image_data_base64": start_b64,
"end_image_data_base64": end_b64,
}
newly_loaded_queue.append(runtime_task)
print(f"[load_queue_action] Reconstructed task {task_index+1}/{len(loaded_manifest)}, ID: {task_id_loaded}")
with lock:
print("[load_queue_action] Acquiring lock to update state...")
gen["queue"] = newly_loaded_queue[:]
local_queue_copy_for_global_ref = gen["queue"][:]
current_max_id_in_new_queue = max([t['id'] for t in newly_loaded_queue if 'id' in t] + [0])
if current_max_id_in_new_queue >= task_id:
new_task_id = current_max_id_in_new_queue + 1
print(f"[load_queue_action] Updating global task_id from {task_id} to {new_task_id}")
task_id = new_task_id
else:
print(f"[load_queue_action] Global task_id ({task_id}) is > max in file ({current_max_id_in_new_queue}). Not changing task_id.")
gen["prompts_max"] = len(newly_loaded_queue)
print("[load_queue_action] State update complete. Releasing lock.")
if local_queue_copy_for_global_ref is not None:
print("[load_queue_action] Updating global queue reference...")
update_global_queue_ref(local_queue_copy_for_global_ref)
else:
print("[load_queue_action] Warning: Skipping global ref update as local copy is None.")
print(f"[load_queue_action] Queue load successful. Returning DataFrame update for {len(newly_loaded_queue)} tasks.")
return update_queue_data(newly_loaded_queue)
except (ValueError, zipfile.BadZipFile, FileNotFoundError, Exception) as e:
error_message = f"Error during queue load: {e}"
print(f"[load_queue_action] Caught error: {error_message}")
traceback.print_exc()
gr.Warning(f"Failed to load queue: {error_message[:200]}")
print("[load_queue_action] Load failed. Returning DataFrame update for original queue.")
return update_queue_data(original_queue)
finally:
if delete_autoqueue_file:
if os.path.isfile(filename):
os.remove(filename)
print(f"Clear Queue: Deleted autosave file '{filename}'.")
if filepath and hasattr(filepath, 'name') and filepath.name and os.path.exists(filepath.name):
if tempfile.gettempdir() in os.path.abspath(filepath.name):
try:
os.remove(filepath.name)
print(f"[load_queue_action] Removed temporary upload file: {filepath.name}")
except OSError as e:
print(f"[load_queue_action] Info: Could not remove temp file {filepath.name}: {e}")
else:
print(f"[load_queue_action] Info: Did not remove non-temporary file: {filepath.name}")
def clear_queue_action(state):
gen = get_gen_info(state)
queue = gen.get("queue", [])
aborted_current = False
cleared_pending = False
with lock:
if "in_progress" in gen and gen["in_progress"]:
print("Clear Queue: Signalling abort for in-progress task.")
gen["abort"] = True
gen["extra_orders"] = 0
if wan_model is not None:
wan_model._interrupt = True
aborted_current = True
if queue:
if len(queue) > 1 or (len(queue) == 1 and queue[0] is not None and queue[0].get('id') is not None):
print(f"Clear Queue: Clearing {len(queue)} tasks from queue.")
queue.clear()
cleared_pending = True
else:
pass
if aborted_current or cleared_pending:
gen["prompts_max"] = 0
if cleared_pending:
try:
if os.path.isfile(AUTOSAVE_FILENAME):
os.remove(AUTOSAVE_FILENAME)
print(f"Clear Queue: Deleted autosave file '{AUTOSAVE_FILENAME}'.")
except OSError as e:
print(f"Clear Queue: Error deleting autosave file '{AUTOSAVE_FILENAME}': {e}")
gr.Warning(f"Could not delete the autosave file '{AUTOSAVE_FILENAME}'. You may need to remove it manually.")
if aborted_current and cleared_pending:
gr.Info("Queue cleared and current generation aborted.")
elif aborted_current:
gr.Info("Current generation aborted.")
elif cleared_pending:
gr.Info("Queue cleared.")
else:
gr.Info("Queue is already empty or only contains the active task (which wasn't aborted now).")
return update_queue_data([])
def quit_application():
print("Save and Quit requested...")
autosave_queue()
import signal
os.kill(os.getpid(), signal.SIGINT)
def start_quit_process():
return 5, gr.update(visible=False), gr.update(visible=True)
def cancel_quit_process():
return -1, gr.update(visible=True), gr.update(visible=False)
def show_countdown_info_from_state(current_value: int):
if current_value > 0:
gr.Info(f"Quitting in {current_value}...")
return current_value - 1
return current_value
quitting_app = False
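# Writes the current queue (manifest plus media files) to AUTOSAVE_FILENAME as a zip when
# the app is quitting, so it can be reloaded automatically on the next start.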
def autosave_queue():
global quitting_app
quitting_app = True
global global_queue_ref
if not global_queue_ref:
print("Autosave: Queue is empty, nothing to save.")
return
print(f"Autosaving queue ({len(global_queue_ref)} items) to {AUTOSAVE_FILENAME}...")
temp_state_for_save = {"gen": {"queue": global_queue_ref}}
zip_file_path = None
try:
def _save_queue_to_file(queue_to_save, output_filename):
if not queue_to_save: return None
with tempfile.TemporaryDirectory() as tmpdir:
queue_manifest = []
file_paths_in_zip = {}
for task_index, task in enumerate(queue_to_save):
if task is None or not isinstance(task, dict) or task.get('id') is None: continue
params_copy = task.get('params', {}).copy()
task_id_s = task.get('id', f"task_{task_index}")
image_keys = ["image_start", "image_end", "image_refs", "image_guide", "image_mask"]
video_keys = ["video_guide", "video_mask", "video_source", "audio_guide", "audio_guide2", "audio_source" ]
for key in image_keys:
images_pil = params_copy.get(key)
if images_pil is None: continue
is_list = isinstance(images_pil, list)
if not is_list: images_pil = [images_pil]
image_filenames_for_json = []
for img_index, pil_image in enumerate(images_pil):
if not isinstance(pil_image, Image.Image): continue
img_id = id(pil_image)
if img_id in file_paths_in_zip:
image_filenames_for_json.append(file_paths_in_zip[img_id])
continue
img_filename_in_zip = f"task{task_id_s}_{key}_{img_index}.png"
img_save_path = os.path.join(tmpdir, img_filename_in_zip)
try:
pil_image.save(img_save_path, "PNG")
image_filenames_for_json.append(img_filename_in_zip)
file_paths_in_zip[img_id] = img_filename_in_zip
except Exception as e:
print(f"Autosave error saving image {img_filename_in_zip}: {e}")
if image_filenames_for_json:
params_copy[key] = image_filenames_for_json if is_list else image_filenames_for_json[0]
else:
params_copy.pop(key, None)
for key in video_keys:
video_path_orig = params_copy.get(key)
if video_path_orig is None or not isinstance(video_path_orig, str):
continue
if video_path_orig in file_paths_in_zip:
params_copy[key] = file_paths_in_zip[video_path_orig]
continue
if not os.path.isfile(video_path_orig):
print(f"Warning (Autosave): Video file not found for key '{key}' in task {task_id_s}: {video_path_orig}. Skipping.")
params_copy.pop(key, None)
continue
_, extension = os.path.splitext(video_path_orig)
vid_filename_in_zip = f"task{task_id_s}_{key}{extension if extension else '.mp4'}"
vid_save_path = os.path.join(tmpdir, vid_filename_in_zip)
try:
shutil.copy2(video_path_orig, vid_save_path)
params_copy[key] = vid_filename_in_zip
file_paths_in_zip[video_path_orig] = vid_filename_in_zip
except Exception as e:
print(f"Error (Autosave) copying video {video_path_orig} to {vid_filename_in_zip} for task {task_id_s}: {e}")
params_copy.pop(key, None)
params_copy.pop('state', None)
params_copy.pop('start_image_data_base64', None)
params_copy.pop('end_image_data_base64', None)
params_copy.pop('start_image_data', None)
params_copy.pop('end_image_data', None)
manifest_entry = {
"id": task.get('id'),
"params": params_copy,
}
manifest_entry = {k: v for k, v in manifest_entry.items() if v is not None}
queue_manifest.append(manifest_entry)
manifest_path = os.path.join(tmpdir, "queue.json")
with open(manifest_path, 'w', encoding='utf-8') as f: json.dump(queue_manifest, f, indent=4)
with zipfile.ZipFile(output_filename, 'w', zipfile.ZIP_DEFLATED) as zf:
zf.write(manifest_path, arcname="queue.json")
for saved_file_rel_path in file_paths_in_zip.values():
saved_file_abs_path = os.path.join(tmpdir, saved_file_rel_path)
if os.path.exists(saved_file_abs_path):
zf.write(saved_file_abs_path, arcname=saved_file_rel_path)
else:
print(f"Warning (Autosave): File {saved_file_rel_path} not found during zipping.")
return output_filename
return None
saved_path = _save_queue_to_file(global_queue_ref, AUTOSAVE_FILENAME)
if saved_path:
print(f"Queue autosaved successfully to {saved_path}")
else:
print("Autosave failed.")
except Exception as e:
print(f"Error during autosave: {e}")
traceback.print_exc()
def finalize_generation_with_state(current_state):
if not isinstance(current_state, dict) or 'gen' not in current_state:
return gr.update(), gr.update(interactive=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False, value=""), gr.update(), current_state
gallery_update, abort_btn_update, gen_btn_update, add_queue_btn_update, current_gen_col_update, gen_info_update = finalize_generation(current_state)
accordion_update = gr.Accordion(open=False) if len(get_gen_info(current_state).get("queue", [])) <= 1 else gr.update()
return gallery_update, abort_btn_update, gen_btn_update, add_queue_btn_update, current_gen_col_update, gen_info_update, accordion_update, current_state
def generate_queue_html(queue):
if len(queue) <= 1:
return "
Queue is empty.
"
table_header = """
Drag
Qty
Prompt
Length
Steps
Start/Ref
End
"""
table_rows = []
for i, item in enumerate(queue):
if i == 0:
continue
row_index = i - 1
truncated_prompt = (item['prompt'][:97] + '...') if len(item['prompt']) > 100 else item['prompt']
full_prompt = item['prompt'].replace('"', '&quot;')
prompt_cell = f'<td title="{full_prompt}">{truncated_prompt}</td>'
start_img_data = item.get('start_image_data_base64') or [None]
start_img_uri = start_img_data[0]
start_img_labels = item.get('start_image_labels', [''])
end_img_data = item.get('end_image_data_base64') or [None]
end_img_uri = end_img_data[0]
end_img_labels = item.get('end_image_labels', [''])
num_steps = item.get('steps')
length = item.get('length')
start_img_md = ""
if start_img_uri:
start_img_md = f'<img src="{start_img_uri}" alt="{start_img_labels[0]}" />'
Attention mode " + (attn_mode if attn_mode!="auto" else "auto/" + get_auto_attention() )
if attention_mode not in attention_modes_installed:
header += " -NOT INSTALLED-"
elif attention_mode not in attention_modes_supported:
header += " -NOT SUPPORTED-"
elif overridden_attention is not None and attention_mode != overridden_attention:
header += " -MODEL SPECIFIC-"
header += ""
if compile:
header += ", Pytorch compilation ON"
if "fp16" in model_filename:
header += ", Data Type FP16"
else:
header += ", Data Type BF16"
if "int8" in model_filename:
header += ", Quantization Scaled Int8"
header += "
The new configuration has been succesfully applied
", header, model_family, model_choice, get_unique_id()
def get_gen_info(state):
cache = state.get("gen", None)
if cache == None:
cache = dict()
state["gen"] = cache
return cache
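# Builds the per-generation progress callback handed to the pipeline: it honours pause
# requests (unloading the offloaded models while paused), tracks the current denoising
# phase and elapsed time, forwards progress updates and latent previews through send_cmd,
# and throttles UI refreshes via the shared refresh counter.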
def build_callback(state, pipe, send_cmd, status, num_inference_steps):
gen = get_gen_info(state)
gen["num_inference_steps"] = num_inference_steps
start_time = time.time()
def callback(step_idx = -1, latent = None, force_refresh = True, read_state = False, override_num_inference_steps = -1, pass_no = -1, denoising_extra =""):
in_pause = False
with gen_lock:
process_status = gen.get("process_status", None)
pause_msg = None
if process_status.startswith("request:"):
gen["process_status"] = "process:" + process_status[len("request:"):]
offloadobj.unload_all()
pause_msg = gen.get("pause_msg", "Unknown Pause")
in_pause = True
if in_pause:
send_cmd("progress", [0, pause_msg])
while True:
time.sleep(1)
with gen_lock:
process_status = gen.get("process_status", None)
if process_status == "process:main": break
refresh_id = gen.get("refresh", -1)
if force_refresh or step_idx >= 0:
pass
else:
refresh_id = gen.get("refresh", -1)
if refresh_id < 0:
return
UI_refresh = state.get("refresh", 0)
if UI_refresh >= refresh_id:
return
if override_num_inference_steps > 0:
gen["num_inference_steps"] = override_num_inference_steps
num_inference_steps = gen.get("num_inference_steps", 0)
status = gen["progress_status"]
state["refresh"] = refresh_id
if read_state:
phase, step_idx = gen["progress_phase"]
else:
step_idx += 1
if gen.get("abort", False):
# pipe._interrupt = True
phase = "Aborting"
elif step_idx == num_inference_steps:
phase = "VAE Decoding"
else:
if pass_no <=0:
phase = "Denoising"
elif pass_no == 1:
phase = "Denoising First Pass"
elif pass_no == 2:
phase = "Denoising Second Pass"
elif pass_no == 3:
phase = "Denoising Third Pass"
else:
phase = f"Denoising {pass_no}th Pass"
if len(denoising_extra) > 0: phase += " | " + denoising_extra
gen["progress_phase"] = (phase, step_idx)
status_msg = merge_status_context(status, phase)
elapsed_time = time.time() - start_time
status_msg = merge_status_context(status, f"{phase} | {format_time(elapsed_time)}")
if step_idx >= 0:
progress_args = [(step_idx , num_inference_steps) , status_msg , num_inference_steps]
else:
progress_args = [0, status_msg]
# progress(*progress_args)
send_cmd("progress", progress_args)
if latent != None:
latent = latent.to("cpu", non_blocking=True)
send_cmd("preview", latent)
# gen["progress_args"] = progress_args
return callback
def abort_generation(state):
gen = get_gen_info(state)
if "in_progress" in gen: # and wan_model != None:
if wan_model != None:
wan_model._interrupt= True
gen["abort"] = True
msg = "Processing Request to abort Current Generation"
gen["status"] = msg
gr.Info(msg)
return gr.Button(interactive= False)
else:
return gr.Button(interactive= True)
def refresh_gallery(state): #, msg
gen = get_gen_info(state)
# gen["last_msg"] = msg
file_list = gen.get("file_list", None)
choice = gen.get("selected",0)
header_text = gen.get("header_text", "")
in_progress = "in_progress" in gen
if gen.get("last_selected", True) and file_list is not None:
choice = max(len(file_list) - 1,0)
queue = gen.get("queue", [])
abort_interactive = not gen.get("abort", False)
if not in_progress or len(queue) == 0:
return gr.Gallery(selected_index=choice, value = file_list), gr.HTML("", visible= False), gr.Button(visible=True), gr.Button(visible=False), gr.Row(visible=False), gr.Row(visible=False), update_queue_data(queue), gr.Button(interactive= abort_interactive), gr.Button(visible= False)
else:
task = queue[0]
prompt = task["prompt"]
params = task["params"]
model_type = params["model_type"]
base_model_type = get_base_model_type(model_type)
model_def = get_model_def(model_type)
onemorewindow_visible = test_any_sliding_window(base_model_type) and params.get("image_mode",0) == 0 and not params.get("mode","").startswith("edit_")
enhanced = False
if prompt.startswith("!enhanced!\n"):
enhanced = True
prompt = prompt[len("!enhanced!\n"):]
if "\n" in prompt :
prompts = prompt.split("\n")
window_no= gen.get("window_no",1)
if window_no > len(prompts):
window_no = len(prompts)
window_no -= 1
prompts[window_no]="" + prompts[window_no] + ""
prompt = " ".join(prompts)
if enhanced:
prompt = "Enhanced: " + prompt
if len(header_text) > 0:
prompt = "" + header_text + "
" + prompt
list_uri = []
list_labels = []
start_img_uri = task.get('start_image_data_base64')
if start_img_uri != None:
list_uri += start_img_uri
list_labels += task.get('start_image_labels')
end_img_uri = task.get('end_image_data_base64')
if end_img_uri != None:
list_uri += end_img_uri
list_labels += task.get('end_image_labels')
thumbnail_size = "100px"
thumbnails = ""
for i, (img_label, img_uri) in enumerate(zip(list_labels,list_uri)):
thumbnails += f'<div style="display:inline-block; text-align:center; margin:2px;"><img src="{img_uri}" style="max-width:{thumbnail_size}; max-height:{thumbnail_size};" /><br>{img_label}</div>'
# Get current theme from server config
current_theme = server_config.get("UI_theme", "default")
# Use minimal, adaptive styling that blends with any background
# This creates a subtle container that doesn't interfere with the page's theme
table_style = """
border: 1px solid rgba(128, 128, 128, 0.3);
background-color: transparent;
color: inherit;
padding: 8px;
border-radius: 6px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
"""
if params.get("mode", None) in ['edit'] : onemorewindow_visible = False
gen_buttons_visible = True
html = f"
"
def show_modal_image(state, action_string):
if not action_string:
return gr.HTML(), gr.Column(visible=False)
try:
img_type, row_index_str = action_string.split('_')
row_index = int(row_index_str)
except (ValueError, IndexError):
return gr.HTML(), gr.Column(visible=False)
gen = get_gen_info(state)
queue = gen.get("queue", [])
task_index = row_index + 1
if task_index >= len(queue):
return gr.HTML(), gr.Column(visible=False)
task_item = queue[task_index]
image_data = None
label_data = None
if img_type == 'start':
image_data = task_item.get('start_image_data_base64')
label_data = task_item.get('start_image_labels')
elif img_type == 'end':
image_data = task_item.get('end_image_data_base64')
label_data = task_item.get('end_image_labels')
if not image_data or not label_data:
return gr.HTML(), gr.Column(visible=False)
html_content = get_modal_image(image_data[0], label_data[0])
return gr.HTML(value=html_content), gr.Column(visible=True)
def get_prompt_labels(multi_prompts_gen_type, image_outputs = False):
new_line_text = "each new line of prompt will be used for a window" if multi_prompts_gen_type != 0 else "each new line of prompt will generate " + ("a new image" if image_outputs else "a new video")
return "Prompts (" + new_line_text + ", # lines = comments, ! lines = macros)", "Prompts (" + new_line_text + ", # lines = comments)"
def get_image_end_label(multi_prompts_gen_type):
return "Images as ending points for new Videos in the Generation Queue" if multi_prompts_gen_type == 0 else "Images as ending points for each new Window of the same Video Generation"
def refresh_prompt_labels(multi_prompts_gen_type, image_mode):
prompt_label, wizard_prompt_label = get_prompt_labels(multi_prompts_gen_type, image_mode > 0)
return gr.update(label=prompt_label), gr.update(label = wizard_prompt_label), gr.update(label=get_image_end_label(multi_prompts_gen_type))
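# update_video_guide_outpainting stores the four outpainting percentages as a single space separated string,
# e.g. update_video_guide_outpainting("0 0 0 0", 20, 0) -> "20 0 0 0"; when every value is back to 0 it returns ""
# so the setting is treated as disabled.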
def update_video_guide_outpainting(video_guide_outpainting_value, value, pos):
if len(video_guide_outpainting_value) <= 1:
video_guide_outpainting_list = ["0"] * 4
else:
video_guide_outpainting_list = video_guide_outpainting_value.split(" ")
video_guide_outpainting_list[pos] = str(value)
if all(v=="0" for v in video_guide_outpainting_list):
return ""
return " ".join(video_guide_outpainting_list)
def refresh_video_guide_outpainting_row(video_guide_outpainting_checkbox, video_guide_outpainting):
video_guide_outpainting = video_guide_outpainting[1:] if video_guide_outpainting_checkbox else "#" + video_guide_outpainting
return gr.update(visible=video_guide_outpainting_checkbox), video_guide_outpainting
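# Optional "resolutions.json" file next to the app: a list of ["Label", "WxH"] string pairs, for instance
# [["My 2K (1:1.90)", "2048x1080"], ["Square 768", "768x768"]] (hypothetical content shown only to illustrate the format validated below).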
custom_resolutions = None
def get_resolution_choices(current_resolution_choice, model_resolutions= None):
global custom_resolutions
resolution_file = "resolutions.json"
if model_resolutions is not None:
resolution_choices = model_resolutions
elif custom_resolutions is None and os.path.isfile(resolution_file):
with open(resolution_file, 'r', encoding='utf-8') as f:
try:
resolution_choices = json.load(f)
except Exception as e:
print(f'Invalid "{resolution_file}" : {e}')
resolution_choices = None
if resolution_choices is None:
pass
elif not isinstance(resolution_choices, list):
print(f'"{resolution_file}" should be a list of 2 elements lists ["Label","WxH"]')
resolution_choices == None
else:
for tup in resolution_choices:
if not isinstance(tup, list) or len(tup) != 2 or not isinstance(tup[0], str) or not isinstance(tup[1], str):
print(f'"{resolution_file}" contains an invalid list of two elements: {tup}')
resolution_choices == None
break
res_list = tup[1].split("x")
if len(res_list) != 2 or not is_integer(res_list[0]) or not is_integer(res_list[1]):
print(f'"{resolution_file}" contains a resolution value that is not in the format "WxH": {tup[1]}')
resolution_choices = None
break
custom_resolutions = resolution_choices
else:
resolution_choices = custom_resolutions
if resolution_choices is None:
resolution_choices=[
# 1080p
("1920x1088 (16:9)", "1920x1088"),
("1088x1920 (9:16)", "1088x1920"),
("1920x832 (21:9)", "1920x832"),
("832x1920 (9:21)", "832x1920"),
# 720p
("1024x1024 (1:1)", "1024x1024"),
("1280x720 (16:9)", "1280x720"),
("720x1280 (9:16)", "720x1280"),
("1280x544 (21:9)", "1280x544"),
("544x1280 (9:21)", "544x1280"),
("1104x832 (4:3)", "1104x832"),
("832x1104 (3:4)", "832x1104"),
("960x960 (1:1)", "960x960"),
# 540p
("960x544 (16:9)", "960x544"),
("544x960 (9:16)", "544x960"),
# 480p
("832x624 (4:3)", "832x624"),
("624x832 (3:4)", "624x832"),
("720x720 (1:1)", "720x720"),
("832x480 (16:9)", "832x480"),
("480x832 (9:16)", "480x832"),
("512x512 (1:1)", "512x512"),
]
if current_resolution_choice is not None:
found = False
for label, res in resolution_choices:
if current_resolution_choice == res:
found = True
break
if not found:
if model_resolutions is None:
resolution_choices.append( (current_resolution_choice, current_resolution_choice ))
else:
current_resolution_choice = resolution_choices[0][1]
return resolution_choices, current_resolution_choice
group_thresholds = {
"360p": 320 * 640,
"480p": 832 * 624,
"540p": 960 * 544,
"720p": 1024 * 1024,
"1080p": 1920 * 1088,
"1440p": 9999 * 9999
}
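# categorize_resolution maps a "WxH" string to the first group whose pixel budget is not exceeded,
# e.g. "832x480" (399,360 px) -> "480p" and "1280x720" (921,600 px) -> "720p".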
def categorize_resolution(resolution_str):
width, height = map(int, resolution_str.split('x'))
pixel_count = width * height
for group in group_thresholds.keys():
if pixel_count <= group_thresholds[group]:
return group
return "1440p"
def group_resolutions(model_def, resolutions, selected_resolution):
model_resolutions = model_def.get("resolutions", None)
if model_resolutions is not None:
selected_group ="Locked"
available_groups = [selected_group ]
selected_group_resolutions = model_resolutions
else:
grouped_resolutions = {}
for resolution in resolutions:
group = categorize_resolution(resolution[1])
if group not in grouped_resolutions:
grouped_resolutions[group] = []
grouped_resolutions[group].append(resolution)
available_groups = [group for group in group_thresholds if group in grouped_resolutions]
selected_group = categorize_resolution(selected_resolution)
selected_group_resolutions = grouped_resolutions.get(selected_group, [])
available_groups.reverse()
return available_groups, selected_group_resolutions, selected_group
def change_resolution_group(state, selected_group):
model_type = state["model_type"]
model_def = get_model_def(model_type)
model_resolutions = model_def.get("resolutions", None)
resolution_choices, _ = get_resolution_choices(None, model_resolutions)
if model_resolutions is None:
group_resolution_choices = [ resolution for resolution in resolution_choices if categorize_resolution(resolution[1]) == selected_group ]
else:
group_resolution_choices = resolution_choices  # resolutions are locked by the model definition: no grouping
last_resolution = group_resolution_choices[0][1]
return gr.update(choices= group_resolution_choices, value= last_resolution)
last_resolution_per_group = state["last_resolution_per_group"]
last_resolution = last_resolution_per_group.get(selected_group, "")
if len(last_resolution) == 0 or not any( [last_resolution == resolution[1] for resolution in group_resolution_choices]):
last_resolution = group_resolution_choices[0][1]
return gr.update(choices= group_resolution_choices, value= last_resolution )
def record_last_resolution(state, resolution):
model_type = state["model_type"]
model_def = get_model_def(model_type)
model_resolutions = model_def.get("resolutions", None)
if model_resolutions is not None: return
server_config["last_resolution_choice"] = resolution
selected_group = categorize_resolution(resolution)
last_resolution_per_group = state["last_resolution_per_group"]
last_resolution_per_group[selected_group ] = resolution
server_config["last_resolution_per_group"] = last_resolution_per_group
with open(server_config_filename, "w", encoding="utf-8") as writer:
writer.write(json.dumps(server_config, indent=4))
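# get_max_frames scales a nominal frame count by the server "max_frames_multiplier": (nb - 1) * multiplier + 1,
# e.g. get_max_frames(81) -> 81 with the default multiplier of 1, and 161 with a multiplier of 2.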
def get_max_frames(nb):
return (nb - 1) * server_config.get("max_frames_multiplier",1) + 1
def change_guidance_phases(state, guidance_phases):
model_type = state["model_type"]
model_def = get_model_def(model_type)
multiple_submodels = model_def.get("multiple_submodels", False)
label ="Phase 1-2" if guidance_phases ==3 else ( "Model / Guidance Switch Threshold" if multiple_submodels else "Guidance Switch Threshold" )
return gr.update(visible= guidance_phases >=3 and multiple_submodels) , gr.update(visible= guidance_phases >=2), gr.update(visible= guidance_phases >=2, label = label), gr.update(visible= guidance_phases >=3), gr.update(visible= guidance_phases >=2), gr.update(visible= guidance_phases >=3)
memory_profile_choices= [ ("Profile 1, HighRAM_HighVRAM: at least 64 GB of RAM and 24 GB of VRAM, the fastest for short videos with a RTX 3090 / RTX 4090", 1),
("Profile 2, HighRAM_LowVRAM: at least 64 GB of RAM and 12 GB of VRAM, the most versatile profile with high RAM, better suited for RTX 3070/3080/4070/4080 or for RTX 3090 / RTX 4090 with large pictures batches or long videos", 2),
("Profile 3, LowRAM_HighVRAM: at least 32 GB of RAM and 24 GB of VRAM, adapted for RTX 3090 / RTX 4090 with limited RAM for good speed short video",3),
("Profile 4, LowRAM_LowVRAM (Recommended): at least 32 GB of RAM and 12 GB of VRAM, if you have little VRAM or want to generate longer videos",4),
("Profile 5, VerylowRAM_LowVRAM (Fail safe): at least 24 GB of RAM and 10 GB of VRAM, if you don't have much it won't be fast but maybe it will work",5)]
def detect_auto_save_form(state, evt:gr.SelectData):
last_tab_id = state.get("last_tab_id", 0)
state["last_tab_id"] = new_tab_id = evt.index
if new_tab_id > 0 and last_tab_id == 0:
return get_unique_id()
else:
return gr.update()
def compute_video_length_label(fps, current_video_length):
if fps is None:
return "Number of frames"
else:
return f"Number of frames ({fps} frames = 1s), current duration: {(current_video_length / fps):.1f}s"
def refresh_video_length_label(state, current_video_length, force_fps, video_guide, video_source):
base_model_type = get_base_model_type(state["model_type"])
computed_fps = get_computed_fps(force_fps, base_model_type , video_guide, video_source )
return gr.update(label= compute_video_length_label(computed_fps, current_video_length))
def get_default_value(choices, current_value, default_value = None):
for label, value in choices:
if value == current_value:
return current_value
return default_value
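# get_default_value returns current_value only if it appears among the (label, value) choices,
# e.g. get_default_value([("720p", "1280x720")], "1280x720") -> "1280x720"; otherwise the provided default (None by default).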
def generate_video_tab(update_form = False, state_dict = None, ui_defaults = None, model_family = None, model_choice = None, header = None, main = None, main_tabs= None, tab_id='generate'):
global inputs_names #, advanced
if update_form:
model_filename = state_dict["model_filename"]
model_type = state_dict["model_type"]
advanced_ui = state_dict["advanced"]
else:
model_type = transformer_type
model_filename = get_model_filename(model_type, transformer_quantization, transformer_dtype_policy)
advanced_ui = advanced
ui_defaults= get_default_settings(model_type)
state_dict = {}
state_dict["model_filename"] = model_filename
state_dict["model_type"] = model_type
state_dict["advanced"] = advanced_ui
state_dict["last_model_per_family"] = server_config.get("last_model_per_family", {})
state_dict["last_resolution_per_group"] = server_config.get("last_resolution_per_group", {})
gen = dict()
gen["queue"] = []
state_dict["gen"] = gen
model_def = get_model_def(model_type)
if model_def == None: model_def = {}
base_model_type = get_base_model_type(model_type)
model_filename = get_model_filename( base_model_type )
preset_to_load = lora_preselected_preset if lora_preset_model == model_type else ""
loras, loras_names, loras_presets, default_loras_choices, default_loras_multis_str, default_lora_preset_prompt, default_lora_preset = setup_loras(model_type, None, get_lora_dir(model_type), preset_to_load, None)
state_dict["loras"] = loras
state_dict["loras_presets"] = loras_presets
state_dict["loras_names"] = loras_names
launch_prompt = ""
launch_preset = ""
launch_loras = []
launch_multis_str = ""
if update_form:
pass
if len(default_lora_preset) > 0 and lora_preset_model == model_type:
launch_preset = default_lora_preset
launch_prompt = default_lora_preset_prompt
launch_loras = default_loras_choices
launch_multis_str = default_loras_multis_str
if len(launch_preset) == 0:
launch_preset = ui_defaults.get("lset_name","")
if len(launch_prompt) == 0:
launch_prompt = ui_defaults.get("prompt","")
if len(launch_loras) == 0:
launch_multis_str = ui_defaults.get("loras_multipliers","")
activated_loras = ui_defaults.get("activated_loras",[])
if len(activated_loras) > 0:
lora_filenames = [os.path.basename(lora_path) for lora_path in loras]
activated_indices = []
for lora_file in ui_defaults["activated_loras"]:
try:
idx = lora_filenames.index(lora_file)
activated_indices.append(str(idx))
except ValueError:
print(f"Warning: Lora file {lora_file} from config not found in loras directory")
launch_loras = activated_indices
with gr.Row():
with gr.Column():
with gr.Column(visible=False, elem_id="image-modal-container") as modal_container:
with gr.Row(elem_id="image-modal-close-button-row"):
close_modal_button = gr.Button("❌", size="sm", scale=1)
modal_html_display = gr.HTML()
modal_action_input = gr.Text(elem_id="modal_action_input", visible=False)
modal_action_trigger = gr.Button(elem_id="modal_action_trigger", visible=False)
with gr.Row(visible= True): #len(loras)>0) as presets_column:
lset_choices = compute_lset_choices(loras_presets) + [(get_new_preset_msg(advanced_ui), "")]
with gr.Column(scale=6):
lset_name = gr.Dropdown(show_label=False, allow_custom_value= True, scale=5, filterable=True, choices= lset_choices, value=launch_preset)
with gr.Column(scale=1):
with gr.Row(height=17):
apply_lset_btn = gr.Button("Apply", size="sm", min_width= 1)
refresh_lora_btn = gr.Button("Refresh", size="sm", min_width= 1, visible=advanced_ui or not only_allow_edit_in_advanced)
if len(launch_preset) == 0 :
lset_type = 2
else:
lset_type = 1 if launch_preset.endswith(".lset") else 2
save_lset_prompt_drop= gr.Dropdown(
choices=[
# ("Save Loras & Only Prompt Comments", 0),
("Save Only Loras & Full Prompt", 1),
("Save All the Settings", 2)
], show_label= False, container=False, value = lset_type, visible= False
)
with gr.Row(height=17, visible=False) as refresh2_row:
refresh_lora_btn2 = gr.Button("Refresh", size="sm", min_width= 1)
with gr.Row(height=17, visible=advanced_ui or not only_allow_edit_in_advanced) as preset_buttons_rows:
confirm_save_lset_btn = gr.Button("Go Ahead Save it !", size="sm", min_width= 1, visible=False)
confirm_delete_lset_btn = gr.Button("Go Ahead Delete it !", size="sm", min_width= 1, visible=False)
save_lset_btn = gr.Button("Save", size="sm", min_width= 1, visible = True)
delete_lset_btn = gr.Button("Delete", size="sm", min_width= 1, visible = True)
cancel_lset_btn = gr.Button("Don't do it !", size="sm", min_width= 1 , visible=False)
#confirm_save_lset_btn, confirm_delete_lset_btn, save_lset_btn, delete_lset_btn, cancel_lset_btn
if not update_form:
state = gr.State(state_dict)
trigger_refresh_input_type = gr.Text(interactive= False, visible= False)
t2v = base_model_type in ["t2v"]
t2v_1_3B = base_model_type in ["t2v_1.3B"]
flf2v = base_model_type == "flf2v_720p"
base_model_family = get_model_family(base_model_type)
diffusion_forcing = "diffusion_forcing" in model_filename
ltxv = "ltxv" in model_filename
lock_inference_steps = model_def.get("lock_inference_steps", False)
any_tea_cache = model_def.get("tea_cache", False)
any_mag_cache = model_def.get("mag_cache", False)
recammaster = base_model_type in ["recam_1.3B"]
vace = test_vace_module(base_model_type)
phantom = base_model_type in ["phantom_1.3B", "phantom_14B"]
fantasy = base_model_type in ["fantasy"]
multitalk = model_def.get("multitalk_class", False)
standin = model_def.get("standin_class", False)
infinitetalk = base_model_type in ["infinitetalk"]
hunyuan_t2v = "hunyuan_video_720" in model_filename
hunyuan_i2v = "hunyuan_video_i2v" in model_filename
hunyuan_video_custom = "hunyuan_video_custom" in model_filename
hunyuan_video_custom = base_model_type in ["hunyuan_custom", "hunyuan_custom_audio", "hunyuan_custom_edit"]
hunyuan_video_custom_audio = base_model_type in ["hunyuan_custom_audio"]
hunyuan_video_custom_edit = base_model_type in ["hunyuan_custom_edit"]
hunyuan_video_avatar = "hunyuan_video_avatar" in model_filename
flux = base_model_family in ["flux"]
qwen = base_model_family in ["qwen"]
image_outputs = model_def.get("image_outputs", False)
sliding_window_enabled = test_any_sliding_window(model_type)
multi_prompts_gen_type_value = ui_defaults.get("multi_prompts_gen_type", 0)
prompt_label, wizard_prompt_label = get_prompt_labels(multi_prompts_gen_type_value, image_outputs)
any_video_source = False
fps = get_model_fps(base_model_type)
image_prompt_type_value = ""
video_prompt_type_value = ""
any_start_image = any_end_image = any_reference_image = any_image_mask = False
v2i_switch_supported = (vace or t2v or standin) and not image_outputs
ti2v_2_2 = base_model_type in ["ti2v_2_2"]
gallery_height = 350
def get_image_gallery(label ="", value = None, single_image_mode = False, visible = False ):
with gr.Row(visible = visible) as gallery_row:
gallery_amg = AdvancedMediaGallery(media_mode="image", height=gallery_height, columns=4, label=label, initial = value , single_image_mode = single_image_mode )
gallery_amg.mount(update_form=update_form)
return gallery_row, gallery_amg.gallery, [gallery_row] + gallery_amg.get_toggable_elements()
image_mode_value = ui_defaults.get("image_mode", 1 if image_outputs else 0 )
if not v2i_switch_supported and not image_outputs:
image_mode_value = 0
else:
image_outputs = image_mode_value > 0
inpaint_support = model_def.get("inpaint_support", False)
image_mode = gr.Number(value =image_mode_value, visible = False)
image_mode_tab_selected= "t2i" if image_mode_value == 1 else ("inpaint" if image_mode_value == 2 else "t2v")
with gr.Tabs(visible = v2i_switch_supported or inpaint_support, selected= image_mode_tab_selected ) as image_mode_tabs:
with gr.Tab("Text to Video", id = "t2v", elem_classes="compact_tab", visible = v2i_switch_supported) as tab_t2v:
pass
with gr.Tab("Text to Image", id = "t2i", elem_classes="compact_tab"):
pass
with gr.Tab("Image Inpainting", id = "inpaint", elem_classes="compact_tab", visible=inpaint_support) as tab_inpaint:
pass
image_prompt_types_allowed = model_def.get("image_prompt_types_allowed", "")
model_mode_choices = model_def.get("model_modes", None)
model_modes_visibility = [0,1,2]
if model_mode_choices is not None: model_modes_visibility= model_mode_choices.get("image_modes", model_modes_visibility)
with gr.Column(visible= image_mode_value == 0 and len(image_prompt_types_allowed)> 0 or model_mode_choices is not None and image_mode_value in model_modes_visibility ) as image_prompt_column:
# Video Continue / Start Frame / End Frame
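# image_prompt_type is a letter-coded string built from the radio/checkbox below: "S" = start image(s),
# "V" = continue a video, "L" = continue the last generated video, "E" = end image(s); an empty string means text only.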
image_prompt_type_value= ui_defaults.get("image_prompt_type","")
image_prompt_type = gr.Text(value= image_prompt_type_value, visible= False)
image_prompt_type_choices = []
if "T" in image_prompt_types_allowed:
image_prompt_type_choices += [("Text Prompt Only" if "S" in image_prompt_types_allowed else "New Video", "")]
if "S" in image_prompt_types_allowed:
image_prompt_type_choices += [("Start Video with Image", "S")]
any_start_image = True
if "V" in image_prompt_types_allowed:
any_video_source = True
image_prompt_type_choices += [("Continue Video", "V")]
if "L" in image_prompt_types_allowed:
any_video_source = True
image_prompt_type_choices += [("Continue Last Video", "L")]
with gr.Group(visible= len(image_prompt_types_allowed)>1 and image_mode_value == 0) as image_prompt_type_group:
with gr.Row():
image_prompt_type_radio_allowed_values= filter_letters(image_prompt_types_allowed, "SVL")
image_prompt_type_radio_value = filter_letters(image_prompt_type_value, image_prompt_type_radio_allowed_values, image_prompt_type_choices[0][1] if len(image_prompt_type_choices) > 0 else "")
if len(image_prompt_type_choices) > 0:
image_prompt_type_radio = gr.Radio( image_prompt_type_choices, value = image_prompt_type_radio_value, label="Location", show_label= False, visible= len(image_prompt_types_allowed)>1, scale= 3)
else:
image_prompt_type_radio = gr.Radio(choices=[("", "")], value="", visible= False)
if "E" in image_prompt_types_allowed:
image_prompt_type_endcheckbox = gr.Checkbox( value ="E" in image_prompt_type_value, label="End Image(s)", show_label= False, visible= any_letters(image_prompt_type_radio_value, "SVL") and not image_outputs , scale= 1)
any_end_image = True
else:
image_prompt_type_endcheckbox = gr.Checkbox( value =False, show_label= False, visible= False , scale= 1)
image_start_row, image_start, image_start_extra = get_image_gallery(label= "Images as starting points for new Videos in the Generation Queue", value = ui_defaults.get("image_start", None), visible= "S" in image_prompt_type_value )
video_source = gr.Video(label= "Video to Continue", height = gallery_height, visible= "V" in image_prompt_type_value, value= ui_defaults.get("video_source", None),)
image_end_row, image_end, image_end_extra = get_image_gallery(label= get_image_end_label(ui_defaults.get("multi_prompts_gen_type", 0)), value = ui_defaults.get("image_end", None), visible= any_letters(image_prompt_type_value, "SVL") and ("E" in image_prompt_type_value) )
if model_mode_choices is None or image_mode_value not in model_modes_visibility:
model_mode = gr.Dropdown(value=None, visible=False)
else:
model_mode_value = get_default_value(model_mode_choices["choices"], ui_defaults.get("model_mode", None), model_mode_choices["default"] )
model_mode = gr.Dropdown(choices=model_mode_choices["choices"], value=model_mode_value, label=model_mode_choices["label"], visible=True)
keep_frames_video_source = gr.Text(value=ui_defaults.get("keep_frames_video_source","") , visible= len(filter_letters(image_prompt_type_value, "VL"))>0 , scale = 2, label= "Truncate Video beyond this number of resampled Frames (empty=Keep All, negative truncates from End)" )
any_control_video = any_control_image = False
if image_mode_value ==2:
guide_preprocessing = { "selection": ["V", "VG"]}
mask_preprocessing = { "selection": ["A"]}
else:
guide_preprocessing = model_def.get("guide_preprocessing", None)
mask_preprocessing = model_def.get("mask_preprocessing", None)
guide_custom_choices = model_def.get("guide_custom_choices", None)
image_ref_choices = model_def.get("image_ref_choices", None)
# with gr.Column(visible= vace or phantom or hunyuan_video_custom or hunyuan_video_avatar or hunyuan_video_custom_edit or t2v or standin or ltxv or infinitetalk or recammaster or (flux or qwen ) and model_reference_image and image_mode_value >=1) as video_prompt_column:
with gr.Column(visible= guide_preprocessing is not None or mask_preprocessing is not None or guide_custom_choices is not None or image_ref_choices is not None) as video_prompt_column:
video_prompt_type_value= ui_defaults.get("video_prompt_type","")
video_prompt_type = gr.Text(value= video_prompt_type_value, visible= False)
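# video_prompt_type is also a letter-coded string, as used by the visibility conditions below: "V" = a control video/image
# is provided, "U" = keep it unchanged, "A" = a mask area is used, "G" = denoising strength applies, "I" = reference images,
# "F" = injected frames, "T" = alignment to the first window; the preprocessing letters (P, D, E, S, L, C, M) follow
# guide_preprocessing_labels_all defined below.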
with gr.Row(visible = image_mode_value!=2) as guide_selection_row:
# Control Video Preprocessing
if guide_preprocessing is None:
video_prompt_type_video_guide = gr.Dropdown(choices=[("","")], value="", label="Control Video", scale = 2, visible= False, show_label= True, )
else:
pose_label = "Pose" if image_outputs else "Motion"
guide_preprocessing_labels_all = {
"": "No Control Video",
"UV": "Keep Control Video Unchanged",
"PV": f"Transfer Human {pose_label}",
"DV": "Transfer Depth",
"EV": "Transfer Canny Edges",
"SV": "Transfer Shapes",
"LV": "Transfer Flow",
"CV": "Recolorize",
"MV": "Perform Inpainting",
"V": "Use Vace raw format",
"PDV": f"Transfer Human {pose_label} & Depth",
"PSV": f"Transfer Human {pose_label} & Shapes",
"PLV": f"Transfer Human {pose_label} & Flow" ,
"DSV": "Transfer Depth & Shapes",
"DLV": "Transfer Depth & Flow",
"SLV": "Transfer Shapes & Flow",
}
guide_preprocessing_choices = []
guide_preprocessing_labels = guide_preprocessing.get("labels", {})
for process_type in guide_preprocessing["selection"]:
process_label = guide_preprocessing_labels.get(process_type, None)
process_label = guide_preprocessing_labels_all.get(process_type,process_type) if process_label is None else process_label
if image_outputs: process_label = process_label.replace("Video", "Image")
guide_preprocessing_choices.append( (process_label, process_type) )
video_prompt_type_video_guide_label = guide_preprocessing.get("label", "Control Video Process")
if image_outputs: video_prompt_type_video_guide_label = video_prompt_type_video_guide_label.replace("Video", "Image")
video_prompt_type_video_guide = gr.Dropdown(
guide_preprocessing_choices,
value=filter_letters(video_prompt_type_value, all_guide_processes, guide_preprocessing.get("default", "") ),
label= video_prompt_type_video_guide_label , scale = 2, visible= guide_preprocessing.get("visible", True) , show_label= True,
)
any_control_video = True
any_control_image = image_outputs
# Alternate Control Video Preprocessing / Options
if guide_custom_choices is None:
video_prompt_type_video_guide_alt = gr.Dropdown(choices=[("","")], value="", label="Control Video", visible= False, scale = 2 )
else:
video_prompt_type_video_guide_alt_label = guide_custom_choices.get("label", "Control Video Process")
if image_outputs: video_prompt_type_video_guide_alt_label = video_prompt_type_video_guide_alt_label.replace("Video", "Image")
video_prompt_type_video_guide_alt_choices = [(label.replace("Video", "Image") if image_outputs else label, value) for label,value in guide_custom_choices["choices"] ]
guide_custom_choices_value = get_default_value(video_prompt_type_video_guide_alt_choices, filter_letters(video_prompt_type_value, guide_custom_choices["letters_filter"]), guide_custom_choices.get("default", "") )
video_prompt_type_video_guide_alt = gr.Dropdown(
choices= video_prompt_type_video_guide_alt_choices,
# value=filter_letters(video_prompt_type_value, guide_custom_choices["letters_filter"], guide_custom_choices.get("default", "") ),
value=guide_custom_choices_value,
visible = guide_custom_choices.get("visible", True),
label= video_prompt_type_video_guide_alt_label, show_label= guide_custom_choices.get("show_label", True), scale = 2
)
any_control_video = True
any_control_image = image_outputs
# Control Mask Preprocessing
if mask_preprocessing is None:
video_prompt_type_video_mask = gr.Dropdown(choices=[("","")], value="", label="Video Mask", scale = 2, visible= False, show_label= True, )
any_image_mask = image_outputs
else:
mask_preprocessing_labels_all = {
"": "Whole Frame",
"A": "Masked Area",
"NA": "Non Masked Area",
"XA": "Masked Area, rest Inpainted",
"XNA": "Non Masked Area, rest Inpainted",
"YA": "Masked Area, rest Depth",
"YNA": "Non Masked Area, rest Depth",
"WA": "Masked Area, rest Shapes",
"WNA": "Non Masked Area, rest Shapes",
"ZA": "Masked Area, rest Flow",
"ZNA": "Non Masked Area, rest Flow"
}
mask_preprocessing_choices = []
mask_preprocessing_labels = mask_preprocessing.get("labels", {})
for process_type in mask_preprocessing["selection"]:
process_label = mask_preprocessing_labels.get(process_type, None)
process_label = mask_preprocessing_labels_all.get(process_type, process_type) if process_label is None else process_label
mask_preprocessing_choices.append( (process_label, process_type) )
video_prompt_type_video_mask_label = mask_preprocessing.get("label", "Area Processed")
video_prompt_type_video_mask = gr.Dropdown(
mask_preprocessing_choices,
value=filter_letters(video_prompt_type_value, "XYZWNA", mask_preprocessing.get("default", "")),
label= video_prompt_type_video_mask_label , scale = 2, visible= "V" in video_prompt_type_value and not "U" in video_prompt_type_value and mask_preprocessing.get("visible", True),
show_label= True,
)
# Image Refs Selection
if image_ref_choices is None:
video_prompt_type_image_refs = gr.Dropdown(
# choices=[ ("None", ""),("Start", "KI"),("Ref Image", "I")],
choices=[ ("None", ""),],
value=filter_letters(video_prompt_type_value, ""),
visible = False,
label="Start / Reference Images", scale = 2
)
any_reference_image = False
else:
any_reference_image = True
video_prompt_type_image_refs = gr.Dropdown(
choices= image_ref_choices["choices"],
value=filter_letters(video_prompt_type_value, image_ref_choices["letters_filter"]),
visible = image_ref_choices.get("visible", True),
label=image_ref_choices.get("label", "Inject Reference Images"), show_label= True, scale = 2
)
image_guide = gr.Image(label= "Control Image", height = 800, type ="pil", visible= image_mode_value==1 and "V" in video_prompt_type_value and ("U" in video_prompt_type_value or not "A" in video_prompt_type_value ) , value= ui_defaults.get("image_guide", None))
video_guide = gr.Video(label= "Control Video", height = gallery_height, visible= (not image_outputs) and "V" in video_prompt_type_value, value= ui_defaults.get("video_guide", None))
if image_mode_value >= 1:
image_guide_value = ui_defaults.get("image_guide", None)
image_mask_value = ui_defaults.get("image_mask", None)
if image_guide_value is None:
image_mask_guide_value = None
else:
image_mask_guide_value = { "background" : image_guide_value, "composite" : None}
image_mask_guide_value["layers"] = [] if image_mask_value is None else [rgb_bw_to_rgba_mask(image_mask_value)]
image_mask_guide = gr.ImageEditor(
label="Control Image to be Inpainted" if image_mode_value == 2 else "Control Image and Mask",
value = image_mask_guide_value,
type='pil',
sources=["upload", "webcam"],
image_mode='RGB',
layers=False,
brush=gr.Brush(colors=["#FFFFFF"], color_mode="fixed"),
# fixed_canvas= True,
# width=800,
height=800,
# transforms=None,
# interactive=True,
elem_id="img_editor",
visible= "V" in video_prompt_type_value and "A" in video_prompt_type_value and not "U" in video_prompt_type_value
)
any_control_image = True
else:
image_mask_guide = gr.ImageEditor(value = None, visible = False, elem_id="img_editor")
denoising_strength = gr.Slider(0, 1, value= ui_defaults.get("denoising_strength" ,0.5), step=0.01, label=f"Denoising Strength (the Lower the Closer to the Control {'Image' if image_outputs else 'Video'})", visible = "G" in video_prompt_type_value, show_reset_button= False)
keep_frames_video_guide_visible = not image_outputs and "V" in video_prompt_type_value and not model_def.get("keep_frames_video_guide_not_supported", False)
keep_frames_video_guide = gr.Text(value=ui_defaults.get("keep_frames_video_guide","") , visible= keep_frames_video_guide_visible , scale = 2, label= "Frames to keep in Control Video (empty=All, 1=first, a:b for a range, space to separate values)" ) #, -1=last
video_guide_outpainting_modes = model_def.get("video_guide_outpainting", [])
with gr.Column(visible= ("V" in video_prompt_type_value or "K" in video_prompt_type_value or "F" in video_prompt_type_value) and image_mode_value in video_guide_outpainting_modes) as video_guide_outpainting_col:
video_guide_outpainting_value = ui_defaults.get("video_guide_outpainting","#")
video_guide_outpainting = gr.Text(value=video_guide_outpainting_value , visible= False)
with gr.Group():
video_guide_outpainting_checkbox = gr.Checkbox(label="Enable Spatial Outpainting on Control Video, Landscape or Positioned Reference Frames" if image_mode_value == 0 else "Enable Spatial Outpainting on Control Image", value=len(video_guide_outpainting_value)>0 and not video_guide_outpainting_value.startswith("#") )
with gr.Row(visible = not video_guide_outpainting_value.startswith("#")) as video_guide_outpainting_row:
video_guide_outpainting_value = video_guide_outpainting_value[1:] if video_guide_outpainting_value.startswith("#") else video_guide_outpainting_value
video_guide_outpainting_list = [0] * 4 if len(video_guide_outpainting_value) == 0 else [int(v) for v in video_guide_outpainting_value.split(" ")]
video_guide_outpainting_top= gr.Slider(0, 100, value= video_guide_outpainting_list[0], step=5, label="Top %", show_reset_button= False)
video_guide_outpainting_bottom = gr.Slider(0, 100, value= video_guide_outpainting_list[1], step=5, label="Bottom %", show_reset_button= False)
video_guide_outpainting_left = gr.Slider(0, 100, value= video_guide_outpainting_list[2], step=5, label="Left %", show_reset_button= False)
video_guide_outpainting_right = gr.Slider(0, 100, value= video_guide_outpainting_list[3], step=5, label="Right %", show_reset_button= False)
# image_mask = gr.Image(label= "Image Mask Area (for Inpainting, white = Control Area, black = Unchanged)", type ="pil", visible= image_mode_value==1 and "V" in video_prompt_type_value and "A" in video_prompt_type_value and not "U" in video_prompt_type_value , height = gallery_height, value= ui_defaults.get("image_mask", None))
image_mask = gr.Image(label= "Image Mask Area (for Inpainting, white = Control Area, black = Unchanged)", type ="pil", visible= False, height = gallery_height, value= ui_defaults.get("image_mask", None))
video_mask = gr.Video(label= "Video Mask Area (for Inpainting, white = Control Area, black = Unchanged)", visible= (not image_outputs) and "V" in video_prompt_type_value and "A" in video_prompt_type_value and not "U" in video_prompt_type_value , height = gallery_height, value= ui_defaults.get("video_mask", None))
mask_expand = gr.Slider(-10, 50, value=ui_defaults.get("mask_expand", 0), step=1, label="Expand / Shrink Mask Area", visible= "V" in video_prompt_type_value and "A" in video_prompt_type_value and not "U" in video_prompt_type_value )
image_refs_single_image_mode = model_def.get("one_image_ref_needed", False)
image_refs_label = "Start Image" if hunyuan_video_avatar else ("Reference Image" if image_refs_single_image_mode else "Reference Images") + (" (each Image will be associated to a Sliding Window)" if infinitetalk else "")
image_refs_row, image_refs, image_refs_extra = get_image_gallery(label= image_refs_label, value = ui_defaults.get("image_refs", None), visible= "I" in video_prompt_type_value, single_image_mode=image_refs_single_image_mode)
frames_positions = gr.Text(value=ui_defaults.get("frames_positions","") , visible= "F" in video_prompt_type_value, scale = 2, label= "Positions of Injected Frames (1=first, L=last of a window; no position for other Image Refs)" )
image_refs_relative_size = gr.Slider(20, 100, value=ui_defaults.get("image_refs_relative_size", 50), step=1, label="Rescale Image Ref Internally (% relative to the Output Video) to change the Output Composition", visible = model_def.get("any_image_refs_relative_size", False) and image_outputs)
no_background_removal = model_def.get("no_background_removal", False) or image_ref_choices is None
background_removal_label = model_def.get("background_removal_label", "Remove Background behind People / Objects")
remove_background_images_ref = gr.Dropdown(
choices=[
("Keep Backgrounds behind all Reference Images", 0),
(background_removal_label, 1),
],
value=0 if no_background_removal else ui_defaults.get("remove_background_images_ref",1),
label="Automatic Removal of Background behind People or Objects in Reference Images", scale = 3, visible= "I" in video_prompt_type_value and not no_background_removal
)
any_audio_voices_support = any_audio_track(base_model_type)
audio_prompt_type_value = ui_defaults.get("audio_prompt_type", "A" if any_audio_voices_support else "")
audio_prompt_type = gr.Text(value= audio_prompt_type_value, visible= False)
if any_audio_voices_support:
any_single_speaker = not model_def.get("multi_speakers_only", False)
if not any_single_speaker and "A" in audio_prompt_type_value and not ("B" in audio_prompt_type_value or "X" in audio_prompt_type_value): audio_prompt_type_value = del_in_sequence(audio_prompt_type_value, "XCPAB")
any_multi_speakers = not model_def.get("one_speaker_only", False)
if not any_multi_speakers: audio_prompt_type_value = del_in_sequence(audio_prompt_type_value, "XCPB")
speaker_choices=[("None", "")]
if any_single_speaker: speaker_choices += [("One Person Speaking Only", "A")]
if any_multi_speakers:speaker_choices += [
("Two speakers, Auto Separation of Speakers (will work only if Voices are distinct)", "XA"),
("Two speakers, Speakers Audio sources are assumed to be played in a Row", "CAB"),
("Two speakers, Speakers Audio sources are assumed to be played in Parallel", "PAB")
]
audio_prompt_type_sources = gr.Dropdown(
choices=speaker_choices,
value= filter_letters(audio_prompt_type_value, "XCPAB"),
label="Voices", scale = 3, visible = multitalk and not image_outputs
)
else:
audio_prompt_type_sources = gr.Dropdown( choices= [""], value = "", visible=False)
with gr.Row(visible = any_audio_voices_support and not image_outputs) as audio_guide_row:
audio_guide = gr.Audio(value= ui_defaults.get("audio_guide", None), type="filepath", label="Voice to follow", show_download_button= True, visible= any_audio_voices_support and "A" in audio_prompt_type_value )
audio_guide2 = gr.Audio(value= ui_defaults.get("audio_guide2", None), type="filepath", label="Voice to follow #2", show_download_button= True, visible= any_audio_voices_support and "B" in audio_prompt_type_value )
remove_background_sound = gr.Checkbox(label="Video Motion ignores Background Music (to get a better LipSync)", value="V" in audio_prompt_type_value, visible = any_audio_voices_support and any_letters(audio_prompt_type_value, "ABX") and not image_outputs)
with gr.Row(visible = any_audio_voices_support and ("B" in audio_prompt_type_value or "X" in audio_prompt_type_value) and not image_outputs ) as speakers_locations_row:
speakers_locations = gr.Text( ui_defaults.get("speakers_locations", "0:45 55:100"), label="Speakers Locations separated by a Space. Each Location = Left:Right or a BBox Left:Top:Right:Bottom", visible= True)
advanced_prompt = advanced_ui
prompt_vars=[]
if advanced_prompt:
default_wizard_prompt, variables, values= None, None, None
else:
default_wizard_prompt, variables, values, errors = extract_wizard_prompt(launch_prompt)
advanced_prompt = len(errors) > 0
with gr.Column(visible= advanced_prompt) as prompt_column_advanced:
prompt = gr.Textbox( visible= advanced_prompt, label=prompt_label, value=launch_prompt, lines=3)
with gr.Column(visible=not advanced_prompt and len(variables) > 0) as prompt_column_wizard_vars:
gr.Markdown("Please fill the following input fields to adapt automatically the Prompt:")
wizard_prompt_activated = "off"
wizard_variables = ""
with gr.Row():
if not advanced_prompt:
for variable in variables:
value = values.get(variable, "")
prompt_vars.append(gr.Textbox( placeholder=variable, min_width=80, show_label= False, info= variable, visible= True, value= "\n".join(value) ))
wizard_prompt_activated = "on"
if len(variables) > 0:
wizard_variables = "\n".join(variables)
for _ in range( PROMPT_VARS_MAX - len(prompt_vars)):
prompt_vars.append(gr.Textbox(visible= False, min_width=80, show_label= False))
with gr.Column(visible=not advanced_prompt) as prompt_column_wizard:
wizard_prompt = gr.Textbox(visible = not advanced_prompt, label=wizard_prompt_label, value=default_wizard_prompt, lines=3)
wizard_prompt_activated_var = gr.Text(wizard_prompt_activated, visible= False)
wizard_variables_var = gr.Text(wizard_variables, visible = False)
with gr.Row(visible= server_config.get("enhancer_enabled", 0) > 0 ) as prompt_enhancer_row:
on_demand_prompt_enhancer = server_config.get("enhancer_mode", 0) == 1
prompt_enhancer_value = ui_defaults.get("prompt_enhancer", "")
if len(prompt_enhancer_value) == 0 and on_demand_prompt_enhancer: prompt_enhancer_value = "T"
prompt_enhancer_btn = gr.Button( value ="Enhance Prompt", visible= on_demand_prompt_enhancer, size="lg", elem_classes="btn_centered")
prompt_enhancer = gr.Dropdown(
choices=
([] if on_demand_prompt_enhancer else [("Disabled", "")]) +
[("Based on Text Prompt Content", "T"),
("Based on Images Prompts Content (such as Start Image and Reference Images)", "I"),
("Based on both Text Prompt and Images Prompts Content", "TI"),
],
value=prompt_enhancer_value,
label="Enhance Prompt using a LLM", scale = 5,
visible= True, show_label= not on_demand_prompt_enhancer,
)
with gr.Row():
fit_canvas = server_config.get("fit_canvas", 0)
if fit_canvas == 1:
label = "Outer Box Resolution (one dimension may be less to preserve video W/H ratio)"
elif fit_canvas == 2:
label = "Output Resolution (Input Images wil be Cropped if the W/H ratio is different)"
else:
label = "Resolution Budget (Pixels will be reallocated to preserve Inputs W/H ratio)"
current_resolution_choice = ui_defaults.get("resolution","832x480") if update_form or last_resolution is None else last_resolution
model_resolutions = model_def.get("resolutions", None)
resolution_choices, current_resolution_choice = get_resolution_choices(current_resolution_choice, model_resolutions)
available_groups, selected_group_resolutions, selected_group = group_resolutions(model_def,resolution_choices, current_resolution_choice)
resolution_group = gr.Dropdown(
choices = available_groups,
value= selected_group,
label= "Category"
)
resolution = gr.Dropdown(
choices = selected_group_resolutions,
value= current_resolution_choice,
label= label,
scale = 5
)
with gr.Row():
batch_size = gr.Slider(1, 16, value=ui_defaults.get("batch_size", 1), step=1, label="Number of Images to Generate", visible = image_outputs)
if image_outputs:
video_length = gr.Slider(1, 9999, value=ui_defaults.get("video_length", 1), step=1, label="Number of frames", visible = False)
elif recammaster:
video_length = gr.Slider(5, 193, value=ui_defaults.get("video_length", get_max_frames(81)), step=4, label="Number of frames (16 = 1s), locked", interactive= False, visible = True)
else:
min_frames, frames_step, _ = get_model_min_frames_and_step(base_model_type)
current_video_length = ui_defaults.get("video_length", 81 if get_model_family(base_model_type)=="wan" else 97)
computed_fps = get_computed_fps(ui_defaults.get("force_fps",""), base_model_type , ui_defaults.get("video_guide", None), ui_defaults.get("video_source", None))
video_length = gr.Slider(min_frames, get_max_frames(737 if test_any_sliding_window(base_model_type) else 337), value=current_video_length,
step=frames_step, label=compute_video_length_label(computed_fps, current_video_length) , visible = True, interactive= True)
with gr.Row(visible = not lock_inference_steps) as inference_steps_row:
num_inference_steps = gr.Slider(1, 100, value=ui_defaults.get("num_inference_steps",30), step=1, label="Number of Inference Steps", visible = True)
show_advanced = gr.Checkbox(label="Advanced Mode", value=advanced_ui)
with gr.Tabs(visible=advanced_ui) as advanced_row:
guidance_max_phases = model_def.get("guidance_max_phases", 0)
no_negative_prompt = model_def.get("no_negative_prompt", False)
any_audio_guidance = fantasy or multitalk
with gr.Tab("General"):
with gr.Column():
with gr.Row():
seed = gr.Slider(-1, 999999999, value=ui_defaults.get("seed",-1), step=1, label="Seed (-1 for random)", scale=2)
guidance_phases_value = ui_defaults.get("guidance_phases", 1)
guidance_phases = gr.Dropdown(
choices=[
("One Phase", 1),
("Two Phases", 2),
("Three Phases", 3)],
value= guidance_phases_value,
label="Guidance Phases",
visible= guidance_max_phases >=2,
interactive = not model_def.get("lock_guidance_phases", False)
)
with gr.Row(visible = guidance_phases_value >=2 ) as guidance_phases_row:
multiple_submodels = model_def.get("multiple_submodels", False)
model_switch_phase = gr.Dropdown(
choices=[
("Phase 1-2 transition", 1),
("Phase 2-3 transition", 2)],
value=ui_defaults.get("model_switch_phase", 1),
label="Model Switch",
visible= model_def.get("multiple_submodels", False) and guidance_phases_value >= 3 and multiple_submodels
)
label ="Phase 1-2" if guidance_phases_value ==3 else ( "Model / Guidance Switch Threshold" if multiple_submodels else "Guidance Switch Threshold" )
switch_threshold = gr.Slider(0, 1000, value=ui_defaults.get("switch_threshold",0), step=1, label = label, visible= guidance_max_phases >= 2 and guidance_phases_value >= 2)
switch_threshold2 = gr.Slider(0, 1000, value=ui_defaults.get("switch_threshold2",0), step=1, label="Phase 2-3", visible= guidance_max_phases >= 3 and guidance_phases_value >= 3)
with gr.Row(visible = guidance_max_phases >=1 ) as guidance_row:
guidance_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance_scale",5), step=0.5, label="Guidance (CFG)", visible=guidance_max_phases >=1 )
guidance2_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance2_scale",5), step=0.5, label="Guidance2 (CFG)", visible= guidance_max_phases >=2 and guidance_phases_value >= 2)
guidance3_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance3_scale",5), step=0.5, label="Guidance3 (CFG)", visible= guidance_max_phases >=3 and guidance_phases_value >= 3)
any_embedded_guidance = model_def.get("embedded_guidance", False)
with gr.Row(visible =any_embedded_guidance or any_audio_guidance) as embedded_guidance_row:
audio_guidance_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("audio_guidance_scale", 4), step=0.5, label="Audio Guidance", visible= any_audio_guidance )
embedded_guidance_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("embedded_guidance", 6.0), step=0.5, label="Embedded Guidance Scale", visible=any_embedded_guidance )
sample_solver_choices = model_def.get("sample_solvers", None)
with gr.Row(visible = sample_solver_choices is not None or not image_outputs) as sample_solver_row:
if sample_solver_choices is None:
sample_solver = gr.Dropdown( value="", choices=[ ("", ""), ], visible= False, label= "Sampler Solver / Scheduler" )
else:
sample_solver = gr.Dropdown( value=ui_defaults.get("sample_solver", sample_solver_choices[0][1]),
choices= sample_solver_choices, visible= True, label= "Sampler Solver / Scheduler"
)
flow_shift = gr.Slider(1.0, 25.0, value=ui_defaults.get("flow_shift",3), step=0.1, label="Shift Scale", visible = not image_outputs)
with gr.Row(visible = vace) as control_net_weights_row:
control_net_weight = gr.Slider(0.0, 2.0, value=ui_defaults.get("control_net_weight",1), step=0.1, label="Control Net Weight #1", visible=vace)
control_net_weight2 = gr.Slider(0.0, 2.0, value=ui_defaults.get("control_net_weight2",1), step=0.1, label="Control Net Weight #2", visible=vace)
negative_prompt = gr.Textbox(label="Negative Prompt (ignored if no Guidance, i.e. CFG = 1)", value=ui_defaults.get("negative_prompt", ""), visible = not (hunyuan_t2v or hunyuan_i2v or no_negative_prompt) )
with gr.Column(visible = vace or t2v or test_class_i2v(model_type)) as NAG_col:
gr.Markdown("NAG enforces Negative Prompt even if no Guidance is set (CFG = 1), set NAG Scale to > 1 to enable it")
with gr.Row():
NAG_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("NAG_scale",1), step=0.1, label="NAG Scale", visible = True)
NAG_tau = gr.Slider(1.0, 5.0, value=ui_defaults.get("NAG_tau",3.5), step=0.1, label="NAG Tau", visible = True)
NAG_alpha = gr.Slider(0.0, 2.0, value=ui_defaults.get("NAG_alpha",.5), step=0.1, label="NAG Alpha", visible = True)
with gr.Row():
repeat_generation = gr.Slider(1, 25.0, value=ui_defaults.get("repeat_generation",1), step=1, label="Num. of Generated Videos per Prompt", visible = not image_outputs)
multi_images_gen_type = gr.Dropdown( value=ui_defaults.get("multi_images_gen_type",0),
choices=[
("Generate every combination of images and texts", 0),
("Match images and text prompts", 1),
], visible= test_class_i2v(model_type), label= "Multiple Images as Texts Prompts"
)
with gr.Tab("Loras"):
with gr.Column(visible = True): #as loras_column:
gr.Markdown("Loras can be used to create special effects on the video by mentioning a trigger word in the Prompt. You can save Loras combinations in presets.")
loras_choices = gr.Dropdown(
choices=[
(lora_name, str(i) ) for i, lora_name in enumerate(loras_names)
],
value= launch_loras,
multiselect= True,
label="Activated Loras"
)
loras_multipliers = gr.Textbox(label="Loras Multipliers (1.0 by default) separated by Space chars or CR, lines that start with # are ignored", value=launch_multis_str)
with gr.Tab("Steps Skipping", visible = any_tea_cache or any_mag_cache) as speed_tab:
with gr.Column():
gr.Markdown("Tea Cache and Mag Cache accelerate the Video Generation by skipping intelligently some steps, the more steps are skipped the lower the quality of the video.")
gr.Markdown("Steps Skipping consumes also VRAM. It is recommended not to skip at least the first 10% steps.")
steps_skipping_choices = [("None", "")]
if any_tea_cache: steps_skipping_choices += [("Tea Cache", "tea")]
if any_mag_cache: steps_skipping_choices += [("Mag Cache", "mag")]
skip_steps_cache_type = gr.Dropdown(
choices= steps_skipping_choices,
value="" if not (any_tea_cache or any_mag_cache) else ui_defaults.get("skip_steps_cache_type",""),
visible=True,
label="Skip Steps Cache Type"
)
skip_steps_multiplier = gr.Dropdown(
choices=[
("around x1.5 speed up", 1.5),
("around x1.75 speed up", 1.75),
("around x2 speed up", 2.0),
("around x2.25 speed up", 2.25),
("around x2.5 speed up", 2.5),
],
value=float(ui_defaults.get("skip_steps_multiplier",1.75)),
visible=True,
label="Skip Steps Cache Global Acceleration"
)
skip_steps_start_step_perc = gr.Slider(0, 100, value=ui_defaults.get("skip_steps_start_step_perc",0), step=1, label="Skip Steps starting moment in % of generation")
with gr.Tab("Post Processing"):
with gr.Column():
gr.Markdown("Upsampling - postprocessing that may improve fluidity and the size of the video")
def gen_upsampling_dropdowns(temporal_upsampling, spatial_upsampling , film_grain_intensity, film_grain_saturation, element_class= None, max_height= None, image_outputs = False):
temporal_upsampling = gr.Dropdown(
choices=[
("Disabled", ""),
("Rife x2 frames/s", "rife2"),
("Rife x4 frames/s", "rife4"),
],
value=temporal_upsampling,
visible=not image_outputs,
scale = 1,
label="Temporal Upsampling",
elem_classes= element_class
# max_height = max_height
)
spatial_upsampling = gr.Dropdown(
choices=[
("Disabled", ""),
("Lanczos x1.5", "lanczos1.5"),
("Lanczos x2.0", "lanczos2"),
],
value=spatial_upsampling,
visible=True,
scale = 1,
label="Spatial Upsampling",
elem_classes= element_class
# max_height = max_height
)
with gr.Row():
film_grain_intensity = gr.Slider(0, 1, value=film_grain_intensity, step=0.01, label="Film Grain Intensity (0 = disabled)")
film_grain_saturation = gr.Slider(0.0, 1, value=film_grain_saturation, step=0.01, label="Film Grain Saturation")
return temporal_upsampling, spatial_upsampling, film_grain_intensity, film_grain_saturation
temporal_upsampling, spatial_upsampling, film_grain_intensity, film_grain_saturation = gen_upsampling_dropdowns(ui_defaults.get("temporal_upsampling", ""), ui_defaults.get("spatial_upsampling", ""), ui_defaults.get("film_grain_intensity", 0), ui_defaults.get("film_grain_saturation", 0.5), image_outputs= image_outputs)
with gr.Tab("Audio", visible = not image_outputs) as audio_tab:
with gr.Column(visible = server_config.get("mmaudio_enabled", 0) != 0) as mmaudio_col:
gr.Markdown("Add a soundtrack based on the content of the Generated Video")
with gr.Row():
MMAudio_setting = gr.Dropdown(
choices=[("Disabled", 0), ("Enabled", 1), ],
value=ui_defaults.get("MMAudio_setting", 0), visible=True, scale = 1, label="MMAudio",
)
# if MMAudio_seed != None:
# MMAudio_seed = gr.Slider(-1, 999999999, value=MMAudio_seed, step=1, scale=3, label="Seed (-1 for random)")
with gr.Row():
MMAudio_prompt = gr.Text(ui_defaults.get("MMAudio_prompt", ""), label="Prompt (1 or 2 keywords)")
MMAudio_neg_prompt = gr.Text(ui_defaults.get("MMAudio_neg_prompt", ""), label="Negative Prompt (1 or 2 keywords)")
with gr.Column(visible = any_control_video) as audio_prompt_type_remux_row:
gr.Markdown("You may transfer the existing audio tracks of a Control Video")
audio_prompt_type_remux = gr.Dropdown(
choices=[
("No Remux", ""),
("Remux Audio Files from Control Video if any and if no MMAudio / Custom Soundtrack", "R"),
],
value=filter_letters(audio_prompt_type_value, "R"),
label="Remux Audio Files",
visible = True
)
with gr.Column():
gr.Markdown("Add Custom Soundtrack to Video")
audio_source = gr.Audio(value= ui_defaults.get("audio_source", None), type="filepath", label="Soundtrack", show_download_button= True)
any_skip_layer_guidance = model_def.get("skip_layer_guidance", False)
any_cfg_zero = model_def.get("cfg_zero", False)
any_cfg_star = model_def.get("cfg_star", False)
any_apg = model_def.get("adaptive_projected_guidance", False)
with gr.Tab("Quality", visible = vace and image_outputs or any_skip_layer_guidance or any_cfg_zero or any_cfg_star or any_apg ) as quality_tab:
with gr.Column(visible = any_skip_layer_guidance ) as skip_layer_guidance_row:
gr.Markdown("Skip Layer Guidance (improves video quality, requires guidance > 1)")
with gr.Row():
slg_switch = gr.Dropdown(
choices=[
("OFF", 0),
("ON", 1),
],
value=ui_defaults.get("slg_switch",0),
visible=True,
scale = 1,
label="Skip Layer guidance"
)
slg_layers = gr.Dropdown(
choices=[
(str(i), i ) for i in range(40)
],
value=ui_defaults.get("slg_layers", [9]),
multiselect= True,
label="Skip Layers",
scale= 3
)
with gr.Row():
slg_start_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_start_perc",10), step=1, label="Denoising Steps % start")
slg_end_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_end_perc",90), step=1, label="Denoising Steps % end")
with gr.Column(visible= any_apg ) as apg_col:
gr.Markdown("Correct Progressive Color Saturation during long Video Generations")
apg_switch = gr.Dropdown(
choices=[
("OFF", 0),
("ON", 1),
],
value=ui_defaults.get("apg_switch",0),
visible=True,
scale = 1,
label="Adaptive Projected Guidance (requires Guidance > 1 or Audio Guidance > 1) " if multitalk else "Adaptive Projected Guidance (requires Guidance > 1)",
)
with gr.Column(visible = any_cfg_star) as cfg_free_guidance_col:
gr.Markdown("Classifier-Free Guidance Zero Star, better adherence to Text Prompt")
cfg_star_switch = gr.Dropdown(
choices=[
("OFF", 0),
("ON", 1),
],
value=ui_defaults.get("cfg_star_switch",0),
visible=True,
scale = 1,
label="Classifier-Free Guidance Star (requires Guidance > 1)"
)
with gr.Row():
cfg_zero_step = gr.Slider(-1, 39, value=ui_defaults.get("cfg_zero_step",-1), step=1, label="CFG Zero below this Layer (Extra Process)", visible = any_cfg_zero)
with gr.Column(visible = (vace or t2v or standin) and image_outputs) as min_frames_if_references_col:
gr.Markdown("Generating a single Frame alone may not be sufficient to preserve Reference Image Identity / Control Image Information or simply to get a good Image Quality. A workaround is to generate a short Video and keep the First Frame.")
min_frames_if_references = gr.Dropdown(
choices=[
("Disabled, generate only one Frame", 1),
("Generate a 5 Frames long Video only if any Reference Image / Control Image (x1.5 slower)",5),
("Generate a 9 Frames long Video only if any Reference Image / Control Image (x2.0 slower)",9),
("Generate a 13 Frames long Video only if any Reference Image / Control Image (x2.5 slower)",13),
("Generate a 17 Frames long Video only if any Reference Image / Control Image (x3.0 slower)",17),
("Generate always a 5 Frames long Video (x1.5 slower)",1005),
("Generate always a 9 Frames long Video (x2.0 slower)",1009),
("Generate always a 13 Frames long Video (x2.5 slower)",1013),
("Generate always a 17 Frames long Video (x3.0 slower)",1017),
],
value=ui_defaults.get("min_frames_if_references",9 if vace else 1),
visible=True,
scale = 1,
label="Generate more frames to preserve Reference Image Identity / Control Image Information or improve"
)
with gr.Tab("Sliding Window", visible= sliding_window_enabled and not image_outputs) as sliding_window_tab:
with gr.Column():
gr.Markdown("A Sliding Window allows you to generate video with a duration not limited by the Model")
gr.Markdown("It is automatically turned on if the number of frames to generate is higher than the Window Size")
if diffusion_forcing:
sliding_window_size = gr.Slider(37, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=20, label="Sliding Window Size (recommended to keep it at 97)")
sliding_window_overlap = gr.Slider(17, 97, value=ui_defaults.get("sliding_window_overlap",17), step=20, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
sliding_window_color_correction_strength = gr.Slider(0, 1, visible=False, value =0)
sliding_window_overlap_noise = gr.Slider(0, 100, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect", visible = True)
sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, visible = False)
elif ltxv:
sliding_window_size = gr.Slider(41, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=8, label="Sliding Window Size")
sliding_window_overlap = gr.Slider(1, 97, value=ui_defaults.get("sliding_window_overlap",9), step=8, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
sliding_window_color_correction_strength = gr.Slider(0, 1, visible=False, value =0)
sliding_window_overlap_noise = gr.Slider(0, 100, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect", visible = False)
sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=8, label="Discard Last Frames of a Window (that may have bad quality)", visible = True)
elif hunyuan_video_custom_edit:
sliding_window_size = gr.Slider(5, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=4, label="Sliding Window Size")
sliding_window_overlap = gr.Slider(1, 97, value=ui_defaults.get("sliding_window_overlap",5), step=4, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
sliding_window_color_correction_strength = gr.Slider(0, 1, visible=False, value =0)
sliding_window_overlap_noise = gr.Slider(0, 150, value=ui_defaults.get("sliding_window_overlap_noise",20), step=1, label="Noise to be added to overlapped frames to reduce blur effect", visible = False)
sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, label="Discard Last Frames of a Window (that may have bad quality)", visible = True)
else: # Vace, Multitalk
sliding_window_defaults = model_def.get("sliding_window_defaults", {})
sliding_window_size = gr.Slider(5, get_max_frames(257), value=ui_defaults.get("sliding_window_size", 129), step=4, label="Sliding Window Size")
sliding_window_overlap = gr.Slider(sliding_window_defaults.get("overlap_min", 1), sliding_window_defaults.get("overlap_max", 97), value=ui_defaults.get("sliding_window_overlap",sliding_window_defaults.get("overlap_default", 5)), step=sliding_window_defaults.get("overlap_step", 4), label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
sliding_window_color_correction_strength = gr.Slider(0, 1, value=ui_defaults.get("sliding_window_color_correction_strength",0), step=0.01, label="Color Correction Strength (match colors of new window with previous one, 0 = disabled)", visible = True)
sliding_window_overlap_noise = gr.Slider(0, 150, value=ui_defaults.get("sliding_window_overlap_noise",20 if vace else 0), step=1, label="Noise to be added to overlapped frames to reduce blur effect" , visible = vace)
sliding_window_discard_last_frames = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_discard_last_frames", 0), step=4, label="Discard Last Frames of a Window (that may have bad quality)", visible = True)
video_prompt_type_alignment = gr.Dropdown(
choices=[
("Aligned to the beginning of the Source Video", ""),
("Aligned to the beginning of the First Window of the new Video Sample", "T"),
],
value=filter_letters(video_prompt_type_value, "T"),
label="Control Video / Control Audio / Positioned Frames Temporal Alignment when any Video to continue",
visible = vace or ltxv or t2v or infinitetalk
)
multi_prompts_gen_type = gr.Dropdown(
choices=[
("Will create a new generated Video added to the Generation Queue", 0),
("Will be used for a new Sliding Window of the same Video Generation", 1),
],
value=ui_defaults.get("multi_prompts_gen_type",0),
visible=True,
scale = 1,
label="Images & Text Prompts separated by a Carriage Return" if (any_start_image or any_end_image) else "Text Prompts separated by a Carriage Return"
)
with gr.Tab("Misc.", visible = True) as misc_tab:
with gr.Column(visible = not (recammaster or ltxv or diffusion_forcing)) as RIFLEx_setting_col:
gr.Markdown("With Riflex you can generate videos longer than 5s which is the default duration of videos used to train the model")
RIFLEx_setting = gr.Dropdown(
choices=[
("Auto (ON if Video longer than 5s)", 0),
("Always ON", 1),
("Always OFF", 2),
],
value=ui_defaults.get("RIFLEx_setting",0),
label="RIFLEx positional embedding to generate long video",
visible = True
)
gr.Markdown("You can change the Default number of Frames Per Second of the output Video, in the absence of Control Video this may create unwanted slow down / acceleration")
force_fps_choices = [(f"Model Default ({fps} fps)", "")]
if any_control_video and (any_video_source or recammaster):
force_fps_choices += [("Auto fps: Source Video if any, or Control Video if any, or Model Default", "auto")]
elif any_control_video :
force_fps_choices += [("Auto fps: Control Video if any, or Model Default", "auto")]
elif any_video_source or recammaster:
force_fps_choices += [("Auto fps: Source Video if any, or Model Default", "auto")]
if any_control_video:
force_fps_choices += [("Control Video fps", "control")]
if any_video_source or recammaster:
force_fps_choices += [("Source Video fps", "source")]
force_fps_choices += [
("15", "15"),
("16", "16"),
("23", "23"),
("24", "24"),
("25", "25"),
("30", "30"),
]
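# As described by the labels assembled above, force_fps maps roughly to:
#   ""        -> keep the model default fps
#   "auto"    -> reuse the Source / Control Video fps when one is provided
#   "control" -> reuse the Control Video fps
#   "source"  -> reuse the Source Video fps
#   "15".."30"-> force a fixed numeric fps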
force_fps = gr.Dropdown(
choices=force_fps_choices,
value=ui_defaults.get("force_fps",""),
label=f"Override Frames Per Second (model default={fps} fps)"
)
gr.Markdown("You can set a more agressive Memory Profile if you generate only Short Videos or Images")
override_profile = gr.Dropdown(
choices=[("Default Memory Profile", -1)] + memory_profile_choices,
value=ui_defaults.get("override_profile", -1),
label=f"Override Memory Profile"
)
with gr.Row():
save_settings_btn = gr.Button("Set Settings as Default", visible = not args.lock_config)
export_settings_from_file_btn = gr.Button("Export Settings to File")
with gr.Row():
settings_file = gr.File(height=41,label="Load Settings From Video / Image / JSON")
settings_base64_output = gr.Text(interactive= False, visible=False, value = "")
settings_filename = gr.Text(interactive= False, visible=False, value = "")
mode = gr.Text(value="", visible = False)
with gr.Column():
if not update_form:
gen_status = gr.Text(interactive= False, label = "Status")
status_trigger = gr.Text(interactive= False, visible=False)
default_files = []
output = gr.Gallery(value =default_files, label="Generated videos", preview= True, show_label=False, elem_id="gallery" , columns=[3], rows=[1], object_fit="contain", height=450, selected_index=0, interactive= False)
output_trigger = gr.Text(interactive= False, visible=False)
refresh_form_trigger = gr.Text(interactive= False, visible=False)
fill_wizard_prompt_trigger = gr.Text(interactive= False, visible=False)
save_form_trigger = gr.Text(interactive= False, visible=False)
with gr.Accordion("Video Info and Late Post Processing & Audio Remuxing", open=False) as video_info_accordion:
with gr.Tabs() as video_info_tabs:
with gr.Tab("Information", id="video_info"):
default_visibility = {} if update_form else {"visible" : False}
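# default_visibility is spread (**) into the Rows below: when the form is being rebuilt
# (update_form=True) no kwarg is passed so their visibility is left untouched, otherwise
# they start hidden until the output.select handler (select_video) shows them.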
video_info = gr.HTML(visible=True, min_height=100, value=get_default_video_info())
with gr.Row(**default_visibility) as video_buttons_row:
video_info_extract_settings_btn = gr.Button("Extract Settings", min_width= 1, size ="sm")
video_info_to_video_source_btn = gr.Button("To Video Source", min_width= 1, size ="sm", visible = any_video_source)
video_info_to_control_video_btn = gr.Button("To Control Video", min_width= 1, size ="sm", visible = any_control_video )
video_info_eject_video_btn = gr.Button("Eject Video", min_width= 1, size ="sm")
with gr.Row(**default_visibility) as image_buttons_row:
video_info_extract_image_settings_btn = gr.Button("Extract Settings", min_width= 1, size ="sm")
video_info_to_start_image_btn = gr.Button("To Start Image", size ="sm", min_width= 1, visible = any_start_image )
video_info_to_end_image_btn = gr.Button("To End Image", size ="sm", min_width= 1, visible = any_end_image)
video_info_to_image_mask_btn = gr.Button("To Mask Image", min_width= 1, size ="sm", visible = any_image_mask and False)
video_info_to_reference_image_btn = gr.Button("To Reference Image", min_width= 1, size ="sm", visible = any_reference_image)
video_info_to_image_guide_btn = gr.Button("To Control Image", min_width= 1, size ="sm", visible = any_control_image )
video_info_eject_image_btn = gr.Button("Eject Image", min_width= 1, size ="sm")
with gr.Tab("Post Processing", id= "post_processing", visible = True) as video_postprocessing_tab:
with gr.Group(elem_classes= "postprocess"):
with gr.Column():
PP_temporal_upsampling, PP_spatial_upsampling, PP_film_grain_intensity, PP_film_grain_saturation = gen_upsampling_dropdowns("", "", 0, 0.5, element_class ="postprocess", image_outputs = False)
with gr.Row():
video_info_postprocessing_btn = gr.Button("Apply Postprocessing", size ="sm", visible=True)
video_info_eject_video2_btn = gr.Button("Eject Video", size ="sm", visible=True)
with gr.Tab("Audio Remuxing", id= "audio_remuxing", visible = True) as audio_remuxing_tab:
with gr.Group(elem_classes= "postprocess"):
with gr.Column(visible = server_config.get("mmaudio_enabled", 0) != 0) as PP_MMAudio_col:
with gr.Row():
PP_MMAudio_setting = gr.Dropdown(
choices=[("Add Custom Audio Sountrack", 0), ("Use MMAudio to generate a Soundtrack based on the Video", 1), ],
value=0, visible=True, scale = 1, label="MMAudio", show_label= False, elem_classes= "postprocess",
)
with gr.Column(visible = False) as PP_MMAudio_row:
with gr.Row():
PP_MMAudio_prompt = gr.Text("", label="Prompt (1 or 2 keywords)", elem_classes= "postprocess")
PP_MMAudio_neg_prompt = gr.Text("", label="Negative Prompt (1 or 2 keywords)", elem_classes= "postprocess")
PP_MMAudio_seed = gr.Slider(-1, 999999999, value=-1, step=1, label="Seed (-1 for random)")
PP_repeat_generation = gr.Slider(1, 25.0, value=1, step=1, label="Number of Sample Videos to Generate")
with gr.Row(visible = True) as PP_custom_audio_row:
PP_custom_audio = gr.Audio(label = "Soundtrack", type="filepath", show_download_button= True,)
with gr.Row():
video_info_remux_audio_btn = gr.Button("Remux Audio", size ="sm", visible=True)
video_info_eject_video3_btn = gr.Button("Eject Video", size ="sm", visible=True)
with gr.Tab("Add Videos / Images", id= "video_add"):
files_to_load = gr.Files(label= "Files to Load in Gallery", height=120)
with gr.Row():
video_info_add_videos_btn = gr.Button("Add Videos / Images", size ="sm")
if not update_form:
if tab_id == 'edit':
edit_btn = gr.Button("Edit")
cancel_btn = gr.Button("Cancel")
else:
generate_btn = gr.Button("Generate")
add_to_queue_btn = gr.Button("Add New Prompt To Queue", visible=False)
generate_trigger = gr.Text(visible = False)
add_to_queue_trigger = gr.Text(visible = False)
with gr.Column(visible= False) as current_gen_column:
with gr.Accordion("Preview", open=False):
preview = gr.Image(label="Preview", height=200, show_label= False)
preview_trigger = gr.Text(visible= False)
gen_info = gr.HTML(visible=False, min_height=1)
with gr.Row() as current_gen_buttons_row:
onemoresample_btn = gr.Button("One More Sample Please !", visible = True)
onemorewindow_btn = gr.Button("Extend this Sample Please !", visible = False)
abort_btn = gr.Button("Abort", visible = True)
with gr.Accordion("Queue Management", open=False) as queue_accordion:
with gr.Row():
queue_html = gr.HTML(
value=generate_queue_html(state_dict["gen"]["queue"]),
elem_id="queue_html_container"
)
queue_action_input = gr.Text(elem_id="queue_action_input", visible=False)
queue_action_trigger = gr.Button(elem_id="queue_action_trigger", visible=False)
with gr.Row(visible= True):
queue_zip_base64_output = gr.Text(visible=False)
save_queue_btn = gr.DownloadButton("Save Queue", size="sm")
load_queue_btn = gr.UploadButton("Load Queue", file_types=[".zip"], size="sm")
clear_queue_btn = gr.Button("Clear Queue", size="sm", variant="stop")
quit_button = gr.Button("Save and Quit", size="sm", variant="secondary")
with gr.Row(visible=False) as quit_confirmation_row:
confirm_quit_button = gr.Button("Confirm", elem_id="comfirm_quit_btn_hidden", size="sm", variant="stop")
cancel_quit_button = gr.Button("Cancel", size="sm", variant="secondary")
hidden_force_quit_trigger = gr.Button("force_quit", visible=False, elem_id="force_quit_btn_hidden")
hidden_countdown_state = gr.Number(value=-1, visible=False, elem_id="hidden_countdown_state_num")
single_hidden_trigger_btn = gr.Button("trigger_countdown", visible=False, elem_id="trigger_info_single_btn")
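# extra_inputs lists the additional components (mostly layout containers and visibility-driven
# rows) that fill_inputs must refresh alongside the regular generation inputs whenever the
# form is rebuilt, e.g. after a model or image-mode switch.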
extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column, image_prompt_type_group, image_prompt_type_radio, image_prompt_type_endcheckbox,
prompt_column_advanced, prompt_column_wizard_vars, prompt_column_wizard, lset_name, save_lset_prompt_drop, advanced_row, speed_tab, audio_tab, mmaudio_col, quality_tab,
sliding_window_tab, misc_tab, prompt_enhancer_row, inference_steps_row, skip_layer_guidance_row, audio_guide_row, RIFLEx_setting_col,
video_prompt_type_video_guide, video_prompt_type_video_guide_alt, video_prompt_type_video_mask, video_prompt_type_image_refs, apg_col, audio_prompt_type_sources, audio_prompt_type_remux, audio_prompt_type_remux_row,
video_guide_outpainting_col,video_guide_outpainting_top, video_guide_outpainting_bottom, video_guide_outpainting_left, video_guide_outpainting_right,
video_guide_outpainting_checkbox, video_guide_outpainting_row, show_advanced, video_info_to_control_video_btn, video_info_to_video_source_btn, sample_solver_row,
video_buttons_row, image_buttons_row, video_postprocessing_tab, audio_remuxing_tab, PP_MMAudio_row, PP_custom_audio_row,
video_info_to_start_image_btn, video_info_to_end_image_btn, video_info_to_reference_image_btn, video_info_to_image_guide_btn, video_info_to_image_mask_btn,
NAG_col, remove_background_sound , speakers_locations_row, embedded_guidance_row, guidance_phases_row, guidance_row, resolution_group, cfg_free_guidance_col, control_net_weights_row, guide_selection_row, image_mode_tabs,
min_frames_if_references_col, video_prompt_type_alignment, prompt_enhancer_btn, tab_inpaint, tab_t2v] + image_start_extra + image_end_extra + image_refs_extra # presets_column,
if update_form:
locals_dict = locals()
gen_inputs = [state_dict if k=="state" else locals_dict[k] for k in inputs_names] + [state_dict] + extra_inputs
return gen_inputs
else:
target_state = gr.Text(value = "state", interactive= False, visible= False)
target_settings = gr.Text(value = "settings", interactive= False, visible= False)
last_choice = gr.Number(value =-1, interactive= False, visible= False)
resolution_group.input(fn=change_resolution_group, inputs=[state, resolution_group], outputs=[resolution], show_progress="hidden")
resolution.change(fn=record_last_resolution, inputs=[state, resolution])
# video_length.release(fn=refresh_video_length_label, inputs=[state, video_length ], outputs = video_length, trigger_mode="always_last" )
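# A single gr.on handler refreshes the video length label whenever any input that affects the
# effective fps / frame count changes (slider release, fps override, control or source video).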
gr.on(triggers=[video_length.release, force_fps.change, video_guide.change, video_source.change], fn=refresh_video_length_label, inputs=[state, video_length, force_fps, video_guide, video_source] , outputs = video_length, trigger_mode="always_last", show_progress="hidden" )
guidance_phases.change(fn=change_guidance_phases, inputs= [state, guidance_phases], outputs =[model_switch_phase, guidance_phases_row, switch_threshold, switch_threshold2, guidance2_scale, guidance3_scale ])
audio_prompt_type_remux.change(fn=refresh_audio_prompt_type_remux, inputs=[state, audio_prompt_type, audio_prompt_type_remux], outputs=[audio_prompt_type])
remove_background_sound.change(fn=refresh_remove_background_sound, inputs=[state, audio_prompt_type, remove_background_sound], outputs=[audio_prompt_type])
audio_prompt_type_sources.change(fn=refresh_audio_prompt_type_sources, inputs=[state, audio_prompt_type, audio_prompt_type_sources], outputs=[audio_prompt_type, audio_guide, audio_guide2, speakers_locations_row, remove_background_sound])
image_prompt_type_radio.change(fn=refresh_image_prompt_type_radio, inputs=[state, image_prompt_type, image_prompt_type_radio], outputs=[image_prompt_type, image_start_row, image_end_row, video_source, keep_frames_video_source, image_prompt_type_endcheckbox], show_progress="hidden" )
image_prompt_type_endcheckbox.change(fn=refresh_image_prompt_type_endcheckbox, inputs=[state, image_prompt_type, image_prompt_type_radio, image_prompt_type_endcheckbox], outputs=[image_prompt_type, image_end_row] )
video_prompt_type_image_refs.input(fn=refresh_video_prompt_type_image_refs, inputs = [state, video_prompt_type, video_prompt_type_image_refs,image_mode], outputs = [video_prompt_type, image_refs_row, remove_background_images_ref, image_refs_relative_size, frames_positions,video_guide_outpainting_col], show_progress="hidden")
video_prompt_type_video_guide.input(fn=refresh_video_prompt_type_video_guide, inputs = [state, video_prompt_type, video_prompt_type_video_guide, image_mode, image_mask_guide, image_guide, image_mask], outputs = [video_prompt_type, video_guide, image_guide, keep_frames_video_guide, denoising_strength, video_guide_outpainting_col, video_prompt_type_video_mask, video_mask, image_mask, image_mask_guide, mask_expand], show_progress="hidden")
video_prompt_type_video_guide_alt.input(fn=refresh_video_prompt_type_video_guide_alt, inputs = [state, video_prompt_type, video_prompt_type_video_guide_alt, image_mode], outputs = [video_prompt_type, video_guide, image_guide, image_refs_row, denoising_strength ], show_progress="hidden")
video_prompt_type_video_mask.input(fn=refresh_video_prompt_type_video_mask, inputs = [state, video_prompt_type, video_prompt_type_video_mask, image_mode, image_mask_guide, image_guide, image_mask], outputs = [video_prompt_type, video_mask, image_mask_guide, image_guide, image_mask, mask_expand], show_progress="hidden")
video_prompt_type_alignment.input(fn=refresh_video_prompt_type_alignment, inputs = [state, video_prompt_type, video_prompt_type_alignment], outputs = [video_prompt_type])
multi_prompts_gen_type.select(fn=refresh_prompt_labels, inputs=[multi_prompts_gen_type, image_mode], outputs=[prompt, wizard_prompt, image_end], show_progress="hidden")
video_guide_outpainting_top.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_top, gr.State(0)], outputs = [video_guide_outpainting], trigger_mode="multiple" )
video_guide_outpainting_bottom.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_bottom,gr.State(1)], outputs = [video_guide_outpainting], trigger_mode="multiple" )
video_guide_outpainting_left.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_left,gr.State(2)], outputs = [video_guide_outpainting], trigger_mode="multiple" )
video_guide_outpainting_right.input(fn=update_video_guide_outpainting, inputs=[video_guide_outpainting, video_guide_outpainting_right,gr.State(3)], outputs = [video_guide_outpainting], trigger_mode="multiple" )
video_guide_outpainting_checkbox.input(fn=refresh_video_guide_outpainting_row, inputs=[video_guide_outpainting_checkbox, video_guide_outpainting], outputs= [video_guide_outpainting_row,video_guide_outpainting])
show_advanced.change(fn=switch_advanced, inputs=[state, show_advanced, lset_name], outputs=[advanced_row, preset_buttons_rows, refresh_lora_btn, refresh2_row ,lset_name]).then(
fn=switch_prompt_type, inputs = [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars], outputs = [wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, prompt_column_advanced, prompt_column_wizard, prompt_column_wizard_vars, *prompt_vars])
queue_action_trigger.click(fn=handle_queue_action, inputs=[state, queue_action_input], outputs=[queue_html, main_tabs], show_progress="hidden")
gr.on( triggers=[output.change, output.select], fn=select_video, inputs=[state, output], outputs=[last_choice, video_info, video_buttons_row, image_buttons_row, video_postprocessing_tab, audio_remuxing_tab], show_progress="hidden")
preview_trigger.change(refresh_preview, inputs= [state], outputs= [preview], show_progress="hidden")
PP_MMAudio_setting.change(fn = lambda value : [gr.update(visible = value == 1), gr.update(visible = value == 0)] , inputs = [PP_MMAudio_setting], outputs = [PP_MMAudio_row, PP_custom_audio_row] )
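# refresh_status_async is a generator bound to status_trigger: it polls the shared gen dict
# twice per second, forwards any pending gr.Progress() arguments, yields status text, and
# exits once status_display is turned off.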
def refresh_status_async(state, progress=gr.Progress()):
gen = get_gen_info(state)
gen["progress"] = progress
while True:
progress_args= gen.get("progress_args", None)
if progress_args is not None:
progress(*progress_args)
gen["progress_args"] = None
status= gen.get("status","")
if status is None or len(status) > 0:
yield status
gen["status"]= ""
if not gen.get("status_display", False):
return
time.sleep(0.5)
def activate_status(state):
if state.get("validate_success",0) != 1:
return
gen = get_gen_info(state)
gen["status_display"] = True
return time.time()
start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js, trigger_settings_download_js, click_brush_js = get_js()
status_trigger.change(refresh_status_async, inputs= [state] , outputs= [gen_status], show_progress_on= [gen_status])
if tab_id == 'generate':
output_trigger.change(refresh_gallery,
inputs = [state],
outputs = [output, gen_info, generate_btn, add_to_queue_btn, current_gen_column, current_gen_buttons_row, queue_html, abort_btn, onemorewindow_btn],
show_progress="hidden"
)
modal_action_trigger.click(
fn=show_modal_image,
inputs=[state, modal_action_input],
outputs=[modal_html_display, modal_container],
show_progress="hidden"
)
close_modal_button.click(
fn=lambda: gr.Column(visible=False),
inputs=[],
outputs=[modal_container],
show_progress="hidden"
)
abort_btn.click(abort_generation, [state], [ abort_btn] ) #.then(refresh_gallery, inputs = [state, gen_info], outputs = [output, gen_info, queue_html] )
onemoresample_btn.click(fn=one_more_sample,inputs=[state], outputs= [state])
onemorewindow_btn.click(fn=one_more_window,inputs=[state], outputs= [state])
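# The components to persist are discovered by introspection: save_inputs' parameter names
# (minus the first and last parameters, presumably the target and the state) are looked up in
# locals() so this list stays in sync with the function signature.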
inputs_names= list(inspect.signature(save_inputs).parameters)[1:-1]
locals_dict = locals()
gen_inputs = [locals_dict[k] for k in inputs_names] + [state]
save_settings_btn.click( fn=validate_wizard_prompt, inputs =[state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] , outputs= [prompt]).then(
save_inputs, inputs =[target_settings] + gen_inputs, outputs = [])
gr.on( triggers=[video_info_extract_settings_btn.click, video_info_extract_image_settings_btn.click], fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then( fn=use_video_settings, inputs =[state, output, last_choice] , outputs= [model_family, model_choice, refresh_form_trigger])
prompt_enhancer_btn.click(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then( fn=enhance_prompt, inputs =[state, prompt, prompt_enhancer, multi_images_gen_type, override_profile ] , outputs= [prompt, wizard_prompt])
save_form_trigger.change(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
)
main_tabs.select(fn=detect_auto_save_form, inputs= [state], outputs= save_form_trigger, trigger_mode="multiple")
video_info_add_videos_btn.click(fn=add_videos_to_gallery, inputs =[state, output, last_choice, files_to_load], outputs = [output, files_to_load, video_info_tabs] )
gr.on(triggers=[video_info_eject_video_btn.click, video_info_eject_video2_btn.click, video_info_eject_video3_btn.click, video_info_eject_image_btn.click], fn=eject_video_from_gallery, inputs =[state, output, last_choice], outputs = [output, video_info, video_buttons_row] )
video_info_to_control_video_btn.click(fn=video_to_control_video, inputs =[state, output, last_choice], outputs = [video_guide] )
video_info_to_video_source_btn.click(fn=video_to_source_video, inputs =[state, output, last_choice], outputs = [video_source] )
video_info_to_start_image_btn.click(fn=image_to_ref_image_add, inputs =[state, output, last_choice, image_start, gr.State("Start Image")], outputs = [image_start] )
video_info_to_end_image_btn.click(fn=image_to_ref_image_add, inputs =[state, output, last_choice, image_end, gr.State("End Image")], outputs = [image_end] )
video_info_to_image_guide_btn.click(fn=image_to_ref_image_guide, inputs =[state, output, last_choice], outputs = [image_guide, image_mask_guide]).then(fn=None, inputs=[], outputs=[], js=click_brush_js )
video_info_to_image_mask_btn.click(fn=image_to_ref_image_set, inputs =[state, output, last_choice, image_mask, gr.State("Image Mask")], outputs = [image_mask] )
video_info_to_reference_image_btn.click(fn=image_to_ref_image_add, inputs =[state, output, last_choice, image_refs, gr.State("Ref Image")], outputs = [image_refs] )
video_info_postprocessing_btn.click(fn=apply_post_processing, inputs =[state, output, last_choice, PP_temporal_upsampling, PP_spatial_upsampling, PP_film_grain_intensity, PP_film_grain_saturation], outputs = [mode, generate_trigger, add_to_queue_trigger ] )
video_info_remux_audio_btn.click(fn=remux_audio, inputs =[state, output, last_choice, PP_MMAudio_setting, PP_MMAudio_prompt, PP_MMAudio_neg_prompt, PP_MMAudio_seed, PP_repeat_generation, PP_custom_audio], outputs = [mode, generate_trigger, add_to_queue_trigger ] )
save_lset_btn.click(validate_save_lset, inputs=[state, lset_name], outputs=[apply_lset_btn, refresh_lora_btn, delete_lset_btn, save_lset_btn,confirm_save_lset_btn, cancel_lset_btn, save_lset_prompt_drop])
delete_lset_btn.click(validate_delete_lset, inputs=[state, lset_name], outputs=[apply_lset_btn, refresh_lora_btn, delete_lset_btn, save_lset_btn,confirm_delete_lset_btn, cancel_lset_btn ])
confirm_save_lset_btn.click(fn=validate_wizard_prompt, inputs =[state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] , outputs= [prompt], show_progress="hidden",).then(
fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None).then(
fn=save_lset, inputs=[state, lset_name, loras_choices, loras_multipliers, prompt, save_lset_prompt_drop], outputs=[lset_name, apply_lset_btn,refresh_lora_btn, delete_lset_btn, save_lset_btn, confirm_save_lset_btn, cancel_lset_btn, save_lset_prompt_drop])
confirm_delete_lset_btn.click(delete_lset, inputs=[state, lset_name], outputs=[lset_name, apply_lset_btn, refresh_lora_btn, delete_lset_btn, save_lset_btn,confirm_delete_lset_btn, cancel_lset_btn ])
cancel_lset_btn.click(cancel_lset, inputs=[], outputs=[apply_lset_btn, refresh_lora_btn, delete_lset_btn, save_lset_btn, confirm_delete_lset_btn,confirm_save_lset_btn, cancel_lset_btn,save_lset_prompt_drop ])
apply_lset_btn.click(fn=save_inputs, inputs =[target_state] + gen_inputs, outputs= None).then(fn=apply_lset,
inputs=[state, wizard_prompt_activated_var, lset_name,loras_choices, loras_multipliers, prompt], outputs=[wizard_prompt_activated_var, loras_choices, loras_multipliers, prompt, fill_wizard_prompt_trigger, model_family, model_choice, refresh_form_trigger])
refresh_lora_btn.click(refresh_lora_list, inputs=[state, lset_name,loras_choices], outputs=[lset_name, loras_choices])
refresh_lora_btn2.click(refresh_lora_list, inputs=[state, lset_name,loras_choices], outputs=[lset_name, loras_choices])
lset_name.select(fn=update_lset_type, inputs=[state, lset_name], outputs=save_lset_prompt_drop)
export_settings_from_file_btn.click(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then(fn=export_settings,
inputs =[state],
outputs= [settings_base64_output, settings_filename]
).then(
fn=None,
inputs=[settings_base64_output, settings_filename],
outputs=None,
js=trigger_settings_download_js
)
image_mode_tabs.select(fn=record_image_mode_tab, inputs=[state], outputs= None
).then(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then(fn=switch_image_mode, inputs =[state] , outputs= [refresh_form_trigger], trigger_mode="multiple")
settings_file.upload(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then(fn=load_settings_from_file, inputs =[state, settings_file] , outputs= [model_family, model_choice, refresh_form_trigger, settings_file])
fill_wizard_prompt_trigger.change(
fn = fill_wizard_prompt, inputs = [state, wizard_prompt_activated_var, prompt, wizard_prompt], outputs = [ wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, prompt_column_advanced, prompt_column_wizard, prompt_column_wizard_vars, *prompt_vars]
)
if tab_id == 'edit':
edit_inputs_names = list(inspect.signature(edit_task_in_queue).parameters)[:-1]
edit_inputs_components = [locals_dict[k] for k in edit_inputs_names]
edit_btn.click(
fn=validate_wizard_prompt,
inputs=[state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars],
outputs=[prompt]
).then(
fn=edit_task_in_queue,
inputs=edit_inputs_components + [state],
outputs=[queue_html, main_tabs]
)
cancel_btn.click(
fn=cancel_edit,
inputs=[state],
outputs=[main_tabs]
)
refresh_form_trigger.change(fn= fill_inputs,
inputs=[state],
outputs=gen_inputs + extra_inputs,
show_progress= "full" if args.debug_gen_form else "hidden",
).then(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars],
outputs= [prompt],
show_progress="hidden",
)
model_family.input(fn=change_model_family, inputs=[state, model_family], outputs= [model_choice])
model_choice.change(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then(fn= change_model,
inputs=[state, model_choice],
outputs= [header]
).then(fn= fill_inputs,
inputs=[state],
outputs=gen_inputs + extra_inputs,
show_progress="full" if args.debug_gen_form else "hidden",
).then(fn= preload_model_when_switching,
inputs=[state],
outputs=[gen_status])
if tab_id == 'generate':
generate_btn.click(fn = init_generate, inputs = [state, output, last_choice], outputs=[generate_trigger, mode])
add_to_queue_btn.click(fn = lambda : (get_unique_id(), ""), inputs = None, outputs=[add_to_queue_trigger, mode])
# gr.on(triggers=[add_to_queue_btn.click, add_to_queue_trigger.change],fn=validate_wizard_prompt,
add_to_queue_trigger.change(fn=validate_wizard_prompt,
inputs =[state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then(fn=process_prompt_and_add_tasks,
inputs = [state, model_choice],
outputs=[queue_html, queue_accordion],
show_progress="hidden",
).then(
fn=update_status,
inputs = [state],
)
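# Main generation pipeline, driven by generate_trigger: validate the wizard prompt, persist
# the form, queue the task(s), switch the UI into "generating" mode, start the status poller,
# run process_tasks, then finalize the gallery and optionally unload the model.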
generate_trigger.change(fn=validate_wizard_prompt,
inputs= [state, wizard_prompt_activated_var, wizard_variables_var, prompt, wizard_prompt, *prompt_vars] ,
outputs= [prompt],
show_progress="hidden",
).then(fn=save_inputs,
inputs =[target_state] + gen_inputs,
outputs= None
).then(fn=process_prompt_and_add_tasks,
inputs = [state, model_choice],
outputs= [queue_html, queue_accordion],
show_progress="hidden",
).then(fn=prepare_generate_video,
inputs= [state],
outputs= [generate_btn, add_to_queue_btn, current_gen_column, current_gen_buttons_row]
).then(fn=activate_status,
inputs= [state],
outputs= [status_trigger],
).then(fn=process_tasks,
inputs= [state],
outputs= [preview_trigger, output_trigger],
show_progress="hidden",
).then(finalize_generation,
inputs= [state],
outputs= [output, abort_btn, generate_btn, add_to_queue_btn, current_gen_column, gen_info]
).then(
fn=lambda s: gr.Accordion(open=False) if len(get_gen_info(s).get("queue", [])) <= 1 else gr.update(),
inputs=[state],
outputs=[queue_accordion]
).then(unload_model_if_needed,
inputs= [state],
outputs= []
)
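# Loading a queue (from the upload button or automatically at page load) restores the queue
# HTML, re-opens the accordion if the queue is not empty, and resumes processing through the
# same activate_status / process_tasks / finalize chain as a manual generation.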
gr.on(triggers=[load_queue_btn.upload, main.load],
fn=load_queue_action,
inputs=[load_queue_btn, state],
outputs=[queue_html]
).then(
fn=lambda s: (gr.update(visible=bool(get_gen_info(s).get("queue",[]))), gr.Accordion(open=True)) if bool(get_gen_info(s).get("queue",[])) else (gr.update(visible=False), gr.update()),
inputs=[state],
outputs=[current_gen_column, queue_accordion]
).then(
fn=init_process_queue_if_any,
inputs=[state],
outputs=[generate_btn, add_to_queue_btn, current_gen_column, ]
).then(fn=activate_status,
inputs= [state],
outputs= [status_trigger],
).then(
fn=process_tasks,
inputs=[state],
outputs=[preview_trigger, output_trigger],
trigger_mode="once"
).then(
fn=finalize_generation_with_state,
inputs=[state],
outputs=[output, abort_btn, generate_btn, add_to_queue_btn, current_gen_column, gen_info, queue_accordion, state],
trigger_mode="always_last"
).then(
unload_model_if_needed,
inputs= [state],
outputs= []
)
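# "Save and Quit" flow: quit_button starts a JS countdown that repeatedly clicks the hidden
# trigger_info_single_btn and finally clicks the hidden force-quit button, unless the confirm
# or cancel buttons clear the timer first (see get_js below).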
single_hidden_trigger_btn.click(
fn=show_countdown_info_from_state,
inputs=[hidden_countdown_state],
outputs=[hidden_countdown_state]
)
quit_button.click(
fn=start_quit_process,
inputs=[],
outputs=[hidden_countdown_state, quit_button, quit_confirmation_row]
).then(
fn=None, inputs=None, outputs=None, js=start_quit_timer_js
)
confirm_quit_button.click(
fn=quit_application,
inputs=[],
outputs=[]
).then(
fn=None, inputs=None, outputs=None, js=cancel_quit_timer_js
)
cancel_quit_button.click(
fn=cancel_quit_process,
inputs=[],
outputs=[hidden_countdown_state, quit_button, quit_confirmation_row]
).then(
fn=None, inputs=None, outputs=None, js=cancel_quit_timer_js
)
hidden_force_quit_trigger.click(
fn=quit_application,
inputs=[],
outputs=[]
)
save_queue_btn.click(
fn=save_queue_action,
inputs=[state],
outputs=[queue_zip_base64_output]
).then(
fn=None,
inputs=[queue_zip_base64_output],
outputs=None,
js=trigger_zip_download_js
)
clear_queue_btn.click(
fn=clear_queue_action,
inputs=[state],
outputs=[queue_html]
).then(
fn=lambda: (gr.update(visible=False), gr.Accordion(open=False)),
inputs=None,
outputs=[current_gen_column, queue_accordion]
)
if tab_id == 'edit':
locals_dict = locals()
gen_inputs = [locals_dict[k] for k in inputs_names] + [state] + extra_inputs
return gen_inputs
else:
return ( state, loras_choices, lset_name, resolution, refresh_form_trigger, save_form_trigger,
# video_guide, image_guide, video_mask, image_mask, image_refs,
)
def generate_download_tab(lset_name,loras_choices, state):
with gr.Row():
with gr.Row(scale =2):
gr.Markdown("WanGP's Lora Festival ! Press the following button to download i2v Remade_AI Loras collection (and bonuses Loras).")
with gr.Row(scale =1):
download_loras_btn = gr.Button("---> Let the Lora's Festival Start !", scale =1)
with gr.Row(scale =1):
gr.Markdown("")
with gr.Row() as download_status_row:
download_status = gr.Markdown()
download_loras_btn.click(fn=download_loras, inputs=[], outputs=[download_status_row, download_status]).then(fn=refresh_lora_list, inputs=[state, lset_name,loras_choices], outputs=[lset_name, loras_choices])
def generate_configuration_tab(state, blocks, header, model_family, model_choice, resolution, refresh_form_trigger):
gr.Markdown("Please click Apply Changes at the bottom so that the changes are effective. Some choices below may be locked if the app has been launched by specifying a config preset.")
with gr.Column():
with gr.Tabs():
# with gr.Row(visible=advanced_ui) as advanced_row:
with gr.Tab("General"):
dropdown_families, dropdown_choices = get_sorted_dropdown(displayed_model_types, None)
transformer_types_choices = gr.Dropdown(
choices= dropdown_choices,
value= transformer_types,
label= "Selectable Generative Models (keep empty to get All of them)",
scale= 2,
multiselect= True
)
fit_canvas_choice = gr.Dropdown(
choices=[
("Dimensions correspond to the Pixels Budget (as the Prompt Image/Video will be Resized to match this pixels Budget, output video height or width may exceed the requested dimensions )", 0),
("Dimensions correspond to the Maximum Width and Height (as the Prompt Image/Video will be Resized to fit into these dimensions, the output video may be smaller)", 1),
("Dimensions correspond to the Output Width and Height (as the Prompt Image/Video will be Cropped to fit exactly these dimensions)", 2),
],
value= server_config.get("fit_canvas", 0),
label="Generated Video Dimensions when Prompt contains an Image or a Video",
interactive= not lock_ui_attention
)
def check(mode):
if mode not in attention_modes_installed:
return " (NOT INSTALLED)"
elif mode not in attention_modes_supported:
return " (NOT SUPPORTED)"
else:
return ""
attention_choice = gr.Dropdown(
choices=[
("Auto : pick sage2 > sage > sdpa depending on what is installed", "auto"),
("Scale Dot Product Attention: default, always available", "sdpa"),
("Flash" + check("flash")+ ": good quality - requires additional install (usually complex to set up on Windows without WSL)", "flash"),
("Xformers" + check("xformers")+ ": good quality - requires additional install (usually complex, may consume less VRAM to set up on Windows without WSL)", "xformers"),
("Sage" + check("sage")+ ": 30% faster but slightly worse quality - requires additional install (usually complex to set up on Windows without WSL)", "sage"),
("Sage2/2++" + check("sage2")+ ": 40% faster but slightly worse quality - requires additional install (usually complex to set up on Windows without WSL)", "sage2"),
("Sage3" + check("sage3")+ ": x2 faster but worse quality - requires additional install (usually complex to set up on Windows without WSL)", "sage3"),
],
value= attention_mode,
label="Attention Type",
interactive= not lock_ui_attention
)
metadata_choice = gr.Dropdown(
choices=[
("Export JSON files", "json"),
("Embed metadata (Exif tag)", "metadata"),
("Neither", "none")
],
value=server_config.get("metadata_type", "metadata"),
label="Metadata Handling"
)
preload_model_policy_choice = gr.CheckboxGroup([("Preload Model while Launching the App","P"), ("Preload Model while Switching Model", "S"), ("Unload Model when Queue is Done", "U")],
value=server_config.get("preload_model_policy",[]),
label="RAM Loading / Unloading Model Policy (in any case VRAM will be freed once the queue has been processed)"
)
clear_file_list_choice = gr.Dropdown(
choices=[
("None", 0),
("Keep the last video", 1),
("Keep the last 5 videos", 5),
("Keep the last 10 videos", 10),
("Keep the last 20 videos", 20),
("Keep the last 30 videos", 30),
],
value=server_config.get("clear_file_list", 5),
label="Keep Previously Generated Videos when starting a new Generation Batch"
)
display_stats_choice = gr.Dropdown(
choices=[
("Disabled", 0),
("Enabled", 1),
],
value=server_config.get("display_stats", 0),
label="Display in real time available RAM / VRAM and other stats (needs a restart)"
)
max_frames_multiplier_choice = gr.Dropdown(
choices=[
("Default", 1),
("x2", 2),
("x3", 3),
("x4", 4),
("x5", 5),
("x6", 7),
("x7", 7),
],
value=server_config.get("max_frames_multiplier", 1),
label="Increase the Max Number of Frames (needs more RAM and VRAM, usually the longer the worse the quality, needs an App restart)"
)
UI_theme_choice = gr.Dropdown(
choices=[
("Blue Sky", "default"),
("Classic Gradio", "gradio"),
],
value=server_config.get("UI_theme", "default"),
label="User Interface Theme. You will need to restart the App the see new Theme."
)
with gr.Tab("Performance"):
quantization_choice = gr.Dropdown(
choices=[
("Scaled Int8 Quantization (recommended)", "int8"),
("16 bits (no quantization)", "bf16"),
],
value= transformer_quantization,
label="Transformer Model Quantization Type (if available)",
)
transformer_dtype_policy_choice = gr.Dropdown(
choices=[
("Best Supported Data Type by Hardware", ""),
("FP16", "fp16"),
("BF16", "bf16"),
],
value= server_config.get("transformer_dtype_policy", ""),
label="Transformer Data Type (if available)"
)
mixed_precision_choice = gr.Dropdown(
choices=[
("16 bits only, requires less VRAM", "0"),
("Mixed 16 / 32 bits, slightly more VRAM needed but better Quality mainly for 1.3B models", "1"),
],
value= server_config.get("mixed_precision", "0"),
label="Transformer Engine Calculation"
)
text_encoder_quantization_choice = gr.Dropdown(
choices=[
("16 bits - unquantized text encoder, better quality uses more RAM", "bf16"),
("8 bits - quantized text encoder, slightly worse quality but uses less RAM", "int8"),
],
value= text_encoder_quantization,
label="Text Encoder model"
)
VAE_precision_choice = gr.Dropdown(
choices=[
("16 bits, requires less VRAM and faster", "16"),
("32 bits, requires twice more VRAM and slower but recommended with Window Sliding", "32"),
],
value= server_config.get("vae_precision", "16"),
label="VAE Encoding / Decoding precision"
)
gr.Text("Beware: when restarting the server or changing a resolution or video duration, the first step of generation for a duration / resolution may last a few minutes due to recompilation", interactive= False, show_label= False )
compile_choice = gr.Dropdown(
choices=[
("On (requires to have Triton installed)", "transformer"),
("Off", "" ),
],
value= compile,
label="Compile Transformer : up to 10-20% faster, useful only if multiple gens at same frames no / resolution",
interactive= not lock_ui_compile
)
depth_anything_v2_variant_choice = gr.Dropdown(
choices=[
("Large (more precise but 2x slower)", "vitl"),
("Big (less precise, less VRAM needed but faster)", "vitb"),
],
value= server_config.get("depth_anything_v2_variant", "vitl"),
label="Depth Anything v2 Vace Preprocessor Model type",
)
vae_config_choice = gr.Dropdown(
choices=[
("Auto", 0),
("Disabled (faster but may require up to 22 GB of VRAM)", 1),
("256 x 256 : If at least 8 GB of VRAM", 2),
("128 x 128 : If at least 6 GB of VRAM", 3),
],
value= vae_config,
label="VAE Tiling - reduce the high VRAM requirements for VAE decoding and VAE encoding (if enabled it will be slower)"
)
boost_choice = gr.Dropdown(
choices=[
# ("Auto (ON if Video longer than 5s)", 0),
("ON", 1),
("OFF", 2),
],
value=boost,
label="Boost: Give a 10% speedup without losing quality at the cost of a litle VRAM (up to 1GB at max frames and resolution)"
)
profile_choice = gr.Dropdown(
choices = memory_profile_choices,
value= default_profile,
label="Profile (for power users only, not needed to change it)"
)
preload_in_VRAM_choice = gr.Slider(0, 40000, value=server_config.get("preload_in_VRAM", 0), step=100, label="Number of MB of Models that are Preloaded in VRAM (0 will use Profile default)")
release_RAM_btn = gr.Button("Force Release RAM")
release_RAM_btn.click(fn=release_RAM)
with gr.Tab("Extensions"):
enhancer_enabled_choice = gr.Dropdown(
choices=[
("Off", 0),
("Florence 2 1.6B + LLava 3.2 3.5B", 1),
("Florence 2 1.6B + LLama Joy Caption (uncensored but needs more VRAM) 9,3B", 2),
],
value=server_config.get("enhancer_enabled", 0),
label="Prompt Enhancer (if enabled, from 8 GB to 14 GB of extra models will be downloaded)"
)
enhancer_mode_choice = gr.Dropdown(
choices=[
("Automatically triggered when Generating a Video", 0),
("On Demand Only", 1),
],
value=server_config.get("enhancer_mode", 0),
label="Prompt Enhancer Usage"
)
mmaudio_enabled_choice = gr.Dropdown(
choices=[
("Off", 0),
("Turned On but unloaded from RAM after usage", 1),
("Turned On and kept in RAM for fast loading", 2),
],
value=server_config.get("mmaudio_enabled", 0),
label="MMAudio (if enabled, 10 GB of extra models will be downloaded)"
)
with gr.Tab("Outputs"):
video_output_codec_choice = gr.Dropdown(
choices=[
("x265 Balanced Quality (CRF 28)", 'libx265_28'),
("x264 Balanced Quality (Level 8)", 'libx264_8'),
("x265 High Quality (CRF 8)", 'libx265_8'),
("x264 High Quality (Level 10)", 'libx264_10'),
("x264 Lossless", 'libx264_lossless'),
],
value=server_config.get("video_output_codec", "libx264_8"),
label="Video Codec to use"
)
image_output_codec_choice = gr.Dropdown(
choices=[
("JPEG Quality 85", 'jpeg_85'),
("WEBP Quality 85", 'webp_85'),
("JPEG Quality 95", 'jpeg_95'),
("WEBP Quality 95", 'webp_95'),
("WEBP Lossless", 'webp_lossless'),
("PNG Lossless", 'png'),
],
value=server_config.get("image_output_codec", "jpeg_95"),
label="Image Codec to use"
)
audio_output_codec_choice = gr.Dropdown(
choices=[
("AAC 128 kbit", 'aac_128'),
],
value=server_config.get("audio_output_codec", "aac_128"),
visible = False,
label="Audio Codec to use"
)
video_save_path_choice = gr.Textbox(
label="Output Folder for Generated Videos (need to restart app to be taken into account)",
value=server_config.get("save_path", save_path)
)
image_save_path_choice = gr.Textbox(
label="Output Folder for Generated Images (need to restart app to be taken into account)",
value=server_config.get("image_save_path", image_save_path)
)
with gr.Tab("Notifications"):
gr.Markdown("### Notification Settings")
notification_sound_enabled_choice = gr.Dropdown(
choices=[
("On", 1),
("Off", 0),
],
value=server_config.get("notification_sound_enabled", 0),
label="Notification Sound Enabled"
)
notification_sound_volume_choice = gr.Slider(
minimum=0,
maximum=100,
value=server_config.get("notification_sound_volume", 50),
step=5,
label="Notification Sound Volume (0 = silent, 100 = very loud)"
)
msg = gr.Markdown()
apply_btn = gr.Button("Apply Changes")
apply_btn.click(
fn=apply_changes,
inputs=[
state,
transformer_types_choices,
transformer_dtype_policy_choice,
text_encoder_quantization_choice,
VAE_precision_choice,
mixed_precision_choice,
video_save_path_choice,
image_save_path_choice,
attention_choice,
compile_choice,
profile_choice,
vae_config_choice,
metadata_choice,
quantization_choice,
boost_choice,
clear_file_list_choice,
preload_model_policy_choice,
UI_theme_choice,
enhancer_enabled_choice,
enhancer_mode_choice,
mmaudio_enabled_choice,
fit_canvas_choice,
preload_in_VRAM_choice,
depth_anything_v2_variant_choice,
notification_sound_enabled_choice,
notification_sound_volume_choice,
max_frames_multiplier_choice,
display_stats_choice,
video_output_codec_choice,
image_output_codec_choice,
audio_output_codec_choice,
resolution,
],
outputs= [msg , header, model_family, model_choice, refresh_form_trigger]
)
def generate_about_tab():
gr.Markdown("
WanGP - AI Generative Models for the GPU Poor by DeepBeepMeep (GitHub)
")
gr.Markdown("Many thanks to:")
gr.Markdown("- Alibaba Wan Team for the best open source video generators (https://github.com/Wan-Video/Wan2.1)")
gr.Markdown("- Alibaba Vace, Multitalk and Fun Teams for their incredible control net models (https://github.com/ali-vilab/VACE), (https://github.com/MeiGen-AI/MultiTalk) and (https://huggingface.co/alibaba-pai/Wan2.2-Fun-A14B-InP) ")
gr.Markdown("- Tencent for the impressive Hunyuan Video models (https://github.com/Tencent-Hunyuan/HunyuanVideo)")
gr.Markdown("- Blackforest Labs for the innovative Flux image generators (https://github.com/black-forest-labs/flux)")
gr.Markdown("- Alibaba Qwen Team for their state of the art Qwen Image generators (https://github.com/QwenLM/Qwen-Image)")
gr.Markdown("- Lightricks for their super fast LTX Video models (https://github.com/Lightricks/LTX-Video)")
gr.Markdown("- Hugging Face for providing hosting for the models and developing must have open source libraries such as Tranformers, Diffusers, Accelerate and Gradio (https://huggingface.co/)")
gr.Markdown(" Huge acknowledgments to these great open source projects used in WanGP:")
gr.Markdown("- Rife: temporal upsampler (https://github.com/hzwer/ECCV2022-RIFE)")
gr.Markdown("- DwPose: Open Pose extractor (https://github.com/IDEA-Research/DWPose)")
gr.Markdown("- DepthAnything & Midas: Depth extractors (https://github.com/DepthAnything/Depth-Anything-V2) and (https://github.com/isl-org/MiDaS")
gr.Markdown("- Matanyone and SAM2: Mask Generation (https://github.com/pq-yang/MatAnyone) and (https://github.com/facebookresearch/sam2)")
gr.Markdown("- Pyannote: speaker diarization (https://github.com/pyannote/pyannote-audio)")
gr.Markdown(" Special thanks to the following people for their support:")
gr.Markdown("- Cocktail Peanuts : QA dpand simple installation via Pinokio.computer")
gr.Markdown("- Tophness : created (former) multi tabs and queuing frameworks")
gr.Markdown("- AmericanPresidentJimmyCarter : added original support for Skip Layer Guidance")
gr.Markdown("- Remade_AI : for their awesome Loras collection")
gr.Markdown("- Reevoy24 : for his repackaging / completing the documentation")
gr.Markdown("- Redtash1 : for designing the protype of the RAM / VRAM stats viewer")
def generate_info_tab():
with open("docs/VACE.md", "r", encoding="utf-8") as reader:
vace= reader.read()
with open("docs/MODELS.md", "r", encoding="utf-8") as reader:
models = reader.read()
with open("docs/LORAS.md", "r", encoding="utf-8") as reader:
loras = reader.read()
with open("docs/FINETUNES.md", "r", encoding="utf-8") as reader:
finetunes = reader.read()
with gr.Tabs() :
with gr.Tab("Models", id="models"):
gr.Markdown(models)
with gr.Tab("Loras", id="loras"):
gr.Markdown(loras)
with gr.Tab("Vace", id="vace"):
gr.Markdown(vace)
with gr.Tab("Finetunes", id="finetunes"):
gr.Markdown(finetunes)
def compact_name(family_name, model_name):
if model_name.startswith(family_name):
return model_name[len(family_name):].strip()
return model_name
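# get_sorted_dropdown builds the family selector and the model dropdown contents: families are
# sorted by the rank stored in families_infos, and when a single family is selected the model
# labels are stripped of their family-name prefix via compact_name.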
def get_sorted_dropdown(dropdown_types, current_model_family):
models_families = [get_model_family(type, for_ui= True) for type in dropdown_types]
families = {}
for family in models_families:
if family not in families: families[family] = 1
families_orders = [ families_infos[family][0] for family in families ]
families_labels = [ families_infos[family][1] for family in families ]
sorted_familes = [ info[1:] for info in sorted(zip(families_orders, families_labels, families), key=lambda c: c[0])]
if current_model_family is None:
dropdown_choices = [ (families_infos[family][0], get_model_name(model_type), model_type) for model_type, family in zip(dropdown_types, models_families)]
else:
dropdown_choices = [ (families_infos[family][0], compact_name(families_infos[family][1], get_model_name(model_type)), model_type) for model_type, family in zip( dropdown_types, models_families) if family == current_model_family]
dropdown_choices = sorted(dropdown_choices, key=lambda c: (c[0], c[1]))
dropdown_choices = [model[1:] for model in dropdown_choices]
return sorted_familes, dropdown_choices
def generate_dropdown_model_list(current_model_type):
dropdown_types= transformer_types if len(transformer_types) > 0 else displayed_model_types
if current_model_type not in dropdown_types:
dropdown_types.append(current_model_type)
current_model_family = get_model_family(current_model_type, for_ui= True)
sorted_familes, dropdown_choices = get_sorted_dropdown(dropdown_types, current_model_family)
dropdown_families = gr.Dropdown(
choices= sorted_familes,
value= current_model_family,
show_label= False,
scale= 1,
elem_id="family_list",
min_width=50
)
return dropdown_families, gr.Dropdown(
choices= dropdown_choices,
value= current_model_type,
show_label= False,
scale= 4,
elem_id="model_list",
)
def change_model_family(state, current_model_family):
dropdown_types= transformer_types if len(transformer_types) > 0 else displayed_model_types
current_family_name = families_infos[current_model_family][1]
models_families = [get_model_family(type, for_ui= True) for type in dropdown_types]
dropdown_choices = [ (compact_name(current_family_name, get_model_name(model_type)), model_type) for model_type, family in zip(dropdown_types, models_families) if family == current_model_family ]
dropdown_choices = sorted(dropdown_choices, key=lambda c: c[0])
last_model_per_family = state.get("last_model_per_family", {})
model_type = last_model_per_family.get(current_model_family, "")
if len(model_type) == "" or model_type not in [choice[1] for choice in dropdown_choices] : model_type = dropdown_choices[0][1]
return gr.Dropdown(choices= dropdown_choices, value = model_type )
def set_new_tab(tab_state, new_tab_no):
global vmc_event_handler
tab_video_mask_creator = 2
old_tab_no = tab_state.get("tab_no",0)
# print(f"old tab {old_tab_no}, new tab {new_tab_no}")
if old_tab_no == tab_video_mask_creator:
vmc_event_handler(False)
elif new_tab_no == tab_video_mask_creator:
if gen_in_progress:
gr.Info("Unable to access this Tab while a Generation is in Progress. Please come back later")
tab_state["tab_no"] = 0
return gr.Tabs(selected="video_gen")
else:
vmc_event_handler(True)
tab_state["tab_no"] = new_tab_no
return gr.Tabs()
def select_tab(tab_state, evt:gr.SelectData):
old_tab_no = tab_state.get("tab_no",0)
if old_tab_no == 0:
saveform_trigger = get_unique_id()
else:
saveform_trigger = gr.update()
return set_new_tab(tab_state, evt.index), saveform_trigger
def get_js():
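# The JS snippets below are returned as raw strings and attached to Gradio events via the
# js= argument; they click hidden buttons by elem_id, trigger browser-side downloads from
# base64 payloads, and auto-select the brush tool in the image editor.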
start_quit_timer_js = """
() => {
function findAndClickGradioButton(elemId) {
const gradioApp = document.querySelector('gradio-app') || document;
const button = gradioApp.querySelector(`#${elemId}`);
if (button) { button.click(); }
}
if (window.quitCountdownTimeoutId) clearTimeout(window.quitCountdownTimeoutId);
let js_click_count = 0;
const max_clicks = 5;
function countdownStep() {
if (js_click_count < max_clicks) {
findAndClickGradioButton('trigger_info_single_btn');
js_click_count++;
window.quitCountdownTimeoutId = setTimeout(countdownStep, 1000);
} else {
findAndClickGradioButton('force_quit_btn_hidden');
}
}
countdownStep();
}
"""
cancel_quit_timer_js = """
() => {
if (window.quitCountdownTimeoutId) {
clearTimeout(window.quitCountdownTimeoutId);
window.quitCountdownTimeoutId = null;
console.log("Quit countdown cancelled (single trigger).");
}
}
"""
trigger_zip_download_js = """
(base64String) => {
if (!base64String) {
console.log("No base64 zip data received, skipping download.");
return;
}
try {
const byteCharacters = atob(base64String);
const byteNumbers = new Array(byteCharacters.length);
for (let i = 0; i < byteCharacters.length; i++) {
byteNumbers[i] = byteCharacters.charCodeAt(i);
}
const byteArray = new Uint8Array(byteNumbers);
const blob = new Blob([byteArray], { type: 'application/zip' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.style.display = 'none';
a.href = url;
a.download = 'queue.zip';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
console.log("Zip download triggered.");
} catch (e) {
console.error("Error processing base64 data or triggering download:", e);
}
}
"""
trigger_settings_download_js = """
(base64String, filename) => {
if (!base64String) {
console.log("No base64 settings data received, skipping download.");
return;
}
try {
const byteCharacters = atob(base64String);
const byteNumbers = new Array(byteCharacters.length);
for (let i = 0; i < byteCharacters.length; i++) {
byteNumbers[i] = byteCharacters.charCodeAt(i);
}
const byteArray = new Uint8Array(byteNumbers);
const blob = new Blob([byteArray], { type: 'text/plain' });
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.style.display = 'none';
a.href = url;
a.download = filename;
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
console.log("settings download triggered.");
} catch (e) {
console.error("Error processing base64 data or triggering download:", e);
}
}
"""
click_brush_js = """
() => {
setTimeout(() => {
const brushButton = document.querySelector('button[aria-label="Brush"]');
if (brushButton) {
brushButton.click();
console.log('Brush button clicked');
} else {
console.log('Brush button not found');
}
}, 1000);
} """
return start_quit_timer_js, cancel_quit_timer_js, trigger_zip_download_js, trigger_settings_download_js, click_brush_js
def create_ui():
global vmc_event_handler
css = """
.postprocess div,
.postprocess span,
.postprocess label,
.postprocess input,
.postprocess select,
.postprocess textarea {
font-size: 12px !important;
padding: 0px !important;
border: 5px !important;
border-radius: 0px !important;
--form-gap-width: 0px !important;
box-shadow: none !important;
--layout-gap: 0px !important;
}
.postprocess span {margin-top:4px;margin-bottom:4px}
#model_list, #family_list {
background-color: black;
padding: 1px;
}
#model_list input, #family_list input {
font-size: 25px;
}
#family_list div div {
border-radius: 4px 0px 0px 4px;
}
#model_list div div {
border-radius: 0px 4px 4px 0px;
}
.title-with-lines {
display: flex;
align-items: center;
margin: 25px 0;
}
.line {
flex-grow: 1;
height: 1px;
background-color: #333;
}
h2 {
margin: 0 20px;
white-space: nowrap;
}
#queue_html_container table {
width: 100%;
border-collapse: collapse;
font-size: 14px;
table-layout: fixed;
}
#queue_html_container th {
text-align: left;
padding: 10px 8px;
border-bottom: 2px solid #4a5568;
font-weight: bold;
font-size: 11px;
text-transform: uppercase;
color: #a0aec0;
white-space: nowrap;
}
#queue_html_container td {
padding: 8px;
border-bottom: 1px solid #2d3748;
vertical-align: middle;
}
#queue_html_container tr:hover td {
background-color: rgba(255, 255, 255, 0.04);
}
#queue_html_container .prompt-cell {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
#queue_html_container .action-button {
background: none;
border: none;
cursor: pointer;
font-size: 1.3em;
padding: 0;
color: #718096;
transition: color 0.2s;
line-height: 1;
}
#queue_html_container .action-button:hover {
color: #e2e8f0;
}
#queue_html_container .center-align {
text-align: center;
}
#queue_html_container .text-left {
text-align: left;
}
#queue_html_container .hover-image img {
max-width: 50px;
max-height: 50px;
object-fit: contain;
display: block;
margin: auto;
}
#queue_html_container .drag-handle {
cursor: grab;
user-select: none;
}
#queue_html_container tr.dragging {
opacity: 0.5;
background: #2d3748;
}
#queue_html_container tr.drag-over-top {
border-top: 2px solid #4299e1;
}
#queue_html_container tr.drag-over-bottom {
border-bottom: 2px solid #4299e1;
}
#image-modal-container {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.7);
justify-content: center;
align-items: center;
z-index: 1000;
padding: 20px;
box-sizing: border-box;
}
#image-modal-container > div {
background-color: white;
padding: 15px;
border-radius: 8px;
max-width: 90%;
max-height: 90%;
overflow: auto;
position: relative;
display: flex;
flex-direction: column;
}
#image-modal-container img {
max-width: 100%;
max-height: 80vh;
object-fit: contain;
margin-top: 10px;
}
#image-modal-close-button-row {
display: flex;
justify-content: flex-end;
}
#image-modal-close-button-row button {
cursor: pointer;
}
.progress-container-custom {
width: 100%;
background-color: #e9ecef;
border-radius: 0.375rem;
overflow: hidden;
height: 25px;
position: relative;
margin-top: 5px;
margin-bottom: 5px;
}
.progress-bar-custom {
height: 100%;
background-color: #0d6efd;
transition: width 0.3s ease-in-out;
display: flex;
align-items: center;
justify-content: center;
color: white;
font-size: 0.9em;
font-weight: bold;
white-space: nowrap;
overflow: hidden;
}
.progress-bar-custom.idle {
background-color: #6c757d;
}
.progress-bar-text {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
display: flex;
align-items: center;
justify-content: center;
color: white;
mix-blend-mode: difference;
font-size: 0.9em;
font-weight: bold;
white-space: nowrap;
z-index: 2;
pointer-events: none;
}
.hover-image {
cursor: pointer;
position: relative;
display: inline-block; /* Important for positioning */
}
.hover-image .tooltip {
visibility: hidden;
opacity: 0;
position: absolute;
top: 100%;
left: 50%;
transform: translateX(-50%);
background-color: rgba(0, 0, 0, 0.8);
color: white;
padding: 4px 6px;
border-radius: 2px;
font-size: 14px;
white-space: nowrap;
pointer-events: none;
z-index: 9999;
transition: visibility 0s linear 1s, opacity 0.3s linear 1s; /* Delay both properties */
}
div.compact_tab, span.compact_tab {
padding: 0px !important;
}
.hover-image .tooltip2 {
visibility: hidden;
opacity: 0;
position: absolute;
top: 50%; /* Center vertically with the image */
left: 0; /* Position to the left of the image */
transform: translateY(-50%); /* Center vertically */
margin-left: -10px; /* Small gap to the left of image */
background-color: rgba(0, 0, 0, 0.8);
color: white;
padding: 8px 12px;
border-radius: 4px;
font-size: 14px;
white-space: nowrap;
pointer-events: none;
z-index: 9999;
transition: visibility 0s linear 1s, opacity 0.3s linear 1s;
}
.hover-image:hover .tooltip, .hover-image:hover .tooltip2 {
visibility: visible;
opacity: 1;
transition: visibility 0s linear 1s, opacity 0.3s linear 1s; /* 1s delay before showing */
}
.btn_centered {margin-top:10px; text-wrap-mode: nowrap;}
"""
UI_theme = server_config.get("UI_theme", "default")
UI_theme = args.theme if len(args.theme) > 0 else UI_theme
if UI_theme == "gradio":
theme = None
else:
theme = gr.themes.Soft(font=["Verdana"], primary_hue="sky", neutral_hue="slate", text_size="md")
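# JS run once at page load: bridges hidden Gradio controls for queue actions and the image modal,
# adds drag-and-drop reordering of queue rows, and keeps wheel events out of the image editor.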
js = """
function() {
window.updateAndTrigger = function(action) {
const hiddenTextbox = document.querySelector('#queue_action_input textarea');
const hiddenButton = document.querySelector('#queue_action_trigger');
if (hiddenTextbox && hiddenButton) {
hiddenTextbox.value = action;
const inputEvent = new Event('input', { bubbles: true });
hiddenTextbox.dispatchEvent(inputEvent);
hiddenButton.click();
} else {
console.error("Could not find hidden queue action elements.");
}
};
console.log('updateAndTrigger function for queue is ready.');
window.showImageModal = function(action) {
const hiddenTextbox = document.querySelector('#modal_action_input textarea');
const hiddenButton = document.querySelector('#modal_action_trigger');
if (hiddenTextbox && hiddenButton) {
hiddenTextbox.value = action;
hiddenTextbox.dispatchEvent(new Event('input', { bubbles: true }));
hiddenButton.click();
} else {
console.error("Could not find hidden modal action elements.");
}
};
window.closeImageModal = function() {
const modal = document.querySelector('#image-modal-container');
if (modal) {
modal.style.display = 'none';
}
};
let draggedItem = null;
function initializeQueueDragAndDrop() {
const queueTbody = document.querySelector('#queue_html_container table > tbody');
if (!queueTbody || queueTbody.dataset.dndInitialized) {
return;
}
queueTbody.dataset.dndInitialized = 'true';
queueTbody.addEventListener('dragstart', (e) => {
if (e.target.classList.contains('drag-handle')) {
draggedItem = e.target.closest('.draggable-row');
if (draggedItem) {
setTimeout(() => {
draggedItem.classList.add('dragging');
}, 0);
}
}
});
queueTbody.addEventListener('dragend', (e) => {
if (draggedItem) {
draggedItem.classList.remove('dragging');
draggedItem = null;
document.querySelectorAll('.drag-over-top, .drag-over-bottom').forEach(el => {
el.classList.remove('drag-over-top', 'drag-over-bottom');
});
}
});
queueTbody.addEventListener('dragover', (e) => {
e.preventDefault();
const targetRow = e.target.closest('.draggable-row');
// Clear previous indicators
document.querySelectorAll('.drag-over-top, .drag-over-bottom').forEach(el => {
el.classList.remove('drag-over-top', 'drag-over-bottom');
});
if (targetRow && draggedItem && targetRow !== draggedItem) {
const rect = targetRow.getBoundingClientRect();
const midpoint = rect.top + rect.height / 2;
if (e.clientY < midpoint) {
targetRow.classList.add('drag-over-top');
} else {
targetRow.classList.add('drag-over-bottom');
}
}
});
queueTbody.addEventListener('dragleave', (e) => {
const relatedTarget = e.relatedTarget;
const queueTable = e.currentTarget.closest('table');
if (queueTable && !queueTable.contains(relatedTarget)) {
document.querySelectorAll('.drag-over-top, .drag-over-bottom').forEach(el => {
el.classList.remove('drag-over-top', 'drag-over-bottom');
});
}
});
queueTbody.addEventListener('drop', (e) => {
e.preventDefault();
const targetRow = e.target.closest('.draggable-row');
if (draggedItem && targetRow && targetRow !== draggedItem) {
const oldIndex = draggedItem.dataset.index;
let newIndex = parseInt(targetRow.dataset.index);
// If dropping on the bottom half, the new index is after the target row
if (targetRow.classList.contains('drag-over-bottom')) {
newIndex++;
}
if (oldIndex != newIndex) {
const action = `move_${oldIndex}_to_${newIndex}`;
window.updateAndTrigger(action);
}
}
// Cleanup visual styles
document.querySelectorAll('.drag-over-top, .drag-over-bottom').forEach(el => {
el.classList.remove('drag-over-top', 'drag-over-bottom');
});
if (draggedItem) {
draggedItem.classList.remove('dragging');
draggedItem = null;
}
});
}
const observer = new MutationObserver((mutationsList, observer) => {
for(const mutation of mutationsList) {
if (mutation.type === 'childList') {
const queueContainer = document.querySelector('#queue_html_container');
if (queueContainer && queueContainer.querySelector('table > tbody')) {
initializeQueueDragAndDrop();
}
}
}
});
const targetNode = document.querySelector('gradio-app');
if (targetNode) {
observer.observe(targetNode, { childList: true, subtree: true });
}
setTimeout(initializeQueueDragAndDrop, 500);
// cancel wheel usage inside image editor
const hit = n => n?.id === "img_editor" || n?.classList?.contains("wheel-pass");
addEventListener("wheel", e => {
const path = e.composedPath?.() || (() => { let a=[],n=e.target; for(;n;n=n.parentNode||n.host) a.push(n); return a; })();
if (path.some(hit)) e.stopImmediatePropagation();
}, { capture: true, passive: true });
}
"""
if server_config.get("display_stats", 0) == 1:
from shared.utils.stats import SystemStatsApp
stats_app = SystemStatsApp()
else:
stats_app = None
with gr.Blocks(css=css, js=js, theme=theme, title="WanGP") as main:
gr.Markdown(f"
WanGP v{WanGP_version} by DeepBeepMeep ") # (Updates)
")
global model_list
tab_state = gr.State({ "tab_no":0 })
with gr.Tabs(selected="video_gen") as main_tabs:
with gr.Tab("Video Generator", id="video_gen") as video_generator_tab:
with gr.Row():
if args.lock_model:
gr.Markdown("