more fixes

DeepBeepMeep 2025-05-17 20:59:46 +02:00
parent ca9e9b94cb
commit 5ba133cae9
3 changed files with 36 additions and 32 deletions


@@ -1305,7 +1305,7 @@ class HunyuanVideoPipeline(DiffusionPipeline):
                 # perform guidance
                 if self.do_classifier_free_guidance:
                     if cfg_star_rescale:
-                        batch_size = noise_pred_text.shape[0]
+                        batch_size = 1
                         positive_flat = noise_pred_text.view(batch_size, -1)
                         negative_flat = noise_pred_uncond.view(batch_size, -1)
                         dot_product = torch.sum(
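
Note: the cfg_star_rescale block this hunk touches projects the conditional prediction onto the unconditional one before classifier-free guidance is applied (a CFG-Zero*-style rescale). The sketch below is illustrative only; the tensor shapes, the epsilon, and the final guidance step are assumptions, not lines from the pipeline. Hard-coding batch_size = 1 makes view(batch_size, -1) flatten the whole tensor into a single row, so the dot product and norm are taken over all elements at once rather than per sample.

import torch

# Minimal sketch of a CFG-Zero*-style rescale (shapes and epsilon are assumptions).
noise_pred_text = torch.randn(2, 16, 8, 32, 32)    # conditional prediction
noise_pred_uncond = torch.randn(2, 16, 8, 32, 32)  # unconditional prediction
guidance_scale = 6.0

batch_size = noise_pred_text.shape[0]              # the commit replaces this with 1
positive_flat = noise_pred_text.view(batch_size, -1)
negative_flat = noise_pred_uncond.view(batch_size, -1)

# Per-row projection coefficient of the positive prediction onto the negative one.
dot_product = torch.sum(positive_flat * negative_flat, dim=1, keepdim=True)
squared_norm = torch.sum(negative_flat ** 2, dim=1, keepdim=True) + 1e-8
alpha = (dot_product / squared_norm).view(batch_size, 1, 1, 1, 1)

# Rescale the unconditional branch, then apply standard classifier-free guidance.
noise_pred_uncond = noise_pred_uncond * alpha
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)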


@@ -154,8 +154,8 @@ class LTXV:
         mixed_precision_transformer = False
     ):
-        if dtype == torch.float16:
-            dtype = torch.bfloat16
+        # if dtype == torch.float16:
+        dtype = torch.bfloat16
         self.mixed_precision_transformer = mixed_precision_transformer
         self.distilled = any("lora" in name for name in model_filepath)
         model_filepath = [name for name in model_filepath if not "lora" in name ]
@@ -169,8 +169,8 @@
         # vae = CausalVideoAutoencoder.from_pretrained(ckpt_path)
         vae = offload.fast_load_transformers_model("ckpts/ltxv_0.9.7_VAE.safetensors", modelClass=CausalVideoAutoencoder)
-        if VAE_dtype == torch.float16:
-            VAE_dtype = torch.bfloat16
+        # if VAE_dtype == torch.float16:
+        VAE_dtype = torch.bfloat16
         vae = vae.to(VAE_dtype)
         vae._model_dtype = VAE_dtype
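
Note: both LTXV hunks comment out the fp16 check and dedent the assignment, so the transformer dtype and the VAE dtype are now forced to bfloat16 regardless of what was requested. A tiny sketch of the behavioural difference (the function name is made up for illustration):

import torch

def pick_dtype(requested: torch.dtype) -> torch.dtype:
    # Before this commit: only fp16 requests were promoted to bf16.
    # if requested == torch.float16:
    #     return torch.bfloat16
    # After this commit: bfloat16 is used unconditionally.
    return torch.bfloat16

print(pick_dtype(torch.float16))  # torch.bfloat16
print(pick_dtype(torch.float32))  # torch.bfloat16 (now promoted as well)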

wgp.py

@@ -1483,7 +1483,10 @@ src_move = [ "ckpts/models_clip_open-clip-xlm-roberta-large-vit-huge-14-bf16.saf
 tgt_move = [ "ckpts/xlm-roberta-large/", "ckpts/umt5-xxl/", "ckpts/umt5-xxl/"]
 for src,tgt in zip(src_move,tgt_move):
     if os.path.isfile(src):
-        shutil.move(src, tgt)
+        try:
+            shutil.move(src, tgt)
+        except:
+            pass
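
Note: this makes the one-time checkpoint migration best-effort. shutil.move can raise, for example shutil.Error when the destination directory already contains a file of the same name, or OSError on permission problems, and the bare except lets startup continue instead of crashing. A narrower version of the same pattern, with a hypothetical helper name and example paths:

import os
import shutil

def move_if_present(src: str, tgt: str) -> None:
    """Move src into tgt if it exists, ignoring failures (best-effort migration)."""
    if os.path.isfile(src):
        try:
            shutil.move(src, tgt)
        except (shutil.Error, OSError):
            pass  # e.g. file already migrated, or target not writable

move_if_present("ckpts/old_model.safetensors", "ckpts/new_location/")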
@@ -2772,7 +2775,7 @@ def generate_video(
     if len(list_mult_choices_nums ) < len(activated_loras):
         list_mult_choices_nums += [1.0] * ( len(activated_loras) - len(list_mult_choices_nums ) )
     loras_selected = [ lora for lora in loras if os.path.basename(lora) in activated_loras]
-    pinnedLora = profile !=5 #False # # #
+    pinnedLora = profile !=5 and transformer_loras_filenames == None #False # # #
     split_linear_modules_map = getattr(trans,"split_linear_modules_map", None)
     if transformer_loras_filenames != None:
         loras_selected += transformer_loras_filenames
@@ -3985,6 +3988,7 @@ def prepare_inputs_dict(target, inputs ):
         for k in unsaved_params:
             inputs.pop(k)
         if not "Vace" in model_filename or "diffusion_forcing" in model_filename or "ltxv" in model_filename:
            unsaved_params = [ "sliding_window_size", "sliding_window_overlap", "sliding_window_overlap_noise", "sliding_window_discard_last_frames"]
            for k in unsaved_params:
@@ -4643,31 +4647,31 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
             )
         with gr.Tab("Quality", visible = not ltxv) as quality_tab:
-            with gr.Row():
+            with gr.Column(visible = not (hunyuan_i2v or hunyuan_t2v or hunyuan_video_custom) ) as skip_layer_guidance_row:
                 gr.Markdown("<B>Skip Layer Guidance (improves video quality)</B>")
                 with gr.Row():
                     slg_switch = gr.Dropdown(
                         choices=[
                             ("OFF", 0),
                             ("ON", 1),
                         ],
                         value=ui_defaults.get("slg_switch",0),
                         visible=True,
                         scale = 1,
                         label="Skip Layer guidance"
                     )
                     slg_layers = gr.Dropdown(
                        choices=[
                            (str(i), i ) for i in range(40)
                        ],
                        value=ui_defaults.get("slg_layers", ["9"]),
                        multiselect= True,
                        label="Skip Layers",
                        scale= 3
                    )
                with gr.Row():
                    slg_start_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_start_perc",10), step=1, label="Denoising Steps % start")
                    slg_end_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_end_perc",90), step=1, label="Denoising Steps % end")
                with gr.Row():
                    gr.Markdown("<B>Experimental: Classifier-Free Guidance Zero Star, better adherence to Text Prompt")
@@ -4772,7 +4776,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
     extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column,
                     prompt_column_advanced, prompt_column_wizard_vars, prompt_column_wizard, lset_name, advanced_row, speed_tab, quality_tab,
-                    sliding_window_tab, misc_tab, prompt_enhancer_row, inference_steps_row,
+                    sliding_window_tab, misc_tab, prompt_enhancer_row, inference_steps_row, skip_layer_guidance_row,
                     video_prompt_type_video_guide, video_prompt_type_image_refs] # show_advanced presets_column,
     if update_form:
         locals_dict = locals()