better support lightning loras

This commit is contained in:
deepbeepmeep 2025-08-12 04:59:28 +02:00
parent 175e05fc1e
commit 6eb994e18a
9 changed files with 107 additions and 49 deletions

View File

@ -34,6 +34,8 @@ Generation Settings are stored in each of the above regardless of the format (th
Also, you can now choose different output directories for images and videos.
Unexpected luck: fixed Lightning 8 steps for Qwen and Lightning 4 steps for Wan 2.2; now you just need a 1x multiplier, no weird numbers.
### August 10 2025: WanGP v7.76 - Faster than the VAE ...
We have a funny one here today: FastWan 2.2 5B, the Fastest Video Generator, only 20s to generate 121 frames at 720p. The snag is that the VAE is twice as slow...
Thanks to Kijai for extracting the LoRA that is used to build the corresponding finetune.

View File

@ -151,14 +151,12 @@ class QwenImagePipeline(): #DiffusionPipeline
text_encoder,
tokenizer,
transformer,
scheduler,
):
self.vae=vae
self.text_encoder=text_encoder
self.tokenizer=tokenizer
self.transformer=transformer
self.scheduler=scheduler
self.vae_scale_factor = 2 ** len(self.vae.temperal_downsample) if getattr(self, "vae", None) else 8
# QwenImage latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible

View File

@ -11,6 +11,9 @@ class family_handler():
def query_model_def(base_model_type, model_def):
model_def_output = {
"image_outputs" : True,
"sample_solvers":[
("Default", "default"),
("Lightning", "lightning")]
}
@ -38,7 +41,7 @@ class family_handler():
return {
"repoId" : "DeepBeepMeep/Qwen_image",
"sourceFolderList" : ["", "Qwen2.5-VL-7B-Instruct"],
"fileList" : [ ["qwen_vae.safetensors", "qwen_vae_config.json", "qwen_scheduler_config.json"], ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json"] + computeList(text_encoder_filename) ]
"fileList" : [ ["qwen_vae.safetensors", "qwen_vae_config.json"], ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json"] + computeList(text_encoder_filename) ]
}
@staticmethod
@ -64,10 +67,17 @@ class family_handler():
return pipe_processor, pipe
@staticmethod
def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
if ui_defaults.get("sample_solver", "") == "":
ui_defaults["sample_solver"] = "default"
@staticmethod
def update_default_settings(base_model_type, model_def, ui_defaults):
ui_defaults.update({
"guidance_scale": 4,
"sample_solver": "default",
})
if model_def.get("reference_image", False):
ui_defaults.update({

View File

@ -5,6 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
import numpy as np
import torch, json, os
import math
from diffusers.image_processor import VaeImageProcessor
from .transformer_qwenimage import QwenImageTransformer2DModel
@ -31,13 +32,7 @@ class model_factory():
VAE_dtype = torch.float32,
mixed_precision_transformer = False
):
with open( os.path.join(checkpoint_dir, "qwen_scheduler_config.json"), 'r', encoding='utf-8') as f:
scheduler_config = json.load(f)
scheduler_config.pop("_class_name")
scheduler_config.pop("_diffusers_version")
scheduler = FlowMatchEulerDiscreteScheduler(**scheduler_config)
transformer_filename = model_filename[0]
tokenizer = AutoTokenizer.from_pretrained(os.path.join(checkpoint_dir,"Qwen2.5-VL-7B-Instruct"))
@ -61,13 +56,11 @@ class model_factory():
vae = offload.fast_load_transformers_model( os.path.join(checkpoint_dir,"qwen_vae.safetensors"), writable_tensors= True , modelClass=AutoencoderKLQwenImage, defaultConfigPath=os.path.join(checkpoint_dir,"qwen_vae_config.json"))
self.pipeline = QwenImagePipeline(vae, text_encoder, tokenizer, transformer, scheduler)
self.pipeline = QwenImagePipeline(vae, text_encoder, tokenizer, transformer)
self.vae=vae
self.text_encoder=text_encoder
self.tokenizer=tokenizer
self.transformer=transformer
self.scheduler=scheduler
def generate(
self,
@ -86,6 +79,7 @@ class model_factory():
video_prompt_type = "",
VAE_tile_size = None,
joint_pass = True,
sample_solver='default',
**bbargs
):
# Generate with different aspect ratios
@ -97,6 +91,44 @@ class model_factory():
"3:4": (1140, 1472)
}
if sample_solver =='lightning':
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3), # We use shift=3 in distillation
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3), # We use shift=3 in distillation
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None, # set shift_terminal to None
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
else:
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": 0.5,
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": 0.9,
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": 0.02,
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False
}
self.scheduler=FlowMatchEulerDiscreteScheduler(**scheduler_config)
self.pipeline.scheduler = self.scheduler
if VAE_tile_size is not None:
self.vae.use_tiling = VAE_tile_size[0]
self.vae.tile_latent_min_height = VAE_tile_size[1]

View File

@ -108,7 +108,11 @@ class family_handler():
"adaptive_projected_guidance" : True,
"tea_cache" : not (base_model_type in ["i2v_2_2", "ti2v_2_2" ] or "URLs2" in model_def),
"mag_cache" : True,
"sample_solvers":[
("unipc", "unipc"),
("euler", "euler"),
("dpm++", "dpm++"),
("flowmatch causvid", "causvid"), ]
})
return extra_model_def
@ -208,9 +212,17 @@ class family_handler():
if hasattr(wan_model, "clip"):
pipe["text_encoder_2"] = wan_model.clip.model
return wan_model, pipe
@staticmethod
def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
if ui_defaults.get("sample_solver", "") == "":
ui_defaults["sample_solver"] = "unipc"
@staticmethod
def update_default_settings(base_model_type, model_def, ui_defaults):
ui_defaults.update({
"sample_solver": "unipc",
})
if base_model_type in ["fantasy"]:
ui_defaults.update({
"audio_guidance_scale": 5.0,

View File

@ -17,7 +17,7 @@ gradio==5.23.0
numpy>=1.23.5,<2
einops
moviepy==1.0.3
mmgp==3.5.8
mmgp==3.5.9
peft==0.15.0
mutagen
pydantic==2.10.6

View File

@ -337,15 +337,15 @@ def save_image(tensor,
def _get_format_info(quality):
"""Get format extension and parameters."""
formats = {
# JPEG with torchvision (works)
'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': False},
'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': False},
'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': False},
'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': False},
# JPEG with PIL (so 'quality' works)
'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': True},
'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': True},
'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': True},
'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': True},
# PNG with torchvision
'png': {'ext': '.png', 'params': {}, 'use_pil': False},
# WebP with PIL (for quality control)
'webp_95': {'ext': '.webp', 'params': {'quality': 95}, 'use_pil': True},
'webp_85': {'ext': '.webp', 'params': {'quality': 85}, 'use_pil': True},

View File

@ -8,7 +8,8 @@ import sys
import threading
import time
import numpy as np
import os
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "1"
def generate_notification_beep(volume=50, sample_rate=44100):
"""Generate pleasant C major chord notification sound"""

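Setting PYGAME_HIDE_SUPPORT_PROMPT suppresses pygame's "Hello from the pygame community" banner, but only if the variable is in the environment before pygame is first imported; putting the assignment at module level, as in the hunk above, guarantees that ordering for any later import. A minimal sketch of the constraint (the mixer call is illustrative, not taken from this file):

import os
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "1"  # must run before the first 'import pygame'

import pygame  # the support banner is now suppressed
pygame.mixer.init()  # e.g. to play the generated notification beep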
wgp.py
View File

@ -54,7 +54,7 @@ global_queue_ref = []
AUTOSAVE_FILENAME = "queue.zip"
PROMPT_VARS_MAX = 10
target_mmgp_version = "3.5.8"
target_mmgp_version = "3.5.9"
WanGP_version = "7.77"
settings_version = 2.23
max_source_video_frames = 3000
@ -1972,11 +1972,11 @@ def get_settings_file_name(model_type):
return os.path.join(args.settings, model_type + "_settings.json")
def fix_settings(model_type, ui_defaults):
if model_type == None: return
if model_type is None: return
video_settings_version = ui_defaults.get("settings_version", 0)
settings_version = ui_defaults.get("settings_version", 0)
model_def = get_model_def(model_type)
model_type = get_base_model_type(model_type)
base_model_type = get_base_model_type(model_type)
prompts = ui_defaults.get("prompts", "")
if len(prompts) > 0:
@ -1987,43 +1987,43 @@ def fix_settings(model_type, ui_defaults):
image_prompt_type = "S" if image_prompt_type == 0 else "SE"
# if model_type == "flf2v_720p" and not "E" in image_prompt_type:
# image_prompt_type = "SE"
if video_settings_version <= 2:
if settings_version <= 2:
image_prompt_type = image_prompt_type.replace("G","")
ui_defaults["image_prompt_type"] = image_prompt_type
if "lset_name" in ui_defaults: del ui_defaults["lset_name"]
audio_prompt_type = ui_defaults.get("audio_prompt_type", None)
if video_settings_version < 2.2:
if not model_type in ["vace_1.3B","vace_14B", "sky_df_1.3B", "sky_df_14B", "ltxv_13B"]:
if settings_version < 2.2:
if not base_model_type in ["vace_1.3B","vace_14B", "sky_df_1.3B", "sky_df_14B", "ltxv_13B"]:
for p in ["sliding_window_size", "sliding_window_overlap", "sliding_window_overlap_noise", "sliding_window_discard_last_frames"]:
if p in ui_defaults: del ui_defaults[p]
if audio_prompt_type == None :
if any_audio_track(model_type):
if any_audio_track(base_model_type):
audio_prompt_type ="A"
ui_defaults["audio_prompt_type"] = audio_prompt_type
video_prompt_type = ui_defaults.get("video_prompt_type", "")
any_reference_image = model_def.get("reference_image", False)
if model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar", "phantom_14B", "phantom_1.3B"] or any_reference_image:
if base_model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar", "phantom_14B", "phantom_1.3B"] or any_reference_image:
if not "I" in video_prompt_type: # workaround for settings corruption
video_prompt_type += "I"
if model_type in ["hunyuan"]:
if base_model_type in ["hunyuan"]:
video_prompt_type = video_prompt_type.replace("I", "")
if model_type in ["flux"] and video_settings_version < 2.23:
if base_model_type in ["flux"] and settings_version < 2.23:
video_prompt_type = video_prompt_type.replace("K", "").replace("I", "KI")
remove_background_images_ref = ui_defaults.get("remove_background_images_ref", 1)
if video_settings_version < 2.22:
if settings_version < 2.22:
if "I" in video_prompt_type:
if remove_background_images_ref == 2:
video_prompt_type = video_prompt_type.replace("I", "KI")
if remove_background_images_ref != 0:
remove_background_images_ref = 1
if model_type in ["hunyuan_avatar"]: remove_background_images_ref = 0
if base_model_type in ["hunyuan_avatar"]: remove_background_images_ref = 0
ui_defaults["remove_background_images_ref"] = remove_background_images_ref
ui_defaults["video_prompt_type"] = video_prompt_type
@ -2044,6 +2044,10 @@ def fix_settings(model_type, ui_defaults):
del ui_defaults["tea_cache_start_step_perc"]
ui_defaults["skip_steps_start_step_perc"] = tea_cache_start_step_perc
model_handler = get_model_handler(base_model_type)
if hasattr(model_handler, "fix_settings"):
model_handler.fix_settings(base_model_type, settings_version, model_def, ui_defaults)
def get_default_settings(model_type):
def get_default_prompt(i2v):
if i2v:
@ -3174,8 +3178,8 @@ def select_video(state, input_file_list, event_data: gr.EventData):
values += [video_outpainting]
labels += ["Outpainting"]
video_sample_solver = configs.get("sample_solver", "")
if model_family == "wan":
values += ["unipc" if len(video_sample_solver) ==0 else video_sample_solver]
if model_def.get("sample_solvers", None) is not None and len(video_sample_solver) > 0 :
values += [video_sample_solver]
labels += ["Sampler Solver"]
values += [video_resolution, video_length_summary, video_seed, video_guidance_scale, video_flow_shift, video_num_inference_steps]
labels += [ "Resolution", video_length_label, "Seed", video_guidance_label, "Shift Scale", "Num Inference steps"]
@ -5511,7 +5515,7 @@ def prepare_inputs_dict(target, inputs, model_type = None, model_filename = None
if "force_fps" in inputs and len(inputs["force_fps"])== 0:
pop += ["force_fps"]
if not get_model_family(model_type) == "wan" or diffusion_forcing:
if model_def.get("sample_solvers", None) is None:
pop += ["sample_solver"]
# if not (test_class_i2v(base_model_type) or diffusion_forcing or ltxv or recammaster or vace):
@ -6974,16 +6978,15 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
with gr.Row(visible = model_def.get("guidance_max_phases",1) >1 and not (no_guidance and image_outputs)) as guidance_row2:
guidance2_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance2_scale",5), step=0.5, label="Guidance2 (CFG)", visible=not (hunyuan_t2v or hunyuan_i2v or any_embedded_guidance) and not no_guidance)
switch_threshold = gr.Slider(0, 1000, value=ui_defaults.get("switch_threshold",0), step=1, label="Guidance / Model Switch Threshold", visible=not (hunyuan_t2v or hunyuan_i2v or any_embedded_guidance) and not no_guidance)
sample_solver_choices = model_def.get("sample_solvers", None)
with gr.Row(visible = sample_solver_choices is not None ) as sample_solver_row:
if sample_solver_choices is None:
sample_solver = gr.Dropdown( value="", choices=[ ("", ""), ], visible= False, label= "Sampler Solver / Scheduler" )
else:
sample_solver = gr.Dropdown( value=ui_defaults.get("sample_solver", sample_solver_choices[0][1]),
choices= sample_solver_choices, visible= True, label= "Sampler Solver / Scheduler"
)
with gr.Row(visible = get_model_family(model_type) == "wan" and not diffusion_forcing ) as sample_solver_row:
sample_solver = gr.Dropdown( value=ui_defaults.get("sample_solver",""),
choices=[
("unipc", ""),
("euler", "euler"),
("dpm++", "dpm++"),
("flowmatch causvid", "causvid"),
], visible= True, label= "Sampler Solver / Scheduler"
)
with gr.Row(visible = vace) as control_net_weights_row:
control_net_weight = gr.Slider(0.0, 2.0, value=ui_defaults.get("control_net_weight",1), step=0.1, label="Control Net Weight #1", visible=vace)