Mirror of https://github.com/Wan-Video/Wan2.1.git (synced 2025-11-04 14:16:57 +00:00)
better support lightning loras
This commit is contained in:
parent 175e05fc1e
commit 6eb994e18a
@@ -34,6 +34,8 @@ Generation Settings are stored in each of the above regardless of the format (th
Also, you can now choose different output directories for images and videos.
Unexpected luck: fixed Lightning 8 steps for Qwen and Lightning 4 steps for Wan 2.2; now you just need a 1x multiplier, no weird numbers.
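For context, a LoRA multiplier simply scales the low-rank delta that gets merged into the base weights, so once the scheduler matches the distillation setup the Lightning LoRAs can be applied at their nominal 1.0 strength. A minimal, illustrative sketch (not WanGP's actual loader; names and shapes are hypothetical):

```python
import torch

def apply_lora(weight: torch.Tensor, lora_A: torch.Tensor, lora_B: torch.Tensor,
               multiplier: float = 1.0) -> torch.Tensor:
    """Merge a LoRA delta into a weight matrix, scaled by `multiplier`.

    With the scheduler fix above, Lightning LoRAs can stay at multiplier=1.0
    instead of needing a hand-tuned compensation factor.
    """
    return weight + multiplier * (lora_B @ lora_A)

# Shapes: weight (out, in), lora_A (rank, in), lora_B (out, rank)
w = torch.zeros(8, 4)
merged = apply_lora(w, torch.randn(2, 4), torch.randn(8, 2), multiplier=1.0)
```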
### August 10 2025: WanGP v7.76 - Faster than the VAE ...
We have a funny one here today: FastWan 2.2 5B, the fastest video generator, only 20s to generate 121 frames at 720p. The snag is that the VAE is twice as slow...
Thanks to Kijai for extracting the LoRA that is used to build the corresponding finetune.
@@ -151,14 +151,12 @@ class QwenImagePipeline(): #DiffusionPipeline
        text_encoder,
        tokenizer,
        transformer,
        scheduler,
    ):

        self.vae=vae
        self.text_encoder=text_encoder
        self.tokenizer=tokenizer
        self.transformer=transformer
        self.scheduler=scheduler

        self.vae_scale_factor = 2 ** len(self.vae.temperal_downsample) if getattr(self, "vae", None) else 8
        # QwenImage latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
@@ -11,6 +11,9 @@ class family_handler():
    def query_model_def(base_model_type, model_def):
        model_def_output = {
            "image_outputs" : True,
            "sample_solvers":[
                ("Default", "default"),
                ("Lightning", "lightning")]
        }
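These (label, value) pairs are what the generate_video_tab change in wgp.py at the bottom of this commit feeds into the sampler dropdown. A minimal sketch of that wiring, assuming Gradio and the two solvers declared above:

```python
import gradio as gr

# Sketch only: mirrors how wgp.py builds the dropdown from a model's "sample_solvers".
sample_solver_choices = [("Default", "default"), ("Lightning", "lightning")]
sample_solver = gr.Dropdown(
    value=sample_solver_choices[0][1],  # "default"
    choices=sample_solver_choices,      # (label, value) pairs
    label="Sampler Solver / Scheduler",
)
```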
@@ -38,7 +41,7 @@ class family_handler():
        return {
            "repoId" : "DeepBeepMeep/Qwen_image",
            "sourceFolderList" : ["", "Qwen2.5-VL-7B-Instruct"],
            "fileList" : [ ["qwen_vae.safetensors", "qwen_vae_config.json", "qwen_scheduler_config.json"], ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json"] + computeList(text_encoder_filename) ]
            "fileList" : [ ["qwen_vae.safetensors", "qwen_vae_config.json"], ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json"] + computeList(text_encoder_filename) ]
        }

    @staticmethod
@@ -64,10 +67,17 @@ class family_handler():

        return pipe_processor, pipe


    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
        if ui_defaults.get("sample_solver", "") == "":
            ui_defaults["sample_solver"] = "default"

    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
        ui_defaults.update({
            "guidance_scale": 4,
            "sample_solver": "default",
        })
        if model_def.get("reference_image", False):
            ui_defaults.update({
@@ -5,6 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Union

import numpy as np
import torch, json, os
import math

from diffusers.image_processor import VaeImageProcessor
from .transformer_qwenimage import QwenImageTransformer2DModel
@@ -31,13 +32,7 @@ class model_factory():
        VAE_dtype = torch.float32,
        mixed_precision_transformer = False
    ):

        with open( os.path.join(checkpoint_dir, "qwen_scheduler_config.json"), 'r', encoding='utf-8') as f:
            scheduler_config = json.load(f)
        scheduler_config.pop("_class_name")
        scheduler_config.pop("_diffusers_version")

        scheduler = FlowMatchEulerDiscreteScheduler(**scheduler_config)


        transformer_filename = model_filename[0]
        tokenizer = AutoTokenizer.from_pretrained(os.path.join(checkpoint_dir,"Qwen2.5-VL-7B-Instruct"))
@@ -61,13 +56,11 @@ class model_factory():

        vae = offload.fast_load_transformers_model( os.path.join(checkpoint_dir,"qwen_vae.safetensors"), writable_tensors= True , modelClass=AutoencoderKLQwenImage, defaultConfigPath=os.path.join(checkpoint_dir,"qwen_vae_config.json"))

        self.pipeline = QwenImagePipeline(vae, text_encoder, tokenizer, transformer, scheduler)
        self.pipeline = QwenImagePipeline(vae, text_encoder, tokenizer, transformer)
        self.vae=vae
        self.text_encoder=text_encoder
        self.tokenizer=tokenizer
        self.transformer=transformer
        self.scheduler=scheduler


    def generate(
        self,
@@ -86,6 +79,7 @@ class model_factory():
        video_prompt_type = "",
        VAE_tile_size = None,
        joint_pass = True,
        sample_solver='default',
        **bbargs
    ):
        # Generate with different aspect ratios
@@ -97,6 +91,44 @@ class model_factory():
            "3:4": (1140, 1472)
        }


        if sample_solver =='lightning':
            scheduler_config = {
                "base_image_seq_len": 256,
                "base_shift": math.log(3), # We use shift=3 in distillation
                "invert_sigmas": False,
                "max_image_seq_len": 8192,
                "max_shift": math.log(3), # We use shift=3 in distillation
                "num_train_timesteps": 1000,
                "shift": 1.0,
                "shift_terminal": None, # set shift_terminal to None
                "stochastic_sampling": False,
                "time_shift_type": "exponential",
                "use_beta_sigmas": False,
                "use_dynamic_shifting": True,
                "use_exponential_sigmas": False,
                "use_karras_sigmas": False,
            }
        else:
            scheduler_config = {
                "base_image_seq_len": 256,
                "base_shift": 0.5,
                "invert_sigmas": False,
                "max_image_seq_len": 8192,
                "max_shift": 0.9,
                "num_train_timesteps": 1000,
                "shift": 1.0,
                "shift_terminal": 0.02,
                "stochastic_sampling": False,
                "time_shift_type": "exponential",
                "use_beta_sigmas": False,
                "use_dynamic_shifting": True,
                "use_exponential_sigmas": False,
                "use_karras_sigmas": False
            }

        self.scheduler=FlowMatchEulerDiscreteScheduler(**scheduler_config)
        self.pipeline.scheduler = self.scheduler
        if VAE_tile_size is not None:
            self.vae.use_tiling = VAE_tile_size[0]
            self.vae.tile_latent_min_height = VAE_tile_size[1]
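Why log(3) does the trick in the Lightning branch above: with use_dynamic_shifting enabled and time_shift_type "exponential", the pipeline interpolates mu linearly between base_shift and max_shift according to the image sequence length, and the scheduler applies exp(mu) as the timestep shift. Setting both bounds to math.log(3) therefore pins the effective shift at 3 for every resolution, the value the Lightning distillation assumes (which is also why a plain 1x LoRA multiplier is now enough). A small sketch of that arithmetic, under those assumptions:

```python
import math

def effective_shift(image_seq_len,
                    base_seq_len=256, max_seq_len=8192,
                    base_shift=math.log(3), max_shift=math.log(3)):
    # Linear interpolation of mu by sequence length, then exp(mu) as the shift,
    # mirroring the dynamic-shifting path of FlowMatchEulerDiscreteScheduler.
    m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
    mu = m * (image_seq_len - base_seq_len) + base_shift
    return math.exp(mu)

print(effective_shift(1024))   # ~3.0
print(effective_shift(4096))   # ~3.0, unchanged, unlike the default config above
```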
@@ -108,7 +108,11 @@ class family_handler():
            "adaptive_projected_guidance" : True,
            "tea_cache" : not (base_model_type in ["i2v_2_2", "ti2v_2_2" ] or "URLs2" in model_def),
            "mag_cache" : True,

            "sample_solvers":[
                ("unipc", "unipc"),
                ("euler", "euler"),
                ("dpm++", "dpm++"),
                ("flowmatch causvid", "causvid"), ]
        })

        return extra_model_def
@@ -208,9 +212,17 @@ class family_handler():
        if hasattr(wan_model, "clip"):
            pipe["text_encoder_2"] = wan_model.clip.model
        return wan_model, pipe


    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
        if ui_defaults.get("sample_solver", "") == "":
            ui_defaults["sample_solver"] = "unipc"

    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
        ui_defaults.update({
            "sample_solver": "unipc",
        })
        if base_model_type in ["fantasy"]:
            ui_defaults.update({
                "audio_guidance_scale": 5.0,
@@ -17,7 +17,7 @@ gradio==5.23.0
numpy>=1.23.5,<2
einops
moviepy==1.0.3
mmgp==3.5.8
mmgp==3.5.9
peft==0.15.0
mutagen
pydantic==2.10.6
@@ -337,15 +337,15 @@ def save_image(tensor,
def _get_format_info(quality):
    """Get format extension and parameters."""
    formats = {
        # JPEG with torchvision (works)
        'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': False},
        'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': False},
        'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': False},
        'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': False},

        # JPEG with PIL (so 'quality' works)
        'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': True},
        'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': True},
        'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': True},
        'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': True},

        # PNG with torchvision
        'png': {'ext': '.png', 'params': {}, 'use_pil': False},


        # WebP with PIL (for quality control)
        'webp_95': {'ext': '.webp', 'params': {'quality': 95}, 'use_pil': True},
        'webp_85': {'ext': '.webp', 'params': {'quality': 85}, 'use_pil': True},
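The only change above is flipping use_pil to True for the JPEG entries so they are written through Pillow, where the quality value in params is actually honoured. A minimal sketch of that save path (the array name and shape are assumptions, not the repo's save_image):

```python
import numpy as np
from PIL import Image

def save_with_pil(array: np.ndarray, path: str, quality: int = 85) -> None:
    # Pillow applies the JPEG/WebP quality setting from the table above.
    Image.fromarray(array).save(path, quality=quality)

save_with_pil(np.zeros((64, 64, 3), dtype=np.uint8), "preview.jpg", quality=85)
```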
@@ -8,7 +8,8 @@ import sys
import threading
import time
import numpy as np

import os
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "1"

def generate_notification_beep(volume=50, sample_rate=44100):
    """Generate pleasant C major chord notification sound"""
53 wgp.py
@@ -54,7 +54,7 @@ global_queue_ref = []
AUTOSAVE_FILENAME = "queue.zip"
PROMPT_VARS_MAX = 10

target_mmgp_version = "3.5.8"
target_mmgp_version = "3.5.9"
WanGP_version = "7.77"
settings_version = 2.23
max_source_video_frames = 3000
@@ -1972,11 +1972,11 @@ def get_settings_file_name(model_type):
    return os.path.join(args.settings, model_type + "_settings.json")

def fix_settings(model_type, ui_defaults):
    if model_type == None: return
    if model_type is None: return

    video_settings_version = ui_defaults.get("settings_version", 0)
    settings_version = ui_defaults.get("settings_version", 0)
    model_def = get_model_def(model_type)
    model_type = get_base_model_type(model_type)
    base_model_type = get_base_model_type(model_type)

    prompts = ui_defaults.get("prompts", "")
    if len(prompts) > 0:
@@ -1987,43 +1987,43 @@ def fix_settings(model_type, ui_defaults):
        image_prompt_type = "S" if image_prompt_type == 0 else "SE"
        # if model_type == "flf2v_720p" and not "E" in image_prompt_type:
        #     image_prompt_type = "SE"
        if video_settings_version <= 2:
        if settings_version <= 2:
            image_prompt_type = image_prompt_type.replace("G","")
        ui_defaults["image_prompt_type"] = image_prompt_type

    if "lset_name" in ui_defaults: del ui_defaults["lset_name"]

    audio_prompt_type = ui_defaults.get("audio_prompt_type", None)
    if video_settings_version < 2.2:
        if not model_type in ["vace_1.3B","vace_14B", "sky_df_1.3B", "sky_df_14B", "ltxv_13B"]:
    if settings_version < 2.2:
        if not base_model_type in ["vace_1.3B","vace_14B", "sky_df_1.3B", "sky_df_14B", "ltxv_13B"]:
            for p in ["sliding_window_size", "sliding_window_overlap", "sliding_window_overlap_noise", "sliding_window_discard_last_frames"]:
                if p in ui_defaults: del ui_defaults[p]

    if audio_prompt_type == None :
        if any_audio_track(model_type):
        if any_audio_track(base_model_type):
            audio_prompt_type ="A"
        ui_defaults["audio_prompt_type"] = audio_prompt_type


    video_prompt_type = ui_defaults.get("video_prompt_type", "")
    any_reference_image = model_def.get("reference_image", False)
    if model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar", "phantom_14B", "phantom_1.3B"] or any_reference_image:
    if base_model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar", "phantom_14B", "phantom_1.3B"] or any_reference_image:
        if not "I" in video_prompt_type: # workaround for settings corruption
            video_prompt_type += "I"
    if model_type in ["hunyuan"]:
    if base_model_type in ["hunyuan"]:
        video_prompt_type = video_prompt_type.replace("I", "")

    if model_type in ["flux"] and video_settings_version < 2.23:
    if base_model_type in ["flux"] and settings_version < 2.23:
        video_prompt_type = video_prompt_type.replace("K", "").replace("I", "KI")

    remove_background_images_ref = ui_defaults.get("remove_background_images_ref", 1)
    if video_settings_version < 2.22:
    if settings_version < 2.22:
        if "I" in video_prompt_type:
            if remove_background_images_ref == 2:
                video_prompt_type = video_prompt_type.replace("I", "KI")
            if remove_background_images_ref != 0:
                remove_background_images_ref = 1
    if model_type in ["hunyuan_avatar"]: remove_background_images_ref = 0
    if base_model_type in ["hunyuan_avatar"]: remove_background_images_ref = 0
    ui_defaults["remove_background_images_ref"] = remove_background_images_ref

    ui_defaults["video_prompt_type"] = video_prompt_type
@@ -2044,6 +2044,10 @@ def fix_settings(model_type, ui_defaults):
        del ui_defaults["tea_cache_start_step_perc"]
        ui_defaults["skip_steps_start_step_perc"] = tea_cache_start_step_perc

    model_handler = get_model_handler(base_model_type)
    if hasattr(model_handler, "fix_settings"):
        model_handler.fix_settings(base_model_type, settings_version, model_def, ui_defaults)

def get_default_settings(model_type):
    def get_default_prompt(i2v):
        if i2v:
@@ -3174,8 +3178,8 @@ def select_video(state, input_file_list, event_data: gr.EventData):
            values += [video_outpainting]
            labels += ["Outpainting"]
        video_sample_solver = configs.get("sample_solver", "")
        if model_family == "wan":
            values += ["unipc" if len(video_sample_solver) ==0 else video_sample_solver]
        if model_def.get("sample_solvers", None) is not None and len(video_sample_solver) > 0 :
            values += [video_sample_solver]
            labels += ["Sampler Solver"]
        values += [video_resolution, video_length_summary, video_seed, video_guidance_scale, video_flow_shift, video_num_inference_steps]
        labels += [ "Resolution", video_length_label, "Seed", video_guidance_label, "Shift Scale", "Num Inference steps"]
@@ -5511,7 +5515,7 @@ def prepare_inputs_dict(target, inputs, model_type = None, model_filename = None
    if "force_fps" in inputs and len(inputs["force_fps"])== 0:
        pop += ["force_fps"]

    if not get_model_family(model_type) == "wan" or diffusion_forcing:
    if model_def.get("sample_solvers", None) is None:
        pop += ["sample_solver"]

    # if not (test_class_i2v(base_model_type) or diffusion_forcing or ltxv or recammaster or vace):
@@ -6974,16 +6978,15 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
        with gr.Row(visible = model_def.get("guidance_max_phases",1) >1 and not (no_guidance and image_outputs)) as guidance_row2:
            guidance2_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance2_scale",5), step=0.5, label="Guidance2 (CFG)", visible=not (hunyuan_t2v or hunyuan_i2v or any_embedded_guidance) and not no_guidance)
            switch_threshold = gr.Slider(0, 1000, value=ui_defaults.get("switch_threshold",0), step=1, label="Guidance / Model Switch Threshold", visible=not (hunyuan_t2v or hunyuan_i2v or any_embedded_guidance) and not no_guidance)
        sample_solver_choices = model_def.get("sample_solvers", None)
        with gr.Row(visible = sample_solver_choices is not None ) as sample_solver_row:
            if sample_solver_choices is None:
                sample_solver = gr.Dropdown( value="", choices=[ ("", ""), ], visible= False, label= "Sampler Solver / Scheduler" )
            else:
                sample_solver = gr.Dropdown( value=ui_defaults.get("sample_solver", sample_solver_choices[0][1]),
                    choices= sample_solver_choices, visible= True, label= "Sampler Solver / Scheduler"
                )

        with gr.Row(visible = get_model_family(model_type) == "wan" and not diffusion_forcing ) as sample_solver_row:
            sample_solver = gr.Dropdown( value=ui_defaults.get("sample_solver",""),
                choices=[
                    ("unipc", ""),
                    ("euler", "euler"),
                    ("dpm++", "dpm++"),
                    ("flowmatch causvid", "causvid"),
                ], visible= True, label= "Sampler Solver / Scheduler"
            )

        with gr.Row(visible = vace) as control_net_weights_row:
            control_net_weight = gr.Slider(0.0, 2.0, value=ui_defaults.get("control_net_weight",1), step=0.1, label="Control Net Weight #1", visible=vace)