Mirror of https://github.com/Wan-Video/Wan2.1.git (synced 2025-11-04 14:16:57 +00:00)
better support lightning loras
This commit is contained in:
parent 175e05fc1e
commit 6eb994e18a
@@ -34,6 +34,8 @@ Generation Settings are stored in each of the above regardless of the format (th
Also, you can now choose different output directories for images and videos.
Unexpected luck: fixed Lightning 8 steps for Qwen and Lightning 4 steps for Wan 2.2; now you just need a 1x multiplier, no weird numbers.
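For context, a LoRA multiplier simply scales the low-rank delta that gets merged into the base weights, so once the scheduler matches the distillation setup the Lightning LoRAs can be applied at their nominal 1.0 strength. A minimal, illustrative sketch (not WanGP's actual loader; names and shapes are hypothetical):

```python
import torch

def apply_lora(weight: torch.Tensor, lora_A: torch.Tensor, lora_B: torch.Tensor,
               multiplier: float = 1.0) -> torch.Tensor:
    """Merge a LoRA delta into a weight matrix, scaled by `multiplier`.

    With the scheduler fix above, Lightning LoRAs can stay at multiplier=1.0
    instead of needing a hand-tuned compensation factor.
    """
    return weight + multiplier * (lora_B @ lora_A)

# Shapes: weight (out, in), lora_A (rank, in), lora_B (out, rank)
w = torch.zeros(8, 4)
merged = apply_lora(w, torch.randn(2, 4), torch.randn(8, 2), multiplier=1.0)
```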
### August 10 2025: WanGP v7.76 - Faster than the VAE ...
We have a funny one here today: FastWan 2.2 5B, the fastest video generator, only 20s to generate 121 frames at 720p. The snag is that the VAE is twice as slow...
Thanks to Kijai for extracting the LoRA that is used to build the corresponding finetune.
@@ -151,14 +151,12 @@ class QwenImagePipeline(): #DiffusionPipeline
        text_encoder,
        tokenizer,
        transformer,
        scheduler,
    ):

        self.vae=vae
        self.text_encoder=text_encoder
        self.tokenizer=tokenizer
        self.transformer=transformer
        self.scheduler=scheduler

        self.vae_scale_factor = 2 ** len(self.vae.temperal_downsample) if getattr(self, "vae", None) else 8
        # QwenImage latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
@@ -11,6 +11,9 @@ class family_handler():
    def query_model_def(base_model_type, model_def):
        model_def_output = {
            "image_outputs" : True,
            "sample_solvers":[
                ("Default", "default"),
                ("Lightning", "lightning")]
        }
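These (label, value) pairs are what the generate_video_tab change in wgp.py at the bottom of this commit feeds into the sampler dropdown. A minimal sketch of that wiring, assuming Gradio and the two solvers declared above:

```python
import gradio as gr

# Sketch only: mirrors how wgp.py builds the dropdown from a model's "sample_solvers".
sample_solver_choices = [("Default", "default"), ("Lightning", "lightning")]
sample_solver = gr.Dropdown(
    value=sample_solver_choices[0][1],  # "default"
    choices=sample_solver_choices,      # (label, value) pairs
    label="Sampler Solver / Scheduler",
)
```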
@@ -38,7 +41,7 @@ class family_handler():
        return {
            "repoId" : "DeepBeepMeep/Qwen_image",
            "sourceFolderList" : ["", "Qwen2.5-VL-7B-Instruct"],
            "fileList" : [ ["qwen_vae.safetensors", "qwen_vae_config.json", "qwen_scheduler_config.json"], ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json"] + computeList(text_encoder_filename) ]
            "fileList" : [ ["qwen_vae.safetensors", "qwen_vae_config.json"], ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json"] + computeList(text_encoder_filename) ]
        }

    @staticmethod
@@ -64,10 +67,17 @@ class family_handler():

        return pipe_processor, pipe


    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
        if ui_defaults.get("sample_solver", "") == "":
            ui_defaults["sample_solver"] = "default"

    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
        ui_defaults.update({
            "guidance_scale": 4,
            "sample_solver": "default",
        })
        if model_def.get("reference_image", False):
            ui_defaults.update({
@@ -5,6 +5,7 @@ from typing import Any, Callable, Dict, List, Optional, Union

import numpy as np
import torch, json, os
import math

from diffusers.image_processor import VaeImageProcessor
from .transformer_qwenimage import QwenImageTransformer2DModel
@@ -31,13 +32,7 @@ class model_factory():
        VAE_dtype = torch.float32,
        mixed_precision_transformer = False
    ):

        with open( os.path.join(checkpoint_dir, "qwen_scheduler_config.json"), 'r', encoding='utf-8') as f:
            scheduler_config = json.load(f)
        scheduler_config.pop("_class_name")
        scheduler_config.pop("_diffusers_version")

        scheduler = FlowMatchEulerDiscreteScheduler(**scheduler_config)


        transformer_filename = model_filename[0]
        tokenizer = AutoTokenizer.from_pretrained(os.path.join(checkpoint_dir,"Qwen2.5-VL-7B-Instruct"))
@@ -61,13 +56,11 @@ class model_factory():

        vae = offload.fast_load_transformers_model( os.path.join(checkpoint_dir,"qwen_vae.safetensors"), writable_tensors= True , modelClass=AutoencoderKLQwenImage, defaultConfigPath=os.path.join(checkpoint_dir,"qwen_vae_config.json"))

        self.pipeline = QwenImagePipeline(vae, text_encoder, tokenizer, transformer, scheduler)
        self.pipeline = QwenImagePipeline(vae, text_encoder, tokenizer, transformer)
        self.vae=vae
        self.text_encoder=text_encoder
        self.tokenizer=tokenizer
        self.transformer=transformer
        self.scheduler=scheduler


    def generate(
        self,
@@ -86,6 +79,7 @@ class model_factory():
        video_prompt_type = "",
        VAE_tile_size = None,
        joint_pass = True,
        sample_solver='default',
        **bbargs
    ):
        # Generate with different aspect ratios
@@ -97,6 +91,44 @@ class model_factory():
            "3:4": (1140, 1472)
        }


        if sample_solver =='lightning':
            scheduler_config = {
                "base_image_seq_len": 256,
                "base_shift": math.log(3), # We use shift=3 in distillation
                "invert_sigmas": False,
                "max_image_seq_len": 8192,
                "max_shift": math.log(3), # We use shift=3 in distillation
                "num_train_timesteps": 1000,
                "shift": 1.0,
                "shift_terminal": None, # set shift_terminal to None
                "stochastic_sampling": False,
                "time_shift_type": "exponential",
                "use_beta_sigmas": False,
                "use_dynamic_shifting": True,
                "use_exponential_sigmas": False,
                "use_karras_sigmas": False,
            }
        else:
            scheduler_config = {
                "base_image_seq_len": 256,
                "base_shift": 0.5,
                "invert_sigmas": False,
                "max_image_seq_len": 8192,
                "max_shift": 0.9,
                "num_train_timesteps": 1000,
                "shift": 1.0,
                "shift_terminal": 0.02,
                "stochastic_sampling": False,
                "time_shift_type": "exponential",
                "use_beta_sigmas": False,
                "use_dynamic_shifting": True,
                "use_exponential_sigmas": False,
                "use_karras_sigmas": False
            }

        self.scheduler=FlowMatchEulerDiscreteScheduler(**scheduler_config)
        self.pipeline.scheduler = self.scheduler
        if VAE_tile_size is not None:
            self.vae.use_tiling = VAE_tile_size[0]
            self.vae.tile_latent_min_height = VAE_tile_size[1]
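Why log(3) does the trick in the Lightning branch above: with use_dynamic_shifting enabled and time_shift_type "exponential", the pipeline interpolates mu linearly between base_shift and max_shift according to the image sequence length, and the scheduler applies exp(mu) as the timestep shift. Setting both bounds to math.log(3) therefore pins the effective shift at 3 for every resolution, the value the Lightning distillation assumes (which is also why a plain 1x LoRA multiplier is now enough). A small sketch of that arithmetic, under those assumptions:

```python
import math

def effective_shift(image_seq_len,
                    base_seq_len=256, max_seq_len=8192,
                    base_shift=math.log(3), max_shift=math.log(3)):
    # Linear interpolation of mu by sequence length, then exp(mu) as the shift,
    # mirroring the dynamic-shifting path of FlowMatchEulerDiscreteScheduler.
    m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
    mu = m * (image_seq_len - base_seq_len) + base_shift
    return math.exp(mu)

print(effective_shift(1024))   # ~3.0
print(effective_shift(4096))   # ~3.0, unchanged, unlike the default config above
```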
@@ -108,7 +108,11 @@ class family_handler():
            "adaptive_projected_guidance" : True,
            "tea_cache" : not (base_model_type in ["i2v_2_2", "ti2v_2_2" ] or "URLs2" in model_def),
            "mag_cache" : True,

            "sample_solvers":[
                ("unipc", "unipc"),
                ("euler", "euler"),
                ("dpm++", "dpm++"),
                ("flowmatch causvid", "causvid"), ]
        })

        return extra_model_def
@@ -208,9 +212,17 @@ class family_handler():
        if hasattr(wan_model, "clip"):
            pipe["text_encoder_2"] = wan_model.clip.model
        return wan_model, pipe


    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
        if ui_defaults.get("sample_solver", "") == "":
            ui_defaults["sample_solver"] = "unipc"

    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
        ui_defaults.update({
            "sample_solver": "unipc",
        })
        if base_model_type in ["fantasy"]:
            ui_defaults.update({
                "audio_guidance_scale": 5.0,
@@ -17,7 +17,7 @@ gradio==5.23.0
numpy>=1.23.5,<2
einops
moviepy==1.0.3
mmgp==3.5.8
mmgp==3.5.9
peft==0.15.0
mutagen
pydantic==2.10.6
@@ -337,15 +337,15 @@ def save_image(tensor,
def _get_format_info(quality):
    """Get format extension and parameters."""
    formats = {
        # JPEG with torchvision (works)
        'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': False},
        'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': False},
        'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': False},
        'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': False},

        # JPEG with PIL (so 'quality' works)
        'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': True},
        'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': True},
        'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': True},
        'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': True},

        # PNG with torchvision
        'png': {'ext': '.png', 'params': {}, 'use_pil': False},


        # WebP with PIL (for quality control)
        'webp_95': {'ext': '.webp', 'params': {'quality': 95}, 'use_pil': True},
        'webp_85': {'ext': '.webp', 'params': {'quality': 85}, 'use_pil': True},
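The only change above is flipping use_pil to True for the JPEG entries so they are written through Pillow, where the quality value in params is actually honoured. A minimal sketch of that save path (the array name and shape are assumptions, not the repo's save_image):

```python
import numpy as np
from PIL import Image

def save_with_pil(array: np.ndarray, path: str, quality: int = 85) -> None:
    # Pillow applies the JPEG/WebP quality setting from the table above.
    Image.fromarray(array).save(path, quality=quality)

save_with_pil(np.zeros((64, 64, 3), dtype=np.uint8), "preview.jpg", quality=85)
```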
@@ -8,7 +8,8 @@ import sys
import threading
import time
import numpy as np

import os
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "1"

def generate_notification_beep(volume=50, sample_rate=44100):
    """Generate pleasant C major chord notification sound"""
53 wgp.py
@@ -54,7 +54,7 @@ global_queue_ref = []
AUTOSAVE_FILENAME = "queue.zip"
PROMPT_VARS_MAX = 10

target_mmgp_version = "3.5.8"
target_mmgp_version = "3.5.9"
WanGP_version = "7.77"
settings_version = 2.23
max_source_video_frames = 3000
@@ -1972,11 +1972,11 @@ def get_settings_file_name(model_type):
    return os.path.join(args.settings, model_type + "_settings.json")

def fix_settings(model_type, ui_defaults):
    if model_type == None: return
    if model_type is None: return

    video_settings_version = ui_defaults.get("settings_version", 0)
    settings_version = ui_defaults.get("settings_version", 0)
    model_def = get_model_def(model_type)
    model_type = get_base_model_type(model_type)
    base_model_type = get_base_model_type(model_type)

    prompts = ui_defaults.get("prompts", "")
    if len(prompts) > 0:
@@ -1987,43 +1987,43 @@ def fix_settings(model_type, ui_defaults):
        image_prompt_type = "S" if image_prompt_type == 0 else "SE"
        # if model_type == "flf2v_720p" and not "E" in image_prompt_type:
        #     image_prompt_type = "SE"
        if video_settings_version <= 2:
        if settings_version <= 2:
            image_prompt_type = image_prompt_type.replace("G","")
        ui_defaults["image_prompt_type"] = image_prompt_type

    if "lset_name" in ui_defaults: del ui_defaults["lset_name"]

    audio_prompt_type = ui_defaults.get("audio_prompt_type", None)
    if video_settings_version < 2.2:
        if not model_type in ["vace_1.3B","vace_14B", "sky_df_1.3B", "sky_df_14B", "ltxv_13B"]:
    if settings_version < 2.2:
        if not base_model_type in ["vace_1.3B","vace_14B", "sky_df_1.3B", "sky_df_14B", "ltxv_13B"]:
            for p in ["sliding_window_size", "sliding_window_overlap", "sliding_window_overlap_noise", "sliding_window_discard_last_frames"]:
                if p in ui_defaults: del ui_defaults[p]

    if audio_prompt_type == None :
        if any_audio_track(model_type):
        if any_audio_track(base_model_type):
            audio_prompt_type ="A"
        ui_defaults["audio_prompt_type"] = audio_prompt_type


    video_prompt_type = ui_defaults.get("video_prompt_type", "")
    any_reference_image = model_def.get("reference_image", False)
    if model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar", "phantom_14B", "phantom_1.3B"] or any_reference_image:
    if base_model_type in ["hunyuan_custom", "hunyuan_custom_edit", "hunyuan_custom_audio", "hunyuan_avatar", "phantom_14B", "phantom_1.3B"] or any_reference_image:
        if not "I" in video_prompt_type: # workaround for settings corruption
            video_prompt_type += "I"
    if model_type in ["hunyuan"]:
    if base_model_type in ["hunyuan"]:
        video_prompt_type = video_prompt_type.replace("I", "")

    if model_type in ["flux"] and video_settings_version < 2.23:
    if base_model_type in ["flux"] and settings_version < 2.23:
        video_prompt_type = video_prompt_type.replace("K", "").replace("I", "KI")

    remove_background_images_ref = ui_defaults.get("remove_background_images_ref", 1)
    if video_settings_version < 2.22:
    if settings_version < 2.22:
        if "I" in video_prompt_type:
            if remove_background_images_ref == 2:
                video_prompt_type = video_prompt_type.replace("I", "KI")
            if remove_background_images_ref != 0:
                remove_background_images_ref = 1
    if model_type in ["hunyuan_avatar"]: remove_background_images_ref = 0
    if base_model_type in ["hunyuan_avatar"]: remove_background_images_ref = 0
    ui_defaults["remove_background_images_ref"] = remove_background_images_ref

    ui_defaults["video_prompt_type"] = video_prompt_type
@@ -2044,6 +2044,10 @@ def fix_settings(model_type, ui_defaults):
        del ui_defaults["tea_cache_start_step_perc"]
        ui_defaults["skip_steps_start_step_perc"] = tea_cache_start_step_perc

    model_handler = get_model_handler(base_model_type)
    if hasattr(model_handler, "fix_settings"):
        model_handler.fix_settings(base_model_type, settings_version, model_def, ui_defaults)

def get_default_settings(model_type):
    def get_default_prompt(i2v):
        if i2v:
@@ -3174,8 +3178,8 @@ def select_video(state, input_file_list, event_data: gr.EventData):
            values += [video_outpainting]
            labels += ["Outpainting"]
        video_sample_solver = configs.get("sample_solver", "")
        if model_family == "wan":
            values += ["unipc" if len(video_sample_solver) ==0 else video_sample_solver]
        if model_def.get("sample_solvers", None) is not None and len(video_sample_solver) > 0 :
            values += [video_sample_solver]
            labels += ["Sampler Solver"]
        values += [video_resolution, video_length_summary, video_seed, video_guidance_scale, video_flow_shift, video_num_inference_steps]
        labels += [ "Resolution", video_length_label, "Seed", video_guidance_label, "Shift Scale", "Num Inference steps"]
@@ -5511,7 +5515,7 @@ def prepare_inputs_dict(target, inputs, model_type = None, model_filename = None
    if "force_fps" in inputs and len(inputs["force_fps"])== 0:
        pop += ["force_fps"]

    if not get_model_family(model_type) == "wan" or diffusion_forcing:
    if model_def.get("sample_solvers", None) is None:
        pop += ["sample_solver"]

    # if not (test_class_i2v(base_model_type) or diffusion_forcing or ltxv or recammaster or vace):
@@ -6974,16 +6978,15 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
        with gr.Row(visible = model_def.get("guidance_max_phases",1) >1 and not (no_guidance and image_outputs)) as guidance_row2:
            guidance2_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance2_scale",5), step=0.5, label="Guidance2 (CFG)", visible=not (hunyuan_t2v or hunyuan_i2v or any_embedded_guidance) and not no_guidance)
            switch_threshold = gr.Slider(0, 1000, value=ui_defaults.get("switch_threshold",0), step=1, label="Guidance / Model Switch Threshold", visible=not (hunyuan_t2v or hunyuan_i2v or any_embedded_guidance) and not no_guidance)
        sample_solver_choices = model_def.get("sample_solvers", None)
        with gr.Row(visible = sample_solver_choices is not None ) as sample_solver_row:
            if sample_solver_choices is None:
                sample_solver = gr.Dropdown( value="", choices=[ ("", ""), ], visible= False, label= "Sampler Solver / Scheduler" )
            else:
                sample_solver = gr.Dropdown( value=ui_defaults.get("sample_solver", sample_solver_choices[0][1]),
                    choices= sample_solver_choices, visible= True, label= "Sampler Solver / Scheduler"
                )

        with gr.Row(visible = get_model_family(model_type) == "wan" and not diffusion_forcing ) as sample_solver_row:
            sample_solver = gr.Dropdown( value=ui_defaults.get("sample_solver",""),
                choices=[
                    ("unipc", ""),
                    ("euler", "euler"),
                    ("dpm++", "dpm++"),
                    ("flowmatch causvid", "causvid"),
                ], visible= True, label= "Sampler Solver / Scheduler"
            )

        with gr.Row(visible = vace) as control_net_weights_row:
            control_net_weight = gr.Slider(0.0, 2.0, value=ui_defaults.get("control_net_weight",1), step=0.1, label="Control Net Weight #1", visible=vace)