better support when MMAudio is disabled

DeepBeepMeep 2025-07-02 15:40:47 +02:00
parent 65b3c3e7e4
commit cb69c17018
2 changed files with 42 additions and 33 deletions


@@ -27,13 +27,13 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
- WanGP will keep the last generated videos in the Gallery and will remember the last model you used if you restart the app while keeping the Web page open

Taking care of your life is not enough, you want new stuff to play with?
- MMAudio directly inside WanGP: add an audio soundtrack that matches the content of your video. It is a low-VRAM port of MMAudio, so 6 GB of VRAM should be sufficient
- MMAudio directly inside WanGP: add an audio soundtrack that matches the content of your video. It is a low-VRAM port of MMAudio, so 6 GB of VRAM should be sufficient. You will need to go to the *Extensions* tab of the WanGP *Configuration* to enable MMAudio
- Forgot to upsample your video during the generation? Want to try another MMAudio variation? Fear not: you can also apply upsampling or add an MMAudio track once the video generation is done. Even better, you can ask WanGP for multiple MMAudio variations and pick the one you like best
- MagCache support: a new step-skipping approach, supposed to be better than TeaCache. It makes a difference if you usually generate with a high number of steps
- SageAttention2++ support: not just compatibility but also slightly reduced VRAM usage
- Video2Video in Wan Text2Video: this is the paradox, a text2video model becomes a video2video model if you start the denoising process later on an existing video (see the sketch after this list)
- FusioniX upsampler: an illustration of Video2Video inside Text2Video. Use the FusioniX text2video model with an output resolution of 1080p and a denoising strength of 0.25 and you will get one of the best upsamplers (in only 2-3 steps, though you will need lots of VRAM). Increase the denoising strength and you will get one of the best video restorers
- Preliminary support for multiple Wan Samplers
- Preliminary support for multiple Wan Samplers / Schedulers
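As a rough illustration of the Video2Video trick above, here is a hedged sketch; the pipeline methods are hypothetical names, not WanGP's actual API:

```python
# Hypothetical sketch: video2video as "text2video that starts late".
def video2video(t2v_pipeline, input_video, prompt, num_steps=30, denoising_strength=0.25):
    # Skip the first (1 - strength) fraction of the denoising schedule...
    start_step = int(num_steps * (1 - denoising_strength))
    # ...noise the input video's latents to the level expected at that step...
    latents = t2v_pipeline.encode(input_video)
    noisy = t2v_pipeline.add_noise(latents, step=start_step)
    # ...and denoise only the remaining steps. A small strength (e.g. 0.25)
    # mostly preserves the content, which is why this doubles as an upsampler/restorer.
    return t2v_pipeline.denoise(noisy, prompt=prompt, from_step=start_step, to_step=num_steps)
```

With the short schedules distilled models like FusioniX use, a 0.25 strength leaves only the last few denoising steps to run, matching the 2-3 steps figure in the bullet.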
### June 23 2025: WanGP v6.3, Vace Unleashed. Thought we couldn't squeeze Vace even more?
- Multithreaded preprocessing when possible for faster generations

wgp.py

@@ -41,6 +41,7 @@ from preprocessing.matanyone import app as matanyone_app
from tqdm import tqdm
import requests
global_queue_ref = []
AUTOSAVE_FILENAME = "queue.zip"
PROMPT_VARS_MAX = 10
@@ -2136,6 +2137,31 @@ def get_hunyuan_text_encoder_filename(text_encoder_quantization):
    return text_encoder_filename

def process_files_def(repoId, sourceFolderList, fileList):
    # Download any missing files for a model from the Hugging Face Hub into ckpts/
    from huggingface_hub import hf_hub_download, snapshot_download
    targetRoot = "ckpts/"
    for sourceFolder, files in zip(sourceFolderList, fileList):
        if len(files) == 0:
            # An empty file list means "mirror the whole folder"
            if not Path(targetRoot + sourceFolder).exists():
                snapshot_download(repo_id=repoId, allow_patterns=sourceFolder + "/*", local_dir=targetRoot)
        else:
            # Otherwise fetch each listed file individually, skipping those already present
            for onefile in files:
                if len(sourceFolder) > 0:
                    if not os.path.isfile(targetRoot + sourceFolder + "/" + onefile):
                        hf_hub_download(repo_id=repoId, filename=onefile, local_dir=targetRoot, subfolder=sourceFolder)
                else:
                    if not os.path.isfile(targetRoot + onefile):
                        hf_hub_download(repo_id=repoId, filename=onefile, local_dir=targetRoot)
def download_mmaudio():
    # Fetch the MMAudio weights and their CLIP dependency, but only if MMAudio is enabled
    if server_config.get("mmaudio_enabled", 0) != 0:
        enhancer_def = {
            "repoId": "DeepBeepMeep/Wan2.1",
            "sourceFolderList": ["mmaudio", "DFN5B-CLIP-ViT-H-14-378"],
            "fileList": [["mmaudio_large_44k_v2.pth", "synchformer_state_dict.pth", "v1-44.pth"], ["open_clip_config.json", "open_clip_pytorch_model.bin"]],
        }
        process_files_def(**enhancer_def)
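For reference, a minimal hypothetical call showing the two code paths of process_files_def: an empty per-folder list snapshots the whole Hub folder, while a non-empty list fetches each missing file individually ("extra_file.safetensors" below is a made-up root-level file, not one of the repo's actual checkpoints):

```python
process_files_def(
    repoId="DeepBeepMeep/Wan2.1",
    sourceFolderList=["mmaudio", ""],
    fileList=[
        [],                           # empty -> snapshot_download of "mmaudio/*"
        ["extra_file.safetensors"],   # named -> hf_hub_download of each missing file
    ],
)
```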
def download_models(model_filename, model_type):
    def computeList(filename):
        if filename == None:
@@ -2144,22 +2170,8 @@ def download_models(model_filename, model_type):
        filename = filename[pos+1:]
        return [filename]

    def process_files_def(repoId, sourceFolderList, fileList):
        targetRoot = "ckpts/"
        for sourceFolder, files in zip(sourceFolderList, fileList):
            if len(files) == 0:
                if not Path(targetRoot + sourceFolder).exists():
                    snapshot_download(repo_id=repoId, allow_patterns=sourceFolder + "/*", local_dir=targetRoot)
            else:
                for onefile in files:
                    if len(sourceFolder) > 0:
                        if not os.path.isfile(targetRoot + sourceFolder + "/" + onefile):
                            hf_hub_download(repo_id=repoId, filename=onefile, local_dir=targetRoot, subfolder=sourceFolder)
                    else:
                        if not os.path.isfile(targetRoot + onefile):
                            hf_hub_download(repo_id=repoId, filename=onefile, local_dir=targetRoot)

    from huggingface_hub import hf_hub_download, snapshot_download
    from urllib.request import urlretrieve
    from wan.utils.utils import create_progress_hook
@@ -2180,15 +2192,7 @@ def download_models(model_filename, model_type):
        }
        process_files_def(**enhancer_def)

    if server_config.get("mmaudio_enabled", 0) != 0:
        enhancer_def = {
            "repoId": "DeepBeepMeep/Wan2.1",
            "sourceFolderList": ["mmaudio", "DFN5B-CLIP-ViT-H-14-378"],
            "fileList": [["mmaudio_large_44k_v2.pth", "synchformer_state_dict.pth", "v1-44.pth"], ["open_clip_config.json", "open_clip_pytorch_model.bin"]],
        }
        process_files_def(**enhancer_def)

    download_mmaudio()

def download_file(url, filename):
    if url.startswith("https://huggingface.co/") and "/resolve/main/" in url:
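As context for download_file (its body is cut off by the hunk): a Hugging Face resolve URL carries both the repo id and the file path, so it can be split back into hf_hub_download arguments. A hedged sketch of that parsing, since the actual implementation is not shown here:

```python
def split_resolve_url(url: str):
    # "https://huggingface.co/owner/repo/resolve/main/sub/file.bin"
    #   -> repo_id = "owner/repo", filename = "sub/file.bin"
    path = url[len("https://huggingface.co/"):]
    repo_id, filename = path.split("/resolve/main/", 1)
    return repo_id, filename
```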
@@ -2686,7 +2690,8 @@ def apply_changes( state,
        model_choice = generate_dropdown_model_list(transformer_type)
    header = generate_header(state["model_type"], compile=compile, attention_mode=attention_mode)
    return "<DIV ALIGN=CENTER>The new configuration has been successfully applied</DIV>", header, model_choice, gr.Row(visible=server_config["enhancer_enabled"] == 1), gr.Row(visible=server_config["mmaudio_enabled"] > 0)
    mmaudio_enabled = server_config["mmaudio_enabled"] > 0
    return "<DIV ALIGN=CENTER>The new configuration has been successfully applied</DIV>", header, model_choice, gr.Row(visible=server_config["enhancer_enabled"] == 1), gr.Row(visible=mmaudio_enabled), gr.Column(visible=mmaudio_enabled)
@@ -3522,6 +3527,8 @@ def edit_video(
configs["spatial_upsampling"] = spatial_upsampling
any_mmaudio = MMAudio_setting != 0 and server_config.get("mmaudio_enabled", 0) != 0 and frames_count >=output_fps
if any_mmaudio: download_mmaudio()
tmp_path = None
any_change = False
if sample != None:
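The guard above is a lazy-download pattern: heavy optional weights are fetched only when the feature is actually invoked rather than at startup (download_mmaudio itself skips files already on disk). A generic sketch of the idea, with hypothetical names:

```python
_fetched: set[str] = set()

def ensure_weights(feature: str, download_fn) -> None:
    # Lazy, at-most-once download per process; download_fn itself should
    # skip files that already exist on disk (as process_files_def does).
    if feature not in _fetched:
        download_fn()
        _fetched.add(feature)

# e.g. ensure_weights("mmaudio", download_mmaudio)
```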
@@ -6552,11 +6559,12 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
with gr.Group(elem_classes="postprocess"):
    with gr.Column():
        PP_temporal_upsampling, PP_spatial_upsampling = gen_upsampling_dropdowns("", "", element_class="postprocess")
        with gr.Column() as PP_MMAudio_col:
            PP_MMAudio_setting, PP_MMAudio_prompt, PP_MMAudio_neg_prompt, _ = gen_mmaudio_dropdowns(0, "", "", None, element_class="postprocess")
            PP_MMAudio_seed = gr.Slider(-1, 999999999, value=-1, step=1, label="Seed (-1 for random)")
        PP_repeat_generation = gr.Slider(1, 25.0, value=1, step=1, label="Number of Sample Videos to Generate")
        video_info_postprocessing_btn = gr.Button("Apply Upscaling & MMAudio", size="sm", visible=True)
        video_info_postprocessing_btn = gr.Button("Apply Postprocessing", size="sm", visible=True)
with gr.Tab("Add Videos", id="video_add"):
    files_to_load = gr.Files(label="Files to Load in Gallery", height=120)
    with gr.Row():
@@ -6919,7 +6927,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
    )

    return (state, loras_choices, lset_name, state,
            video_guide, video_mask, image_refs, prompt_enhancer_row, mmaudio_tab
            video_guide, video_mask, image_refs, prompt_enhancer_row, mmaudio_tab, PP_MMAudio_col
            )
@@ -6937,7 +6945,7 @@ def generate_download_tab(lset_name,loras_choices, state):
    download_loras_btn.click(fn=download_loras, inputs=[], outputs=[download_status_row, download_status]).then(fn=refresh_lora_list, inputs=[state, lset_name, loras_choices], outputs=[lset_name, loras_choices])
def generate_configuration_tab(state, blocks, header, model_choice, prompt_enhancer_row, mmaudio_tab):
def generate_configuration_tab(state, blocks, header, model_choice, prompt_enhancer_row, mmaudio_tab, PP_MMAudio_col):
gr.Markdown("Please click Apply Changes at the bottom so that the changes are effective. Some choices below may be locked if the app has been launched by specifying a config preset.")
with gr.Column():
with gr.Tabs():
@@ -7199,7 +7207,7 @@ def generate_configuration_tab(state, blocks, header, model_choice, prompt_enhan
            notification_sound_enabled_choice,
            notification_sound_volume_choice
        ],
        outputs=[msg, header, model_choice, prompt_enhancer_row, mmaudio_tab]
        outputs=[msg, header, model_choice, prompt_enhancer_row, mmaudio_tab, PP_MMAudio_col]
    )
def generate_about_tab():
@@ -7725,7 +7733,7 @@ def create_ui():
    header = gr.Markdown(generate_header(transformer_type, compile, attention_mode), visible=True)
    with gr.Row():
        ( state, loras_choices, lset_name, state,
          video_guide, video_mask, image_refs, prompt_enhancer_row, mmaudio_tab
          video_guide, video_mask, image_refs, prompt_enhancer_row, mmaudio_tab, PP_MMAudio_col
        ) = generate_video_tab(model_choice=model_choice, header=header, main=main)
    with gr.Tab("Guides", id="info") as info_tab:
        generate_info_tab()
@@ -7735,7 +7743,7 @@ def create_ui():
with gr.Tab("Downloads", id="downloads") as downloads_tab:
generate_download_tab(lset_name, loras_choices, state)
with gr.Tab("Configuration", id="configuration") as configuration_tab:
generate_configuration_tab(state, main, header, model_choice, prompt_enhancer_row, mmaudio_tab)
generate_configuration_tab(state, main, header, model_choice, prompt_enhancer_row, mmaudio_tab, PP_MMAudio_col)
with gr.Tab("About"):
generate_about_tab()
@@ -7766,3 +7774,4 @@ if __name__ == "__main__":
url = "http://" + server_name
webbrowser.open(url + ":" + str(server_port), new = 0, autoraise = True)
demo.launch(server_name=server_name, server_port=server_port, share=args.share, allowed_paths=[save_path])
# Lucky me !!!