Merge branch 'main' into better_Readme

This commit is contained in:
deepbeepmeep 2025-06-05 15:33:28 +02:00 committed by GitHub
commit 263bef944e
4 changed files with 32 additions and 26 deletions


@@ -18,23 +18,23 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
**Discord Server to get Help from Other Users and show your Best Videos:** https://discord.gg/g7efUW9jGV
## 🔥 Latest Updates
### May 28 2025: WanGP v5.31
👋 Added Phantom 14B, a model that you can use to transfer objects / people into a video. My preference goes to Vace, which remains the king of controlnets.
VACE improvements: Better sliding window transitions, image mask support in Matanyone, new Extend Video feature, and enhanced background removal options.
### May 26, 2025: Wan 2.1GP v5.3
### May 26, 2025: WanGP v5.3
👋 Settings management revolution! Now you can:
- Select any generated video and click *Use Selected Video Settings* to instantly reuse its configuration
- Drag & drop videos to automatically extract their settings metadata
- Export/import settings as JSON files for easy sharing and backup (see the sketch below)
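A minimal sketch of what such an exported settings file could contain. The keys are borrowed from values that appear elsewhere in this diff (`prompt`, `model_filename`, `guidance_scale`, `flow_shift`, `resolution`); the exact schema WanGP writes is not shown here, so treat the field set and values as assumptions:

```python
import json

# Illustrative only: keys mirror settings visible elsewhere in this diff,
# values are made up for the example.
settings = {
    "prompt": "a cat surfing a wave at sunset",
    "model_filename": "ckpts/wan2.1_phantom_14B_mbf16.safetensors",
    "guidance_scale": 7.5,
    "flow_shift": 5,
    "resolution": "1280x720",
}

with open("my_settings.json", "w", encoding="utf-8") as f:
    json.dump(settings, f, indent=2)
```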
### May 23, 2025: Wan 2.1GP v5.21
👋 VACE improvements: Better sliding window transitions, image mask support in Matanyone, new Extend Video feature, and enhanced background removal options.
### May 20, 2025: Wan 2.1GP v5.2
### May 20, 2025: WanGP v5.2
👋 **CausVid support** - Generate videos in just 4-12 steps with the new distilled Wan model! Also added experimental MoviiGen for 1080p generation (20GB+ VRAM required).
### May 18, 2025: Wan 2.1GP v5.1
### May 18, 2025: WanGP v5.1
👋 **LTX Video 13B Distilled** - Generate high-quality videos in less than one minute!
### May 17, 2025: Wan 2.1GP v5.0
### May 17, 2025: WanGP v5.0
👋 **One App to Rule Them All!** Added Hunyuan Video and LTX Video support, plus Vace 14B and integrated prompt enhancer.
See full changelog: **[Changelog](docs/CHANGELOG.md)**


@@ -114,6 +114,6 @@ def temporal_interpolation(model_path, frames, exp, device ="cuda"):
model.to(device=device)
with torch.no_grad():
output = process_frames(model, device, frames, exp)
output = process_frames(model, device, frames.float(), exp)
return output
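The one-line change above upcasts the frames before interpolation. A minimal sketch of the failure mode this guards against, assuming the frames can arrive in half precision while the interpolation model's weights are float32 (an assumption consistent with the fix):

```python
import torch

conv = torch.nn.Conv2d(3, 3, kernel_size=3, padding=1)     # float32 weights, like the loaded model
frames = torch.rand(1, 3, 480, 832, dtype=torch.bfloat16)  # assumed half-precision frame batch

# conv(frames)              # would raise a dtype-mismatch RuntimeError
out = conv(frames.float())  # upcasting first, as the commit now does, avoids it
```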


@@ -82,9 +82,9 @@ class WanT2V:
from mmgp import offload
# model_filename = "c:/temp/vace1.3/diffusion_pytorch_model.safetensors"
# model_filename = "vace14B_quanto_bf16_int8.safetensors"
# model_filename = "c:/temp/movii/diffusion_pytorch_model-00001-of-00007.safetensors"
# config_filename= "c:/temp/movii/config.json"
self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False) # , forcedConfigPath= config_filename)
# model_filename = "c:/temp/phantom/Phantom_Wan_14B-00001-of-00006.safetensors"
# config_filename= "c:/temp/phantom/config.json"
self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False)#, forcedConfigPath= config_filename)
# offload.load_model_data(self.model, "e:/vace.safetensors")
# offload.load_model_data(self.model, "c:/temp/Phantom-Wan-1.3B.pth")
# self.model.to(torch.bfloat16)
@@ -92,8 +92,8 @@ class WanT2V:
self.model.lock_layers_dtypes(torch.float32 if mixed_precision_transformer else dtype)
# dtype = torch.bfloat16
offload.change_dtype(self.model, dtype, True)
# offload.save_model(self.model, "wan2.1_moviigen_14B_mbf16.safetensors", config_file_path=config_filename)
# offload.save_model(self.model, "wan2.1_moviigen_14B_quanto_fp16_int8.safetensors", do_quantize= True, config_file_path=config_filename)
# offload.save_model(self.model, "wan2.1_phantom_14B_mbf16.safetensors", config_file_path=config_filename)
# offload.save_model(self.model, "wan2.1_phantom_14B_quanto_fp16_int8.safetensors", do_quantize= True, config_file_path=config_filename)
self.model.eval().requires_grad_(False)
@@ -477,7 +477,7 @@ class WanT2V:
pass
overlap_noise_factor = overlap_noise / 1000
latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(len(timesteps) - conditioning_latents_size - ref_images_count))]
#timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(len(timesteps) - conditioning_latents_size - ref_images_count))]
if target_camera != None:
latent_model_input = torch.cat([latents, source_latents], dim=1)
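A self-contained sketch of the overlap-noise blend performed a few lines above. Only the blend formula is taken from the code; the tensor shape, the number of protected frames, and the slider value are assumed placeholders:

```python
import torch

overlap_noise = 20                        # assumed UI value, divided by 1000 as in the code
overlap_noise_factor = overlap_noise / 1000
keep = 5                                  # conditioning_latents_size + ref_images_count (assumed)
latents = torch.randn(16, 21, 60, 104)    # assumed latent layout: channels, frames, height, width

# Latent frames beyond the conditioning/reference region receive a small amount of
# fresh Gaussian noise, softening the transition when a sliding window is extended.
latents[:, keep:] = latents[:, keep:] * (1.0 - overlap_noise_factor) \
    + torch.randn_like(latents[:, keep:]) * overlap_noise_factor
```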
@@ -598,4 +598,4 @@ class WanT2V:
setattr(target, "vace", module )
delattr(model, "vace_blocks")

wgp.py

@@ -43,7 +43,7 @@ AUTOSAVE_FILENAME = "queue.zip"
PROMPT_VARS_MAX = 10
target_mmgp_version = "3.4.7"
WanGP_version = "5.3"
WanGP_version = "5.31"
prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
from importlib.metadata import version
@@ -1525,11 +1525,12 @@ for path in ["wan2.1_Vace_1.3B_preview_bf16.safetensors", "sky_reels2_diffusion
os.remove( os.path.join("ckpts" , path))
wan_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors", "ckpts/wan2.1_Vace_1.3B_mbf16.safetensors",
wan_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors",
"ckpts/wan2.1_recammaster_1.3B_bf16.safetensors", "ckpts/sky_reels2_diffusion_forcing_1.3B_mbf16.safetensors", "ckpts/sky_reels2_diffusion_forcing_14B_bf16.safetensors",
"ckpts/sky_reels2_diffusion_forcing_14B_quanto_int8.safetensors", "ckpts/sky_reels2_diffusion_forcing_720p_14B_mbf16.safetensors","ckpts/sky_reels2_diffusion_forcing_720p_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2_1_phantom_1.3B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2.1_Vace_1.3B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2.1_moviigen1.1_14B_mbf16.safetensors", "ckpts/wan2.1_moviigen1.1_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2_1_phantom_1.3B_mbf16.safetensors", "ckpts/wan2.1_phantom_14B_mbf16.safetensors", "ckpts/wan2.1_phantom_14B_quanto_mbf16_int8.safetensors",
]
wan_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_mbf16.safetensors", "ckpts/wan2.1_image2video_480p_14B_quanto_mbf16_int8.safetensors", "ckpts/wan2.1_image2video_720p_14B_mbf16.safetensors",
"ckpts/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors",
@@ -1548,12 +1549,12 @@ def get_dependent_models(model_filename, quantization, dtype_policy ):
return [get_model_filename("ltxv_13B", quantization, dtype_policy)]
else:
return []
model_types = [ "t2v_1.3B", "t2v", "i2v", "i2v_720p", "flf2v_720p", "vace_1.3B","vace_14B","moviigen", "phantom_1.3B", "fantasy", "fun_inp_1.3B", "fun_inp", "recam_1.3B", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_types = [ "t2v_1.3B", "t2v", "i2v", "i2v_720p", "flf2v_720p", "vace_1.3B","vace_14B","moviigen", "phantom_1.3B", "phantom_14B", "fantasy", "fun_inp_1.3B", "fun_inp", "recam_1.3B", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
"i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "vace_14B" : "Vace_14B","recam_1.3B": "recammaster_1.3B",
"flf2v_720p" : "FLF2V_720p", "sky_df_1.3B" : "sky_reels2_diffusion_forcing_1.3B", "sky_df_14B" : "sky_reels2_diffusion_forcing_14B",
"sky_df_720p_14B" : "sky_reels2_diffusion_forcing_720p_14B", "moviigen" :"moviigen",
"phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B_dev", "ltxv_13B_distilled" : "ltxv_0.9.7_13B_distilled", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
"phantom_1.3B" : "phantom_1.3B", "phantom_14B" : "phantom_14B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B_dev", "ltxv_13B_distilled" : "ltxv_0.9.7_13B_distilled", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
def get_model_type(model_filename):
@@ -1611,8 +1612,12 @@ def get_model_name(model_filename, description_container = [""]):
description = "The SkyReels 2 Diffusion Forcing model has been designed to generate very long videos that exceeds the usual 5s limit. You can also use this model to extend any existing video."
elif "phantom" in model_filename:
model_name = "Wan2.1 Phantom"
model_name += " 14B" if "14B" in model_filename else " 1.3B"
description = "The Phantom model is specialized to transfer people or objects of your choice into a generated Video. It produces very nices results when used at 720p."
if "14B" in model_filename:
model_name += " 14B"
description = "The Phantom model is specialized to transfer people or objects of your choice into a generated Video. It seems to produce better results if you keep the original background of the Image Referendes."
else:
model_name += " 1.3B"
description = "The Phantom model is specialized to transfer people or objects of your choice into a generated Video. It produces very nice results when used at 720p."
elif "fantasy" in model_filename:
model_name = "Wan2.1 Fantasy Speaking 720p"
model_name += " 14B" if "14B" in model_filename else " 1.3B"
@@ -1741,11 +1746,12 @@ def get_default_settings(filename):
})
if get_model_type(filename) in ("phantom_1.3B"):
if get_model_type(filename) in ("phantom_1.3B", "phantom_14B"):
ui_defaults.update({
"guidance_scale": 7.5,
"flow_shift": 5,
"resolution": "1280x720"
"remove_background_images_ref": 0,
# "resolution": "1280x720"
})
elif get_model_type(filename) in ("hunyuan_custom"):
@@ -4116,6 +4122,7 @@ def load_settings_from_file(state, file_path):
return gr.update(), gr.update(), None
configs = None
tags = None
if file_path.endswith(".json"):
try:
with open(file_path, 'r', encoding='utf-8') as f:
@@ -4124,7 +4131,6 @@ def load_settings_from_file(state, file_path):
pass
else:
from mutagen.mp4 import MP4
tags = None
try:
file = MP4(file_path)
tags = file.tags['©cmt'][0]
@@ -4138,7 +4144,7 @@ def load_settings_from_file(state, file_path):
prompt = configs.get("prompt", "")
current_model_filename = state["model_filename"]
model_filename = configs["model_filename"]
model_filename = configs.get("model_filename", current_model_filename)
model_type = get_model_type(model_filename)
defaults = state.get(model_type, None)
defaults = get_default_settings(model_filename) if defaults == None else defaults
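To summarize the loading path this last hunk hardens, here is a condensed, illustrative sketch: settings come either from a JSON file or from the `©cmt` comment tag of a generated MP4 (via mutagen, as in the code above), and a missing `model_filename` now falls back to the currently loaded model instead of raising a KeyError. The helper name and the JSON parsing of the tag are assumptions:

```python
import json

from mutagen.mp4 import MP4


def read_settings(file_path, current_model_filename):
    """Condensed, illustrative version of load_settings_from_file."""
    if file_path.endswith(".json"):
        with open(file_path, "r", encoding="utf-8") as f:
            configs = json.load(f)
    else:
        # Generated videos carry their settings in the MP4 comment tag;
        # parsing that tag as JSON is an assumption here.
        configs = json.loads(MP4(file_path).tags["©cmt"][0])
    # The fix in this commit: fall back to the current model when the settings
    # do not name one, instead of raising a KeyError.
    model_filename = configs.get("model_filename", current_model_filename)
    return configs, model_filename
```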