Merge branch 'main' into better_Readme

This commit is contained in:
deepbeepmeep 2025-06-05 15:33:28 +02:00 committed by GitHub
commit 263bef944e
4 changed files with 32 additions and 26 deletions


@@ -18,23 +18,23 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
**Discord Server to get Help from Other Users and show your Best Videos:** https://discord.gg/g7efUW9jGV
## 🔥 Latest Updates
### May 28 2025: WanGP v5.31
👋 Added Phantom 14B, a model that you can use to transfer objects / people into a video. My preference goes to Vace, which remains the king of controlnets.
VACE improvements: Better sliding window transitions, image mask support in Matanyone, new Extend Video feature, and enhanced background removal options.
### May 26, 2025: Wan 2.1GP v5.3
### May 26, 2025: WanGP v5.3
👋 Settings management revolution! Now you can:
- Select any generated video and click *Use Selected Video Settings* to instantly reuse its configuration
- Drag & drop videos to automatically extract their settings metadata
- Export/import settings as JSON files for easy sharing and backup (see the sketch below)
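A minimal sketch of what such an exported settings file could contain. The keys are borrowed from values that appear elsewhere in this diff (`prompt`, `model_filename`, `guidance_scale`, `flow_shift`, `resolution`); the exact schema WanGP writes is not shown here, so treat the field set and values as assumptions:

```python
import json

# Illustrative only: keys mirror settings visible elsewhere in this diff,
# values are made up for the example.
settings = {
    "prompt": "a cat surfing a wave at sunset",
    "model_filename": "ckpts/wan2.1_phantom_14B_mbf16.safetensors",
    "guidance_scale": 7.5,
    "flow_shift": 5,
    "resolution": "1280x720",
}

with open("my_settings.json", "w", encoding="utf-8") as f:
    json.dump(settings, f, indent=2)
```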
### May 23, 2025: Wan 2.1GP v5.21
👋 VACE improvements: Better sliding window transitions, image mask support in Matanyone, new Extend Video feature, and enhanced background removal options.
### May 20, 2025: Wan 2.1GP v5.2
### May 20, 2025: WanGP v5.2
👋 **CausVid support** - Generate videos in just 4-12 steps with the new distilled Wan model! Also added experimental MoviiGen for 1080p generation (20GB+ VRAM required).
### May 18, 2025: Wan 2.1GP v5.1
### May 18, 2025: WanGP v5.1
👋 **LTX Video 13B Distilled** - Generate high-quality videos in less than one minute!
### May 17, 2025: Wan 2.1GP v5.0
### May 17, 2025: WanGP v5.0
👋 **One App to Rule Them All!** Added Hunyuan Video and LTX Video support, plus Vace 14B and integrated prompt enhancer.
See full changelog: **[Changelog](docs/CHANGELOG.md)**


@@ -114,6 +114,6 @@ def temporal_interpolation(model_path, frames, exp, device ="cuda"):
model.to(device=device)
with torch.no_grad():
output = process_frames(model, device, frames, exp)
output = process_frames(model, device, frames.float(), exp)
return output
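The one-line change above upcasts the frames before interpolation. A minimal sketch of the failure mode this guards against, assuming the frames can arrive in half precision while the interpolation model's weights are float32 (an assumption consistent with the fix):

```python
import torch

conv = torch.nn.Conv2d(3, 3, kernel_size=3, padding=1)     # float32 weights, like the loaded model
frames = torch.rand(1, 3, 480, 832, dtype=torch.bfloat16)  # assumed half-precision frame batch

# conv(frames)              # would raise a dtype-mismatch RuntimeError
out = conv(frames.float())  # upcasting first, as the commit now does, avoids it
```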


@@ -82,9 +82,9 @@ class WanT2V:
from mmgp import offload
# model_filename = "c:/temp/vace1.3/diffusion_pytorch_model.safetensors"
# model_filename = "vace14B_quanto_bf16_int8.safetensors"
# model_filename = "c:/temp/movii/diffusion_pytorch_model-00001-of-00007.safetensors"
# config_filename= "c:/temp/movii/config.json"
self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False) # , forcedConfigPath= config_filename)
# model_filename = "c:/temp/phantom/Phantom_Wan_14B-00001-of-00006.safetensors"
# config_filename= "c:/temp/phantom/config.json"
self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False)#, forcedConfigPath= config_filename)
# offload.load_model_data(self.model, "e:/vace.safetensors")
# offload.load_model_data(self.model, "c:/temp/Phantom-Wan-1.3B.pth")
# self.model.to(torch.bfloat16)
@@ -92,8 +92,8 @@ class WanT2V:
self.model.lock_layers_dtypes(torch.float32 if mixed_precision_transformer else dtype)
# dtype = torch.bfloat16
offload.change_dtype(self.model, dtype, True)
# offload.save_model(self.model, "wan2.1_moviigen_14B_mbf16.safetensors", config_file_path=config_filename)
# offload.save_model(self.model, "wan2.1_moviigen_14B_quanto_fp16_int8.safetensors", do_quantize= True, config_file_path=config_filename)
# offload.save_model(self.model, "wan2.1_phantom_14B_mbf16.safetensors", config_file_path=config_filename)
# offload.save_model(self.model, "wan2.1_phantom_14B_quanto_fp16_int8.safetensors", do_quantize= True, config_file_path=config_filename)
self.model.eval().requires_grad_(False)
@@ -477,7 +477,7 @@ class WanT2V:
pass
overlap_noise_factor = overlap_noise / 1000
latents[:, conditioning_latents_size + ref_images_count:] = latents[:, conditioning_latents_size + ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(latents[:, conditioning_latents_size + ref_images_count:]) * overlap_noise_factor
timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(len(timesteps) - conditioning_latents_size - ref_images_count))]
#timestep = [torch.tensor([t.item()] * (conditioning_latents_size + ref_images_count) + [t.item() - overlap_noise]*(len(timesteps) - conditioning_latents_size - ref_images_count))]
if target_camera != None:
latent_model_input = torch.cat([latents, source_latents], dim=1)
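A self-contained sketch of the overlap-noise blend performed a few lines above. Only the blend formula is taken from the code; the tensor shape, the number of protected frames, and the slider value are assumed placeholders:

```python
import torch

overlap_noise = 20                        # assumed UI value, divided by 1000 as in the code
overlap_noise_factor = overlap_noise / 1000
keep = 5                                  # conditioning_latents_size + ref_images_count (assumed)
latents = torch.randn(16, 21, 60, 104)    # assumed latent layout: channels, frames, height, width

# Latent frames beyond the conditioning/reference region receive a small amount of
# fresh Gaussian noise, softening the transition when a sliding window is extended.
latents[:, keep:] = latents[:, keep:] * (1.0 - overlap_noise_factor) \
    + torch.randn_like(latents[:, keep:]) * overlap_noise_factor
```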
@@ -598,4 +598,4 @@ class WanT2V:
setattr(target, "vace", module )
delattr(model, "vace_blocks")

wgp.py

@@ -43,7 +43,7 @@ AUTOSAVE_FILENAME = "queue.zip"
PROMPT_VARS_MAX = 10
target_mmgp_version = "3.4.7"
WanGP_version = "5.3"
WanGP_version = "5.31"
prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
from importlib.metadata import version
@@ -1525,11 +1525,12 @@ for path in ["wan2.1_Vace_1.3B_preview_bf16.safetensors", "sky_reels2_diffusion
os.remove( os.path.join("ckpts" , path))
wan_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors", "ckpts/wan2.1_Vace_1.3B_mbf16.safetensors",
wan_choices_t2v=["ckpts/wan2.1_text2video_1.3B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_bf16.safetensors", "ckpts/wan2.1_text2video_14B_quanto_int8.safetensors",
"ckpts/wan2.1_recammaster_1.3B_bf16.safetensors", "ckpts/sky_reels2_diffusion_forcing_1.3B_mbf16.safetensors", "ckpts/sky_reels2_diffusion_forcing_14B_bf16.safetensors",
"ckpts/sky_reels2_diffusion_forcing_14B_quanto_int8.safetensors", "ckpts/sky_reels2_diffusion_forcing_720p_14B_mbf16.safetensors","ckpts/sky_reels2_diffusion_forcing_720p_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2_1_phantom_1.3B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2.1_Vace_1.3B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_mbf16.safetensors", "ckpts/wan2.1_Vace_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2.1_moviigen1.1_14B_mbf16.safetensors", "ckpts/wan2.1_moviigen1.1_14B_quanto_mbf16_int8.safetensors",
"ckpts/wan2_1_phantom_1.3B_mbf16.safetensors", "ckpts/wan2.1_phantom_14B_mbf16.safetensors", "ckpts/wan2.1_phantom_14B_quanto_mbf16_int8.safetensors",
]
wan_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_mbf16.safetensors", "ckpts/wan2.1_image2video_480p_14B_quanto_mbf16_int8.safetensors", "ckpts/wan2.1_image2video_720p_14B_mbf16.safetensors",
"ckpts/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors",
@@ -1548,12 +1549,12 @@ def get_dependent_models(model_filename, quantization, dtype_policy ):
return [get_model_filename("ltxv_13B", quantization, dtype_policy)]
else:
return []
model_types = [ "t2v_1.3B", "t2v", "i2v", "i2v_720p", "flf2v_720p", "vace_1.3B","vace_14B","moviigen", "phantom_1.3B", "fantasy", "fun_inp_1.3B", "fun_inp", "recam_1.3B", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_types = [ "t2v_1.3B", "t2v", "i2v", "i2v_720p", "flf2v_720p", "vace_1.3B","vace_14B","moviigen", "phantom_1.3B", "phantom_14B", "fantasy", "fun_inp_1.3B", "fun_inp", "recam_1.3B", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
"i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "vace_14B" : "Vace_14B","recam_1.3B": "recammaster_1.3B",
"flf2v_720p" : "FLF2V_720p", "sky_df_1.3B" : "sky_reels2_diffusion_forcing_1.3B", "sky_df_14B" : "sky_reels2_diffusion_forcing_14B",
"sky_df_720p_14B" : "sky_reels2_diffusion_forcing_720p_14B", "moviigen" :"moviigen",
"phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B_dev", "ltxv_13B_distilled" : "ltxv_0.9.7_13B_distilled", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
"phantom_1.3B" : "phantom_1.3B", "phantom_14B" : "phantom_14B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B_dev", "ltxv_13B_distilled" : "ltxv_0.9.7_13B_distilled", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
def get_model_type(model_filename):
@@ -1611,8 +1612,12 @@ def get_model_name(model_filename, description_container = [""]):
description = "The SkyReels 2 Diffusion Forcing model has been designed to generate very long videos that exceeds the usual 5s limit. You can also use this model to extend any existing video."
elif "phantom" in model_filename:
model_name = "Wan2.1 Phantom"
model_name += " 14B" if "14B" in model_filename else " 1.3B"
description = "The Phantom model is specialized to transfer people or objects of your choice into a generated Video. It produces very nices results when used at 720p."
if "14B" in model_filename:
model_name += " 14B"
description = "The Phantom model is specialized to transfer people or objects of your choice into a generated Video. It seems to produce better results if you keep the original background of the Image Referendes."
else:
model_name += " 1.3B"
description = "The Phantom model is specialized to transfer people or objects of your choice into a generated Video. It produces very nice results when used at 720p."
elif "fantasy" in model_filename:
model_name = "Wan2.1 Fantasy Speaking 720p"
model_name += " 14B" if "14B" in model_filename else " 1.3B"
@@ -1741,11 +1746,12 @@ def get_default_settings(filename):
})
if get_model_type(filename) in ("phantom_1.3B"):
if get_model_type(filename) in ("phantom_1.3B", "phantom_14B"):
ui_defaults.update({
"guidance_scale": 7.5,
"flow_shift": 5,
"resolution": "1280x720"
"remove_background_images_ref": 0,
# "resolution": "1280x720"
})
elif get_model_type(filename) in ("hunyuan_custom"):
@@ -4116,6 +4122,7 @@ def load_settings_from_file(state, file_path):
return gr.update(), gr.update(), None
configs = None
tags = None
if file_path.endswith(".json"):
try:
with open(file_path, 'r', encoding='utf-8') as f:
@@ -4124,7 +4131,6 @@ def load_settings_from_file(state, file_path):
pass
else:
from mutagen.mp4 import MP4
tags = None
try:
file = MP4(file_path)
tags = file.tags['©cmt'][0]
@@ -4138,7 +4144,7 @@ def load_settings_from_file(state, file_path):
prompt = configs.get("prompt", "")
current_model_filename = state["model_filename"]
model_filename = configs["model_filename"]
model_filename = configs.get("model_filename", current_model_filename)
model_type = get_model_type(model_filename)
defaults = state.get(model_type, None)
defaults = get_default_settings(model_filename) if defaults == None else defaults
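To summarize the loading path this last hunk hardens, here is a condensed, illustrative sketch: settings come either from a JSON file or from the `©cmt` comment tag of a generated MP4 (via mutagen, as in the code above), and a missing `model_filename` now falls back to the currently loaded model instead of raising a KeyError. The helper name and the JSON parsing of the tag are assumptions:

```python
import json

from mutagen.mp4 import MP4


def read_settings(file_path, current_model_filename):
    """Condensed, illustrative version of load_settings_from_file."""
    if file_path.endswith(".json"):
        with open(file_path, "r", encoding="utf-8") as f:
            configs = json.load(f)
    else:
        # Generated videos carry their settings in the MP4 comment tag;
        # parsing that tag as JSON is an assumption here.
        configs = json.loads(MP4(file_path).tags["©cmt"][0])
    # The fix in this commit: fall back to the current model when the settings
    # do not name one, instead of raising a KeyError.
    model_filename = configs.get("model_filename", current_model_filename)
    return configs, model_filename
```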