Mirror of https://github.com/Wan-Video/Wan2.1.git (synced 2025-11-04 14:16:57 +00:00)

Commit: 90b52d86ff ("more fixes")
Parent: 6d9e60b309
@@ -189,9 +189,9 @@ class TextEncoder(nn.Module):
        if "llm" in text_encoder_type:
            from mmgp import offload
            forcedConfigPath= None if "i2v" in text_encoder_type else "ckpts/llava-llama-3-8b/config.json"
            self.model= offload.fast_load_transformers_model(self.model_path, forcedConfigPath=forcedConfigPath, modelPrefix= "model" if forcedConfigPath !=None else None)
            self.model= offload.fast_load_transformers_model(self.model_path, forcedConfigPath=forcedConfigPath)
            if forcedConfigPath != None:
                self.model.final_layer_norm = self.model.norm
                self.model.final_layer_norm = self.model.model.norm

        else:
            self.model, self.model_path = load_text_encoder(
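For reference, a minimal sketch of the loading pattern this hunk settles on, assuming the mmgp offload API exactly as it is called above; the helper name is hypothetical, and pairing the modelPrefix-less load with the model.model.norm alias is inferred from the hunk rather than stated in it:

    from mmgp import offload

    def load_llm_text_encoder(model_path: str, text_encoder_type: str):
        # hypothetical helper mirroring the hunk above
        # non-i2v llm encoders are forced onto the llava-llama-3-8b config shipped under ckpts/
        forcedConfigPath = None if "i2v" in text_encoder_type else "ckpts/llava-llama-3-8b/config.json"
        model = offload.fast_load_transformers_model(model_path, forcedConfigPath=forcedConfigPath)
        if forcedConfigPath is not None:
            # expose the inner language model's final norm under the name the encoder expects
            # (model.model.norm, assuming no modelPrefix was passed at load time)
            model.final_layer_norm = model.model.norm
        return model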
@@ -155,14 +155,14 @@ class LTXV:
    ):

        self.mixed_precision_transformer = mixed_precision_transformer
        # ckpt_path = Path(ckpt_path)
        self.distilled = "distilled" in model_filepath[0]
        # with safe_open(ckpt_path, framework="pt") as f:
        # metadata = f.metadata()
        # config_str = metadata.get("config")
        # configs = json.loads(config_str)
        # allowed_inference_steps = configs.get("allowed_inference_steps", None)
        # transformer = Transformer3DModel.from_pretrained(ckpt_path)
        # offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", config_file_path="config_transformer.json")
        # transformer = offload.fast_load_transformers_model("c:/temp/ltxdistilled/diffusion_pytorch_model-00001-of-00006.safetensors", forcedConfigPath="c:/temp/ltxdistilled/config.json")

        # vae = CausalVideoAutoencoder.from_pretrained(ckpt_path)
        vae = offload.fast_load_transformers_model("ckpts/ltxv_0.9.7_VAE.safetensors", modelClass=CausalVideoAutoencoder)
@@ -174,8 +174,11 @@ class LTXV:
        # vae = offload.fast_load_transformers_model("vae.safetensors", modelClass=CausalVideoAutoencoder, modelPrefix= "vae", forcedConfigPath="config_vae.json")
        # offload.save_model(vae, "vae.safetensors", config_file_path="config_vae.json")


        transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel)
        # model_filepath = "c:/temp/ltxd/ltxv-13b-0.9.7-distilled.safetensors"
        transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel, forcedConfigPath= "c:/temp/ltxd/config.json")
        # offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors", config_file_path= "c:/temp/ltxd/config.json")
        # offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_distilled_quanto_bf16_int8.safetensors", do_quantize= True, config_file_path="c:/temp/ltxd/config.json")
        # transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel)
        transformer._model_dtype = dtype
        if mixed_precision_transformer:
            transformer._lock_dtype = torch.float
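A minimal sketch of the transformer load and dtype setup this hunk arrives at, using only the calls shown above; the wrapper function is hypothetical and the Transformer3DModel import path is assumed from the vendored ltx_video package:

    import torch
    from mmgp import offload
    from ltx_video.models.transformers.transformer3d import Transformer3DModel  # path assumed

    def load_ltxv_transformer(model_filepath, dtype, mixed_precision_transformer):
        # load the safetensors checkpoint straight into a Transformer3DModel
        transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel)
        transformer._model_dtype = dtype
        if mixed_precision_transformer:
            # flag read by the offload wrapper; presumably keeps the locked layers in float32
            transformer._lock_dtype = torch.float
        return transformer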
@@ -295,7 +298,10 @@ class LTXV:
        conditioning_media_paths = None
        conditioning_start_frames = None

        pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml"
        if self.distilled :
            pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-distilled.yaml"
        else:
            pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml"
        # check if pipeline_config is a file
        if not os.path.isfile(pipeline_config):
            raise ValueError(f"Pipeline config file {pipeline_config} does not exist")
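The distilled flag set earlier in the constructor (self.distilled = "distilled" in model_filepath[0]) now drives which YAML config the pipeline loads. A minimal sketch of that selection as a standalone function; the function itself is hypothetical, the paths are the ones in the diff:

    import os

    def pick_pipeline_config(distilled: bool) -> str:
        # distilled checkpoints use the few-step distilled config, everything else the dev config
        config = ("ltx_video/configs/ltxv-13b-0.9.7-distilled.yaml" if distilled
                  else "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml")
        if not os.path.isfile(config):
            raise ValueError(f"Pipeline config file {config} does not exist")
        return config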
@@ -1,4 +1,4 @@
# import av
import av
import torch
import io
import numpy as np
@@ -32,4 +32,5 @@ hydra-core
librosa
loguru
sentencepiece
av
# rembg==2.0.65
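With `# import av` becoming `import av` and `av` added to the requirements, PyAV is now a hard dependency. A generic PyAV decode sketch, not this repo's own code, with a placeholder file name, just to show what the package is used for:

    import av

    # decode the first video stream of a file into RGB numpy frames
    container = av.open("example.mp4")
    frames = [frame.to_ndarray(format="rgb24") for frame in container.decode(video=0)]
    container.close()
    print(f"decoded {len(frames)} frames")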
@@ -80,9 +80,9 @@ class WanT2V:

        logging.info(f"Creating WanModel from {model_filename[-1]}")
        from mmgp import offload
        # model_filename = "c:/temp/vace/diffusion_pytorch_model-00001-of-00007.safetensors"
        # model_filename = "c:/temp/vace1.3/diffusion_pytorch_model.safetensors"
        # model_filename = "vace14B_quanto_bf16_int8.safetensors"
        self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel, do_quantize= quantizeTransformer, writable_tensors= False) # , forcedConfigPath= "c:/temp/vace/vace_config.json")
        self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel, do_quantize= quantizeTransformer, writable_tensors= False , forcedConfigPath= "c:/temp/vace1.3/config.json")
        # offload.load_model_data(self.model, "e:/vace.safetensors")
        # offload.load_model_data(self.model, "c:/temp/Phantom-Wan-1.3B.pth")
        # self.model.to(torch.bfloat16)
@@ -90,7 +90,7 @@ class WanT2V:
        self.model.lock_layers_dtypes(torch.float32 if mixed_precision_transformer else dtype)
        # dtype = torch.bfloat16
        offload.change_dtype(self.model, dtype, True)
        # offload.save_model(self.model, "vace14B_bf16.safetensors", config_file_path="c:/temp/vace/vace_config.json")
        # offload.save_model(self.model, "wan2.1_Vace1.3B_mbf16.safetensors", config_file_path="c:/temp/vace1.3/config.json")
        # offload.save_model(self.model, "vace14B_quanto_fp16_int8.safetensors", do_quantize= True, config_file_path="c:/temp/vace/vace_config.json")
        self.model.eval().requires_grad_(False)
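Taken together, the two WanT2V hunks keep the same post-load dtype handling. A minimal sketch of that sequence, assuming only the mmgp calls exactly as they appear above; the wrapper function is hypothetical:

    import torch
    from mmgp import offload

    def finalize_wan_model(model, dtype, mixed_precision_transformer):
        # lock selected layers to float32 when mixed precision is requested, otherwise to the working dtype
        model.lock_layers_dtypes(torch.float32 if mixed_precision_transformer else dtype)
        # cast the remaining weights to the working dtype
        offload.change_dtype(model, dtype, True)
        # inference only
        model.eval().requires_grad_(False)
        return model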
wgp.py
@@ -1528,7 +1528,7 @@ wan_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_mbf16.safetensors", "ckpts/w
    "ckpts/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors",
    "ckpts/wan2.1_Fun_InP_14B_quanto_int8.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_bf16.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_quanto_int8.safetensors",
    "ckpts/wan2.1_fantasy_speaking_14B_bf16.safetensors"]
ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors"]
ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_quanto_bf16_int8.safetensors"]

hunyuan_choices= ["ckpts/hunyuan_video_720_bf16.safetensors", "ckpts/hunyuan_video_720_quanto_int8.safetensors", "ckpts/hunyuan_video_i2v_720_bf16.safetensors", "ckpts/hunyuan_video_i2v_720_quanto_int8v2.safetensors",
    "ckpts/hunyuan_video_custom_720_bf16.safetensors", "ckpts/hunyuan_video_custom_720_quanto_bf16_int8.safetensors" ]
@@ -1539,12 +1539,12 @@ def get_dependent_models(model_filename, quantization, dtype_policy ):
        return [get_model_filename("i2v_720p", quantization, dtype_policy)]
    else:
        return []
model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "vace_14B", "fun_inp", "recam_1.3B", "flf2v_720p", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "phantom_1.3B", "fantasy", "ltxv_13B", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "vace_14B", "fun_inp", "recam_1.3B", "flf2v_720p", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "phantom_1.3B", "fantasy", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
    "i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "vace_14B" : "Vace_14B","recam_1.3B": "recammaster_1.3B",
    "flf2v_720p" : "FLF2V_720p", "sky_df_1.3B" : "sky_reels2_diffusion_forcing_1.3B", "sky_df_14B" : "sky_reels2_diffusion_forcing_14B",
    "sky_df_720p_14B" : "sky_reels2_diffusion_forcing_720p_14B",
    "phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
    "phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B_dev", "ltxv_13B_distilled" : "ltxv_0.9.7_13B_distilled", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }


def get_model_type(model_filename):
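The new ltxv_13B_distilled type only works because its signature string appears in the checkpoint filenames added to ltxv_choices above. The body of get_model_type is not part of this diff; a hypothetical sketch of such a signature lookup, taking the table as an argument so it is self-contained:

    def resolve_model_type(model_filename: str, model_signatures: dict) -> str:
        # return the first model type whose signature substring occurs in the filename
        for model_type, signature in model_signatures.items():
            if signature in model_filename:
                return model_type
        raise ValueError(f"no known signature found in {model_filename}")

For example, "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors" contains the signature "ltxv_0.9.7_13B_distilled" and would resolve to ltxv_13B_distilled.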
@@ -1606,10 +1606,12 @@ def get_model_name(model_filename, description_container = [""]):
        model_name = "Wan2.1 Fantasy Speaking 720p"
        model_name += " 14B" if "14B" in model_filename else " 1.3B"
        description = "The Fantasy Speaking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking extension to process an audio Input."
    elif "ltxv" in model_filename:
        model_name = "LTX Video"
        model_name += " 0.9.7 13B" if "13B" in model_filename else " 0.9.6 2B"
    elif "ltxv_0.9.7_13B_dev" in model_filename:
        model_name = "LTX Video 0.9.7"
        description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames). It is recommended to keep the number of steps to 30 or you will need to update the file 'ltxv_video/configs/ltxv-13b-0.9.7-dev.yaml'. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer."
    elif "ltxv_0.9.7_13B_distilled" in model_filename:
        model_name = "LTX Video 0.9.7 distilled"
        description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames). This is the distilled / fast version. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer."
    elif "hunyuan_video_720" in model_filename:
        model_name = "Hunyuan Video text2video 720p"
        description = "Probably the best text 2 video model available."