more fixes

DeepBeepMeep 2025-05-17 16:51:20 +02:00
parent 6d9e60b309
commit 90b52d86ff
6 changed files with 26 additions and 17 deletions


@@ -189,9 +189,9 @@ class TextEncoder(nn.Module):
         if "llm" in text_encoder_type:
             from mmgp import offload
             forcedConfigPath= None if "i2v" in text_encoder_type else "ckpts/llava-llama-3-8b/config.json"
-            self.model= offload.fast_load_transformers_model(self.model_path, forcedConfigPath=forcedConfigPath, modelPrefix= "model" if forcedConfigPath !=None else None)
+            self.model= offload.fast_load_transformers_model(self.model_path, forcedConfigPath=forcedConfigPath)
             if forcedConfigPath != None:
-                self.model.final_layer_norm = self.model.norm
+                self.model.final_layer_norm = self.model.model.norm
         else:
             self.model, self.model_path = load_text_encoder(
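Note: the two changes above appear to go together. If dropping the modelPrefix argument means fast_load_transformers_model now returns the full causal-LM wrapper instead of its inner model (an assumption based on the call signature, not on the mmgp documentation), the final-layer-norm alias has to reach one level deeper. A rough standalone sketch with dummy modules, not the mmgp API:

import torch.nn as nn

class Inner(nn.Module):            # stands in for the inner LlamaModel
    def __init__(self):
        super().__init__()
        self.norm = nn.LayerNorm(8)

class Wrapper(nn.Module):          # stands in for the *ForCausalLM-style wrapper
    def __init__(self):
        super().__init__()
        self.model = Inner()

m = Wrapper()
# the old code aliased m.norm, which only exists when the inner model is returned directly;
# the new code follows the wrapper hierarchy instead:
m.final_layer_norm = m.model.norm
assert m.final_layer_norm is m.model.norm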


@ -155,14 +155,14 @@ class LTXV:
): ):
self.mixed_precision_transformer = mixed_precision_transformer self.mixed_precision_transformer = mixed_precision_transformer
# ckpt_path = Path(ckpt_path) self.distilled = "distilled" in model_filepath[0]
# with safe_open(ckpt_path, framework="pt") as f: # with safe_open(ckpt_path, framework="pt") as f:
# metadata = f.metadata() # metadata = f.metadata()
# config_str = metadata.get("config") # config_str = metadata.get("config")
# configs = json.loads(config_str) # configs = json.loads(config_str)
# allowed_inference_steps = configs.get("allowed_inference_steps", None) # allowed_inference_steps = configs.get("allowed_inference_steps", None)
# transformer = Transformer3DModel.from_pretrained(ckpt_path) # transformer = Transformer3DModel.from_pretrained(ckpt_path)
# offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", config_file_path="config_transformer.json") # transformer = offload.fast_load_transformers_model("c:/temp/ltxdistilled/diffusion_pytorch_model-00001-of-00006.safetensors", forcedConfigPath="c:/temp/ltxdistilled/config.json")
# vae = CausalVideoAutoencoder.from_pretrained(ckpt_path) # vae = CausalVideoAutoencoder.from_pretrained(ckpt_path)
vae = offload.fast_load_transformers_model("ckpts/ltxv_0.9.7_VAE.safetensors", modelClass=CausalVideoAutoencoder) vae = offload.fast_load_transformers_model("ckpts/ltxv_0.9.7_VAE.safetensors", modelClass=CausalVideoAutoencoder)
@@ -174,8 +174,11 @@ class LTXV:
         # vae = offload.fast_load_transformers_model("vae.safetensors", modelClass=CausalVideoAutoencoder, modelPrefix= "vae", forcedConfigPath="config_vae.json")
         # offload.save_model(vae, "vae.safetensors", config_file_path="config_vae.json")
-        transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel)
+        # model_filepath = "c:/temp/ltxd/ltxv-13b-0.9.7-distilled.safetensors"
+        transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel, forcedConfigPath= "c:/temp/ltxd/config.json")
+        # offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors", config_file_path= "c:/temp/ltxd/config.json")
+        # offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_distilled_quanto_bf16_int8.safetensors", do_quantize= True, config_file_path="c:/temp/ltxd/config.json")
+        # transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel)
         transformer._model_dtype = dtype
         if mixed_precision_transformer:
             transformer._lock_dtype = torch.float
@@ -295,6 +298,9 @@ class LTXV:
             conditioning_media_paths = None
             conditioning_start_frames = None
-        pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml"
+        if self.distilled :
+            pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-distilled.yaml"
+        else:
+            pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml"
         # check if pipeline_config is a file
         if not os.path.isfile(pipeline_config):
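Taken together, the hunks above detect a distilled checkpoint from its filename and route generation to the matching pipeline YAML. A minimal standalone sketch of that selection logic (the helper name is made up; the paths are copied from the diff):

def pick_pipeline_config(model_filepath):
    # model_filepath is a list of checkpoint paths; the first one names the variant
    distilled = "distilled" in model_filepath[0]
    if distilled:
        return "ltx_video/configs/ltxv-13b-0.9.7-distilled.yaml"
    return "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml"

assert pick_pipeline_config(["ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors"]).endswith("distilled.yaml")
assert pick_pipeline_config(["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors"]).endswith("dev.yaml")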


@@ -1,4 +1,4 @@
-# import av
+import av
 import torch
 import io
 import numpy as np


@@ -32,4 +32,5 @@ hydra-core
 librosa
 loguru
 sentencepiece
+av
 # rembg==2.0.65


@@ -80,9 +80,9 @@ class WanT2V:
         logging.info(f"Creating WanModel from {model_filename[-1]}")
         from mmgp import offload
-        # model_filename = "c:/temp/vace/diffusion_pytorch_model-00001-of-00007.safetensors"
+        # model_filename = "c:/temp/vace1.3/diffusion_pytorch_model.safetensors"
         # model_filename = "vace14B_quanto_bf16_int8.safetensors"
-        self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False) # , forcedConfigPath= "c:/temp/vace/vace_config.json")
+        self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False , forcedConfigPath= "c:/temp/vace1.3/config.json")
         # offload.load_model_data(self.model, "e:/vace.safetensors")
         # offload.load_model_data(self.model, "c:/temp/Phantom-Wan-1.3B.pth")
         # self.model.to(torch.bfloat16)
@@ -90,7 +90,7 @@
         self.model.lock_layers_dtypes(torch.float32 if mixed_precision_transformer else dtype)
         # dtype = torch.bfloat16
         offload.change_dtype(self.model, dtype, True)
-        # offload.save_model(self.model, "vace14B_bf16.safetensors", config_file_path="c:/temp/vace/vace_config.json")
+        # offload.save_model(self.model, "wan2.1_Vace1.3B_mbf16.safetensors", config_file_path="c:/temp/vace1.3/config.json")
         # offload.save_model(self.model, "vace14B_quanto_fp16_int8.safetensors", do_quantize= True, config_file_path="c:/temp/vace/vace_config.json")
         self.model.eval().requires_grad_(False)

wgp.py

@@ -1528,7 +1528,7 @@ wan_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_mbf16.safetensors", "ckpts/w
     "ckpts/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors",
     "ckpts/wan2.1_Fun_InP_14B_quanto_int8.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_bf16.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_quanto_int8.safetensors",
     "ckpts/wan2.1_fantasy_speaking_14B_bf16.safetensors"]
-ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors"]
+ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_quanto_bf16_int8.safetensors"]
 hunyuan_choices= ["ckpts/hunyuan_video_720_bf16.safetensors", "ckpts/hunyuan_video_720_quanto_int8.safetensors", "ckpts/hunyuan_video_i2v_720_bf16.safetensors", "ckpts/hunyuan_video_i2v_720_quanto_int8v2.safetensors",
     "ckpts/hunyuan_video_custom_720_bf16.safetensors", "ckpts/hunyuan_video_custom_720_quanto_bf16_int8.safetensors" ]
@@ -1539,12 +1539,12 @@ def get_dependent_models(model_filename, quantization, dtype_policy ):
         return [get_model_filename("i2v_720p", quantization, dtype_policy)]
     else:
         return []
-model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "vace_14B", "fun_inp", "recam_1.3B", "flf2v_720p", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "phantom_1.3B", "fantasy", "ltxv_13B", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
+model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "vace_14B", "fun_inp", "recam_1.3B", "flf2v_720p", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "phantom_1.3B", "fantasy", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
 model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
     "i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "vace_14B" : "Vace_14B","recam_1.3B": "recammaster_1.3B",
     "flf2v_720p" : "FLF2V_720p", "sky_df_1.3B" : "sky_reels2_diffusion_forcing_1.3B", "sky_df_14B" : "sky_reels2_diffusion_forcing_14B",
     "sky_df_720p_14B" : "sky_reels2_diffusion_forcing_720p_14B",
-    "phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
+    "phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B_dev", "ltxv_13B_distilled" : "ltxv_0.9.7_13B_distilled", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
 def get_model_type(model_filename):
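The dev signature is also renamed from "ltxv_0.9.7_13B" to "ltxv_0.9.7_13B_dev", presumably because the old value is a substring of the new distilled checkpoint names and would match them too if the lookup is substring-based. A rough sketch of that kind of substring lookup (not the actual wgp.py code), reusing the lists from this commit:

model_signatures = {
    "ltxv_13B": "ltxv_0.9.7_13B_dev",
    "ltxv_13B_distilled": "ltxv_0.9.7_13B_distilled",
}
ltxv_choices = [
    "ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors",
    "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors",
    "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors",
    "ckpts/ltxv_0.9.7_13B_distilled_quanto_bf16_int8.safetensors",
]

def choices_for(model_type):
    # checkpoints are matched to a model type by substring, so signatures must not overlap
    signature = model_signatures[model_type]
    return [path for path in ltxv_choices if signature in path]

assert choices_for("ltxv_13B") == ltxv_choices[:2]
assert choices_for("ltxv_13B_distilled") == ltxv_choices[2:]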
@@ -1606,10 +1606,12 @@ def get_model_name(model_filename, description_container = [""]):
         model_name = "Wan2.1 Fantasy Speaking 720p"
         model_name += " 14B" if "14B" in model_filename else " 1.3B"
         description = "The Fantasy Speaking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking extension to process an audio Input."
-    elif "ltxv" in model_filename:
-        model_name = "LTX Video"
-        model_name += " 0.9.7 13B" if "13B" in model_filename else " 0.9.6 2B"
+    elif "ltxv_0.9.7_13B_dev" in model_filename:
+        model_name = "LTX Video 0.9.7"
         description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames).It is recommended to keep the number of steps to 30 or you will need to update the file 'ltxv_video/configs/ltxv-13b-0.9.7-dev.yaml'.The LTX Video model expects very long prompt, so don't hesitate to use the Prompt Enhancer."
+    elif "ltxv_0.9.7_13B_distilled" in model_filename:
+        model_name = "LTX Video 0.9.7 distilled"
+        description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames).This is the distilled / fast version. The LTX Video model expects very long prompt, so don't hesitate to use the Prompt Enhancer."
     elif "hunyuan_video_720" in model_filename:
         model_name = "Hunyuan Video text2video 720p"
         description = "Probably the best text 2 video model available."