more fixes

DeepBeepMeep 2025-05-17 16:51:20 +02:00
parent 6d9e60b309
commit 90b52d86ff
6 changed files with 26 additions and 17 deletions

@@ -189,9 +189,9 @@ class TextEncoder(nn.Module):
if "llm" in text_encoder_type:
from mmgp import offload
forcedConfigPath= None if "i2v" in text_encoder_type else "ckpts/llava-llama-3-8b/config.json"
self.model= offload.fast_load_transformers_model(self.model_path, forcedConfigPath=forcedConfigPath, modelPrefix= "model" if forcedConfigPath !=None else None)
self.model= offload.fast_load_transformers_model(self.model_path, forcedConfigPath=forcedConfigPath)
if forcedConfigPath != None:
self.model.final_layer_norm = self.model.norm
self.model.final_layer_norm = self.model.model.norm
else:
self.model, self.model_path = load_text_encoder(

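A minimal sketch (not part of the commit) of the patched loading path for the llava-llama-3 text encoder; the checkpoint path is a placeholder and the nested .model.norm layout is inferred from the new self.model.model.norm mapping above.

from mmgp import offload

model_path = "ckpts/llava-llama-3-8b"                       # hypothetical path; the real value comes from self.model_path
forcedConfigPath = "ckpts/llava-llama-3-8b/config.json"     # None for the i2v text encoder

model = offload.fast_load_transformers_model(model_path, forcedConfigPath=forcedConfigPath)
if forcedConfigPath is not None:
    # expose the wrapped LLM's final norm under the attribute the rest of the pipeline expects
    model.final_layer_norm = model.model.norm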
@@ -155,14 +155,14 @@ class LTXV:
):
self.mixed_precision_transformer = mixed_precision_transformer
# ckpt_path = Path(ckpt_path)
self.distilled = "distilled" in model_filepath[0]
# with safe_open(ckpt_path, framework="pt") as f:
# metadata = f.metadata()
# config_str = metadata.get("config")
# configs = json.loads(config_str)
# allowed_inference_steps = configs.get("allowed_inference_steps", None)
# transformer = Transformer3DModel.from_pretrained(ckpt_path)
# offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", config_file_path="config_transformer.json")
# transformer = offload.fast_load_transformers_model("c:/temp/ltxdistilled/diffusion_pytorch_model-00001-of-00006.safetensors", forcedConfigPath="c:/temp/ltxdistilled/config.json")
# vae = CausalVideoAutoencoder.from_pretrained(ckpt_path)
vae = offload.fast_load_transformers_model("ckpts/ltxv_0.9.7_VAE.safetensors", modelClass=CausalVideoAutoencoder)
@@ -174,8 +174,11 @@ class LTXV:
# vae = offload.fast_load_transformers_model("vae.safetensors", modelClass=CausalVideoAutoencoder, modelPrefix= "vae", forcedConfigPath="config_vae.json")
# offload.save_model(vae, "vae.safetensors", config_file_path="config_vae.json")
transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel)
# model_filepath = "c:/temp/ltxd/ltxv-13b-0.9.7-distilled.safetensors"
transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel, forcedConfigPath= "c:/temp/ltxd/config.json")
# offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors", config_file_path= "c:/temp/ltxd/config.json")
# offload.save_model(transformer, "ckpts/ltxv_0.9.7_13B_distilled_quanto_bf16_int8.safetensors", do_quantize= True, config_file_path="c:/temp/ltxd/config.json")
# transformer = offload.fast_load_transformers_model(model_filepath, modelClass=Transformer3DModel)
transformer._model_dtype = dtype
if mixed_precision_transformer:
transformer._lock_dtype = torch.float
@@ -295,6 +298,9 @@ class LTXV:
conditioning_media_paths = None
conditioning_start_frames = None
if self.distilled :
pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-distilled.yaml"
else:
pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml"
# check if pipeline_config is a file
if not os.path.isfile(pipeline_config):

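A small sketch (an assumption, not repo code) of the config selection that the new self.distilled flag drives, written as a standalone helper:

import os

def pick_pipeline_config(model_filepath):
    # mirrors the logic above: the distilled checkpoint gets its own YAML
    if "distilled" in model_filepath[0]:
        pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-distilled.yaml"
    else:
        pipeline_config = "ltx_video/configs/ltxv-13b-0.9.7-dev.yaml"
    if not os.path.isfile(pipeline_config):
        raise FileNotFoundError(pipeline_config)   # placeholder; the real error handling differs
    return pipeline_config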
@@ -1,4 +1,4 @@
# import av
import av
import torch
import io
import numpy as np

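Since av is now imported unconditionally (and added to the requirements below), here is a minimal, generic PyAV read loop as an illustration of the dependency; it is not code from this repo.

import av
import numpy as np

def read_frames(path):
    # decode the first video stream into RGB numpy arrays
    container = av.open(path)
    frames = [f.to_ndarray(format="rgb24") for f in container.decode(video=0)]
    container.close()
    return np.stack(frames) if frames else np.empty((0,))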
@@ -32,4 +32,5 @@ hydra-core
librosa
loguru
sentencepiece
av
# rembg==2.0.65

@@ -80,9 +80,9 @@ class WanT2V:
logging.info(f"Creating WanModel from {model_filename[-1]}")
from mmgp import offload
# model_filename = "c:/temp/vace/diffusion_pytorch_model-00001-of-00007.safetensors"
# model_filename = "c:/temp/vace1.3/diffusion_pytorch_model.safetensors"
# model_filename = "vace14B_quanto_bf16_int8.safetensors"
self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False) # , forcedConfigPath= "c:/temp/vace/vace_config.json")
self.model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,do_quantize= quantizeTransformer, writable_tensors= False , forcedConfigPath= "c:/temp/vace1.3/config.json")
# offload.load_model_data(self.model, "e:/vace.safetensors")
# offload.load_model_data(self.model, "c:/temp/Phantom-Wan-1.3B.pth")
# self.model.to(torch.bfloat16)
@@ -90,7 +90,7 @@ class WanT2V:
self.model.lock_layers_dtypes(torch.float32 if mixed_precision_transformer else dtype)
# dtype = torch.bfloat16
offload.change_dtype(self.model, dtype, True)
# offload.save_model(self.model, "vace14B_bf16.safetensors", config_file_path="c:/temp/vace/vace_config.json")
# offload.save_model(self.model, "wan2.1_Vace1.3B_mbf16.safetensors", config_file_path="c:/temp/vace1.3/config.json")
# offload.save_model(self.model, "vace14B_quanto_fp16_int8.safetensors", do_quantize= True, config_file_path="c:/temp/vace/vace_config.json")
self.model.eval().requires_grad_(False)

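For reference, a sketch (not part of the commit) of the WanT2V loading sequence shown above as a standalone snippet; the checkpoint path and the WanModel import path are assumptions, while lock_layers_dtypes and change_dtype are used as in the diff.

import torch
from mmgp import offload
from wan.modules.model import WanModel   # import path is an assumption

model_filename = "ckpts/wan2.1_text2video_1.3B_bf16.safetensors"   # placeholder checkpoint
model = offload.fast_load_transformers_model(model_filename, modelClass=WanModel,
                                             do_quantize=False, writable_tensors=False)
model.lock_layers_dtypes(torch.bfloat16)          # or torch.float32 when mixing precision
offload.change_dtype(model, torch.bfloat16, True)
model.eval().requires_grad_(False)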
wgp.py

@@ -1528,7 +1528,7 @@ wan_choices_i2v=["ckpts/wan2.1_image2video_480p_14B_mbf16.safetensors", "ckpts/w
"ckpts/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors", "ckpts/wan2.1_Fun_InP_1.3B_bf16.safetensors", "ckpts/wan2.1_Fun_InP_14B_bf16.safetensors",
"ckpts/wan2.1_Fun_InP_14B_quanto_int8.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_bf16.safetensors", "ckpts/wan2.1_FLF2V_720p_14B_quanto_int8.safetensors",
"ckpts/wan2.1_fantasy_speaking_14B_bf16.safetensors"]
ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors"]
ltxv_choices= ["ckpts/ltxv_0.9.7_13B_dev_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_dev_quanto_bf16_int8.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors", "ckpts/ltxv_0.9.7_13B_distilled_quanto_bf16_int8.safetensors"]
hunyuan_choices= ["ckpts/hunyuan_video_720_bf16.safetensors", "ckpts/hunyuan_video_720_quanto_int8.safetensors", "ckpts/hunyuan_video_i2v_720_bf16.safetensors", "ckpts/hunyuan_video_i2v_720_quanto_int8v2.safetensors",
"ckpts/hunyuan_video_custom_720_bf16.safetensors", "ckpts/hunyuan_video_custom_720_quanto_bf16_int8.safetensors" ]
@@ -1539,12 +1539,12 @@ def get_dependent_models(model_filename, quantization, dtype_policy ):
return [get_model_filename("i2v_720p", quantization, dtype_policy)]
else:
return []
model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "vace_14B", "fun_inp", "recam_1.3B", "flf2v_720p", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "phantom_1.3B", "fantasy", "ltxv_13B", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_types = [ "t2v_1.3B", "vace_1.3B", "fun_inp_1.3B", "t2v", "i2v", "i2v_720p", "vace_14B", "fun_inp", "recam_1.3B", "flf2v_720p", "sky_df_1.3B", "sky_df_14B", "sky_df_720p_14B", "phantom_1.3B", "fantasy", "ltxv_13B", "ltxv_13B_distilled", "hunyuan", "hunyuan_i2v", "hunyuan_custom"]
model_signatures = {"t2v": "text2video_14B", "t2v_1.3B" : "text2video_1.3B", "fun_inp_1.3B" : "Fun_InP_1.3B", "fun_inp" : "Fun_InP_14B",
"i2v" : "image2video_480p", "i2v_720p" : "image2video_720p" , "vace_1.3B" : "Vace_1.3B", "vace_14B" : "Vace_14B","recam_1.3B": "recammaster_1.3B",
"flf2v_720p" : "FLF2V_720p", "sky_df_1.3B" : "sky_reels2_diffusion_forcing_1.3B", "sky_df_14B" : "sky_reels2_diffusion_forcing_14B",
"sky_df_720p_14B" : "sky_reels2_diffusion_forcing_720p_14B",
"phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
"phantom_1.3B" : "phantom_1.3B", "fantasy" : "fantasy", "ltxv_13B" : "ltxv_0.9.7_13B_dev", "ltxv_13B_distilled" : "ltxv_0.9.7_13B_distilled", "hunyuan" : "hunyuan_video_720", "hunyuan_i2v" : "hunyuan_video_i2v_720", "hunyuan_custom" : "hunyuan_video_custom" }
def get_model_type(model_filename):
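A hypothetical sketch (not the actual get_model_type implementation) of how the signature table above can resolve a checkpoint filename to a model type:

def guess_model_type(model_filename, model_signatures):
    # first signature found in the filename wins; unknown checkpoints return None
    for model_type, signature in model_signatures.items():
        if signature in model_filename:
            return model_type
    return None

# guess_model_type("ckpts/ltxv_0.9.7_13B_distilled_bf16.safetensors", model_signatures)
# -> "ltxv_13B_distilled"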
@@ -1606,10 +1606,12 @@ def get_model_name(model_filename, description_container = [""]):
model_name = "Wan2.1 Fantasy Speaking 720p"
model_name += " 14B" if "14B" in model_filename else " 1.3B"
description = "The Fantasy Speaking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking extension to process an audio Input."
elif "ltxv" in model_filename:
model_name = "LTX Video"
model_name += " 0.9.7 13B" if "13B" in model_filename else " 0.9.6 2B"
elif "ltxv_0.9.7_13B_dev" in model_filename:
model_name = "LTX Video 0.9.7"
description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames).It is recommended to keep the number of steps to 30 or you will need to update the file 'ltxv_video/configs/ltxv-13b-0.9.7-dev.yaml'.The LTX Video model expects very long prompt, so don't hesitate to use the Prompt Enhancer."
elif "ltxv_0.9.7_13B_distilled" in model_filename:
model_name = "LTX Video 0.9.7 distilled"
description = "LTX Video is a fast model that can be used to generate long videos (up to 260 frames).This is the distilled / fast version. The LTX Video model expects very long prompt, so don't hesitate to use the Prompt Enhancer."
elif "hunyuan_video_720" in model_filename:
model_name = "Hunyuan Video text2video 720p"
description = "Probably the best text 2 video model available."