import torch
import gradio as gr


def get_qwen_text_encoder_filename(text_encoder_quantization):
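    """Return the path of the Qwen2.5-VL text encoder checkpoint.

    Defaults to the bf16 weights; when text_encoder_quantization is "int8",
    the quanto int8 variant of the same checkpoint is used instead.
    """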
    text_encoder_filename = "ckpts/Qwen2.5-VL-7B-Instruct/Qwen2.5-VL-7B-Instruct_bf16.safetensors"
    if text_encoder_quantization == "int8":
        text_encoder_filename = text_encoder_filename.replace("bf16", "quanto_bf16_int8")
    return text_encoder_filename


class family_handler():
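    """Model family handler for the Qwen image models.

    The static methods below describe the family to the host application:
    supported model types, required checkpoint files, default and validated
    UI settings, and how to load the pipeline.
    """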

    @staticmethod
    def query_model_def(base_model_type, model_def):
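        """Return the extra model definition (capabilities and UI choices) for base_model_type."""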
        extra_model_def = {
            "image_outputs": True,
            "sample_solvers": [
                ("Default", "default"),
                ("Lightning", "lightning"),
            ],
            "guidance_max_phases": 1,
            "lock_image_refs_ratios": True,
        }

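        # Only the 20B image-edit model exposes inpainting, reference-image and
        # outpainting controls.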
        if base_model_type in ["qwen_image_edit_20B"]:
            extra_model_def["inpaint_support"] = True
            extra_model_def["image_ref_choices"] = {
                "choices": [
                    ("None", ""),
                    ("First Conditional Image is the Main Subject / Landscape and may be followed by People / Objects", "KI"),
                    ("Conditional Images are People / Objects", "I"),
                ],
                "letters_filter": "KI",
            }
            extra_model_def["background_removal_label"] = "Remove Backgrounds only behind People / Objects, except the main Subject / Landscape"
            extra_model_def["video_guide_outpainting"] = [2]
            extra_model_def["model_modes"] = {
                "choices": [
                    ("Lora Inpainting: Inpainted area completely unrelated to occluded content", 1),
                    ("Masked Denoising: Inpainted area may reuse some content that has been occluded", 0),
                ],
                "default": 1,
                "label": "Inpainting Method",
                "image_modes": [2],
            }

        return extra_model_def

    @staticmethod
    def query_supported_types():
        return ["qwen_image_20B", "qwen_image_edit_20B"]

    @staticmethod
    def query_family_maps():
        return {}, {}

    @staticmethod
    def query_model_family():
        return "qwen"

    @staticmethod
    def query_family_infos():
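        # (rank, display name); the integer presumably controls ordering of the family in the UI.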
        return {"qwen": (40, "Qwen")}

    @staticmethod
    def query_model_files(computeList, base_model_type, model_filename, text_encoder_quantization):
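        """Describe the checkpoint files to download: repo id, source folders and per-folder file lists."""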
        text_encoder_filename = get_qwen_text_encoder_filename(text_encoder_quantization)
        return {
            "repoId": "DeepBeepMeep/Qwen_image",
            "sourceFolderList": ["", "Qwen2.5-VL-7B-Instruct"],
            "fileList": [
                ["qwen_vae.safetensors", "qwen_vae_config.json"],
                ["merges.txt", "tokenizer_config.json", "config.json", "vocab.json", "video_preprocessor_config.json", "preprocessor_config.json"] + computeList(text_encoder_filename),
            ],
        }

    @staticmethod
    def load_model(model_filename, model_type, base_model_type, model_def, quantizeTransformer=False, text_encoder_quantization=None, dtype=torch.bfloat16, VAE_dtype=torch.float32, mixed_precision_transformer=False, save_quantized=False, submodel_no_list=None):
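        """Instantiate the Qwen image pipeline.

        Returns the pipeline processor plus a dict exposing its tokenizer,
        transformer, text encoder and VAE sub-modules to the caller.
        """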
        from .qwen_main import model_factory
        from mmgp import offload

        pipe_processor = model_factory(
            checkpoint_dir="ckpts",
            model_filename=model_filename,
            model_type=model_type,
            model_def=model_def,
            base_model_type=base_model_type,
            text_encoder_filename=get_qwen_text_encoder_filename(text_encoder_quantization),
            quantizeTransformer=quantizeTransformer,
            dtype=dtype,
            VAE_dtype=VAE_dtype,
            mixed_precision_transformer=mixed_precision_transformer,
            save_quantized=save_quantized,
        )

        pipe = {
            "tokenizer": pipe_processor.tokenizer,
            "transformer": pipe_processor.transformer,
            "text_encoder": pipe_processor.text_encoder,
            "vae": pipe_processor.vae,
        }

        return pipe_processor, pipe

    @staticmethod
    def fix_settings(base_model_type, settings_version, model_def, ui_defaults):
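        """Patch ui_defaults loaded from older saved settings (missing sample_solver, pre-2.32 denoising_strength)."""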
        if ui_defaults.get("sample_solver", "") == "":
            ui_defaults["sample_solver"] = "default"

        if settings_version < 2.32:
            ui_defaults["denoising_strength"] = 1.

    @staticmethod
    def update_default_settings(base_model_type, model_def, ui_defaults):
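        """Fill ui_defaults with the default generation settings for the Qwen image models."""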
        ui_defaults.update({
            "guidance_scale": 4,
            "sample_solver": "default",
        })
        if base_model_type in ["qwen_image_edit_20B"]:
            ui_defaults.update({
                "video_prompt_type": "KI",
                "denoising_strength": 1.,
                "model_mode": 0,
            })

    @staticmethod
    def validate_generative_settings(base_model_type, model_def, inputs):
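        """Validate generation settings for the image-edit model; returns an error string when a combination is unsupported."""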
        if base_model_type in ["qwen_image_edit_20B"]:
            model_mode = inputs["model_mode"]
            denoising_strength = inputs["denoising_strength"]
            video_guide_outpainting = inputs["video_guide_outpainting"]
            from wgp import get_outpainting_dims
            outpainting_dims = get_outpainting_dims(video_guide_outpainting)

            if denoising_strength < 1 and model_mode == 1:
                gr.Info("Denoising Strength will be ignored while using Lora Inpainting")
            if outpainting_dims is not None and model_mode == 0:
                return "Outpainting is not supported with Masked Denoising"
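

if __name__ == "__main__":
    # Minimal smoke-test sketch (not part of the host application): it only
    # exercises the pure helpers above. "int8" and "qwen_image_edit_20B" are
    # illustrative inputs, not required values.
    print(get_qwen_text_encoder_filename("int8"))
    print(family_handler.query_supported_types())
    print(family_handler.query_model_def("qwen_image_edit_20B", {}))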