mirror of
https://github.com/Wan-Video/Wan2.1.git
Flux Festival
This commit is contained in:
parent 119162373a
commit 9fa267087b
@@ -20,7 +20,7 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTV Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep

 ## 🔥 Latest Updates :

-### September 5 2025: WanGP v8.5 - Wanna be a Cropper or a Painter ?
+### September 11 2025: WanGP v8.5/8.55 - Wanna be a Cropper or a Painter ?

 I have done some intensive internal refactoring of the generation pipeline to ease support of existing models or add new models. Nothing really visible, but this makes WanGP a little more future proof.
@@ -38,6 +38,13 @@ Doing more sophisticated thing Vace Image Editor works very well too: try Image

 For the best quality I recommend to set in *Quality Tab* the option: "*Generate a 9 Frames Long video...*"

+**update 8.55**: Flux Festival
+- **Inpainting Mode** also added for *Flux Kontext*
+- **Flux SRPO**: new finetune with 3x better quality vs Flux Dev according to its authors. I have also created a *Flux SRPO USO* finetune, which is certainly the best open source *Style Transfer* tool available
+- **Flux UMO**: model specialized in combining multiple reference objects / people together. Works quite well at 768x768
+
+Good luck finding your way through all the Flux model names!
+
 ### September 5 2025: WanGP v8.4 - Take me to Outer Space

 You have probably seen these short AI generated movies created using *Nano Banana* and the *First Frame - Last Frame* feature of *Kling 2.0*. The idea is to generate an image, modify a part of it with Nano Banana, give these two images to Kling, which will generate the Video between them, then use the previous Last Frame as the new First Frame, rinse and repeat, and you get a full movie.
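
The chaining idea described in the v8.4 note boils down to a short loop. A purely hypothetical sketch: none of these callables exist in WanGP, they stand in for the external tools mentioned above (an image generator, a Nano-Banana-style editor, a Kling-style first/last-frame model).

```python
# Hypothetical sketch of the chaining workflow described above. Pass in whatever
# image generator, image editor and first/last-frame video model you actually use.
def make_movie(n_shots, first_image, edit_image, video_between, last_frame):
    clips, start = [], first_image
    for _ in range(n_shots):
        end = edit_image(start)                   # e.g. a Nano-Banana-style partial edit
        clips.append(video_between(start, end))   # e.g. a Kling-style First/Last Frame clip
        start = last_frame(clips[-1])             # previous Last Frame becomes the new First Frame
    return clips
```
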
defaults/flux_dev_umo.json (new file, 24 lines)
@@ -0,0 +1,24 @@
+{
+    "model": {
+        "name": "Flux 1 Dev UMO 12B",
+        "architecture": "flux",
+        "description": "FLUX.1 Dev UMO is a model that can Edit Images with a specialization in combining multiple image references (resized internally at 512x512 max) to produce an Image output. Best Image preservation at 768x768 Resolution Output.",
+        "URLs": "flux",
+        "flux-model": "flux-dev-umo",
+        "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-UMO_dit_lora_bf16.safetensors"],
+        "resolutions": [ ["1024x1024 (1:1)", "1024x1024"],
+                         ["768x1024 (3:4)", "768x1024"],
+                         ["1024x768 (4:3)", "1024x768"],
+                         ["512x1024 (1:2)", "512x1024"],
+                         ["1024x512 (2:1)", "1024x512"],
+                         ["768x768 (1:1)", "768x768"],
+                         ["768x512 (3:2)", "768x512"],
+                         ["512x768 (2:3)", "512x768"]]
+    },
+    "prompt": "the man is wearing a hat",
+    "embedded_guidance_scale": 4,
+    "resolution": "768x768",
+    "batch_size": 1
+}
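
As a quick orientation for the model definition above, here is a minimal sketch (an assumption, not WanGP's actual loader) of reading the file and splitting the "resolutions" label/value pairs into numeric sizes.

```python
# Minimal sketch (assumption, not WanGP's loader) of reading the definition above.
import json

with open("defaults/flux_dev_umo.json") as f:
    defaults = json.load(f)

model = defaults["model"]
# each "resolutions" entry pairs a display label with a "WIDTHxHEIGHT" value
sizes = [tuple(map(int, value.split("x"))) for _, value in model["resolutions"]]
print(model["name"], sizes[0])     # Flux 1 Dev UMO 12B (1024, 1024)
print(defaults["resolution"])      # default output resolution: 768x768
```
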
@@ -2,7 +2,7 @@
     "model": {
         "name": "Flux 1 Dev USO 12B",
         "architecture": "flux",
-        "description": "FLUX.1 Dev USO is a model specialized to Edit Images with a specialization in Style Transfers (up to two).",
+        "description": "FLUX.1 Dev USO is a model that can Edit Images with a specialization in Style Transfers (up to two).",
         "modules": [ ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_projector_bf16.safetensors"]],
         "URLs": "flux",
         "loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_dit_lora_bf16.safetensors"],
defaults/flux_srpo.json (new file, 15 lines)
@@ -0,0 +1,15 @@
+{
+    "model": {
+        "name": "Flux 1 SRPO Dev 12B",
+        "architecture": "flux",
+        "description": "By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, SRPO improves its human-evaluated realism and aesthetic quality by over 3x.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_quanto_bf16_int8.safetensors"
+        ],
+        "flux-model": "flux-dev"
+    },
+    "prompt": "draw a hat",
+    "resolution": "1024x1024",
+    "batch_size": 1
+}
defaults/flux_srpo_uso.json (new file, 17 lines)
@@ -0,0 +1,17 @@
+{
+    "model": {
+        "name": "Flux 1 SRPO USO 12B",
+        "architecture": "flux",
+        "description": "FLUX.1 SRPO USO is a model that can Edit Images with a specialization in Style Transfers (up to two). It leverages the improved Image quality brought by the SRPO process",
+        "modules": [ "flux_dev_uso"],
+        "URLs": "flux_srpo",
+        "loras": "flux_dev_uso",
+        "flux-model": "flux-dev-uso"
+    },
+    "prompt": "the man is wearing a hat",
+    "embedded_guidance_scale": 4,
+    "resolution": "1024x1024",
+    "batch_size": 1
+}
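
Several fields in these definitions hold a plain string instead of a list ("URLs": "flux_srpo", "loras": "flux_dev_uso"), which appears to name another defaults file whose value should be reused. A hedged sketch of such a resolver, an assumption about the loader rather than WanGP's code:

```python
# Hedged sketch (assumption): a non-URL string value for "URLs" or "loras" names a
# sibling defaults file whose value of the same field is reused.
import json

def resolve(field, value, folder="defaults"):
    if isinstance(value, str) and not value.startswith("http"):
        with open(f"{folder}/{value}.json") as f:
            return json.load(f)["model"].get(field, value)
    return value

with open("defaults/flux_srpo_uso.json") as f:
    model = json.load(f)["model"]

urls = resolve("URLs", model["URLs"])      # follows "flux_srpo" to its explicit URL list
loras = resolve("loras", model["loras"])   # follows "flux_dev_uso" to its LoRA URL
```
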
@@ -13,6 +13,7 @@ class family_handler():
         flux_schnell = flux_model == "flux-schnell"
         flux_chroma = flux_model == "flux-chroma"
         flux_uso = flux_model == "flux-dev-uso"
+        flux_umo = flux_model == "flux-dev-umo"
         flux_kontext = flux_model == "flux-dev-kontext"

         extra_model_def = {
@@ -35,6 +36,7 @@ class family_handler():
         }

         if flux_kontext:
+            extra_model_def["inpaint_support"] = True
             extra_model_def["image_ref_choices"] = {
                 "choices": [
                     ("None", ""),
@@ -43,6 +45,15 @@ class family_handler():
                 ],
                 "letters_filter": "KI",
             }
+            extra_model_def["background_removal_label"] = "Remove Backgrounds only behind People / Objects except main Subject / Landscape"
+        elif flux_umo:
+            extra_model_def["image_ref_choices"] = {
+                "choices": [
+                    ("Conditional Images are People / Objects", "I"),
+                ],
+                "letters_filter": "I",
+                "visible": False
+            }

         extra_model_def["lock_image_refs_ratios"] = True
@@ -131,10 +142,14 @@ class family_handler():
             video_prompt_type = video_prompt_type.replace("I", "KI")
             ui_defaults["video_prompt_type"] = video_prompt_type

+        if settings_version < 2.34:
+            ui_defaults["denoising_strength"] = 1.
+
     @staticmethod
     def update_default_settings(base_model_type, model_def, ui_defaults):
         flux_model = model_def.get("flux-model", "flux-dev")
         flux_uso = flux_model == "flux-dev-uso"
+        flux_umo = flux_model == "flux-dev-umo"
         flux_kontext = flux_model == "flux-dev-kontext"
         ui_defaults.update({
             "embedded_guidance": 2.5,
@@ -143,5 +158,12 @@ class family_handler():
         if flux_kontext or flux_uso:
             ui_defaults.update({
                 "video_prompt_type": "KI",
+                "denoising_strength": 1.,
+            })
+        elif flux_umo:
+            ui_defaults.update({
+                "video_prompt_type": "I",
+                "remove_background_images_ref": 0,
             })
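
Together with the settings_version bump to 2.34 in wgp.py further down (older saved settings get denoising_strength reset to 1.), the branches above define what a fresh UI starts with for each Flux variant. A standalone illustration that mirrors the branches rather than importing WanGP, so take it as a sketch, not the actual API:

```python
# Standalone illustration of the per-variant defaults above (does not import WanGP).
def flux_ui_defaults(flux_model):
    ui = {"embedded_guidance": 2.5}
    if flux_model in ("flux-dev-kontext", "flux-dev-uso"):
        ui.update({"video_prompt_type": "KI", "denoising_strength": 1.})
    elif flux_model == "flux-dev-umo":
        ui.update({"video_prompt_type": "I", "remove_background_images_ref": 0})
    return ui

print(flux_ui_defaults("flux-dev-umo"))
# {'embedded_guidance': 2.5, 'video_prompt_type': 'I', 'remove_background_images_ref': 0}
```
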
@@ -23,6 +23,35 @@ from .util import (
 )

 from PIL import Image
+
+def preprocess_ref(raw_image: Image.Image, long_size: int = 512):
+    # get the width and height of the original image
+    image_w, image_h = raw_image.size
+
+    # compute the new width and height (long side scaled to long_size)
+    if image_w >= image_h:
+        new_w = long_size
+        new_h = int((long_size / image_w) * image_h)
+    else:
+        new_h = long_size
+        new_w = int((long_size / image_h) * image_w)
+
+    # resize proportionally to the new width and height
+    raw_image = raw_image.resize((new_w, new_h), resample=Image.LANCZOS)
+    target_w = new_w // 16 * 16
+    target_h = new_h // 16 * 16
+
+    # compute the crop origin for a center crop
+    left = (new_w - target_w) // 2
+    top = (new_h - target_h) // 2
+    right = left + target_w
+    bottom = top + target_h
+
+    # center crop
+    raw_image = raw_image.crop((left, top, right, bottom))
+
+    # convert to RGB mode
+    raw_image = raw_image.convert("RGB")
+    return raw_image
+
 def stitch_images(img1, img2):
     # Resize img2 to match img1's height
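
A worked example of the arithmetic in preprocess_ref above: a 1000x700 reference with long_size=512 is first resized to 512x358, then center-cropped to the nearest multiples of 16 (standalone check, no WanGP imports).

```python
# Standalone check of the resize/center-crop arithmetic in preprocess_ref above.
image_w, image_h, long_size = 1000, 700, 512
new_w, new_h = long_size, int((long_size / image_w) * image_h)   # (512, 358): long side pinned to 512
target_w, target_h = new_w // 16 * 16, new_h // 16 * 16          # (512, 352): snap down to multiples of 16
left, top = (new_w - target_w) // 2, (new_h - target_h) // 2     # (0, 3): center the crop
print((target_w, target_h))                                      # (512, 352)
```
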
@@ -67,7 +96,7 @@ class model_factory:
         # self.name= "flux-schnell"
         source = model_def.get("source", None)
         self.model = load_flow_model(self.name, model_filename[0] if source is None else source, torch_device)
+        self.model_def = model_def
         self.vae = load_ae(self.name, device=torch_device)

         siglip_processor = siglip_model = feature_embedder = None
@@ -113,6 +142,8 @@ class model_factory:
         n_prompt: str = None,
         sampling_steps: int = 20,
         input_ref_images = None,
+        image_guide= None,
+        image_mask= None,
         width= 832,
         height=480,
         embedded_guidance_scale: float = 2.5,
@@ -124,6 +155,7 @@ class model_factory:
         video_prompt_type = "",
         joint_pass = False,
         image_refs_relative_size = 100,
+        denoising_strength = 1.,
         **bbargs
     ):
         if self._interrupt:
@@ -132,8 +164,16 @@ class model_factory:
         if n_prompt is None or len(n_prompt) == 0: n_prompt = "low quality, ugly, unfinished, out of focus, deformed, disfigure, blurry, smudged, restricted palette, flat colors"
         device="cuda"
         flux_dev_uso = self.name in ['flux-dev-uso']
-        image_stiching = not self.name in ['flux-dev-uso'] #and False
+        flux_dev_umo = self.name in ['flux-dev-umo']
+        latent_stiching = self.name in ['flux-dev-uso', 'flux-dev-umo']
+
+        lock_dimensions= False
+
         input_ref_images = [] if input_ref_images is None else input_ref_images[:]
+        if flux_dev_umo:
+            ref_long_side = 512 if len(input_ref_images) <= 1 else 320
+            input_ref_images = [preprocess_ref(img, ref_long_side) for img in input_ref_images]
+            lock_dimensions = True
         ref_style_imgs = []
         if "I" in video_prompt_type and len(input_ref_images) > 0:
             if flux_dev_uso :
@@ -143,22 +183,26 @@ class model_factory:
             elif len(input_ref_images) > 1 :
                 ref_style_imgs = input_ref_images[-1:]
                 input_ref_images = input_ref_images[:-1]
-            if image_stiching:
+
+            if latent_stiching:
+                # latents stiching with resize
+                if not lock_dimensions :
+                    for i in range(len(input_ref_images)):
+                        w, h = input_ref_images[i].size
+                        image_height, image_width = calculate_new_dimensions(int(height*image_refs_relative_size/100), int(width*image_refs_relative_size/100), h, w, 0)
+                        input_ref_images[i] = input_ref_images[i].resize((image_width, image_height), resample=Image.Resampling.LANCZOS)
+            else:
                 # image stiching method
                 stiched = input_ref_images[0]
                 for new_img in input_ref_images[1:]:
                     stiched = stitch_images(stiched, new_img)
                 input_ref_images = [stiched]
-            else:
-                # latents stiching with resize
-                for i in range(len(input_ref_images)):
-                    w, h = input_ref_images[i].size
-                    image_height, image_width = calculate_new_dimensions(int(height*image_refs_relative_size/100), int(width*image_refs_relative_size/100), h, w, fit_into_canvas)
-                    input_ref_images[i] = input_ref_images[i].resize((image_width, image_height), resample=Image.Resampling.LANCZOS)
+        elif image_guide is not None:
+            input_ref_images = [image_guide]
         else:
             input_ref_images = None

-        if flux_dev_uso :
+        if self.name in ['flux-dev-uso', 'flux-dev-umo'] :
             inp, height, width = prepare_multi_ip(
                 ae=self.vae,
                 img_cond_list=input_ref_images,
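
For contrast with the latent-stitching path above, the image-stitching fallback concatenates the references into one canvas. The body of stitch_images is not part of this hunk, so the following is only an illustration of the usual approach (resize img2 to img1's height, then paste side by side, matching the context comment), not WanGP's exact implementation.

```python
from PIL import Image

def stitch_side_by_side(img1: Image.Image, img2: Image.Image) -> Image.Image:
    # resize img2 to img1's height, keeping its aspect ratio
    new_w = max(1, int(img2.width * img1.height / img2.height))
    img2 = img2.resize((new_w, img1.height), resample=Image.LANCZOS)
    out = Image.new("RGB", (img1.width + img2.width, img1.height))
    out.paste(img1, (0, 0))
    out.paste(img2, (img1.width, 0))
    return out
```
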
@@ -177,6 +221,7 @@ class model_factory:
                 bs=batch_size,
                 seed=seed,
                 device=device,
+                img_mask=image_mask,
             )

         inp.update(prepare_prompt(self.t5, self.clip, batch_size, input_prompt))
@@ -198,13 +243,19 @@ class model_factory:
             return unpack(x.float(), height, width)

         # denoise initial noise
-        x = denoise(self.model, **inp, timesteps=timesteps, guidance=embedded_guidance_scale, real_guidance_scale =guide_scale, callback=callback, pipeline=self, loras_slists= loras_slists, unpack_latent = unpack_latent, joint_pass = joint_pass)
+        x = denoise(self.model, **inp, timesteps=timesteps, guidance=embedded_guidance_scale, real_guidance_scale =guide_scale, callback=callback, pipeline=self, loras_slists= loras_slists, unpack_latent = unpack_latent, joint_pass = joint_pass, denoising_strength = denoising_strength)
         if x==None: return None
         # decode latents to pixel space
         x = unpack_latent(x)
         with torch.autocast(device_type=device, dtype=torch.bfloat16):
             x = self.vae.decode(x)
+
+        if image_mask is not None:
+            from shared.utils.utils import convert_image_to_tensor
+            img_msk_rebuilt = inp["img_msk_rebuilt"]
+            img = convert_image_to_tensor(image_guide)
+            x = img.squeeze(2) * (1 - img_msk_rebuilt) + x.to(img) * img_msk_rebuilt
+
         x = x.clamp(-1, 1)
         x = x.transpose(0, 1)
         return x
@@ -190,6 +190,21 @@ class Flux(nn.Module):
                     v = swap_scale_shift(v)
                 k = k.replace("norm_out.linear", "final_layer.adaLN_modulation.1")
                 new_sd[k] = v
+        # elif not first_key.startswith("diffusion_model.") and not first_key.startswith("transformer."):
+        #     for k,v in sd.items():
+        #         if "double" in k:
+        #             k = k.replace(".processor.proj_lora1.", ".img_attn.proj.lora_")
+        #             k = k.replace(".processor.proj_lora2.", ".txt_attn.proj.lora_")
+        #             k = k.replace(".processor.qkv_lora1.", ".img_attn.qkv.lora_")
+        #             k = k.replace(".processor.qkv_lora2.", ".txt_attn.qkv.lora_")
+        #         else:
+        #             k = k.replace(".processor.qkv_lora.", ".linear1_qkv.lora_")
+        #             k = k.replace(".processor.proj_lora.", ".linear2.lora_")
+
+        #         k = "diffusion_model." + k
+        #         new_sd[k] = v
+        #     from mmgp import safetensors2
+        #     safetensors2.torch_write_file(new_sd, "fff.safetensors")
         else:
             new_sd = sd
         return new_sd
@@ -138,10 +138,12 @@ def prepare_kontext(
     target_width: int | None = None,
     target_height: int | None = None,
     bs: int = 1,
+    img_mask = None,
 ) -> tuple[dict[str, Tensor], int, int]:
     # load and encode the conditioning image

+    res_match_output = img_mask is not None

     img_cond_seq = None
     img_cond_seq_ids = None
     if img_cond_list == None: img_cond_list = []
@@ -150,7 +152,9 @@ def prepare_kontext(
     for cond_no, img_cond in enumerate(img_cond_list):
         width, height = img_cond.size
         aspect_ratio = width / height
+        if res_match_output:
+            width, height = target_width, target_height
+        else:
         # Kontext is trained on specific resolutions, using one of them is recommended
         _, width, height = min((abs(aspect_ratio - w / h), w, h) for w, h in PREFERED_KONTEXT_RESOLUTIONS)
         width = 2 * int(width / 16)
@@ -193,6 +197,19 @@ def prepare_kontext(
         "img_cond_seq": img_cond_seq,
         "img_cond_seq_ids": img_cond_seq_ids,
     }
+    if img_mask is not None:
+        from shared.utils.utils import convert_image_to_tensor, convert_tensor_to_image
+        # image_height, image_width = calculate_new_dimensions(ref_height, ref_width, image_height, image_width, False, block_size=multiple_of)
+        image_mask_latents = convert_image_to_tensor(img_mask.resize((target_width // 16, target_height // 16), resample=Image.Resampling.LANCZOS))
+        image_mask_latents = torch.where(image_mask_latents>-0.5, 1., 0. )[0:1]
+        image_mask_rebuilt = image_mask_latents.repeat_interleave(16, dim=-1).repeat_interleave(16, dim=-2).unsqueeze(0)
+        convert_tensor_to_image( image_mask_rebuilt.squeeze(0).repeat(3,1,1)).save("mmm.png")
+        image_mask_latents = image_mask_latents.reshape(1, -1, 1).to(device)
+        return_dict.update({
+            "img_msk_latents": image_mask_latents,
+            "img_msk_rebuilt": image_mask_rebuilt,
+        })
+
     img = get_noise(
         bs,
         target_height,
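
The mask handling above shrinks the user mask to the 16x16-patch latent grid, binarizes it, flattens it to match the latent sequence, and also rebuilds a full-resolution copy (img_msk_rebuilt) for the final composite. A standalone sketch with plain torch tensors (a random stand-in replaces WanGP's convert_image_to_tensor helper):

```python
import torch
import torch.nn.functional as F

H, W = 1024, 768
pixel_mask = (torch.rand(1, 1, H, W) > 0.5).float()       # stand-in for the user-drawn mask

latent_mask = F.interpolate(pixel_mask, size=(H // 16, W // 16), mode="nearest")
latent_mask = (latent_mask > 0.5).float()                  # one value per 16x16 latent patch
mask_rebuilt = latent_mask.repeat_interleave(16, dim=-1).repeat_interleave(16, dim=-2)
mask_flat = latent_mask.reshape(1, -1, 1)                  # matches the flattened latent sequence

print(mask_rebuilt.shape, mask_flat.shape)                 # torch.Size([1, 1, 1024, 768]) torch.Size([1, 3072, 1])
```
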
@@ -264,6 +281,9 @@ def denoise(
     loras_slists=None,
     unpack_latent = None,
     joint_pass= False,
+    img_msk_latents = None,
+    img_msk_rebuilt = None,
+    denoising_strength = 1,
 ):

     kwargs = {'pipeline': pipeline, 'callback': callback, "img_len" : img.shape[1], "siglip_embedding": siglip_embedding, "siglip_embedding_ids": siglip_embedding_ids}
@@ -271,6 +291,21 @@ def denoise(
     if callback != None:
         callback(-1, None, True)

+    original_image_latents = None if img_cond_seq is None else img_cond_seq.clone()
+
+    morph, first_step = False, 0
+    if img_msk_latents is not None:
+        randn = torch.randn_like(original_image_latents)
+        if denoising_strength < 1.:
+            first_step = int(len(timesteps) * (1. - denoising_strength))
+            if not morph:
+                latent_noise_factor = timesteps[first_step]
+                latents = original_image_latents * (1.0 - latent_noise_factor) + randn * latent_noise_factor
+                img = latents.to(img)
+                latents = None
+            timesteps = timesteps[first_step:]
+
     updated_num_steps= len(timesteps) -1
     if callback != None:
         from shared.utils.loras_mutipliers import update_loras_slists
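
The denoising_strength plumbing above implements a partial denoise: a fraction of the schedule is skipped, and the starting latents are a blend of the clean image latents and noise at the resumed timestep. A standalone sketch of that start-up step:

```python
import torch

timesteps = torch.linspace(1.0, 0.0, 21)                        # stand-in schedule (20 steps)
denoising_strength = 0.6
first_step = int(len(timesteps) * (1.0 - denoising_strength))   # 8: the first 8 steps are skipped

clean_latents = torch.randn(1, 3072, 64)                        # stand-in for the encoded guide image
randn = torch.randn_like(clean_latents)                         # the noise tensor drawn once
t = timesteps[first_step]                                       # noise level at the resumed step
img = clean_latents * (1.0 - t) + randn * t                     # same blend as in denoise() above
timesteps = timesteps[first_step:]                              # only the remaining steps are run
```
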
@@ -280,10 +315,14 @@ def denoise(
     # this is ignored for schnell
     guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)
     for i, (t_curr, t_prev) in enumerate(zip(timesteps[:-1], timesteps[1:])):
-        offload.set_step_no_for_lora(model, i)
+        offload.set_step_no_for_lora(model, first_step + i)
         if pipeline._interrupt:
             return None
+
+        if img_msk_latents is not None and denoising_strength <1. and i == first_step and morph:
+            latent_noise_factor = t_curr/1000
+            img = original_image_latents * (1.0 - latent_noise_factor) + img * latent_noise_factor
+
         t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
         img_input = img
         img_input_ids = img_ids
@@ -333,6 +372,14 @@ def denoise(
             pred = neg_pred + real_guidance_scale * (pred - neg_pred)

         img += (t_prev - t_curr) * pred
+
+        if img_msk_latents is not None:
+            latent_noise_factor = t_prev
+            # noisy_image = original_image_latents * (1.0 - latent_noise_factor) + torch.randn_like(original_image_latents) * latent_noise_factor
+            noisy_image = original_image_latents * (1.0 - latent_noise_factor) + randn * latent_noise_factor
+            img = noisy_image * (1-img_msk_latents) + img_msk_latents * img
+            noisy_image = None
+
         if callback is not None:
             preview = unpack_latent(img).transpose(0,1)
             callback(i, preview, False)
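
Inside the loop, inpainting is enforced by re-noising the original latents to the current noise level (reusing the same randn drawn once up front) and pasting them back outside the mask, so only the masked region evolves freely. A standalone sketch of that blend; the model update itself is a placeholder here.

```python
import torch

seq, dim = 3072, 64
original = torch.randn(1, seq, dim)                # clean latents of the guide image
randn = torch.randn_like(original)                 # fixed noise, drawn once before the loop
mask = (torch.rand(1, seq, 1) > 0.5).float()       # 1 where the image should be repainted

img = randn.clone()
for t_prev in (0.8, 0.6, 0.4, 0.2, 0.0):           # stand-in for the remaining timesteps
    img = img - 0.1 * torch.randn_like(img)        # placeholder for the denoiser update
    noisy_original = original * (1.0 - t_prev) + randn * t_prev
    img = noisy_original * (1 - mask) + mask * img # outside the mask: pinned to the re-noised original
```
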
@@ -640,6 +640,38 @@ configs = {
             shift_factor=0.1159,
         ),
     ),
+    "flux-dev-umo": ModelSpec(
+        repo_id="",
+        repo_flow="",
+        repo_ae="ckpts/flux_vae.safetensors",
+        params=FluxParams(
+            in_channels=64,
+            out_channels=64,
+            vec_in_dim=768,
+            context_in_dim=4096,
+            hidden_size=3072,
+            mlp_ratio=4.0,
+            num_heads=24,
+            depth=19,
+            depth_single_blocks=38,
+            axes_dim=[16, 56, 56],
+            theta=10_000,
+            qkv_bias=True,
+            guidance_embed=True,
+            eso= True,
+        ),
+        ae_params=AutoEncoderParams(
+            resolution=256,
+            in_channels=3,
+            ch=128,
+            out_ch=3,
+            ch_mult=[1, 2, 4, 4],
+            num_res_blocks=2,
+            z_channels=16,
+            scale_factor=0.3611,
+            shift_factor=0.1159,
+        ),
+    ),
 }
|
|||||||
image_mask_latents = convert_image_to_tensor(image_mask.resize((width // 16, height // 16), resample=Image.Resampling.LANCZOS))
|
image_mask_latents = convert_image_to_tensor(image_mask.resize((width // 16, height // 16), resample=Image.Resampling.LANCZOS))
|
||||||
image_mask_latents = torch.where(image_mask_latents>-0.5, 1., 0. )[0:1]
|
image_mask_latents = torch.where(image_mask_latents>-0.5, 1., 0. )[0:1]
|
||||||
image_mask_rebuilt = image_mask_latents.repeat_interleave(16, dim=-1).repeat_interleave(16, dim=-2).unsqueeze(0)
|
image_mask_rebuilt = image_mask_latents.repeat_interleave(16, dim=-1).repeat_interleave(16, dim=-2).unsqueeze(0)
|
||||||
convert_tensor_to_image( image_mask_rebuilt.squeeze(0).repeat(3,1,1)).save("mmm.png")
|
# convert_tensor_to_image( image_mask_rebuilt.squeeze(0).repeat(3,1,1)).save("mmm.png")
|
||||||
image_mask_latents = image_mask_latents.reshape(1, -1, 1).to(device)
|
image_mask_latents = image_mask_latents.reshape(1, -1, 1).to(device)
|
||||||
|
|
||||||
prompt_image = image
|
prompt_image = image
|
||||||
if image.size != (image_width, image_height):
|
if image.size != (image_width, image_height):
|
||||||
image = image.resize((image_width, image_height), resample=Image.Resampling.LANCZOS)
|
image = image.resize((image_width, image_height), resample=Image.Resampling.LANCZOS)
|
||||||
|
|
||||||
image.save("nnn.png")
|
# image.save("nnn.png")
|
||||||
image = convert_image_to_tensor(image).unsqueeze(0).unsqueeze(2)
|
image = convert_image_to_tensor(image).unsqueeze(0).unsqueeze(2)
|
||||||
|
|
||||||
has_neg_prompt = negative_prompt is not None or (
|
has_neg_prompt = negative_prompt is not None or (
|
||||||
@@ -811,12 +811,15 @@ class QwenImagePipeline(): #DiffusionPipeline
         negative_txt_seq_lens = (
             negative_prompt_embeds_mask.sum(dim=1).tolist() if negative_prompt_embeds_mask is not None else None
         )
-        morph = False
-        if image_mask_latents is not None and denoising_strength <= 1.:
+        morph, first_step = False, 0
+        if image_mask_latents is not None:
+            randn = torch.randn_like(original_image_latents)
+            if denoising_strength < 1.:
                 first_step = int(len(timesteps) * (1. - denoising_strength))
                 if not morph:
                     latent_noise_factor = timesteps[first_step]/1000
-                    latents = original_image_latents * (1.0 - latent_noise_factor) + torch.randn_like(original_image_latents) * latent_noise_factor
+                    # latents = original_image_latents * (1.0 - latent_noise_factor) + torch.randn_like(original_image_latents) * latent_noise_factor
+                    latents = original_image_latents * (1.0 - latent_noise_factor) + randn * latent_noise_factor
                 timesteps = timesteps[first_step:]
                 self.scheduler.timesteps = timesteps
                 self.scheduler.sigmas= self.scheduler.sigmas[first_step:]
@@ -831,6 +834,7 @@ class QwenImagePipeline(): #DiffusionPipeline


         for i, t in enumerate(timesteps):
+            offload.set_step_no_for_lora(self.transformer, first_step + i)
             if self.interrupt:
                 continue

@@ -905,7 +909,8 @@ class QwenImagePipeline(): #DiffusionPipeline
                 if image_mask_latents is not None:
                     next_t = timesteps[i+1] if i<len(timesteps)-1 else 0
                     latent_noise_factor = next_t / 1000
-                    noisy_image = original_image_latents * (1.0 - latent_noise_factor) + torch.randn_like(original_image_latents) * latent_noise_factor
+                    # noisy_image = original_image_latents * (1.0 - latent_noise_factor) + torch.randn_like(original_image_latents) * latent_noise_factor
+                    noisy_image = original_image_latents * (1.0 - latent_noise_factor) + randn * latent_noise_factor
                     latents = noisy_image * (1-image_mask_latents) + image_mask_latents * latents
                     noisy_image = None

@@ -28,6 +28,7 @@ class family_handler():
                 ],
                 "letters_filter": "KI",
             }
+            extra_model_def["background_removal_label"] = "Remove Backgrounds only behind People / Objects except main Subject / Landscape"

         return extra_model_def

@@ -846,7 +846,7 @@ class WanAny2V:
         for i, t in enumerate(tqdm(timesteps)):
             guide_scale, guidance_switch_done, trans, denoising_extra = update_guidance(i, t, guide_scale, guide2_scale, guidance_switch_done, switch_threshold, trans, 2, denoising_extra)
             guide_scale, guidance_switch2_done, trans, denoising_extra = update_guidance(i, t, guide_scale, guide3_scale, guidance_switch2_done, switch2_threshold, trans, 3, denoising_extra)
-            offload.set_step_no_for_lora(trans, i)
+            offload.set_step_no_for_lora(trans, start_step_no + i)
             timestep = torch.stack([t])

             if timestep_injection:
@@ -165,6 +165,7 @@ class family_handler():
             }

             extra_model_def["lock_image_refs_ratios"] = True
+            extra_model_def["background_removal_label"] = "Remove Backgrounds behind People / Objects, keep it for Landscape or positioned Frames"

         if base_model_type in ["standin"]:
             extra_model_def["lock_image_refs_ratios"] = True
wgp.py (34 changed lines)
@@ -61,8 +61,8 @@ AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10

 target_mmgp_version = "3.6.0"
-WanGP_version = "8.5"
-settings_version = 2.33
+WanGP_version = "8.55"
+settings_version = 2.34
 max_source_video_frames = 3000
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None

@@ -487,7 +487,6 @@ def process_prompt_and_add_tasks(state, model_choice):
         image_mask = None

     if "G" in video_prompt_type:
-        if image_mode == 0:
         gr.Info(f"With Denoising Strength {denoising_strength:.1f}, denoising will start at Step no {int(num_inference_steps * (1. - denoising_strength))} ")
     else:
         denoising_strength = 1.0
@@ -552,11 +551,13 @@ def process_prompt_and_add_tasks(state, model_choice):

     if test_any_sliding_window(model_type) and image_mode == 0:
         if video_length > sliding_window_size:
+            if model_type in ["t2v"] and not "G" in video_prompt_type :
+                gr.Info(f"You have requested to Generate Sliding Windows with a Text to Video model. Unless you use the Video to Video feature this is useless as a t2v model doesn't see past frames and it will generate the same video in each new window.")
+                return
             full_video_length = video_length if video_source is None else video_length + sliding_window_overlap -1
             extra = "" if full_video_length == video_length else f" including {sliding_window_overlap} added for Video Continuation"
             no_windows = compute_sliding_window_no(full_video_length, sliding_window_size, sliding_window_discard_last_frames, sliding_window_overlap)
             gr.Info(f"The Number of Frames to generate ({video_length}{extra}) is greater than the Sliding Window Size ({sliding_window_size}), {no_windows} Windows will be generated")

     if "recam" in model_filename:
         if video_guide == None:
             gr.Info("You must provide a Control Video")
@@ -7019,8 +7020,14 @@ def categorize_resolution(resolution_str):
             return group
     return "1440p"

-def group_resolutions(resolutions, selected_resolution):
+def group_resolutions(model_def, resolutions, selected_resolution):
+
+    model_resolutions = model_def.get("resolutions", None)
+    if model_resolutions is not None:
+        selected_group = "Locked"
+        available_groups = [selected_group]
+        selected_group_resolutions = model_resolutions
+    else:
     grouped_resolutions = {}
     for resolution in resolutions:
         group = categorize_resolution(resolution[1])
@@ -7040,7 +7047,11 @@ def change_resolution_group(state, selected_group):
     model_def = get_model_def(model_type)
     model_resolutions = model_def.get("resolutions", None)
     resolution_choices, _ = get_resolution_choices(None, model_resolutions)
+    if model_resolutions is None:
     group_resolution_choices = [ resolution for resolution in resolution_choices if categorize_resolution(resolution[1]) == selected_group ]
+    else:
+        last_resolution = group_resolution_choices[0][1]
+        return gr.update(choices= group_resolution_choices, value= last_resolution)
+
     last_resolution_per_group = state["last_resolution_per_group"]
     last_resolution = last_resolution_per_group.get(selected_group, "")
@@ -7051,6 +7062,11 @@ def change_resolution_group(state, selected_group):


 def record_last_resolution(state, resolution):
+
+    model_type = state["model_type"]
+    model_def = get_model_def(model_type)
+    model_resolutions = model_def.get("resolutions", None)
+    if model_resolutions is not None: return
     server_config["last_resolution_choice"] = resolution
     selected_group = categorize_resolution(resolution)
     last_resolution_per_group = state["last_resolution_per_group"]
@@ -7482,11 +7498,13 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                         frames_positions = gr.Text(value=ui_defaults.get("frames_positions","") , visible= "F" in video_prompt_type_value, scale = 2, label= "Positions of Injected Frames separated by Spaces (1=first, no position for Objects / People)" )
                         image_refs_relative_size = gr.Slider(20, 100, value=ui_defaults.get("image_refs_relative_size", 50), step=1, label="Rescale Internaly Image Ref (% in relation to Output Video) to change Output Composition", visible = model_def.get("any_image_refs_relative_size", False) and image_outputs)

-                        no_background_removal = model_def.get("no_background_removal", False)
+                        no_background_removal = model_def.get("no_background_removal", False) or image_ref_choices is None
+                        background_removal_label = model_def.get("background_removal_label", "Remove Backgrounds behind People / Objects")

                         remove_background_images_ref = gr.Dropdown(
                             choices=[
                                 ("Keep Backgrounds behind all Reference Images", 0),
-                                ("Remove Backgrounds only behind People / Objects except main Subject / Landscape" if (flux or qwen) else ("Remove Backgrounds behind People / Objects, keep it for Landscape or positioned Frames" if vace else "Remove Backgrounds behind People / Objects") , 1),
+                                (background_removal_label, 1),
                             ],
                             value=0 if no_background_removal else ui_defaults.get("remove_background_images_ref",1),
                             label="Automatic Removal of Background of People or Objects (Only)", scale = 3, visible= "I" in video_prompt_type_value and not no_background_removal
@@ -7578,7 +7596,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                 current_resolution_choice = ui_defaults.get("resolution","832x480") if update_form or last_resolution is None else last_resolution
                 model_resolutions = model_def.get("resolutions", None)
                 resolution_choices, current_resolution_choice = get_resolution_choices(current_resolution_choice, model_resolutions)
-                available_groups, selected_group_resolutions, selected_group = group_resolutions(resolution_choices, current_resolution_choice)
+                available_groups, selected_group_resolutions, selected_group = group_resolutions(model_def, resolution_choices, current_resolution_choice)
                 resolution_group = gr.Dropdown(
                     choices = available_groups,
                     value= selected_group,