Mirror of https://github.com/Wan-Video/Wan2.1.git

Commit 8842db7459 (parent 95fdf3324a): improved Qwen Edit id preservation
@@ -20,14 +20,15 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
 
 ## 🔥 Latest Updates :
 
-### August 21 2025: WanGP v8.0 - the killer of seven
+### August 21 2025: WanGP v8.01 - the killer of seven
 
-- Qwen Image Edit : Flux Kontext challenger (prompt driven image editing). You should use it at high res (1080p) if you want to preserve the identity of the original people / objects. It works with the Qwen Lora Lightning 4 steps. I have also unlocked all the resolutions for Qwen models. Bonus Zone: support for multiple image compositions
+- Qwen Image Edit : Flux Kontext challenger (prompt driven image editing). Best results (including identity preservation) will be obtained at 720p. Beyond that you may get image outpainting and / or lose identity preservation. Below 720p, prompt adherence will be worse. Qwen Image Edit works with the Qwen Lora Lightning 4 steps. I have also unlocked all the resolutions for Qwen models. Bonus Zone: support for multiple image compositions, but identity preservation won't be as good.
 - On demand Prompt Enhancer (needs to be enabled in the Configuration Tab) that you can use to Enhance a Text Prompt before starting a Generation. You can refine the Enhanced Prompt or change the original Prompt.
 - Choice of a non censored Prompt Enhancer. Beware: this one is VRAM hungry and will require 12 GB of VRAM to work.
 - Memory Profile customizable per model: useful, for instance, to set Profile 3 (preload the model entirely in VRAM) for only the Image Generation models if you have 24 GB of VRAM. In that case Generation will be much faster, because with Image generators (contrary to Video generators) a lot of time is otherwise wasted in offloading.
 - Expert Guidance Mode: change the Guidance up to 2 times during the generation. Very useful with Wan 2.2 Lightning to reduce the slow motion effect. The idea is to insert a CFG phase before the 2 accelerated phases that follow and have no Guidance (see the sketch after this diff). I have added the finetune *Wan2.2 Vace Lightning 3 Phases 14B* with a prebuilt configuration. Please note that it is an 8-step process although the Lora Lightning is 4 steps. This Expert Guidance Mode is also available with Wan 2.1.
 
+*WanGP 8.01 update: improved Qwen Image Edit Identity Preservation*
 
 ### August 12 2025: WanGP v7.7777 - Lucky Day(s)
 
 This is your lucky day! Thanks to new configuration options that let you store generated Videos and Images in lossless compressed formats, you will find that they in fact look twice as good without you doing anything!
 
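The Expert Guidance Mode bullet above describes a three-phase schedule: one classical CFG phase followed by two Lightning-accelerated phases that run without guidance. Below is a minimal Python sketch of such a schedule; the step boundary and guidance values are illustrative assumptions, not WanGP's actual finetune configuration.

    # Sketch of a 3-phase guidance schedule (assumed values, for illustration only).
    def guidance_for_step(step: int, total_steps: int = 8) -> float:
        """Return the CFG scale for a given denoising step.

        Phase 1 (first steps): classical CFG, e.g. to counter slow motion.
        Phases 2-3 (remaining steps): guidance disabled (scale 1.0) so the
        Lightning-accelerated phases run without CFG.
        """
        cfg_phase_end = 2  # assumed boundary of the initial CFG phase
        return 3.5 if step < cfg_phase_end else 1.0

    if __name__ == "__main__":
        schedule = [guidance_for_step(s) for s in range(8)]
        print(schedule)  # [3.5, 3.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
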
@@ -2,7 +2,7 @@
     "model": {
         "name": "Qwen Image Edit 20B",
         "architecture": "qwen_image_edit_20B",
-        "description": "Qwen Image Edit is a generative model that will generate very high quality images. It can be used to edit a Subject or combine multiple Subjects. It is one of the few models capable of generating very long texts in the image.",
+        "description": "Qwen Image Edit is a generative model that can generate very high quality images with long texts in them. Best results will be at 720p. Use it to edit a Subject or combine multiple Subjects.",
         "URLs": [
            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_20B_bf16.safetensors",
            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_20B_quanto_bf16_int8.safetensors"
@@ -14,6 +14,6 @@
        "image_outputs": true
    },
    "prompt": "add a hat",
-   "resolution": "1920x1088",
+   "resolution": "1280x720",
    "batch_size": 1
}
@@ -18,7 +18,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
 
 import numpy as np
 import torch, json
 
-import math
+from diffusers.image_processor import VaeImageProcessor
 from .transformer_qwenimage import QwenImageTransformer2DModel
 
@@ -28,6 +28,7 @@ from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer, Aut
 from .autoencoder_kl_qwenimage import AutoencoderKLQwenImage
 from diffusers import FlowMatchEulerDiscreteScheduler
 from PIL import Image
+from shared.utils.utils import calculate_new_dimensions
 
 XLA_AVAILABLE = False
 
@@ -686,13 +687,17 @@ class QwenImagePipeline(): #DiffusionPipeline
             image = image[0] if isinstance(image, list) else image
             image_height, image_width = self.image_processor.get_default_height_width(image)
             aspect_ratio = image_width / image_height
-            if True :
+            if False :
                 _, image_width, image_height = min(
                     (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS
                 )
             image_width = image_width // multiple_of * multiple_of
             image_height = image_height // multiple_of * multiple_of
-            image = self.image_processor.resize(image, image_height, image_width)
+            # image = self.image_processor.resize(image, image_height, image_width)
+            ref_height, ref_width = 1568, 672
+            if height * width < ref_height * ref_width: ref_height , ref_width = height , width
+            if image_height * image_width > ref_height * ref_width:
+                image_height, image_width = calculate_new_dimensions(ref_height, ref_width, image_height, image_width, False, block_size=multiple_of)
+
+            image = image.resize((image_width, image_height), resample=Image.Resampling.LANCZOS)
             prompt_image = image
             image = self.image_processor.preprocess(image, image_height, image_width)
 
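The hunk above replaces the preferred-resolution snapping (now parked behind "if False") with an area cap: the reference image is LANCZOS-resized so its pixel area stays at or below 1568x672, or below the requested output area if that is smaller. calculate_new_dimensions is imported from WanGP's shared utils and is not shown in this commit; the self-contained stand-in below is only a sketch of its assumed behavior (aspect-ratio-preserving area fit, dimensions snapped to a block size).

    import math

    # Sketch of the assumed behavior of calculate_new_dimensions: scale the
    # image so its area matches the reference area, keep the aspect ratio,
    # and round both dimensions down to a multiple of block_size.
    def capped_dimensions(ref_height, ref_width, image_height, image_width, block_size=16):
        scale = math.sqrt((ref_height * ref_width) / (image_height * image_width))
        new_height = int(image_height * scale) // block_size * block_size
        new_width = int(image_width * scale) // block_size * block_size
        return new_height, new_width

    # A 4000x3000 reference photo shrinks toward the 1568x672 area budget:
    print(capped_dimensions(1568, 672, 3000, 4000))  # (880, 1184) with this sketch
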
wgp.py (+6, -6)
@@ -6588,9 +6588,9 @@ def get_resolution_choices(current_resolution_choice, model_resolutions= None):
        ("1920x832 (21:9)", "1920x832"),
        ("832x1920 (9:21)", "832x1920"),
        # 720p
+       ("1024x1024 (1:1)", "1024x1024"),
        ("1280x720 (16:9)", "1280x720"),
        ("720x1280 (9:16)", "720x1280"),
-       ("1024x1024 (1:1)", "1024x1024"),
        ("1280x544 (21:9)", "1280x544"),
        ("544x1280 (9:21)", "544x1280"),
        ("1104x832 (4:3)", "1104x832"),
@@ -7096,10 +7096,10 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                video_prompt_type_image_refs = gr.Dropdown(
                    choices=[
                        ("None", ""),
-                       ("Conditional Images are People / Objects", "I"),
                        ("Conditional Images is first Main Subject / Landscape and may be followed by People / Objects", "KI"),
+                       ("Conditional Images are People / Objects", "I"),
                    ],
-                   value=filter_letters(video_prompt_type_value, "KFI"),
+                   value=filter_letters(video_prompt_type_value, "KI"),
                    visible = True,
                    show_label=False,
                    label="Reference Images Combination Method", scale = 2
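The value filter now matches the reduced choice set: filtering the stored prompt type against "KI" instead of "KFI" means a leftover "F" can no longer produce a value that matches none of the dropdown choices ("", "I", "KI"). filter_letters is defined elsewhere in wgp.py; this hypothetical stand-in only illustrates the assumed behavior.

    # Hypothetical stand-in for wgp.py's filter_letters (assumed behavior:
    # keep only the characters of `value` that appear in `allowed`).
    def filter_letters(value: str, allowed: str) -> str:
        return "".join(ch for ch in value if ch in allowed)

    print(filter_letters("KFI", "KFI"))  # "KFI" -> matched no dropdown choice
    print(filter_letters("KFI", "KI"))   # "KI"  -> matches the "KI" choice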