Mirror of https://github.com/Wan-Video/Wan2.1.git

Commit 8842db7459 (parent 95fdf3324a): improved Qwen Edit id preservation
@@ -20,14 +20,15 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
 
 ## 🔥 Latest Updates :
 
-### August 21 2025: WanGP v8.0 - the killer of seven
+### August 21 2025: WanGP v8.01 - the killer of seven
 
-- Qwen Image Edit : Flux Kontext challenger (prompt driven image editing). You should use it at high res (1080p) if you want to preserve the identity of the original people / objects. It works with the Qwen Lora Lightning 4 steps. I have also unlocked all the resolutions for Qwen models. Bonus Zone: support for multiple image compositions
+- Qwen Image Edit : Flux Kontext challenger (prompt driven image editing). Best results (including identity preservation) will be obtained at 720p. Beyond that you may get image outpainting and / or lose identity preservation. Below 720p, prompt adherence will be worse. Qwen Image Edit works with the Qwen Lora Lightning 4 steps. I have also unlocked all the resolutions for Qwen models. Bonus Zone: support for multiple image compositions, but identity preservation won't be as good.
 - On demand Prompt Enhancer (needs to be enabled in the Configuration Tab) that you can use to Enhance a Text Prompt before starting a Generation. You can refine the Enhanced Prompt or change the original Prompt.
 - Choice of a non censored Prompt Enhancer. Beware: this one is VRAM hungry and will require 12 GB of VRAM to work.
 - Memory Profile customizable per model: useful, for instance, to set Profile 3 (preload the model entirely in VRAM) for only the Image Generation models if you have 24 GB of VRAM. In that case Generation will be much faster, because with Image generators (contrary to Video generators) a lot of time is otherwise wasted in offloading.
 - Expert Guidance Mode: change the Guidance up to 2 times during the generation. Very useful with Wan 2.2 Lightning to reduce the slow motion effect. The idea is to insert a CFG phase before the 2 accelerated phases that follow and have no Guidance (see the sketch after this diff). I have added the finetune *Wan2.2 Vace Lightning 3 Phases 14B* with a prebuilt configuration. Please note that it is an 8-step process although the Lora Lightning is 4 steps. This Expert Guidance Mode is also available with Wan 2.1.
 
+*WanGP 8.01 update: improved Qwen Image Edit Identity Preservation*
 
 ### August 12 2025: WanGP v7.7777 - Lucky Day(s)
 
 This is your lucky day! Thanks to new configuration options that let you store generated Videos and Images in lossless compressed formats, you will find that they in fact look twice as good without you doing anything!
 
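The Expert Guidance Mode bullet above describes a three-phase schedule: one classical CFG phase followed by two Lightning-accelerated phases that run without guidance. Below is a minimal Python sketch of such a schedule; the step boundary and guidance values are illustrative assumptions, not WanGP's actual finetune configuration.

    # Sketch of a 3-phase guidance schedule (assumed values, for illustration only).
    def guidance_for_step(step: int, total_steps: int = 8) -> float:
        """Return the CFG scale for a given denoising step.

        Phase 1 (first steps): classical CFG, e.g. to counter slow motion.
        Phases 2-3 (remaining steps): guidance disabled (scale 1.0) so the
        Lightning-accelerated phases run without CFG.
        """
        cfg_phase_end = 2  # assumed boundary of the initial CFG phase
        return 3.5 if step < cfg_phase_end else 1.0

    if __name__ == "__main__":
        schedule = [guidance_for_step(s) for s in range(8)]
        print(schedule)  # [3.5, 3.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
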
@@ -2,7 +2,7 @@
     "model": {
         "name": "Qwen Image Edit 20B",
         "architecture": "qwen_image_edit_20B",
-        "description": "Qwen Image Edit is a generative model that will generate very high quality images. It can be used to edit a Subject or combine multiple Subjects. It is one of the few models capable of generating very long texts in the image.",
+        "description": "Qwen Image Edit is a generative model that can generate very high quality images with long texts in them. Best results will be at 720p. Use it to edit a Subject or combine multiple Subjects.",
         "URLs": [
            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_20B_bf16.safetensors",
            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_20B_quanto_bf16_int8.safetensors"
@@ -14,6 +14,6 @@
        "image_outputs": true
    },
    "prompt": "add a hat",
-   "resolution": "1920x1088",
+   "resolution": "1280x720",
    "batch_size": 1
}
@@ -18,7 +18,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
 
 import numpy as np
 import torch, json
 
-import math
+from diffusers.image_processor import VaeImageProcessor
 from .transformer_qwenimage import QwenImageTransformer2DModel
 
@@ -28,6 +28,7 @@ from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer, Aut
 from .autoencoder_kl_qwenimage import AutoencoderKLQwenImage
 from diffusers import FlowMatchEulerDiscreteScheduler
 from PIL import Image
+from shared.utils.utils import calculate_new_dimensions
 
 XLA_AVAILABLE = False
 
@@ -686,13 +687,17 @@ class QwenImagePipeline(): #DiffusionPipeline
             image = image[0] if isinstance(image, list) else image
             image_height, image_width = self.image_processor.get_default_height_width(image)
             aspect_ratio = image_width / image_height
-            if True :
+            if False :
                 _, image_width, image_height = min(
                     (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS
                 )
             image_width = image_width // multiple_of * multiple_of
             image_height = image_height // multiple_of * multiple_of
-            image = self.image_processor.resize(image, image_height, image_width)
+            # image = self.image_processor.resize(image, image_height, image_width)
+            ref_height, ref_width = 1568, 672
+            if height * width < ref_height * ref_width: ref_height , ref_width = height , width
+            if image_height * image_width > ref_height * ref_width:
+                image_height, image_width = calculate_new_dimensions(ref_height, ref_width, image_height, image_width, False, block_size=multiple_of)
+
+            image = image.resize((image_width, image_height), resample=Image.Resampling.LANCZOS)
             prompt_image = image
             image = self.image_processor.preprocess(image, image_height, image_width)
 
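The hunk above replaces the preferred-resolution snapping (now parked behind "if False") with an area cap: the reference image is LANCZOS-resized so its pixel area stays at or below 1568x672, or below the requested output area if that is smaller. calculate_new_dimensions is imported from WanGP's shared utils and is not shown in this commit; the self-contained stand-in below is only a sketch of its assumed behavior (aspect-ratio-preserving area fit, dimensions snapped to a block size).

    import math

    # Sketch of the assumed behavior of calculate_new_dimensions: scale the
    # image so its area matches the reference area, keep the aspect ratio,
    # and round both dimensions down to a multiple of block_size.
    def capped_dimensions(ref_height, ref_width, image_height, image_width, block_size=16):
        scale = math.sqrt((ref_height * ref_width) / (image_height * image_width))
        new_height = int(image_height * scale) // block_size * block_size
        new_width = int(image_width * scale) // block_size * block_size
        return new_height, new_width

    # A 4000x3000 reference photo shrinks toward the 1568x672 area budget:
    print(capped_dimensions(1568, 672, 3000, 4000))  # (880, 1184) with this sketch
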
wgp.py (+6, -6)
@@ -6588,9 +6588,9 @@ def get_resolution_choices(current_resolution_choice, model_resolutions= None):
        ("1920x832 (21:9)", "1920x832"),
        ("832x1920 (9:21)", "832x1920"),
        # 720p
+       ("1024x1024 (1:1)", "1024x1024"),
        ("1280x720 (16:9)", "1280x720"),
        ("720x1280 (9:16)", "720x1280"),
-       ("1024x1024 (1:1)", "1024x1024"),
        ("1280x544 (21:9)", "1280x544"),
        ("544x1280 (9:21)", "544x1280"),
        ("1104x832 (4:3)", "1104x832"),
@@ -7096,10 +7096,10 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
                video_prompt_type_image_refs = gr.Dropdown(
                    choices=[
                        ("None", ""),
-                       ("Conditional Images are People / Objects", "I"),
                        ("Conditional Images is first Main Subject / Landscape and may be followed by People / Objects", "KI"),
+                       ("Conditional Images are People / Objects", "I"),
                    ],
-                   value=filter_letters(video_prompt_type_value, "KFI"),
+                   value=filter_letters(video_prompt_type_value, "KI"),
                    visible = True,
                    show_label=False,
                    label="Reference Images Combination Method", scale = 2
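The value filter now matches the reduced choice set: filtering the stored prompt type against "KI" instead of "KFI" means a leftover "F" can no longer produce a value that matches none of the dropdown choices ("", "I", "KI"). filter_letters is defined elsewhere in wgp.py; this hypothetical stand-in only illustrates the assumed behavior.

    # Hypothetical stand-in for wgp.py's filter_letters (assumed behavior:
    # keep only the characters of `value` that appear in `allowed`).
    def filter_letters(value: str, allowed: str) -> str:
        return "".join(ch for ch in value if ch in allowed)

    print(filter_letters("KFI", "KFI"))  # "KFI" -> matched no dropdown choice
    print(filter_letters("KFI", "KI"))   # "KI"  -> matches the "KI" choice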