mirror of https://github.com/Wan-Video/Wan2.1.git (synced 2025-12-16 12:13:27 +00:00)

add fallback for Qwen Image and older GPUs

This commit is contained in:
parent 27d4c8eb4d
commit 9545f2e5ef
@@ -20,13 +20,15 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep

 ## 🔥 Latest Updates :

-### August 8 2025: WanGP v7.72 - Qwen Rebirth
+### August 8 2025: WanGP v7.73 - Qwen Rebirth

 Ever wondered what impact not using Guidance has on a model that expects it? Just look at Qwen Image in WanGP 7.71, whose outputs were erratic. Somehow I had convinced myself that Qwen was a distilled model. In fact Qwen was dying for a negative prompt, and in WanGP 7.72 there is at last one for him.

 As Qwen is not so picky after all, I have also added a quantized text encoder, which reduces the RAM requirements of Qwen by 10 GB (the quantized version of the text encoder produced garbage before).

 Hopefully this new release also solves the Sage/Sage2 black screen on some GPUs.

+*7.73 update: there is still a Sage / Sage2 bug for GPUs before the RTX 40xx series. I have added a detection mechanism that forces Sdpa attention in that case.*

 ### August 6 2025: WanGP v7.71 - Picky, picky
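The 7.73 note above describes detecting GPUs older than the RTX 40xx series and forcing Sdpa attention on them. A minimal sketch of that kind of check, assuming PyTorch is available (illustrative only; the commit itself routes the decision through the match_nvidia_architecture helper shown further down):

    import torch

    # Compute capability as (major, minor): e.g. (8, 6) on an RTX 3090, (8, 9) on an RTX 4090.
    major, minor = torch.cuda.get_device_capability(0)

    # Anything older than Ada Lovelace (8.9, i.e. the RTX 40xx series) gets the Sdpa fallback.
    needs_sdpa_fallback = (major, minor) < (8, 9)
    print("forcing sdpa attention" if needs_sdpa_fallback else "Sage/Sage2 should work here")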
@@ -11,8 +11,8 @@
         ["1664x928 (16:9)", "1664x928"],
         ["928x1664 (9:16)", "928x1664"],
         ["1472x1140 (4:3)", "1472x1140"],
-        ["1140x1472 (3:4)", "1140x1472"]
-    ],
+        ["1140x1472 (3:4)", "1140x1472"]],
+    "attention": {"<89" : "sdpa"},
     "image_outputs": true
     },
     "prompt": "draw a hat",
shared/match_archi.py (new file, 64 lines)
@@ -0,0 +1,64 @@
import re


def match_nvidia_architecture(conditions_dict, architecture):
    """
    Match Nvidia architecture against condition dictionary.

    Args:
        conditions_dict: dict with condition strings as keys, parameters as values
        architecture: int representing architecture (e.g., 89 for Ada Lovelace)

    Returns:
        list of matched parameters

    Condition syntax:
    - Operators: '<', '>', '<=', '>=', '=' (or no operator for equality)
    - OR: '+' between conditions (e.g., '<=50+>89')
    - AND: '&' between conditions (e.g., '>=70&<90')
    - Examples:
        * '<89': architectures below Ada (89)
        * '>=75': architectures 75 and above
        * '89': exactly Ada architecture
        * '<=50+>89': Maxwell (50) and below OR above Ada
        * '>=70&<90': Ampere range (70-89)
    """

    def eval_condition(cond, arch):
        """Evaluate single condition against architecture"""
        cond = cond.strip()
        if not cond:
            return False

        # Parse operator and value using regex
        match = re.match(r'(>=|<=|>|<|=?)(\d+)', cond)
        if not match:
            return False

        op, val = match.groups()
        val = int(val)

        # Handle operators
        if op in ('', '='):
            return arch == val
        elif op == '>=':
            return arch >= val
        elif op == '<=':
            return arch <= val
        elif op == '>':
            return arch > val
        elif op == '<':
            return arch < val
        return False

    def matches_condition(condition_str, arch):
        """Check if architecture matches full condition string"""
        # Split by '+' for OR conditions, then by '&' for AND conditions
        return any(
            all(eval_condition(and_cond, arch) for and_cond in or_cond.split('&'))
            for or_cond in condition_str.split('+')
            if or_cond.strip()
        )

    # Return all parameters where conditions match
    return [params for condition, params in conditions_dict.items()
            if matches_condition(condition, architecture)]
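A usage sketch for the helper above (illustrative, not part of the commit); the first dictionary mirrors the "attention": {"<89" : "sdpa"} entry added to the model definition, and the integers are compute capabilities written as major * 10 + minor:

    from shared.match_archi import match_nvidia_architecture

    # "<89" matches everything below Ada Lovelace (8.9): Ampere (86) gets sdpa, Ada (89) does not.
    print(match_nvidia_architecture({"<89": "sdpa"}, 86))           # ['sdpa']
    print(match_nvidia_architecture({"<89": "sdpa"}, 89))           # []

    # OR ('+') and AND ('&') combinations from the docstring examples.
    print(match_nvidia_architecture({"<=50+>89": "fallback"}, 90))  # ['fallback']
    print(match_nvidia_architecture({">=70&<90": "fallback"}, 75))  # ['fallback']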
wgp.py (32 changed lines)
@@ -18,7 +18,7 @@ from shared.utils import notification_sound
 from shared.utils.loras_mutipliers import preparse_loras_multipliers, parse_loras_multipliers
 from shared.utils.utils import cache_video, convert_tensor_to_image, save_image, get_video_info, get_file_creation_date, convert_image_to_video
 from shared.utils.utils import extract_audio_tracks, combine_video_with_audio_tracks, combine_and_concatenate_video_with_audio_tracks, cleanup_temp_audio_files, calculate_new_dimensions
+from shared.match_archi import match_nvidia_architecture
 from shared.attention import get_attention_modes, get_supported_attention_modes
 from huggingface_hub import hf_hub_download, snapshot_download
 import torch
@@ -50,7 +50,7 @@ AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10

 target_mmgp_version = "3.5.7"
-WanGP_version = "7.72"
+WanGP_version = "7.73"
 settings_version = 2.23
 max_source_video_frames = 3000
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
@@ -2670,12 +2670,15 @@ def generate_header(model_type, compile, attention_mode):
     model_filename = get_model_filename(model_type, transformer_quantization, transformer_dtype_policy) or ""
     description = description_container[0]
     header = f"<DIV style=height:{60 if server_config.get('display_stats', 0) == 1 else 40}px>{description}</DIV>"
-    header += "<DIV style='align:right;width:100%'><FONT SIZE=3>Attention mode <B>" + (attention_mode if attention_mode!="auto" else "auto/" + get_auto_attention() )
+    overridden_attention = get_overridden_attention(model_type)
+    attn_mode = attention_mode if overridden_attention == None else overridden_attention
+    header += "<DIV style='align:right;width:100%'><FONT SIZE=3>Attention mode <B>" + (attn_mode if attn_mode!="auto" else "auto/" + get_auto_attention() )
     if attention_mode not in attention_modes_installed:
         header += " -NOT INSTALLED-"
     elif attention_mode not in attention_modes_supported:
         header += " -NOT SUPPORTED-"
+    elif overridden_attention is not None and attention_mode != overridden_attention:
+        header += " -MODEL SPECIFIC-"
     header += "</B>"

     if compile:
@@ -3821,6 +3824,17 @@ def edit_video(
         cleanup_temp_audio_files(audio_tracks)
     clear_status(state)

+def get_overridden_attention(model_type):
+    model_def = get_model_def(model_type)
+    override_attention = model_def.get("attention", None)
+    if override_attention is None: return None
+    gpu_version = gpu_major * 10 + gpu_minor
+    attention_list = match_nvidia_architecture(override_attention, gpu_version)
+    if len(attention_list) == 0: return None
+    override_attention = attention_list[0]
+    if override_attention is not None and override_attention not in attention_modes_supported: return None
+    return override_attention
+
 def get_transformer_loras(model_type):
     model_def = get_model_def(model_type)
     transformer_loras_filenames = get_model_recursive_prop(model_type, "loras", return_list=True)
@@ -3971,13 +3985,9 @@ def generate_video(
         wan_model, offloadobj = load_models(model_type)
         send_cmd("status", "Model loaded")
         reload_needed= False
-    override_attention = model_def.get("attention", None)
-    if override_attention is not None:
-        if isinstance(override_attention, dict):
-            override_attention = override_attention.get(gpu_major, None)
-    if override_attention is not None and override_attention not in attention_modes_supported: override_attention = None
-    if override_attention != attention_mode: print(f"Attention mode has been overriden to {override_attention} for model type '{model_type}'")
-    attn = override_attention if override_attention is not None else attention_mode
+    overridden_attention = get_overridden_attention(model_type)
+    # if overridden_attention is not None and overridden_attention != attention_mode: print(f"Attention mode has been overriden to {overridden_attention} for model type '{model_type}'")
+    attn = overridden_attention if overridden_attention is not None else attention_mode
     if attention_mode == "auto":
         attn = get_auto_attention()
     elif attention_mode in attention_modes_supported:
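Putting the pieces together, a simplified resolution helper (hypothetical, for illustration only; unlike get_overridden_attention above it skips the attention_modes_supported check):

    from shared.match_archi import match_nvidia_architecture

    def resolve_attention(model_attention_dict, gpu_major, gpu_minor, user_mode="auto"):
        # First matching condition wins, otherwise keep the user's attention mode.
        gpu_version = gpu_major * 10 + gpu_minor
        matches = match_nvidia_architecture(model_attention_dict, gpu_version)
        return matches[0] if matches else user_mode

    print(resolve_attention({"<89": "sdpa"}, 8, 6))  # sdpa (RTX 3090, Ampere 8.6)
    print(resolve_attention({"<89": "sdpa"}, 8, 9))  # auto (RTX 4090, Ada 8.9, no override)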