This commit is contained in:
DeepBeepMeep 2025-06-13 18:41:37 +02:00
commit 24bb01cae3
3 changed files with 362 additions and 39 deletions

View File

@ -34,4 +34,6 @@ loguru
sentencepiece
av
opencv-python
pygame>=2.1.0
sounddevice>=0.4.0
# rembg==2.0.65

View File

@ -0,0 +1,261 @@
"""Add commentMore actions
Notification sounds for Wan2GP video generation application
Pure Python audio notification system with multiple backend support
"""
import os
import sys
import threading
import time
import numpy as np
def generate_notification_beep(volume=50, sample_rate=44100):
"""Generate pleasant C major chord notification sound"""
if volume == 0:
return np.array([])
volume = max(0, min(100, volume))
# Volume curve mapping: 25%->50%, 50%->75%, 75%->100%, 100%->105%
if volume <= 25:
volume_mapped = (volume / 25.0) * 0.5
elif volume <= 50:
volume_mapped = 0.5 + ((volume - 25) / 25.0) * 0.25
elif volume <= 75:
volume_mapped = 0.75 + ((volume - 50) / 25.0) * 0.25
else:
volume_mapped = 1.0 + ((volume - 75) / 25.0) * 0.05 # Only 5% boost instead of 15%
volume = volume_mapped
# C major chord frequencies
freq_c = 261.63 # C4
freq_e = 329.63 # E4
freq_g = 392.00 # G4
duration = 0.8
t = np.linspace(0, duration, int(sample_rate * duration), False)
# Generate chord components
wave_c = np.sin(freq_c * 2 * np.pi * t) * 0.4
wave_e = np.sin(freq_e * 2 * np.pi * t) * 0.3
wave_g = np.sin(freq_g * 2 * np.pi * t) * 0.2
wave = wave_c + wave_e + wave_g
# Prevent clipping
max_amplitude = np.max(np.abs(wave))
if max_amplitude > 0:
wave = wave / max_amplitude * 0.8
# ADSR envelope
def apply_adsr_envelope(wave_data):
length = len(wave_data)
attack_time = int(0.2 * length)
decay_time = int(0.1 * length)
release_time = int(0.5 * length)
envelope = np.ones(length)
if attack_time > 0:
envelope[:attack_time] = np.power(np.linspace(0, 1, attack_time), 3)
if decay_time > 0:
start_idx = attack_time
end_idx = attack_time + decay_time
envelope[start_idx:end_idx] = np.linspace(1, 0.85, decay_time)
if release_time > 0:
start_idx = length - release_time
envelope[start_idx:] = 0.85 * np.exp(-4 * np.linspace(0, 1, release_time))
return wave_data * envelope
wave = apply_adsr_envelope(wave)
# Simple low-pass filter
def simple_lowpass_filter(signal, cutoff_ratio=0.8):
window_size = max(3, int(len(signal) * 0.001))
if window_size % 2 == 0:
window_size += 1
kernel = np.ones(window_size) / window_size
padded = np.pad(signal, window_size//2, mode='edge')
filtered = np.convolve(padded, kernel, mode='same')
return filtered[window_size//2:-window_size//2]
wave = simple_lowpass_filter(wave)
# Add reverb effect
if len(wave) > sample_rate // 4:
delay_samples = int(0.12 * sample_rate)
reverb = np.zeros_like(wave)
reverb[delay_samples:] = wave[:-delay_samples] * 0.08
wave = wave + reverb
# Apply volume first, then normalize to prevent clipping
wave = wave * volume * 0.5
# Final normalization with safety margin
max_amplitude = np.max(np.abs(wave))
if max_amplitude > 0.85: # If approaching clipping threshold
wave = wave / max_amplitude * 0.85 # More conservative normalization
return wave
def play_audio_with_pygame(audio_data, sample_rate=44100):
    """Play audio using pygame backend"""
    try:
        import pygame

        mixer_state = pygame.mixer.get_init()
        if not mixer_state:
            # First use: bring the mixer up as 16-bit stereo
            pygame.mixer.pre_init(frequency=sample_rate, size=-16, channels=2, buffer=1024)
            pygame.mixer.init()
        else:
            cur_freq, _cur_size, cur_channels = mixer_state
            # Restart the mixer only when its settings do not match ours
            if cur_freq != sample_rate or cur_channels != 2:
                pygame.mixer.quit()
                pygame.mixer.pre_init(frequency=sample_rate, size=-16, channels=2, buffer=1024)
                pygame.mixer.init()

        samples = (audio_data * 32767).astype(np.int16)
        # pygame wants stereo frames: duplicate a mono signal into both channels
        if len(samples.shape) == 1:
            samples = np.column_stack((samples, samples))

        pygame.sndarray.make_sound(samples).play()
        # Block until playback finishes, plus a small safety margin
        pygame.time.wait(int(len(audio_data) / sample_rate * 1000) + 100)
        # Intentionally keep the mixer alive — quitting it can interfere
        # with the Gradio server.
        return True
    except ImportError:
        return False
    except Exception as e:
        print(f"Pygame error: {e}")
        return False
def play_audio_with_sounddevice(audio_data, sample_rate=44100):
    """Play audio using sounddevice backend"""
    try:
        import sounddevice as sd
    except ImportError:
        # Backend not available on this system
        return False
    try:
        sd.play(audio_data, sample_rate)
        sd.wait()  # block until playback completes
        return True
    except Exception as e:
        print(f"Sounddevice error: {e}")
        return False
def play_audio_with_winsound(audio_data, sample_rate=44100):
    """Play audio using winsound backend (Windows only).

    Writes the samples to a temporary 16-bit mono WAV file, plays it
    synchronously with winsound, then removes the file.

    Returns:
        True on successful playback, False when unavailable or on error.
    """
    if sys.platform != "win32":
        return False
    try:
        import winsound
        import wave
        import tempfile
        import uuid

        temp_dir = tempfile.gettempdir()
        # Unique name so concurrent notifications never collide
        temp_filename = os.path.join(temp_dir, f"notification_{uuid.uuid4().hex}.wav")
        try:
            with wave.open(temp_filename, 'w') as wav_file:
                wav_file.setnchannels(1)       # mono
                wav_file.setsampwidth(2)       # 16-bit samples
                wav_file.setframerate(sample_rate)
                audio_int16 = (audio_data * 32767).astype(np.int16)
                wav_file.writeframes(audio_int16.tobytes())
            winsound.PlaySound(temp_filename, winsound.SND_FILENAME)
        finally:
            # Best-effort cleanup: Windows may keep the file locked briefly
            # after playback, so retry a few times. BUGFIX: catch OSError
            # instead of a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit.
            for _ in range(3):
                try:
                    if os.path.exists(temp_filename):
                        os.unlink(temp_filename)
                    break
                except OSError:
                    time.sleep(0.1)
        return True
    except ImportError:
        return False
    except Exception as e:
        print(f"Winsound error: {e}")
        return False
def play_notification_sound(volume=50):
    """Play the notification chord at the given volume.

    Args:
        volume: 0-100; 0 disables the sound entirely.

    Tries each audio backend in preference order and falls back to the
    terminal bell when none of them succeeds.
    """
    if volume == 0:
        return

    audio_data = generate_notification_beep(volume=volume)
    if len(audio_data) == 0:
        return

    # Preference order: pygame, sounddevice, then winsound (Windows only)
    audio_backends = [
        play_audio_with_pygame,
        play_audio_with_sounddevice,
        play_audio_with_winsound,
    ]
    for backend in audio_backends:
        try:
            if backend(audio_data):
                return
        except Exception:
            # A crashing backend must not prevent trying the next one
            continue

    # Fallback: terminal beep (was an f-string with no placeholders)
    print("All audio backends failed, using terminal beep")
    print('\a')
def play_notification_async(volume=50):
    """Play notification sound asynchronously (non-blocking)"""
    def _worker():
        # Swallow and report playback errors so the caller is never disturbed
        try:
            play_notification_sound(volume)
        except Exception as e:
            print(f"Error playing notification sound: {e}")

    threading.Thread(target=_worker, daemon=True).start()
def notify_video_completion(video_path=None, volume=50):
    """Notify about completed video generation"""
    # video_path is accepted for API symmetry; the sound does not depend on it
    play_notification_async(volume)
# Manual smoke test: run this module directly to hear the notification at
# several volume levels (requires a working audio backend).
if __name__ == "__main__":
    print("Testing notification sounds with different volumes...")
    print("Auto-detecting available audio backends...")
    volumes = [25, 50, 75, 100]
    for vol in volumes:
        print(f"Testing volume {vol}%:")
        play_notification_sound(vol)
        # Pause between samples so each chord is heard separately
        time.sleep(2)
    print("Test completed!")

136
wgp.py
View File

@ -14,6 +14,7 @@ import gradio as gr
import random
import json
import wan
from wan.utils import notification_sound
from wan.configs import MAX_AREA_CONFIGS, WAN_CONFIGS, SUPPORTED_SIZES, VACE_SIZE_CONFIGS
from wan.utils.utils import cache_video
from wan.modules.attention import get_attention_modes, get_supported_attention_modes
@ -1518,20 +1519,22 @@ for src,tgt in zip(src_move,tgt_move):
if not Path(server_config_filename).is_file():
server_config = {"attention_mode" : "auto",
"transformer_types": [],
"transformer_quantization": "int8",
"text_encoder_quantization" : "int8",
"save_path": "outputs", #os.path.join(os.getcwd(),
"compile" : "",
"metadata_type": "metadata",
"default_ui": "t2v",
"boost" : 1,
"clear_file_list" : 5,
"vae_config": 0,
"profile" : profile_type.LowRAM_LowVRAM,
"preload_model_policy": [],
"UI_theme": "default" }
server_config = {
"attention_mode" : "auto",
"transformer_types": [],
"transformer_quantization": "int8",
"text_encoder_quantization" : "int8",
"save_path": "outputs", #os.path.join(os.getcwd(),
"compile" : "",
"metadata_type": "metadata",
"default_ui": "t2v",
"boost" : 1,
"clear_file_list" : 5,
"vae_config": 0,
"profile" : profile_type.LowRAM_LowVRAM,
"preload_model_policy": [],
"UI_theme": "default"
}
with open(server_config_filename, "w", encoding="utf-8") as writer:
writer.write(json.dumps(server_config))
@ -2440,33 +2443,38 @@ def apply_changes( state,
UI_theme_choice = "default",
enhancer_enabled_choice = 0,
fit_canvas_choice = 0,
preload_in_VRAM_choice = 0
preload_in_VRAM_choice = 0,
notification_sound_enabled_choice = 1,
notification_sound_volume_choice = 50
):
if args.lock_config:
return
if gen_in_progress:
return "<DIV ALIGN=CENTER>Unable to change config when a generation is in progress</DIV>", gr.update(), gr.update()
global offloadobj, wan_model, server_config, loras, loras_names, default_loras_choices, default_loras_multis_str, default_lora_preset_prompt, default_lora_preset, loras_presets
server_config = {"attention_mode" : attention_choice,
"transformer_types": transformer_types_choices,
"text_encoder_quantization" : text_encoder_quantization_choice,
"save_path" : save_path_choice,
"compile" : compile_choice,
"profile" : profile_choice,
"vae_config" : vae_config_choice,
"vae_precision" : VAE_precision_choice,
"mixed_precision" : mixed_precision_choice,
"metadata_type": metadata_choice,
"transformer_quantization" : quantization_choice,
"transformer_dtype_policy" : transformer_dtype_policy_choice,
"boost" : boost_choice,
"clear_file_list" : clear_file_list,
"preload_model_policy" : preload_model_policy_choice,
"UI_theme" : UI_theme_choice,
"fit_canvas": fit_canvas_choice,
"enhancer_enabled" : enhancer_enabled_choice,
"preload_in_VRAM" : preload_in_VRAM_choice
}
server_config = {
"attention_mode" : attention_choice,
"transformer_types": transformer_types_choices,
"text_encoder_quantization" : text_encoder_quantization_choice,
"save_path" : save_path_choice,
"compile" : compile_choice,
"profile" : profile_choice,
"vae_config" : vae_config_choice,
"vae_precision" : VAE_precision_choice,
"mixed_precision" : mixed_precision_choice,
"metadata_type": metadata_choice,
"transformer_quantization" : quantization_choice,
"transformer_dtype_policy" : transformer_dtype_policy_choice,
"boost" : boost_choice,
"clear_file_list" : clear_file_list,
"preload_model_policy" : preload_model_policy_choice,
"UI_theme" : UI_theme_choice,
"fit_canvas": fit_canvas_choice,
"enhancer_enabled" : enhancer_enabled_choice,
"preload_in_VRAM" : preload_in_VRAM_choice,
"notification_sound_enabled" : notification_sound_enabled_choice,
"notification_sound_volume" : notification_sound_volume_choice
}
if Path(server_config_filename).is_file():
with open(server_config_filename, "r", encoding="utf-8") as reader:
@ -2500,7 +2508,7 @@ def apply_changes( state,
transformer_types = server_config["transformer_types"]
model_filename = get_model_filename(transformer_type, transformer_quantization, transformer_dtype_policy)
state["model_filename"] = model_filename
if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas"] for change in changes ):
if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas", "notification_sound_enabled", "notification_sound_volume"] for change in changes ):
model_choice = gr.Dropdown()
else:
reload_needed = True
@ -2651,7 +2659,21 @@ def refresh_gallery(state): #, msg
for img_uri in list_uri:
thumbnails += f'<TD><img src="{img_uri}" alt="Start" style="max-width:{thumbnail_size}; max-height:{thumbnail_size}; display: block; margin: auto; object-fit: contain;" /></TD>'
html = "<STYLE> #PINFO, #PINFO th, #PINFO td {border: 1px solid #CCCCCC;background-color:#FFFFFF;}</STYLE><TABLE WIDTH=100% ID=PINFO ><TR><TD width=100%>" + prompt + "</TD>" + thumbnails + "</TR></TABLE>"
# Get current theme from server config
current_theme = server_config.get("UI_theme", "default")
# Use minimal, adaptive styling that blends with any background
# This creates a subtle container that doesn't interfere with the page's theme
table_style = """
border: 1px solid rgba(128, 128, 128, 0.3);
background-color: transparent;
color: inherit;
padding: 8px;
border-radius: 6px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
"""
html = f"<TABLE WIDTH=100% ID=PINFO style='{table_style}'><TR><TD width=100% style='{table_style}'>" + prompt + "</TD>" + thumbnails + "</TR></TABLE>"
html_output = gr.HTML(html, visible= True)
return gr.Gallery(selected_index=choice, value = file_list), html_output, gr.Button(visible=False), gr.Button(visible=True), gr.Row(visible=True), update_queue_data(queue), gr.Button(interactive= abort_interactive), gr.Button(visible= onemorewindow_visible)
@ -3567,6 +3589,17 @@ def generate_video(
file_list.append(video_path)
file_settings_list.append(configs)
# Play notification sound for single video
try:
if server_config.get("notification_sound_enabled", 1):
volume = server_config.get("notification_sound_volume", 50)
notification_sound.notify_video_completion(
video_path=video_path,
volume=volume
)
except Exception as e:
print(f"Error playing notification sound for individual video: {e}")
send_cmd("output")
seed += 1
@ -3909,6 +3942,13 @@ def process_tasks(state):
status = f"Video generation was aborted. Total Generation Time: {end_time-start_time:.1f}s"
else:
status = f"Total Generation Time: {end_time-start_time:.1f}s"
# Play notification sound when video generation completed successfully
try:
if server_config.get("notification_sound_enabled", 1):
volume = server_config.get("notification_sound_volume", 50)
notification_sound.notify_video_completion(volume=volume)
except Exception as e:
print(f"Error playing notification sound: {e}")
gen["status"] = status
gen["status_display"] = False
@ -5742,6 +5782,24 @@ def generate_configuration_tab(state, blocks, header, model_choice, prompt_enhan
)
preload_in_VRAM_choice = gr.Slider(0, 40000, value=server_config.get("preload_in_VRAM", 0), step=100, label="Number of MB of Models that are Preloaded in VRAM (0 will use Profile default)")
with gr.Tab("Notifications"):
gr.Markdown("### Notification Settings")
notification_sound_enabled_choice = gr.Dropdown(
choices=[
("On", 1),
("Off", 0),
],
value=server_config.get("notification_sound_enabled", 1),
label="Notification Sound Enabled"
)
notification_sound_volume_choice = gr.Slider(
minimum=0,
maximum=100,
value=server_config.get("notification_sound_volume", 50),
step=5,
label="Notification Sound Volume (0 = silent, 100 = very loud)"
)
@ -5769,7 +5827,9 @@ def generate_configuration_tab(state, blocks, header, model_choice, prompt_enhan
UI_theme_choice,
enhancer_enabled_choice,
fit_canvas_choice,
preload_in_VRAM_choice
preload_in_VRAM_choice,
notification_sound_enabled_choice,
notification_sound_volume_choice
],
outputs= [msg , header, model_choice, prompt_enhancer_row]
)