diff --git a/README.md b/README.md
index 0d23846..b01ac90 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,8 @@ In this repository, we present **Wan2.1**, a comprehensive and open suite of vid
 ## 🔥 Latest News!!
-
-* Mar 03, 2025: 👋 Wan2.1GP v1.2: Implementented tiling on VAE encoding and decoding. No more VRAM peaks at the beginning and at the end
+* Mar 03, 2025: 👋 Wan2.1GP v1.3: Support for Image to Video with multiple images for different image / prompt combinations (requires the *--multiple-images* switch), and added the command line option *--preload x* to preload x MB of the main diffusion model in VRAM if you find there is too much unused VRAM and want to (slightly) accelerate the generation process
+* Mar 03, 2025: 👋 Wan2.1GP v1.2: Implemented tiling on VAE encoding and decoding. No more VRAM peaks at the beginning and at the end
 * Mar 03, 2025: 👋 Wan2.1GP v1.1: added Tea Cache support for faster generations: optimization of kijai's implementation (https://github.com/kijai/ComfyUI-WanVideoWrapper/) of teacache (https://github.com/ali-vilab/TeaCache)
 * Mar 02, 2025: 👋 Wan2.1GP by DeepBeepMeep v1 brings:
     - Support for all Wan including the Image to Video model
@@ -121,6 +121,11 @@ To run the image to video generator (in Low VRAM mode):
 python gradio_server.py --i2v
 ```
 
+To be able to input multiple images with the image to video generator:
+```bash
+python gradio_server.py --i2v --multiple-images
+```
+
 Within the application you can configure which video generator will be launched without specifying a command line switch.
 
 To run the application while loading entirely the diffusion model in VRAM (slightly faster but requires 24 GB of VRAM for a 8 bits quantized 14B model )
@@ -155,18 +160,22 @@ You will find prebuilt Loras on https://civitai.com/ or you will be able to buil
 --lora-preset preset : name of preset gile (without the extension) to preload
 --verbose level : default (1) : level of information between 0 and 2\
 --server-port portno : default (7860) : Gradio port no\
---server-name name : default (0.0.0.0) : Gradio server name\
+--server-name name : default (localhost) : Gradio server name\
 --open-browser : open automatically Browser when launching Gradio Server\
+--lock-config : prevent modifying the video engine configuration from the interface\
+--share : create a shareable URL on Hugging Face so that your server can be accessed remotely\
+--multiple-images : allow providing multiple images as starting points for different videos\
 --compile : turn on pytorch compilation\
 --attention mode: force attention mode among, sdpa, flash, sage, sage2\
---profile no : default (4) : no of profile between 1 and 5
+--profile no : default (4) : profile number between 1 and 5\
+--preload no : number of megabytes of the diffusion model to preload in VRAM; may offer a slight speed gain if there is unused VRAM
 
 ### Profiles (for power users only)
 You can choose between 5 profiles, but two are really relevant here :
-- LowRAM_HighVRAM (3): loads entirely the model in VRAM, slightly faster, but less VRAM
-- LowRAM_LowVRAM (4): load only the part of the models that is needed, low VRAM and low RAM requirement but slightly slower
-
+- LowRAM_HighVRAM (3): loads the model entirely in VRAM if possible, slightly faster, but leaves less VRAM available for the video data
+- LowRAM_LowVRAM (4): loads only the part of the model that is needed, low VRAM and low RAM requirements but slightly slower
+You can adjust how many megabytes of the model are preloaded in VRAM with --preload nnn
 
 ### Other Models for the GPU Poor
 - HuanyuanVideoGP: https://github.com/deepbeepmeep/HunyuanVideoGP :\
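For reference, the switches documented in the README changes above can be combined in a single launch command. A minimal sketch (the 2000 MB preload value is purely illustrative, not a recommendation from this patch):

```bash
# Image-to-video mode, accepting multiple start images, with roughly 2 GB of the
# diffusion model preloaded in VRAM to (slightly) speed up generation
python gradio_server.py --i2v --multiple-images --preload 2000
```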
diff --git a/gradio_server.py b/gradio_server.py
index bf0996b..e7a350d 100644
--- a/gradio_server.py
+++ b/gradio_server.py
@@ -13,38 +13,17 @@ import random
 import json
 import wan
 from wan.configs import MAX_AREA_CONFIGS, WAN_CONFIGS, SUPPORTED_SIZES
-from wan.utils.prompt_extend import DashScopePromptExpander, QwenPromptExpander
 from wan.utils.utils import cache_video
 from wan.modules.attention import get_attention_modes
 import torch
 import gc
 import traceback
 import math
+import asyncio
 
 def _parse_args():
     parser = argparse.ArgumentParser(
         description="Generate a video from a text prompt or image using Gradio")
-    parser.add_argument(
-        "--ckpt_dir_720p",
-        type=str,
-        default=None,
-        help="The path to the checkpoint directory.")
-    parser.add_argument(
-        "--ckpt_dir_480p",
-        type=str,
-        default=None,
-        help="The path to the checkpoint directory.")
-    parser.add_argument(
-        "--prompt_extend_method",
-        type=str,
-        default="local_qwen",
-        choices=["dashscope", "local_qwen"],
-        help="The prompt extend method to use.")
-    parser.add_argument(
-        "--prompt_extend_model",
-        type=str,
-        default=None,
-        help="The prompt extend model to use.")
 
     parser.add_argument(
         "--quantize-transformer",
@@ -52,6 +31,31 @@ def _parse_args():
         help="On the fly 'transformer' quantization"
     )
 
+    parser.add_argument(
+        "--share",
+        action="store_true",
+        help="Create a shared URL to access webserver remotely"
+    )
+
+    parser.add_argument(
+        "--lock-config",
+        action="store_true",
+        help="Prevent modifying the configuration from the web interface"
+    )
+
+    parser.add_argument(
+        "--preload",
+        type=str,
+        default="0",
+        help="Megabytes of the diffusion model to preload in VRAM"
+    )
+
+    parser.add_argument(
+        "--multiple-images",
+        action="store_true",
+        help="Allow inputting multiple images with image to video"
+    )
+
     parser.add_argument(
         "--lora-dir-i2v",
@@ -163,9 +167,6 @@ def _parse_args():
 
     args = parser.parse_args()
 
-    args.ckpt_dir_720p = "../ckpts" # os.path.join("ckpt")
-    args.ckpt_dir_480p = "../ckpts" # os.path.join("ckpt")
-    assert args.ckpt_dir_720p is not None or args.ckpt_dir_480p is not None, "Please specify at least one checkpoint directory."
 
     return args
@@ -179,7 +180,7 @@ lock_ui_attention = False
 lock_ui_transformer = False
 lock_ui_compile = False
-
+preload = int(args.preload)
 force_profile_no = int(args.profile)
 verbose_level = int(args.verbose)
 quantizeTransformer = args.quantize_transformer
@@ -433,7 +434,10 @@ def load_models(i2v, lora_dir, lora_preselected_preset ):
     kwargs = { "extraModelsToQuantize": None}
     if profile == 2 or profile == 4:
-        kwargs["budgets"] = { "transformer" : 100, "text_encoder" : 100, "*" : 1000 }
+        kwargs["budgets"] = { "transformer" : 100 if preload == 0 else preload, "text_encoder" : 100, "*" : 1000 }
+    elif profile == 3:
+        kwargs["budgets"] = { "*" : "70%" }
+
     loras, loras_names, default_loras_choices, default_loras_multis_str, default_lora_preset, loras_presets = setup_loras(pipe, lora_dir, lora_preselected_preset, None)
     offloadobj = offload.profile(pipe, profile_no= profile, compile = compile, quantizeTransformer = quantizeTransformer, **kwargs)
@@ -484,7 +488,8 @@ def apply_changes( state,
     vae_config_choice,
     default_ui_choice ="t2v",
 ):
-
+    if args.lock_config:
+        return
     if gen_in_progress:
         yield "
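The gradio_server.py changes above expose the same switches through argparse and make apply_changes() exit early when the configuration is locked. A hedged usage sketch of the two server-oriented flags, assuming --share is forwarded to Gradio's launch() as usual:

```bash
# Publish a shareable URL for remote access while preventing visitors from
# modifying the video engine configuration in the web interface
python gradio_server.py --i2v --share --lock-config
```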