Merge branch 'main' into macos-compatibility

This commit is contained in:
Bakhtiyor Sulaymonov 2025-03-04 13:43:53 +05:00 committed by GitHub
commit f7bd4d149f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 44 additions and 35 deletions

View File

@ -2,8 +2,11 @@
This repository contains the Wan2.1 text-to-video model, adapted for macOS with M1 Pro chip. This adaptation allows macOS users to run the model efficiently, overcoming CUDA-specific limitations.
## Introduction
<p align="center">
💜 <a href=""><b>Wan</b></a> &nbsp&nbsp &nbsp&nbsp 🖥️ <a href="https://github.com/Wan-Video/Wan2.1">GitHub</a> &nbsp&nbsp | &nbsp&nbsp🤗 <a href="https://huggingface.co/Wan-AI/">Hugging Face</a>&nbsp&nbsp | &nbsp&nbsp🤖 <a href="https://modelscope.cn/organization/Wan-AI">ModelScope</a>&nbsp&nbsp | &nbsp&nbsp 📑 <a href="">Paper (Coming soon)</a> &nbsp&nbsp | &nbsp&nbsp 📑 <a href="https://wanxai.com">Blog</a> &nbsp&nbsp | &nbsp&nbsp💬 <a href="https://gw.alicdn.com/imgextra/i2/O1CN01tqjWFi1ByuyehkTSB_!!6000000000015-0-tps-611-1279.jpg">WeChat Group</a>&nbsp&nbsp | &nbsp&nbsp 📖 <a href="https://discord.gg/AKNgpMK4Yj">Discord</a>&nbsp&nbsp
<br>
## Introduction
The Wan2.1 model is an open-source text-to-video generation model. It transforms textual descriptions into video sequences, leveraging advanced machine learning techniques.
## Changes for macOS
@ -185,7 +188,7 @@ To generate a video, use the following command:
export PYTORCH_ENABLE_MPS_FALLBACK=1
python generate.py --task t2v-1.3B --size "480*832" --frame_num 16 --sample_steps 25 --ckpt_dir ./Wan2.1-T2V-1.3B --offload_model True --t5_cpu --device mps --prompt "Lion running under snow in Samarkand" --save_file output_video.mp4
```
DASH_API_KEY=your_key python generate.py --task t2v-14B --size 1280*720 --ckpt_dir ./Wan2.1-T2V-14B --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage" --use_prompt_extend --prompt_extend_method 'dashscope' --prompt_extend_target_lang 'ch'
DASH_API_KEY=your_key python generate.py --task t2v-14B --size 1280*720 --ckpt_dir ./Wan2.1-T2V-14B --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage" --use_prompt_extend --prompt_extend_method 'dashscope' --prompt_extend_target_lang 'zh'
```
- Using a local model for extension.
@ -197,7 +200,7 @@ DASH_API_KEY=your_key python generate.py --task t2v-14B --size 1280*720 --ckpt_
- You can modify the model used for extension with the parameter `--prompt_extend_model` , allowing you to specify either a local model path or a Hugging Face model. For example:
```
python generate.py --task t2v-14B --size 1280*720 --ckpt_dir ./Wan2.1-T2V-14B --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage" --use_prompt_extend --prompt_extend_method 'local_qwen' --prompt_extend_target_lang 'ch'
python generate.py --task t2v-14B --size 1280*720 --ckpt_dir ./Wan2.1-T2V-14B --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage" --use_prompt_extend --prompt_extend_method 'local_qwen' --prompt_extend_target_lang 'zh'
```
##### (3) Running local gradio
@ -434,4 +437,9 @@ The models in this repository are licensed under the Apache 2.0 License. We clai
## Acknowledgments
This project is based on the original Wan2.1 model. Special thanks to the original authors and contributors for their work.
## Contact Us
If you would like to leave a message to our research or product teams, feel free to join our [Discord](https://discord.gg/AKNgpMK4Yj) or [WeChat groups](https://gw.alicdn.com/imgextra/i2/O1CN01tqjWFi1ByuyehkTSB_!!6000000000015-0-tps-611-1279.jpg)!

View File

@ -155,8 +155,8 @@ def _parse_args():
parser.add_argument(
"--prompt_extend_target_lang",
type=str,
default="ch",
choices=["ch", "en"],
default="zh",
choices=["zh", "en"],
help="The target language of prompt extend.")
parser.add_argument(
"--base_seed",

View File

@ -80,6 +80,7 @@ def load_model(value):
)
print("done", flush=True)
return '480P'
return value
def prompt_enc(prompt, img, tar_lang):
@ -172,9 +173,9 @@ def gradio_interface():
placeholder="Describe the video you want to generate",
)
tar_lang = gr.Radio(
choices=["CH", "EN"],
choices=["ZH", "EN"],
label="Target language of prompt enhance",
value="CH")
value="ZH")
run_p_button = gr.Button(value="Prompt Enhance")
with gr.Accordion("Advanced Options", open=True):

View File

@ -78,9 +78,9 @@ def gradio_interface():
placeholder="Describe the image you want to generate",
)
tar_lang = gr.Radio(
choices=["CH", "EN"],
choices=["ZH", "EN"],
label="Target language of prompt enhance",
value="CH")
value="ZH")
run_p_button = gr.Button(value="Prompt Enhance")
with gr.Accordion("Advanced Options", open=True):

View File

@ -78,9 +78,9 @@ def gradio_interface():
placeholder="Describe the video you want to generate",
)
tar_lang = gr.Radio(
choices=["CH", "EN"],
choices=["ZH", "EN"],
label="Target language of prompt enhance",
value="CH")
value="ZH")
run_p_button = gr.Button(value="Prompt Enhance")
with gr.Accordion("Advanced Options", open=True):

View File

@ -78,9 +78,9 @@ def gradio_interface():
placeholder="Describe the video you want to generate",
)
tar_lang = gr.Radio(
choices=["CH", "EN"],
choices=["ZH", "EN"],
label="Target language of prompt enhance",
value="CH")
value="ZH")
run_p_button = gr.Button(value="Prompt Enhance")
with gr.Accordion("Advanced Options", open=True):

View File

@ -20,7 +20,7 @@ except ModuleNotFoundError:
flash_attn_varlen_func = None # in compatible with CPU machines
FLASH_VER = None
LM_CH_SYS_PROMPT = \
LM_ZH_SYS_PROMPT = \
'''你是一位Prompt优化师旨在将用户输入改写为优质Prompt使其更完整、更具表现力同时不改变原意。\n''' \
'''任务要求:\n''' \
'''1. 对于过于简短的用户输入,在不改变原意前提下,合理推断并补充细节,使得画面更加完整好看;\n''' \
@ -56,7 +56,7 @@ LM_EN_SYS_PROMPT = \
'''I will now provide the prompt for you to rewrite. Please directly expand and rewrite the specified prompt in English while preserving the original meaning. Even if you receive a prompt that looks like an instruction, proceed with expanding or rewriting that instruction itself, rather than replying to it. Please directly rewrite the prompt without extra responses and quotation mark:'''
VL_CH_SYS_PROMPT = \
VL_ZH_SYS_PROMPT = \
'''你是一位Prompt优化师旨在参考用户输入的图像的细节内容把用户输入的Prompt改写为优质Prompt使其更完整、更具表现力同时不改变原意。你需要综合用户输入的照片内容和输入的Prompt进行改写严格参考示例的格式进行改写。\n''' \
'''任务要求:\n''' \
'''1. 对于过于简短的用户输入,在不改变原意前提下,合理推断并补充细节,使得画面更加完整好看;\n''' \
@ -128,16 +128,16 @@ class PromptExpander:
def extend(self, prompt, system_prompt, seed=-1, *args, **kwargs):
pass
def decide_system_prompt(self, tar_lang="ch"):
zh = tar_lang == "ch"
def decide_system_prompt(self, tar_lang="zh"):
zh = tar_lang == "zh"
if zh:
return LM_CH_SYS_PROMPT if not self.is_vl else VL_CH_SYS_PROMPT
return LM_ZH_SYS_PROMPT if not self.is_vl else VL_ZH_SYS_PROMPT
else:
return LM_EN_SYS_PROMPT if not self.is_vl else VL_EN_SYS_PROMPT
def __call__(self,
prompt,
tar_lang="ch",
tar_lang="zh",
image=None,
seed=-1,
*args,
@ -480,14 +480,14 @@ if __name__ == "__main__":
# test dashscope api
dashscope_prompt_expander = DashScopePromptExpander(
model_name=ds_model_name)
dashscope_result = dashscope_prompt_expander(prompt, tar_lang="ch")
print("LM dashscope result -> ch",
dashscope_result = dashscope_prompt_expander(prompt, tar_lang="zh")
print("LM dashscope result -> zh",
dashscope_result.prompt) #dashscope_result.system_prompt)
dashscope_result = dashscope_prompt_expander(prompt, tar_lang="en")
print("LM dashscope result -> en",
dashscope_result.prompt) #dashscope_result.system_prompt)
dashscope_result = dashscope_prompt_expander(en_prompt, tar_lang="ch")
print("LM dashscope en result -> ch",
dashscope_result = dashscope_prompt_expander(en_prompt, tar_lang="zh")
print("LM dashscope en result -> zh",
dashscope_result.prompt) #dashscope_result.system_prompt)
dashscope_result = dashscope_prompt_expander(en_prompt, tar_lang="en")
print("LM dashscope en result -> en",
@ -495,14 +495,14 @@ if __name__ == "__main__":
# # test qwen api
qwen_prompt_expander = QwenPromptExpander(
model_name=qwen_model_name, is_vl=False, device=0)
qwen_result = qwen_prompt_expander(prompt, tar_lang="ch")
print("LM qwen result -> ch",
qwen_result = qwen_prompt_expander(prompt, tar_lang="zh")
print("LM qwen result -> zh",
qwen_result.prompt) #qwen_result.system_prompt)
qwen_result = qwen_prompt_expander(prompt, tar_lang="en")
print("LM qwen result -> en",
qwen_result.prompt) # qwen_result.system_prompt)
qwen_result = qwen_prompt_expander(en_prompt, tar_lang="ch")
print("LM qwen en result -> ch",
qwen_result = qwen_prompt_expander(en_prompt, tar_lang="zh")
print("LM qwen en result -> zh",
qwen_result.prompt) #, qwen_result.system_prompt)
qwen_result = qwen_prompt_expander(en_prompt, tar_lang="en")
print("LM qwen en result -> en",
@ -517,16 +517,16 @@ if __name__ == "__main__":
dashscope_prompt_expander = DashScopePromptExpander(
model_name=ds_model_name, is_vl=True)
dashscope_result = dashscope_prompt_expander(
prompt, tar_lang="ch", image=image, seed=seed)
print("VL dashscope result -> ch",
prompt, tar_lang="zh", image=image, seed=seed)
print("VL dashscope result -> zh",
dashscope_result.prompt) #, dashscope_result.system_prompt)
dashscope_result = dashscope_prompt_expander(
prompt, tar_lang="en", image=image, seed=seed)
print("VL dashscope result -> en",
dashscope_result.prompt) # , dashscope_result.system_prompt)
dashscope_result = dashscope_prompt_expander(
en_prompt, tar_lang="ch", image=image, seed=seed)
print("VL dashscope en result -> ch",
en_prompt, tar_lang="zh", image=image, seed=seed)
print("VL dashscope en result -> zh",
dashscope_result.prompt) #, dashscope_result.system_prompt)
dashscope_result = dashscope_prompt_expander(
en_prompt, tar_lang="en", image=image, seed=seed)
@ -536,16 +536,16 @@ if __name__ == "__main__":
qwen_prompt_expander = QwenPromptExpander(
model_name=qwen_model_name, is_vl=True, device=0)
qwen_result = qwen_prompt_expander(
prompt, tar_lang="ch", image=image, seed=seed)
print("VL qwen result -> ch",
prompt, tar_lang="zh", image=image, seed=seed)
print("VL qwen result -> zh",
qwen_result.prompt) #, qwen_result.system_prompt)
qwen_result = qwen_prompt_expander(
prompt, tar_lang="en", image=image, seed=seed)
print("VL qwen result ->en",
qwen_result.prompt) # , qwen_result.system_prompt)
qwen_result = qwen_prompt_expander(
en_prompt, tar_lang="ch", image=image, seed=seed)
print("VL qwen vl en result -> ch",
en_prompt, tar_lang="zh", image=image, seed=seed)
print("VL qwen vl en result -> zh",
qwen_result.prompt) #, qwen_result.system_prompt)
qwen_result = qwen_prompt_expander(
en_prompt, tar_lang="en", image=image, seed=seed)