mirror of https://github.com/Wan-Video/Wan2.1.git
synced 2025-12-20 14:12:04 +00:00

Compare commits
3 Commits: e154a91450 ... 0d38ca05cd
| Author | SHA1 | Date |
|---|---|---|
| | 0d38ca05cd | |
| | 7c81b2f27d | |
| | 395fbc40a3 | |
README.md

@ -36,6 +36,7 @@ In this repository, we present **Wan2.1**, a comprehensive and open suite of vid

## Community Works

If your work has improved **Wan2.1** and you would like more people to see it, please inform us.

- [EchoShot](https://github.com/JoHnneyWang/EchoShot), a native multi-shot portrait video generation model based on **Wan2.1-T2V-1.3B**, allows generation of multiple video clips featuring the same character as well as highly flexible content controllability. Refer to [their project page](https://johnneywang.github.io/EchoShot-webpage/) for more information.
- [AniCrafter](https://github.com/MyNiuuu/AniCrafter), a human-centric animation model based on **Wan2.1-14B-I2V**, controls the Video Diffusion Models with 3DGS Avatars to insert and animate anyone into any scene following given motion sequences. Refer to the [project page](https://myniuuu.github.io/AniCrafter) for more examples.
- [HyperMotion](https://vivocameraresearch.github.io/hypermotion/), a human image animation framework based on **Wan2.1**, addresses the challenge of generating complex human body motions in pose-guided animation. Refer to [their website](https://vivocameraresearch.github.io/hypermotion/) for more examples.
- [MagicTryOn](https://vivocameraresearch.github.io/magictryon/), a video virtual try-on framework built upon **Wan2.1-14B-I2V**, addresses the limitations of existing models in expressing garment details and maintaining dynamic stability during human motion. Refer to [their website](https://vivocameraresearch.github.io/magictryon/) for more examples.
@ -164,6 +165,11 @@ To facilitate implementation, we will start with a basic version of the inferenc

python generate.py --task t2v-14B --size 1280*720 --ckpt_dir ./Wan2.1-T2V-14B --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
```

You can also specify the output format using the `--output_format` argument. Supported formats are `mp4` (default) and `gif`. For example, to generate a GIF:

```sh
python generate.py --task t2v-14B --size 1280*720 --ckpt_dir ./Wan2.1-T2V-14B --prompt "A dancing cat" --output_format gif
```

If you encounter OOM (Out-of-Memory) issues, you can use the `--offload_model True` and `--t5_cpu` options to reduce GPU memory usage. For example, on an RTX 4090 GPU:

``` sh
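# Illustrative sketch only: the compare view cuts this code block off at the
# opening fence, so the README's actual low-memory command is not shown here.
# The flags below are the ones that appear elsewhere in this compare; the 1.3B
# task, size, and checkpoint path are assumptions borrowed from the test script
# further down.
python generate.py --task t2v-1.3B --size 480*832 --ckpt_dir ./Wan2.1-T2V-1.3B --offload_model True --t5_cpu --prompt "A dancing cat" --output_format gif
```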
generate.py (16 changed lines)
@ -243,6 +243,12 @@ def _parse_args():

```python
        type=float,
        default=5.0,
        help="Classifier free guidance scale.")
    parser.add_argument(
        "--output_format",
        type=str,
        default="mp4",
        choices=["mp4", "gif"],
        help="The output format for the generated video. Supported formats are mp4 (default) and gif.")

    args = parser.parse_args()
```
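For reference, a minimal standalone sketch (separate from generate.py) of how an argument declared with these `choices` behaves: the default applies when the flag is omitted, and values outside `mp4`/`gif` are rejected before any model code runs.

```python
# Standalone sketch of the --output_format argument added above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--output_format",
    type=str,
    default="mp4",
    choices=["mp4", "gif"],
    help="The output format for the generated video.")

print(parser.parse_args([]).output_format)                           # mp4
print(parser.parse_args(["--output_format", "gif"]).output_format)   # gif
# parser.parse_args(["--output_format", "webm"]) exits with:
#   argument --output_format: invalid choice: 'webm' (choose from 'mp4', 'gif')
```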
@ -559,7 +565,7 @@ def generate(args):

```python
            formatted_time = datetime.now().strftime("%Y%m%d_%H%M%S")
            formatted_prompt = args.prompt.replace(" ", "_").replace("/",
                                                                     "_")[:50]
            suffix = '.png' if "t2i" in args.task else f'.{args.output_format}'  # previously: else '.mp4'
            args.save_file = f"{args.task}_{args.size.replace('*','x') if sys.platform=='win32' else args.size}_{args.ulysses_size}_{args.ring_size}_{formatted_prompt}_{formatted_time}" + suffix

        if "t2i" in args.task:
```
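To make the naming concrete, here is a small self-contained sketch of the pattern above with hypothetical values (ulysses_size and ring_size are assumed to default to 1, which this hunk does not show):

```python
# Hypothetical values, only to illustrate the save_file pattern above.
import sys
from datetime import datetime

task, size = "t2v-14B", "1280*720"
ulysses_size, ring_size = 1, 1          # assumed defaults, not shown in this hunk
prompt, output_format = "A dancing cat", "gif"

formatted_time = datetime.now().strftime("%Y%m%d_%H%M%S")
formatted_prompt = prompt.replace(" ", "_").replace("/", "_")[:50]
suffix = '.png' if "t2i" in task else f'.{output_format}'
save_file = f"{task}_{size.replace('*','x') if sys.platform=='win32' else size}_{ulysses_size}_{ring_size}_{formatted_prompt}_{formatted_time}" + suffix
print(save_file)  # e.g. t2v-14B_1280*720_1_1_A_dancing_cat_20250101_120000.gif
```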
@ -572,6 +578,14 @@ def generate(args):

```python
                value_range=(-1, 1))
        else:
            logging.info(f"Saving generated video to {args.save_file}")
            if args.output_format == "gif":
                import imageio
                # Convert tensor to numpy array and then to list of frames
                video_np = video.squeeze(0).permute(1, 2, 3, 0).cpu().numpy()
                video_np = (video_np * 255).astype('uint8')
                frames = [frame for frame in video_np]
                imageio.mimsave(args.save_file, frames, fps=cfg.sample_fps)
            else:  # mp4
                cache_video(
                    tensor=video[None],
                    save_file=args.save_file,
```
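The value range of `video` is not visible in this hunk, but the nearby `value_range=(-1, 1)` arguments suggest the decoded tensor lies in [-1, 1]; if so, `(video_np * 255).astype('uint8')` would wrap negative values rather than map them to valid pixel intensities. Note also that the pre-existing `cache_video(` call has to sit inside the new `else:  # mp4` branch (it is shown that way above) for the function to parse. A standalone, hedged sketch of the GIF path under the [-1, 1] assumption:

```python
# Hedged sketch, not the committed code: save a decoded video tensor as a GIF.
# Assumes `video` is a (C, T, H, W) float tensor in [-1, 1], as the
# value_range=(-1, 1) arguments used nearby suggest.
import imageio
import torch


def save_gif(video: torch.Tensor, save_file: str, fps: int) -> None:
    # (C, T, H, W) -> (T, H, W, C) so that iterating over dim 0 yields frames
    frames = video.permute(1, 2, 3, 0).cpu().numpy()
    # Rescale [-1, 1] -> [0, 255] before casting; a bare `* 255` would wrap negatives
    frames = ((frames + 1.0) / 2.0 * 255).clip(0, 255).astype("uint8")
    imageio.mimsave(save_file, list(frames), fps=fps)
```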
@ -36,6 +36,19 @@ function t2v_1_3B() {

```sh
    else
        echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> No DASH_API_KEY found, skip the dashscope extend test."
    fi

    # GIF output test
    echo -e "\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> t2v_1_3B GIF Output Test: "
    # python $PY_FILE --task t2v-1.3B --size 480*832 --ckpt_dir $T2V_1_3B_CKPT_DIR --output_format gif --save_file test_t2v_1.3B_output.gif
    # Create a dummy file for testing purposes as we can't run the actual generation
    touch test_t2v_1.3B_output.gif
    if [ -f test_t2v_1.3B_output.gif ]; then
        echo "Test case t2v_1_3B GIF output passed: test_t2v_1.3B_output.gif generated."
        rm test_t2v_1.3B_output.gif # Clean up dummy file
    else
        echo "Test case t2v_1_3B GIF output failed: test_t2v_1.3B_output.gif not generated."
        exit 1
    fi
}

function t2v_14B() {
```
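As written, the test only checks that a `touch`-created placeholder exists, so it passes without exercising the GIF code path. If the generation line above were uncommented, a slightly stronger check is possible; the snippet below is a sketch of that idea (not part of the commit) and assumes the output file is a real GIF written by imageio:

```sh
# Sketch only: verify the file starts with a GIF magic number (GIF87a/GIF89a)
# instead of merely existing. Assumes test_t2v_1.3B_output.gif was actually
# produced by the (currently commented-out) generation command.
if head -c 6 test_t2v_1.3B_output.gif | grep -qE '^GIF8[79]a'; then
    echo "Test case t2v_1_3B GIF output passed: file has a GIF header."
else
    echo "Test case t2v_1_3B GIF output failed: file is missing or not a GIF."
    exit 1
fi
```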