lucky day

This commit is contained in:
deepbeepmeep 2025-08-12 00:52:22 +02:00
parent 58c1549962
commit 175e05fc1e
7 changed files with 564 additions and 356 deletions

View File

@@ -20,6 +20,20 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep

 ## 🔥 Latest Updates :
+### August 11 2025: WanGP v7.77 - Lucky Day
+This is your lucky day! Thanks to new configuration options that let you store generated Videos and Images in lossless compressed formats, you will find they in fact look two times better without doing anything!
+Just kidding, they will only be marginally better, but at least this opens the way to professional editing.
+
+Supported formats:
+- Video: x264, x264 lossless, x265
+- Images: jpeg, png, webp, webp lossless
+
+Generation Settings are stored in each of the above regardless of the format (that was the hard part).
+
+Also you can now choose different output directories for images and videos.
+
 ### August 10 2025: WanGP v7.76 - Faster than the VAE ...
 We have a funny one here today: FastWan 2.2 5B, the Fastest Video Generator, only 20s to generate 121 frames at 720p. The snag is that the VAE is twice as slow...
 Thanks to Kijai for extracting the Lora that is used to build the corresponding finetune.
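The v7.77 options map onto plain server-config keys (see the wgp.py changes below). A minimal sketch of the relevant entries, using values from the dropdowns added in this commit; every other key is unchanged:

    server_config = {
        # ... existing keys unchanged ...
        "save_path": "outputs",                     # folder for generated videos
        "image_save_path": "outputs",               # folder for generated images
        "video_output_codec": "libx264_lossless",   # or libx264_8 / libx264_10 / libx265_8 / libx265_28
        "image_output_codec": "webp_lossless",      # or jpeg_95 / jpeg_85 / webp_95 / webp_85 / png
    }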

View File

@@ -131,7 +131,7 @@ from pathlib import Path
 import torch

 def remux_with_audio(video_path: Path, output_path: Path, audio: torch.Tensor, sampling_rate: int):
-    from shared.utils.utils import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
+    from shared.utils.audio_video import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
     with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
         temp_path = Path(f.name)

View File

@@ -21,6 +21,7 @@ from segment_anything.modeling.image_encoder import window_partition, window_unp
 from .utils.get_default_model import get_matanyone_model
 from .matanyone.inference.inference_core import InferenceCore
 from .matanyone_wrapper import matanyone
+from shared.utils.audio_video import save_video, save_image

 arg_device = "cuda"
 arg_sam_model_type="vit_h"
@@ -377,14 +378,14 @@ def show_mask(video_state, interactive_state, mask_dropdown):
     return select_frame

-def save_video(frames, output_path, fps):
-    writer = imageio.get_writer( output_path, fps=fps, codec='libx264', quality=8)
-    for frame in frames:
-        writer.append_data(frame)
-    writer.close()
-    return output_path
+# def save_video(frames, output_path, fps):
+#     writer = imageio.get_writer( output_path, fps=fps, codec='libx264', quality=8)
+#     for frame in frames:
+#         writer.append_data(frame)
+#     writer.close()
+#     return output_path

 def mask_to_xyxy_box(mask):
     rows, cols = np.where(mask == 255)
@@ -535,20 +536,20 @@ def video_matting(video_state,video_input, end_slider, matting_type, interactive
     file_name= video_state["video_name"]
     file_name = ".".join(file_name.split(".")[:-1])

-    from shared.utils.utils import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
+    from shared.utils.audio_video import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
     source_audio_tracks, audio_metadata = extract_audio_tracks(video_input)
     output_fg_path = f"./mask_outputs/{file_name}_fg.mp4"
     output_fg_temp_path = f"./mask_outputs/{file_name}_fg_tmp.mp4"
     if len(source_audio_tracks) == 0:
-        foreground_output = save_video(foreground, output_path=output_fg_path , fps=fps)
+        foreground_output = save_video(foreground, output_fg_path , fps=fps, codec_type= video_output_codec)
     else:
-        foreground_output_tmp = save_video(foreground, output_path=output_fg_temp_path , fps=fps)
+        foreground_output_tmp = save_video(foreground, output_fg_temp_path , fps=fps, codec_type= video_output_codec)
         combine_video_with_audio_tracks(output_fg_temp_path, source_audio_tracks, output_fg_path, audio_metadata=audio_metadata)
         cleanup_temp_audio_files(source_audio_tracks)
         os.remove(foreground_output_tmp)
         foreground_output = output_fg_path

-    alpha_output = save_video(alpha, output_path="./mask_outputs/{}_alpha.mp4".format(file_name), fps=fps)
+    alpha_output = save_video(alpha, "./mask_outputs/{}_alpha.mp4".format(file_name), fps=fps, codec_type= video_output_codec)
     return foreground_output, alpha_output, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
@@ -745,8 +746,12 @@ def teleport_to_video_tab(tab_state):
     return gr.Tabs(selected="video_gen")

-def display(tabs, tab_state, vace_video_input, vace_image_input, vace_video_mask, vace_image_mask, vace_image_refs):
+def display(tabs, tab_state, server_config, vace_video_input, vace_image_input, vace_video_mask, vace_image_mask, vace_image_refs):
     # my_tab.select(fn=load_unload_models, inputs=[], outputs=[])
+    global image_output_codec, video_output_codec
+    image_output_codec = server_config.get("image_output_codec", None)
+    video_output_codec = server_config.get("video_output_codec", None)
+
     media_url = "https://github.com/pq-yang/MatAnyone/releases/download/media/"

View File

@@ -46,6 +46,7 @@ soundfile
 ffmpeg-python
 pyannote.audio
 pynvml
+piexif
 #huggingface_hub[hf_xet] #slow down everything !!!!
 # num2words
 # spacy

421
shared/utils/audio_video.py Normal file
View File

@@ -0,0 +1,421 @@
import subprocess
import tempfile, os
import ffmpeg
import torchvision.transforms.functional as TF
import torch.nn.functional as F
import cv2
import imageio
import binascii
import torchvision
import torch
from PIL import Image
import os.path as osp
import json

def rand_name(length=8, suffix=''):
    name = binascii.b2a_hex(os.urandom(length)).decode('utf-8')
    if suffix:
        if not suffix.startswith('.'):
            suffix = '.' + suffix
        name += suffix
    return name
def extract_audio_tracks(source_video, verbose=False, query_only=False):
    """
    Extract all audio tracks from a source video into temporary AAC files.

    Returns:
        Tuple:
        - List of temp file paths for extracted audio tracks
        - List of corresponding metadata dicts:
            {'codec', 'sample_rate', 'channels', 'duration', 'language'}
            where 'duration' is set to container duration (for consistency).
    """
    probe = ffmpeg.probe(source_video)
    audio_streams = [s for s in probe['streams'] if s['codec_type'] == 'audio']
    container_duration = float(probe['format'].get('duration', 0.0))

    if not audio_streams:
        if query_only: return 0
        if verbose: print(f"No audio track found in {source_video}")
        return [], []

    if query_only:
        return len(audio_streams)

    if verbose:
        print(f"Found {len(audio_streams)} audio track(s), container duration = {container_duration:.3f}s")

    file_paths = []
    metadata = []
    for i, stream in enumerate(audio_streams):
        fd, temp_path = tempfile.mkstemp(suffix=f'_track{i}.aac', prefix='audio_')
        os.close(fd)
        file_paths.append(temp_path)
        metadata.append({
            'codec': stream.get('codec_name'),
            'sample_rate': int(stream.get('sample_rate', 0)),
            'channels': int(stream.get('channels', 0)),
            'duration': container_duration,
            'language': stream.get('tags', {}).get('language', None)
        })
        ffmpeg.input(source_video).output(
            temp_path,
            **{'map': f'0:a:{i}', 'acodec': 'aac', 'b:a': '128k'}
        ).overwrite_output().run(quiet=not verbose)

    return file_paths, metadata
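# Usage sketch (editor's illustration, not part of this module): probe a clip
# for audio, then pull every track out to temporary AAC files. "input.mp4" is
# a hypothetical path.
#
#   if extract_audio_tracks("input.mp4", query_only=True):
#       track_paths, track_meta = extract_audio_tracks("input.mp4", verbose=True)
#       print(track_meta[0]['codec'], track_meta[0]['duration'])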
def combine_and_concatenate_video_with_audio_tracks(
        save_path_tmp, video_path,
        source_audio_tracks, new_audio_tracks,
        source_audio_duration, audio_sampling_rate,
        new_audio_from_start=False,
        source_audio_metadata=None,
        audio_bitrate='128k',
        audio_codec='aac',
        verbose=False):
    inputs, filters, maps, idx = ['-i', video_path], [], ['-map', '0:v'], 1
    metadata_args = []
    sources = source_audio_tracks or []
    news = new_audio_tracks or []
    duplicate_source = len(sources) == 1 and len(news) > 1
    N = len(news) if source_audio_duration == 0 else max(len(sources), len(news)) or 1

    for i in range(N):
        s = (sources[i] if i < len(sources)
             else sources[0] if duplicate_source else None)
        n = news[i] if len(news) == N else (news[0] if news else None)

        if source_audio_duration == 0:
            if n:
                inputs += ['-i', n]
                filters.append(f'[{idx}:a]apad=pad_dur=100[aout{i}]')
                idx += 1
            else:
                filters.append(f'anullsrc=r={audio_sampling_rate}:cl=mono,apad=pad_dur=100[aout{i}]')
        else:
            if s:
                inputs += ['-i', s]
                meta = source_audio_metadata[i] if source_audio_metadata and i < len(source_audio_metadata) else {}
                needs_filter = (
                    meta.get('codec') != audio_codec or
                    meta.get('sample_rate') != audio_sampling_rate or
                    meta.get('channels') != 1 or
                    meta.get('duration', 0) < source_audio_duration
                )
                if needs_filter:
                    filters.append(
                        f'[{idx}:a]aresample={audio_sampling_rate},aformat=channel_layouts=mono,'
                        f'apad=pad_dur={source_audio_duration},atrim=0:{source_audio_duration},asetpts=PTS-STARTPTS[s{i}]')
                else:
                    filters.append(
                        f'[{idx}:a]apad=pad_dur={source_audio_duration},atrim=0:{source_audio_duration},asetpts=PTS-STARTPTS[s{i}]')
                if lang := meta.get('language'):
                    metadata_args += ['-metadata:s:a:' + str(i), f'language={lang}']
                idx += 1
            else:
                filters.append(
                    f'anullsrc=r={audio_sampling_rate}:cl=mono,atrim=0:{source_audio_duration},asetpts=PTS-STARTPTS[s{i}]')
            if n:
                inputs += ['-i', n]
                start = '0' if new_audio_from_start else source_audio_duration
                filters.append(
                    f'[{idx}:a]aresample={audio_sampling_rate},aformat=channel_layouts=mono,'
                    f'atrim=start={start},asetpts=PTS-STARTPTS[n{i}]')
                filters.append(f'[s{i}][n{i}]concat=n=2:v=0:a=1[aout{i}]')
                idx += 1
            else:
                filters.append(f'[s{i}]apad=pad_dur=100[aout{i}]')
        maps += ['-map', f'[aout{i}]']

    cmd = ['ffmpeg', '-y', *inputs,
           '-filter_complex', ';'.join(filters),
           *maps, *metadata_args,
           '-c:v', 'copy',
           '-c:a', audio_codec,
           '-b:a', audio_bitrate,
           '-ar', str(audio_sampling_rate),
           '-ac', '1',
           '-shortest', save_path_tmp]

    if verbose:
        print(f"ffmpeg command: {cmd}")

    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"FFmpeg error: {e.stderr}")
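# Usage sketch (editor's illustration, not part of this module): keep the first
# 5 s of the source track, then splice a newly generated one after it. Paths,
# the duration and the 16 kHz rate are hypothetical.
#
#   combine_and_concatenate_video_with_audio_tracks(
#       "out_tmp.mp4", "render.mp4",
#       source_audio_tracks=["audio_track0.aac"], new_audio_tracks=["speech.wav"],
#       source_audio_duration=5.0, audio_sampling_rate=16000,
#       source_audio_metadata=[{'codec': 'aac', 'sample_rate': 16000,
#                               'channels': 1, 'duration': 5.0}])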
def combine_video_with_audio_tracks(target_video, audio_tracks, output_video,
                                    audio_metadata=None, verbose=False):
    if not audio_tracks:
        if verbose: print("No audio tracks to combine.")
        return False

    dur = float(next(s for s in ffmpeg.probe(target_video)['streams']
                     if s['codec_type'] == 'video')['duration'])
    if verbose: print(f"Video duration: {dur:.3f}s")

    cmd = ['ffmpeg', '-y', '-i', target_video]
    for path in audio_tracks:
        cmd += ['-i', path]
    cmd += ['-map', '0:v']
    for i in range(len(audio_tracks)):
        cmd += ['-map', f'{i+1}:a']
    for i, meta in enumerate(audio_metadata or []):
        if (lang := meta.get('language')):
            cmd += ['-metadata:s:a:' + str(i), f'language={lang}']
    cmd += ['-c:v', 'copy', '-c:a', 'copy', '-t', str(dur), output_video]

    result = subprocess.run(cmd, capture_output=not verbose, text=True)
    if result.returncode != 0:
        raise Exception(f"FFmpeg error:\n{result.stderr}")
    if verbose:
        print(f"Created {output_video} with {len(audio_tracks)} audio track(s)")
    return True
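# Usage sketch (editor's illustration, not part of this module): the remux
# round trip used elsewhere in this commit; extract the tracks, copy them
# onto the processed video, then drop the temp files. Paths are hypothetical.
#
#   tracks, meta = extract_audio_tracks("source.mp4")
#   if tracks:
#       combine_video_with_audio_tracks("processed.mp4", tracks, "final.mp4",
#                                       audio_metadata=meta)
#       cleanup_temp_audio_files(tracks)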
def cleanup_temp_audio_files(audio_tracks, verbose=False):
    """
    Clean up temporary audio files.

    Args:
        audio_tracks: List of audio file paths to delete
        verbose: Enable verbose output (default: False)

    Returns:
        Number of files successfully deleted
    """
    deleted_count = 0
    for audio_path in audio_tracks:
        try:
            if os.path.exists(audio_path):
                os.unlink(audio_path)
                deleted_count += 1
                if verbose:
                    print(f"Cleaned up {audio_path}")
        except PermissionError:
            print(f"Warning: Could not delete {audio_path} (file may be in use)")
        except Exception as e:
            print(f"Warning: Error deleting {audio_path}: {e}")

    if verbose and deleted_count > 0:
        print(f"Successfully deleted {deleted_count} temporary audio file(s)")

    return deleted_count
def save_video(tensor,
               save_file=None,
               fps=30,
               codec_type='libx264_8',
               container='mp4',
               nrow=8,
               normalize=True,
               value_range=(-1, 1),
               retry=5):
    """Save tensor as video with configurable codec and container options."""
    suffix = f'.{container}'
    cache_file = osp.join('/tmp', rand_name(suffix=suffix)) if save_file is None else save_file
    if not cache_file.endswith(suffix):
        cache_file = osp.splitext(cache_file)[0] + suffix

    # Configure codec parameters
    codec_params = _get_codec_params(codec_type, container)

    # Process and save
    error = None
    for _ in range(retry):
        try:
            if torch.is_tensor(tensor):
                # Preprocess tensor
                tensor = tensor.clamp(min(value_range), max(value_range))
                tensor = torch.stack([
                    torchvision.utils.make_grid(u, nrow=nrow, normalize=normalize, value_range=value_range)
                    for u in tensor.unbind(2)
                ], dim=1).permute(1, 2, 3, 0)
                tensor = (tensor * 255).type(torch.uint8).cpu()
                arrays = tensor.numpy()
            else:
                arrays = tensor

            # Write video (silence ffmpeg logs)
            writer = imageio.get_writer(cache_file, fps=fps, ffmpeg_log_level='error', **codec_params)
            for frame in arrays:
                writer.append_data(frame)
            writer.close()
            return cache_file
        except Exception as e:
            error = e
            print(f"error saving {save_file}: {e}")
    return None  # all retries failed

def _get_codec_params(codec_type, container):
    """Get codec parameters based on codec type and container."""
    if codec_type == 'libx264_8':
        return {'codec': 'libx264', 'quality': 8, 'pixelformat': 'yuv420p'}
    elif codec_type == 'libx264_10':
        return {'codec': 'libx264', 'quality': 10, 'pixelformat': 'yuv420p'}
    elif codec_type == 'libx265_28':
        return {'codec': 'libx265', 'pixelformat': 'yuv420p', 'output_params': ['-crf', '28', '-x265-params', 'log-level=none', '-hide_banner', '-nostats']}
    elif codec_type == 'libx265_8':
        return {'codec': 'libx265', 'pixelformat': 'yuv420p', 'output_params': ['-crf', '8', '-x265-params', 'log-level=none', '-hide_banner', '-nostats']}
    elif codec_type == 'libx264_lossless':
        if container == 'mkv':
            return {'codec': 'ffv1', 'pixelformat': 'rgb24'}
        else:  # mp4
            return {'codec': 'libx264', 'output_params': ['-crf', '0'], 'pixelformat': 'yuv444p'}
    else:  # default: plain libx264
        return {'codec': 'libx264', 'pixelformat': 'yuv420p'}
def save_image(tensor,
               save_file,
               nrow=8,
               normalize=True,
               value_range=(-1, 1),
               quality='jpeg_95',  # 'jpeg_95', 'jpeg_85', 'jpeg_70', 'jpeg_50', 'webp_95', 'webp_85', 'webp_70', 'webp_50', 'png', 'webp_lossless'
               retry=5):
    """Save tensor as image with configurable format and quality."""
    # Get format and quality settings
    format_info = _get_format_info(quality)

    # Rename file extension to match requested format
    save_file = osp.splitext(save_file)[0] + format_info['ext']

    # Save image
    error = None
    for _ in range(retry):
        try:
            tensor = tensor.clamp(min(value_range), max(value_range))
            if format_info['use_pil']:
                # Use PIL for WebP and advanced options
                grid = torchvision.utils.make_grid(tensor, nrow=nrow, normalize=normalize, value_range=value_range)
                # Convert to PIL Image
                grid = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
                img = Image.fromarray(grid)
                img.save(save_file, **format_info['params'])
            else:
                # Use torchvision for JPEG and PNG
                torchvision.utils.save_image(
                    tensor, save_file, nrow=nrow, normalize=normalize,
                    value_range=value_range, **format_info['params']
                )
            break
        except Exception as e:
            error = e
            continue
    else:
        print(f'cache_image failed, error: {error}', flush=True)
    return save_file

def _get_format_info(quality):
    """Get format extension and parameters."""
    formats = {
        # JPEG with torchvision
        'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': False},
        'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': False},
        'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': False},
        'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': False},
        # PNG with torchvision
        'png': {'ext': '.png', 'params': {}, 'use_pil': False},
        # WebP with PIL (for quality control)
        'webp_95': {'ext': '.webp', 'params': {'quality': 95}, 'use_pil': True},
        'webp_85': {'ext': '.webp', 'params': {'quality': 85}, 'use_pil': True},
        'webp_70': {'ext': '.webp', 'params': {'quality': 70}, 'use_pil': True},
        'webp_50': {'ext': '.webp', 'params': {'quality': 50}, 'use_pil': True},
        'webp_lossless': {'ext': '.webp', 'params': {'lossless': True}, 'use_pil': True},
    }
    return formats.get(quality, formats['jpeg_95'])
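# Usage sketch (editor's illustration, not part of this module): one (c, h, w)
# tensor saved twice; note save_image rewrites the extension to match the
# requested quality preset.
#
#   img = torch.rand(3, 512, 512) * 2 - 1
#   save_image(img, "frame.jpg", quality='webp_85')   # actually writes frame.webp
#   save_image(img, "frame.jpg", quality='png')       # actually writes frame.png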
from PIL import Image, PngImagePlugin

def _enc_uc(s):
    try: return b"ASCII\0\0\0" + s.encode("ascii")
    except UnicodeEncodeError: return b"UNICODE\0" + s.encode("utf-16le")

def _dec_uc(b):
    if not isinstance(b, (bytes, bytearray)):
        try: b = bytes(b)
        except Exception: return None
    if b.startswith(b"ASCII\0\0\0"): return b[8:].decode("ascii", "ignore")
    if b.startswith(b"UNICODE\0"): return b[8:].decode("utf-16le", "ignore")
    return b.decode("utf-8", "ignore")

def save_image_metadata(image_path, metadata_dict, **save_kwargs):
    try:
        j = json.dumps(metadata_dict, ensure_ascii=False)
        ext = os.path.splitext(image_path)[1].lower()
        with Image.open(image_path) as im:
            if ext == ".png":
                pi = PngImagePlugin.PngInfo(); pi.add_text("comment", j)
                im.save(image_path, pnginfo=pi, **save_kwargs); return True
            if ext in (".jpg", ".jpeg"):
                im.save(image_path, comment=j.encode("utf-8"), **save_kwargs); return True
            if ext == ".webp":
                import piexif
                exif = {"0th": {}, "Exif": {piexif.ExifIFD.UserComment: _enc_uc(j)}, "GPS": {}, "1st": {}, "thumbnail": None}
                im.save(image_path, format="WEBP", exif=piexif.dump(exif), **save_kwargs); return True
            raise ValueError("Unsupported format")
    except Exception as e:
        print(f"Error saving metadata: {e}"); return False

def read_image_metadata(image_path):
    try:
        ext = os.path.splitext(image_path)[1].lower()
        with Image.open(image_path) as im:
            if ext == ".png":
                val = (getattr(im, "text", {}) or {}).get("comment") or im.info.get("comment")
                return json.loads(val) if val else None
            if ext in (".jpg", ".jpeg"):
                val = im.info.get("comment")
                if isinstance(val, (bytes, bytearray)): val = val.decode("utf-8", "ignore")
                if val:
                    try: return json.loads(val)
                    except Exception: pass
                exif = getattr(im, "getexif", lambda: None)()
                if exif:
                    uc = exif.get(37510)  # UserComment
                    s = _dec_uc(uc) if uc else None
                    if s:
                        try: return json.loads(s)
                        except Exception: pass
                return None
            if ext == ".webp":
                exif_bytes = Image.open(image_path).info.get("exif")
                if not exif_bytes: return None
                import piexif
                uc = piexif.load(exif_bytes).get("Exif", {}).get(piexif.ExifIFD.UserComment)
                s = _dec_uc(uc) if uc else None
                return json.loads(s) if s else None
            return None
    except Exception as e:
        print(f"Error reading metadata: {e}"); return None
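# Usage sketch (editor's illustration, not part of this module): round-trip the
# generation settings through a saved image, as wgp.py does below; the dict
# content is hypothetical.
#
#   path = save_image(img, "gen.jpg", quality='jpeg_95')
#   save_image_metadata(path, {"type": "WanGP v7.77", "prompt": "a lucky day"})
#   print(read_image_metadata(path))   # -> {'type': 'WanGP v7.77', ...}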

View File

@@ -1,6 +1,5 @@
 # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
 import argparse
-import binascii
 import os
 import os.path as osp
 import torchvision.transforms.functional as TF
@@ -10,7 +9,6 @@ import tempfile
 import imageio
 import torch
 import decord
-import torchvision
 from PIL import Image
 import numpy as np
 from rembg import remove, new_session
@@ -21,8 +19,6 @@ import tempfile
 import subprocess
 import json

-__all__ = ['cache_video', 'cache_image', 'str2bool']
-
 from PIL import Image
@@ -218,84 +214,6 @@ def resize_and_remove_background(img_list, budget_width, budget_height, rm_backg
     return output_list

-def rand_name(length=8, suffix=''):
-    name = binascii.b2a_hex(os.urandom(length)).decode('utf-8')
-    if suffix:
-        if not suffix.startswith('.'):
-            suffix = '.' + suffix
-        name += suffix
-    return name
-
-def cache_video(tensor,
-                save_file=None,
-                fps=30,
-                suffix='.mp4',
-                nrow=8,
-                normalize=True,
-                value_range=(-1, 1),
-                retry=5):
-    # cache file
-    cache_file = osp.join('/tmp', rand_name(
-        suffix=suffix)) if save_file is None else save_file
-
-    # save to cache
-    error = None
-    for _ in range(retry):
-        try:
-            # preprocess
-            tensor = tensor.clamp(min(value_range), max(value_range))
-            tensor = torch.stack([
-                torchvision.utils.make_grid(
-                    u, nrow=nrow, normalize=normalize, value_range=value_range)
-                for u in tensor.unbind(2)
-            ], dim=1).permute(1, 2, 3, 0)
-            tensor = (tensor * 255).type(torch.uint8).cpu()
-
-            # write video
-            writer = imageio.get_writer(
-                cache_file, fps=fps, codec='libx264', quality=8)
-            for frame in tensor.numpy():
-                writer.append_data(frame)
-            writer.close()
-            return cache_file
-        except Exception as e:
-            error = e
-            continue
-    else:
-        print(f'cache_video failed, error: {error}', flush=True)
-        return None
-
-def cache_image(tensor,
-                save_file,
-                nrow=8,
-                normalize=True,
-                value_range=(-1, 1),
-                retry=5):
-    # cache file
-    suffix = osp.splitext(save_file)[1]
-    if suffix.lower() not in [
-            '.jpg', '.jpeg', '.png', '.tiff', '.gif', '.webp'
-    ]:
-        suffix = '.png'
-
-    # save to cache
-    error = None
-    for _ in range(retry):
-        try:
-            tensor = tensor.clamp(min(value_range), max(value_range))
-            torchvision.utils.save_image(
-                tensor,
-                save_file,
-                nrow=nrow,
-                normalize=normalize,
-                value_range=value_range)
-            return save_file
-        except Exception as e:
-            error = e
-            continue

 def str2bool(v):
@@ -435,212 +353,3 @@ def create_progress_hook(filename):
     return hook

[deleted here: extract_audio_tracks, combine_and_concatenate_video_with_audio_tracks, combine_video_with_audio_tracks and cleanup_temp_audio_files, together with their stray duplicate imports of tempfile, ffmpeg and subprocess; the identical code now lives in shared/utils/audio_video.py, shown above]

162
wgp.py
View File

@@ -13,11 +13,13 @@ from datetime import datetime
 import gradio as gr
 import random
 import json
+import numpy as np
 import importlib
 from shared.utils import notification_sound
 from shared.utils.loras_mutipliers import preparse_loras_multipliers, parse_loras_multipliers
-from shared.utils.utils import cache_video, convert_tensor_to_image, save_image, get_video_info, get_file_creation_date, convert_image_to_video
-from shared.utils.utils import extract_audio_tracks, combine_video_with_audio_tracks, combine_and_concatenate_video_with_audio_tracks, cleanup_temp_audio_files, calculate_new_dimensions
+from shared.utils.utils import convert_tensor_to_image, save_image, get_video_info, get_file_creation_date, convert_image_to_video, calculate_new_dimensions
+from shared.utils.audio_video import extract_audio_tracks, combine_video_with_audio_tracks, combine_and_concatenate_video_with_audio_tracks, cleanup_temp_audio_files, save_video, save_image
+from shared.utils.audio_video import save_image_metadata, read_image_metadata
 from shared.match_archi import match_nvidia_architecture
 from shared.attention import get_attention_modes, get_supported_attention_modes
 from huggingface_hub import hf_hub_download, snapshot_download
@@ -53,7 +55,7 @@ AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10

 target_mmgp_version = "3.5.8"
-WanGP_version = "7.76"
+WanGP_version = "7.77"
 settings_version = 2.23
 max_source_video_frames = 3000
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
@@ -1712,7 +1714,8 @@ if not Path(server_config_filename).is_file():
         "transformer_types": [],
         "transformer_quantization": "int8",
         "text_encoder_quantization" : "int8",
-        "save_path": "outputs", #os.path.join(os.getcwd(),
+        "save_path": "outputs",
+        "image_save_path": "outputs",
         "compile" : "",
         "metadata_type": "metadata",
         "boost" : 1,
@@ -2186,7 +2189,11 @@ if len(args.vae_config) > 0:
     vae_config = int(args.vae_config)

 reload_needed = False
-save_path = server_config.get("save_path", os.path.join(os.getcwd(), "gradio_outputs"))
+save_path = server_config.get("save_path", os.path.join(os.getcwd(), "outputs"))
+image_save_path = server_config.get("image_save_path", os.path.join(os.getcwd(), "outputs"))
+if not "video_output_codec" in server_config: server_config["video_output_codec"] = "libx264_8"
+if not "image_output_codec" in server_config: server_config["image_output_codec"] = "jpeg_95"

 preload_model_policy = server_config.get("preload_model_policy", [])
@@ -2699,6 +2706,7 @@ def apply_changes( state,
             VAE_precision_choice,
             mixed_precision_choice,
             save_path_choice,
+            image_save_path_choice,
             attention_choice,
             compile_choice,
             profile_choice,
@@ -2718,6 +2726,9 @@ def apply_changes( state,
             notification_sound_volume_choice = 50,
             max_frames_multiplier_choice = 1,
             display_stats_choice = 0,
+            video_output_codec_choice = None,
+            image_output_codec_choice = None,
+            audio_output_codec_choice = None,
             last_resolution_choice = None,
 ):
     if args.lock_config:
@@ -2730,6 +2741,7 @@ def apply_changes( state,
         "transformer_types": transformer_types_choices,
         "text_encoder_quantization" : text_encoder_quantization_choice,
         "save_path" : save_path_choice,
+        "image_save_path" : image_save_path_choice,
         "compile" : compile_choice,
         "profile" : profile_choice,
         "vae_config" : vae_config_choice,
@@ -2751,6 +2763,9 @@ def apply_changes( state,
         "notification_sound_volume" : notification_sound_volume_choice,
         "max_frames_multiplier" : max_frames_multiplier_choice,
         "display_stats" : display_stats_choice,
+        "video_output_codec" : video_output_codec_choice,
+        "image_output_codec" : image_output_codec_choice,
+        "audio_output_codec" : audio_output_codec_choice,
         "last_model_type" : state["model_type"],
         "last_model_per_family": state["last_model_per_family"],
         "last_advanced_choice": state["advanced"],
@@ -2784,6 +2799,7 @@ def apply_changes( state,
     vae_config = server_config["vae_config"]
     boost = server_config["boost"]
     save_path = server_config["save_path"]
+    image_save_path = server_config["image_save_path"]
     preload_model_policy = server_config["preload_model_policy"]
     transformer_quantization = server_config["transformer_quantization"]
     transformer_dtype_policy = server_config["transformer_dtype_policy"]
@@ -2791,7 +2807,9 @@ def apply_changes( state,
     transformer_types = server_config["transformer_types"]
     model_filename = get_model_filename(transformer_type, transformer_quantization, transformer_dtype_policy)
     state["model_filename"] = model_filename
-    if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas", "depth_anything_v2_variant", "notification_sound_enabled", "notification_sound_volume", "mmaudio_enabled", "max_frames_multiplier", "display_stats"] for change in changes ):
+    if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas", "depth_anything_v2_variant",
+                      "notification_sound_enabled", "notification_sound_volume", "mmaudio_enabled", "max_frames_multiplier", "display_stats",
+                      "video_output_codec", "image_output_codec", "audio_output_codec"] for change in changes ):
         model_family = gr.Dropdown()
         model_choice = gr.Dropdown()
     else:
@@ -2802,18 +2820,6 @@ def apply_changes( state,
     mmaudio_enabled = server_config["mmaudio_enabled"] > 0
     return "<DIV ALIGN=CENTER>The new configuration has been successfully applied</DIV>", header, model_family, model_choice, gr.Row(visible= server_config["enhancer_enabled"] == 1), gr.Row(visible= mmaudio_enabled), gr.Column(visible= mmaudio_enabled)

-from moviepy.editor import ImageSequenceClip
-import numpy as np
-
-def save_video(final_frames, output_path, fps=24):
-    assert final_frames.ndim == 4 and final_frames.shape[3] == 3, f"invalid shape: {final_frames} (need t h w c)"
-    if final_frames.dtype != np.uint8:
-        final_frames = (final_frames * 255).astype(np.uint8)
-    ImageSequenceClip(list(final_frames), fps=fps).write_videofile(output_path, verbose= False)

 def get_gen_info(state):
     cache = state.get("gen", None)
     if cache == None:
@@ -3754,7 +3760,7 @@ def edit_video(
     any_change = False
     if sample != None:
         video_path = get_available_filename(save_path, video_source, "_tmp") if any_mmaudio or has_already_audio else get_available_filename(save_path, video_source, "_post")
-        cache_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1))
+        save_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1), codec_type= server_config.get("video_output_codec", None))
         if any_mmaudio or has_already_audio: tmp_path = video_path
         any_change = True
@@ -3970,7 +3976,7 @@ def generate_video(
     process_map_video_guide = { "P": "pose", "D" : "depth", "S": "scribble", "E": "canny", "L": "flow", "C": "gray", "M": "inpaint", "U": "identity"}
     processes_names = { "pose": "Open Pose", "depth": "Depth Mask", "scribble" : "Shapes", "flow" : "Flow Map", "gray" : "Gray Levels", "inpaint" : "Inpaint Mask", "identity": "Identity Mask", "raw" : "Raw Format", "canny" : "Canny Edges"}
-    global wan_model, offloadobj, reload_needed, save_path
+    global wan_model, offloadobj, reload_needed
     gen = get_gen_info(state)
     torch.set_grad_enabled(False)
     if mode.startswith("edit_"):
@@ -4238,6 +4244,7 @@ def generate_video(
     torch.set_grad_enabled(False)

     os.makedirs(save_path, exist_ok=True)
+    os.makedirs(image_save_path, exist_ok=True)
     gc.collect()
     torch.cuda.empty_cache()
     wan_model._interrupt = False
@@ -4709,17 +4716,18 @@ def generate_video(
         any_mmaudio = MMAudio_setting != 0 and server_config.get("mmaudio_enabled", 0) != 0 and sample.shape[1] >= fps
         if is_image:
-            sample = sample.permute(1,2,3,0) #c f h w -> f h w c
-            new_video_path = []
+            image_path = os.path.join(image_save_path, file_name)
+            sample = sample.transpose(1,0) #c f h w -> f c h w
+            new_image_path = []
             for no, img in enumerate(sample):
-                img = Image.fromarray((127.5 * (img + 1.0)).cpu().byte().numpy())
-                img_path = os.path.splitext(video_path)[0] + ("" if no==0 else f"_{no}") + ".jpg"
-                new_video_path.append(img_path)
-                img.save(img_path)
-            video_path = new_video_path
+                img_path = os.path.splitext(image_path)[0] + ("" if no==0 else f"_{no}") + ".jpg"
+                new_image_path.append(save_image(img, save_file = img_path, quality = server_config.get("image_output_codec", None)))
+            video_path = new_image_path
         elif len(control_audio_tracks) > 0 or len(source_audio_tracks) > 0 or output_new_audio_filepath is not None or any_mmaudio or output_new_audio_data is not None or audio_source is not None:
+            video_path = os.path.join(save_path, file_name)
             save_path_tmp = video_path[:-4] + "_tmp.mp4"
-            cache_video( tensor=sample[None], save_file=save_path_tmp, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1))
+            save_video( tensor=sample[None], save_file=save_path_tmp, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1), codec_type = server_config.get("video_output_codec", None))
             output_new_audio_temp_filepath = None
             new_audio_from_start = reset_control_aligment
             source_audio_duration = source_video_frames_count / fps
@@ -4746,7 +4754,7 @@ def generate_video(
             if output_new_audio_temp_filepath is not None: os.remove(output_new_audio_temp_filepath)
         else:
-            cache_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1))
+            save_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1), codec_type= server_config.get("video_output_codec", None))

         end_time = time.time()
@@ -4756,6 +4764,11 @@ def generate_video(
         inputs.pop("mode")
         inputs["model_type"] = model_type
         inputs["model_filename"] = original_filename
+        if is_image:
+            inputs["image_quality"] = server_config.get("image_output_codec", None)
+        else:
+            inputs["video_quality"] = server_config.get("video_output_codec", None)
+
         modules = get_model_recursive_prop(model_type, "modules", return_list= True)
         if len(modules) > 0 : inputs["modules"] = modules
         if len(transformer_loras_filenames) > 0:
@@ -4778,8 +4791,7 @@ def generate_video(
                 json.dump(configs, f, indent=4)
             elif metadata_choice == "metadata":
                 if is_image:
-                    with Image.open(path) as img:
-                        img.save(path, comment=json.dumps(configs))
+                    save_image_metadata(path, configs)
                 else:
                     from mutagen.mp4 import MP4
                     file = MP4(path)
@@ -5690,7 +5702,7 @@ def has_video_file_extension(filename):

 def has_image_file_extension(filename):
     extension = os.path.splitext(filename)[-1]
-    return extension in [".jpeg", ".jpg", ".png", ".bmp", ".tiff"]
+    return extension in [".jpeg", ".jpg", ".png", ".webp", ".bmp", ".tiff"]

 def add_videos_to_gallery(state, input_file_list, choice, files_to_load):
     gen = get_gen_info(state)
@@ -5795,7 +5807,7 @@ def use_video_settings(state, input_file_list, choice):

 def get_settings_from_file(state, file_path, allow_json, merge_with_defaults, switch_type_if_compatible):
     configs = None
-    tags = None
+    any_image_or_video = False
     if file_path.endswith(".json") and allow_json:
         try:
             with open(file_path, 'r', encoding='utf-8') as f:
@@ -5807,22 +5819,22 @@ def get_settings_from_file(state, file_path, allow_json, merge_with_defaults, switch_type_if_compatible):
         try:
             file = MP4(file_path)
             tags = file.tags['©cmt'][0]
+            configs = json.loads(tags)
+            any_image_or_video = True
         except:
             pass
     elif has_image_file_extension(file_path):
         try:
-            with Image.open(file_path) as img:
-                tags = img.info["comment"]
+            configs = read_image_metadata(file_path)
+            any_image_or_video = True
         except:
             pass

-    if tags is not None:
-        try:
-            configs = json.loads(tags)
-            if not "WanGP" in configs.get("type", ""): configs = None
-        except:
-            configs = None
-    if configs == None:
-        return None, False
+    if configs is None: return None, False
+    try:
+        if not "WanGP" in configs.get("type", ""): configs = None
+    except:
+        configs = None

     current_model_filename = state["model_filename"]
     current_model_type = state["model_type"]
@@ -5848,7 +5860,7 @@ def get_settings_from_file(state, file_path, allow_json, merge_with_defaults, switch_type_if_compatible):
         configs = defaults
     configs["model_type"] = model_type

-    return configs, tags != None
+    return configs, any_image_or_video

 def record_image_mode_tab(state, evt:gr.SelectData):
     state["image_mode_tab"] = 0 if evt.index == 0 else 1
@@ -7849,10 +7861,6 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     label="User Interface Theme. You will need to restart the App to see the new Theme."
                 )
-                save_path_choice = gr.Textbox(
-                    label="Output Folder for Generated Videos (need to restart app to be taken into account)",
-                    value=server_config.get("save_path", save_path)
-                )

             with gr.Tab("Performance"):
@@ -7976,6 +7984,53 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     label="MMAudio (if enabled, 10 GB of extra models will be downloaded)"
                 )

+            with gr.Tab("Outputs"):
+                video_output_codec_choice = gr.Dropdown(
+                    choices=[
+                        ("x265 Balanced Quality (CRF 28)", 'libx265_28'),
+                        ("x264 Balanced Quality (Level 8)", 'libx264_8'),
+                        ("x265 High Quality (CRF 8)", 'libx265_8'),
+                        ("x264 High Quality (Level 10)", 'libx264_10'),
+                        ("x264 Lossless", 'libx264_lossless'),
+                    ],
+                    value=server_config.get("video_output_codec", "libx264_8"),
+                    label="Video Codec to use"
+                )
+
+                image_output_codec_choice = gr.Dropdown(
+                    choices=[
+                        ("JPEG Quality 85", 'jpeg_85'),
+                        ("WEBP Quality 85", 'webp_85'),
+                        ("JPEG Quality 95", 'jpeg_95'),
+                        ("WEBP Quality 95", 'webp_95'),
+                        ("WEBP Lossless", 'webp_lossless'),
+                        ("PNG Lossless", 'png'),
+                    ],
+                    value=server_config.get("image_output_codec", "jpeg_95"),
+                    label="Image Codec to use"
+                )
+
+                audio_output_codec_choice = gr.Dropdown(
+                    choices=[
+                        ("AAC 128 kbit", 'aac_128'),
+                    ],
+                    value=server_config.get("audio_output_codec", "aac_128"),
+                    visible = False,
+                    label="Audio Codec to use"
+                )
+
+                video_save_path_choice = gr.Textbox(
+                    label="Output Folder for Generated Videos (need to restart app to be taken into account)",
+                    value=server_config.get("save_path", save_path)
+                )
+
+                image_save_path_choice = gr.Textbox(
+                    label="Output Folder for Generated Images (need to restart app to be taken into account)",
+                    value=server_config.get("image_save_path", image_save_path)
+                )

             with gr.Tab("Notifications"):
                 gr.Markdown("### Notification Settings")
                 notification_sound_enabled_choice = gr.Dropdown(
@@ -8008,7 +8063,8 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     text_encoder_quantization_choice,
                     VAE_precision_choice,
                     mixed_precision_choice,
-                    save_path_choice,
+                    video_save_path_choice,
+                    image_save_path_choice,
                     attention_choice,
                     compile_choice,
                     profile_choice,
@@ -8028,6 +8084,9 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     notification_sound_volume_choice,
                     max_frames_multiplier_choice,
                     display_stats_choice,
+                    video_output_codec_choice,
+                    image_output_codec_choice,
+                    audio_output_codec_choice,
                     resolution,
                 ],
                 outputs= [msg, header, model_family, model_choice, prompt_enhancer_row, mmaudio_tab, PP_MMAudio_col]
@@ -8626,7 +8685,7 @@ def create_ui():
             with gr.Tab("Guides", id="info") as info_tab:
                 generate_info_tab()
             with gr.Tab("Video Mask Creator", id="video_mask_creator") as video_mask_creator:
-                matanyone_app.display(main_tabs, tab_state, video_guide, image_guide, video_mask, image_mask, image_refs)
+                matanyone_app.display(main_tabs, tab_state, server_config, video_guide, image_guide, video_mask, image_mask, image_refs)
             if not args.lock_config:
                 with gr.Tab("Downloads", id="downloads") as downloads_tab:
                     generate_download_tab(lset_name, loras_choices, state)
@@ -8662,5 +8721,4 @@ if __name__ == "__main__":
     else:
         url = "http://" + server_name
         webbrowser.open(url + ":" + str(server_port), new = 0, autoraise = True)
-    demo.launch(favicon_path="favicon.png", server_name=server_name, server_port=server_port, share=args.share, allowed_paths=[save_path])
+    demo.launch(favicon_path="favicon.png", server_name=server_name, server_port=server_port, share=args.share, allowed_paths=[save_path] + ([] if save_path == image_save_path else [image_save_path]))