lucky day

This commit is contained in:
deepbeepmeep 2025-08-12 00:52:22 +02:00
parent 58c1549962
commit 175e05fc1e
7 changed files with 564 additions and 356 deletions

View File

@@ -20,6 +20,20 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep

 ## 🔥 Latest Updates :
+### August 11 2025: WanGP v7.77 - Lucky Day
+This is your lucky day! Thanks to new configuration options that let you store generated Videos and Images in lossless compressed formats, you will find they in fact look two times better without doing anything!
+Just kidding, they will only be marginally better, but at least this opens the way to professional editing.
+
+Supported formats:
+- Video: x264, x264 lossless, x265
+- Images: jpeg, png, webp, webp lossless
+
+Generation Settings are stored in each of the above regardless of the format (that was the hard part).
+
+Also you can now choose different output directories for images and videos.
+
 ### August 10 2025: WanGP v7.76 - Faster than the VAE ...
 We have a funny one here today: FastWan 2.2 5B, the Fastest Video Generator, only 20s to generate 121 frames at 720p. The snag is that the VAE is twice as slow...
 Thanks to Kijai for extracting the Lora that is used to build the corresponding finetune.
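The v7.77 options map onto plain server-config keys (see the wgp.py changes below). A minimal sketch of the relevant entries, using values from the dropdowns added in this commit; every other key is unchanged:

    server_config = {
        # ... existing keys unchanged ...
        "save_path": "outputs",                     # folder for generated videos
        "image_save_path": "outputs",               # folder for generated images
        "video_output_codec": "libx264_lossless",   # or libx264_8 / libx264_10 / libx265_8 / libx265_28
        "image_output_codec": "webp_lossless",      # or jpeg_95 / jpeg_85 / webp_95 / webp_85 / png
    }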

View File

@@ -131,7 +131,7 @@ from pathlib import Path
 import torch

 def remux_with_audio(video_path: Path, output_path: Path, audio: torch.Tensor, sampling_rate: int):
-    from shared.utils.utils import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
+    from shared.utils.audio_video import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
     with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
         temp_path = Path(f.name)

View File

@@ -21,6 +21,7 @@ from segment_anything.modeling.image_encoder import window_partition, window_unp
 from .utils.get_default_model import get_matanyone_model
 from .matanyone.inference.inference_core import InferenceCore
 from .matanyone_wrapper import matanyone
+from shared.utils.audio_video import save_video, save_image

 arg_device = "cuda"
 arg_sam_model_type="vit_h"
@@ -377,14 +378,14 @@ def show_mask(video_state, interactive_state, mask_dropdown):
     return select_frame

-def save_video(frames, output_path, fps):
-    writer = imageio.get_writer( output_path, fps=fps, codec='libx264', quality=8)
-    for frame in frames:
-        writer.append_data(frame)
-    writer.close()
-    return output_path
+# def save_video(frames, output_path, fps):
+#     writer = imageio.get_writer( output_path, fps=fps, codec='libx264', quality=8)
+#     for frame in frames:
+#         writer.append_data(frame)
+#     writer.close()
+#     return output_path

 def mask_to_xyxy_box(mask):
     rows, cols = np.where(mask == 255)
@@ -535,20 +536,20 @@ def video_matting(video_state,video_input, end_slider, matting_type, interactive
     file_name= video_state["video_name"]
     file_name = ".".join(file_name.split(".")[:-1])

-    from shared.utils.utils import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
+    from shared.utils.audio_video import extract_audio_tracks, combine_video_with_audio_tracks, cleanup_temp_audio_files
     source_audio_tracks, audio_metadata = extract_audio_tracks(video_input)
     output_fg_path = f"./mask_outputs/{file_name}_fg.mp4"
     output_fg_temp_path = f"./mask_outputs/{file_name}_fg_tmp.mp4"
     if len(source_audio_tracks) == 0:
-        foreground_output = save_video(foreground, output_path=output_fg_path , fps=fps)
+        foreground_output = save_video(foreground, output_fg_path , fps=fps, codec_type= video_output_codec)
     else:
-        foreground_output_tmp = save_video(foreground, output_path=output_fg_temp_path , fps=fps)
+        foreground_output_tmp = save_video(foreground, output_fg_temp_path , fps=fps, codec_type= video_output_codec)
         combine_video_with_audio_tracks(output_fg_temp_path, source_audio_tracks, output_fg_path, audio_metadata=audio_metadata)
         cleanup_temp_audio_files(source_audio_tracks)
         os.remove(foreground_output_tmp)
         foreground_output = output_fg_path

-    alpha_output = save_video(alpha, output_path="./mask_outputs/{}_alpha.mp4".format(file_name), fps=fps)
+    alpha_output = save_video(alpha, "./mask_outputs/{}_alpha.mp4".format(file_name), fps=fps, codec_type= video_output_codec)
     return foreground_output, alpha_output, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
@@ -745,8 +746,12 @@ def teleport_to_video_tab(tab_state):
     return gr.Tabs(selected="video_gen")

-def display(tabs, tab_state, vace_video_input, vace_image_input, vace_video_mask, vace_image_mask, vace_image_refs):
+def display(tabs, tab_state, server_config, vace_video_input, vace_image_input, vace_video_mask, vace_image_mask, vace_image_refs):
     # my_tab.select(fn=load_unload_models, inputs=[], outputs=[])
+    global image_output_codec, video_output_codec
+    image_output_codec = server_config.get("image_output_codec", None)
+    video_output_codec = server_config.get("video_output_codec", None)
+
     media_url = "https://github.com/pq-yang/MatAnyone/releases/download/media/"

View File

@@ -46,6 +46,7 @@ soundfile
 ffmpeg-python
 pyannote.audio
 pynvml
+piexif
 #huggingface_hub[hf_xet] #slow down everything !!!!
 # num2words
 # spacy

421
shared/utils/audio_video.py Normal file
View File

@@ -0,0 +1,421 @@
import subprocess
import tempfile, os
import ffmpeg
import torchvision.transforms.functional as TF
import torch.nn.functional as F
import cv2
import imageio
import binascii
import torchvision
import torch
from PIL import Image
import os.path as osp
import json

def rand_name(length=8, suffix=''):
    name = binascii.b2a_hex(os.urandom(length)).decode('utf-8')
    if suffix:
        if not suffix.startswith('.'):
            suffix = '.' + suffix
        name += suffix
    return name
def extract_audio_tracks(source_video, verbose=False, query_only=False):
    """
    Extract all audio tracks from a source video into temporary AAC files.

    Returns:
        Tuple:
        - List of temp file paths for extracted audio tracks
        - List of corresponding metadata dicts:
            {'codec', 'sample_rate', 'channels', 'duration', 'language'}
            where 'duration' is set to container duration (for consistency).
    """
    probe = ffmpeg.probe(source_video)
    audio_streams = [s for s in probe['streams'] if s['codec_type'] == 'audio']
    container_duration = float(probe['format'].get('duration', 0.0))

    if not audio_streams:
        if query_only: return 0
        if verbose: print(f"No audio track found in {source_video}")
        return [], []

    if query_only:
        return len(audio_streams)

    if verbose:
        print(f"Found {len(audio_streams)} audio track(s), container duration = {container_duration:.3f}s")

    file_paths = []
    metadata = []
    for i, stream in enumerate(audio_streams):
        fd, temp_path = tempfile.mkstemp(suffix=f'_track{i}.aac', prefix='audio_')
        os.close(fd)
        file_paths.append(temp_path)
        metadata.append({
            'codec': stream.get('codec_name'),
            'sample_rate': int(stream.get('sample_rate', 0)),
            'channels': int(stream.get('channels', 0)),
            'duration': container_duration,
            'language': stream.get('tags', {}).get('language', None)
        })
        ffmpeg.input(source_video).output(
            temp_path,
            **{'map': f'0:a:{i}', 'acodec': 'aac', 'b:a': '128k'}
        ).overwrite_output().run(quiet=not verbose)

    return file_paths, metadata
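# Usage sketch (editor's illustration, not part of this module): probe a clip
# for audio, then pull every track out to temporary AAC files. "input.mp4" is
# a hypothetical path.
#
#   if extract_audio_tracks("input.mp4", query_only=True):
#       track_paths, track_meta = extract_audio_tracks("input.mp4", verbose=True)
#       print(track_meta[0]['codec'], track_meta[0]['duration'])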
def combine_and_concatenate_video_with_audio_tracks(
        save_path_tmp, video_path,
        source_audio_tracks, new_audio_tracks,
        source_audio_duration, audio_sampling_rate,
        new_audio_from_start=False,
        source_audio_metadata=None,
        audio_bitrate='128k',
        audio_codec='aac',
        verbose=False):
    inputs, filters, maps, idx = ['-i', video_path], [], ['-map', '0:v'], 1
    metadata_args = []
    sources = source_audio_tracks or []
    news = new_audio_tracks or []
    duplicate_source = len(sources) == 1 and len(news) > 1
    N = len(news) if source_audio_duration == 0 else max(len(sources), len(news)) or 1

    for i in range(N):
        s = (sources[i] if i < len(sources)
             else sources[0] if duplicate_source else None)
        n = news[i] if len(news) == N else (news[0] if news else None)

        if source_audio_duration == 0:
            if n:
                inputs += ['-i', n]
                filters.append(f'[{idx}:a]apad=pad_dur=100[aout{i}]')
                idx += 1
            else:
                filters.append(f'anullsrc=r={audio_sampling_rate}:cl=mono,apad=pad_dur=100[aout{i}]')
        else:
            if s:
                inputs += ['-i', s]
                meta = source_audio_metadata[i] if source_audio_metadata and i < len(source_audio_metadata) else {}
                needs_filter = (
                    meta.get('codec') != audio_codec or
                    meta.get('sample_rate') != audio_sampling_rate or
                    meta.get('channels') != 1 or
                    meta.get('duration', 0) < source_audio_duration
                )
                if needs_filter:
                    filters.append(
                        f'[{idx}:a]aresample={audio_sampling_rate},aformat=channel_layouts=mono,'
                        f'apad=pad_dur={source_audio_duration},atrim=0:{source_audio_duration},asetpts=PTS-STARTPTS[s{i}]')
                else:
                    filters.append(
                        f'[{idx}:a]apad=pad_dur={source_audio_duration},atrim=0:{source_audio_duration},asetpts=PTS-STARTPTS[s{i}]')
                if lang := meta.get('language'):
                    metadata_args += ['-metadata:s:a:' + str(i), f'language={lang}']
                idx += 1
            else:
                filters.append(
                    f'anullsrc=r={audio_sampling_rate}:cl=mono,atrim=0:{source_audio_duration},asetpts=PTS-STARTPTS[s{i}]')
            if n:
                inputs += ['-i', n]
                start = '0' if new_audio_from_start else source_audio_duration
                filters.append(
                    f'[{idx}:a]aresample={audio_sampling_rate},aformat=channel_layouts=mono,'
                    f'atrim=start={start},asetpts=PTS-STARTPTS[n{i}]')
                filters.append(f'[s{i}][n{i}]concat=n=2:v=0:a=1[aout{i}]')
                idx += 1
            else:
                filters.append(f'[s{i}]apad=pad_dur=100[aout{i}]')
        maps += ['-map', f'[aout{i}]']

    cmd = ['ffmpeg', '-y', *inputs,
           '-filter_complex', ';'.join(filters),
           *maps, *metadata_args,
           '-c:v', 'copy',
           '-c:a', audio_codec,
           '-b:a', audio_bitrate,
           '-ar', str(audio_sampling_rate),
           '-ac', '1',
           '-shortest', save_path_tmp]

    if verbose:
        print(f"ffmpeg command: {cmd}")

    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"FFmpeg error: {e.stderr}")
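# Usage sketch (editor's illustration, not part of this module): keep the first
# 5 s of the source track, then splice a newly generated one after it. Paths,
# the duration and the 16 kHz rate are hypothetical.
#
#   combine_and_concatenate_video_with_audio_tracks(
#       "out_tmp.mp4", "render.mp4",
#       source_audio_tracks=["audio_track0.aac"], new_audio_tracks=["speech.wav"],
#       source_audio_duration=5.0, audio_sampling_rate=16000,
#       source_audio_metadata=[{'codec': 'aac', 'sample_rate': 16000,
#                               'channels': 1, 'duration': 5.0}])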
def combine_video_with_audio_tracks(target_video, audio_tracks, output_video,
                                    audio_metadata=None, verbose=False):
    if not audio_tracks:
        if verbose: print("No audio tracks to combine.")
        return False

    dur = float(next(s for s in ffmpeg.probe(target_video)['streams']
                     if s['codec_type'] == 'video')['duration'])
    if verbose: print(f"Video duration: {dur:.3f}s")

    cmd = ['ffmpeg', '-y', '-i', target_video]
    for path in audio_tracks:
        cmd += ['-i', path]
    cmd += ['-map', '0:v']
    for i in range(len(audio_tracks)):
        cmd += ['-map', f'{i+1}:a']
    for i, meta in enumerate(audio_metadata or []):
        if (lang := meta.get('language')):
            cmd += ['-metadata:s:a:' + str(i), f'language={lang}']
    cmd += ['-c:v', 'copy', '-c:a', 'copy', '-t', str(dur), output_video]

    result = subprocess.run(cmd, capture_output=not verbose, text=True)
    if result.returncode != 0:
        raise Exception(f"FFmpeg error:\n{result.stderr}")
    if verbose:
        print(f"Created {output_video} with {len(audio_tracks)} audio track(s)")
    return True
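# Usage sketch (editor's illustration, not part of this module): the remux
# round trip used elsewhere in this commit; extract the tracks, copy them
# onto the processed video, then drop the temp files. Paths are hypothetical.
#
#   tracks, meta = extract_audio_tracks("source.mp4")
#   if tracks:
#       combine_video_with_audio_tracks("processed.mp4", tracks, "final.mp4",
#                                       audio_metadata=meta)
#       cleanup_temp_audio_files(tracks)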
def cleanup_temp_audio_files(audio_tracks, verbose=False):
    """
    Clean up temporary audio files.

    Args:
        audio_tracks: List of audio file paths to delete
        verbose: Enable verbose output (default: False)

    Returns:
        Number of files successfully deleted
    """
    deleted_count = 0
    for audio_path in audio_tracks:
        try:
            if os.path.exists(audio_path):
                os.unlink(audio_path)
                deleted_count += 1
                if verbose:
                    print(f"Cleaned up {audio_path}")
        except PermissionError:
            print(f"Warning: Could not delete {audio_path} (file may be in use)")
        except Exception as e:
            print(f"Warning: Error deleting {audio_path}: {e}")

    if verbose and deleted_count > 0:
        print(f"Successfully deleted {deleted_count} temporary audio file(s)")

    return deleted_count
def save_video(tensor,
               save_file=None,
               fps=30,
               codec_type='libx264_8',
               container='mp4',
               nrow=8,
               normalize=True,
               value_range=(-1, 1),
               retry=5):
    """Save tensor as video with configurable codec and container options."""
    suffix = f'.{container}'
    cache_file = osp.join('/tmp', rand_name(suffix=suffix)) if save_file is None else save_file
    if not cache_file.endswith(suffix):
        cache_file = osp.splitext(cache_file)[0] + suffix

    # Configure codec parameters
    codec_params = _get_codec_params(codec_type, container)

    # Process and save
    error = None
    for _ in range(retry):
        try:
            if torch.is_tensor(tensor):
                # Preprocess tensor
                tensor = tensor.clamp(min(value_range), max(value_range))
                tensor = torch.stack([
                    torchvision.utils.make_grid(u, nrow=nrow, normalize=normalize, value_range=value_range)
                    for u in tensor.unbind(2)
                ], dim=1).permute(1, 2, 3, 0)
                tensor = (tensor * 255).type(torch.uint8).cpu()
                arrays = tensor.numpy()
            else:
                arrays = tensor

            # Write video (silence ffmpeg logs)
            writer = imageio.get_writer(cache_file, fps=fps, ffmpeg_log_level='error', **codec_params)
            for frame in arrays:
                writer.append_data(frame)
            writer.close()
            return cache_file
        except Exception as e:
            error = e
            print(f"error saving {save_file}: {e}")
    return None  # all retries failed

def _get_codec_params(codec_type, container):
    """Get codec parameters based on codec type and container."""
    if codec_type == 'libx264_8':
        return {'codec': 'libx264', 'quality': 8, 'pixelformat': 'yuv420p'}
    elif codec_type == 'libx264_10':
        return {'codec': 'libx264', 'quality': 10, 'pixelformat': 'yuv420p'}
    elif codec_type == 'libx265_28':
        return {'codec': 'libx265', 'pixelformat': 'yuv420p', 'output_params': ['-crf', '28', '-x265-params', 'log-level=none', '-hide_banner', '-nostats']}
    elif codec_type == 'libx265_8':
        return {'codec': 'libx265', 'pixelformat': 'yuv420p', 'output_params': ['-crf', '8', '-x265-params', 'log-level=none', '-hide_banner', '-nostats']}
    elif codec_type == 'libx264_lossless':
        if container == 'mkv':
            return {'codec': 'ffv1', 'pixelformat': 'rgb24'}
        else:  # mp4
            return {'codec': 'libx264', 'output_params': ['-crf', '0'], 'pixelformat': 'yuv444p'}
    else:  # default: plain libx264
        return {'codec': 'libx264', 'pixelformat': 'yuv420p'}
def save_image(tensor,
               save_file,
               nrow=8,
               normalize=True,
               value_range=(-1, 1),
               quality='jpeg_95',  # 'jpeg_95', 'jpeg_85', 'jpeg_70', 'jpeg_50', 'webp_95', 'webp_85', 'webp_70', 'webp_50', 'png', 'webp_lossless'
               retry=5):
    """Save tensor as image with configurable format and quality."""
    # Get format and quality settings
    format_info = _get_format_info(quality)

    # Rename file extension to match requested format
    save_file = osp.splitext(save_file)[0] + format_info['ext']

    # Save image
    error = None
    for _ in range(retry):
        try:
            tensor = tensor.clamp(min(value_range), max(value_range))
            if format_info['use_pil']:
                # Use PIL for WebP and advanced options
                grid = torchvision.utils.make_grid(tensor, nrow=nrow, normalize=normalize, value_range=value_range)
                # Convert to PIL Image
                grid = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
                img = Image.fromarray(grid)
                img.save(save_file, **format_info['params'])
            else:
                # Use torchvision for JPEG and PNG
                torchvision.utils.save_image(
                    tensor, save_file, nrow=nrow, normalize=normalize,
                    value_range=value_range, **format_info['params']
                )
            break
        except Exception as e:
            error = e
            continue
    else:
        print(f'cache_image failed, error: {error}', flush=True)
    return save_file

def _get_format_info(quality):
    """Get format extension and parameters."""
    formats = {
        # JPEG with torchvision
        'jpeg_95': {'ext': '.jpg', 'params': {'quality': 95}, 'use_pil': False},
        'jpeg_85': {'ext': '.jpg', 'params': {'quality': 85}, 'use_pil': False},
        'jpeg_70': {'ext': '.jpg', 'params': {'quality': 70}, 'use_pil': False},
        'jpeg_50': {'ext': '.jpg', 'params': {'quality': 50}, 'use_pil': False},
        # PNG with torchvision
        'png': {'ext': '.png', 'params': {}, 'use_pil': False},
        # WebP with PIL (for quality control)
        'webp_95': {'ext': '.webp', 'params': {'quality': 95}, 'use_pil': True},
        'webp_85': {'ext': '.webp', 'params': {'quality': 85}, 'use_pil': True},
        'webp_70': {'ext': '.webp', 'params': {'quality': 70}, 'use_pil': True},
        'webp_50': {'ext': '.webp', 'params': {'quality': 50}, 'use_pil': True},
        'webp_lossless': {'ext': '.webp', 'params': {'lossless': True}, 'use_pil': True},
    }
    return formats.get(quality, formats['jpeg_95'])
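# Usage sketch (editor's illustration, not part of this module): one (c, h, w)
# tensor saved twice; note save_image rewrites the extension to match the
# requested quality preset.
#
#   img = torch.rand(3, 512, 512) * 2 - 1
#   save_image(img, "frame.jpg", quality='webp_85')   # actually writes frame.webp
#   save_image(img, "frame.jpg", quality='png')       # actually writes frame.png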
from PIL import Image, PngImagePlugin

def _enc_uc(s):
    try: return b"ASCII\0\0\0" + s.encode("ascii")
    except UnicodeEncodeError: return b"UNICODE\0" + s.encode("utf-16le")

def _dec_uc(b):
    if not isinstance(b, (bytes, bytearray)):
        try: b = bytes(b)
        except Exception: return None
    if b.startswith(b"ASCII\0\0\0"): return b[8:].decode("ascii", "ignore")
    if b.startswith(b"UNICODE\0"): return b[8:].decode("utf-16le", "ignore")
    return b.decode("utf-8", "ignore")

def save_image_metadata(image_path, metadata_dict, **save_kwargs):
    try:
        j = json.dumps(metadata_dict, ensure_ascii=False)
        ext = os.path.splitext(image_path)[1].lower()
        with Image.open(image_path) as im:
            if ext == ".png":
                pi = PngImagePlugin.PngInfo(); pi.add_text("comment", j)
                im.save(image_path, pnginfo=pi, **save_kwargs); return True
            if ext in (".jpg", ".jpeg"):
                im.save(image_path, comment=j.encode("utf-8"), **save_kwargs); return True
            if ext == ".webp":
                import piexif
                exif = {"0th": {}, "Exif": {piexif.ExifIFD.UserComment: _enc_uc(j)}, "GPS": {}, "1st": {}, "thumbnail": None}
                im.save(image_path, format="WEBP", exif=piexif.dump(exif), **save_kwargs); return True
            raise ValueError("Unsupported format")
    except Exception as e:
        print(f"Error saving metadata: {e}"); return False

def read_image_metadata(image_path):
    try:
        ext = os.path.splitext(image_path)[1].lower()
        with Image.open(image_path) as im:
            if ext == ".png":
                val = (getattr(im, "text", {}) or {}).get("comment") or im.info.get("comment")
                return json.loads(val) if val else None
            if ext in (".jpg", ".jpeg"):
                val = im.info.get("comment")
                if isinstance(val, (bytes, bytearray)): val = val.decode("utf-8", "ignore")
                if val:
                    try: return json.loads(val)
                    except Exception: pass
                exif = getattr(im, "getexif", lambda: None)()
                if exif:
                    uc = exif.get(37510)  # UserComment
                    s = _dec_uc(uc) if uc else None
                    if s:
                        try: return json.loads(s)
                        except Exception: pass
                return None
            if ext == ".webp":
                exif_bytes = Image.open(image_path).info.get("exif")
                if not exif_bytes: return None
                import piexif
                uc = piexif.load(exif_bytes).get("Exif", {}).get(piexif.ExifIFD.UserComment)
                s = _dec_uc(uc) if uc else None
                return json.loads(s) if s else None
            return None
    except Exception as e:
        print(f"Error reading metadata: {e}"); return None
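# Usage sketch (editor's illustration, not part of this module): round-trip the
# generation settings through a saved image, as wgp.py does below; the dict
# content is hypothetical.
#
#   path = save_image(img, "gen.jpg", quality='jpeg_95')
#   save_image_metadata(path, {"type": "WanGP v7.77", "prompt": "a lucky day"})
#   print(read_image_metadata(path))   # -> {'type': 'WanGP v7.77', ...}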

View File

@@ -1,6 +1,5 @@
 # Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
 import argparse
-import binascii
 import os
 import os.path as osp
 import torchvision.transforms.functional as TF
@@ -10,7 +9,6 @@ import tempfile
 import imageio
 import torch
 import decord
-import torchvision
 from PIL import Image
 import numpy as np
 from rembg import remove, new_session
@@ -21,8 +19,6 @@ import tempfile
 import subprocess
 import json

-__all__ = ['cache_video', 'cache_image', 'str2bool']
-
 from PIL import Image
@@ -218,84 +214,6 @@ def resize_and_remove_background(img_list, budget_width, budget_height, rm_backg
     return output_list

-def rand_name(length=8, suffix=''):
-    name = binascii.b2a_hex(os.urandom(length)).decode('utf-8')
-    if suffix:
-        if not suffix.startswith('.'):
-            suffix = '.' + suffix
-        name += suffix
-    return name
-
-def cache_video(tensor,
-                save_file=None,
-                fps=30,
-                suffix='.mp4',
-                nrow=8,
-                normalize=True,
-                value_range=(-1, 1),
-                retry=5):
-    # cache file
-    cache_file = osp.join('/tmp', rand_name(
-        suffix=suffix)) if save_file is None else save_file
-
-    # save to cache
-    error = None
-    for _ in range(retry):
-        try:
-            # preprocess
-            tensor = tensor.clamp(min(value_range), max(value_range))
-            tensor = torch.stack([
-                torchvision.utils.make_grid(
-                    u, nrow=nrow, normalize=normalize, value_range=value_range)
-                for u in tensor.unbind(2)
-            ], dim=1).permute(1, 2, 3, 0)
-            tensor = (tensor * 255).type(torch.uint8).cpu()
-
-            # write video
-            writer = imageio.get_writer(
-                cache_file, fps=fps, codec='libx264', quality=8)
-            for frame in tensor.numpy():
-                writer.append_data(frame)
-            writer.close()
-            return cache_file
-        except Exception as e:
-            error = e
-            continue
-    else:
-        print(f'cache_video failed, error: {error}', flush=True)
-        return None
-
-def cache_image(tensor,
-                save_file,
-                nrow=8,
-                normalize=True,
-                value_range=(-1, 1),
-                retry=5):
-    # cache file
-    suffix = osp.splitext(save_file)[1]
-    if suffix.lower() not in [
-            '.jpg', '.jpeg', '.png', '.tiff', '.gif', '.webp'
-    ]:
-        suffix = '.png'
-
-    # save to cache
-    error = None
-    for _ in range(retry):
-        try:
-            tensor = tensor.clamp(min(value_range), max(value_range))
-            torchvision.utils.save_image(
-                tensor,
-                save_file,
-                nrow=nrow,
-                normalize=normalize,
-                value_range=value_range)
-            return save_file
-        except Exception as e:
-            error = e
-            continue

 def str2bool(v):
@@ -435,212 +353,3 @@ def create_progress_hook(filename):
     return hook

[deleted here: extract_audio_tracks, combine_and_concatenate_video_with_audio_tracks, combine_video_with_audio_tracks and cleanup_temp_audio_files, together with their stray duplicate imports of tempfile, ffmpeg and subprocess; the identical code now lives in shared/utils/audio_video.py, shown above]

162
wgp.py
View File

@@ -13,11 +13,13 @@ from datetime import datetime
 import gradio as gr
 import random
 import json
+import numpy as np
 import importlib
 from shared.utils import notification_sound
 from shared.utils.loras_mutipliers import preparse_loras_multipliers, parse_loras_multipliers
-from shared.utils.utils import cache_video, convert_tensor_to_image, save_image, get_video_info, get_file_creation_date, convert_image_to_video
-from shared.utils.utils import extract_audio_tracks, combine_video_with_audio_tracks, combine_and_concatenate_video_with_audio_tracks, cleanup_temp_audio_files, calculate_new_dimensions
+from shared.utils.utils import convert_tensor_to_image, save_image, get_video_info, get_file_creation_date, convert_image_to_video, calculate_new_dimensions
+from shared.utils.audio_video import extract_audio_tracks, combine_video_with_audio_tracks, combine_and_concatenate_video_with_audio_tracks, cleanup_temp_audio_files, save_video, save_image
+from shared.utils.audio_video import save_image_metadata, read_image_metadata
 from shared.match_archi import match_nvidia_architecture
 from shared.attention import get_attention_modes, get_supported_attention_modes
 from huggingface_hub import hf_hub_download, snapshot_download
@@ -53,7 +55,7 @@ AUTOSAVE_FILENAME = "queue.zip"
 PROMPT_VARS_MAX = 10

 target_mmgp_version = "3.5.8"
-WanGP_version = "7.76"
+WanGP_version = "7.77"
 settings_version = 2.23
 max_source_video_frames = 3000
 prompt_enhancer_image_caption_model, prompt_enhancer_image_caption_processor, prompt_enhancer_llm_model, prompt_enhancer_llm_tokenizer = None, None, None, None
@@ -1712,7 +1714,8 @@ if not Path(server_config_filename).is_file():
         "transformer_types": [],
         "transformer_quantization": "int8",
         "text_encoder_quantization" : "int8",
-        "save_path": "outputs", #os.path.join(os.getcwd(),
+        "save_path": "outputs",
+        "image_save_path": "outputs",
         "compile" : "",
         "metadata_type": "metadata",
         "boost" : 1,
@@ -2186,7 +2189,11 @@ if len(args.vae_config) > 0:
     vae_config = int(args.vae_config)

 reload_needed = False
-save_path = server_config.get("save_path", os.path.join(os.getcwd(), "gradio_outputs"))
+save_path = server_config.get("save_path", os.path.join(os.getcwd(), "outputs"))
+image_save_path = server_config.get("image_save_path", os.path.join(os.getcwd(), "outputs"))
+if not "video_output_codec" in server_config: server_config["video_output_codec"] = "libx264_8"
+if not "image_output_codec" in server_config: server_config["image_output_codec"] = "jpeg_95"

 preload_model_policy = server_config.get("preload_model_policy", [])
@@ -2699,6 +2706,7 @@ def apply_changes( state,
             VAE_precision_choice,
             mixed_precision_choice,
             save_path_choice,
+            image_save_path_choice,
             attention_choice,
             compile_choice,
             profile_choice,
@@ -2718,6 +2726,9 @@ def apply_changes( state,
             notification_sound_volume_choice = 50,
             max_frames_multiplier_choice = 1,
             display_stats_choice = 0,
+            video_output_codec_choice = None,
+            image_output_codec_choice = None,
+            audio_output_codec_choice = None,
             last_resolution_choice = None,
 ):
     if args.lock_config:
@@ -2730,6 +2741,7 @@ def apply_changes( state,
         "transformer_types": transformer_types_choices,
         "text_encoder_quantization" : text_encoder_quantization_choice,
         "save_path" : save_path_choice,
+        "image_save_path" : image_save_path_choice,
         "compile" : compile_choice,
         "profile" : profile_choice,
         "vae_config" : vae_config_choice,
@@ -2751,6 +2763,9 @@ def apply_changes( state,
         "notification_sound_volume" : notification_sound_volume_choice,
         "max_frames_multiplier" : max_frames_multiplier_choice,
         "display_stats" : display_stats_choice,
+        "video_output_codec" : video_output_codec_choice,
+        "image_output_codec" : image_output_codec_choice,
+        "audio_output_codec" : audio_output_codec_choice,
         "last_model_type" : state["model_type"],
         "last_model_per_family": state["last_model_per_family"],
         "last_advanced_choice": state["advanced"],
@@ -2784,6 +2799,7 @@ def apply_changes( state,
     vae_config = server_config["vae_config"]
     boost = server_config["boost"]
     save_path = server_config["save_path"]
+    image_save_path = server_config["image_save_path"]
     preload_model_policy = server_config["preload_model_policy"]
     transformer_quantization = server_config["transformer_quantization"]
     transformer_dtype_policy = server_config["transformer_dtype_policy"]
@@ -2791,7 +2807,9 @@ def apply_changes( state,
     transformer_types = server_config["transformer_types"]
     model_filename = get_model_filename(transformer_type, transformer_quantization, transformer_dtype_policy)
     state["model_filename"] = model_filename
-    if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas", "depth_anything_v2_variant", "notification_sound_enabled", "notification_sound_volume", "mmaudio_enabled", "max_frames_multiplier", "display_stats"] for change in changes ):
+    if all(change in ["attention_mode", "vae_config", "boost", "save_path", "metadata_type", "clear_file_list", "fit_canvas", "depth_anything_v2_variant",
+                      "notification_sound_enabled", "notification_sound_volume", "mmaudio_enabled", "max_frames_multiplier", "display_stats",
+                      "video_output_codec", "image_output_codec", "audio_output_codec"] for change in changes ):
         model_family = gr.Dropdown()
         model_choice = gr.Dropdown()
     else:
@@ -2802,18 +2820,6 @@ def apply_changes( state,
     mmaudio_enabled = server_config["mmaudio_enabled"] > 0
     return "<DIV ALIGN=CENTER>The new configuration has been successfully applied</DIV>", header, model_family, model_choice, gr.Row(visible= server_config["enhancer_enabled"] == 1), gr.Row(visible= mmaudio_enabled), gr.Column(visible= mmaudio_enabled)

-from moviepy.editor import ImageSequenceClip
-import numpy as np
-
-def save_video(final_frames, output_path, fps=24):
-    assert final_frames.ndim == 4 and final_frames.shape[3] == 3, f"invalid shape: {final_frames} (need t h w c)"
-    if final_frames.dtype != np.uint8:
-        final_frames = (final_frames * 255).astype(np.uint8)
-    ImageSequenceClip(list(final_frames), fps=fps).write_videofile(output_path, verbose= False)

 def get_gen_info(state):
     cache = state.get("gen", None)
     if cache == None:
@@ -3754,7 +3760,7 @@ def edit_video(
     any_change = False
     if sample != None:
         video_path = get_available_filename(save_path, video_source, "_tmp") if any_mmaudio or has_already_audio else get_available_filename(save_path, video_source, "_post")
-        cache_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1))
+        save_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1), codec_type= server_config.get("video_output_codec", None))
         if any_mmaudio or has_already_audio: tmp_path = video_path
         any_change = True
@@ -3970,7 +3976,7 @@ def generate_video(
     process_map_video_guide = { "P": "pose", "D" : "depth", "S": "scribble", "E": "canny", "L": "flow", "C": "gray", "M": "inpaint", "U": "identity"}
     processes_names = { "pose": "Open Pose", "depth": "Depth Mask", "scribble" : "Shapes", "flow" : "Flow Map", "gray" : "Gray Levels", "inpaint" : "Inpaint Mask", "identity": "Identity Mask", "raw" : "Raw Format", "canny" : "Canny Edges"}
-    global wan_model, offloadobj, reload_needed, save_path
+    global wan_model, offloadobj, reload_needed
     gen = get_gen_info(state)
     torch.set_grad_enabled(False)
     if mode.startswith("edit_"):
@@ -4238,6 +4244,7 @@ def generate_video(
     torch.set_grad_enabled(False)

     os.makedirs(save_path, exist_ok=True)
+    os.makedirs(image_save_path, exist_ok=True)
     gc.collect()
     torch.cuda.empty_cache()
     wan_model._interrupt = False
@@ -4709,17 +4716,18 @@ def generate_video(
         any_mmaudio = MMAudio_setting != 0 and server_config.get("mmaudio_enabled", 0) != 0 and sample.shape[1] >= fps
         if is_image:
-            sample = sample.permute(1,2,3,0) #c f h w -> f h w c
-            new_video_path = []
+            image_path = os.path.join(image_save_path, file_name)
+            sample = sample.transpose(1,0) #c f h w -> f c h w
+            new_image_path = []
             for no, img in enumerate(sample):
-                img = Image.fromarray((127.5 * (img + 1.0)).cpu().byte().numpy())
-                img_path = os.path.splitext(video_path)[0] + ("" if no==0 else f"_{no}") + ".jpg"
-                new_video_path.append(img_path)
-                img.save(img_path)
-            video_path = new_video_path
+                img_path = os.path.splitext(image_path)[0] + ("" if no==0 else f"_{no}") + ".jpg"
+                new_image_path.append(save_image(img, save_file = img_path, quality = server_config.get("image_output_codec", None)))
+            video_path = new_image_path
         elif len(control_audio_tracks) > 0 or len(source_audio_tracks) > 0 or output_new_audio_filepath is not None or any_mmaudio or output_new_audio_data is not None or audio_source is not None:
+            video_path = os.path.join(save_path, file_name)
             save_path_tmp = video_path[:-4] + "_tmp.mp4"
-            cache_video( tensor=sample[None], save_file=save_path_tmp, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1))
+            save_video( tensor=sample[None], save_file=save_path_tmp, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1), codec_type = server_config.get("video_output_codec", None))
             output_new_audio_temp_filepath = None
             new_audio_from_start = reset_control_aligment
             source_audio_duration = source_video_frames_count / fps
@@ -4746,7 +4754,7 @@ def generate_video(
             if output_new_audio_temp_filepath is not None: os.remove(output_new_audio_temp_filepath)
         else:
-            cache_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1))
+            save_video( tensor=sample[None], save_file=video_path, fps=output_fps, nrow=1, normalize=True, value_range=(-1, 1), codec_type= server_config.get("video_output_codec", None))

         end_time = time.time()
@@ -4756,6 +4764,11 @@ def generate_video(
         inputs.pop("mode")
         inputs["model_type"] = model_type
         inputs["model_filename"] = original_filename
+        if is_image:
+            inputs["image_quality"] = server_config.get("image_output_codec", None)
+        else:
+            inputs["video_quality"] = server_config.get("video_output_codec", None)
+
         modules = get_model_recursive_prop(model_type, "modules", return_list= True)
         if len(modules) > 0 : inputs["modules"] = modules
         if len(transformer_loras_filenames) > 0:
@@ -4778,8 +4791,7 @@ def generate_video(
                 json.dump(configs, f, indent=4)
             elif metadata_choice == "metadata":
                 if is_image:
-                    with Image.open(path) as img:
-                        img.save(path, comment=json.dumps(configs))
+                    save_image_metadata(path, configs)
                 else:
                     from mutagen.mp4 import MP4
                     file = MP4(path)
@@ -5690,7 +5702,7 @@ def has_video_file_extension(filename):

 def has_image_file_extension(filename):
     extension = os.path.splitext(filename)[-1]
-    return extension in [".jpeg", ".jpg", ".png", ".bmp", ".tiff"]
+    return extension in [".jpeg", ".jpg", ".png", ".webp", ".bmp", ".tiff"]

 def add_videos_to_gallery(state, input_file_list, choice, files_to_load):
     gen = get_gen_info(state)
@@ -5795,7 +5807,7 @@ def use_video_settings(state, input_file_list, choice):

 def get_settings_from_file(state, file_path, allow_json, merge_with_defaults, switch_type_if_compatible):
     configs = None
-    tags = None
+    any_image_or_video = False
     if file_path.endswith(".json") and allow_json:
         try:
             with open(file_path, 'r', encoding='utf-8') as f:
@@ -5807,22 +5819,22 @@ def get_settings_from_file(state, file_path, allow_json, merge_with_defaults, switch_type_if_compatible):
         try:
             file = MP4(file_path)
             tags = file.tags['©cmt'][0]
+            configs = json.loads(tags)
+            any_image_or_video = True
         except:
             pass
     elif has_image_file_extension(file_path):
         try:
-            with Image.open(file_path) as img:
-                tags = img.info["comment"]
+            configs = read_image_metadata(file_path)
+            any_image_or_video = True
         except:
             pass

-    if tags is not None:
-        try:
-            configs = json.loads(tags)
-            if not "WanGP" in configs.get("type", ""): configs = None
-        except:
-            configs = None
-    if configs == None:
-        return None, False
+    if configs is None: return None, False
+    try:
+        if not "WanGP" in configs.get("type", ""): configs = None
+    except:
+        configs = None

     current_model_filename = state["model_filename"]
     current_model_type = state["model_type"]
@@ -5848,7 +5860,7 @@ def get_settings_from_file(state, file_path, allow_json, merge_with_defaults, switch_type_if_compatible):
         configs = defaults
     configs["model_type"] = model_type

-    return configs, tags != None
+    return configs, any_image_or_video

 def record_image_mode_tab(state, evt:gr.SelectData):
     state["image_mode_tab"] = 0 if evt.index == 0 else 1
@@ -7849,10 +7861,6 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     label="User Interface Theme. You will need to restart the App to see the new Theme."
                 )
-                save_path_choice = gr.Textbox(
-                    label="Output Folder for Generated Videos (need to restart app to be taken into account)",
-                    value=server_config.get("save_path", save_path)
-                )

             with gr.Tab("Performance"):
@@ -7976,6 +7984,53 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     label="MMAudio (if enabled, 10 GB of extra models will be downloaded)"
                 )

+            with gr.Tab("Outputs"):
+                video_output_codec_choice = gr.Dropdown(
+                    choices=[
+                        ("x265 Balanced Quality (CRF 28)", 'libx265_28'),
+                        ("x264 Balanced Quality (Level 8)", 'libx264_8'),
+                        ("x265 High Quality (CRF 8)", 'libx265_8'),
+                        ("x264 High Quality (Level 10)", 'libx264_10'),
+                        ("x264 Lossless", 'libx264_lossless'),
+                    ],
+                    value=server_config.get("video_output_codec", "libx264_8"),
+                    label="Video Codec to use"
+                )
+
+                image_output_codec_choice = gr.Dropdown(
+                    choices=[
+                        ("JPEG Quality 85", 'jpeg_85'),
+                        ("WEBP Quality 85", 'webp_85'),
+                        ("JPEG Quality 95", 'jpeg_95'),
+                        ("WEBP Quality 95", 'webp_95'),
+                        ("WEBP Lossless", 'webp_lossless'),
+                        ("PNG Lossless", 'png'),
+                    ],
+                    value=server_config.get("image_output_codec", "jpeg_95"),
+                    label="Image Codec to use"
+                )
+
+                audio_output_codec_choice = gr.Dropdown(
+                    choices=[
+                        ("AAC 128 kbit", 'aac_128'),
+                    ],
+                    value=server_config.get("audio_output_codec", "aac_128"),
+                    visible = False,
+                    label="Audio Codec to use"
+                )
+
+                video_save_path_choice = gr.Textbox(
+                    label="Output Folder for Generated Videos (need to restart app to be taken into account)",
+                    value=server_config.get("save_path", save_path)
+                )
+
+                image_save_path_choice = gr.Textbox(
+                    label="Output Folder for Generated Images (need to restart app to be taken into account)",
+                    value=server_config.get("image_save_path", image_save_path)
+                )

             with gr.Tab("Notifications"):
                 gr.Markdown("### Notification Settings")
                 notification_sound_enabled_choice = gr.Dropdown(
@@ -8008,7 +8063,8 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     text_encoder_quantization_choice,
                     VAE_precision_choice,
                     mixed_precision_choice,
-                    save_path_choice,
+                    video_save_path_choice,
+                    image_save_path_choice,
                     attention_choice,
                     compile_choice,
                     profile_choice,
@@ -8028,6 +8084,9 @@ def generate_configuration_tab(state, blocks, header, model_family, model_choice):
                     notification_sound_volume_choice,
                     max_frames_multiplier_choice,
                     display_stats_choice,
+                    video_output_codec_choice,
+                    image_output_codec_choice,
+                    audio_output_codec_choice,
                     resolution,
                 ],
                 outputs= [msg, header, model_family, model_choice, prompt_enhancer_row, mmaudio_tab, PP_MMAudio_col]
@@ -8626,7 +8685,7 @@ def create_ui():
             with gr.Tab("Guides", id="info") as info_tab:
                 generate_info_tab()
             with gr.Tab("Video Mask Creator", id="video_mask_creator") as video_mask_creator:
-                matanyone_app.display(main_tabs, tab_state, video_guide, image_guide, video_mask, image_mask, image_refs)
+                matanyone_app.display(main_tabs, tab_state, server_config, video_guide, image_guide, video_mask, image_mask, image_refs)
             if not args.lock_config:
                 with gr.Tab("Downloads", id="downloads") as downloads_tab:
                     generate_download_tab(lset_name, loras_choices, state)
@@ -8662,5 +8721,4 @@ if __name__ == "__main__":
     else:
         url = "http://" + server_name
         webbrowser.open(url + ":" + str(server_port), new = 0, autoraise = True)
-    demo.launch(favicon_path="favicon.png", server_name=server_name, server_port=server_port, share=args.share, allowed_paths=[save_path])
+    demo.launch(favicon_path="favicon.png", server_name=server_name, server_port=server_port, share=args.share, allowed_paths=[save_path] + ([] if save_path == image_save_path else [image_save_path]))