diff --git a/defaults/flux_dev.json b/defaults/flux_dev.json
index d9f5607..87bab0f 100644
--- a/defaults/flux_dev.json
+++ b/defaults/flux_dev.json
@@ -12,5 +12,5 @@
     },
     "prompt": "draw a hat",
     "resolution": "1280x720",
-    "video_length": 1
+    "batch_size": 1
 }
\ No newline at end of file
diff --git a/defaults/flux_dev_kontext.json b/defaults/flux_dev_kontext.json
index efebe30..8945918 100644
--- a/defaults/flux_dev_kontext.json
+++ b/defaults/flux_dev_kontext.json
@@ -13,7 +13,7 @@
     },
     "prompt": "add a hat",
     "resolution": "1280x720",
-    "video_length": 1
+    "batch_size": 1
 }
\ No newline at end of file
diff --git a/defaults/flux_schnell.json b/defaults/flux_schnell.json
index 8df8f55..d7abcde 100644
--- a/defaults/flux_schnell.json
+++ b/defaults/flux_schnell.json
@@ -13,5 +13,5 @@
     "prompt": "draw a hat",
     "resolution": "1280x720",
     "num_inference_steps": 10,
-    "video_length": 1
+    "batch_size": 1
 }
\ No newline at end of file
diff --git a/defaults/t2i.json b/defaults/t2i.json
index f49f426..f6aa8fb 100644
--- a/defaults/t2i.json
+++ b/defaults/t2i.json
@@ -6,7 +6,7 @@
         "image_outputs": true,
         "URLs": "t2v"
     },
-    "video_length": 1,
+    "batch_size": 1,
     "resolution": "1280x720"
 }
diff --git a/defaults/vace_14B_fusionix_t2i.json b/defaults/vace_14B_fusionix_t2i.json
index 75fbf42..fc687fa 100644
--- a/defaults/vace_14B_fusionix_t2i.json
+++ b/defaults/vace_14B_fusionix_t2i.json
@@ -12,5 +12,5 @@
     "resolution": "1280x720",
     "guidance_scale": 1,
     "num_inference_steps": 10,
-    "video_length": 1
+    "batch_size": 1
 }
\ No newline at end of file
diff --git a/wgp.py b/wgp.py
index b0ea395..bf702c1 100644
--- a/wgp.py
+++ b/wgp.py
@@ -598,7 +598,7 @@ def add_video_task(**inputs):
         "id": current_task_id,
         "params": inputs.copy(),
         "repeats": inputs["repeat_generation"],
-        "length": inputs["video_length"],
+        "length": inputs["video_length"], # !!!
         "steps": inputs["num_inference_steps"],
         "prompt": inputs["prompt"],
         "start_image_labels": start_image_labels,
@@ -3141,9 +3141,9 @@ def select_video(state, input_file_list, event_data: gr.EventData):
         if not has_video_file_extension(file_name):
             img = Image.open(file_name)
             width, height = img.size
-            configs = None
             is_image = True
-            nb_audio_tracks = 0
+            frames_count = fps = 1
+            nb_audio_tracks = 0
         else:
             fps, width, height, frames_count = get_video_info(file_name)
             is_image = False
@@ -3219,9 +3219,14 @@ def select_video(state, input_file_list, event_data: gr.EventData):
         video_length_summary = f"{video_length} frames"
         video_window_no = configs.get("window_no", 0)
         if video_window_no > 0: video_length_summary +=f", Window no {video_window_no }"
-        video_length_summary += " ("
-        if video_length != frames_count: video_length_summary += f"real: {frames_count} frames, "
-        video_length_summary += f"{frames_count/fps:.1f}s, {round(fps)} fps)"
+        if is_image:
+            video_length_summary = configs.get("batch_size", 1)
+            video_length_label = "Number of Images"
+        else:
+            video_length_summary += " ("
+            video_length_label = "Video Length"
+            if video_length != frames_count: video_length_summary += f"real: {frames_count} frames, "
+            video_length_summary += f"{frames_count/fps:.1f}s, {round(fps)} fps)"
         video_guidance_scale = configs.get("guidance_scale", None)
         video_embedded_guidance_scale = configs.get("embedded_guidance_scale ", None)
         if get_model_family(video_model_type) in ["hunyuan", "flux"]:
@@ -3255,7 +3260,7 @@ def select_video(state, input_file_list, event_data: gr.EventData):
             values += [video_outpainting]
             labels += ["Outpainting"]
         values += [video_resolution, video_length_summary, video_seed, video_guidance_scale, video_flow_shift, video_num_inference_steps]
-        labels += [ "Resolution", "Video Length", "Seed", video_guidance_label, "Flow Shift", "Num Inference steps"]
+        labels += [ "Resolution", video_length_label, "Seed", video_guidance_label, "Flow Shift", "Num Inference steps"]
         video_negative_prompt = configs.get("negative_prompt", "")
         if len(video_negative_prompt) > 0:
             values += [video_negative_prompt]
@@ -3914,10 +3919,12 @@ def get_transformer_loras(model_type):
 def generate_video(
     task,
     send_cmd,
+    image_mode,
     prompt,
     negative_prompt,
     resolution,
     video_length,
+    batch_size,
     seed,
     force_fps,
     num_inference_steps,
@@ -4008,9 +4015,8 @@ def generate_video(
     model_def = get_model_def(model_type)
-    is_image = model_def.get("image_outputs", False)
+    is_image = image_mode == 1
     if is_image:
-        batch_size = video_length
        video_length = 1
     else:
         batch_size = 1
@@ -4819,7 +4825,7 @@ def generate_video(
         if prompt_enhancer_image_caption_model != None and prompt_enhancer !=None and len(prompt_enhancer)>0:
             configs["enhanced_prompt"] = "\n".join(prompts)
         configs["generation_time"] = round(end_time-start_time)
-        if is_image: configs["is_image"] = True
+        # if is_image: configs["is_image"] = True
         metadata_choice = server_config.get("metadata_type","metadata")
         video_path = [video_path] if not isinstance(video_path, list) else video_path
         for no, path in enumerate(video_path):
@@ -5763,7 +5769,7 @@ def prepare_inputs_dict(target, inputs, model_type = None, model_filename = None
         if base_model_type in ["t2v"]: unsaved_params = unsaved_params[2:]
         pop += unsaved_params
     if not vace:
-        pop += ["frames_positions", "video_guide_outpainting"]
+        pop += ["frames_positions", "video_guide_outpainting", "control_net_weight", "control_net_weight2"]
     if not (diffusion_forcing or ltxv or vace):
         pop += ["keep_frames_video_source"]
@@ -5772,11 +5778,14 @@ def prepare_inputs_dict(target, inputs, model_type = None, model_filename = None
         pop += ["sliding_window_size", "sliding_window_overlap", "sliding_window_overlap_noise", "sliding_window_discard_last_frames"]
     if not base_model_type in ["fantasy", "multitalk", "vace_multitalk_14B"]:
-        pop += ["audio_guidance_scale"]
+        pop += ["audio_guidance_scale", "speakers_locations"]
     if not model_family in ["hunyuan", "flux"]:
         pop += ["embedded_guidance_scale"]
+    if not model_family in ["hunyuan", "wan"]:
+        pop += ["skip_steps_cache_type", "skip_steps_multiplier", "skip_steps_start_step_perc"]
+
     if model_def.get("no_guidance", False) or ltxv:
         pop += ["guidance_scale", "audio_guidance_scale", "embedded_guidance_scale"]
@@ -5793,7 +5802,6 @@ def prepare_inputs_dict(target, inputs, model_type = None, model_filename = None
     for k in pop:
         if k in inputs:
             inputs.pop(k)
-
     if target == "metadata":
         inputs = {k: v for k,v in inputs.items() if v != None }
@@ -5969,6 +5977,7 @@ def use_video_settings(state, input_file_list, choice):
     file_list, file_settings_list = get_file_list(state, input_file_list)
     if choice != None and choice >=0 and len(file_list)>0:
         configs = file_settings_list[choice]
+        file_name= file_list[choice]
         if configs == None:
             gr.Info("No Settings to Extract")
         else:
@@ -5978,8 +5987,11 @@ def use_video_settings(state, input_file_list, choice):
             defaults.update(configs)
             current_model_type = state["model_type"]
             prompt = configs.get("prompt", "")
-            set_model_settings(state, model_type, defaults)
-            gr.Info(f"Settings Loaded from Video with prompt '{prompt[:100]}'")
+            set_model_settings(state, model_type, defaults)
+            if has_image_file_extension(file_name):
+                gr.Info(f"Settings Loaded from Image with prompt '{prompt[:100]}'")
+            else:
+                gr.Info(f"Settings Loaded from Video with prompt '{prompt[:100]}'")
             if are_model_types_compatible(model_type,current_model_type):
                 return gr.update(), str(time.time())
             else:
@@ -6005,14 +6017,18 @@ def get_settings_from_file(state, file_path, allow_json, merge_with_defaults, sw
                 tags = file.tags['©cmt'][0]
             except:
                 pass
-        elif file_path.endswith(".jpg"):
+        elif has_image_file_extension(file_path):
            try:
                 with Image.open(file_path) as img:
                     tags = img.info["comment"]
             except:
                 pass
         if tags is not None:
-            configs = json.loads(tags)
+            try:
+                configs = json.loads(tags)
+                if not "WanGP" in configs.get("type", ""): configs = None
+            except:
+                configs = None
     if configs == None:
         return None, False
@@ -6072,10 +6088,12 @@ def load_settings_from_file(state, file_path):
 def save_inputs(
     target,
     lset_name,
+    image_mode,
     prompt,
     negative_prompt,
     resolution,
     video_length,
+    batch_size,
     seed,
     force_fps,
     num_inference_steps,
@@ -6382,12 +6400,12 @@ def init_process_queue_if_any(state):
 def get_modal_image(image_base64, label):
     return "