fixed recammaster

deepbeepmeep 2025-08-11 03:44:19 +02:00
parent ae5de88cbf
commit 57328d6da7
5 changed files with 17 additions and 13 deletions

View File

@@ -20,11 +20,11 @@ WanGP supports the Wan (and derived models), Hunyuan Video and LTV Video models
 **Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
 ## 🔥 Latest Updates :
-### August 8 2025: WanGP v7.75 - Faster than the VAE ...
+### August 10 2025: WanGP v7.75 - Faster than the VAE ...
 We have a funny one here today: FastWan 2.2 5B, the Fastest Video Generator, only 20s to generate 121 frames at 720p. The snag is that VAE is twice as slow...
 Thanks to Kijai for extracting the Lora that is used to build the corresponding finetune.
-### August 8 2025: WanGP v7.74 - Qwen Rebirth part 2
+### August 9 2025: WanGP v7.74 - Qwen Rebirth part 2
 Added support for Qwen Lightning lora for a 8 steps generation (https://huggingface.co/lightx2v/Qwen-Image-Lightning/blob/main/Qwen-Image-Lightning-8steps-V1.0.safetensors). Lora is not normalized and you can use a multiplier around 0.1.
 Mag Cache support for all the Wan2.2 models Don't forget to set guidance to 1 and 8 denoising steps , your gen will be 7x faster !
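For a rough sense of where that "7x faster" figure could come from, here is a back-of-the-envelope sketch; the 30-step baseline and the assumption that classifier-free guidance doubles the work per step are assumptions, not values stated in the commit:

```python
# Back-of-the-envelope estimate only; actual speedup depends on model, resolution and hardware.
baseline_steps, lightning_steps = 30, 8
baseline_passes_per_step = 2    # guidance > 1 -> conditional + unconditional forward pass (assumed baseline)
lightning_passes_per_step = 1   # guidance set to 1 -> single forward pass
speedup = (baseline_steps * baseline_passes_per_step) / (lightning_steps * lightning_passes_per_step)
print(f"approximate speedup: {speedup:.1f}x")   # ~7.5x, in the ballpark of the "7x faster" claim
```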

View File

@@ -8,7 +8,7 @@
 "URLs": "i2v_2_2",
 "URLs2": "i2v_2_2",
 "group": "wan2_2",
-"visible": false
+"visible": true
 },
 "switch_threshold" : 900,
 "guidance_scale" : 3.5,

View File

@@ -65,11 +65,16 @@ For dynamic effects over generation steps, use comma-separated values:
 With models like Wan 2.2 that uses internally two diffusion models (*High noise* / *Low Noise*) you can specify which Loras you want to be applied for a specific phase by separating each phase with a ";".
-For instance, if you want to disable a lora for phase *High Noise* and enablesit only for phase *Low Noise*:
+For instance, if you want to disable a lora for phase *High Noise* and enables it only for phase *Low Noise*:
 ```
 0;1
 ```
+Also with Wan 2.2, if you have two loras and you want the first one to be applied only during the High noise and the second one during the Low noise phase:
+```
+1;0 0;1
+```
 As usual, you can use any float for of multiplier and have a multiplier varries throughout one phase for one Lora:
 ```
 0.9,0.8;1.2,1.1,1
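To make the multiplier syntax documented in this hunk concrete, here is a minimal, hypothetical parsing sketch; the function name and return layout are made up and this is not WanGP's actual implementation:

```python
def parse_lora_multipliers(spec: str) -> list[list[float]]:
    """Split a spec such as '0.9,0.8;1.2,1.1,1' into one list of floats per
    phase (High noise first, then Low noise); ';' separates phases and ','
    separates values across the denoising steps of a phase."""
    return [[float(v) for v in phase.split(",")] for phase in spec.split(";")]

print(parse_lora_multipliers("0;1"))                  # [[0.0], [1.0]] -> off in High noise, on in Low noise
print(parse_lora_multipliers("0.9,0.8;1.2,1.1,1"))    # per-step ramps inside each phase
# '1;0 0;1' is two space-separated specs, one per Lora:
print([parse_lora_multipliers(s) for s in "1;0 0;1".split()])
```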

View File

@@ -603,10 +603,9 @@ class WanAny2V:
 if recam:
     # should be be in fact in input_frames since it is control video not a video to be extended
     target_camera = model_mode
-    width = input_video.shape[2]
-    height = input_video.shape[1]
+    height,width = input_video.shape[-2:]
     input_video = input_video.to(dtype=self.dtype , device=self.device)
-    source_latents = self.vae.encode([input_video])[0] #.to(dtype=self.dtype, device=self.device)
+    source_latents = self.vae.encode([input_video])[0].unsqueeze(0) #.to(dtype=self.dtype, device=self.device)
     del input_video
     # Process target camera (recammaster)
     from shared.utils.cammmaster_tools import get_camera_embedding
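The fix above gives source_latents an explicit batch dimension right after VAE encoding, so the latent frame count now lives at shape[2] rather than shape[1], which is what the later hunks adjust. A shape-only sketch, with illustrative tensor sizes that are not the model's real dimensions:

```python
import torch

# Illustrative sizes only: 16 latent channels, 21 latent frames, 90x160 latent grid.
encoded = torch.zeros(16, 21, 90, 160)      # a single encoded video: [C, T, H, W]
source_latents = encoded.unsqueeze(0)       # batch dimension added: [1, C, T, H, W]

assert encoded.shape[1] == source_latents.shape[2] == 21   # frame axis moves from dim 1 to dim 2
height, width = source_latents.shape[-2:]                  # same idiom as the new height,width line above
```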
@@ -617,7 +616,7 @@ class WanAny2V:
 # Video 2 Video
 if denoising_strength < 1. and input_frames != None:
     height, width = input_frames.shape[-2:]
-    source_latents = self.vae.encode([input_frames])[0]
+    source_latents = self.vae.encode([input_frames])[0].unsqueeze(0)
 injection_denoising_step = 0
 inject_from_start = False
 if input_frames != None and denoising_strength < 1 :
@@ -630,7 +629,7 @@ class WanAny2V:
 if len(keep_frames_parsed) == 0 or image_outputs or (overlapped_frames_num + len(keep_frames_parsed)) == input_frames.shape[1] and all(keep_frames_parsed) : keep_frames_parsed = []
 injection_denoising_step = int(sampling_steps * (1. - denoising_strength) )
 latent_keep_frames = []
-if source_latents.shape[1] < lat_frames or len(keep_frames_parsed) > 0:
+if source_latents.shape[2] < lat_frames or len(keep_frames_parsed) > 0:
     inject_from_start = True
     if len(keep_frames_parsed) >0 :
         if overlapped_frames_num > 0: keep_frames_parsed = [True] * overlapped_frames_num + keep_frames_parsed
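The injection step computed in this hunk scales directly with the denoising strength; a quick worked example with made-up numbers:

```python
# Purely illustrative values; neither number is taken from the commit.
sampling_steps = 30
denoising_strength = 0.6
injection_denoising_step = int(sampling_steps * (1. - denoising_strength))
print(injection_denoising_step)   # 12 -> roughly the first 40% of the schedule is seeded from the source latents
```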
@@ -792,14 +791,14 @@ class WanAny2V:
 noise = torch.randn(batch_size, *target_shape, dtype=torch.float32, device=self.device, generator=seed_g)
 if inject_from_start:
     new_latents = latents.clone()
-    new_latents[:,:, :source_latents.shape[1] ] = noise[:, :, :source_latents.shape[1] ] * sigma + (1 - sigma) * source_latents.unsqueeze(0)
+    new_latents[:,:, :source_latents.shape[2] ] = noise[:, :, :source_latents.shape[2] ] * sigma + (1 - sigma) * source_latents
     for latent_no, keep_latent in enumerate(latent_keep_frames):
         if not keep_latent:
             new_latents[:, :, latent_no:latent_no+1 ] = latents[:, :, latent_no:latent_no+1]
     latents = new_latents
     new_latents = None
 else:
-    latents = noise * sigma + (1 - sigma) * source_latents.unsqueeze(0)
+    latents = noise * sigma + (1 - sigma) * source_latents
 noise = None
 if extended_overlapped_latents != None:
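The two changed lines above drop the per-call `.unsqueeze(0)` because source_latents is already batched after the encode fix; the blend itself is unchanged. A shape-only sketch under that assumption, with illustrative sizes:

```python
import torch

sigma = 0.8                                   # illustrative noise level at the injection step
shape = (1, 16, 21, 90, 160)                  # [batch, C, T, H, W], illustrative sizes
noise = torch.randn(shape)
source_latents = torch.zeros(shape)           # already batched by the earlier unsqueeze(0)

latents = noise * sigma + (1 - sigma) * source_latents   # no per-call unsqueeze(0) needed any more
assert latents.shape == shape
```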
@@ -811,7 +810,7 @@ class WanAny2V:
 zz[0:16, ref_images_count:extended_overlapped_latents.shape[2] ] = extended_overlapped_latents[0, :, ref_images_count:] * (1.0 - overlap_noise_factor) + torch.randn_like(extended_overlapped_latents[0, :, ref_images_count:] ) * overlap_noise_factor
 if target_camera != None:
-    latent_model_input = torch.cat([latents, source_latents.unsqueeze(0).expand(*expand_shape)], dim=2) # !!!!
+    latent_model_input = torch.cat([latents, source_latents.expand(*expand_shape)], dim=2)
 else:
     latent_model_input = latents
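For the ReCamMaster path, the control-video latents are concatenated to the generated latents along the frame axis (dim=2). A small shape check with illustrative sizes; the value of expand_shape here is an assumption that only the batch dimension is broadcast:

```python
import torch

latents = torch.zeros(2, 16, 21, 90, 160)            # generated latents: [batch, C, T, H, W], illustrative
source_latents = torch.zeros(1, 16, 21, 90, 160)     # encoded control video, batched by unsqueeze(0)

expand_shape = (2, -1, -1, -1, -1)                   # assumed: expand only the batch dimension
latent_model_input = torch.cat([latents, source_latents.expand(*expand_shape)], dim=2)
assert latent_model_input.shape == (2, 16, 42, 90, 160)   # control-video latents appended along the frame axis
```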

View File

@@ -83,7 +83,7 @@ class family_handler():
 vace_class = base_model_type in ["vace_14B", "vace_1.3B", "vace_multitalk_14B"]
 extra_model_def["vace_class"] = vace_class
-if base_model_type in ["multitalk", "vace_multitalk_14B"]:
+if base_model_type in ["multitalk", "vace_multitalk_14B", "i2v_2_2_multitalk"]:
     fps = 25
 elif base_model_type in ["fantasy"]:
     fps = 23