# Wan2.1/models/ltx_video/configs/ltxv-13b-0.9.7-dev.original.yaml


# Pipeline-wide settings. Per-pass sampling parameters are defined separately
# under the `first_pass` and `second_pass` mappings in this file.
pipeline_type: 'multi-scale'
checkpoint_path: 'ltxv-13b-0.9.7-dev.safetensors'
# 0.6666666 is approximately 2/3.
# NOTE(review): presumably the resolution ratio used before the spatial
# upscaler runs - confirm against the pipeline code.
downscale_factor: 0.6666666
spatial_upscaler_model_path: 'ltxv-spatial-upscaler-0.9.7.safetensors'

# stg_mode options: "attention_values", "attention_skip", "residual",
# "transformer_block"
stg_mode: 'attention_values'
decode_timestep: 0.05
decode_noise_scale: 0.025
text_encoder_model_name_or_path: 'PixArt-alpha/PixArt-XL-2-1024-MS'

# sampler options: "uniform", "linear-quadratic", "from_checkpoint"
sampler: 'from_checkpoint'

# Prompt-enhancer settings.
# NOTE(review): the two model values look like Hugging Face repo ids - confirm
# how the loader resolves them.
prompt_enhancement_words_threshold: 120
prompt_enhancer_image_caption_model_name_or_path: 'MiaoshouAI/Florence-2-large-PromptGen-v2.0'
prompt_enhancer_llm_model_name_or_path: 'unsloth/Llama-3.2-3B-Instruct'
stochastic_sampling: false


# First-pass parameters. Every list below has six entries, matching the six
# values in guidance_timesteps.
# NOTE(review): presumably the lists are index-aligned with guidance_timesteps
# - confirm against the code that consumes this mapping.
first_pass:
  # Preset: 13b Dynamic
  guidance_scale: [1, 6, 8, 6, 1, 1]
  stg_scale: [0, 4, 4, 4, 2, 1]
  rescaling_scale: [1, 0.5, 0.5, 1, 1, 1]
  guidance_timesteps: [1.0, 0.9933, 0.9850, 0.9767, 0.9008, 0.6180]
  # One inner list per guidance_timesteps entry.
  skip_block_list:
    - [11, 25, 35, 39]
    - [22, 35, 39]
    - [28]
    - [28]
    - [28]
    - [28]
  num_inference_steps: 30  # default


# Second-pass parameters. The active "13b Dynamic" lists each have six entries,
# matching the six values in guidance_timesteps.
# NOTE(review): presumably the lists are index-aligned with guidance_timesteps
# - confirm against the code that consumes this mapping.
second_pass:
  # Preset: 13b Dynamic (active)
  guidance_scale: [1, 6, 8, 6, 1, 1]
  stg_scale: [0, 4, 4, 4, 2, 1]
  rescaling_scale: [1, 0.5, 0.5, 1, 1, 1]
  guidance_timesteps: [1.0, 0.9933, 0.9850, 0.9767, 0.9008, 0.6180]
  # One inner list per guidance_timesteps entry.
  skip_block_list:
    - [11, 25, 35, 39]
    - [22, 35, 39]
    - [28]
    - [28]
    - [28]
    - [28]

  # Preset: 13b Upscale (disabled alternative).
  # To switch presets, comment out the five "13b Dynamic" keys above before
  # uncommenting these; duplicate keys in one mapping are invalid YAML.
  # guidance_scale: [1, 1, 1, 1, 1, 1]
  # stg_scale: [1, 1, 1, 1, 1, 1]
  # rescaling_scale: [1, 1, 1, 1, 1, 1]
  # guidance_timesteps: [1.0, 0.9933, 0.9850, 0.9767, 0.9008, 0.6180]
  # skip_block_list: [[42], [42], [42], [42], [42], [42]]

  num_inference_steps: 30  # default
  strength: 0.85