mirror of
https://github.com/Wan-Video/Wan2.1.git
synced 2025-11-04 14:16:57 +00:00
add multitalk support for audio in mp4
This commit is contained in:
parent
e5abb1b9bc
commit
56a51b79f0
@ -60,6 +60,29 @@ def get_embedding(speech_array, wav2vec_feature_extractor, audio_encoder, sr=160
|
|||||||
audio_emb = audio_emb.cpu().detach()
|
audio_emb = audio_emb.cpu().detach()
|
||||||
return audio_emb
|
return audio_emb
|
||||||
|
|
||||||
|
def extract_audio_from_video(filename, sample_rate):
    """Extract the audio track of a video file as a loudness-normalised array.

    The audio is decoded by ``ffmpeg`` into a temporary 16-bit PCM WAV file,
    loaded with ``librosa.load`` at ``sample_rate`` and passed through
    ``loudness_norm``.

    Args:
        filename: Path of the video file handed to ``ffmpeg -i``.
        sample_rate: Sampling rate passed to ``librosa.load`` (``sr=``).

    Returns:
        The value returned by ``loudness_norm`` for the loaded audio.

    Raises:
        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero
            status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Imported locally so this function does not depend on the module's
    # import block already providing ``tempfile``.
    import tempfile

    # Decode into a private temporary directory instead of the current
    # working directory. Deriving "<video stem>.wav" in the CWD could
    # overwrite (ffmpeg runs with -y) and then delete an unrelated user file
    # of the same name, and collided when two inputs shared a stem. The
    # context manager also removes the file when ffmpeg or librosa raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        raw_audio_path = os.path.join(tmp_dir, "audio.wav")
        ffmpeg_command = [
            "ffmpeg",
            "-y",
            "-i",
            str(filename),
            "-vn",  # drop the video stream, keep audio only
            "-acodec",
            "pcm_s16le",
            # ffmpeg always decodes at 16 kHz; librosa.load below resamples
            # to ``sample_rate`` when that differs.
            "-ar",
            "16000",
            "-ac",
            "2",
            str(raw_audio_path),
        ]
        subprocess.run(ffmpeg_command, check=True)
        human_speech_array, sr = librosa.load(raw_audio_path, sr=sample_rate)

    # The samples are fully in memory, so the temporary file is no longer
    # needed for normalisation.
    human_speech_array = loudness_norm(human_speech_array, sr)

    return human_speech_array
|
||||||
|
|
||||||
def audio_prepare_single(audio_path, sample_rate=16000, duration = 0):
|
def audio_prepare_single(audio_path, sample_rate=16000, duration = 0):
|
||||||
ext = os.path.splitext(audio_path)[1].lower()
|
ext = os.path.splitext(audio_path)[1].lower()
|
||||||
if ext in ['.mp4', '.mov', '.avi', '.mkv']:
|
if ext in ['.mp4', '.mov', '.avi', '.mkv']:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user