mirror of
https://github.com/Wan-Video/Wan2.1.git
synced 2025-11-05 14:33:15 +00:00
149 lines
4.0 KiB
Python
149 lines
4.0 KiB
Python
"""Kokoro TTS CLI
|
|
Example usage:
|
|
python3 -m kokoro --text "The sky above the port was the color of television, tuned to a dead channel." -o file.wav --debug
|
|
|
|
echo "Bom dia mundo, como vão vocês" > text.txt
|
|
python3 -m kokoro -i text.txt -l p --voice pm_alex -o audio.wav
|
|
|
|
Common issues:
|
|
pip not installed: `uv pip install pip`
|
|
(Temporary workaround while https://github.com/explosion/spaCy/issues/13747 is not fixed)
|
|
|
|
espeak not installed: `apt-get install espeak-ng`
|
|
"""
|
|
|
|
import argparse
|
|
import wave
|
|
from pathlib import Path
|
|
from typing import Generator, TYPE_CHECKING
|
|
|
|
import numpy as np
|
|
from loguru import logger
|
|
|
|
# Single-letter Kokoro language codes. The list doubles as the set of allowed
# values for the CLI's --language option.
languages = [
    code
    for code, _language_name in (
        ("a", "American English"),
        ("b", "British English"),
        ("h", "Hindi"),
        ("e", "Spanish"),
        ("f", "French"),
        ("i", "Italian"),
        ("p", "Brazilian Portuguese"),
        ("j", "Japanese"),
        ("z", "Mandarin Chinese"),
    )
]
|
|
|
|
if TYPE_CHECKING:
|
|
from kokoro import KPipeline
|
|
|
|
|
|
def generate_audio(
    text: str, kokoro_language: str, voice: str, speed=1
) -> Generator["KPipeline.Result", None, None]:
    """Lazily run the Kokoro pipeline over ``text`` and yield its results.

    A warning is logged when ``voice`` does not start with
    ``kokoro_language``; generation proceeds regardless.

    Args:
        text: Text handed to the pipeline.
        kokoro_language: Language code used to construct the pipeline.
        voice: Voice name handed to the pipeline.
        speed: Speech speed handed to the pipeline.

    Yields:
        Each result produced by the pipeline, in order.
    """
    # Imported here rather than at module level, so kokoro is only loaded
    # once the generator is first advanced.
    from kokoro import KPipeline

    voice_matches_language = voice.startswith(kokoro_language)
    if not voice_matches_language:
        logger.warning(f"Voice {voice} is not made for language {kokoro_language}")

    synthesize = KPipeline(lang_code=kokoro_language)
    results = synthesize(text, voice=voice, speed=speed, split_pattern=r"\n+")
    yield from results
|
|
|
|
|
|
def generate_and_save_audio(
    output_file: Path, text: str, kokoro_language: str, voice: str, speed=1
) -> None:
    """Synthesize ``text`` and write the audio to ``output_file`` as a WAV file.

    The file is written as mono, 16-bit PCM with a 24000 Hz frame rate.
    Results whose ``audio`` is ``None`` are skipped.

    Args:
        output_file: Destination path; it is resolved and opened for writing.
        text: Text forwarded to ``generate_audio``.
        kokoro_language: Language code forwarded to ``generate_audio``.
        voice: Voice name forwarded to ``generate_audio``.
        speed: Speech speed forwarded to ``generate_audio``.
    """
    with wave.open(str(output_file.resolve()), "wb") as wav_file:
        wav_file.setnchannels(1)  # Mono audio
        wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit audio)
        wav_file.setframerate(24000)  # Sample rate

        for result in generate_audio(
            text, kokoro_language=kokoro_language, voice=voice, speed=speed
        ):
            logger.debug(result.phonemes)
            if result.audio is None:
                continue
            # Clamp to [-1, 1] before scaling: a sample even slightly outside
            # that range would overflow the int16 cast and produce loud
            # wrap-around artifacts instead of mild clipping.
            samples = np.clip(result.audio.numpy(), -1.0, 1.0)
            # Native-endian int16 on purpose: the wave module converts to the
            # on-disk byte order itself.
            audio_bytes = (samples * 32767).astype(np.int16).tobytes()
            wav_file.writeframes(audio_bytes)
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the command-line interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--voice",
        default="af_heart",
        help="Voice to use",
    )
    parser.add_argument(
        "-l",
        "--language",
        help="Language to use (defaults to the one corresponding to the voice)",
        choices=languages,
    )
    parser.add_argument(
        "-o",
        "--output-file",
        "--output_file",
        type=Path,
        help="Path to output WAV file",
        required=True,
    )
    parser.add_argument(
        "-i",
        "--input-file",
        "--input_file",
        type=Path,
        help="Path to input text file (default: stdin)",
    )
    parser.add_argument(
        "-t",
        "--text",
        help="Text to use instead of reading from stdin",
    )
    parser.add_argument(
        "-s",
        "--speed",
        type=float,
        default=1.0,
        help="Speech speed",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Print DEBUG messages to console",
    )
    return parser


def _read_input_text(args: argparse.Namespace) -> str:
    """Return the text to synthesize from --text, --input-file, or stdin.

    Raises:
        ValueError: If both --text and --input-file were given.
    """
    import sys

    if args.text is not None and args.input_file is not None:
        raise ValueError("You cannot specify both 'text' and 'input_file'")
    # Compare against None so an explicitly passed empty --text is honored
    # instead of silently falling through to stdin.
    if args.text is not None:
        return args.text
    if args.input_file is not None:
        # Decode as UTF-8 explicitly rather than relying on the locale default.
        return args.input_file.read_text(encoding="utf-8")

    print("Press Ctrl+D to stop reading input and start generating", flush=True)
    # Lines read from stdin keep their trailing newline, so read the stream
    # as-is instead of joining lines with an extra "\n".
    return sys.stdin.read()


def main() -> None:
    """Parse command-line arguments, read the input text and write the WAV file.

    The language defaults to the first character of the voice name when
    --language is not given.

    Raises:
        ValueError: If both --text and --input-file were given.
    """
    import sys

    args = _build_parser().parse_args()

    # loguru's logger.level() only looks up or defines a level; it does not
    # change what gets printed. Re-register the stderr sink with the wanted
    # threshold so that --debug actually controls DEBUG output.
    logger.remove()
    logger.add(sys.stderr, level="DEBUG" if args.debug else "INFO")
    logger.debug(args)

    lang = args.language or args.voice[0]

    text = _read_input_text(args)
    logger.debug(f"Input text: {text!r}")

    out_file: Path = args.output_file
    if out_file.suffix.lower() != ".wav":
        logger.warning("The output file name should end with .wav")
    generate_and_save_audio(
        output_file=out_file,
        text=text,
        kokoro_language=lang,
        voice=args.voice,
        speed=args.speed,
    )


if __name__ == "__main__":
    main()
|