The `tts_to_file` and `tts` methods in TTS/api.py accept a `speed` parameter, but they did not forward it to the underlying `tts` and `synthesizer.tts` calls, so the parameter had no effect. This commit passes `speed` through, so the speaking-speed control now works as intended.

This commit is contained in:
qwq 2024-04-15 16:27:26 +08:00
parent dbf1a08a0d
commit 4b8fde68d0
2 changed files with 21 additions and 0 deletions

View File

@ -283,6 +283,7 @@ class TTS(nn.Module):
style_text=None,
reference_speaker_name=None,
split_sentences=split_sentences,
speed=speed,
**kwargs,
)
return wav
@ -337,6 +338,7 @@ class TTS(nn.Module):
language=language,
speaker_wav=speaker_wav,
split_sentences=split_sentences,
speed=speed,
**kwargs,
)
self.synthesizer.save_wav(wav=wav, path=file_path, pipe_out=pipe_out)

View File

@ -0,0 +1,19 @@
import torch
from TTS.api import TTS
# Example: synthesize speech with the XTTS v2 model, passing a custom `speed`.

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Print the available 🐸TTS models.
available_models = TTS().list_models()
print(available_models)

# Load the model, then move it onto the selected device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
tts = tts.to(device)

# ❗ This is a multi-lingual voice cloning model, so both the reference
# recording (speaker_wav) and the target language must be given.
#
# To get the audio back as a list of amplitude values instead of a file:
# wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en")

# Synthesize directly to a file. `tts_to_file` and `tts` already accepted
# `speed`, but it was not forwarded to the inner tts / synthesizer.tts calls
# and therefore had no effect; with it passed through, the speaking speed
# can be changed.
tts.tts_to_file(
    text="Hello world!",
    speaker_wav="my/cloning/audio.wav",
    language="en",
    file_path="output.wav",
    speed=0.5,
)