mirror of https://github.com/coqui-ai/TTS.git
Add XTTS v2.0 inference unit tests
This commit is contained in:
parent
8133b10540
commit
b621ab17d1
|
@ -126,6 +126,58 @@ def test_xtts_streaming():
|
|||
assert len(wav_chuncks) > 1
|
||||
|
||||
|
||||
def test_xtts_v2():
|
||||
"""XTTS is too big to run on github actions. We need to test it locally"""
|
||||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||
speaker_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
|
||||
use_gpu = torch.cuda.is_available()
|
||||
if use_gpu:
|
||||
run_cli(
|
||||
"yes | "
|
||||
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 "
|
||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True '
|
||||
f'--speaker_wav "{speaker_wav}" --language_idx "en"'
|
||||
)
|
||||
else:
|
||||
run_cli(
|
||||
"yes | "
|
||||
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 "
|
||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False '
|
||||
f'--speaker_wav "{speaker_wav}" --language_idx "en"'
|
||||
)
|
||||
|
||||
|
||||
def test_xtts_v2_streaming():
|
||||
"""Testing the new inference_stream method"""
|
||||
from TTS.tts.configs.xtts_config import XttsConfig
|
||||
from TTS.tts.models.xtts import Xtts
|
||||
|
||||
speaker_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
|
||||
model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v2")
|
||||
config = XttsConfig()
|
||||
config.load_json(os.path.join(model_path, "config.json"))
|
||||
model = Xtts.init_from_config(config)
|
||||
model.load_checkpoint(config, checkpoint_dir=model_path)
|
||||
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
|
||||
|
||||
print("Computing speaker latents...")
|
||||
gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
|
||||
|
||||
print("Inference...")
|
||||
chunks = model.inference_stream(
|
||||
"It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
|
||||
"en",
|
||||
gpt_cond_latent,
|
||||
speaker_embedding,
|
||||
)
|
||||
wav_chuncks = []
|
||||
for i, chunk in enumerate(chunks):
|
||||
if i == 0:
|
||||
assert chunk.shape[-1] > 5000
|
||||
wav_chuncks.append(chunk)
|
||||
assert len(wav_chuncks) > 1
|
||||
|
||||
|
||||
def test_tortoise():
|
||||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||
use_gpu = torch.cuda.is_available()
|
||||
|
|
Loading…
Reference in New Issue