From b146de4ce8b6252eb0869f25dc9717b7cbc85765 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 6 Nov 2023 15:22:18 -0300 Subject: [PATCH] Bug fix on XTTS v2.0 Trainer --- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 3 +-- tests/xtts_tests/test_xtts_v2-0_gpt_train.py | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index ef32a4ab..80e06340 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -237,8 +237,7 @@ class GPTTrainer(BaseTTS): self.config, s_info["speaker_wav"], s_info["language"], - gpt_cond_len=3, - decoder="ne_hifigan", + gpt_cond_len=3 )["wav"] test_audios["{}-audio".format(idx)] = wav diff --git a/tests/xtts_tests/test_xtts_v2-0_gpt_train.py b/tests/xtts_tests/test_xtts_v2-0_gpt_train.py index 3d6ef60e..b19b7210 100644 --- a/tests/xtts_tests/test_xtts_v2-0_gpt_train.py +++ b/tests/xtts_tests/test_xtts_v2-0_gpt_train.py @@ -87,7 +87,9 @@ model_args = GPTArgs( gpt_use_masking_gt_prompt_approach=True, gpt_use_perceiver_resampler=True, ) + audio_config = XttsAudioConfig(sample_rate=22050, dvae_sample_rate=22050, output_sample_rate=24000) + config = GPTTrainerConfig( epochs=1, output_path=OUT_PATH,