diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py index aacfc647..fa8d79bc 100644 --- a/TTS/tts/datasets/formatters.py +++ b/TTS/tts/datasets/formatters.py @@ -328,7 +328,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic else: wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + f"_{mic}.{file_ext}") if os.path.exists(wav_file): - items.append([text, wav_file, "VCTK_" + speaker_id]) + items.append({"text": text, "audio_file": wav_file, "speaker_name": "VCTK_" + speaker_id}) else: print(f" [!] wav files don't exist - {wav_file}") return items @@ -348,7 +348,7 @@ def vctk_old(root_path, meta_files=None, wavs_path="wav48"): with open(meta_file, "r", encoding="utf-8") as file_text: text = file_text.readlines()[0] wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + ".wav") - items.append([text, wav_file, "VCTK_old_" + speaker_id]) + items.append({"text": text, "audio_file": wav_file, "speaker_name": "VCTK_old_" + speaker_id}) return items diff --git a/recipes/vctk/vits/train_vits.py b/recipes/vctk/vits/train_vits.py index dff4eefc..84e8a058 100644 --- a/recipes/vctk/vits/train_vits.py +++ b/recipes/vctk/vits/train_vits.py @@ -53,6 +53,7 @@ config = VitsConfig( epochs=1000, text_cleaner="english_cleaners", use_phonemes=True, + phoneme_language="en", phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), compute_input_seq_cache=True, print_step=25,