From c670365507e55ec857505792358d4d43b063fe95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Tue, 8 Mar 2022 14:20:14 +0100
Subject: [PATCH] Fix VCTK recipe and formatters (dict items, phoneme_language)

---
 TTS/tts/datasets/formatters.py  | 4 ++--
 recipes/vctk/vits/train_vits.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
index aacfc647..fa8d79bc 100644
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -328,7 +328,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic
         else:
             wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + f"_{mic}.{file_ext}")
         if os.path.exists(wav_file):
-            items.append([text, wav_file, "VCTK_" + speaker_id])
+            items.append({"text": text, "audio_file": wav_file, "speaker_name": "VCTK_" + speaker_id})
         else:
             print(f" [!] wav files don't exist - {wav_file}")
     return items
@@ -348,7 +348,7 @@ def vctk_old(root_path, meta_files=None, wavs_path="wav48"):
         with open(meta_file, "r", encoding="utf-8") as file_text:
             text = file_text.readlines()[0]
         wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + ".wav")
-        items.append([text, wav_file, "VCTK_old_" + speaker_id])
+        items.append({"text": text, "audio_file": wav_file, "speaker_name": "VCTK_old_" + speaker_id})
     return items
 
 
diff --git a/recipes/vctk/vits/train_vits.py b/recipes/vctk/vits/train_vits.py
index dff4eefc..84e8a058 100644
--- a/recipes/vctk/vits/train_vits.py
+++ b/recipes/vctk/vits/train_vits.py
@@ -53,6 +53,7 @@ config = VitsConfig(
     epochs=1000,
     text_cleaner="english_cleaners",
     use_phonemes=True,
+    phoneme_language="en",
     phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
     compute_input_seq_cache=True,
     print_step=25,