From 28d98da422b149fd61cf2ba6e37bd89d2cdd52c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Wed, 8 Dec 2021 14:45:32 +0000
Subject: [PATCH] Update VCTK formatter

---
 TTS/tts/datasets/formatters.py | 37 +++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
index 5cbc93db..1375757a 100644
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -289,8 +289,10 @@ def brspeech(root_path, meta_file, ignored_speakers=None):
     return items
 
 
-def vctk(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):
-    """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
+def vctk(root_path, meta_files=None, wavs_path="wav22", mic="mic2", ignored_speakers=None):
+    """https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"""
+    file_ext = 'flac'
+    test_speakers = meta_files
     items = []
     meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
     for meta_file in meta_files:
@@ -302,26 +304,33 @@ def vctk(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):
                 continue
         with open(meta_file, "r", encoding="utf-8") as file_text:
             text = file_text.readlines()[0]
-        wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + ".wav")
-        items.append({"text": text, "audio_file": wav_file, "speaker_name": "VCTK_" + speaker_id})
-
+        # p280 has no mic2 recordings
+        if speaker_id == "p280":
+            wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + f"_mic1.{file_ext}")
+        else:
+            wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + f"_{mic}.{file_ext}")
+        if os.path.exists(wav_file):
+            items.append([text, wav_file, "VCTK_" + speaker_id])
+        else:
+            print(f" [!] wav files don't exist - {wav_file}")
     return items
 
 
-def vctk_slim(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):  # pylint: disable=unused-argument
+def vctk_old(root_path, meta_files=None, wavs_path="wav48"):
     """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
+    test_speakers = meta_files
     items = []
-    txt_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
-    for text_file in txt_files:
-        _, speaker_id, txt_file = os.path.relpath(text_file, root_path).split(os.sep)
+    meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
+    for meta_file in meta_files:
+        _, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep)
         file_id = txt_file.split(".")[0]
-        # ignore speakers
-        if isinstance(ignored_speakers, list):
-            if speaker_id in ignored_speakers:
+        if isinstance(test_speakers, list):  # if a list is given, skip these speaker ids
+            if speaker_id in test_speakers:
                 continue
+        with open(meta_file, "r", encoding="utf-8") as file_text:
+            text = file_text.readlines()[0]
         wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + ".wav")
-        items.append([None, wav_file, "VCTK_" + speaker_id])
-
+        items.append([text, wav_file, "VCTK_old_" + speaker_id])
     return items