From 90eac13bb222e4188be638cd09932a39badc906f Mon Sep 17 00:00:00 2001 From: Edresson Date: Mon, 22 Nov 2021 18:42:54 -0300 Subject: [PATCH] Rename ununsed_speakers to ignored_speakers --- TTS/config/shared_configs.py | 2 +- TTS/tts/datasets/__init__.py | 7 +++--- TTS/tts/datasets/formatters.py | 42 +++++++++++++++++----------------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/TTS/config/shared_configs.py b/TTS/config/shared_configs.py index f1ea2e0f..c52cfe8a 100644 --- a/TTS/config/shared_configs.py +++ b/TTS/config/shared_configs.py @@ -198,7 +198,7 @@ class BaseDatasetConfig(Coqpit): name: str = "" path: str = "" meta_file_train: str = "" - ununsed_speakers: List[str] = None + ignored_speakers: List[str] = None language: str = "" meta_file_val: str = "" meta_file_attn_mask: str = "" diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py index 3673e188..40eed7e3 100644 --- a/TTS/tts/datasets/__init__.py +++ b/TTS/tts/datasets/__init__.py @@ -67,22 +67,21 @@ def load_tts_samples( root_path = dataset["path"] meta_file_train = dataset["meta_file_train"] meta_file_val = dataset["meta_file_val"] - ununsed_speakers = dataset["ununsed_speakers"] + ignored_speakers = dataset["ignored_speakers"] language = dataset["language"] # setup the right data processor if formatter is None: formatter = _get_formatter_by_name(name) # load train set - meta_data_train = formatter(root_path, meta_file_train, ununsed_speakers=ununsed_speakers) - # TODO: remove the loops and pass language as a parameter to preprocessor for faster load + meta_data_train = formatter(root_path, meta_file_train, ignored_speakers=ignored_speakers) meta_data_train = [[*item, language] for item in meta_data_train] print(f" | > Found {len(meta_data_train)} files in {Path(root_path).resolve()}") # load evaluation split if set if eval_split: if meta_file_val: - meta_data_eval = formatter(root_path, meta_file_val, ununsed_speakers=ununsed_speakers) + meta_data_eval = formatter(root_path, meta_file_val, ignored_speakers=ignored_speakers) meta_data_eval = [[*item, language] for item in meta_data_eval] else: meta_data_eval, meta_data_train = split_dataset(meta_data_train) diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py index 49a1ced4..1f23f85e 100644 --- a/TTS/tts/datasets/formatters.py +++ b/TTS/tts/datasets/formatters.py @@ -59,7 +59,7 @@ def mozilla_de(root_path, meta_file, **kwargs): # pylint: disable=unused-argume return items -def mailabs(root_path, meta_files=None, ununsed_speakers=None): +def mailabs(root_path, meta_files=None, ignored_speakers=None): """Normalizes M-AI-Labs meta data files to TTS format Args: @@ -88,8 +88,8 @@ def mailabs(root_path, meta_files=None, ununsed_speakers=None): continue speaker_name = speaker_name_match.group("speaker_name") # ignore speakers - if isinstance(ununsed_speakers, list): - if speaker_name in ununsed_speakers: + if isinstance(ignored_speakers, list): + if speaker_name in ignored_speakers: continue print(" | > {}".format(csv_file)) with open(txt_file, "r", encoding="utf-8") as ttf: @@ -197,7 +197,7 @@ def nancy(root_path, meta_file, **kwargs): # pylint: disable=unused-argument return items -def common_voice(root_path, meta_file, ununsed_speakers=None): +def common_voice(root_path, meta_file, ignored_speakers=None): """Normalize the common voice meta data file to TTS format.""" txt_file = os.path.join(root_path, meta_file) items = [] @@ -209,15 +209,15 @@ def common_voice(root_path, meta_file, ununsed_speakers=None): text = cols[2] speaker_name = cols[0] # ignore speakers - if isinstance(ununsed_speakers, list): - if speaker_name in ununsed_speakers: + if isinstance(ignored_speakers, list): + if speaker_name in ignored_speakers: continue wav_file = os.path.join(root_path, "clips", cols[1].replace(".mp3", ".wav")) items.append([text, wav_file, "MCV_" + speaker_name]) return items -def libri_tts(root_path, meta_files=None, ununsed_speakers=None): +def libri_tts(root_path, meta_files=None, ignored_speakers=None): """https://ai.google/tools/datasets/libri-tts/""" items = [] if not meta_files: @@ -237,8 +237,8 @@ def libri_tts(root_path, meta_files=None, ununsed_speakers=None): wav_file = os.path.join(_root_path, file_name + ".wav") text = cols[2] # ignore speakers - if isinstance(ununsed_speakers, list): - if speaker_name in ununsed_speakers: + if isinstance(ignored_speakers, list): + if speaker_name in ignored_speakers: continue items.append([text, wav_file, "LTTS_" + speaker_name]) for item in items: @@ -265,7 +265,7 @@ def custom_turkish(root_path, meta_file, **kwargs): # pylint: disable=unused-ar # ToDo: add the dataset link when the dataset is released publicly -def brspeech(root_path, meta_file, ununsed_speakers=None): +def brspeech(root_path, meta_file, ignored_speakers=None): """BRSpeech 3.0 beta""" txt_file = os.path.join(root_path, meta_file) items = [] @@ -278,14 +278,14 @@ def brspeech(root_path, meta_file, ununsed_speakers=None): text = cols[2] speaker_id = cols[3] # ignore speakers - if isinstance(ununsed_speakers, list): - if speaker_id in ununsed_speakers: + if isinstance(ignored_speakers, list): + if speaker_id in ignored_speakers: continue items.append([text, wav_file, speaker_id]) return items -def vctk(root_path, meta_files=None, wavs_path="wav48", ununsed_speakers=None): +def vctk(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None): """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz""" items = [] meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True) @@ -293,8 +293,8 @@ def vctk(root_path, meta_files=None, wavs_path="wav48", ununsed_speakers=None): _, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep) file_id = txt_file.split(".")[0] # ignore speakers - if isinstance(ununsed_speakers, list): - if speaker_id in ununsed_speakers: + if isinstance(ignored_speakers, list): + if speaker_id in ignored_speakers: continue with open(meta_file, "r", encoding="utf-8") as file_text: text = file_text.readlines()[0] @@ -304,7 +304,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48", ununsed_speakers=None): return items -def vctk_slim(root_path, meta_files=None, wavs_path="wav48", ununsed_speakers=None): # pylint: disable=unused-argument +def vctk_slim(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None): # pylint: disable=unused-argument """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz""" items = [] txt_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True) @@ -312,8 +312,8 @@ def vctk_slim(root_path, meta_files=None, wavs_path="wav48", ununsed_speakers=No _, speaker_id, txt_file = os.path.relpath(text_file, root_path).split(os.sep) file_id = txt_file.split(".")[0] # ignore speakers - if isinstance(ununsed_speakers, list): - if speaker_id in ununsed_speakers: + if isinstance(ignored_speakers, list): + if speaker_id in ignored_speakers: continue wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id + ".wav") items.append([None, wav_file, "VCTK_" + speaker_id]) @@ -321,7 +321,7 @@ def vctk_slim(root_path, meta_files=None, wavs_path="wav48", ununsed_speakers=No return items -def mls(root_path, meta_files=None, ununsed_speakers=None): +def mls(root_path, meta_files=None, ignored_speakers=None): """http://www.openslr.org/94/""" items = [] with open(os.path.join(root_path, meta_files), "r", encoding="utf-8") as meta: @@ -331,8 +331,8 @@ def mls(root_path, meta_files=None, ununsed_speakers=None): speaker, book, *_ = file.split("_") wav_file = os.path.join(root_path, os.path.dirname(meta_files), "audio", speaker, book, file + ".wav") # ignore speakers - if isinstance(ununsed_speakers, list): - if speaker in ununsed_speakers: + if isinstance(ignored_speakers, list): + if speaker in ignored_speakers: continue items.append([text, wav_file, "MLS_" + speaker]) return items