diff --git a/TTS/config/shared_configs.py b/TTS/config/shared_configs.py index 9e9d4692..f2bd40ad 100644 --- a/TTS/config/shared_configs.py +++ b/TTS/config/shared_configs.py @@ -193,9 +193,12 @@ class BaseDatasetConfig(Coqpit): Name of the dataset meta file. Or a list of speakers to be ignored at training for multi-speaker datasets. Defaults to None. - unused_speakers (List): + ignored_speakers (List): List of speakers IDs that are not used at the training. Default None. + language (str): + Language code of the dataset. If defined, it overrides `phoneme_language`. Defaults to None. + meta_file_val (str): Name of the dataset meta file that defines the instances used at validation. diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index 4f8a6e17..2f20c865 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -273,9 +273,9 @@ class TTSDataset(Dataset): item = args[0] func_args = args[1] text, wav_file, *_ = item - if item[3] is not None and item[3] != '': - # If language is specified in dataset, overwrite phoneme_language key - func_args[3] = item[3] + func_args[3] = ( + item[3] if item[3] else func_args[3] + ) # override phoneme language if specified by the dataset formatter phonemes = TTSDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args) return phonemes diff --git a/recipes/vctk/vits/train_vits.py b/recipes/vctk/vits/train_vits.py index a7ec2eae..7eb741c4 100644 --- a/recipes/vctk/vits/train_vits.py +++ b/recipes/vctk/vits/train_vits.py @@ -10,7 +10,9 @@ from TTS.tts.utils.speakers import SpeakerManager from TTS.utils.audio import AudioProcessor output_path = os.path.dirname(os.path.abspath(__file__)) -dataset_config = BaseDatasetConfig(name="vctk", meta_file_train="", path=os.path.join(output_path, "../VCTK/")) +dataset_config = BaseDatasetConfig( + name="vctk", meta_file_train="", language="en-us", path=os.path.join(output_path, "../VCTK/") +) audio_config = BaseAudioConfig( @@ -49,7 +51,6 @@ config = VitsConfig( epochs=1000, text_cleaner="english_cleaners", use_phonemes=True, - phoneme_language="en-us", phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), compute_input_seq_cache=True, print_step=25,