diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index 90a526a7..374d0c8a 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -27,6 +27,7 @@ DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"] DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() + def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: """Initiate a phonemizer by name diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index 3ed0a76a..4d435162 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -371,7 +371,9 @@ class AudioProcessor(object): self.hop_length = hop_length self.win_length = win_length assert min_level_db != 0.0, " [!] min_level_db is 0" - assert self.win_length <= self.fft_size, f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}" + assert ( + self.win_length <= self.fft_size + ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}" members = vars(self) if verbose: print(" > Setting up Audio Processor...") diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index dd397687..26b837b8 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -3,8 +3,8 @@ import json import os import zipfile from pathlib import Path -from typing import Tuple from shutil import copyfile, rmtree +from typing import Tuple import requests @@ -144,7 +144,7 @@ class ModelManager(object): output_model_path, output_config_path = self._find_files(output_path) return output_model_path, output_config_path, model_item - def _find_files(self, output_path:str) -> Tuple[str, str]: + def _find_files(self, output_path: str) -> Tuple[str, str]: """Find the model and config files in the output path Args: diff --git a/recipes/ljspeech/align_tts/train_aligntts.py b/recipes/ljspeech/align_tts/train_aligntts.py index d27d0fa1..591b1509 100644 --- a/recipes/ljspeech/align_tts/train_aligntts.py +++ b/recipes/ljspeech/align_tts/train_aligntts.py @@ -49,7 +49,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init model model = AlignTTS(config, ap, tokenizer) diff --git a/recipes/ljspeech/fast_pitch/train_fast_pitch.py b/recipes/ljspeech/fast_pitch/train_fast_pitch.py index 1f10ef07..a84658f3 100644 --- a/recipes/ljspeech/fast_pitch/train_fast_pitch.py +++ b/recipes/ljspeech/fast_pitch/train_fast_pitch.py @@ -84,7 +84,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init the model model = ForwardTTS(config, ap, tokenizer, speaker_manager=None) diff --git a/recipes/ljspeech/fast_speech/train_fast_speech.py b/recipes/ljspeech/fast_speech/train_fast_speech.py index e5a601a7..0245dd93 100644 --- a/recipes/ljspeech/fast_speech/train_fast_speech.py +++ b/recipes/ljspeech/fast_speech/train_fast_speech.py @@ -83,7 +83,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init the model model = ForwardTTS(config, ap, tokenizer) diff --git a/recipes/ljspeech/glow_tts/train_glowtts.py b/recipes/ljspeech/glow_tts/train_glowtts.py index 47d03fe3..a0b4ac48 100644 --- a/recipes/ljspeech/glow_tts/train_glowtts.py +++ b/recipes/ljspeech/glow_tts/train_glowtts.py @@ -60,7 +60,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # INITIALIZE THE MODEL # Models take a config object and a speaker manager as input diff --git a/recipes/ljspeech/speedy_speech/train_speedy_speech.py b/recipes/ljspeech/speedy_speech/train_speedy_speech.py index a19e9053..1ab3db1c 100644 --- a/recipes/ljspeech/speedy_speech/train_speedy_speech.py +++ b/recipes/ljspeech/speedy_speech/train_speedy_speech.py @@ -67,7 +67,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init model model = ForwardTTS(config, ap, tokenizer) diff --git a/recipes/ljspeech/tacotron2-DCA/train_tacotron_dca.py b/recipes/ljspeech/tacotron2-DCA/train_tacotron_dca.py index 19a9f315..a9f253ea 100644 --- a/recipes/ljspeech/tacotron2-DCA/train_tacotron_dca.py +++ b/recipes/ljspeech/tacotron2-DCA/train_tacotron_dca.py @@ -77,7 +77,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # INITIALIZE THE MODEL # Models take a config object and a speaker manager as input diff --git a/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py b/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py index 029698d8..99089db8 100644 --- a/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py +++ b/recipes/ljspeech/tacotron2-DDC/train_tacotron_ddc.py @@ -74,7 +74,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # INITIALIZE THE MODEL # Models take a config object and a speaker manager as input diff --git a/recipes/ljspeech/vits_tts/train_vits.py b/recipes/ljspeech/vits_tts/train_vits.py index e38dc200..c070b3f1 100644 --- a/recipes/ljspeech/vits_tts/train_vits.py +++ b/recipes/ljspeech/vits_tts/train_vits.py @@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init model model = Vits(config, ap, tokenizer, speaker_manager=None) diff --git a/recipes/multilingual/vits_tts/train_vits_tts.py b/recipes/multilingual/vits_tts/train_vits_tts.py index 9e0cb4c8..94692f00 100644 --- a/recipes/multilingual/vits_tts/train_vits_tts.py +++ b/recipes/multilingual/vits_tts/train_vits_tts.py @@ -109,7 +109,12 @@ config.from_dict(config.to_dict()) ap = AudioProcessor(**config.audio.to_dict()) # load training samples -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it maps speaker-id to speaker-name in the model and data-loader diff --git a/recipes/vctk/fast_pitch/train_fast_pitch.py b/recipes/vctk/fast_pitch/train_fast_pitch.py index d066a539..05cdc72a 100644 --- a/recipes/vctk/fast_pitch/train_fast_pitch.py +++ b/recipes/vctk/fast_pitch/train_fast_pitch.py @@ -71,7 +71,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it maps speaker-id to speaker-name in the model and data-loader diff --git a/recipes/vctk/fast_speech/train_fast_speech.py b/recipes/vctk/fast_speech/train_fast_speech.py index dbe23351..a294272a 100644 --- a/recipes/vctk/fast_speech/train_fast_speech.py +++ b/recipes/vctk/fast_speech/train_fast_speech.py @@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it maps speaker-id to speaker-name in the model and data-loader diff --git a/recipes/vctk/glow_tts/train_glow_tts.py b/recipes/vctk/glow_tts/train_glow_tts.py index 8a891e5d..0bf686b1 100644 --- a/recipes/vctk/glow_tts/train_glow_tts.py +++ b/recipes/vctk/glow_tts/train_glow_tts.py @@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it maps speaker-id to speaker-name in the model and data-loader diff --git a/recipes/vctk/speedy_speech/train_speedy_speech.py b/recipes/vctk/speedy_speech/train_speedy_speech.py index d9353af2..4208a9b6 100644 --- a/recipes/vctk/speedy_speech/train_speedy_speech.py +++ b/recipes/vctk/speedy_speech/train_speedy_speech.py @@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it maps speaker-id to speaker-name in the model and data-loader diff --git a/recipes/vctk/tacotron-DDC/train_tacotron-DDC.py b/recipes/vctk/tacotron-DDC/train_tacotron-DDC.py index 14007239..d67038a4 100644 --- a/recipes/vctk/tacotron-DDC/train_tacotron-DDC.py +++ b/recipes/vctk/tacotron-DDC/train_tacotron-DDC.py @@ -72,7 +72,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it mainly handles speaker-id to speaker-name for the model and the data-loader diff --git a/recipes/vctk/tacotron2-DDC/train_tacotron2-ddc.py b/recipes/vctk/tacotron2-DDC/train_tacotron2-ddc.py index ab2e1bc9..b860df85 100644 --- a/recipes/vctk/tacotron2-DDC/train_tacotron2-ddc.py +++ b/recipes/vctk/tacotron2-DDC/train_tacotron2-ddc.py @@ -78,7 +78,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it mainly handles speaker-id to speaker-name for the model and the data-loader diff --git a/recipes/vctk/tacotron2/train_tacotron2.py b/recipes/vctk/tacotron2/train_tacotron2.py index 48934e2a..d27dd78c 100644 --- a/recipes/vctk/tacotron2/train_tacotron2.py +++ b/recipes/vctk/tacotron2/train_tacotron2.py @@ -78,7 +78,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it mainly handles speaker-id to speaker-name for the model and the data-loader diff --git a/recipes/vctk/vits/train_vits.py b/recipes/vctk/vits/train_vits.py index 443dbbd1..61d60ca1 100644 --- a/recipes/vctk/vits/train_vits.py +++ b/recipes/vctk/vits/train_vits.py @@ -79,7 +79,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config) # You can define your custom sample loader returning the list of samples. # Or define your custom formatter and pass it to the `load_tts_samples`. # Check `TTS.tts.datasets.load_tts_samples` for more details. -train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size) +train_samples, eval_samples = load_tts_samples( + dataset_config, + eval_split=True, + eval_split_max_size=config.eval_split_max_size, + eval_split_size=config.eval_split_size, +) # init speaker manager for multi-speaker training # it maps speaker-id to speaker-name in the model and data-loader