diff --git a/TTS/config/shared_configs.py b/TTS/config/shared_configs.py index f2bd40ad..217282ad 100644 --- a/TTS/config/shared_configs.py +++ b/TTS/config/shared_configs.py @@ -291,7 +291,7 @@ class BaseTrainingConfig(Coqpit): log_model_step (int): Number of steps required to log a checkpoint as W&B artifact - save_step (int):ipt + save_step (int): Number of steps required to save the next checkpoint. checkpoint (bool): diff --git a/TTS/model.py b/TTS/model.py index efa00b2a..6ce11e63 100644 --- a/TTS/model.py +++ b/TTS/model.py @@ -159,4 +159,3 @@ class BaseModel(nn.Module, ABC): def format_batch(self): pass - diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index 3e5226aa..73680f32 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -522,14 +522,17 @@ class GlowTTS(BaseTTS): # init characters if config.use_phonemes: from TTS.tts.utils.text.characters import IPAPhonemes + characters = IPAPhonemes().init_from_config(config) else: from TTS.tts.utils.text.characters import Graphemes + characters = Graphemes().init_from_config(config) config.num_chars = characters.num_chars from TTS.utils.audio import AudioProcessor + ap = AudioProcessor.init_from_config(config) tokenizer = TTSTokenizer.init_from_config(config) speaker_manager = SpeakerManager.init_from_config(config) - return GlowTTS(config, ap, tokenizer, speaker_manager) \ No newline at end of file + return GlowTTS(config, ap, tokenizer, speaker_manager) diff --git a/TTS/tts/utils/text/tokenizer.py b/TTS/tts/utils/text/tokenizer.py index ada5e57b..fac430f0 100644 --- a/TTS/tts/utils/text/tokenizer.py +++ b/TTS/tts/utils/text/tokenizer.py @@ -42,7 +42,7 @@ class TTSTokenizer: add_blank: bool = False, use_eos_bos=False, ): - self.text_cleaner = text_cleaner or (lambda x: x) + self.text_cleaner = text_cleaner self.use_phonemes = use_phonemes self.add_blank = add_blank self.use_eos_bos = use_eos_bos @@ -50,6 +50,16 @@ class TTSTokenizer: self.not_found_characters = [] self.phonemizer = phonemizer + @property + def characters(self): + return self._characters + + @characters.setter + def characters(self, new_characters): + self._characters = new_characters + self.pad_id = self.characters.char_to_id(self.characters.pad) + self.blank_id = self.characters.char_to_id(self.characters.blank) + def encode(self, text: str) -> List[int]: """Encodes a string of text as a sequence of IDs.""" token_ids = [] @@ -61,6 +71,7 @@ class TTSTokenizer: # discard but store not found characters if char not in self.not_found_characters: self.not_found_characters.append(char) + print(text) print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") return token_ids @@ -88,7 +99,8 @@ class TTSTokenizer: 5. Text to token IDs """ # TODO: text cleaner should pick the right routine based on the language - text = self.text_cleaner(text) + if self.text_cleaner is not None: + text = self.text_cleaner(text) if self.use_phonemes: text = self.phonemizer.phonemize(text, separator="") if self.add_blank: @@ -144,7 +156,9 @@ class TTSTokenizer: if "phonemizer" in config and config.phonemizer: phonemizer = get_phonemizer_by_name(config.phonemizer, **phonemizer_kwargs) else: - phonemizer = get_phonemizer_by_name(DEF_LANG_TO_PHONEMIZER[config.phoneme_language], **phonemizer_kwargs) + phonemizer = get_phonemizer_by_name( + DEF_LANG_TO_PHONEMIZER[config.phoneme_language], **phonemizer_kwargs + ) else: # init character set characters = Graphemes().init_from_config(config) diff --git a/tests/text_tests/test_tokenizer.py b/tests/text_tests/test_tokenizer.py index 6c48d276..4d3fb0ce 100644 --- a/tests/text_tests/test_tokenizer.py +++ b/tests/text_tests/test_tokenizer.py @@ -56,10 +56,10 @@ class TestTTSTokenizer(unittest.TestCase): self.ph = ESpeak("en-us") self.tokenizer_local = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=self.ph) self.assertEqual(len(self.tokenizer.not_found_characters), 0) - text = "Yolk of one egg beaten light" + text = "Yolk of one egg beaten light" ids = self.tokenizer_local.text_to_ids(text) text_hat = self.tokenizer_local.ids_to_text(ids) - self.assertEqual(self.tokenizer_local.not_found_characters, ['̩']) + self.assertEqual(self.tokenizer_local.not_found_characters, ["̩"]) self.assertEqual(text_hat, "jˈoʊk ʌv wˈʌn ˈɛɡ bˈiːʔn lˈaɪt") def test_init_from_config(self):