From 0a47a7eac00eaaf3769b5b4275e68bfc9f41c4a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Tue, 7 Dec 2021 12:59:11 +0000 Subject: [PATCH] Update tests --- tests/inference_tests/test_synthesize.py | 12 ++-- tests/text_tests/test_characters.py | 4 +- tests/text_tests/test_phonemizer.py | 85 +++++++++++++++++++++--- tests/text_tests/test_tokenizer.py | 14 ++-- tests/tts_tests/test_glow_tts_train.py | 1 - tests/tts_tests/test_vits_train.py | 1 - 6 files changed, 90 insertions(+), 27 deletions(-) diff --git a/tests/inference_tests/test_synthesize.py b/tests/inference_tests/test_synthesize.py index 635506ab..42b77172 100644 --- a/tests/inference_tests/test_synthesize.py +++ b/tests/inference_tests/test_synthesize.py @@ -19,9 +19,9 @@ def test_synthesize(): f'--text "This is an example." --out_path "{output_path}"' ) - # multi-speaker model - run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs") - run_cli( - f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" ' - f'--text "This is an example." --out_path "{output_path}"' - ) + # multi-speaker SC-Glow model + # run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs") + # run_cli( + # f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" ' + # f'--text "This is an example." --out_path "{output_path}"' + # ) diff --git a/tests/text_tests/test_characters.py b/tests/text_tests/test_characters.py index ed84b5b4..3f4086d5 100644 --- a/tests/text_tests/test_characters.py +++ b/tests/text_tests/test_characters.py @@ -2,6 +2,8 @@ import unittest from TTS.tts.utils.text.characters import BaseCharacters, Graphemes, IPAPhonemes, create_graphemes, create_phonemes +# pylint: disable=protected-access + def test_make_symbols(): _ = create_phonemes() @@ -12,7 +14,7 @@ class BaseCharacterTest(unittest.TestCase): def setUp(self): self.characters_empty = BaseCharacters("", "", pad="", eos="", bos="", blank="", is_unique=True, is_sorted=True) - def test_default_character_sets(self): + def test_default_character_sets(self): # pylint: disable=no-self-use """Test initiation of default character sets""" _ = IPAPhonemes() _ = Graphemes() diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py index aa7a5499..512cc195 100644 --- a/tests/text_tests/test_phonemizer.py +++ b/tests/text_tests/test_phonemizer.py @@ -1,20 +1,38 @@ import unittest -from TTS.tts.utils.text.characters import BaseCharacters, Graphemes, IPAPhonemes, create_graphemes, create_phonemes from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer -from TTS.tts.utils.text.tokenizer import TTSTokenizer -EXAMPLE_TEXT = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" +EXAMPLE_TEXTs = [ + "Recent research at Harvard has shown meditating", + "for as little as 8 weeks can actually increase, the grey matter", + "in the parts of the brain responsible", + "for emotional regulation and learning!", +] + + +EXPECTED_ESPEAK_PHONEMES = [ + "ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ɪ|t|ˌeɪ|ɾ|ɪ|ŋ", + "f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ", + "ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl", + "f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!", +] + + +EXPECTED_ESPEAKNG_PHONEMES = [ + "ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ", + "f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ", + "ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ᵻ|s|p|ˈɑː|n|s|ᵻ|b|əl", + "f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|ʊ|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!", +] class TestEspeakPhonemizer(unittest.TestCase): def setUp(self): - self.phonemizer = ESpeak(language="en-us") - self.EXPECTED_PHONEMES = "ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ|ɹ ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!" + self.phonemizer = ESpeak(language="en-us", backend="espeak") - def test_phonemize(self): - output = self.phonemizer.phonemize(EXAMPLE_TEXT, separator="|") - self.assertEqual(output, self.EXPECTED_PHONEMES) + for text, ph in zip(EXAMPLE_TEXTs, EXPECTED_ESPEAK_PHONEMES): + phonemes = self.phonemizer.phonemize(text) + self.assertEqual(phonemes, ph) # multiple punctuations text = "Be a voice, not an! echo?" @@ -48,14 +66,59 @@ class TestEspeakPhonemizer(unittest.TestCase): self.assertTrue(self.phonemizer.is_available()) +class TestEspeakNgPhonemizer(unittest.TestCase): + def setUp(self): + self.phonemizer = ESpeak(language="en-us", backend="espeak-ng") + + for text, ph in zip(EXAMPLE_TEXTs, EXPECTED_ESPEAKNG_PHONEMES): + phonemes = self.phonemizer.phonemize(text) + self.assertEqual(phonemes, ph) + + # multiple punctuations + text = "Be a voice, not an! echo?" + gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?" + output = self.phonemizer.phonemize(text, separator="|") + output = output.replace("|", "") + self.assertEqual(output, gt) + + # not ending with punctuation + text = "Be a voice, not an! echo" + gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ" + output = self.phonemizer.phonemize(text, separator="") + self.assertEqual(output, gt) + + # extra space after the sentence + text = "Be a voice, not an! echo. " + gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ." + output = self.phonemizer.phonemize(text, separator="") + self.assertEqual(output, gt) + + def test_name(self): + self.assertEqual(self.phonemizer.name(), "espeak") + + def test_get_supported_languages(self): + self.assertIsInstance(self.phonemizer.supported_languages(), dict) + + def test_get_version(self): + self.assertIsInstance(self.phonemizer.version(), str) + + def test_is_available(self): + self.assertTrue(self.phonemizer.is_available()) + + class TestGruutPhonemizer(unittest.TestCase): def setUp(self): self.phonemizer = Gruut(language="en-us", use_espeak_phonemes=True, keep_stress=False) - self.EXPECTED_PHONEMES = "ɹ|i|ː|s|ə|n|t| ɹ|ᵻ|s|ɜ|ː|t|ʃ| æ|ɾ| h|ɑ|ː|ɹ|v|ɚ|d| h|ɐ|z| ʃ|o|ʊ|n| m|ɛ|d|ᵻ|t|e|ɪ|ɾ|ɪ|ŋ| f|ɔ|ː|ɹ| æ|z| l|ɪ|ɾ|ə|l| æ|z| e|ɪ|t| w|i|ː|k|s| k|æ|ŋ| æ|k|t|ʃ|u|ː|ə|l|i| ɪ|ŋ|k|ɹ|i|ː|s, ð|ə| ɡ|ɹ|e|ɪ| m|æ|ɾ|ɚ| ɪ|n| ð|ə| p|ɑ|ː|ɹ|t|s| ʌ|v| ð|ə| b|ɹ|e|ɪ|n| ɹ|ᵻ|s|p|ɑ|ː|n|s|ᵻ|b|ə|l| f|ɔ|ː|ɹ| ɪ|m|o|ʊ|ʃ|ə|n|ə|l| ɹ|ɛ|ɡ|j|ʊ|l|e|ɪ|ʃ|ə|n| æ|n|d| l|ɜ|ː|n|ɪ|ŋ!" + self.EXPECTED_PHONEMES = ["ɹ|i|ː|s|ə|n|t| ɹ|ᵻ|s|ɜ|ː|t|ʃ| æ|ɾ| h|ɑ|ː|ɹ|v|ɚ|d| h|ɐ|z| ʃ|o|ʊ|n| m|ɛ|d|ᵻ|t|e|ɪ|ɾ|ɪ|ŋ", + "f|ɔ|ː|ɹ| æ|z| l|ɪ|ɾ|ə|l| æ|z| e|ɪ|t| w|i|ː|k|s| k|æ|ŋ| æ|k|t|ʃ|u|ː|ə|l|i| ɪ|ŋ|k|ɹ|i|ː|s, ð|ə| ɡ|ɹ|e|ɪ| m|æ|ɾ|ɚ", + "ɪ|n| ð|ə| p|ɑ|ː|ɹ|t|s| ʌ|v| ð|ə| b|ɹ|e|ɪ|n| ɹ|ᵻ|s|p|ɑ|ː|n|s|ᵻ|b|ə|l", + "f|ɔ|ː|ɹ| ɪ|m|o|ʊ|ʃ|ə|n|ə|l| ɹ|ɛ|ɡ|j|ʊ|l|e|ɪ|ʃ|ə|n| æ|n|d| l|ɜ|ː|n|ɪ|ŋ!" + ] def test_phonemize(self): - output = self.phonemizer.phonemize(EXAMPLE_TEXT, separator="|") - self.assertEqual(output, self.EXPECTED_PHONEMES) + for text, ph in zip(EXAMPLE_TEXTs, self.EXPECTED_PHONEMES): + phonemes = self.phonemizer.phonemize(text, separator="|") + self.assertEqual(phonemes, ph) # multiple punctuations text = "Be a voice, not an! echo?" diff --git a/tests/text_tests/test_tokenizer.py b/tests/text_tests/test_tokenizer.py index 4d3fb0ce..47174518 100644 --- a/tests/text_tests/test_tokenizer.py +++ b/tests/text_tests/test_tokenizer.py @@ -1,6 +1,5 @@ import unittest from dataclasses import dataclass -from os import sep from coqpit import Coqpit @@ -13,7 +12,7 @@ class TestTTSTokenizer(unittest.TestCase): def setUp(self): self.tokenizer = TTSTokenizer(use_phonemes=False, characters=Graphemes()) - self.ph = ESpeak("tr") + self.ph = ESpeak("tr", backend="espeak") self.tokenizer_ph = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=self.ph) def test_encode_decode_graphemes(self): @@ -54,12 +53,12 @@ class TestTTSTokenizer(unittest.TestCase): def test_not_found_characters(self): self.ph = ESpeak("en-us") - self.tokenizer_local = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=self.ph) + tokenizer_local = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=self.ph) self.assertEqual(len(self.tokenizer.not_found_characters), 0) text = "Yolk of one egg beaten light" - ids = self.tokenizer_local.text_to_ids(text) - text_hat = self.tokenizer_local.ids_to_text(ids) - self.assertEqual(self.tokenizer_local.not_found_characters, ["̩"]) + ids = tokenizer_local.text_to_ids(text) + text_hat = tokenizer_local.ids_to_text(ids) + self.assertEqual(tokenizer_local.not_found_characters, ["̩"]) self.assertEqual(text_hat, "jˈoʊk ʌv wˈʌn ˈɛɡ bˈiːʔn lˈaɪt") def test_init_from_config(self): @@ -85,7 +84,8 @@ class TestTTSTokenizer(unittest.TestCase): text_cleaner: str = "phoneme_cleaners" characters = Characters() - tokenizer_ph = TTSTokenizer.init_from_config(TokenizerConfig()) + tokenizer_ph, _ = TTSTokenizer.init_from_config(TokenizerConfig()) + tokenizer_ph.phonemizer.backend = "espeak" text = "Bu bir Örnek." text_ph = "" + self.ph.phonemize(text, separator="") + "" ids = tokenizer_ph.text_to_ids(text) diff --git a/tests/tts_tests/test_glow_tts_train.py b/tests/tts_tests/test_glow_tts_train.py index 5a5533b6..e5dc44ee 100644 --- a/tests/tts_tests/test_glow_tts_train.py +++ b/tests/tts_tests/test_glow_tts_train.py @@ -17,7 +17,6 @@ config = GlowTTSConfig( num_eval_loader_workers=0, text_cleaner="english_cleaners", use_phonemes=True, - use_espeak_phonemes=True, phoneme_language="en-us", phoneme_cache_path="tests/data/ljspeech/phoneme_cache/", run_eval=True, diff --git a/tests/tts_tests/test_vits_train.py b/tests/tts_tests/test_vits_train.py index 54e655ff..ec9a5915 100644 --- a/tests/tts_tests/test_vits_train.py +++ b/tests/tts_tests/test_vits_train.py @@ -17,7 +17,6 @@ config = VitsConfig( num_eval_loader_workers=0, text_cleaner="english_cleaners", use_phonemes=True, - use_espeak_phonemes=True, phoneme_language="en-us", phoneme_cache_path="tests/data/ljspeech/phoneme_cache/", run_eval=True,