mirror of https://github.com/coqui-ai/TTS.git
Add tests for gruut phonemization
commit 47191f3ecc (parent 67869e77f9)
@@ -1,6 +1,5 @@
 import os

 with open(os.path.join(os.path.dirname(__file__), "VERSION")) as f:
     version = f.read().strip()
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-

 import re
+import unicodedata

 import gruut
 from packaging import version
@@ -26,32 +27,34 @@ _CURLY_RE = re.compile(r"(.*?)\{(.+?)\}(.*)")
 # Regular expression matching punctuations, ignoring empty space
 PHONEME_PUNCTUATION_PATTERN = r"[" + _punctuations.replace(" ", "") + "]+"

-# language -> source phoneme -> dest phoneme
-GRUUT_PHONEME_MAP = {
-    "en-us": {
-        "i": "iː",
-        "ɑ": "ɑː",
-        "ɚ": "ɜːɹ",
-    },
-    "de": {
-        "ʁ": "ɾ",
-        "g": "ɡ",
-        "ʔ": "",
-    },
-    "nl": {
-        "a": "aː",
-        "e": "eː",
-        "ʏ": "ɵ",
-        "ʋ": "w",
-        "ɹ": "r",
-        "ɔː": "oː",
-    },
-    "es": {
-        "ɾ": "r",
-        "g": "ɣ",
-    },
-}
+# Table for str.translate to fix gruut/TTS phoneme mismatch.
+# Used to make gruut's phonemes fit better with eSpeak's.
+GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")
+
+
+def clean_gruut_phonemes(ph_list):
+    """Decompose, substitute, and clean gruut phonemes for TTS.
+
+    Parameters:
+        ph_list (list[str]): list of phonemes from gruut
+
+    Returns:
+        clean_list (list[str]): decomposed/clean list of phonemes for TTS
+    Diphthongs, etc. are decomposed into single characters.
+    Unicode combining characters are removed (e.g., ties).
+    """
+    cleaned_phonemes = []

+    for phoneme_text in ph_list:
+        # Decompose into codepoints (ã -> ["a", "\u0303"])
+        phoneme_text = unicodedata.normalize("NFD", phoneme_text)
+        for codepoint in phoneme_text.translate(GRUUT_TRANS_TABLE):
+            if unicodedata.combining(codepoint) > 0:
+                # Skip combining characters like ties
+                continue
+
+            cleaned_phonemes.append(codepoint)
+
+    return cleaned_phonemes


 def text2phone(text, language):
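For orientation, here is a minimal usage sketch of the new clean_gruut_phonemes helper (my illustration, not part of the commit): the IPA tie inside an affricate is dropped, a stressed diphthong is split into individual codepoints, and ASCII "g" is remapped to IPA "ɡ" through GRUUT_TRANS_TABLE.

    from TTS.tts.utils.text import clean_gruut_phonemes

    # One gruut "word": affricate with a tie, stressed diphthong, ASCII "g"
    word_phonemes = ["t͡ʃ", "ˈoʊ", "g"]
    print(clean_gruut_phonemes(word_phonemes))
    # Expected output: ['t', 'ʃ', 'ˈ', 'o', 'ʊ', 'ɡ']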
@@ -82,21 +85,14 @@ def text2phone(text, language):
             lang=language,
             return_format="word_phonemes",
             phonemizer_args={
-                "remove_stress": True,  # remove primary/secondary stress
+                "remove_accents": True,  # remove acute/grave accents (Swedish)
                 "ipa_minor_breaks": False,  # don't replace commas/semi-colons with IPA |
                 "ipa_major_breaks": False,  # don't replace periods with IPA ‖
             },
         )

-        ph_map = GRUUT_PHONEME_MAP.get(language)
-        if ph_map:
-            # Re-map phonemes to fit with eSpeak conventions
-            for word in ph_list:
-                for p_idx, p in enumerate(word):
-                    word[p_idx] = ph_map.get(p, p)
-
         # Join and re-split to break apart diphthongs, suprasegmentals, etc.
-        ph_words = ["|".join(word_phonemes) for word_phonemes in ph_list]
+        ph_words = ["|".join(clean_gruut_phonemes(word_phonemes)) for word_phonemes in ph_list]
         ph = "| ".join(ph_words)

         print(" > Phonemes: {}".format(ph))
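To make the ph_words change concrete, here is a hedged sketch of how text2phone's output is expected to look after cleaning (my example, not from the commit; the exact symbols depend on the installed gruut version): phonemes are joined with "|" inside a word, and words are separated by "| ".

    from TTS.tts.utils.text import text2phone

    ph = text2phone("grey matter", "en-us")
    # Based on the phonemes in the new test data below, ph should look like:
    # "ɡ|ɹ|ˈ|e|ɪ| m|ˈ|æ|t|ɚ"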
@@ -0,0 +1,137 @@
+"""Tests for text to phoneme conversion"""
+import unittest
+
+import gruut
+from gruut_ipa import IPA, Phonemes
+
+from TTS.tts.utils.text import clean_gruut_phonemes, phoneme_to_sequence
+from TTS.tts.utils.text import phonemes as all_phonemes
+from TTS.tts.utils.text import sequence_to_phoneme
+
+# -----------------------------------------------------------------------------
+
+EXAMPLE_TEXT = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"
+
+# Raw phonemes from run of gruut with example text (en-us).
+# This includes IPA ties, etc.
+EXAMPLE_PHONEMES = [
+    ["ɹ", "ˈi", "s", "ə", "n", "t"],
+    ["ɹ", "i", "s", "ˈɚ", "t͡ʃ"],
+    ["ˈæ", "t"],
+    ["h", "ˈɑ", "ɹ", "v", "ɚ", "d"],
+    ["h", "ˈæ", "z"],
+    ["ʃ", "ˈoʊ", "n"],
+    ["m", "ˈɛ", "d", "ɪ", "t", "ˌeɪ", "t", "ɪ", "ŋ"],
+    ["f", "ɚ"],
+    ["ˈæ", "z"],
+    ["l", "ˈɪ", "t", "ə", "l"],
+    ["ˈæ", "z"],
+    ["ˈeɪ", "t"],
+    ["w", "ˈi", "k", "s"],
+    ["k", "ə", "n"],
+    ["ˈæ", "k", "t͡ʃ", "ə", "l", "i"],
+    ["ɪ", "ŋ", "k", "ɹ", "ˈi", "s"],
+    [","],
+    ["ð", "ə"],
+    ["ɡ", "ɹ", "ˈeɪ"],
+    ["m", "ˈæ", "t", "ɚ"],
+    ["ˈɪ", "n"],
+    ["ð", "ə"],
+    ["p", "ˈɑ", "ɹ", "t", "s"],
+    ["ə", "v"],
+    ["ð", "ə"],
+    ["b", "ɹ", "ˈeɪ", "n"],
+    ["ɹ", "i", "s", "p", "ˈɑ", "n", "s", "ɪ", "b", "ə", "l"],
+    ["f", "ɚ"],
+    ["ɪ", "m", "ˈoʊ", "ʃ", "ə", "n", "ə", "l"],
+    ["ɹ", "ˌɛ", "ɡ", "j", "ə", "l", "ˈeɪ", "ʃ", "ə", "n"],
+    ["ˈæ", "n", "d"],
+    ["l", "ˈɚ", "n", "ɪ", "ŋ"],
+    ["!"],
+]
+
+# -----------------------------------------------------------------------------
+
+
+class TextProcessingTextCase(unittest.TestCase):
+    """Tests for text to phoneme conversion"""
+
+    def test_all_phonemes_in_tts(self):
+        """Ensure that all phonemes from gruut are present in TTS phonemes"""
+        tts_phonemes = set(all_phonemes)
+
+        # Check stress characters
+        for suprasegmental in [IPA.STRESS_PRIMARY, IPA.STRESS_SECONDARY]:
+            self.assertIn(suprasegmental, tts_phonemes)
+
+        # Check that gruut's phonemes are a subset of TTS phonemes
+        for lang in gruut.get_supported_languages():
+            for phoneme in Phonemes.from_language(lang):
+                for codepoint in clean_gruut_phonemes(phoneme.text):
+                    self.assertIn(codepoint, tts_phonemes)
+
+    def test_phoneme_to_sequence(self):
+        """Verify example (text -> sequence -> phoneme string) pipeline"""
+        lang = "en-us"
+        expected_phoneme_str = " ".join(
+            "".join(clean_gruut_phonemes(word_phonemes)) for word_phonemes in EXAMPLE_PHONEMES
+        )
+
+        # Ensure that TTS produces same phoneme string
+        text_cleaner = ["phoneme_cleaners"]
+        actual_sequence = phoneme_to_sequence(EXAMPLE_TEXT, text_cleaner, lang)
+        actual_phoneme_str = sequence_to_phoneme(actual_sequence)
+
+        self.assertEqual(actual_phoneme_str, expected_phoneme_str)
+
+    def test_phoneme_to_sequence_with_blank_token(self):
+        """Verify example (text -> sequence -> phoneme string) pipeline with blank token"""
+        lang = "en-us"
+        text_cleaner = ["phoneme_cleaners"]
+
+        # Create with/without blank sequences
+        sequence_without_blank = phoneme_to_sequence(EXAMPLE_TEXT, text_cleaner, lang, add_blank=False)
+        sequence_with_blank = phoneme_to_sequence(EXAMPLE_TEXT, text_cleaner, lang, add_blank=True)
+
+        # With blank sequence should be bigger
+        self.assertGreater(len(sequence_with_blank), len(sequence_without_blank))
+
+        # But phoneme strings should still be identical
+        phoneme_str_without_blank = sequence_to_phoneme(sequence_without_blank, add_blank=False)
+        phoneme_str_with_blank = sequence_to_phoneme(sequence_with_blank, add_blank=True)
+
+        self.assertEqual(phoneme_str_with_blank, phoneme_str_without_blank)
+
+    def test_messy_text(self):
+        """Verify text with extra punctuation/whitespace/etc. makes it through the pipeline"""
+        text = '"Be" a! voice, [NOT]? (an eCHo. '
+        lang = "en-us"
+        expected_phonemes = [
+            ["b", "ˈi"],
+            ["ə"],
+            ["!"],
+            ["v", "ˈɔɪ", "s"],
+            [","],
+            ["n", "ˈɑ", "t"],
+            ["?"],
+            ["ə", "n"],
+            ["ˈɛ", "k", "oʊ"],
+            ["."],
+        ]
+        expected_phoneme_str = " ".join(
+            "".join(clean_gruut_phonemes(word_phonemes)) for word_phonemes in expected_phonemes
+        )
+
+        # Ensure that TTS produces same phoneme string
+        text_cleaner = ["phoneme_cleaners"]
+        actual_sequence = phoneme_to_sequence(text, text_cleaner, lang)
+        actual_phoneme_str = sequence_to_phoneme(actual_sequence)
+
+        self.assertEqual(actual_phoneme_str, expected_phoneme_str)
+
+
+# -----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    unittest.main()
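The blank-token test above relies on phoneme_to_sequence and sequence_to_phoneme agreeing at the phoneme-string level whether or not a blank token is interleaved. A minimal sketch of that round trip, mirroring the test's assertions (my illustration, not code from the commit):

    from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme

    text = "Be a voice, not an echo."
    cleaners = ["phoneme_cleaners"]

    plain = phoneme_to_sequence(text, cleaners, "en-us", add_blank=False)
    blanked = phoneme_to_sequence(text, cleaners, "en-us", add_blank=True)

    # Interleaving the blank token makes the sequence longer...
    assert len(blanked) > len(plain)
    # ...but decoding strips it again, so the phoneme strings match.
    assert sequence_to_phoneme(blanked, add_blank=True) == sequence_to_phoneme(plain, add_blank=False)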