mirror of https://github.com/coqui-ai/TTS.git
Add tests for gruut phonemization
This commit is contained in:
parent 67869e77f9
commit 47191f3ecc

@@ -1,6 +1,5 @@
import os

with open(os.path.join(os.path.dirname(__file__), "VERSION")) as f:
    version = f.read().strip()

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-

import re
import unicodedata

import gruut
from packaging import version

@@ -26,32 +27,34 @@ _CURLY_RE = re.compile(r"(.*?)\{(.+?)\}(.*)")
# Regular expression matching punctuation, ignoring empty space
PHONEME_PUNCTUATION_PATTERN = r"[" + _punctuations.replace(" ", "") + "]+"

# language -> source phoneme -> dest phoneme
# Used to make gruut's phonemes fit better with eSpeak's.
GRUUT_PHONEME_MAP = {
    "en-us": {
        "i": "iː",
        "ɑ": "ɑː",
        "ɚ": "ɜːɹ",
    },
    "de": {
        "ʁ": "ɾ",
        "g": "ɡ",
        "ʔ": "",
    },
    "nl": {
        "a": "aː",
        "e": "eː",
        "ʏ": "ɵ",
        "ʋ": "w",
        "ɹ": "r",
        "ɔː": "oː",
    },
    "es": {
        "ɾ": "r",
        "g": "ɣ",
    },
}

# Table for str.translate to fix gruut/TTS phoneme mismatch
GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")


def clean_gruut_phonemes(ph_list):
    """Decompose, substitute, and clean gruut phonemes for TTS.

    Parameters:
        ph_list (list[str]): list of phonemes from gruut

    Returns:
        clean_list (list[str]): decomposed/clean list of phonemes for TTS.
            Diphthongs, etc. are decomposed into single characters;
            Unicode combining characters (e.g., ties) are removed.
    """
    cleaned_phonemes = []

    for phoneme_text in ph_list:
        # Decompose into codepoints (ã -> ["a", "\u0303"])
        phoneme_text = unicodedata.normalize("NFD", phoneme_text)
        for codepoint in phoneme_text.translate(GRUUT_TRANS_TABLE):
            if unicodedata.combining(codepoint) > 0:
                # Skip combining characters like ties
                continue

            cleaned_phonemes.append(codepoint)

    return cleaned_phonemes


def text2phone(text, language):

@@ -82,21 +85,14 @@ def text2phone(text, language):
        lang=language,
        return_format="word_phonemes",
        phonemizer_args={
            "remove_stress": True,  # remove primary/secondary stress
            "remove_accents": True,  # remove acute/grave accents (Swedish)
            "ipa_minor_breaks": False,  # don't replace commas/semi-colons with IPA |
            "ipa_major_breaks": False,  # don't replace periods with IPA ‖
        },
    )

    ph_map = GRUUT_PHONEME_MAP.get(language)
    if ph_map:
        # Re-map phonemes to fit with eSpeak conventions
        for word in ph_list:
            for p_idx, p in enumerate(word):
                word[p_idx] = ph_map.get(p, p)

    # Join and re-split to break apart diphthongs, suprasegmentals, etc.
    ph_words = ["|".join(word_phonemes) for word_phonemes in ph_list]
    ph_words = ["|".join(clean_gruut_phonemes(word_phonemes)) for word_phonemes in ph_list]
    ph = "| ".join(ph_words)

    print(" > Phonemes: {}".format(ph))
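
Below is a small, illustrative sketch of the re-map and join step above, run on a hand-made ph_list instead of gruut output (it assumes GRUUT_PHONEME_MAP and clean_gruut_phonemes are importable from TTS.tts.utils.text as defined in this diff; the sample phonemes are made up):

from TTS.tts.utils.text import GRUUT_PHONEME_MAP, clean_gruut_phonemes

language = "en-us"
ph_list = [["ɹ", "i", "s", "ˈɚ", "t͡ʃ"], ["ˈæ", "t"]]  # e.g. "research at"

# Re-map phonemes toward eSpeak conventions ("i" -> "iː" for en-us)
ph_map = GRUUT_PHONEME_MAP.get(language)
if ph_map:
    for word in ph_list:
        for p_idx, p in enumerate(word):
            word[p_idx] = ph_map.get(p, p)

# Phonemes end up "|"-separated within a word and "| "-separated between words;
# the tie in "t͡ʃ" disappears during cleanup.
ph_words = ["|".join(clean_gruut_phonemes(word_phonemes)) for word_phonemes in ph_list]
ph = "| ".join(ph_words)
print(ph)  # ɹ|i|ː|s|ˈ|ɚ|t|ʃ| ˈ|æ|t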

@@ -0,0 +1,137 @@
"""Tests for text to phoneme conversion"""
import unittest

import gruut
from gruut_ipa import IPA, Phonemes

from TTS.tts.utils.text import clean_gruut_phonemes, phoneme_to_sequence
from TTS.tts.utils.text import phonemes as all_phonemes
from TTS.tts.utils.text import sequence_to_phoneme

# -----------------------------------------------------------------------------

EXAMPLE_TEXT = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"

# Raw phonemes from a run of gruut with the example text (en-us).
# This includes IPA ties, etc.
EXAMPLE_PHONEMES = [
    ["ɹ", "ˈi", "s", "ə", "n", "t"],
    ["ɹ", "i", "s", "ˈɚ", "t͡ʃ"],
    ["ˈæ", "t"],
    ["h", "ˈɑ", "ɹ", "v", "ɚ", "d"],
    ["h", "ˈæ", "z"],
    ["ʃ", "ˈoʊ", "n"],
    ["m", "ˈɛ", "d", "ɪ", "t", "ˌeɪ", "t", "ɪ", "ŋ"],
    ["f", "ɚ"],
    ["ˈæ", "z"],
    ["l", "ˈɪ", "t", "ə", "l"],
    ["ˈæ", "z"],
    ["ˈeɪ", "t"],
    ["w", "ˈi", "k", "s"],
    ["k", "ə", "n"],
    ["ˈæ", "k", "t͡ʃ", "ə", "l", "i"],
    ["ɪ", "ŋ", "k", "ɹ", "ˈi", "s"],
    [","],
    ["ð", "ə"],
    ["ɡ", "ɹ", "ˈeɪ"],
    ["m", "ˈæ", "t", "ɚ"],
    ["ˈɪ", "n"],
    ["ð", "ə"],
    ["p", "ˈɑ", "ɹ", "t", "s"],
    ["ə", "v"],
    ["ð", "ə"],
    ["b", "ɹ", "ˈeɪ", "n"],
    ["ɹ", "i", "s", "p", "ˈɑ", "n", "s", "ɪ", "b", "ə", "l"],
    ["f", "ɚ"],
    ["ɪ", "m", "ˈoʊ", "ʃ", "ə", "n", "ə", "l"],
    ["ɹ", "ˌɛ", "ɡ", "j", "ə", "l", "ˈeɪ", "ʃ", "ə", "n"],
    ["ˈæ", "n", "d"],
    ["l", "ˈɚ", "n", "ɪ", "ŋ"],
    ["!"],
]

# -----------------------------------------------------------------------------


class TextProcessingTextCase(unittest.TestCase):
    """Tests for text to phoneme conversion"""

    def test_all_phonemes_in_tts(self):
        """Ensure that all phonemes from gruut are present in TTS phonemes"""
        tts_phonemes = set(all_phonemes)

        # Check stress characters
        for suprasegmental in [IPA.STRESS_PRIMARY, IPA.STRESS_SECONDARY]:
            self.assertIn(suprasegmental, tts_phonemes)

        # Check that gruut's phonemes are a subset of TTS phonemes
        for lang in gruut.get_supported_languages():
            for phoneme in Phonemes.from_language(lang):
                for codepoint in clean_gruut_phonemes(phoneme.text):
                    self.assertIn(codepoint, tts_phonemes)

    def test_phoneme_to_sequence(self):
        """Verify example (text -> sequence -> phoneme string) pipeline"""
        lang = "en-us"
        expected_phoneme_str = " ".join(
            "".join(clean_gruut_phonemes(word_phonemes)) for word_phonemes in EXAMPLE_PHONEMES
        )

        # Ensure that TTS produces the same phoneme string
        text_cleaner = ["phoneme_cleaners"]
        actual_sequence = phoneme_to_sequence(EXAMPLE_TEXT, text_cleaner, lang)
        actual_phoneme_str = sequence_to_phoneme(actual_sequence)

        self.assertEqual(actual_phoneme_str, expected_phoneme_str)

    def test_phoneme_to_sequence_with_blank_token(self):
        """Verify example (text -> sequence -> phoneme string) pipeline with blank token"""
        lang = "en-us"
        text_cleaner = ["phoneme_cleaners"]

        # Create sequences with and without the blank token
        sequence_without_blank = phoneme_to_sequence(EXAMPLE_TEXT, text_cleaner, lang, add_blank=False)
        sequence_with_blank = phoneme_to_sequence(EXAMPLE_TEXT, text_cleaner, lang, add_blank=True)

        # The sequence with blank tokens should be longer
        self.assertGreater(len(sequence_with_blank), len(sequence_without_blank))

        # But the phoneme strings should still be identical
        phoneme_str_without_blank = sequence_to_phoneme(sequence_without_blank, add_blank=False)
        phoneme_str_with_blank = sequence_to_phoneme(sequence_with_blank, add_blank=True)

        self.assertEqual(phoneme_str_with_blank, phoneme_str_without_blank)

    def test_messy_text(self):
        """Verify that text with extra punctuation/whitespace/etc. makes it through the pipeline"""
        text = '"Be" a! voice, [NOT]? (an eCHo. '
        lang = "en-us"
        expected_phonemes = [
            ["b", "ˈi"],
            ["ə"],
            ["!"],
            ["v", "ˈɔɪ", "s"],
            [","],
            ["n", "ˈɑ", "t"],
            ["?"],
            ["ə", "n"],
            ["ˈɛ", "k", "oʊ"],
            ["."],
        ]
        expected_phoneme_str = " ".join(
            "".join(clean_gruut_phonemes(word_phonemes)) for word_phonemes in expected_phonemes
        )

        # Ensure that TTS produces the same phoneme string
        text_cleaner = ["phoneme_cleaners"]
        actual_sequence = phoneme_to_sequence(text, text_cleaner, lang)
        actual_phoneme_str = sequence_to_phoneme(actual_sequence)

        self.assertEqual(actual_phoneme_str, expected_phoneme_str)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    unittest.main()
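
A minimal usage sketch of the pipeline exercised by these tests (it assumes gruut with its en-us data and the TTS package from this diff are installed; the input text is just an example):

from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme

text = "Be a voice, not an echo."
cleaners = ["phoneme_cleaners"]

seq_without_blank = phoneme_to_sequence(text, cleaners, "en-us", add_blank=False)
seq_with_blank = phoneme_to_sequence(text, cleaners, "en-us", add_blank=True)

# Interleaving the blank token should make the sequence longer,
# but decoding strips it again, so both should give the same phoneme string.
assert len(seq_with_blank) > len(seq_without_blank)
assert sequence_to_phoneme(seq_with_blank, add_blank=True) == sequence_to_phoneme(seq_without_blank)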