mirror of https://github.com/coqui-ai/TTS.git
update test_text_processing for espeak-ng
This commit is contained in:
parent
b28c724c04
commit
4244096ccb
|
@ -24,7 +24,7 @@ _phonemes = phonemes
|
|||
_CURLY_RE = re.compile(r'(.*?)\{(.+?)\}(.*)')
|
||||
|
||||
# Regular expression matching punctuations, ignoring empty space
|
||||
PHONEME_PUNCTUATION_PATTERN = r'['+_punctuations+']+'
|
||||
PHONEME_PUNCTUATION_PATTERN = r'['+_punctuations.replace(' ', '')+']+'
|
||||
|
||||
|
||||
def text2phone(text, language):
|
||||
|
|
|
@ -19,7 +19,7 @@ def test_phoneme_to_sequence():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
|
||||
gt = "ɹiːsənt ɹɪsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪnkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹɪspɑːnsəbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjuːleɪʃən ænd lɜːnɪŋ!"
|
||||
gt = 'ɹiːsənt ɹᵻsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪŋkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹᵻspɑːnsᵻbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjʊleɪʃən ænd lɜːnɪŋ!'
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
||||
# multiple punctuations
|
||||
|
@ -28,7 +28,7 @@ def test_phoneme_to_sequence():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ?"
|
||||
gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ?"
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -39,7 +39,7 @@ def test_phoneme_to_sequence():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ"
|
||||
gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ"
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -61,7 +61,7 @@ def test_phoneme_to_sequence():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ."
|
||||
gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ."
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -72,7 +72,7 @@ def test_phoneme_to_sequence():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
|
||||
gt = "^biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ.~"
|
||||
gt = "^biː ɐ vɔɪs, nɑːt æn! ɛkoʊ.~"
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -83,7 +83,7 @@ def test_phoneme_to_sequence():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ"
|
||||
gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ"
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -97,7 +97,7 @@ def test_phoneme_to_sequence_with_blank_token():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True)
|
||||
gt = "ɹiːsənt ɹɪsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪnkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹɪspɑːnsəbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjuːleɪʃən ænd lɜːnɪŋ!"
|
||||
gt = "ɹiːsənt ɹᵻsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪŋkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹᵻspɑːnsᵻbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjʊleɪʃən ænd lɜːnɪŋ!"
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
||||
# multiple punctuations
|
||||
|
@ -106,7 +106,7 @@ def test_phoneme_to_sequence_with_blank_token():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ?"
|
||||
gt = 'biː ɐ vɔɪs, nɑːt æn! ɛkoʊ?'
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -117,7 +117,7 @@ def test_phoneme_to_sequence_with_blank_token():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ"
|
||||
gt = 'biː ɐ vɔɪs, nɑːt æn! ɛkoʊ'
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -128,7 +128,7 @@ def test_phoneme_to_sequence_with_blank_token():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!"
|
||||
gt = 'biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!'
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -139,7 +139,7 @@ def test_phoneme_to_sequence_with_blank_token():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ."
|
||||
gt = 'biː ɐ vɔɪs, nɑːt æn! ɛkoʊ.'
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -150,7 +150,7 @@ def test_phoneme_to_sequence_with_blank_token():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True)
|
||||
gt = "^biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ.~"
|
||||
gt = "^biː ɐ vɔɪs, nɑːt æn! ɛkoʊ.~"
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
@ -161,14 +161,14 @@ def test_phoneme_to_sequence_with_blank_token():
|
|||
text_hat = sequence_to_phoneme(sequence)
|
||||
_ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters, add_blank=True)
|
||||
text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters, add_blank=True)
|
||||
gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ"
|
||||
gt = "biː ɐ vɔɪs, nɑːt æn! ɛkoʊ"
|
||||
print(text_hat)
|
||||
print(len(sequence))
|
||||
assert text_hat == text_hat_with_params == gt
|
||||
|
||||
def test_text2phone():
|
||||
text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"
|
||||
gt = "ɹ|iː|s|ə|n|t| |ɹ|ɪ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i| |ɪ|n|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ɪ|s|p|ɑː|n|s|ə|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|uː|l|eɪ|ʃ|ə|n| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!"
|
||||
gt = 'ɹ|iː|s|ə|n|t| |ɹ|ᵻ|s|ɜː|tʃ| |æ|t| |h|ɑːɹ|v|ɚ|d| |h|ɐ|z| |ʃ|oʊ|n| |m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| |f|ɔː|ɹ| |æ|z| |l|ɪ|ɾ|əl| |æ|z| |eɪ|t| |w|iː|k|s| |k|æ|n| |æ|k|tʃ|uː|əl|i| |ɪ|ŋ|k|ɹ|iː|s|,| |ð|ə| |ɡ|ɹ|eɪ| |m|æ|ɾ|ɚ|ɹ| |ɪ|n|ð|ə| |p|ɑːɹ|t|s| |ʌ|v|ð|ə| |b|ɹ|eɪ|n| |ɹ|ᵻ|s|p|ɑː|n|s|ᵻ|b|əl| |f|ɔː|ɹ| |ɪ|m|oʊ|ʃ|ə|n|əl| |ɹ|ɛ|ɡ|j|ʊ|l|eɪ|ʃ|ə|n| |æ|n|d| |l|ɜː|n|ɪ|ŋ|!'
|
||||
lang = "en-us"
|
||||
ph = text2phone(text, lang)
|
||||
assert gt == ph
|
||||
|
|
Loading…
Reference in New Issue