Upgrade to gruut 2.0 (#882)

This commit is contained in:
Michael Hansen 2021-10-31 06:41:55 -04:00 committed by GitHub
parent f38c5ee6c1
commit 3bc043faeb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 44 additions and 35 deletions

View File

@ -5,6 +5,7 @@ import re
from typing import Dict, List from typing import Dict, List
import gruut import gruut
from gruut_ipa import IPA
from TTS.tts.utils.text import cleaners from TTS.tts.utils.text import cleaners
from TTS.tts.utils.text.chinese_mandarin.phonemizer import chinese_text_to_phonemes from TTS.tts.utils.text.chinese_mandarin.phonemizer import chinese_text_to_phonemes
@ -32,7 +33,7 @@ PHONEME_PUNCTUATION_PATTERN = r"[" + _punctuations.replace(" ", "") + "]+"
GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ") GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")
def text2phone(text, language, use_espeak_phonemes=False): def text2phone(text, language, use_espeak_phonemes=False, keep_stress=False):
"""Convert graphemes to phonemes. """Convert graphemes to phonemes.
Parameters: Parameters:
text (str): text to phonemize text (str): text to phonemize
@ -51,36 +52,44 @@ def text2phone(text, language, use_espeak_phonemes=False):
ph = japanese_text_to_phonemes(text) ph = japanese_text_to_phonemes(text)
return ph return ph
if gruut.is_language_supported(language): if not gruut.is_language_supported(language):
# Use gruut for phonemization raise ValueError(f" [!] Language {language} is not supported for phonemization.")
phonemizer_args = {
"remove_stress": True,
"ipa_minor_breaks": False, # don't replace commas/semi-colons with IPA |
"ipa_major_breaks": False, # don't replace periods with IPA ‖
}
if use_espeak_phonemes: # Use gruut for phonemization
# Use a lexicon/g2p model trained on eSpeak IPA instead of gruut IPA. ph_list = []
# This is intended for backwards compatibility with TTS<=v0.0.13 for sentence in gruut.sentences(text, lang=language, espeak=use_espeak_phonemes):
# pre-trained models. for word in sentence:
phonemizer_args["model_prefix"] = "espeak" if word.is_break:
# Use actual character for break phoneme (e.g., comma)
if ph_list:
# Join with previous word
ph_list[-1].append(word.text)
else:
# First word is punctuation
ph_list.append([word.text])
elif word.phonemes:
# Add phonemes for word
word_phonemes = []
ph_list = gruut.text_to_phonemes( for word_phoneme in word.phonemes:
text, if not keep_stress:
lang=language, # Remove primary/secondary stress
return_format="word_phonemes", word_phoneme = IPA.without_stress(word_phoneme)
phonemizer_args=phonemizer_args,
)
# Join and re-split to break apart diphthongs, suprasegmentals, etc. word_phoneme = word_phoneme.translate(GRUUT_TRANS_TABLE)
ph_words = ["|".join(word_phonemes) for word_phonemes in ph_list]
ph = "| ".join(ph_words)
# Fix a few phonemes if word_phoneme:
ph = ph.translate(GRUUT_TRANS_TABLE) # Flatten phonemes
return ph word_phonemes.extend(word_phoneme)
raise ValueError(f" [!] Language {language} is not supported for phonemization.") if word_phonemes:
ph_list.append(word_phonemes)
# Join and re-split to break apart diphthongs, suprasegmentals, etc.
ph_words = ["|".join(word_phonemes) for word_phonemes in ph_list]
ph = "| ".join(ph_words)
return ph
def intersperse(sequence, token): def intersperse(sequence, token):

View File

@ -23,6 +23,6 @@ coqpit
mecab-python3==1.0.3 mecab-python3==1.0.3
unidic-lite==1.0.8 unidic-lite==1.0.8
# gruut+supported langs # gruut+supported langs
gruut[cs,de,es,fr,it,nl,pt,ru,sv]~=1.2.0 gruut[cs,de,es,fr,it,nl,pt,ru,sv]~=2.0.0
fsspec>=2021.04.0 fsspec>=2021.04.0
pyworld pyworld

View File

@ -9,12 +9,12 @@ LANG = "en-us"
EXAMPLE_TEXT = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" EXAMPLE_TEXT = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"
EXPECTED_PHONEMES = "ɹ|iː|s|ə|n|t| ɹ|ᵻ|s|ɜː|tʃ| æ|t| h|ɑːɹ|v|ɚ|d| h|æ|z| ʃ|oʊ|n| m|ɛ|d|ᵻ|t|eɪ|ɾ|ɪ|ŋ| f|ɔːɹ| æ|z| l|ɪ|ɾ|əl| æ|z| eɪ|t| w|iː|k|s| k|æ|n| æ|k|tʃ|uː|əl|i| ɪ|ŋ|k|ɹ|iː|s| ,| ð|ə| ɡ|ɹ|eɪ| m|æ|ɾ|ɚ| ɪ|n| ð|ə| p|ɑːɹ|t|s| ʌ|v| ð|ə| b|ɹ|eɪ|n| ɹ|ᵻ|s|p|ɑː|n|s|ᵻ|b|əl| f|ɔːɹ| ɪ|m|oʊ|ʃ|ə|n|əl| ɹ|ɛ|ɡ|j|ʊ|l|eɪ|ʃ|ə|n| æ|n|d| l|ɜː|n|ɪ|ŋ| !" EXPECTED_PHONEMES = "ɹ|i|ː|s|ə|n|t| ɹ|ᵻ|s|ɜ|ː|t|ʃ| æ|ɾ| h|ɑ|ː|ɹ|v|ɚ|d| h|ɐ|z| ʃ|o|ʊ|n| m|ɛ|d|ᵻ|t|e|ɪ|ɾ|ɪ|ŋ| f|ɔ|ː|ɹ| æ|z| l|ɪ|ɾ|ə|l| æ|z| e|ɪ|t| w|i|ː|k|s| k|æ|ŋ| æ|k|t|ʃ|u|ː|ə|l|i| ɪ|ŋ|k|ɹ|i|ː|s|,| ð|ə| ɡ|ɹ|e|ɪ| m|æ|ɾ|ɚ| ɪ|n| ð|ə| p|ɑ|ː|ɹ|t|s| ʌ|v| ð|ə| b|ɹ|e|ɪ|n| ɹ|ᵻ|s|p|ɑ|ː|n|s|ᵻ|b|ə|l| f|ɔ|ː|ɹ| ɪ|m|o|ʊ|ʃ|ə|n|ə|l| ɹ|ɛ|ɡ|j|ʊ|l|e|ɪ|ʃ|ə|n| æ|n|d| l|ɜ|ː|n|ɪ|ŋ|!"
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
class TextProcessingTextCase(unittest.TestCase): class TextProcessingTestCase(unittest.TestCase):
"""Tests for text to phoneme conversion""" """Tests for text to phoneme conversion"""
def test_phoneme_to_sequence(self): def test_phoneme_to_sequence(self):
@ -40,7 +40,7 @@ class TextProcessingTextCase(unittest.TestCase):
sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True)
text_hat = sequence_to_phoneme(sequence) text_hat = sequence_to_phoneme(sequence)
text_hat_with_params = sequence_to_phoneme(sequence) text_hat_with_params = sequence_to_phoneme(sequence)
gt = "biː ɐ vɔɪs , nɑːt ɐn ! ɛkoʊ ?" gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ?"
print(text_hat) print(text_hat)
print(len(sequence)) print(len(sequence))
self.assertEqual(text_hat, text_hat_with_params) self.assertEqual(text_hat, text_hat_with_params)
@ -51,7 +51,7 @@ class TextProcessingTextCase(unittest.TestCase):
sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True)
text_hat = sequence_to_phoneme(sequence) text_hat = sequence_to_phoneme(sequence)
text_hat_with_params = sequence_to_phoneme(sequence) text_hat_with_params = sequence_to_phoneme(sequence)
gt = "biː ɐ vɔɪs , nɑːt ɐn ! ɛkoʊ" gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ"
print(text_hat) print(text_hat)
print(len(sequence)) print(len(sequence))
self.assertEqual(text_hat, text_hat_with_params) self.assertEqual(text_hat, text_hat_with_params)
@ -62,7 +62,7 @@ class TextProcessingTextCase(unittest.TestCase):
sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True)
text_hat = sequence_to_phoneme(sequence) text_hat = sequence_to_phoneme(sequence)
text_hat_with_params = sequence_to_phoneme(sequence) text_hat_with_params = sequence_to_phoneme(sequence)
gt = "biː ɐ vɔɪs , nɑːt ɐn ɛkoʊ !" gt = "biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!"
print(text_hat) print(text_hat)
print(len(sequence)) print(len(sequence))
self.assertEqual(text_hat, text_hat_with_params) self.assertEqual(text_hat, text_hat_with_params)
@ -73,7 +73,7 @@ class TextProcessingTextCase(unittest.TestCase):
sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True) sequence = phoneme_to_sequence(text, text_cleaner, LANG, add_blank=add_blank, use_espeak_phonemes=True)
text_hat = sequence_to_phoneme(sequence) text_hat = sequence_to_phoneme(sequence)
text_hat_with_params = sequence_to_phoneme(sequence) text_hat_with_params = sequence_to_phoneme(sequence)
gt = "biː ɐ vɔɪs , nɑːt ɐn ! ɛkoʊ ." gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ."
print(text_hat) print(text_hat)
print(len(sequence)) print(len(sequence))
self.assertEqual(text_hat, text_hat_with_params) self.assertEqual(text_hat, text_hat_with_params)
@ -86,7 +86,7 @@ class TextProcessingTextCase(unittest.TestCase):
) )
text_hat = sequence_to_phoneme(sequence) text_hat = sequence_to_phoneme(sequence)
text_hat_with_params = sequence_to_phoneme(sequence) text_hat_with_params = sequence_to_phoneme(sequence)
gt = "^biː ɐ vɔɪs , nɑːt ɐn ! ɛkoʊ .~" gt = "^biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ.~"
print(text_hat) print(text_hat)
print(len(sequence)) print(len(sequence))
self.assertEqual(text_hat, text_hat_with_params) self.assertEqual(text_hat, text_hat_with_params)