diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py index f9f44167..49e7a08a 100644 --- a/TTS/tts/utils/text/__init__.py +++ b/TTS/tts/utils/text/__init__.py @@ -2,6 +2,7 @@ import re +import gruut from packaging import version from TTS.tts.utils.text import cleaners @@ -25,6 +26,33 @@ _CURLY_RE = re.compile(r"(.*?)\{(.+?)\}(.*)") # Regular expression matching punctuations, ignoring empty space PHONEME_PUNCTUATION_PATTERN = r"[" + _punctuations.replace(" ", "") + "]+" +# language -> source phoneme -> dest phoneme +# Used to make gruut's phonemes fit better with eSpeak's. +GRUUT_PHONEME_MAP = { + "en-us": { + "i": "iː", + "ɑ": "ɑː", + "ɚ": "ɜːɹ", + }, + "de": { + "ʁ": "ɾ", + "g": "ɡ", + "ʔ": "", + }, + "nl": { + "a": "aː", + "e": "eː", + "ʏ": "ɵ", + "ʋ": "w", + "ɹ": "r", + "ɔː": "oː", + }, + "es": { + "ɾ": "r", + "g": "ɣ", + }, +} + def text2phone(text, language): """Convert graphemes to phonemes. @@ -39,10 +67,39 @@ def text2phone(text, language): # TO REVIEW : How to have a good implementation for this? if language == "zh-CN": ph = chinese_text_to_phonemes(text) + print(" > Phonemes: {}".format(ph)) return ph if language == "ja-jp": ph = japanese_text_to_phonemes(text) + print(" > Phonemes: {}".format(ph)) + return ph + + if gruut.is_language_supported(language): + # Use gruut for phonemization + ph_list = gruut.text_to_phonemes( + text, + lang=language, + return_format="word_phonemes", + phonemizer_args={ + "remove_stress": True, # remove primary/secondary stress + "ipa_minor_breaks": False, # don't replace commas/semi-colons with IPA | + "ipa_major_breaks": False, # don't replace periods with IPA ‖ + }, + ) + + ph_map = GRUUT_PHONEME_MAP.get(language) + if ph_map: + # Re-map phonemes to fit with eSpeak conventions + for word in ph_list: + for p_idx, p in enumerate(word): + word[p_idx] = ph_map.get(p, p) # NOTE(review): a phoneme mapped to "" (e.g. de "ʔ") stays in the word list and still gets a "|" separator below + # Join phonemes with "|" within each word, then join words with "| "; any re-splitting of diphthongs, suprasegmentals, etc. is not done here.
+ ph_words = ["|".join(word_phonemes) for word_phonemes in ph_list] + ph = "| ".join(ph_words) + + print(" > Phonemes: {}".format(ph)) return ph raise ValueError(f" [!] Language {language} is not supported for phonemization.") diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index cf7df7de..f5165079 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -102,10 +102,10 @@ class ModelManager(object): output_model_path = os.path.join(output_path, "model_file.pth.tar") output_config_path = os.path.join(output_path, "config.json") # NOTE : band-aid for removing phoneme support - if "needs_phonemizer" in model_item and model_item["needs_phonemizer"]: - raise RuntimeError( - " [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models." - ) + # if "needs_phonemizer" in model_item and model_item["needs_phonemizer"]: + # raise RuntimeError( + # " [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models." + # ) if os.path.exists(output_path): print(f" > {model_name} is already downloaded.") else: diff --git a/requirements.txt b/requirements.txt index fde48978..a2fb4132 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,5 @@ coqpit # japanese g2p deps mecab-python3==1.0.3 unidic-lite==1.0.8 +# gruut+supported langs +gruut[cs,de,es,fr,it,nl,pt,ru,sv]~=1.0.0