mirror of https://github.com/coqui-ai/TTS.git
Use gruut for phonemization
This commit is contained in:
parent d245b5d48f
commit 67869e77f9
@@ -2,6 +2,7 @@
 
 import re
 
+import gruut
 from packaging import version
 
 from TTS.tts.utils.text import cleaners
@@ -25,6 +26,33 @@ _CURLY_RE = re.compile(r"(.*?)\{(.+?)\}(.*)")
 # Regular expression matching punctuations, ignoring empty space
 PHONEME_PUNCTUATION_PATTERN = r"[" + _punctuations.replace(" ", "") + "]+"
 
+# language -> source phoneme -> dest phoneme
+# Used to make gruut's phonemes fit better with eSpeak's.
+GRUUT_PHONEME_MAP = {
+    "en-us": {
+        "i": "iː",
+        "ɑ": "ɑː",
+        "ɚ": "ɜːɹ",
+    },
+    "de": {
+        "ʁ": "ɾ",
+        "g": "ɡ",
+        "ʔ": "",
+    },
+    "nl": {
+        "a": "aː",
+        "e": "eː",
+        "ʏ": "ɵ",
+        "ʋ": "w",
+        "ɹ": "r",
+        "ɔː": "oː",
+    },
+    "es": {
+        "ɾ": "r",
+        "g": "ɣ",
+    },
+}
+
 
 def text2phone(text, language):
     """Convert graphemes to phonemes.
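For reference, a minimal sketch of how a per-language entry in GRUUT_PHONEME_MAP gets applied to a phoneme sequence; the helper name remap_phonemes is illustrative and not part of this commit:

def remap_phonemes(phonemes, language):
    """Map gruut phonemes to their eSpeak-style equivalents where a mapping exists."""
    ph_map = GRUUT_PHONEME_MAP.get(language, {})
    # Phonemes without an entry pass through unchanged.
    return [ph_map.get(p, p) for p in phonemes]

# e.g. remap_phonemes(["h", "ɚ"], "en-us") -> ["h", "ɜːɹ"]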
@@ -39,10 +67,39 @@ def text2phone(text, language):
     # TO REVIEW : How to have a good implementation for this?
     if language == "zh-CN":
         ph = chinese_text_to_phonemes(text)
+        print(" > Phonemes: {}".format(ph))
         return ph
 
     if language == "ja-jp":
         ph = japanese_text_to_phonemes(text)
+        print(" > Phonemes: {}".format(ph))
+        return ph
+
+    if gruut.is_language_supported(language):
+        # Use gruut for phonemization
+        ph_list = gruut.text_to_phonemes(
+            text,
+            lang=language,
+            return_format="word_phonemes",
+            phonemizer_args={
+                "remove_stress": True,  # remove primary/secondary stress
+                "ipa_minor_breaks": False,  # don't replace commas/semi-colons with IPA |
+                "ipa_major_breaks": False,  # don't replace periods with IPA ‖
+            },
+        )
+
+        ph_map = GRUUT_PHONEME_MAP.get(language)
+        if ph_map:
+            # Re-map phonemes to fit with eSpeak conventions
+            for word in ph_list:
+                for p_idx, p in enumerate(word):
+                    word[p_idx] = ph_map.get(p, p)
+
+        # Join and re-split to break apart diphthongs, suprasegmentals, etc.
+        ph_words = ["|".join(word_phonemes) for word_phonemes in ph_list]
+        ph = "| ".join(ph_words)
+
+        print(" > Phonemes: {}".format(ph))
         return ph
 
     raise ValueError(f" [!] Language {language} is not supported for phonemization.")
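A hedged usage sketch of the new gruut branch in text2phone; the phoneme string in the comment is only an illustration of the "|"-delimited format built above, not gruut's exact output for this sentence:

# Assumes a gruut-supported language code such as "de", installed via the gruut extras.
ph = text2phone("Hallo Welt", "de")
# Phonemes within a word are joined with "|" and words are separated by "| ",
# so ph looks roughly like "h|a|l|oː| v|ɛ|l|t".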
@@ -102,10 +102,10 @@ class ModelManager(object):
         output_model_path = os.path.join(output_path, "model_file.pth.tar")
         output_config_path = os.path.join(output_path, "config.json")
         # NOTE : band-aid for removing phoneme support
-        if "needs_phonemizer" in model_item and model_item["needs_phonemizer"]:
-            raise RuntimeError(
-                " [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models."
-            )
+        # if "needs_phonemizer" in model_item and model_item["needs_phonemizer"]:
+        #     raise RuntimeError(
+        #         " [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models."
+        #     )
         if os.path.exists(output_path):
             print(f" > {model_name} is already downloaded.")
         else:
@@ -22,3 +22,5 @@ coqpit
 # japanese g2p deps
 mecab-python3==1.0.3
 unidic-lite==1.0.8
+# gruut+supported langs
+gruut[cs,de,es,fr,it,nl,pt,ru,sv]~=1.0.0
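As a quick, optional sanity check after installing the new requirement (a sketch; gruut.is_language_supported is the same call used in text2phone above):

import gruut

# Confirm the languages referenced in GRUUT_PHONEME_MAP are available
# with gruut ~=1.0.0 and the installed language extras.
for lang in ("en-us", "de", "nl", "es"):
    print(lang, gruut.is_language_supported(lang))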