From 618b509204cf045f20dc972bbb3220ed160ab0ca Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Fri, 11 Jun 2021 10:43:52 -0400 Subject: [PATCH] =?UTF-8?q?Use=20combined=20characters=20available=20in=20?= =?UTF-8?q?TTS=20phonemes=20(like=20=C3=A7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TTS/tts/utils/text/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py index 3d2f5004..73bd829c 100644 --- a/TTS/tts/utils/text/__init__.py +++ b/TTS/tts/utils/text/__init__.py @@ -53,6 +53,11 @@ def clean_gruut_phonemes(ph_list): cleaned_phonemes = [] for phoneme_text in ph_list: + phoneme_text = unicodedata.normalize("NFC", phoneme_text) + if phoneme_text in phonemes: + cleaned_phonemes.append(phoneme_text) + continue + # Decompose into codepoints (ã -> ["a", "\u0303"]) phoneme_text = unicodedata.normalize("NFD", phoneme_text) for codepoint in phoneme_text.translate(GRUUT_TRANS_TABLE):