diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index 024f79c6..281da221 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -1,4 +1,5 @@ import logging +import re import subprocess from typing import Dict, List @@ -163,6 +164,13 @@ class ESpeak(BasePhonemizer): # dealing with the conditions descrived above ph_decoded = ph_decoded[:1].replace("_", "") + ph_decoded[1:] + + # espeak-ng backend can add language flags that need to be removed: + # "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." + # phonemize needs to remove the language flags of the returned text: + # "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." + ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded) + phonemes += ph_decoded.strip() return phonemes.replace("_", separator)