From b9629135db173344eef4316136542b49b00b9808 Mon Sep 17 00:00:00 2001 From: Eren Golge Date: Tue, 8 Jan 2019 17:08:50 +0100 Subject: [PATCH] phonemizer updates for utils.text --- utils/text/__init__.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/utils/text/__init__.py b/utils/text/__init__.py index 35e8713b..5bc2f697 100644 --- a/utils/text/__init__.py +++ b/utils/text/__init__.py @@ -67,36 +67,36 @@ def sequence_to_phoneme(sequence): return result.replace('}{', ' ') - def text2phone(text): ''' Convert graphemes to phonemes. ''' - try: - ph = epi.trans_list(text, normpunc=True) - except: - ph = None + seperator = phonemizer.separator.Separator(' ', '', '|') + #try: + punctuations = re.findall(pat, text) + ph = phonemizer.phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language='en-us') + # Replace \n with matching punctuations. + for punct in punctuations[:-1]: + ph = ph.replace(' \n', punct+'| ', 1) + ph = ph[:-1] + punctuations[-1] + #except: + # ph = None return ph - + def phoneme_to_sequence(text, cleaner_names): ''' TODO: This ignores punctuations ''' sequence = [] clean_text = _clean_text(text, cleaner_names) - for word in clean_text.split(): - phonemes_text = text2phone(word) + phonemes = text2phone(clean_text) + print(phonemes.replace('|', '')) + if phonemes is None: + print("!! After phoneme conversion the result is None. -- {} ".format(clean_text)) + for phoneme in phonemes.split('|'): # print(word, ' -- ', phonemes_text) - if phonemes_text == None: - print("!! After phoneme conversion the result is None. -- {} ".format(word)) - continue - sequence += _phoneme_to_sequence(phonemes_text) - if word[0] in _punctuations: - sequence.append(_phonemes_to_id[word[0]]) - elif word[-1] in _punctuations: - sequence.append(_phonemes_to_id[word[-1]]) - sequence.append(_phonemes_to_id[' ']) + sequence += _phoneme_to_sequence(phoneme) # Aeepnd EOS char sequence.append(_phonemes_to_id['~']) return sequence