phonemizer updates for utils.text

2019-01-08 17:08:50 +01:00 · 2019-01-08 17:08:50 +01:00 · b9629135db
parent 0103ee8967
commit b9629135db
1 changed file with 17 additions and 17 deletions
--- a/utils/text/__init__.py
+++ b/utils/text/__init__.py
@@ -67,15 +67,20 @@ def sequence_to_phoneme(sequence):
    return result.replace('}{', ' ')
 def text2phone(text):
    '''
    Convert graphemes to phonemes.
    '''
-    try:
+    seperator = phonemizer.separator.Separator(' ', '', '|')
-        ph = epi.trans_list(text, normpunc=True)
+    #try:
-    except:
+    punctuations = re.findall(pat, text)
-        ph = None
+    ph = phonemizer.phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language='en-us')
    # Replace \n with matching punctuations.
    for punct in punctuations[:-1]:
        ph = ph.replace(' \n', punct+'| ', 1)
    ph = ph[:-1] + punctuations[-1]
    #except:
    #    ph = None
    return ph
@@ -85,18 +90,13 @@ def phoneme_to_sequence(text, cleaner_names):
    '''
    sequence = []
    clean_text = _clean_text(text, cleaner_names)
-    for word in clean_text.split():
+    phonemes = text2phone(clean_text)
-        phonemes_text = text2phone(word)
+    print(phonemes.replace('|', ''))
    if phonemes is None:
        print("!! After phoneme conversion the result is None. -- {} ".format(clean_text))
    for phoneme in phonemes.split('|'):
        # print(word, ' -- ', phonemes_text)
-        if phonemes_text == None:
+        sequence += _phoneme_to_sequence(phoneme)
            print("!! After phoneme conversion the result is None. -- {} ".format(word))
            continue
        sequence += _phoneme_to_sequence(phonemes_text)
        if word[0] in _punctuations:
            sequence.append(_phonemes_to_id[word[0]])
        elif word[-1] in _punctuations:
            sequence.append(_phonemes_to_id[word[-1]])
        sequence.append(_phonemes_to_id[' '])
    # Append EOS char
    sequence.append(_phonemes_to_id['~'])
    return sequence