Conventional updates

2019-01-01 20:10:06 +01:00 · 2019-01-01 20:10:06 +01:00 · e6750ca652
parent 5f22e2a83a
commit e6750ca652
3 changed files with 28 additions and 20 deletions
--- a/utils/text/init.py
+++ b/utils/text/init.py
@@ -2,7 +2,7 @@

 import re
 from utils.text import cleaners
-from utils.text.symbols import symbols, phonemes
+from utils.text.symbols import symbols, phonemes, _punctuations
 from utils.text.cmudict import text2phone

 # Mappings from symbol to numeric ID and vice versa:
@@ -16,22 +16,39 @@ _id_to_phonemes = {i: s for i, s in enumerate(phonemes)}
 _curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')


-def phonem_to_sequence(text, cleaner_names):
+def phoneme_to_sequence(text, cleaner_names):
    '''
    TODO: This ignores punctuations
    '''
    sequence = []
    clean_text = _clean_text(text, cleaner_names)
    for word in clean_text.split():
-        phonems_text = text2phone(word)
-        if phonems_text == None:
+        phonemes_text = text2phone(word)
+        if phonemes_text == None:
+            print("!! After phoneme conversion the result is None. -- {} ".format(word))
            continue
-        sequence += _phonem_to_sequence(phonems_text)
+        sequence += _phoneme_to_sequence(phonemes_text)
+        if word[0] in _punctuations:
+            sequence.append(_phonemes_to_id[word[0]])
+        elif word[-1] in _punctuations:
+            sequence.append(_phonemes_to_id[word[-1]])
        sequence.append(_phonemes_to_id[' '])
+    # Append EOS char
    sequence.append(_phonemes_to_id['~'])
    return sequence


+def sequence_to_phoneme(sequence):
+    '''Converts a sequence of IDs back to a string'''
+    result = ''
+    for symbol_id in sequence:
+        if symbol_id in _id_to_phonemes:
+            s = _id_to_phonemes[symbol_id]
+            print(s)
+            result += s
+    return result.replace('}{', ' ')
+
+
 def text_to_sequence(text, cleaner_names):
    '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.

@@ -76,17 +93,6 @@ def sequence_to_text(sequence):
    return result.replace('}{', ' ')


-def sequence_to_phonem(sequence):
-    '''Converts a sequence of IDs back to a string'''
-    result = ''
-    for symbol_id in sequence:
-        if symbol_id in _id_to_phonemes:
-            s = _id_to_phonemes[symbol_id]
-            print(s)
-            result += s
-    return result.replace('}{', ' ')
-
-
 def _clean_text(text, cleaner_names):
    for name in cleaner_names:
        cleaner = getattr(cleaners, name)
@@ -100,8 +106,8 @@ def _symbols_to_sequence(symbols):
    return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]


-def _phonem_to_sequence(phonemes):
-    return [_phonemes_to_id[s] for s in phonemes.split(" ") if _should_keep_phonem(s)]
+def _phoneme_to_sequence(phonemes):
+    return [_phonemes_to_id[s] for s in phonemes.split(" ") if _should_keep_phoneme(s)]


 def _arpabet_to_sequence(text):
@@ -112,5 +118,5 @@ def _should_keep_symbol(s):
    return s in _symbol_to_id and s is not '_' and s is not '~'


-def _should_keep_phonem(p):
+def _should_keep_phoneme(p):
    return p in _phonemes_to_id and p is not '_' and p is not '~'
--- a/utils/text/cmudict.py
+++ b/utils/text/cmudict.py
@@ -65,7 +65,7 @@ _phonemes = set(_phonemes)
 def text2phone(text):
    seperator = phonemizer.separator.Separator('', '', ' ')
    try:
-        ph = phonemizer.phonemize(text, separator=seperator)
+        ph = phonemizer.phonemize(text, separator=seperator, strip=True, njobs=1)
    except:
        ph = None
    return ph
--- a/utils/text/symbols.py
+++ b/utils/text/symbols.py
@@ -20,5 +20,7 @@ symbols = [_pad, _eos] + list(_characters) + _arpabet
 phonemes = [_pad, _eos] + list(cmudict._phonemes) + list(_punctuations)

 if __name__ == '__main__':
+    print(" > TTS symbols ")
    print(symbols)
+    print(" > TTS phonemes ")
    print(phonemes)