mirror of https://github.com/coqui-ai/TTS.git
Conventional updates
This commit is contained in:
parent
5f22e2a83a
commit
e6750ca652
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from utils.text import cleaners
|
from utils.text import cleaners
|
||||||
from utils.text.symbols import symbols, phonemes
|
from utils.text.symbols import symbols, phonemes, _punctuations
|
||||||
from utils.text.cmudict import text2phone
|
from utils.text.cmudict import text2phone
|
||||||
|
|
||||||
# Mappings from symbol to numeric ID and vice versa:
|
# Mappings from symbol to numeric ID and vice versa:
|
||||||
|
@ -16,22 +16,39 @@ _id_to_phonemes = {i: s for i, s in enumerate(phonemes)}
|
||||||
_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
|
_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
|
||||||
|
|
||||||
|
|
||||||
def phonem_to_sequence(text, cleaner_names):
|
def phoneme_to_sequence(text, cleaner_names):
|
||||||
'''
|
'''
|
||||||
TODO: This ignores punctuations
|
TODO: This ignores punctuations
|
||||||
'''
|
'''
|
||||||
sequence = []
|
sequence = []
|
||||||
clean_text = _clean_text(text, cleaner_names)
|
clean_text = _clean_text(text, cleaner_names)
|
||||||
for word in clean_text.split():
|
for word in clean_text.split():
|
||||||
phonems_text = text2phone(word)
|
phonemes_text = text2phone(word)
|
||||||
if phonems_text == None:
|
if phonemes_text == None:
|
||||||
|
print("!! After phoneme conversion the result is None. -- {} ".format(word))
|
||||||
continue
|
continue
|
||||||
sequence += _phonem_to_sequence(phonems_text)
|
sequence += _phoneme_to_sequence(phonemes_text)
|
||||||
|
if word[0] in _punctuations:
|
||||||
|
sequence.append(_phonemes_to_id[word[0]])
|
||||||
|
elif word[-1] in _punctuations:
|
||||||
|
sequence.append(_phonemes_to_id[word[-1]])
|
||||||
sequence.append(_phonemes_to_id[' '])
|
sequence.append(_phonemes_to_id[' '])
|
||||||
|
# Append EOS char
|
||||||
sequence.append(_phonemes_to_id['~'])
|
sequence.append(_phonemes_to_id['~'])
|
||||||
return sequence
|
return sequence
|
||||||
|
|
||||||
|
|
||||||
|
def sequence_to_phoneme(sequence):
|
||||||
|
'''Converts a sequence of IDs back to a string'''
|
||||||
|
result = ''
|
||||||
|
for symbol_id in sequence:
|
||||||
|
if symbol_id in _id_to_phonemes:
|
||||||
|
s = _id_to_phonemes[symbol_id]
|
||||||
|
print(s)
|
||||||
|
result += s
|
||||||
|
return result.replace('}{', ' ')
|
||||||
|
|
||||||
|
|
||||||
def text_to_sequence(text, cleaner_names):
|
def text_to_sequence(text, cleaner_names):
|
||||||
'''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
|
'''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
|
||||||
|
|
||||||
|
@ -76,17 +93,6 @@ def sequence_to_text(sequence):
|
||||||
return result.replace('}{', ' ')
|
return result.replace('}{', ' ')
|
||||||
|
|
||||||
|
|
||||||
def sequence_to_phonem(sequence):
|
|
||||||
'''Converts a sequence of IDs back to a string'''
|
|
||||||
result = ''
|
|
||||||
for symbol_id in sequence:
|
|
||||||
if symbol_id in _id_to_phonemes:
|
|
||||||
s = _id_to_phonemes[symbol_id]
|
|
||||||
print(s)
|
|
||||||
result += s
|
|
||||||
return result.replace('}{', ' ')
|
|
||||||
|
|
||||||
|
|
||||||
def _clean_text(text, cleaner_names):
|
def _clean_text(text, cleaner_names):
|
||||||
for name in cleaner_names:
|
for name in cleaner_names:
|
||||||
cleaner = getattr(cleaners, name)
|
cleaner = getattr(cleaners, name)
|
||||||
|
@ -100,8 +106,8 @@ def _symbols_to_sequence(symbols):
|
||||||
return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
|
return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
|
||||||
|
|
||||||
|
|
||||||
def _phonem_to_sequence(phonemes):
|
def _phoneme_to_sequence(phonemes):
|
||||||
return [_phonemes_to_id[s] for s in phonemes.split(" ") if _should_keep_phonem(s)]
|
return [_phonemes_to_id[s] for s in phonemes.split(" ") if _should_keep_phoneme(s)]
|
||||||
|
|
||||||
|
|
||||||
def _arpabet_to_sequence(text):
|
def _arpabet_to_sequence(text):
|
||||||
|
@ -112,5 +118,5 @@ def _should_keep_symbol(s):
|
||||||
return s in _symbol_to_id and s is not '_' and s is not '~'
|
return s in _symbol_to_id and s is not '_' and s is not '~'
|
||||||
|
|
||||||
|
|
||||||
def _should_keep_phonem(p):
|
def _should_keep_phoneme(p):
|
||||||
return p in _phonemes_to_id and p is not '_' and p is not '~'
|
return p in _phonemes_to_id and p is not '_' and p is not '~'
|
||||||
|
|
|
@ -65,7 +65,7 @@ _phonemes = set(_phonemes)
|
||||||
def text2phone(text):
|
def text2phone(text):
|
||||||
seperator = phonemizer.separator.Separator('', '', ' ')
|
seperator = phonemizer.separator.Separator('', '', ' ')
|
||||||
try:
|
try:
|
||||||
ph = phonemizer.phonemize(text, separator=seperator)
|
ph = phonemizer.phonemize(text, separator=seperator, strip=True, njobs=1)
|
||||||
except:
|
except:
|
||||||
ph = None
|
ph = None
|
||||||
return ph
|
return ph
|
||||||
|
|
|
@ -20,5 +20,7 @@ symbols = [_pad, _eos] + list(_characters) + _arpabet
|
||||||
phonemes = [_pad, _eos] + list(cmudict._phonemes) + list(_punctuations)
|
phonemes = [_pad, _eos] + list(cmudict._phonemes) + list(_punctuations)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
print(" > TTS symbols ")
|
||||||
print(symbols)
|
print(symbols)
|
||||||
|
print(" > TTS phonemes ")
|
||||||
print(phonemes)
|
print(phonemes)
|
||||||
|
|
Loading…
Reference in New Issue