mirror of https://github.com/coqui-ai/TTS.git
Server update s
This commit is contained in:
parent
9915220539
commit
01dbfb3a0f
|
@ -8,5 +8,6 @@
|
||||||
"wavernn_config":"config.json", // wavernn config file
|
"wavernn_config":"config.json", // wavernn config file
|
||||||
"is_wavernn_batched":true,
|
"is_wavernn_batched":true,
|
||||||
"port": 5002,
|
"port": 5002,
|
||||||
"use_cuda": true
|
"use_cuda": true,
|
||||||
|
"debug": true
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,4 +27,4 @@ def tts():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run(debug=True, host='0.0.0.0', port=config.port)
|
app.run(debug=config.debug, host='0.0.0.0', port=config.port)
|
||||||
|
|
|
@ -10,6 +10,14 @@ from utils.audio import AudioProcessor
|
||||||
from utils.generic_utils import load_config, setup_model
|
from utils.generic_utils import load_config, setup_model
|
||||||
from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence, sequence_to_phoneme
|
from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence, sequence_to_phoneme
|
||||||
|
|
||||||
|
import re
|
||||||
|
# Regex fragments used by Synthesizer.split_into_sentences to tell
# sentence-ending periods apart from periods inside titles, initials,
# acronyms, name/company suffixes and web addresses.
# They are raw strings so that regex escapes such as \s are passed to the
# regex engine verbatim instead of being (invalid) string escape sequences.

# A single ASCII letter, captured.
alphabets = r"([A-Za-z])"
# A title followed by a literal period; the title itself is captured.
prefixes = r"(Mr|St|Mrs|Ms|Dr)[.]"
# A name/company suffix, captured; the trailing period is matched by the
# pattern that embeds this fragment.
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
# Words which, when they follow an acronym or suffix, cause a sentence
# boundary to be inserted before them.
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
# Two or three capital letters, each followed by a period (e.g. "U.S." or
# "U.S.A."), captured as a whole.
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
# A period followed by a captured top-level domain.
websites = r"[.](com|net|org|io|gov)"
|
||||||
|
|
||||||
|
|
||||||
class Synthesizer(object):
|
class Synthesizer(object):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
|
@ -48,6 +56,7 @@ class Synthesizer(object):
|
||||||
if use_cuda:
|
if use_cuda:
|
||||||
self.tts_model.cuda()
|
self.tts_model.cuda()
|
||||||
self.tts_model.eval()
|
self.tts_model.eval()
|
||||||
|
self.tts_model.decoder.max_decoder_steps = 3000
|
||||||
|
|
||||||
def load_wavernn(self, lib_path, model_path, model_file, model_config, use_cuda):
|
def load_wavernn(self, lib_path, model_path, model_file, model_config, use_cuda):
|
||||||
sys.path.append(lib_path) # set this if TTS is not installed globally
|
sys.path.append(lib_path) # set this if TTS is not installed globally
|
||||||
|
@ -83,14 +92,42 @@ class Synthesizer(object):
|
||||||
wav = np.array(wav)
|
wav = np.array(wav)
|
||||||
self.ap.save_wav(wav, path)
|
self.ap.save_wav(wav, path)
|
||||||
|
|
||||||
|
def split_into_sentences(self, text):
    """Split ``text`` into a list of sentences.

    Periods that belong to titles (``Dr.``), single-letter initials,
    acronyms (``U.S.A.``), name/company suffixes (``Inc.``), web domains
    (``.com``) and ``Ph.D.`` are temporarily replaced by a ``<prd>``
    placeholder so they are not mistaken for sentence ends. Every remaining
    ``.``, ``?`` and ``!`` is then followed by a ``<stop>`` marker, the
    placeholders are turned back into periods, and the text is cut at the
    ``<stop>`` markers.

    Relies on the module-level pattern fragments ``alphabets``,
    ``prefixes``, ``suffixes``, ``starters``, ``acronyms`` and ``websites``.

    Args:
        text (str): input text; newlines are treated as spaces.

    Returns:
        list of str: the sentences, stripped of surrounding whitespace and
        keeping their terminating punctuation. Text after the last
        terminator (i.e. without closing punctuation) is returned as the
        final item rather than being dropped; the list is empty only when
        ``text`` contains nothing but whitespace.
    """
    # Pad with spaces so the space-anchored patterns below can also match
    # at the very beginning and end of the input.
    text = " " + text + " "
    text = text.replace("\n", " ")

    # Protect periods that do not end a sentence.
    text = re.sub(prefixes, r"\1<prd>", text)
    text = re.sub(websites, r"<prd>\1", text)
    text = text.replace("Ph.D.", "Ph<prd>D<prd>")
    text = re.sub(r"\s" + alphabets + "[.] ", r" \1<prd> ", text)
    # An acronym followed by a typical sentence opener does end a sentence.
    text = re.sub(acronyms + " " + starters, r"\1<stop> \2", text)
    text = re.sub(
        alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]",
        r"\1<prd>\2<prd>\3<prd>",
        text,
    )
    text = re.sub(alphabets + "[.]" + alphabets + "[.]", r"\1<prd>\2<prd>", text)
    # Same for a suffix such as "Inc." followed by a sentence opener.
    text = re.sub(" " + suffixes + "[.] " + starters, r" \1<stop> \2", text)
    text = re.sub(" " + suffixes + "[.]", r" \1<prd>", text)
    text = re.sub(" " + alphabets + "[.]", r" \1<prd>", text)

    # Move a terminator that sits inside a closing quote to just after the
    # quote, so the quote stays attached to the sentence it closes.
    text = text.replace(".”", "”.")
    text = text.replace(".\"", "\".")
    text = text.replace("!\"", "\"!")
    text = text.replace("?\"", "\"?")

    # Mark every remaining terminator as a sentence boundary, then restore
    # the protected periods.
    text = text.replace(".", ".<stop>")
    text = text.replace("?", "?<stop>")
    text = text.replace("!", "!<stop>")
    text = text.replace("<prd>", ".")

    pieces = text.split("<stop>")
    sentences = [piece.strip() for piece in pieces[:-1]]
    # The last piece is whatever follows the final terminator. It is only
    # padding when the input ended with punctuation, but otherwise it is
    # real text that must not be lost.
    remainder = pieces[-1].strip()
    if remainder:
        sentences.append(remainder)
    return sentences
|
||||||
|
|
||||||
def tts(self, text):
|
def tts(self, text):
|
||||||
wavs = []
|
wavs = []
|
||||||
for sen in text.split('.'):
|
sens = self.split_into_sentences(text)
|
||||||
|
if len(sens) == 0:
|
||||||
|
sens = [text+'.']
|
||||||
|
for sen in sens:
|
||||||
if len(sen) < 3:
|
if len(sen) < 3:
|
||||||
continue
|
continue
|
||||||
sen = sen.strip()
|
sen = sen.strip()
|
||||||
print(sen)
|
print(sen)
|
||||||
sen = sen.strip()
|
|
||||||
|
|
||||||
seq = np.array(self.input_adapter(sen))
|
seq = np.array(self.input_adapter(sen))
|
||||||
text_hat = sequence_to_phoneme(seq)
|
text_hat = sequence_to_phoneme(seq)
|
||||||
|
|
|
@ -50,6 +50,7 @@ def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False):
|
||||||
sequence = [_phonemes_to_id['^']]
|
sequence = [_phonemes_to_id['^']]
|
||||||
else:
|
else:
|
||||||
sequence = []
|
sequence = []
|
||||||
|
text = text.replace(":", "")
|
||||||
clean_text = _clean_text(text, cleaner_names)
|
clean_text = _clean_text(text, cleaner_names)
|
||||||
phonemes = text2phone(clean_text, language)
|
phonemes = text2phone(clean_text, language)
|
||||||
if phonemes is None:
|
if phonemes is None:
|
||||||
|
|
|
@ -28,6 +28,10 @@ _arpabet = ['@' + s for s in _phonemes]
|
||||||
symbols = [_pad, _eos, _bos] + list(_characters) + _arpabet
|
symbols = [_pad, _eos, _bos] + list(_characters) + _arpabet
|
||||||
phonemes = [_pad, _eos, _bos] + list(_phonemes) + list(_punctuations)
|
phonemes = [_pad, _eos, _bos] + list(_phonemes) + list(_punctuations)
|
||||||
|
|
||||||
|
# Generate ALIEN language
|
||||||
|
# from random import shuffle
|
||||||
|
# shuffle(phonemes)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(" > TTS symbols ")
|
print(" > TTS symbols ")
|
||||||
print(symbols)
|
print(symbols)
|
||||||
|
|
Loading…
Reference in New Issue