mirror of https://github.com/coqui-ai/TTS.git
demo server update
This commit is contained in:
parent
2bbb3f7a40
commit
1827f77752
|
@ -1,11 +1,12 @@
|
||||||
{
|
{
|
||||||
"tts_path":"/media/erogol/data_ssd/Data/models/ljspeech_models/ljspeech-April-08-2019_07+32PM-8a47b46/", // tts model root folder
|
"tts_path":"/media/erogol/data_ssd/Models/libri_tts/ljspeech-July-22-2019_10+45AM-ee706b5/", // tts model root folder
|
||||||
"tts_file":"checkpoint_261000.pth.tar", // tts checkpoint file
|
"tts_file":"best_model.pth.tar", // tts checkpoint file
|
||||||
"tts_config":"config.json", // tts config.json file
|
"tts_config":"config.json", // tts config.json file
|
||||||
"wavernn_lib_path": "/home/erogol/projects/", // Rootpath to wavernn project folder to be important. If this is none, model uses GL for speech synthesis.
|
"tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
|
||||||
"wavernn_path":"/media/erogol/data_ssd/Data/models/wavernn/ljspeech/mold_ljspeech_best_model/", // wavernn model root path
|
"wavernn_lib_path": "/home/erogol/projects/", // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis.
|
||||||
"wavernn_file":"checkpoint_433000.pth.tar", // wavernn checkpoint file name
|
"wavernn_path":"/media/erogol/data_ssd/Models/wavernn/universal/4910/", // wavernn model root path
|
||||||
"wavernn_config":"config.json", // wavernn config file
|
"wavernn_file":"best_model_16K.pth.tar", // wavernn checkpoint file name
|
||||||
|
"wavernn_config":"config_16K.json", // wavernn config file
|
||||||
"is_wavernn_batched":true,
|
"is_wavernn_batched":true,
|
||||||
"port": 5002,
|
"port": 5002,
|
||||||
"use_cuda": true,
|
"use_cuda": true,
|
||||||
|
|
|
@ -8,6 +8,7 @@ import sys
|
||||||
from utils.audio import AudioProcessor
|
from utils.audio import AudioProcessor
|
||||||
from utils.generic_utils import load_config, setup_model
|
from utils.generic_utils import load_config, setup_model
|
||||||
from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence, sequence_to_phoneme
|
from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence, sequence_to_phoneme
|
||||||
|
from utils.speakers import load_speaker_mapping
|
||||||
|
|
||||||
import re
|
import re
|
||||||
alphabets = r"([A-Za-z])"
|
alphabets = r"([A-Za-z])"
|
||||||
|
@ -44,7 +45,13 @@ class Synthesizer(object):
|
||||||
else:
|
else:
|
||||||
self.input_size = len(symbols)
|
self.input_size = len(symbols)
|
||||||
self.input_adapter = lambda sen: text_to_sequence(sen, [self.tts_config.text_cleaner])
|
self.input_adapter = lambda sen: text_to_sequence(sen, [self.tts_config.text_cleaner])
|
||||||
self.tts_model = setup_model(self.input_size, c=self.tts_config) #FIXME: missing num_speakers argument to setup_model
|
# load speakers
|
||||||
|
if self.config.tts_speakers is not None:
|
||||||
|
self.tts_speakers = load_speaker_mapping(os.path.join(model_path, self.config.tts_speakers))
|
||||||
|
num_speakers = len(self.tts_speakers)
|
||||||
|
else:
|
||||||
|
num_speakers = 0
|
||||||
|
self.tts_model = setup_model(self.input_size, num_speakers=num_speakers , c=self.tts_config)
|
||||||
# load model state
|
# load model state
|
||||||
if use_cuda:
|
if use_cuda:
|
||||||
cp = torch.load(self.model_file)
|
cp = torch.load(self.model_file)
|
||||||
|
@ -58,6 +65,7 @@ class Synthesizer(object):
|
||||||
self.tts_model.decoder.max_decoder_steps = 3000
|
self.tts_model.decoder.max_decoder_steps = 3000
|
||||||
|
|
||||||
def load_wavernn(self, lib_path, model_path, model_file, model_config, use_cuda):
|
def load_wavernn(self, lib_path, model_path, model_file, model_config, use_cuda):
|
||||||
|
# TODO: add a model-setup function to the WaveRNN code base and call it here.
|
||||||
sys.path.append(lib_path) # set this if TTS is not installed globally
|
sys.path.append(lib_path) # set this if TTS is not installed globally
|
||||||
from WaveRNN.models.wavernn import Model
|
from WaveRNN.models.wavernn import Model
|
||||||
wavernn_config = os.path.join(model_path, model_config)
|
wavernn_config = os.path.join(model_path, model_config)
|
||||||
|
@ -70,8 +78,11 @@ class Synthesizer(object):
|
||||||
rnn_dims=512,
|
rnn_dims=512,
|
||||||
fc_dims=512,
|
fc_dims=512,
|
||||||
mode=self.wavernn_config.mode,
|
mode=self.wavernn_config.mode,
|
||||||
pad=2,
|
mulaw=self.wavernn_config.mulaw,
|
||||||
upsample_factors=self.wavernn_config.upsample_factors, # set this depending on dataset
|
pad=self.wavernn_config.pad,
|
||||||
|
use_aux_net=self.wavernn_config.use_aux_net,
|
||||||
|
use_upsample_net = self.wavernn_config.use_upsample_net,
|
||||||
|
upsample_factors=self.wavernn_config.upsample_factors,
|
||||||
feat_dims=80,
|
feat_dims=80,
|
||||||
compute_dims=128,
|
compute_dims=128,
|
||||||
res_out_dims=128,
|
res_out_dims=128,
|
||||||
|
|
Loading…
Reference in New Issue