demo server update

Eren Golge 2019-07-22 15:10:06 +02:00
parent 47c0c5b2db
commit 54fb236c86
2 changed files with 21 additions and 9 deletions


@@ -1,11 +1,12 @@
 {
-    "tts_path":"/media/erogol/data_ssd/Data/models/ljspeech_models/ljspeech-April-08-2019_07+32PM-8a47b46/", // tts model root folder
-    "tts_file":"checkpoint_261000.pth.tar", // tts checkpoint file
+    "tts_path":"/media/erogol/data_ssd/Models/libri_tts/ljspeech-July-22-2019_10+45AM-ee706b5/", // tts model root folder
+    "tts_file":"best_model.pth.tar", // tts checkpoint file
     "tts_config":"config.json", // tts config.json file
-    "wavernn_lib_path": "/home/erogol/projects/", // Rootpath to wavernn project folder to be important. If this is none, model uses GL for speech synthesis.
-    "wavernn_path":"/media/erogol/data_ssd/Data/models/wavernn/ljspeech/mold_ljspeech_best_model/", // wavernn model root path
-    "wavernn_file":"checkpoint_433000.pth.tar", // wavernn checkpoint file name
-    "wavernn_config":"config.json", // wavernn config file
+    "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
+    "wavernn_lib_path": "/home/erogol/projects/", // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis.
+    "wavernn_path":"/media/erogol/data_ssd/Models/wavernn/universal/4910/", // wavernn model root path
+    "wavernn_file":"best_model_16K.pth.tar", // wavernn checkpoint file name
+    "wavernn_config":"config_16K.json", // wavernn config file
     "is_wavernn_batched":true,
     "port": 5002,
     "use_cuda": true,


@@ -8,6 +8,7 @@ import sys
 from utils.audio import AudioProcessor
 from utils.generic_utils import load_config, setup_model
 from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence, sequence_to_phoneme
+from utils.speakers import load_speaker_mapping
 
 import re
 alphabets = r"([A-Za-z])"
@@ -44,7 +45,13 @@ class Synthesizer(object):
         else:
             self.input_size = len(symbols)
             self.input_adapter = lambda sen: text_to_sequence(sen, [self.tts_config.text_cleaner])
-        self.tts_model = setup_model(self.input_size, c=self.tts_config) #FIXME: missing num_speakers argument to setup_model
+        # load speakers
+        if self.config.tts_speakers is not None:
+            self.tts_speakers = load_speaker_mapping(os.path.join(model_path, self.config.tts_speakers))
+            num_speakers = len(self.tts_speakers)
+        else:
+            num_speakers = 0
+        self.tts_model = setup_model(self.input_size, num_speakers=num_speakers, c=self.tts_config)
         # load model state
         if use_cuda:
             cp = torch.load(self.model_file)
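
The new tts_speakers entry points at a JSON file listing speaker ids; when it is set, the speaker count is passed to setup_model as num_speakers so the speaker embedding is sized correctly, and it stays 0 otherwise. The sketch below mirrors that branch with a standalone loader; load_speaker_mapping_sketch, the assumed name-to-id layout, and the speakers.json path are illustrative, not the actual utils.speakers.load_speaker_mapping.

import json
import os


def load_speaker_mapping_sketch(path):
    # Assumed layout: a JSON object mapping speaker name -> integer id.
    # The real helper is utils.speakers.load_speaker_mapping; its format may differ.
    with open(path, "r") as f:
        return json.load(f)


# Mirrors the logic added above: the speaker count sizes the model's speaker embedding.
speakers_file = "speakers.json"  # hypothetical; conf.json ships with "tts_speakers": null
if os.path.isfile(speakers_file):
    num_speakers = len(load_speaker_mapping_sketch(speakers_file))
else:
    num_speakers = 0
print("num_speakers:", num_speakers)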
@@ -58,6 +65,7 @@ class Synthesizer(object):
         self.tts_model.decoder.max_decoder_steps = 3000
 
     def load_wavernn(self, lib_path, model_path, model_file, model_config, use_cuda):
+        # TODO: set a function in wavernn code base for model setup and call it here.
         sys.path.append(lib_path) # set this if TTS is not installed globally
         from WaveRNN.models.wavernn import Model
         wavernn_config = os.path.join(model_path, model_config)
@@ -70,8 +78,11 @@ class Synthesizer(object):
             rnn_dims=512,
             fc_dims=512,
             mode=self.wavernn_config.mode,
-            pad=2,
-            upsample_factors=self.wavernn_config.upsample_factors, # set this depending on dataset
+            mulaw=self.wavernn_config.mulaw,
+            pad=self.wavernn_config.pad,
+            use_aux_net=self.wavernn_config.use_aux_net,
+            use_upsample_net=self.wavernn_config.use_upsample_net,
+            upsample_factors=self.wavernn_config.upsample_factors,
             feat_dims=80,
             compute_dims=128,
             res_out_dims=128,
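
With this change the WaveRNN keyword arguments mulaw, pad, use_aux_net, use_upsample_net, and upsample_factors are no longer hard-coded but read from the vocoder's own config (config_16K.json in the server config above), so that file must provide them. Below is a small, optional validation sketch; the key list comes from the kwargs in the hunk above, while the function name and the comment-stripping detail are assumptions rather than part of this commit.

import json
import re

# Keys the updated load_wavernn reads from the WaveRNN config (see the hunk above).
REQUIRED_WAVERNN_KEYS = (
    "mode",
    "mulaw",
    "pad",
    "use_aux_net",
    "use_upsample_net",
    "upsample_factors",
)


def check_wavernn_config(path):
    """Fail early if the vocoder config lacks a field the server will read."""
    with open(path, "r") as f:
        text = re.sub(r"//.*", "", f.read())  # these configs may carry // comments
    cfg = json.loads(text)
    missing = [key for key in REQUIRED_WAVERNN_KEYS if key not in cfg]
    if missing:
        raise KeyError("WaveRNN config %s is missing: %s" % (path, missing))
    return cfg


# Example call with the path from conf.json (adjust to your own setup):
# check_wavernn_config("/media/erogol/data_ssd/Models/wavernn/universal/4910/config_16K.json")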