fix synthesize.py

This commit is contained in:
Eren Golge 2019-09-24 17:19:04 +02:00
parent 113f5860b8
commit 23f6743ac9
1 changed file with 22 additions and 3 deletions

View File

@@ -2,6 +2,7 @@ import os
import time
import argparse
import torch
import json
import string
from TTS.utils.synthesis import synthesis
@@ -16,22 +17,27 @@ def tts(model,
VC,
text,
ap,
ap_vocoder,
use_cuda,
batched_vocoder,
speaker_id=None,
figures=False):
t_1 = time.time()
use_vocoder_model = vocoder_model is not None
waveform, alignment, decoder_outputs, postnet_output, stop_tokens = synthesis(
model, text, C, use_cuda, ap, False, C.enable_eos_bos_chars)
model, text, C, use_cuda, ap, speaker_id, False, C.enable_eos_bos_chars)
if C.model == "Tacotron" and use_vocoder_model:
postnet_output = ap.out_linear_to_mel(postnet_output.T).T
# correct if there is a scale difference b/w two models
postnet_output = ap._denormalize(postnet_output)
postnet_output = ap_vocoder._normalize(postnet_output)
if use_vocoder_model:
vocoder_input = torch.FloatTensor(postnet_output.T).unsqueeze(0)
waveform = vocoder_model.generate(
vocoder_input.cuda() if use_cuda else vocoder_input,
batched=batched_vocoder,
target=11000,
overlap=550)
target=8000,
overlap=400)
print(" > Run-time: {}".format(time.time() - t_1))
return alignment, postnet_output, stop_tokens, waveform
@@ -81,6 +87,12 @@ if __name__ == "__main__":
help="JSON file for multi-speaker model.",
default=""
)
parser.add_argument(
'--speaker_id',
type=int,
help="target speaker_id if the model is multi-speaker.",
default=None
)
args = parser.parse_args()
if args.vocoder_path != "":
@@ -109,10 +121,12 @@ if __name__ == "__main__":
model.eval()
if args.use_cuda:
model.cuda()
model.decoder.set_r(cp['r'])
# load vocoder model
if args.vocoder_path != "":
VC = load_config(args.vocoder_config_path)
ap_vocoder = AudioProcessor(**VC.audio)
bits = 10
vocoder_model = VocoderModel(
rnn_dims=512,
@@ -127,6 +141,8 @@ if __name__ == "__main__":
res_blocks=10,
hop_length=ap.hop_length,
sample_rate=ap.sample_rate,
use_aux_net=True,
use_upsample_net=True
)
check = torch.load(args.vocoder_path)
@@ -137,6 +153,7 @@ if __name__ == "__main__":
else:
vocoder_model = None
VC = None
ap_vocoder = None
# synthesize voice
print(" > Text: {}".format(args.text))
@@ -147,8 +164,10 @@ if __name__ == "__main__":
VC,
args.text,
ap,
ap_vocoder,
args.use_cuda,
args.batched_vocoder,
speaker_id=args.speaker_id,
figures=False)
# save the results