From e540a5495996e7fec9142b0c372f6c8b37356577 Mon Sep 17 00:00:00 2001 From: erogol Date: Wed, 19 Feb 2020 18:24:02 +0100 Subject: [PATCH] fix synthesize.py --- synthesize.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/synthesize.py b/synthesize.py index cb0ee8af..a338f8b8 100644 --- a/synthesize.py +++ b/synthesize.py @@ -25,14 +25,16 @@ def tts(model, t_1 = time.time() use_vocoder_model = vocoder_model is not None waveform, alignment, _, postnet_output, stop_tokens = synthesis( - model, text, C, use_cuda, ap, speaker_id, False, - C.enable_eos_bos_chars) + model, text, C, use_cuda, ap, speaker_id, style_wav=False, + truncated=False, enable_eos_bos_chars=C.enable_eos_bos_chars, + use_griffin_lim=(not use_vocoder_model), do_trim_silence=True) + if C.model == "Tacotron" and use_vocoder_model: postnet_output = ap.out_linear_to_mel(postnet_output.T).T # correct if there is a scale difference b/w two models - postnet_output = ap._denormalize(postnet_output) - postnet_output = ap_vocoder._normalize(postnet_output) if use_vocoder_model: + postnet_output = ap._denormalize(postnet_output) + postnet_output = ap_vocoder._normalize(postnet_output) vocoder_input = torch.FloatTensor(postnet_output.T).unsqueeze(0) waveform = vocoder_model.generate( vocoder_input.cuda() if use_cuda else vocoder_input, @@ -58,7 +60,7 @@ if __name__ == "__main__": parser.add_argument( 'out_path', type=str, - help='Path to save final wav file.', + help='Path to save final wav file. Wav file will be names as the text given.', ) parser.add_argument('--use_cuda', type=bool,