diff --git a/utils/synthesis.py b/utils/synthesis.py index 19938611..2c26e883 100644 --- a/utils/synthesis.py +++ b/utils/synthesis.py @@ -9,7 +9,6 @@ from matplotlib import pylab as plt def synthesis(m, s, CONFIG, use_cuda, ap): - """ Given the text, synthesising the audio """ text_cleaner = [CONFIG.text_cleaner] if CONFIG.use_phonemes: seq = np.asarray( @@ -20,11 +19,14 @@ def synthesis(m, s, CONFIG, use_cuda, ap): chars_var = torch.from_numpy(seq).unsqueeze(0) if use_cuda: chars_var = chars_var.cuda() - mel_spec, linear_spec, alignments, stop_tokens = m.inference( + decoder_output, postnet_output, alignments, stop_tokens = m.inference( chars_var.long()) - linear_spec = linear_spec[0].data.cpu().numpy() - mel_spec = mel_spec[0].data.cpu().numpy() + postnet_output = postnet_output[0].data.cpu().numpy() + decoder_output = decoder_output[0].data.cpu().numpy() alignment = alignments[0].cpu().data.numpy() - wav = ap.inv_spectrogram(linear_spec.T) + if CONFIG.model == "Tacotron": + wav = ap.inv_spectrogram(postnet_output.T) + else: + wav = ap.inv_mel_spectrogram(postnet_output.T) wav = wav[:ap.find_endpoint(wav)] - return wav, alignment, linear_spec, mel_spec, stop_tokens \ No newline at end of file + return wav, alignment, decoder_output, postnet_output, stop_tokens \ No newline at end of file