From 3b57e88a66ba1f410be70dbd2ad2899b5b1bcb0e Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 13 Feb 2020 15:49:46 +0100 Subject: [PATCH] Use PWGAN if available in Synthesizer.tts --- server/synthesizer.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/server/synthesizer.py b/server/synthesizer.py index 75fd4e76..455bd332 100644 --- a/server/synthesizer.py +++ b/server/synthesizer.py @@ -168,9 +168,16 @@ class Synthesizer(object): postnet_output, decoder_output, _ = parse_outputs( postnet_output, decoder_output, alignments) + if self.pwgan: + vocoder_input = torch.FloatTensor(postnet_output.T).unsqueeze(0) + if self.use_cuda: + vocoder_input.cuda() + wav = self.pwgan.inference(vocoder_input, hop_size=self.ap.hop_length) if self.wavernn: - postnet_output = postnet_output[0].data.cpu().numpy() - wav = self.wavernn.generate(torch.FloatTensor(postnet_output.T).unsqueeze(0).cuda(), batched=self.config.is_wavernn_batched, target=11000, overlap=550) + vocoder_input = torch.FloatTensor(postnet_output.T).unsqueeze(0) + if self.use_cuda: + vocoder_input.cuda() + wav = self.wavernn.generate(vocoder_input, batched=self.config.is_wavernn_batched, target=11000, overlap=550) else: wav = inv_spectrogram(postnet_output, self.ap, self.tts_config) # trim silence