From c9790bee2c058adb7c09ff5318d5c6513820c7dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Wed, 26 May 2021 09:54:48 +0200
Subject: [PATCH] update tacotron model to return `model_outputs`

---
 TTS/tts/models/tacotron.py  | 8 ++++----
 TTS/tts/models/tacotron2.py | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py
index 23bd839f..34f04159 100644
--- a/TTS/tts/models/tacotron.py
+++ b/TTS/tts/models/tacotron.py
@@ -255,7 +255,7 @@ class Tacotron(TacotronAbstract):
             outputs['alignments_backward'] = alignments_backward
             outputs['decoder_outputs_backward'] = decoder_outputs_backward
         outputs.update({
-            'postnet_outputs': postnet_outputs,
+            'model_outputs': postnet_outputs,
             'decoder_outputs': decoder_outputs,
             'alignments': alignments,
             'stop_tokens': stop_tokens
@@ -287,7 +287,7 @@ class Tacotron(TacotronAbstract):
         postnet_outputs = self.last_linear(postnet_outputs)
         decoder_outputs = decoder_outputs.transpose(1, 2)
         outputs = {
-            'postnet_outputs': postnet_outputs,
+            'model_outputs': postnet_outputs,
             'decoder_outputs': decoder_outputs,
             'alignments': alignments,
             'stop_tokens': stop_tokens
@@ -335,7 +335,7 @@ class Tacotron(TacotronAbstract):
 
         # compute loss
         loss_dict = criterion(
-            outputs['postnet_outputs'],
+            outputs['model_outputs'],
             outputs['decoder_outputs'],
             mel_input,
             linear_input,
@@ -355,7 +355,7 @@ class Tacotron(TacotronAbstract):
         return outputs, loss_dict
 
     def train_log(self, ap, batch, outputs):
-        postnet_outputs = outputs['postnet_outputs']
+        postnet_outputs = outputs['model_outputs']
         alignments = outputs['alignments']
         alignments_backward = outputs['alignments_backward']
         mel_input = batch['mel_input']
diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py
index 51b181e4..04b97606 100644
--- a/TTS/tts/models/tacotron2.py
+++ b/TTS/tts/models/tacotron2.py
@@ -233,7 +233,7 @@ class Tacotron2(TacotronAbstract):
             outputs['alignments_backward'] = alignments_backward
             outputs['decoder_outputs_backward'] = decoder_outputs_backward
         outputs.update({
-            'postnet_outputs': postnet_outputs,
+            'model_outputs': postnet_outputs,
             'decoder_outputs': decoder_outputs,
             'alignments': alignments,
             'stop_tokens': stop_tokens
@@ -254,7 +254,7 @@ class Tacotron2(TacotronAbstract):
                 x_vector = self.speaker_embedding(cond_input['speaker_ids'])[:, None]
                 x_vector = torch.unsqueeze(x_vector, 0).transpose(1, 2)
             else:
-                x_vector = cond_input
+                x_vector = cond_input['x_vectors']
             encoder_outputs = self._concat_speaker_embedding(
                 encoder_outputs, x_vector)
 
@@ -266,7 +266,7 @@ class Tacotron2(TacotronAbstract):
         decoder_outputs, postnet_outputs, alignments = self.shape_outputs(
             decoder_outputs, postnet_outputs, alignments)
         outputs = {
-            'postnet_outputs': postnet_outputs,
+            'model_outputs': postnet_outputs,
             'decoder_outputs': decoder_outputs,
             'alignments': alignments,
             'stop_tokens': stop_tokens
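
With this rename, any code that indexed outputs['postnet_outputs'] has to read outputs['model_outputs'] instead; the other output keys are unchanged. A minimal consumer-side sketch, assuming a constructed Tacotron/Tacotron2 instance and the inference(text_inputs, cond_input=...) entry point whose body is edited in the hunks above; the helper name synthesize_mel is illustrative and not part of this patch:

    import torch

    def synthesize_mel(model, text_inputs, cond_input=None):
        """Run inference and return the postnet spectrogram under the renamed key."""
        with torch.no_grad():
            outputs = model.inference(text_inputs, cond_input=cond_input)
        # was outputs['postnet_outputs'] before this patch
        return outputs['model_outputs']

Keeping the same 'model_outputs' key in both tacotron.py and tacotron2.py lets shared training and logging code (e.g. the criterion call and train_log edited above) stay model-agnostic.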