fix speaker-embeddings dimension during inference

Alexander Korolev 2021-06-01 11:06:35 +02:00 committed by GitHub
parent d0ab0382fc
commit 5b89ef2c6e
1 changed file with 1 addition and 0 deletions


@@ -277,6 +277,7 @@ class Tacotron2(TacotronAbstract):
 if self.num_speakers > 1:
     if not self.embeddings_per_sample:
         speaker_embeddings = self.speaker_embedding(speaker_ids)[:, None]
+        speaker_embeddings = torch.unsqueeze(speaker_embeddings, 0).transpose(1, 2)
     encoder_outputs = self._concat_speaker_embedding(encoder_outputs, speaker_embeddings)
 mel_outputs, alignments, stop_tokens = self.decoder.inference_truncated(encoder_outputs)
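
Below is a minimal, self-contained sketch of the shape mismatch this one-line fix appears to address. The concrete dimensions, the 0-dimensional speaker_ids, and the expand-then-concatenate behavior attributed to _concat_speaker_embedding are assumptions made for illustration, not code taken from the repository:

import torch

# Assumed toy dimensions: batch, encoder time steps, encoder channels, speaker-embedding size.
B, T, C, D = 1, 50, 512, 64

encoder_outputs = torch.randn(B, T, C)
speaker_embedding = torch.nn.Embedding(10, D)

# Assumption: during truncated inference the speaker id arrives as a 0-d tensor.
speaker_ids = torch.tensor(3)

# Before the fix: embedding a 0-d id yields a 1-d tensor of shape [D],
# so [:, None] produces [D, 1], which cannot be expanded over [B, T, -1].
speaker_embeddings = speaker_embedding(speaker_ids)[:, None]                  # [D, 1]

# The committed line: add a leading batch axis and swap the trailing axes,
# giving [1, 1, D], which aligns with the [B, T, C] encoder output.
speaker_embeddings = torch.unsqueeze(speaker_embeddings, 0).transpose(1, 2)   # [1, 1, D]

# Mirrors the assumed behavior of _concat_speaker_embedding: expand the
# embedding over batch and time, then concatenate along the channel axis.
speaker_embeddings = speaker_embeddings.expand(encoder_outputs.size(0), encoder_outputs.size(1), -1)
encoder_outputs = torch.cat([encoder_outputs, speaker_embeddings], dim=-1)
print(encoder_outputs.shape)  # torch.Size([1, 50, 576])

With the [1, 1, D] shape in place, the concatenated encoder output carries C + D channels per time step, which is what the decoder's inference_truncated call then consumes.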