From a9ce1d4f199adebc8ad5ce5e659951b2c81b3c69 Mon Sep 17 00:00:00 2001 From: Eren Golge Date: Wed, 6 Mar 2019 13:43:29 +0100 Subject: [PATCH] bug fix for tacotron and tests update --- models/tacotron.py | 2 +- models/tacotron2.py | 2 +- tests/layers_tests.py | 2 +- tests/tacotron_tests.py | 7 ++++--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/models/tacotron.py b/models/tacotron.py index 3ecd3f9e..11ff8740 100644 --- a/models/tacotron.py +++ b/models/tacotron.py @@ -28,7 +28,7 @@ class Tacotron(nn.Module): nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim), nn.Sigmoid()) - def forward(self, characters, text_lengths, mel_specs=None): + def forward(self, characters, text_lengths, mel_specs): B = characters.size(0) mask = sequence_mask(text_lengths).to(characters.device) inputs = self.embedding(characters) diff --git a/models/tacotron2.py b/models/tacotron2.py index 70bdd89a..c0cbba26 100644 --- a/models/tacotron2.py +++ b/models/tacotron2.py @@ -28,7 +28,7 @@ class Tacotron2(nn.Module): def forward(self, text, text_lengths, mel_specs=None): # compute mask for padding - mask = sequence_mask(text_lengths).to(characters.device) + mask = sequence_mask(text_lengths).to(text.device) embedded_inputs = self.embedding(text).transpose(1, 2) encoder_outputs = self.encoder(embedded_inputs, text_lengths) mel_outputs, stop_tokens, alignments = self.decoder( diff --git a/tests/layers_tests.py b/tests/layers_tests.py index 5f769f9c..4ac7c8fc 100644 --- a/tests/layers_tests.py +++ b/tests/layers_tests.py @@ -42,7 +42,7 @@ class DecoderTests(unittest.TestCase): dummy_input = T.rand(4, 8, 256) dummy_memory = T.rand(4, 2, 80) - output, alignment, stop_tokens = layer(dummy_input, dummy_memory) + output, alignment, stop_tokens = layer(dummy_input, dummy_memory, mask=None) assert output.shape[0] == 4 assert output.shape[1] == 1, "size not {}".format(output.shape[1]) diff --git a/tests/tacotron_tests.py b/tests/tacotron_tests.py index 866e1aa4..2f76469a 100644 --- a/tests/tacotron_tests.py +++ b/tests/tacotron_tests.py @@ -21,6 +21,7 @@ c = load_config(os.path.join(file_path, 'test_config.json')) class TacotronTrainTest(unittest.TestCase): def test_train_step(self): input = torch.randint(0, 24, (8, 128)).long().to(device) + input_lengths = torch.randint(100, 129, (8, )).long().to(device) mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) linear_spec = torch.rand(8, 30, c.audio['num_freq']).to(device) mel_lengths = torch.randint(20, 30, (8, )).long().to(device) @@ -35,8 +36,8 @@ class TacotronTrainTest(unittest.TestCase): criterion = L1LossMasked().to(device) criterion_st = nn.BCELoss().to(device) - model = Tacotron(32, c.embedding_size, c.audio['num_freq'], c.audio['num_mels'], - c.r, c.memory_size).to(device) + model = Tacotron(32, c.audio['num_freq'], c.audio['num_mels'], + c.r, memory_size=c.memory_size).to(device) model.train() model_ref = copy.deepcopy(model) count = 0 @@ -47,7 +48,7 @@ class TacotronTrainTest(unittest.TestCase): optimizer = optim.Adam(model.parameters(), lr=c.lr) for i in range(5): mel_out, linear_out, align, stop_tokens = model.forward( - input, mel_spec) + input, input_lengths, mel_spec) assert stop_tokens.data.max() <= 1.0 assert stop_tokens.data.min() >= 0.0 optimizer.zero_grad()