mirror of https://github.com/coqui-ai/TTS.git
bug fix for tacotron and tests update
parent b031a65677
commit a9ce1d4f19

@@ -28,7 +28,7 @@ class Tacotron(nn.Module):
             nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim),
             nn.Sigmoid())

-    def forward(self, characters, text_lengths, mel_specs=None):
+    def forward(self, characters, text_lengths, mel_specs):
         B = characters.size(0)
         mask = sequence_mask(text_lengths).to(characters.device)
         inputs = self.embedding(characters)

@@ -28,7 +28,7 @@ class Tacotron2(nn.Module):

     def forward(self, text, text_lengths, mel_specs=None):
         # compute mask for padding
-        mask = sequence_mask(text_lengths).to(characters.device)
+        mask = sequence_mask(text_lengths).to(text.device)
         embedded_inputs = self.embedding(text).transpose(1, 2)
         encoder_outputs = self.encoder(embedded_inputs, text_lengths)
         mel_outputs, stop_tokens, alignments = self.decoder(

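The change above is the core fix: `Tacotron2.forward` takes `text`, so the old `characters.device` referenced a name that does not exist in that scope. The mask itself only needs to be a boolean padding mask living on the same device as the input. A minimal sketch of that pattern, assuming `sequence_mask` behaves like the usual lengths-to-mask helper (the implementation below is illustrative, not the repository's exact code):

import torch

def sequence_mask(sequence_lengths, max_len=None):
    # (B, T) boolean mask: True while t < sequence_lengths[b], False over padding.
    if max_len is None:
        max_len = int(sequence_lengths.max().item())
    steps = torch.arange(max_len, device=sequence_lengths.device)
    return steps.unsqueeze(0) < sequence_lengths.unsqueeze(1)

text = torch.randint(0, 24, (8, 128)).long()          # padded character ids, as in the test below
text_lengths = torch.randint(100, 129, (8,)).long()   # true length of each sequence
mask = sequence_mask(text_lengths).to(text.device)    # keep the mask on the input's device
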
@@ -42,7 +42,7 @@ class DecoderTests(unittest.TestCase):
         dummy_input = T.rand(4, 8, 256)
         dummy_memory = T.rand(4, 2, 80)

-        output, alignment, stop_tokens = layer(dummy_input, dummy_memory)
+        output, alignment, stop_tokens = layer(dummy_input, dummy_memory, mask=None)

         assert output.shape[0] == 4
         assert output.shape[1] == 1, "size not {}".format(output.shape[1])

@@ -21,6 +21,7 @@ c = load_config(os.path.join(file_path, 'test_config.json'))
 class TacotronTrainTest(unittest.TestCase):
     def test_train_step(self):
         input = torch.randint(0, 24, (8, 128)).long().to(device)
+        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
         mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
         linear_spec = torch.rand(8, 30, c.audio['num_freq']).to(device)
         mel_lengths = torch.randint(20, 30, (8, )).long().to(device)

@@ -35,8 +36,8 @@ class TacotronTrainTest(unittest.TestCase):

         criterion = L1LossMasked().to(device)
         criterion_st = nn.BCELoss().to(device)
-        model = Tacotron(32, c.embedding_size, c.audio['num_freq'], c.audio['num_mels'],
-                         c.r, c.memory_size).to(device)
+        model = Tacotron(32, c.audio['num_freq'], c.audio['num_mels'],
+                         c.r, memory_size=c.memory_size).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0

@@ -47,7 +48,7 @@ class TacotronTrainTest(unittest.TestCase):
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(5):
             mel_out, linear_out, align, stop_tokens = model.forward(
-                input, mel_spec)
+                input, input_lengths, mel_spec)
             assert stop_tokens.data.max() <= 1.0
             assert stop_tokens.data.min() >= 0.0
             optimizer.zero_grad()
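
The asserts on `stop_tokens` pair with `criterion_st = nn.BCELoss()` above: BCE expects probabilities in [0, 1], which the model's stop-token output is assumed to provide (e.g. via a sigmoid). A small self-contained sketch of that relationship, with illustrative shapes rather than the repository's real ones:

import torch
import torch.nn as nn

stop_logits = torch.randn(8, 30)                      # raw scores from a stop-token head
stop_tokens = torch.sigmoid(stop_logits)              # bounded to [0, 1], safe for BCELoss
stop_targets = torch.randint(0, 2, (8, 30)).float()   # 1 marks the frame where decoding should stop
loss = nn.BCELoss()(stop_tokens, stop_targets)        # BCE between stop probabilities and targets
assert stop_tokens.max() <= 1.0 and stop_tokens.min() >= 0.0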