From a9ce1d4f199adebc8ad5ce5e659951b2c81b3c69 Mon Sep 17 00:00:00 2001
From: Eren Golge <egolge@mozilla.com>
Date: Wed, 6 Mar 2019 13:43:29 +0100
Subject: [PATCH] Fix Tacotron and Tacotron2 forward() and update tests

---
 models/tacotron.py      | 2 +-
 models/tacotron2.py     | 2 +-
 tests/layers_tests.py   | 2 +-
 tests/tacotron_tests.py | 7 ++++---
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/models/tacotron.py b/models/tacotron.py
index 3ecd3f9e..11ff8740 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -28,7 +28,7 @@ class Tacotron(nn.Module):
             nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim),
             nn.Sigmoid())
 
-    def forward(self, characters, text_lengths, mel_specs=None):
+    def forward(self, characters, text_lengths, mel_specs):
         B = characters.size(0)
         mask = sequence_mask(text_lengths).to(characters.device)
         inputs = self.embedding(characters)
diff --git a/models/tacotron2.py b/models/tacotron2.py
index 70bdd89a..c0cbba26 100644
--- a/models/tacotron2.py
+++ b/models/tacotron2.py
@@ -28,7 +28,7 @@ class Tacotron2(nn.Module):
 
     def forward(self, text, text_lengths, mel_specs=None):
         # compute mask for padding
-        mask = sequence_mask(text_lengths).to(characters.device)
+        mask = sequence_mask(text_lengths).to(text.device)
         embedded_inputs = self.embedding(text).transpose(1, 2)
         encoder_outputs = self.encoder(embedded_inputs, text_lengths)
         mel_outputs, stop_tokens, alignments = self.decoder(
diff --git a/tests/layers_tests.py b/tests/layers_tests.py
index 5f769f9c..4ac7c8fc 100644
--- a/tests/layers_tests.py
+++ b/tests/layers_tests.py
@@ -42,7 +42,7 @@ class DecoderTests(unittest.TestCase):
         dummy_input = T.rand(4, 8, 256)
         dummy_memory = T.rand(4, 2, 80)
 
-        output, alignment, stop_tokens = layer(dummy_input, dummy_memory)
+        output, alignment, stop_tokens = layer(dummy_input, dummy_memory, mask=None)
 
         assert output.shape[0] == 4
         assert output.shape[1] == 1, "size not {}".format(output.shape[1])
diff --git a/tests/tacotron_tests.py b/tests/tacotron_tests.py
index 866e1aa4..2f76469a 100644
--- a/tests/tacotron_tests.py
+++ b/tests/tacotron_tests.py
@@ -21,6 +21,7 @@ c = load_config(os.path.join(file_path, 'test_config.json'))
 class TacotronTrainTest(unittest.TestCase):
     def test_train_step(self):
         input = torch.randint(0, 24, (8, 128)).long().to(device)
+        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
         mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
         linear_spec = torch.rand(8, 30, c.audio['num_freq']).to(device)
         mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
@@ -35,8 +36,8 @@ class TacotronTrainTest(unittest.TestCase):
 
         criterion = L1LossMasked().to(device)
         criterion_st = nn.BCELoss().to(device)
-        model = Tacotron(32, c.embedding_size, c.audio['num_freq'], c.audio['num_mels'],
-                         c.r, c.memory_size).to(device)
+        model = Tacotron(32, c.audio['num_freq'], c.audio['num_mels'],
+                         c.r, memory_size=c.memory_size).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
@@ -47,7 +48,7 @@ class TacotronTrainTest(unittest.TestCase):
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(5):
             mel_out, linear_out, align, stop_tokens = model.forward(
-                input, mel_spec)
+                input, input_lengths, mel_spec)
             assert stop_tokens.data.max() <= 1.0
             assert stop_tokens.data.min() >= 0.0
             optimizer.zero_grad()