From b9e0faca98cf036c2c21d21b966381a29557080e Mon Sep 17 00:00:00 2001
From: Eren Golge
Date: Thu, 31 Oct 2019 16:31:49 +0100
Subject: [PATCH] config update and bug fixes

---
 config.json             | 8 ++++++++
 layers/common_layers.py | 4 ++--
 layers/tacotron2.py     | 2 +-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/config.json b/config.json
index c958ebf3..75e50849 100644
--- a/config.json
+++ b/config.json
@@ -34,6 +34,7 @@
 
     "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.
 
+<<<<<<< HEAD
     // TRAINING
     "batch_size": 32, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
     "eval_batch_size":16,
@@ -47,6 +48,9 @@
     "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
 
     // OPTIMIZER
+=======
+    "model": "Tacotron2", // one of the model in models/
+>>>>>>> config update and bug fixes
     "grad_clip": 1, // upper limit for gradients for clipping.
     "epochs": 1000, // total number of epochs to train.
     "lr": 0.0001, // Initial learning rate. If Noam decay is active, maximum learning rate.
@@ -59,8 +63,12 @@
     "prenet_type": "original", // "original" or "bn".
     "prenet_dropout": true, // enable/disable dropout at prenet.
 
+<<<<<<< HEAD
     // ATTENTION
     "attention_type": "original", // 'original' or 'graves'
+=======
+    "attention_type": "graves", // 'original' or 'graves'
+>>>>>>> config update and bug fixes
     "attention_heads": 5, // number of attention heads (only for 'graves')
     "attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
     "windowing": false, // Enables attention windowing. Used only in eval mode.
diff --git a/layers/common_layers.py b/layers/common_layers.py
index 07f97588..8ea54f0e 100644
--- a/layers/common_layers.py
+++ b/layers/common_layers.py
@@ -134,11 +134,12 @@ class GravesAttention(nn.Module):
     def preprocess_inputs(self, inputs):
         return None
 
-    def forward(self, query, inputs, mask):
+    def forward(self, query, inputs, processed_inputs, mask):
         """
         shapes:
             query: B x D_attention_rnn
             inputs: B x T_in x D_encoder
+            processed_inputs: place_holder
             mask: B x T_in
         """
         gbk_t = self.N_a(query)
@@ -176,7 +177,6 @@ class GravesAttention(nn.Module):
         context = torch.bmm(alpha_t.unsqueeze(1), inputs).squeeze(1)
         self.attention_weights = alpha_t
         self.mu_prev = mu_t
-        breakpoint()
         return context
 
 
diff --git a/layers/tacotron2.py b/layers/tacotron2.py
index 1472bcff..aef30e10 100644
--- a/layers/tacotron2.py
+++ b/layers/tacotron2.py
@@ -180,7 +180,7 @@ class Decoder(nn.Module):
         self.context = torch.zeros(1, device=inputs.device).repeat(
             B, self.encoder_embedding_dim)
         self.inputs = inputs
-        self.processed_inputs = self.attention.inputs_layer(inputs)
+        self.processed_inputs = self.attention.preprocess_inputs(inputs)
         self.mask = mask
 
     def _reshape_memory(self, memory):
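
Note: the layers/common_layers.py and layers/tacotron2.py hunks give GravesAttention the same call interface as the original attention: preprocess_inputs() returns None (there is nothing to precompute for the GMM-style attention), forward() accepts a processed_inputs placeholder, and the Tacotron2 decoder now calls self.attention.preprocess_inputs(inputs) instead of reaching for attention.inputs_layer, which GravesAttention does not define. The sketch below is a minimal illustration of that shared-interface pattern, not the repository's code; the names ToyContentAttention, ToyGMMAttention and init_attention_states are invented for this example.

import torch
import torch.nn as nn


class ToyContentAttention(nn.Module):
    # Stand-in for the original attention: encoder outputs are projected
    # once per utterance and the projection is reused at every decoder step.
    def __init__(self, embedding_dim, attn_dim):
        super().__init__()
        self.inputs_layer = nn.Linear(embedding_dim, attn_dim, bias=False)

    def preprocess_inputs(self, inputs):
        return self.inputs_layer(inputs)           # B x T_in x attn_dim

    def forward(self, query, inputs, processed_inputs, mask):
        # A real implementation would score `query` against `processed_inputs`;
        # a mean over time stands in for the context vector here.
        return inputs.mean(dim=1)


class ToyGMMAttention(nn.Module):
    # Stand-in for GravesAttention: it works on the raw encoder outputs,
    # so there is nothing to precompute.
    def preprocess_inputs(self, inputs):
        return None                                # placeholder only

    def forward(self, query, inputs, processed_inputs, mask):
        # `processed_inputs` is ignored; it exists only so both attention
        # types share one signature: forward(query, inputs, processed_inputs, mask).
        return inputs.mean(dim=1)


def init_attention_states(attention, inputs):
    # Mirrors the decoder-side fix: always go through preprocess_inputs()
    # instead of attention.inputs_layer, which the GMM attention lacks.
    return inputs, attention.preprocess_inputs(inputs)


if __name__ == "__main__":
    enc_out = torch.rand(2, 50, 512)               # B x T_in x D_encoder
    query = torch.rand(2, 1024)                    # B x D_attention_rnn
    for attn in (ToyContentAttention(512, 128), ToyGMMAttention()):
        inputs, processed = init_attention_states(attn, enc_out)
        context = attn(query, inputs, processed, mask=None)
        print(type(attn).__name__, context.shape)  # torch.Size([2, 512])

With this arrangement the decoder's _init_states() does not need to know which attention type the config selects; swapping "attention_type" between 'original' and 'graves' only changes which class is constructed.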