From c72470bcfc13f030fd30356f865b182c683a503e Mon Sep 17 00:00:00 2001
From: Eren Golge <egolge@mozilla.com>
Date: Mon, 24 Jun 2019 16:57:29 +0200
Subject: [PATCH 1/2] update forward attention

---
 layers/common_layers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/layers/common_layers.py b/layers/common_layers.py
index b6f72bc1..f7b8e7ed 100644
--- a/layers/common_layers.py
+++ b/layers/common_layers.py
@@ -208,7 +208,7 @@ class Attention(nn.Module):
             _, n = prev_alpha.max(1)
             val, n2 = alpha.max(1)
             for b in range(alignment.shape[0]):
-                alpha[b, n[b] + 2:] = 0
+                alpha[b, n[b] + 3:] = 0
                 alpha[b, :(n[b] - 1)] = 0  # ignore all previous states to prevent repetition.
                 alpha[b, (n[b] - 2)] = 0.01 * val[b]  # smoothing factor for the prev step
         # compute attention weights

From 464cc2975634397317838cb70cd35d8b7f1867a5 Mon Sep 17 00:00:00 2001
From: Eren Golge <egolge@mozilla.com>
Date: Wed, 26 Jun 2019 14:11:30 +0200
Subject: [PATCH 2/2] Make resampling of the read wav optional

---
 utils/audio.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/utils/audio.py b/utils/audio.py
index e14f2b7e..c943e0bb 100644
--- a/utils/audio.py
+++ b/utils/audio.py
@@ -230,12 +230,13 @@ class AudioProcessor(object):
         x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)
         return x
 
-    def load_wav(self, filename, encode=False):
-        x, sr = sf.read(filename)
-        # x, sr = librosa.load(filename, sr=self.sample_rate)
+    def load_wav(self, filename, sr=None):
+        if sr is None:
+            x, sr = sf.read(filename)
+        else:
+            x, sr = librosa.load(filename, sr=sr)
         if self.do_trim_silence:
             x = self.trim_silence(x)
-        # sr, x = io.wavfile.read(filename)
         assert self.sample_rate == sr, "%s vs %s"%(self.sample_rate, sr)
         return x