From c72470bcfc13f030fd30356f865b182c683a503e Mon Sep 17 00:00:00 2001 From: Eren Golge Date: Mon, 24 Jun 2019 16:57:29 +0200 Subject: [PATCH 1/2] update forward attention --- layers/common_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/layers/common_layers.py b/layers/common_layers.py index b6f72bc1..f7b8e7ed 100644 --- a/layers/common_layers.py +++ b/layers/common_layers.py @@ -208,7 +208,7 @@ class Attention(nn.Module): _, n = prev_alpha.max(1) val, n2 = alpha.max(1) for b in range(alignment.shape[0]): - alpha[b, n[b] + 2:] = 0 + alpha[b, n[b] + 3:] = 0 alpha[b, :(n[b] - 1)] = 0 # ignore all previous states to prevent repetition. alpha[b, (n[b] - 2)] = 0.01 * val[b] # smoothing factor for the prev step # compute attention weights From 464cc2975634397317838cb70cd35d8b7f1867a5 Mon Sep 17 00:00:00 2001 From: Eren Golge Date: Wed, 26 Jun 2019 14:11:30 +0200 Subject: [PATCH 2/2] Make resampling of the read wav optional --- utils/audio.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/utils/audio.py b/utils/audio.py index e14f2b7e..c943e0bb 100644 --- a/utils/audio.py +++ b/utils/audio.py @@ -230,12 +230,13 @@ class AudioProcessor(object): x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1) return x - def load_wav(self, filename, encode=False): - x, sr = sf.read(filename) - # x, sr = librosa.load(filename, sr=self.sample_rate) + def load_wav(self, filename, sr=None): + if sr is None: + x, sr = sf.read(filename) + else: + x, sr = librosa.load(filename, sr=sr) if self.do_trim_silence: x = self.trim_silence(x) - # sr, x = io.wavfile.read(filename) assert self.sample_rate == sr, "%s vs %s"%(self.sample_rate, sr) return x