From 20b4211af54f68415f24717bbaaf8415b94196f1 Mon Sep 17 00:00:00 2001 From: geneing Date: Fri, 3 Jan 2020 23:59:20 -0800 Subject: [PATCH] Change to GMMv2b --- layers/common_layers.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/layers/common_layers.py b/layers/common_layers.py index 006aa57a..66ffcd1c 100644 --- a/layers/common_layers.py +++ b/layers/common_layers.py @@ -159,20 +159,22 @@ class GravesAttention(nn.Module): k_t = gbk_t[:, 2, :] # attention GMM parameters - inv_sig_t = torch.exp(-torch.clamp(b_t, min=-6, max=9)) # variance + sig_t = torch.nn.functional.softplus(b_t)+self.eps + + #inv_sig_t = torch.exp(-torch.clamp(b_t, min=-6, max=9)) # variance mu_t = self.mu_prev + torch.nn.functional.softplus(k_t) - g_t = torch.softmax(g_t, dim=-1) * inv_sig_t + self.eps + g_t = torch.softmax(g_t, dim=-1) / sig_t + self.eps # each B x K x T_in g_t = g_t.unsqueeze(2).expand(g_t.size(0), g_t.size(1), inputs.size(1)) - inv_sig_t = inv_sig_t.unsqueeze(2).expand_as(g_t) + sig_t = sig_t.unsqueeze(2).expand_as(g_t) mu_t_ = mu_t.unsqueeze(2).expand_as(g_t) j = self.J[:g_t.size(0), :, :inputs.size(1)] # attention weights - phi_t = g_t * torch.exp(-0.5 * inv_sig_t * (mu_t_ - j)**2) + phi_t = g_t * torch.exp(-0.5 * (mu_t_ - j)**2 / (sig_t**2)) alpha_t = self.COEF * torch.sum(phi_t, 1) # apply masking