mirror of https://github.com/coqui-ai/TTS.git
Remove empty lines
parent e6112f7b2d
commit a5f66b58e0
@@ -22,11 +22,9 @@ class BahdanauAttention(nn.Module):
        # (batch, 1, dim)
        processed_query = self.query_layer(query)
        processed_annots = self.annot_layer(annots)
        # (batch, max_time, 1)
        alignment = self.v(nn.functional.tanh(
            processed_query + processed_annots))
        # (batch, max_time)
        return alignment.squeeze(-1)
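For reference, a minimal shape walk-through of the additive (Bahdanau) scoring above. The sizes and the standalone Linear layers are illustrative assumptions standing in for self.query_layer, self.annot_layer and self.v, not the module's actual configuration:

import torch
import torch.nn as nn

batch, max_time, dim = 2, 7, 256                 # placeholder sizes
query = torch.randn(batch, 1, dim)               # decoder state, (batch, 1, dim)
annots = torch.randn(batch, max_time, dim)       # encoder outputs, (batch, max_time, dim)

query_layer = nn.Linear(dim, dim)                # stands in for self.query_layer
annot_layer = nn.Linear(dim, dim)                # stands in for self.annot_layer
v = nn.Linear(dim, 1, bias=False)                # stands in for self.v

# (batch, 1, dim) broadcasts against (batch, max_time, dim)
alignment = v(torch.tanh(query_layer(query) + annot_layer(annots)))
print(alignment.shape)                           # torch.Size([2, 7, 1])
print(alignment.squeeze(-1).shape)               # torch.Size([2, 7]) -- one score per encoder step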
@@ -54,31 +52,23 @@ class AttentionRNN(nn.Module):
    def forward(self, memory, context, rnn_state, annotations,
                mask=None, annotations_lengths=None):
        if annotations_lengths is not None and mask is None:
            mask = get_mask_from_lengths(annotations, annotations_lengths)
        # Concat input query and previous context vector
        rnn_input = torch.cat((memory, context), -1)
        # rnn_input = rnn_input.unsqueeze(1)
        # Feed it to RNN
        # s_i = f(y_{i-1}, c_{i}, s_{i-1})
        rnn_output = self.rnn_cell(rnn_input, rnn_state)
        # Alignment
        # (batch, max_time)
        # e_{ij} = a(s_{i-1}, h_j)
        alignment = self.alignment_model(annotations, rnn_output)
        if mask is not None:
            # mask padded positions before normalization
            mask = mask.view(memory.size(0), -1)
            alignment.data.masked_fill_(mask, self.score_mask_value)
        # Normalize context weight
        alignment = F.softmax(alignment, dim=-1)
        # Attention context vector
        # (batch, 1, dim)
        # c_i = \sum_{j=1}^{T_x} \alpha_{ij} h_j
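The hunk is cut off before the context vector is actually computed, but the c_i comment pins down what has to happen next. A hedged sketch of the masking, softmax, and weighted sum; the bmm-based reduction and the placeholder mask are my assumptions, not necessarily the file's exact code:

import torch
import torch.nn.functional as F

batch, max_time, dim = 2, 7, 256
alignment = torch.randn(batch, max_time)          # e_{ij} scores from the alignment model
annotations = torch.randn(batch, max_time, dim)   # encoder outputs h_j
mask = torch.zeros(batch, max_time, dtype=torch.bool)
mask[:, 5:] = True                                # pretend the last two steps are padding

# fill padded positions with a very negative score before the softmax
alignment = alignment.masked_fill(mask, -float("inf"))
alpha = F.softmax(alignment, dim=-1)              # \alpha_{ij}, (batch, max_time)

# c_i = \sum_{j=1}^{T_x} \alpha_{ij} h_j  ->  (batch, 1, dim)
context = torch.bmm(alpha.unsqueeze(1), annotations)
print(context.shape)                              # torch.Size([2, 1, 256])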
@@ -102,22 +102,18 @@ class CBHG(nn.Module):
        super(CBHG, self).__init__()
        self.in_features = in_features
        self.relu = nn.ReLU()
        # list of conv1d banks with filter sizes k=1...K
        # TODO: try dilated layers instead
        self.conv1d_banks = nn.ModuleList(
            [BatchNormConv1d(in_features, in_features, kernel_size=k, stride=1,
                             padding=k // 2, activation=self.relu)
             for k in range(1, K + 1)])
        # max pooling of conv bank
        # TODO: try average pooling OR larger kernel size
        self.max_pool1d = nn.MaxPool1d(kernel_size=2, stride=1, padding=1)
        out_features = [K * in_features] + projections[:-1]
        activations = [self.relu] * (len(projections) - 1)
        activations += [None]
        # setup conv1d projection layers
        layer_set = []
        for (in_size, out_size, ac) in zip(out_features, projections, activations):
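A quick sketch of what the bank built above produces. K and the feature size are placeholder values, and a plain nn.Conv1d stands in for BatchNormConv1d purely to show the shapes:

import torch
import torch.nn as nn

B, T, in_features, K = 2, 20, 128, 4             # placeholder sizes

# same kernel_size/stride/padding pattern as the bank above
banks = nn.ModuleList(
    [nn.Conv1d(in_features, in_features, kernel_size=k, stride=1, padding=k // 2)
     for k in range(1, K + 1)])

x = torch.randn(B, in_features, T)
outs = [conv(x)[:, :, :T] for conv in banks]     # even kernels pad one extra step, so trim to T
stacked = torch.cat(outs, dim=1)
print(stacked.shape)                             # torch.Size([2, 512, 20]) == (B, in_features * K, T)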
@@ -125,12 +121,10 @@ class CBHG(nn.Module):
                                     padding=1, activation=ac)
            layer_set.append(layer)
        self.conv1d_projections = nn.ModuleList(layer_set)
        # setup Highway layers
        self.pre_highway = nn.Linear(projections[-1], in_features, bias=False)
        self.highways = nn.ModuleList(
            [Highway(in_features, in_features) for _ in range(num_highways)])
        # bi-directional GRU layer
        self.gru = nn.GRU(
            in_features, in_features, 1, batch_first=True, bidirectional=True)
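Highway is only referenced here, not shown in the diff. As a reminder, this is the standard highway formulation (Srivastava et al.), a generic sketch assumed rather than copied from this repo's layers module:

import torch
import torch.nn as nn

class Highway(nn.Module):
    """y = H(x) * T(x) + x * (1 - T(x)) -- generic highway layer sketch."""
    def __init__(self, in_size, out_size):
        super().__init__()
        self.H = nn.Linear(in_size, out_size)
        self.T = nn.Linear(in_size, out_size)
        self.T.bias.data.fill_(-1.0)             # bias the gate toward carrying the input through

    def forward(self, x):
        h = torch.relu(self.H(x))
        t = torch.sigmoid(self.T(x))
        return h * t + x * (1.0 - t)

x = torch.randn(2, 10, 128)
print(Highway(128, 128)(x).shape)                # torch.Size([2, 10, 128])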
@@ -138,14 +132,11 @@ class CBHG(nn.Module):
    def forward(self, inputs):
        # (B, T_in, in_features)
        x = inputs
        # Needed to perform conv1d on time-axis
        # (B, in_features, T_in)
        if x.size(-1) == self.in_features:
            x = x.transpose(1, 2)
        T = x.size(-1)
        # (B, in_features*K, T_in)
        # Concat conv1d bank outputs
        outs = []
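The transpose above exists because nn.Conv1d convolves over the last axis and expects channels on axis 1. A one-line check with arbitrary sizes:

import torch
import torch.nn as nn

x = torch.randn(2, 20, 128)                      # (B, T_in, in_features)
conv = nn.Conv1d(128, 128, kernel_size=3, padding=1)
y = conv(x.transpose(1, 2))                      # Conv1d wants (B, in_features, T_in)
print(y.shape)                                   # torch.Size([2, 128, 20])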
@@ -153,29 +144,22 @@ class CBHG(nn.Module):
            out = conv1d(x)
            out = out[:, :, :T]
            outs.append(out)
        x = torch.cat(outs, dim=1)
        assert x.size(1) == self.in_features * len(self.conv1d_banks)
        x = self.max_pool1d(x)[:, :, :T]
        for conv1d in self.conv1d_projections:
            x = conv1d(x)
        # (B, T_in, in_features)
        # Back to the original shape
        x = x.transpose(1, 2)
        if x.size(-1) != self.in_features:
            x = self.pre_highway(x)
        # Residual connection
        # TODO: try residual scaling as in Deep Voice 3
        # TODO: try plain residual layers
        x += inputs
        for highway in self.highways:
            x = highway(x)
        # (B, T_in, in_features*2)
        # TODO: replace GRU with convolution as in Deep Voice 3
        self.gru.flatten_parameters()
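The hunk stops right before the GRU call, but the comment above fixes the expected output shape. A hedged sketch of what the bidirectional GRU built in __init__ does to the highway output (a bare nn.GRU with placeholder sizes, not the full CBHG):

import torch
import torch.nn as nn

B, T, in_features = 2, 20, 128
gru = nn.GRU(in_features, in_features, 1, batch_first=True, bidirectional=True)

x = torch.randn(B, T, in_features)               # highway output, (B, T_in, in_features)
gru.flatten_parameters()
outputs, _ = gru(x)
print(outputs.shape)                             # torch.Size([2, 20, 256]) == (B, T_in, in_features * 2)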