mirror of https://github.com/coqui-ai/TTS.git
Remove empty lines
This commit is contained in:
parent
e6112f7b2d
commit
a5f66b58e0
|
@ -22,11 +22,9 @@ class BahdanauAttention(nn.Module):
|
||||||
# (batch, 1, dim)
|
# (batch, 1, dim)
|
||||||
processed_query = self.query_layer(query)
|
processed_query = self.query_layer(query)
|
||||||
processed_annots = self.annot_layer(annots)
|
processed_annots = self.annot_layer(annots)
|
||||||
|
|
||||||
# (batch, max_time, 1)
|
# (batch, max_time, 1)
|
||||||
alignment = self.v(nn.functional.tanh(
|
alignment = self.v(nn.functional.tanh(
|
||||||
processed_query + processed_annots))
|
processed_query + processed_annots))
|
||||||
|
|
||||||
# (batch, max_time)
|
# (batch, max_time)
|
||||||
return alignment.squeeze(-1)
|
return alignment.squeeze(-1)
|
||||||
|
|
||||||
|
@ -54,31 +52,23 @@ class AttentionRNN(nn.Module):
|
||||||
|
|
||||||
def forward(self, memory, context, rnn_state, annotations,
|
def forward(self, memory, context, rnn_state, annotations,
|
||||||
mask=None, annotations_lengths=None):
|
mask=None, annotations_lengths=None):
|
||||||
|
|
||||||
if annotations_lengths is not None and mask is None:
|
if annotations_lengths is not None and mask is None:
|
||||||
mask = get_mask_from_lengths(annotations, annotations_lengths)
|
mask = get_mask_from_lengths(annotations, annotations_lengths)
|
||||||
|
|
||||||
# Concat input query and previous context
|
# Concat input query and previous context
|
||||||
rnn_input = torch.cat((memory, context), -1)
|
rnn_input = torch.cat((memory, context), -1)
|
||||||
#rnn_input = rnn_input.unsqueeze(1)
|
|
||||||
|
|
||||||
# Feed it to RNN
|
# Feed it to RNN
|
||||||
# s_i = f(y_{i-1}, c_{i}, s_{i-1})
|
# s_i = f(y_{i-1}, c_{i}, s_{i-1})
|
||||||
rnn_output = self.rnn_cell(rnn_input, rnn_state)
|
rnn_output = self.rnn_cell(rnn_input, rnn_state)
|
||||||
|
|
||||||
# Alignment
|
# Alignment
|
||||||
# (batch, max_time)
|
# (batch, max_time)
|
||||||
# e_{ij} = a(s_{i-1}, h_j)
|
# e_{ij} = a(s_{i-1}, h_j)
|
||||||
alignment = self.alignment_model(annotations, rnn_output)
|
alignment = self.alignment_model(annotations, rnn_output)
|
||||||
|
|
||||||
# TODO: needs recheck.
|
# TODO: needs recheck.
|
||||||
if mask is not None:
|
if mask is not None:
|
||||||
mask = mask.view(query.size(0), -1)
|
mask = mask.view(query.size(0), -1)
|
||||||
alignment.data.masked_fill_(mask, self.score_mask_value)
|
alignment.data.masked_fill_(mask, self.score_mask_value)
|
||||||
|
|
||||||
# Normalize context weight
|
# Normalize context weight
|
||||||
alignment = F.softmax(alignment, dim=-1)
|
alignment = F.softmax(alignment, dim=-1)
|
||||||
|
|
||||||
# Attention context vector
|
# Attention context vector
|
||||||
# (batch, 1, dim)
|
# (batch, 1, dim)
|
||||||
# c_i = \sum_{j=1}^{T_x} \alpha_{ij} h_j
|
# c_i = \sum_{j=1}^{T_x} \alpha_{ij} h_j
|
||||||
|
|
|
@ -102,22 +102,18 @@ class CBHG(nn.Module):
|
||||||
super(CBHG, self).__init__()
|
super(CBHG, self).__init__()
|
||||||
self.in_features = in_features
|
self.in_features = in_features
|
||||||
self.relu = nn.ReLU()
|
self.relu = nn.ReLU()
|
||||||
|
|
||||||
# list of conv1d bank with filter size k=1...K
|
# list of conv1d bank with filter size k=1...K
|
||||||
# TODO: try dilated convolution layers instead
|
# TODO: try dilated convolution layers instead
|
||||||
self.conv1d_banks = nn.ModuleList(
|
self.conv1d_banks = nn.ModuleList(
|
||||||
[BatchNormConv1d(in_features, in_features, kernel_size=k, stride=1,
|
[BatchNormConv1d(in_features, in_features, kernel_size=k, stride=1,
|
||||||
padding=k // 2, activation=self.relu)
|
padding=k // 2, activation=self.relu)
|
||||||
for k in range(1, K + 1)])
|
for k in range(1, K + 1)])
|
||||||
|
|
||||||
# max pooling of conv bank
|
# max pooling of conv bank
|
||||||
# TODO: try average pooling OR larger kernel size
|
# TODO: try average pooling OR larger kernel size
|
||||||
self.max_pool1d = nn.MaxPool1d(kernel_size=2, stride=1, padding=1)
|
self.max_pool1d = nn.MaxPool1d(kernel_size=2, stride=1, padding=1)
|
||||||
|
|
||||||
out_features = [K * in_features] + projections[:-1]
|
out_features = [K * in_features] + projections[:-1]
|
||||||
activations = [self.relu] * (len(projections) - 1)
|
activations = [self.relu] * (len(projections) - 1)
|
||||||
activations += [None]
|
activations += [None]
|
||||||
|
|
||||||
# setup conv1d projection layers
|
# setup conv1d projection layers
|
||||||
layer_set = []
|
layer_set = []
|
||||||
for (in_size, out_size, ac) in zip(out_features, projections, activations):
|
for (in_size, out_size, ac) in zip(out_features, projections, activations):
|
||||||
|
@ -125,12 +121,10 @@ class CBHG(nn.Module):
|
||||||
padding=1, activation=ac)
|
padding=1, activation=ac)
|
||||||
layer_set.append(layer)
|
layer_set.append(layer)
|
||||||
self.conv1d_projections = nn.ModuleList(layer_set)
|
self.conv1d_projections = nn.ModuleList(layer_set)
|
||||||
|
|
||||||
# setup Highway layers
|
# setup Highway layers
|
||||||
self.pre_highway = nn.Linear(projections[-1], in_features, bias=False)
|
self.pre_highway = nn.Linear(projections[-1], in_features, bias=False)
|
||||||
self.highways = nn.ModuleList(
|
self.highways = nn.ModuleList(
|
||||||
[Highway(in_features, in_features) for _ in range(num_highways)])
|
[Highway(in_features, in_features) for _ in range(num_highways)])
|
||||||
|
|
||||||
# bi-directional GRU layer
|
# bi-directional GRU layer
|
||||||
self.gru = nn.GRU(
|
self.gru = nn.GRU(
|
||||||
in_features, in_features, 1, batch_first=True, bidirectional=True)
|
in_features, in_features, 1, batch_first=True, bidirectional=True)
|
||||||
|
@ -138,14 +132,11 @@ class CBHG(nn.Module):
|
||||||
def forward(self, inputs):
|
def forward(self, inputs):
|
||||||
# (B, T_in, in_features)
|
# (B, T_in, in_features)
|
||||||
x = inputs
|
x = inputs
|
||||||
|
|
||||||
# Needed to perform conv1d on time-axis
|
# Needed to perform conv1d on time-axis
|
||||||
# (B, in_features, T_in)
|
# (B, in_features, T_in)
|
||||||
if x.size(-1) == self.in_features:
|
if x.size(-1) == self.in_features:
|
||||||
x = x.transpose(1, 2)
|
x = x.transpose(1, 2)
|
||||||
|
|
||||||
T = x.size(-1)
|
T = x.size(-1)
|
||||||
|
|
||||||
# (B, in_features*K, T_in)
|
# (B, in_features*K, T_in)
|
||||||
# Concat conv1d bank outputs
|
# Concat conv1d bank outputs
|
||||||
outs = []
|
outs = []
|
||||||
|
@ -153,29 +144,22 @@ class CBHG(nn.Module):
|
||||||
out = conv1d(x)
|
out = conv1d(x)
|
||||||
out = out[:, :, :T]
|
out = out[:, :, :T]
|
||||||
outs.append(out)
|
outs.append(out)
|
||||||
|
|
||||||
x = torch.cat(outs, dim=1)
|
x = torch.cat(outs, dim=1)
|
||||||
assert x.size(1) == self.in_features * len(self.conv1d_banks)
|
assert x.size(1) == self.in_features * len(self.conv1d_banks)
|
||||||
|
|
||||||
x = self.max_pool1d(x)[:, :, :T]
|
x = self.max_pool1d(x)[:, :, :T]
|
||||||
|
|
||||||
for conv1d in self.conv1d_projections:
|
for conv1d in self.conv1d_projections:
|
||||||
x = conv1d(x)
|
x = conv1d(x)
|
||||||
|
|
||||||
# (B, T_in, in_features)
|
# (B, T_in, in_features)
|
||||||
# Back to the original shape
|
# Back to the original shape
|
||||||
x = x.transpose(1, 2)
|
x = x.transpose(1, 2)
|
||||||
|
|
||||||
if x.size(-1) != self.in_features:
|
if x.size(-1) != self.in_features:
|
||||||
x = self.pre_highway(x)
|
x = self.pre_highway(x)
|
||||||
|
|
||||||
# Residual connection
|
# Residual connection
|
||||||
# TODO: try residual scaling as in Deep Voice 3
|
# TODO: try residual scaling as in Deep Voice 3
|
||||||
# TODO: try plain residual layers
|
# TODO: try plain residual layers
|
||||||
x += inputs
|
x += inputs
|
||||||
for highway in self.highways:
|
for highway in self.highways:
|
||||||
x = highway(x)
|
x = highway(x)
|
||||||
|
|
||||||
# (B, T_in, in_features*2)
|
# (B, T_in, in_features*2)
|
||||||
# TODO: replace GRU with convolution as in Deep Voice 3
|
# TODO: replace GRU with convolution as in Deep Voice 3
|
||||||
self.gru.flatten_parameters()
|
self.gru.flatten_parameters()
|
||||||
|
|
Loading…
Reference in New Issue