From b407779a83dc81b60f00d062d42a769605a13ded Mon Sep 17 00:00:00 2001
From: Eren Golge
Date: Mon, 19 Mar 2018 10:38:47 -0700
Subject: [PATCH] Don't use teacher forcing at test time

---
 layers/tacotron.py | 6 ++++--
 train.py           | 3 +--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/layers/tacotron.py b/layers/tacotron.py
index b76c1e9f..c0828d08 100644
--- a/layers/tacotron.py
+++ b/layers/tacotron.py
@@ -48,6 +48,7 @@ class BatchNormConv1d(nn.Module):
         - input: batch x dims
         - output: batch x dims
     """
+
     def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                  activation=None):
         super(BatchNormConv1d, self).__init__()
@@ -241,7 +242,8 @@ class Decoder(nn.Module):
         Args:
             inputs: Encoder outputs.
             memory (None): Decoder memory (autoregression. If None (at eval-time),
-            decoder outputs are used as decoder inputs.
+            decoder outputs are used as decoder inputs. If None, it uses the last
+            output as the input.
 
         Shapes:
             - inputs: batch x time x encoder_out_dim
@@ -293,7 +295,7 @@ class Decoder(nn.Module):
                 memory_input = torch.div(outputs[-1] + memory[t-1], 2.0)
                 # add a random noise
                 noise = torch.autograd.Variable(
-                    memory_input.data.new(memory_input.size()).normal_(0.0, 1.0))
+                    memory_input.data.new(memory_input.size()).normal_(0.0, 0.5))
                 memory_input = memory_input + noise
 
             # Prenet
diff --git a/train.py b/train.py
index 7b32d74c..53027615 100644
--- a/train.py
+++ b/train.py
@@ -228,8 +228,7 @@ def evaluate(model, criterion, data_loader, current_step):
             linear_spec_var = linear_spec_var.cuda()
 
         # forward pass
-        mel_output, linear_output, alignments =\
-            model.forward(text_input_var, mel_spec_var)
+        mel_output, linear_output, alignments = model.forward(text_input_var)
 
         # loss computation
         mel_loss = criterion(mel_output, mel_spec_var)
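
Note: below is a minimal sketch of the decoding loop this patch affects,
with hypothetical names (decode, decoder_step, mel_dim) standing in for
the repository's actual API. During training, teacher forcing mixes the
last prediction with the ground-truth frame and perturbs it (the patch
lowers the noise std from 1.0 to 0.5). At eval time, memory is None
(train.py now calls model.forward(text_input_var) without mel_spec_var),
so the decoder feeds its own last prediction back.

import torch

def decode(decoder_step, encoder_outputs, memory=None, max_steps=200,
           mel_dim=80):
    """Run the decoder loop with or without teacher forcing.

    memory: ground-truth mel frames (batch x T x mel_dim) during
    training, or None at eval/test time.
    """
    batch_size = encoder_outputs.size(0)
    steps = memory.size(1) if memory is not None else max_steps
    memory_input = torch.zeros(batch_size, mel_dim)  # initial <GO> frame
    outputs = []
    for t in range(steps):
        output = decoder_step(memory_input, encoder_outputs)
        outputs.append(output)
        if memory is not None:
            # teacher forcing: average the last prediction with the
            # ground-truth frame, then add mild noise (std 0.5, per
            # the patch)
            memory_input = (output + memory[:, t]) / 2.0
            memory_input = memory_input + 0.5 * torch.randn_like(memory_input)
        else:
            # free running: the last prediction becomes the next input
            memory_input = output
    return torch.stack(outputs, dim=1)

Evaluating in free-running mode matters because that is how the model
runs at synthesis time; with teacher forcing, the eval loss would
understate how much the decoder drifts on its own outputs.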