Harmonized teacher-forcing

Eren Golge 2018-02-26 05:33:54 -08:00
parent b5f2181e04
commit 56f8b2d19f
4 changed files with 24 additions and 12 deletions

View File

@@ -12,7 +12,7 @@
     "text_cleaner": "english_cleaners",
     "epochs": 2000,
-    "lr": 0.003,
+    "lr": 0.001,
     "batch_size": 180,
     "r": 5,

View File

@@ -307,9 +307,13 @@ class Decoder(nn.Module):
             else:
                 # combine prev. model output and prev. real target
                 memory_input = torch.div(outputs[-1] + memory[t-1], 2.0)
+                memory_input = torch.nn.functional.dropout(memory_input,
+                                                           0.1,
+                                                           training=True)
                 # add a random noise
-                memory_input += torch.autograd.Variable(
-                    torch.randn(memory_input.size())).type_as(memory_input)
+                noise = torch.autograd.Variable(
+                    memory_input.data.new(memory_input.size()).normal_(0.0, 1.0))
+                memory_input = memory_input + noise
             # Prenet
             processed_memory = self.prenet(memory_input)
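The hunk above is the core of the harmonized teacher forcing: instead of feeding back only the ground-truth frame, the decoder averages its previous prediction with the previous target frame and then perturbs the result with dropout and Gaussian noise. A minimal sketch of that step as a standalone function, written against the current torch API rather than the 0.3-era Variable API, with illustrative argument names:

import torch
import torch.nn.functional as F

def harmonized_memory_input(prev_output, prev_target, dropout_p=0.1, noise_std=1.0):
    # average the previous model output and the previous ground-truth frame
    memory_input = (prev_output + prev_target) / 2.0
    # dropout stays active regardless of model.eval(), mirroring training=True above
    memory_input = F.dropout(memory_input, p=dropout_p, training=True)
    # zero-mean Gaussian noise with the same shape, dtype and device
    return memory_input + noise_std * torch.randn_like(memory_input)

Calling harmonized_memory_input(outputs[-1], memory[t-1]) would reproduce the behaviour of the block above.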
@@ -360,5 +364,5 @@ class Decoder(nn.Module):
         return outputs, alignments
-def is_end_of_frames(output, eps=0.1): #0.2
+def is_end_of_frames(output, eps=0.2): #0.2
     return (output.data <= eps).all()
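Decoding halts only once every value of the predicted frame block is at or below eps, so raising eps from 0.1 to 0.2 lets the decoder stop on slightly louder trailing frames. A small illustration with made-up tensor values:

import torch

frame = torch.tensor([0.05, 0.12, 0.18])
print((frame <= 0.1).all())  # tensor(False) -> old threshold keeps decoding
print((frame <= 0.2).all())  # tensor(True)  -> new threshold stops here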

View File

@@ -90,9 +90,6 @@ def main(args):
     # onnx.export(model, dummy_input, model_proto_path, verbose=True)
     # tb.add_graph_onnx(model_proto_path)
-    if use_cuda:
-        model = nn.DataParallel(model.cuda())
     optimizer = optim.Adam(model.parameters(), lr=c.lr)
     if args.restore_step:
@@ -103,10 +100,20 @@ def main(args):
         print("\n > Model restored from step %d\n" % args.restore_step)
         start_epoch = checkpoint['step'] // len(dataloader)
         best_loss = checkpoint['linear_loss']
-    else:
+    elif args.restore_path:
+        checkpoint = torch.load(args.restore_path)
+        model.load_state_dict(checkpoint['model'])
+        optimizer.load_state_dict(checkpoint['optimizer'])
+        print("\n > Model restored from step %d\n" % checkpoint['step'])
+        start_epoch = checkpoint['step'] // len(dataloader)
+        best_loss = checkpoint['linear_loss']
+        start_epoch = 0
+    else:
         print("\n > Starting a new training")
+    if use_cuda:
+        model = nn.DataParallel(model.cuda())
     num_params = count_parameters(model)
     print(" | > Model has {} parameters".format(num_params))
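The second half of this hunk moves the nn.DataParallel wrapping to after the checkpoint is restored. Loading into the bare model first keeps the state-dict keys free of the "module." prefix that DataParallel adds to parameter names. A sketch of that order, assuming the checkpoint stores 'model', 'optimizer' and 'step' entries as in the diff:

import torch
import torch.nn as nn

def restore_then_wrap(model, optimizer, checkpoint_path, use_cuda):
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(checkpoint['model'])          # keys match the bare model
    optimizer.load_state_dict(checkpoint['optimizer'])
    if use_cuda:
        model = nn.DataParallel(model.cuda())            # wrap only after loading
    return model, optimizer, checkpoint.get('step', 0)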
@@ -142,9 +149,9 @@ def main(args):
             current_step = num_iter + args.restore_step + epoch * len(dataloader) + 1
             # setup lr
-            current_lr = lr_decay(c.lr, current_step)
-            for params_group in optimizer.param_groups:
-                params_group['lr'] = current_lr
+            # current_lr = lr_decay(c.lr, current_step)
+            # for params_group in optimizer.param_groups:
+            #     params_group['lr'] = current_lr
             optimizer.zero_grad()
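With the per-step decay commented out, training keeps the fixed learning rate from the config (now 0.001). lr_decay itself is defined outside this diff; purely as an assumption, one plausible form for such a helper is a Noam-style warmup/decay schedule:

# Hypothetical schedule; the repo's actual lr_decay() is not part of this diff.
def lr_decay(init_lr, global_step, warmup_steps=4000):
    step = max(global_step, 1)
    return init_lr * (warmup_steps ** 0.5) * min(step * warmup_steps ** -1.5,
                                                 step ** -0.5)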
@@ -192,7 +199,7 @@ def main(args):
             # loss = loss.cuda()
             loss.backward()
-            grad_norm = nn.utils.clip_grad_norm(model.parameters(), 1.)  ## TODO: maybe no need
+            grad_norm = nn.utils.clip_grad_norm(model.parameters(), 0.5)  ## TODO: maybe no need
             optimizer.step()
             step_time = time.time() - start_time
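For reference, the same clipping step written against current PyTorch, where clip_grad_norm was renamed to clip_grad_norm_; the tighter max norm of 0.5 matches the new value above, and the helper name is illustrative:

import torch.nn as nn

def clipped_step(model, optimizer, loss, max_norm=0.5):
    optimizer.zero_grad()
    loss.backward()
    grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    optimizer.step()
    return grad_norm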

View File

@@ -7,6 +7,7 @@ import datetime
 import json
 import torch
 import numpy as np
+from collections import OrderedDict
 class AttrDict(dict):