partial model initialization

2018-12-11 17:53:08 +01:00 · 2018-12-11 17:53:08 +01:00 · 8d865629a0
parent 619c73f0f1
commit 8d865629a0
2 changed files with 17 additions and 7 deletions
--- a/layers/tacotron.py
+++ b/layers/tacotron.py
@@ -339,7 +339,6 @@ class Decoder(nn.Module):

    def _reshape_memory(self, memory):
        B = memory.shape[0]
-        if memory is not None:
        # Grouping multiple frames if necessary
        if memory.size(-1) == self.memory_dim:
            memory = memory.contiguous()
@@ -370,6 +369,7 @@ class Decoder(nn.Module):
        T = inputs.size(1)
        # Run greedy decoding if memory is None
        greedy = not self.training
+        if memory is not None:
            memory = self._reshape_memory(memory)
        T_decoder = memory.size(0)
        # go frame as zeros matrix
--- a/train.py
+++ b/train.py
@@ -401,6 +401,16 @@ def main(args):
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        model.load_state_dict(checkpoint['model'])
+        # Partial initialization: checkpoint entries whose keys are not in model_dict are skipped.
+        # 1. filter out unnecessary keys
+        pretrained_dict = {
+            k: v
+            for k, v in checkpoint['model'].items() if k in model_dict
+        }
+        # 2. overwrite entries in the existing state dict
+        model_dict.update(pretrained_dict)
+        # 3. load the new state dict
+        model.load_state_dict(model_dict)
        if use_cuda:
            model = model.cuda()
            criterion.cuda()