partial model initialization

This commit is contained in:
Eren Golge 2018-12-11 17:53:08 +01:00
parent 619c73f0f1
commit 8d865629a0
2 changed files with 17 additions and 7 deletions

View File

@@ -339,7 +339,6 @@ class Decoder(nn.Module):
def _reshape_memory(self, memory): def _reshape_memory(self, memory):
B = memory.shape[0] B = memory.shape[0]
if memory is not None:
# Grouping multiple frames if necessary # Grouping multiple frames if necessary
if memory.size(-1) == self.memory_dim: if memory.size(-1) == self.memory_dim:
memory = memory.contiguous() memory = memory.contiguous()
@@ -370,6 +369,7 @@ class Decoder(nn.Module):
T = inputs.size(1) T = inputs.size(1)
# Run greedy decoding if memory is None # Run greedy decoding if memory is None
greedy = not self.training greedy = not self.training
if memory is not None:
memory = self._reshape_memory(memory) memory = self._reshape_memory(memory)
T_decoder = memory.size(0) T_decoder = memory.size(0)
# go frame as zeros matrix # go frame as zeros matrix

View File

@@ -401,6 +401,16 @@ def main(args):
if args.restore_path: if args.restore_path:
checkpoint = torch.load(args.restore_path) checkpoint = torch.load(args.restore_path)
model.load_state_dict(checkpoint['model']) model.load_state_dict(checkpoint['model'])
# Partial initialization: checkpoint entries whose keys are not present in the current model are skipped.
# 1. filter out unnecessary keys
pretrained_dict = {
k: v
for k, v in checkpoint['model'].items() if k in model_dict
}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
# 3. load the new state dict
model.load_state_dict(model_dict)
if use_cuda: if use_cuda:
model = model.cuda() model = model.cuda()
criterion.cuda() criterion.cuda()