remove zero-frame in dataloader

This commit is contained in:
Eren Golge 2019-11-19 12:58:54 +01:00
parent ee788bc558
commit 4873620bc2
2 changed files with 7 additions and 6 deletions

View File

@@ -193,22 +193,22 @@ class MyDataset(Dataset):
mel = [self.ap.melspectrogram(w).astype('float32') for w in wav]
linear = [self.ap.spectrogram(w).astype('float32') for w in wav]
mel_lengths = [m.shape[1] + 1 for m in mel] # +1 for zero-frame
mel_lengths = [m.shape[1] for m in mel]
# compute 'stop token' targets
stop_targets = [
np.array([0.] * (mel_len - 1)) for mel_len in mel_lengths
np.array([0.] * (mel_len - 1) + [1.]) for mel_len in mel_lengths
]
# PAD stop targets
stop_targets = prepare_stop_target(stop_targets,
self.outputs_per_step)
# PAD sequences with largest length of the batch
# PAD sequences with longest instance in the batch
text = prepare_data(text).astype(np.int32)
wav = prepare_data(wav)
# PAD features with largest length + a zero frame
# PAD features with longest instance
linear = prepare_tensor(linear, self.outputs_per_step)
mel = prepare_tensor(mel, self.outputs_per_step)
assert mel.shape[2] == linear.shape[2]

View File

@@ -24,7 +24,7 @@ def _pad_tensor(x, length):
def prepare_tensor(inputs, out_steps):
    """Pad a batch of feature arrays to a common length and stack them.

    The target length is the largest ``shape[1]`` found in ``inputs``,
    rounded up to the next multiple of ``out_steps``. No extra zero-frame
    is appended to that length.

    Args:
        inputs: sequence of arrays whose length is read from ``shape[1]``
            (presumably spectrogram frames on axis 1 -- confirm with caller).
        out_steps: the padded length is made a multiple of this value.

    Returns:
        The result of ``np.stack`` over every input after it has been passed
        through ``_pad_tensor(x, pad_len)``.
    """
    max_len = max(x.shape[1] for x in inputs)
    remainder = max_len % out_steps
    # Only round up when max_len is not already a multiple of out_steps.
    pad_len = max_len + (out_steps - remainder) if remainder > 0 else max_len
    return np.stack([_pad_tensor(x, pad_len) for x in inputs])
@@ -38,7 +38,8 @@ def _pad_stop_target(x, length):
def prepare_stop_target(inputs, out_steps):
    """Pad row vectors with 1.

    The target length is the largest ``shape[0]`` found in ``inputs``,
    rounded up to the next multiple of ``out_steps``. No extra zero-frame
    is appended to that length. The padding itself is delegated to
    ``_pad_stop_target``.

    Args:
        inputs: sequence of 1-D stop-target arrays (length read from
            ``shape[0]``).
        out_steps: the padded length is made a multiple of this value.

    Returns:
        The result of ``np.stack`` over every input after it has been passed
        through ``_pad_stop_target(x, pad_len)``.
    """
    max_len = max(x.shape[0] for x in inputs)
    remainder = max_len % out_steps
    # Only round up when max_len is not already a multiple of out_steps.
    pad_len = max_len + (out_steps - remainder) if remainder > 0 else max_len
    return np.stack([_pad_stop_target(x, pad_len) for x in inputs])