mirror of https://github.com/coqui-ai/TTS.git
bug fix for tacotron and tests update
parent b031a65677
commit a9ce1d4f19

@@ -28,7 +28,7 @@ class Tacotron(nn.Module):
             nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim),
             nn.Sigmoid())

-    def forward(self, characters, text_lengths, mel_specs=None):
+    def forward(self, characters, text_lengths, mel_specs):
         B = characters.size(0)
         mask = sequence_mask(text_lengths).to(characters.device)
         inputs = self.embedding(characters)

@@ -28,7 +28,7 @@ class Tacotron2(nn.Module):

     def forward(self, text, text_lengths, mel_specs=None):
         # compute mask for padding
-        mask = sequence_mask(text_lengths).to(characters.device)
+        mask = sequence_mask(text_lengths).to(text.device)
         embedded_inputs = self.embedding(text).transpose(1, 2)
         encoder_outputs = self.encoder(embedded_inputs, text_lengths)
         mel_outputs, stop_tokens, alignments = self.decoder(

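The change above is the core fix: `Tacotron2.forward` takes `text`, so the old `characters.device` referenced a name that does not exist in that scope. The mask itself only needs to be a boolean padding mask living on the same device as the input. A minimal sketch of that pattern, assuming `sequence_mask` behaves like the usual lengths-to-mask helper (the implementation below is illustrative, not the repository's exact code):

import torch

def sequence_mask(sequence_lengths, max_len=None):
    # (B, T) boolean mask: True while t < sequence_lengths[b], False over padding.
    if max_len is None:
        max_len = int(sequence_lengths.max().item())
    steps = torch.arange(max_len, device=sequence_lengths.device)
    return steps.unsqueeze(0) < sequence_lengths.unsqueeze(1)

text = torch.randint(0, 24, (8, 128)).long()          # padded character ids, as in the test below
text_lengths = torch.randint(100, 129, (8,)).long()   # true length of each sequence
mask = sequence_mask(text_lengths).to(text.device)    # keep the mask on the input's device
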
@@ -42,7 +42,7 @@ class DecoderTests(unittest.TestCase):
         dummy_input = T.rand(4, 8, 256)
         dummy_memory = T.rand(4, 2, 80)

-        output, alignment, stop_tokens = layer(dummy_input, dummy_memory)
+        output, alignment, stop_tokens = layer(dummy_input, dummy_memory, mask=None)

         assert output.shape[0] == 4
         assert output.shape[1] == 1, "size not {}".format(output.shape[1])

@@ -21,6 +21,7 @@ c = load_config(os.path.join(file_path, 'test_config.json'))
 class TacotronTrainTest(unittest.TestCase):
     def test_train_step(self):
         input = torch.randint(0, 24, (8, 128)).long().to(device)
+        input_lengths = torch.randint(100, 129, (8, )).long().to(device)
         mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
         linear_spec = torch.rand(8, 30, c.audio['num_freq']).to(device)
         mel_lengths = torch.randint(20, 30, (8, )).long().to(device)

@@ -35,8 +36,8 @@ class TacotronTrainTest(unittest.TestCase):

         criterion = L1LossMasked().to(device)
         criterion_st = nn.BCELoss().to(device)
-        model = Tacotron(32, c.embedding_size, c.audio['num_freq'], c.audio['num_mels'],
-                         c.r, c.memory_size).to(device)
+        model = Tacotron(32, c.audio['num_freq'], c.audio['num_mels'],
+                         c.r, memory_size=c.memory_size).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0

@@ -47,7 +48,7 @@ class TacotronTrainTest(unittest.TestCase):
         optimizer = optim.Adam(model.parameters(), lr=c.lr)
         for i in range(5):
             mel_out, linear_out, align, stop_tokens = model.forward(
-                input, mel_spec)
+                input, input_lengths, mel_spec)
             assert stop_tokens.data.max() <= 1.0
             assert stop_tokens.data.min() >= 0.0
             optimizer.zero_grad()
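
The asserts on `stop_tokens` pair with `criterion_st = nn.BCELoss()` above: BCE expects probabilities in [0, 1], which the model's stop-token output is assumed to provide (e.g. via a sigmoid). A small self-contained sketch of that relationship, with illustrative shapes rather than the repository's real ones:

import torch
import torch.nn as nn

stop_logits = torch.randn(8, 30)                      # raw scores from a stop-token head
stop_tokens = torch.sigmoid(stop_logits)              # bounded to [0, 1], safe for BCELoss
stop_targets = torch.randint(0, 2, (8, 30)).float()   # 1 marks the frame where decoding should stop
loss = nn.BCELoss()(stop_tokens, stop_targets)        # BCE between stop probabilities and targets
assert stop_tokens.max() <= 1.0 and stop_tokens.min() >= 0.0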