mirror of https://github.com/coqui-ai/TTS.git
Separate loss tests
This commit is contained in:
parent
c17ff17a18
commit
eefd482f51
|
@ -0,0 +1,202 @@
|
||||||
|
import unittest
|
||||||
|
import torch as T
|
||||||
|
|
||||||
|
from TTS.tts.utils.helpers import sequence_mask
|
||||||
|
from TTS.tts.layers.losses import L1LossMasked, SSIMLoss, MSELossMasked
|
||||||
|
|
||||||
|
|
||||||
|
class L1LossMaskedTests(unittest.TestCase):
    """Tests for ``L1LossMasked``: zero loss on identical tensors, unit loss on
    fully different tensors, and invariance to values placed on padded frames,
    for both ``seq_len_norm=False`` and ``seq_len_norm=True``."""

    @staticmethod
    def _pad_offset(lengths):
        """Additive tensor that is -100.0 on padded frames and 0.0 on valid ones."""
        return ((sequence_mask(lengths).float() - 1.0) * 100.0).unsqueeze(2)

    def test_in_out(self):  # pylint: disable=no-self-use
        # --- seq_len_norm = False ---
        layer = L1LossMasked(seq_len_norm=False)

        # input == target -> loss is exactly 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # input != target everywhere -> mean absolute error of exactly 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # corrupting only the padded frames must not change the loss
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> still 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())

        # --- seq_len_norm = True ---
        layer = L1LossMasked(seq_len_norm=True)

        # input == target -> 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # input != target -> 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # padded frames ignored; normalization keeps result within float tolerance of 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert abs(result.item() - 1.0) < 1e-5, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())
|
class MSELossMaskedTests(unittest.TestCase):
    """Tests for ``MSELossMasked``: zero loss on identical tensors, unit loss on
    fully different (unit-distance) tensors, and invariance to values placed on
    padded frames, for both ``seq_len_norm`` modes."""

    @staticmethod
    def _pad_offset(lengths):
        """Additive tensor that is -100.0 on padded frames and 0.0 on valid ones."""
        return ((sequence_mask(lengths).float() - 1.0) * 100.0).unsqueeze(2)

    def test_in_out(self):  # pylint: disable=no-self-use
        # --- seq_len_norm = False ---
        layer = MSELossMasked(seq_len_norm=False)

        # input == target -> loss is exactly 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # ones vs zeros -> squared error of exactly 1 everywhere
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # corrupting only the padded frames must not change the loss
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> still 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())

        # --- seq_len_norm = True ---
        layer = MSELossMasked(seq_len_norm=True)

        # input == target -> 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # input != target -> 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # padded frames ignored; normalization keeps result within float tolerance of 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert abs(result.item() - 1.0) < 1e-5, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())
|
class SSIMLossTests(unittest.TestCase):
    """Tests for ``SSIMLoss`` on (batch=4, frames=57, feats=128) inputs:
    identical tensors, maximally dissimilar ramps, and invariance to values
    placed on padded frames."""

    @staticmethod
    def _pad_offset(lengths):
        """Additive tensor that is -100.0 on padded frames and 0.0 on valid ones."""
        return ((sequence_mask(lengths).float() - 1.0) * 100.0).unsqueeze(2)

    def test_in_out(self):  # pylint: disable=no-self-use
        layer = SSIMLoss()

        # input == target -> loss is exactly 0
        preds = T.ones(4, 57, 128).float()
        target = T.ones(4, 57, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # ascending ramp vs descending ramp -> maximal dissimilarity (>= 1)
        preds = T.arange(0, 4 * 57 * 128).reshape(4, 57, 128).float()
        target = -(T.arange(-4 * 57 * 128, 0).reshape(4, 57, 128).float())
        # NOTE(review): lengths of 58 exceed the 57 available frames — confirm intent.
        lengths = (T.ones(4) * 58).long()
        result = layer(preds, target, lengths)
        assert result.item() >= 1.0, "0 vs {}".format(result.item())

        # corrupting only the padded frames must not change the loss
        preds = T.ones(4, 57, 128).float()
        target = T.zeros(4, 57, 128).float()
        lengths = T.arange(54, 58).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0.0

        # identical random tensors with corrupted padding -> still 0
        preds = T.rand(4, 57, 128).float()
        target = preds.detach()
        lengths = T.arange(54, 58).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())

        # --- seq_len_norm = True ---
        # NOTE(review): this section instantiates L1LossMasked, not SSIMLoss —
        # looks like a copy-paste from L1LossMaskedTests; confirm intent before
        # changing, since the assertions below match L1 behavior.
        layer = L1LossMasked(seq_len_norm=True)

        # input == target -> 0
        preds = T.ones(4, 57, 128).float()
        target = T.ones(4, 57, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # input != target -> 1
        preds = T.ones(4, 57, 128).float()
        target = T.zeros(4, 57, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # padded frames ignored; normalization keeps result within float tolerance of 1
        preds = T.ones(4, 57, 128).float()
        target = T.zeros(4, 57, 128).float()
        lengths = T.arange(54, 58).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert abs(result.item() - 1.0) < 1e-5, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> 0
        preds = T.rand(4, 57, 128).float()
        target = preds.detach()
        lengths = T.arange(54, 58).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())
|
@ -2,9 +2,7 @@ import unittest
|
||||||
|
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from TTS.tts.layers.losses import L1LossMasked, SSIMLoss
|
|
||||||
from TTS.tts.layers.tacotron.tacotron import CBHG, Decoder, Encoder, Prenet
|
from TTS.tts.layers.tacotron.tacotron import CBHG, Decoder, Encoder, Prenet
|
||||||
from TTS.tts.utils.helpers import sequence_mask
|
|
||||||
|
|
||||||
# pylint: disable=unused-variable
|
# pylint: disable=unused-variable
|
||||||
|
|
||||||
|
@ -85,131 +83,3 @@ class EncoderTests(unittest.TestCase):
|
||||||
assert output.shape[0] == 4
|
assert output.shape[0] == 4
|
||||||
assert output.shape[1] == 8
|
assert output.shape[1] == 8
|
||||||
assert output.shape[2] == 256 # 128 * 2 BiRNN
|
assert output.shape[2] == 256 # 128 * 2 BiRNN
|
||||||
|
|
||||||
|
|
||||||
class L1LossMaskedTests(unittest.TestCase):
    """Tests for ``L1LossMasked``: zero loss on identical tensors, unit loss on
    fully different tensors, and invariance to values placed on padded frames,
    for both ``seq_len_norm`` modes. (This copy sits in the hunk being removed
    by the commit; the moved version lives in the new losses test file.)"""

    @staticmethod
    def _pad_offset(lengths):
        """Additive tensor that is -100.0 on padded frames and 0.0 on valid ones."""
        return ((sequence_mask(lengths).float() - 1.0) * 100.0).unsqueeze(2)

    def test_in_out(self):  # pylint: disable=no-self-use
        # --- seq_len_norm = False ---
        layer = L1LossMasked(seq_len_norm=False)

        # input == target -> loss is exactly 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # input != target everywhere -> mean absolute error of exactly 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # corrupting only the padded frames must not change the loss
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> still 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())

        # --- seq_len_norm = True ---
        layer = L1LossMasked(seq_len_norm=True)

        # input == target -> 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # input != target -> 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # padded frames ignored; normalization keeps result within float tolerance of 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert abs(result.item() - 1.0) < 1e-5, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())
||||||
class SSIMLossTests(unittest.TestCase):
    """Tests for ``SSIMLoss`` on (batch=4, frames=8, feats=128) inputs.
    (This copy sits in the hunk being removed by the commit; the moved version
    lives in the new losses test file.)"""

    @staticmethod
    def _pad_offset(lengths):
        """Additive tensor that is -100.0 on padded frames and 0.0 on valid ones."""
        return ((sequence_mask(lengths).float() - 1.0) * 100.0).unsqueeze(2)

    def test_in_out(self):  # pylint: disable=no-self-use
        layer = SSIMLoss()

        # input == target -> loss is exactly 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # ones vs zeros -> loss within float tolerance of 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert abs(result.item() - 1.0) < 1e-4, "1.0 vs {}".format(result.item())

        # corrupting only the padded frames must not change the loss
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert abs(result.item() - 1.0) < 1e-4, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> still 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())

        # --- seq_len_norm = True ---
        # NOTE(review): this section instantiates L1LossMasked, not SSIMLoss —
        # looks like a copy-paste from L1LossMaskedTests; confirm intent before
        # changing, since the assertions below match L1 behavior.
        layer = L1LossMasked(seq_len_norm=True)

        # input == target -> 0
        preds = T.ones(4, 8, 128).float()
        target = T.ones(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 0.0

        # input != target -> 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = (T.ones(4) * 8).long()
        result = layer(preds, target, lengths)
        assert result.item() == 1.0, "1.0 vs {}".format(result.item())

        # padded frames ignored; normalization keeps result within float tolerance of 1
        preds = T.ones(4, 8, 128).float()
        target = T.zeros(4, 8, 128).float()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert abs(result.item() - 1.0) < 1e-5, "1.0 vs {}".format(result.item())

        # identical random tensors with corrupted padding -> 0
        preds = T.rand(4, 8, 128).float()
        target = preds.detach()
        lengths = T.arange(5, 9).long()
        result = layer(preds + self._pad_offset(lengths), target, lengths)
        assert result.item() == 0, "0 vs {}".format(result.item())
|
Loading…
Reference in New Issue