From 1dbc51c6b509d8787b2beda7a9d3e65e45b997b8 Mon Sep 17 00:00:00 2001
From: Eren Golge <egolge@mozilla.com>
Date: Sat, 24 Mar 2018 19:22:45 -0700
Subject: [PATCH] convert loss to layer and add test

---
 layers/losses.py      | 66 +++++++++++++++++++++++--------------------
 tests/layers_tests.py | 26 +++++++++++++++++
 train.py              |  5 +++-
 3 files changed, 66 insertions(+), 31 deletions(-)

diff --git a/layers/losses.py b/layers/losses.py
index 18f4099a..67bc0f22 100644
--- a/layers/losses.py
+++ b/layers/losses.py
@@ -1,6 +1,7 @@
 import torch 
 from torch.nn import functional
 from torch.autograd import Variable
+from torch import nn
 
 
 # from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1
@@ -18,34 +19,39 @@ def _sequence_mask(sequence_length, max_len=None):
     return seq_range_expand < seq_length_expand
 
 
-def L1LossMasked(input, target, length):
-    """
-    Args:
-        logits: A Variable containing a FloatTensor of size
-            (batch, max_len, num_classes) which contains the
-            unnormalized probability for each class.
-        target: A Variable containing a LongTensor of size
-            (batch, max_len) which contains the index of the true
-            class for each corresponding step.
-        length: A Variable containing a LongTensor of size (batch,)
-            which contains the length of each data in a batch.
-    Returns:
-        loss: An average loss value masked by the length.
-    """
-    input = input.contiguous()
-    target = target.contiguous()
+class L1LossMasked(nn.Module):
+    """L1 loss averaged over the valid (non-padded) frames of each sequence."""
+
+    def __init__(self):
+        super(L1LossMasked, self).__init__()
+
+    def forward(self, input, target, length):
+        """
+        Args:
+            input: A Variable containing a FloatTensor of size
+                (batch, max_len, dim) with the predicted values.
+            target: A Variable containing a FloatTensor of size
+                (batch, max_len, dim) with the reference values.
+            length: A Variable containing a LongTensor of size (batch,)
+                which contains the length of each data in a batch.
+        Returns:
+            loss: The sum of absolute errors over the valid frames divided
+                by (length.sum() * dim); frames past `length` are ignored.
+        """
+        input = input.contiguous()
+        target = target.contiguous()
 
-    # logits_flat: (batch * max_len, dim)
-    input = input.view(-1, input.size(-1))
-    # target_flat: (batch * max_len, dim)
-    target_flat = target.view(-1, 1)
-    # losses_flat: (batch * max_len, dim)
-    losses_flat = functional.l1_loss(input, target, size_average=False,
-                         reduce=False)
-    # losses: (batch, max_len)
-    losses = losses_flat.view(*target.size())
-    # mask: (batch, max_len)
-    mask = _sequence_mask(sequence_length=length, max_len=target.size(1)).unsqueeze(2)
-    losses = losses * mask.float()
-    loss = losses.sum() / (length.float().sum() * target.shape[2])
-    return loss
\ No newline at end of file
+        # Flatten both tensors the same way so the element-wise loss is
+        # computed on matching shapes: (batch * max_len, dim).
+        input_flat = input.view(-1, input.size(-1))
+        target_flat = target.view(-1, target.size(-1))
+        # losses_flat: (batch * max_len, dim), one absolute error per element
+        losses_flat = functional.l1_loss(input_flat, target_flat,
+                                         size_average=False, reduce=False)
+        # losses: (batch, max_len, dim)
+        losses = losses_flat.view(*target.size())
+        # mask: (batch, max_len, 1), 1.0 for valid frames and 0.0 for padding
+        mask = _sequence_mask(sequence_length=length, max_len=target.size(1)).unsqueeze(2)
+        losses = losses * mask.float()
+        loss = losses.sum() / (length.float().sum() * float(target.shape[2]))
+        return loss
\ No newline at end of file
diff --git a/tests/layers_tests.py b/tests/layers_tests.py
index 14739bf9..246fce8c 100644
--- a/tests/layers_tests.py
+++ b/tests/layers_tests.py
@@ -2,6 +2,7 @@ import unittest
 import torch as T
 
 from TTS.layers.tacotron import Prenet, CBHG, Decoder, Encoder
+from layers.losses import L1LossMasked, _sequence_mask
 
 
 class PrenetTests(unittest.TestCase):
@@ -57,4 +58,29 @@ class EncoderTests(unittest.TestCase):
         assert output.shape[0] == 4
         assert output.shape[1] == 8
         assert output.shape[2] == 256  # 128 * 2 BiRNN
+        
 
+class L1LossMaskedTests(unittest.TestCase):
+    """Sanity checks for L1LossMasked on full-length and padded batches."""
+
+    def test_in_out(self):
+        criterion = L1LossMasked()
+        ones = T.autograd.Variable(T.ones(4, 8, 128).float())
+        zeros = T.autograd.Variable(T.zeros(4, 8, 128).float())
+        full_length = T.autograd.Variable((T.ones(4) * 8).long())
+
+        # identical input and target, no padding: the loss is a single zero
+        loss = criterion(ones, ones, full_length)
+        assert loss.shape[0] == 1
+        assert len(loss.shape) == 1
+        assert loss.data[0] == 0.0
+
+        # every element is off by one, no padding: the mean error is one
+        loss = criterion(ones, zeros, full_length)
+        assert loss.data[0] == 1.0, "1.0 vs {}".format(loss.data[0])
+
+        # lengths 5..8: padded frames are set to -99 and must be ignored
+        lengths = T.autograd.Variable((T.arange(5,9)).long())
+        padding = ((_sequence_mask(lengths).float() - 1.0) * 100.0).unsqueeze(2)
+        loss = criterion(ones + padding, zeros, lengths)
+        assert loss.data[0] == 1.0, "1.0 vs {}".format(loss.data[0])
diff --git a/train.py b/train.py
index 4e132662..97876036 100644
--- a/train.py
+++ b/train.py
@@ -349,7 +349,10 @@ def main(args):
 
     optimizer = optim.Adam(model.parameters(), lr=c.lr)
     
-    criterion = L1LossMasked
+    if use_cuda:
+        criterion = L1LossMasked().cuda()
+    else:
+        criterion = L1LossMasked()   
 
     if args.restore_path:
         checkpoint = torch.load(args.restore_path)