From eabd7e6a52cb6d15446d3c5b399f14254c6a3d7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Thu, 11 Mar 2021 18:14:53 +0100
Subject: [PATCH] fix #374

---
 .github/workflows/main.yml                    |  2 +-
 TTS/bin/train_tacotron.py                     |  2 +-
 TTS/tts/layers/losses.py                      | 22 +++++++++----------
 ...config.json => test_tacotron2_config.json} |  0
 tests/test_tacotron_train.sh                  | 14 +++++++++++-
 tests/test_train_tts.py                       |  0
 6 files changed, 25 insertions(+), 15 deletions(-)
 rename tests/inputs/{test_train_config.json => test_tacotron2_config.json} (100%)
 delete mode 100644 tests/test_train_tts.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d59e9a6c..afefad2c 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -46,7 +46,7 @@ jobs:
           python3 setup.py egg_info
       - name: Lint check
         run: |
-          cardboardlinter -n auto
+          cardboardlinter --refspec main -n auto
       - name: Unit tests
         run: nosetests tests --nocapture --processes=0 --process-timeout=20 --process-restartworker
       - name: Test scripts
diff --git a/TTS/bin/train_tacotron.py b/TTS/bin/train_tacotron.py
index 0887c2cc..331571d7 100644
--- a/TTS/bin/train_tacotron.py
+++ b/TTS/bin/train_tacotron.py
@@ -85,7 +85,7 @@ def format_data(data):
     text_input = data[0]
     text_lengths = data[1]
     speaker_names = data[2]
-    linear_input = data[3] if c.model in ["Tacotron"] else None
+    linear_input = data[3] if c.model.lower() in ["tacotron"] else None
     mel_input = data[4]
     mel_lengths = data[5]
     stop_targets = data[6]
diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py
index 50575b80..c5497054 100644
--- a/TTS/tts/layers/losses.py
+++ b/TTS/tts/layers/losses.py
@@ -297,6 +297,11 @@ class TacotronLoss(torch.nn.Module):
                 stopnet_output, stopnet_target, output_lens, decoder_b_output,
                 alignments, alignment_lens, alignments_backwards, input_lens):
 
+
+        # postnet outputs linear spectrograms for Tacotron and mel spectrograms
+        # for Tacotron2, so the postnet target must be set accordingly
+        postnet_target = linear_input if self.config.model.lower() in ["tacotron"] else mel_input
+
         return_dict = {}
         # remove lengths if no masking is applied
         if not self.config.loss_masking:
@@ -307,20 +312,13 @@ class TacotronLoss(torch.nn.Module):
                 decoder_loss = self.criterion(decoder_output, mel_input,
                                               output_lens)
             if self.postnet_alpha > 0:
-                if self.config.model in ["Tacotron", "TacotronGST"]:
-                    postnet_loss = self.criterion(postnet_output, linear_input,
-                                                  output_lens)
-                else:
-                    postnet_loss = self.criterion(postnet_output, mel_input,
-                                                  output_lens)
+                postnet_loss = self.criterion(postnet_output, postnet_target,
+                                                output_lens)
         else:
             if self.decoder_alpha > 0:
                 decoder_loss = self.criterion(decoder_output, mel_input)
             if self.postnet_alpha > 0:
-                if self.config.model in ["Tacotron", "TacotronGST"]:
-                    postnet_loss = self.criterion(postnet_output, linear_input)
-                else:
-                    postnet_loss = self.criterion(postnet_output, mel_input)
+                postnet_loss = self.criterion(postnet_output, postnet_target)
         loss = self.decoder_alpha * decoder_loss + self.postnet_alpha * postnet_loss
         return_dict['decoder_loss'] = decoder_loss
         return_dict['postnet_loss'] = postnet_loss
@@ -373,7 +371,7 @@ class TacotronLoss(torch.nn.Module):
 
         # postnet differential spectral loss
         if self.config.postnet_diff_spec_alpha > 0:
-            postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, mel_input, output_lens)
+            postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, postnet_target, output_lens)
             loss += postnet_diff_spec_loss * self.postnet_diff_spec_alpha
             return_dict['postnet_diff_spec_loss'] = postnet_diff_spec_loss
 
@@ -385,7 +383,7 @@ class TacotronLoss(torch.nn.Module):
 
         # postnet ssim loss
         if self.config.postnet_ssim_alpha > 0:
-            postnet_ssim_loss = self.criterion_ssim(postnet_output, mel_input, output_lens)
+            postnet_ssim_loss = self.criterion_ssim(postnet_output, postnet_target, output_lens)
             loss += postnet_ssim_loss * self.postnet_ssim_alpha
             return_dict['postnet_ssim_loss'] = postnet_ssim_loss
 
diff --git a/tests/inputs/test_train_config.json b/tests/inputs/test_tacotron2_config.json
similarity index 100%
rename from tests/inputs/test_train_config.json
rename to tests/inputs/test_tacotron2_config.json
diff --git a/tests/test_tacotron_train.sh b/tests/test_tacotron_train.sh
index 9268ea96..fa9930a7 100755
--- a/tests/test_tacotron_train.sh
+++ b/tests/test_tacotron_train.sh
@@ -3,7 +3,7 @@ set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_train_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
@@ -11,3 +11,15 @@ echo $LATEST_FOLDER
 CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/
+
+# Tacotron2
+# run training
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron2_config.json
+# find the training folder
+LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
+echo $LATEST_FOLDER
+# continue the previous training
+CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+# remove all the outputs
+rm -rf $BASEDIR/train_outputs/
+
diff --git a/tests/test_train_tts.py b/tests/test_train_tts.py
deleted file mode 100644
index e69de29b..00000000