fix #374

2021-03-11 18:14:53 +01:00 · 2021-03-11 18:14:53 +01:00 · 65533f33e9
parent 884096f3f7
commit 65533f33e9
6 changed files with 25 additions and 15 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -45,7 +45,7 @@ jobs:
          python3 setup.py egg_info
      - name: Lint check
        run: |
-          cardboardlinter -n auto
+          cardboardlinter --refspec main -n auto
      - name: Unit tests
        run: nosetests tests --nocapture --processes=0 --process-timeout=20 --process-restartworker
      - name: Test scripts
--- a/TTS/bin/train_tacotron.py
+++ b/TTS/bin/train_tacotron.py
@ -85,7 +85,7 @@ def format_data(data):
    text_input = data[0]
    text_lengths = data[1]
    speaker_names = data[2]
-    linear_input = data[3] if c.model in ["Tacotron"] else None
+    linear_input = data[3] if c.model.lower() in ["tacotron"] else None
    mel_input = data[4]
    mel_lengths = data[5]
    stop_targets = data[6]
--- a/TTS/tts/layers/losses.py
+++ b/TTS/tts/layers/losses.py
@ -297,6 +297,11 @@ class TacotronLoss(torch.nn.Module):
                stopnet_output, stopnet_target, output_lens, decoder_b_output,
                alignments, alignment_lens, alignments_backwards, input_lens):

+
+        # the postnet predicts linear spectrograms for Tacotron and mel spectrograms
+        # otherwise (e.g. Tacotron2), so the postnet target is set accordingly
+        postnet_target = linear_input if self.config.model.lower() in ["tacotron"] else mel_input
+
        return_dict = {}
        # remove lengths if no masking is applied
        if not self.config.loss_masking:
@ -307,20 +312,13 @@ class TacotronLoss(torch.nn.Module):
                decoder_loss = self.criterion(decoder_output, mel_input,
                                              output_lens)
            if self.postnet_alpha > 0:
-                if self.config.model in ["Tacotron", "TacotronGST"]:
-                    postnet_loss = self.criterion(postnet_output, linear_input,
-                                                  output_lens)
-                else:
-                    postnet_loss = self.criterion(postnet_output, mel_input,
-                                                  output_lens)
+                postnet_loss = self.criterion(postnet_output, postnet_target,
+                                                output_lens)
        else:
            if self.decoder_alpha > 0:
                decoder_loss = self.criterion(decoder_output, mel_input)
            if self.postnet_alpha > 0:
-                if self.config.model in ["Tacotron", "TacotronGST"]:
-                    postnet_loss = self.criterion(postnet_output, linear_input)
-                else:
-                    postnet_loss = self.criterion(postnet_output, mel_input)
+                postnet_loss = self.criterion(postnet_output, postnet_target)
        loss = self.decoder_alpha * decoder_loss + self.postnet_alpha * postnet_loss
        return_dict['decoder_loss'] = decoder_loss
        return_dict['postnet_loss'] = postnet_loss
@ -373,7 +371,7 @@ class TacotronLoss(torch.nn.Module):

        # postnet differential spectral loss
        if self.config.postnet_diff_spec_alpha > 0:
-            postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, mel_input, output_lens)
+            postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, postnet_target, output_lens)
            loss += postnet_diff_spec_loss * self.postnet_diff_spec_alpha
            return_dict['postnet_diff_spec_loss'] = postnet_diff_spec_loss

@ -385,7 +383,7 @@ class TacotronLoss(torch.nn.Module):

        # postnet ssim loss
        if self.config.postnet_ssim_alpha > 0:
-            postnet_ssim_loss = self.criterion_ssim(postnet_output, mel_input, output_lens)
+            postnet_ssim_loss = self.criterion_ssim(postnet_output, postnet_target, output_lens)
            loss += postnet_ssim_loss * self.postnet_ssim_alpha
            return_dict['postnet_ssim_loss'] = postnet_ssim_loss

--- a/tests/inputs/test_tacotron2_config.json
+++ b/tests/inputs/test_tacotron2_config.json
--- a/tests/test_tacotron_train.sh
+++ b/tests/test_tacotron_train.sh
@ -3,7 +3,7 @@ set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
-CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_train_config.json
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron_config.json
 # find the training folder
 LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
 echo $LATEST_FOLDER
@ -11,3 +11,15 @@ echo $LATEST_FOLDER
 CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
 # remove all the outputs
 rm -rf $BASEDIR/train_outputs/
+
+# Tacotron2
+# run training
+CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron2_config.json
+# find the training folder
+LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
+echo $LATEST_FOLDER
+# continue the previous training
+CUDA_VISIBLE_DEVICES=""  python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
+# remove all the outputs
+rm -rf $BASEDIR/train_outputs/
+
--- a/tests/test_train_tts.py
+++ b/tests/test_train_tts.py