From eabd7e6a52cb6d15446d3c5b399f14254c6a3d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Thu, 11 Mar 2021 18:14:53 +0100 Subject: [PATCH] fix #374 --- .github/workflows/main.yml | 2 +- TTS/bin/train_tacotron.py | 2 +- TTS/tts/layers/losses.py | 22 +++++++++---------- ...config.json => test_tacotron2_config.json} | 0 tests/test_tacotron_train.sh | 14 +++++++++++- tests/test_train_tts.py | 0 6 files changed, 25 insertions(+), 15 deletions(-) rename tests/inputs/{test_train_config.json => test_tacotron2_config.json} (100%) delete mode 100644 tests/test_train_tts.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d59e9a6c..afefad2c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -46,7 +46,7 @@ jobs: python3 setup.py egg_info - name: Lint check run: | - cardboardlinter -n auto + cardboardlinter --refspec main -n auto - name: Unit tests run: nosetests tests --nocapture --processes=0 --process-timeout=20 --process-restartworker - name: Test scripts diff --git a/TTS/bin/train_tacotron.py b/TTS/bin/train_tacotron.py index 0887c2cc..331571d7 100644 --- a/TTS/bin/train_tacotron.py +++ b/TTS/bin/train_tacotron.py @@ -85,7 +85,7 @@ def format_data(data): text_input = data[0] text_lengths = data[1] speaker_names = data[2] - linear_input = data[3] if c.model in ["Tacotron"] else None + linear_input = data[3] if c.model.lower() in ["tacotron"] else None mel_input = data[4] mel_lengths = data[5] stop_targets = data[6] diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index 50575b80..c5497054 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -297,6 +297,11 @@ class TacotronLoss(torch.nn.Module): stopnet_output, stopnet_target, output_lens, decoder_b_output, alignments, alignment_lens, alignments_backwards, input_lens): + + # decoder outputs linear or mel spectrograms for Tacotron and Tacotron2 + # the target should be set accordingly + postnet_target = linear_input if
self.config.model.lower() in ["tacotron"] else mel_input + return_dict = {} # remove lengths if no masking is applied if not self.config.loss_masking: @@ -307,20 +312,13 @@ class TacotronLoss(torch.nn.Module): decoder_loss = self.criterion(decoder_output, mel_input, output_lens) if self.postnet_alpha > 0: - if self.config.model in ["Tacotron", "TacotronGST"]: - postnet_loss = self.criterion(postnet_output, linear_input, - output_lens) - else: - postnet_loss = self.criterion(postnet_output, mel_input, - output_lens) + postnet_loss = self.criterion(postnet_output, postnet_target, + output_lens) else: if self.decoder_alpha > 0: decoder_loss = self.criterion(decoder_output, mel_input) if self.postnet_alpha > 0: - if self.config.model in ["Tacotron", "TacotronGST"]: - postnet_loss = self.criterion(postnet_output, linear_input) - else: - postnet_loss = self.criterion(postnet_output, mel_input) + postnet_loss = self.criterion(postnet_output, postnet_target) loss = self.decoder_alpha * decoder_loss + self.postnet_alpha * postnet_loss return_dict['decoder_loss'] = decoder_loss return_dict['postnet_loss'] = postnet_loss @@ -373,7 +371,7 @@ class TacotronLoss(torch.nn.Module): # postnet differential spectral loss if self.config.postnet_diff_spec_alpha > 0: - postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, mel_input, output_lens) + postnet_diff_spec_loss = self.criterion_diff_spec(postnet_output, postnet_target, output_lens) loss += postnet_diff_spec_loss * self.postnet_diff_spec_alpha return_dict['postnet_diff_spec_loss'] = postnet_diff_spec_loss @@ -385,7 +383,7 @@ class TacotronLoss(torch.nn.Module): # postnet ssim loss if self.config.postnet_ssim_alpha > 0: - postnet_ssim_loss = self.criterion_ssim(postnet_output, mel_input, output_lens) + postnet_ssim_loss = self.criterion_ssim(postnet_output, postnet_target, output_lens) loss += postnet_ssim_loss * self.postnet_ssim_alpha return_dict['postnet_ssim_loss'] = postnet_ssim_loss diff --git 
a/tests/inputs/test_train_config.json b/tests/inputs/test_tacotron2_config.json similarity index 100% rename from tests/inputs/test_train_config.json rename to tests/inputs/test_tacotron2_config.json diff --git a/tests/test_tacotron_train.sh b/tests/test_tacotron_train.sh index 9268ea96..fa9930a7 100755 --- a/tests/test_tacotron_train.sh +++ b/tests/test_tacotron_train.sh @@ -3,7 +3,7 @@ set -xe BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training -CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_train_config.json +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron_config.json # find the training folder LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) echo $LATEST_FOLDER @@ -11,3 +11,15 @@ echo $LATEST_FOLDER CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER # remove all the outputs rm -rf $BASEDIR/train_outputs/ + +# Tacotron2 +# run training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --config_path $BASEDIR/inputs/test_tacotron2_config.json +# find the training folder +LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) +echo $LATEST_FOLDER +# continue the previous training +CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tacotron.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER +# remove all the outputs +rm -rf $BASEDIR/train_outputs/ + diff --git a/tests/test_train_tts.py b/tests/test_train_tts.py deleted file mode 100644 index e69de29b..00000000