mirror of https://github.com/coqui-ai/TTS.git
fix a ton of testing bugs
This commit is contained in:
parent 25551c4634
commit 6cc464ead6
@@ -182,13 +182,14 @@ def check_config_tts(c):
     # loss parameters
     check_argument('loss_masking', c, restricted=True, val_type=bool)
-    check_argument('decoder_loss_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('postnet_loss_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('postnet_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('decoder_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('decoder_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('postnet_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)
+    if c['model'].lower() in ['tacotron', 'tacotron2']:
+        check_argument('decoder_loss_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('postnet_loss_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('postnet_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('decoder_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('decoder_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('postnet_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)

     # validation parameters
     check_argument('run_eval', c, restricted=True, val_type=bool)
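For readers unfamiliar with the validator, a minimal stand-in showing the semantics these calls rely on (restricted makes a key mandatory; val_type and min_val constrain it). This is an illustrative sketch, not the repo's actual check_argument implementation:

# Illustrative stand-in for the validator used above (assumed semantics).
def check_argument(name, c, restricted=False, val_type=None, min_val=None):
    if name not in c:
        assert not restricted, f" [!] '{name}' is a required field in the config."
        return
    if val_type is not None:
        assert isinstance(c[name], val_type), f" [!] '{name}' must be {val_type}."
    if min_val is not None:
        assert c[name] >= min_val, f" [!] '{name}' must be >= {min_val}."

c = {'model': 'Tacotron2', 'loss_masking': True, 'ga_alpha': 10.0}
check_argument('loss_masking', c, restricted=True, val_type=bool)
if c['model'].lower() in ['tacotron', 'tacotron2']:
    check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)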
@@ -4,6 +4,7 @@ import os
 import shutil
 import subprocess
 import contextlib
+import platform

 import torch
@@ -12,10 +13,10 @@ def set_amp_context(mixed_precision):
     if mixed_precision:
         cm = torch.cuda.amp.autocast()
     else:
-        if os.python.version<=3.6:
+        if platform.python_version() <= "3.6.0":
             cm = contextlib.suppress()
         else:
-            cm = contextlib.nullcontext()
+            cm = contextlib.nullcontext()
     return cm
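For context, a sketch of how such a helper is typically consumed. Note that comparing version strings lexicographically misfires on Python 3.10+ ("3.10.0" < "3.6.0" as strings), so the variant below swaps in sys.version_info as the guard; the call-site names are assumptions, not repo code:

import sys
import contextlib

import torch

def set_amp_context(mixed_precision):
    # Same shape as the patched helper, but guarded with sys.version_info
    # instead of a lexicographic string comparison.
    if mixed_precision:
        return torch.cuda.amp.autocast()
    if sys.version_info >= (3, 7):
        return contextlib.nullcontext()  # nullcontext was added in Python 3.7
    return contextlib.suppress()  # no-op context manager fallback on 3.6

# Typical call site in a train step (hypothetical names):
# with set_amp_context(c.mixed_precision):
#     decoder_output = model(text, mel_input)
#     loss = criterion(decoder_output, mel_target)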
@@ -271,9 +271,12 @@ class WaveRNN(nn.Module):
         with torch.no_grad():
             if isinstance(mels, np.ndarray):
-                mels = torch.FloatTensor(mels).unsqueeze(0).to(device)
-            #mels = torch.FloatTensor(mels).cuda().unsqueeze(0)
+                mels = torch.FloatTensor(mels).to(device)
+
+            if mels.ndim == 2:
+                mels = mels.unsqueeze(0)
+
             wave_len = (mels.size(-1) - 1) * self.hop_length

             mels = self.pad_tensor(mels.transpose(
                 1, 2), pad=self.pad, side="both")
             mels, aux = self.upsample(mels.transpose(1, 2))
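The rearranged block lets inference accept either a numpy array or a tensor, batched or not. A self-contained sketch of that normalization idiom (the function name and device argument are illustrative):

import numpy as np
import torch

def to_batched_tensor(mels, device="cpu"):
    # Accept numpy input and convert it to a float tensor on the target device.
    if isinstance(mels, np.ndarray):
        mels = torch.FloatTensor(mels).to(device)
    # Promote an unbatched [num_mels, T] spectrogram to [1, num_mels, T].
    if mels.ndim == 2:
        mels = mels.unsqueeze(0)
    return mels

print(to_batched_tensor(np.zeros((80, 100))).shape)  # torch.Size([1, 80, 100])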
@@ -1,3 +1,4 @@
 set -e
+TF_CPP_MIN_LOG_LEVEL=3

 # tests
@@ -67,7 +67,7 @@
     "gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
     "loss_masking": true, // enable / disable loss masking against the sequence padding.
     "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
-    "apex_amp_level": null,
+    "mixed_precision": false,

     // VALIDATION
     "run_eval": true,
@@ -75,14 +75,15 @@
     "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.

     // LOSS SETTINGS
-    "loss_masking": false, // enable / disable loss masking against the sequence padding.
-    "decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
-    "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
-    "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
-    "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
-    "decoder_ssim_alpha": 0.5, // differential spectral loss weight. If > 0, it is enabled
-    "postnet_ssim_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
+    "loss_masking": true, // enable / disable loss masking against the sequence padding.
+    "decoder_loss_alpha": 0.5, // original decoder loss weight. If > 0, it is enabled
+    "postnet_loss_alpha": 0.25, // original postnet loss weight. If > 0, it is enabled
+    "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
+    "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
+    "decoder_ssim_alpha": 0.5, // decoder ssim loss weight. If > 0, it is enabled
+    "postnet_ssim_alpha": 0.25, // postnet ssim loss weight. If > 0, it is enabled
+    "ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled.
     "stopnet_pos_weight": 15.0, // pos class weight for stopnet loss since there are way more negative samples than positive samples.

     // OPTIMIZER
     "noam_schedule": false, // use noam warmup and lr schedule.
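As a rough picture of how alpha weights like these usually enter the training objective, each enabled loss term is scaled by its weight and summed. The arithmetic below is a hypothetical sketch with dummy loss values, not the repo's actual TacotronLoss:

# Dummy loss values, weighted with the config's alphas (hypothetical sketch).
decoder_loss_alpha, postnet_loss_alpha, ga_alpha = 0.5, 0.25, 5.0
decoder_loss, postnet_loss, attention_loss = 1.2, 0.9, 0.01
total_loss = (decoder_loss_alpha * decoder_loss      # 0.600
              + postnet_loss_alpha * postnet_loss    # 0.225
              + ga_alpha * attention_loss)           # 0.050
print(total_loss)  # 0.875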
@@ -55,7 +55,8 @@
         [1, 2, 1, 2],
         [1, 2, 4, 8],
         [1, 2, 4, 8],
-        [1, 2, 4, 8]]
+        [1, 2, 4, 8]],
+        "use_weight_norm": true
     },

     // DATASET
@@ -59,6 +59,19 @@
     "data_path": "tests/data/ljspeech/wavs/", // path containing training wav files
     "feature_path": null, // path containing computed features from wav files if null compute them

+    // MODEL PARAMETERS
+    "wavernn_model_params": {
+        "rnn_dims": 512,
+        "fc_dims": 512,
+        "compute_dims": 128,
+        "res_out_dims": 128,
+        "num_res_blocks": 10,
+        "use_aux_net": true,
+        "use_upsample_net": true,
+        "upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length
+    },
+    "mixed_precision": false,
+
     // TRAINING
     "batch_size": 4, // Batch size for training. Lower values than 32 might cause hard to learn attention.
     "epochs": 1, // total number of epochs to train.
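The comment on upsample_factors is load-bearing: WaveRNN's upsample network expands mel frames into audio samples, so the factors must multiply to exactly hop_length, and 4 * 8 * 8 = 256 suggests a hop length of 256 in this test config (an assumption here). A quick sanity check one could run against such a config:

from functools import reduce

upsample_factors = [4, 8, 8]
hop_length = 256  # assumed audio setting for this test config
product = reduce(lambda a, b: a * b, upsample_factors)
assert product == hop_length, "upsample_factors must factorise hop_length"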
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
@@ -294,6 +294,7 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
         mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
         linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device)
         mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_lengths[-1] = mel_spec.size(1)
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
         speaker_embeddings = torch.rand(8, 55).to(device)
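The added mel_lengths[-1] = mel_spec.size(1) makes at least one item in the dummy batch span the full padded length, which sequence-masking code typically assumes (max(lengths) equals the padded T). A minimal sketch of that masking idiom; the helper name and shapes here are illustrative, not the repo's exact code:

import torch

def sequence_mask(lengths, max_len):
    # [B, T] boolean mask; True where a frame is real, False where padding.
    return torch.arange(max_len)[None, :] < lengths[:, None]

lengths = torch.tensor([25, 30])  # last item spans the full 30 frames
mask = sequence_mask(lengths, max_len=30)
print(mask.shape, mask[0].sum().item())  # torch.Size([2, 30]) 25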
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # create run dir
@@ -1,9 +1,9 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # create run dir
-mkdir $BASEDIR/train_outputs
+mkdir -p $BASEDIR/train_outputs
 # run training
 CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --config_path $BASEDIR/inputs/test_vocoder_wavegrad.json
 # find the training folder
@@ -27,5 +27,5 @@ def test_wavernn():
     dummy_y = torch.rand((80, y_size))
     output = model(dummy_x, dummy_m)
     assert np.all(output.shape == (2, 1280, 4 * 256)), output.shape
-    output = model.generate(dummy_y, True, 5500, 550, False)
+    output = model.inference(dummy_y, True, 5500, 550)
     assert np.all(output.shape == (256 * (y_size - 1),))
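The shape assertion follows directly from the wave_len arithmetic in the WaveRNN hunk above: the synthesized waveform spans (frames - 1) * hop_length samples, and the 256 in the assert is evidently the test's hop length. A toy check of that arithmetic, with the frame count chosen for illustration:

hop_length = 256   # matches the 256 in the test's shape assertion
y_size = 129       # hypothetical mel frame count
expected_samples = (y_size - 1) * hop_length
print(expected_samples)  # 32768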
@@ -1,9 +1,9 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # create run dir
-mkdir $BASEDIR/train_outputs
+mkdir -p $BASEDIR/train_outputs
 # run training
 CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
 # find the training folder