fix a ton of testing bugs

erogol 2020-11-12 16:33:29 +01:00
parent 25551c4634
commit 6cc464ead6
15 changed files with 54 additions and 31 deletions

View File

@@ -182,13 +182,14 @@ def check_config_tts(c):
# loss parameters
check_argument('loss_masking', c, restricted=True, val_type=bool)
check_argument('decoder_loss_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('postnet_loss_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('postnet_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('decoder_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('decoder_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('postnet_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)
if c['model'].lower() in ['tacotron', 'tacotron2']:
check_argument('decoder_loss_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('postnet_loss_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('postnet_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('decoder_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('decoder_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('postnet_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)
# validation parameters
check_argument('run_eval', c, restricted=True, val_type=bool)
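
This hunk makes the Tacotron-specific loss weights optional for other models by validating them only when c['model'] is 'tacotron' or 'tacotron2'. For reference, a minimal, hypothetical sketch of what a validator like check_argument typically enforces (not the repo's actual implementation):

def check_argument(name, config, restricted=False, val_type=None, min_val=None):
    # Hypothetical sketch: presence, type and range checks for one config key.
    if restricted:
        assert name in config, f"{name} is missing from the config"
    if name in config and config[name] is not None:
        if val_type is not None:
            assert isinstance(config[name], val_type), f"{name} must be of type {val_type}"
        if min_val is not None:
            assert config[name] >= min_val, f"{name} must be >= {min_val}"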

View File

@@ -4,6 +4,7 @@ import os
import shutil
import subprocess
import contextlib
import platform
import torch
@@ -12,10 +13,10 @@ def set_amp_context(mixed_precision):
if mixed_precision:
cm = torch.cuda.amp.autocast()
else:
if os.python.version<=3.6:
if platform.python_version() <= "3.6.0":
cm = contextlib.suppress()
else:
cm = nullcontext()
cm = contextlib.nullcontext()
return cm
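
Put together, the fixed helper reads as below; a self-contained sketch reconstructed from this hunk (imports added), showing the contextlib.nullcontext() no-op fallback and the contextlib.suppress() fallback for Python <= 3.6:

import contextlib
import platform

import torch


def set_amp_context(mixed_precision):
    # Return a context manager for the training step: CUDA autocast when
    # mixed precision is enabled, otherwise a no-op context.
    if mixed_precision:
        cm = torch.cuda.amp.autocast()
    else:
        # Note: lexicographic string comparison, as written in the commit.
        if platform.python_version() <= "3.6.0":
            cm = contextlib.suppress()  # nullcontext() does not exist before 3.7
        else:
            cm = contextlib.nullcontext()
    return cm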

View File

@@ -271,9 +271,12 @@ class WaveRNN(nn.Module):
with torch.no_grad():
if isinstance(mels, np.ndarray):
mels = torch.FloatTensor(mels).unsqueeze(0).to(device)
#mels = torch.FloatTensor(mels).cuda().unsqueeze(0)
mels = torch.FloatTensor(mels).to(device)
if mels.ndim == 2:
mels = mels.unsqueeze(0)
wave_len = (mels.size(-1) - 1) * self.hop_length
mels = self.pad_tensor(mels.transpose(
1, 2), pad=self.pad, side="both")
mels, aux = self.upsample(mels.transpose(1, 2))
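
The inference entry point now accepts both an unbatched (num_mels, T) spectrogram and an already batched one; roughly, the shape handling behaves like this sketch (hypothetical shapes and hop_length):

import numpy as np
import torch

hop_length = 256                                   # assumed hop length
mels = np.random.rand(80, 100).astype(np.float32)  # (num_mels, T), unbatched

mels = torch.FloatTensor(mels)   # no unconditional unsqueeze(0) anymore
if mels.ndim == 2:
    mels = mels.unsqueeze(0)     # add the batch dimension only when missing
wave_len = (mels.size(-1) - 1) * hop_length        # expected output samples
print(mels.shape, wave_len)      # torch.Size([1, 80, 100]) 25344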

View File

@@ -1,3 +1,4 @@
set -e
TF_CPP_MIN_LOG_LEVEL=3
# tests

View File

@@ -67,7 +67,7 @@
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceeed.
"loss_masking": true, // enable / disable loss masking against the sequence padding.
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"apex_amp_level": null,
"mixed_precision": false,
// VALIDATION
"run_eval": true,
@@ -75,14 +75,15 @@
"test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.
// LOSS SETTINGS
"loss_masking": false, // enable / disable loss masking against the sequence padding.
"decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
"postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"loss_masking": true, // enable / disable loss masking against the sequence padding.
"decoder_loss_alpha": 0.5, // original decoder loss weight. If > 0, it is enabled
"postnet_loss_alpha": 0.25, // original postnet loss weight. If > 0, it is enabled
"postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"decoder_ssim_alpha": 0.5, // differential spectral loss weight. If > 0, it is enabled
"postnet_ssim_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
"decoder_ssim_alpha": 0.5, // decoder ssim loss weight. If > 0, it is enabled
"postnet_ssim_alpha": 0.25, // postnet ssim loss weight. If > 0, it is enabled
"ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"stopnet_pos_weight": 15.0, // pos class weight for stopnet loss since there are way more negative samples than positive samples.
// OPTIMIZER
"noam_schedule": false, // use noam warmup and lr schedule.

View File

@@ -55,7 +55,8 @@
[1, 2, 1, 2],
[1, 2, 4, 8],
[1, 2, 4, 8],
[1, 2, 4, 8]]
[1, 2, 4, 8]],
"use_weight_norm": true
},
// DATASET

View File

@@ -59,6 +59,19 @@
"data_path": "tests/data/ljspeech/wavs/", // path containing training wav files
"feature_path": null, // path containing computed features from wav files if null compute them
// MODEL PARAMETERS
"wavernn_model_params": {
"rnn_dims": 512,
"fc_dims": 512,
"compute_dims": 128,
"res_out_dims": 128,
"num_res_blocks": 10,
"use_aux_net": true,
"use_upsample_net": true,
"upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length
},
"mixed_precision": false,
// TRAINING
"batch_size": 4, // Batch size for training. Lower values than 32 might cause hard to learn attention.
"epochs": 1, // total number of epochs to train.

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "$BASEDIR"
# run training

View File

@@ -294,6 +294,7 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device)
mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
mel_lengths[-1] = mel_spec.size(1)
stop_targets = torch.zeros(8, 30, 1).float().to(device)
speaker_embeddings = torch.rand(8, 55).to(device)
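
The added mel_lengths[-1] = mel_spec.size(1) line makes the synthetic batch consistent: at least one item spans the full padded length, so max(mel_lengths) matches the padded time axis used when building masks. A small sketch with the same shapes:

import torch

mel_spec = torch.rand(8, 30, 80)                  # (batch, T_padded, num_mels)
mel_lengths = torch.randint(20, 30, (8,)).long()  # random lengths, all < 30

mel_lengths[-1] = mel_spec.size(1)                # force one full-length item
# Now a padding mask built from these lengths covers the whole time axis.
assert mel_lengths.max().item() == mel_spec.size(1)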

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "$BASEDIR"
# run training

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "$BASEDIR"
# run training

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "$BASEDIR"
# create run dir

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "$BASEDIR"
# create run dir
mkdir $BASEDIR/train_outputs
mkdir -p $BASEDIR/train_outputs
# run training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --config_path $BASEDIR/inputs/test_vocoder_wavegrad.json
# find the training folder

View File

@@ -27,5 +27,5 @@ def test_wavernn():
dummy_y = torch.rand((80, y_size))
output = model(dummy_x, dummy_m)
assert np.all(output.shape == (2, 1280, 4 * 256)), output.shape
output = model.generate(dummy_y, True, 5500, 550, False)
output = model.inference(dummy_y, True, 5500, 550)
assert np.all(output.shape == (256 * (y_size - 1),))

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env bash
set -xe
BASEDIR=$(dirname "$0")
echo "$BASEDIR"
# create run dir
mkdir $BASEDIR/train_outputs
mkdir -p $BASEDIR/train_outputs
# run training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
# find the training folder