mirror of https://github.com/coqui-ai/TTS.git
fix a ton of testing bugs
This commit is contained in:
parent 25551c4634
commit 6cc464ead6
@@ -182,13 +182,14 @@ def check_config_tts(c):
     # loss parameters
     check_argument('loss_masking', c, restricted=True, val_type=bool)
-    check_argument('decoder_loss_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('postnet_loss_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('postnet_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('decoder_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('decoder_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('postnet_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
-    check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)
+    if c['model'].lower() in ['tacotron', 'tacotron2']:
+        check_argument('decoder_loss_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('postnet_loss_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('postnet_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('decoder_diff_spec_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('decoder_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('postnet_ssim_alpha', c, restricted=True, val_type=float, min_val=0)
+        check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)

     # validation parameters
     check_argument('run_eval', c, restricted=True, val_type=bool)
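For readers unfamiliar with the validator, a minimal stand-in showing the semantics these calls rely on (restricted makes a key mandatory; val_type and min_val constrain it). This is an illustrative sketch, not the repo's actual check_argument implementation:

# Illustrative stand-in for the validator used above (assumed semantics).
def check_argument(name, c, restricted=False, val_type=None, min_val=None):
    if name not in c:
        assert not restricted, f" [!] '{name}' is a required field in the config."
        return
    if val_type is not None:
        assert isinstance(c[name], val_type), f" [!] '{name}' must be {val_type}."
    if min_val is not None:
        assert c[name] >= min_val, f" [!] '{name}' must be >= {min_val}."

c = {'model': 'Tacotron2', 'loss_masking': True, 'ga_alpha': 10.0}
check_argument('loss_masking', c, restricted=True, val_type=bool)
if c['model'].lower() in ['tacotron', 'tacotron2']:
    check_argument('ga_alpha', c, restricted=True, val_type=float, min_val=0)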
@@ -4,6 +4,7 @@ import os
 import shutil
 import subprocess
 import contextlib
+import platform

 import torch
@@ -12,10 +13,10 @@ def set_amp_context(mixed_precision):
     if mixed_precision:
         cm = torch.cuda.amp.autocast()
     else:
-        if os.python.version<=3.6:
+        if platform.python_version() <= "3.6.0":
             cm = contextlib.suppress()
         else:
-            cm = contextlib.nullcontext()
+            cm = contextlib.nullcontext()
     return cm
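For context, a sketch of how such a helper is typically consumed. Note that comparing version strings lexicographically misfires on Python 3.10+ ("3.10.0" < "3.6.0" as strings), so the variant below swaps in sys.version_info as the guard; the call-site names are assumptions, not repo code:

import sys
import contextlib

import torch

def set_amp_context(mixed_precision):
    # Same shape as the patched helper, but guarded with sys.version_info
    # instead of a lexicographic string comparison.
    if mixed_precision:
        return torch.cuda.amp.autocast()
    if sys.version_info >= (3, 7):
        return contextlib.nullcontext()  # nullcontext was added in Python 3.7
    return contextlib.suppress()  # no-op context manager fallback on 3.6

# Typical call site in a train step (hypothetical names):
# with set_amp_context(c.mixed_precision):
#     decoder_output = model(text, mel_input)
#     loss = criterion(decoder_output, mel_target)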
@@ -271,9 +271,12 @@ class WaveRNN(nn.Module):
         with torch.no_grad():
             if isinstance(mels, np.ndarray):
-                mels = torch.FloatTensor(mels).unsqueeze(0).to(device)
-            #mels = torch.FloatTensor(mels).cuda().unsqueeze(0)
+                mels = torch.FloatTensor(mels).to(device)
+
+            if mels.ndim == 2:
+                mels = mels.unsqueeze(0)
+
             wave_len = (mels.size(-1) - 1) * self.hop_length

             mels = self.pad_tensor(mels.transpose(
                 1, 2), pad=self.pad, side="both")
             mels, aux = self.upsample(mels.transpose(1, 2))
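The rearranged block lets inference accept either a numpy array or a tensor, batched or not. A self-contained sketch of that normalization idiom (the function name and device argument are illustrative):

import numpy as np
import torch

def to_batched_tensor(mels, device="cpu"):
    # Accept numpy input and convert it to a float tensor on the target device.
    if isinstance(mels, np.ndarray):
        mels = torch.FloatTensor(mels).to(device)
    # Promote an unbatched [num_mels, T] spectrogram to [1, num_mels, T].
    if mels.ndim == 2:
        mels = mels.unsqueeze(0)
    return mels

print(to_batched_tensor(np.zeros((80, 100))).shape)  # torch.Size([1, 80, 100])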
@@ -1,3 +1,4 @@
 set -e
+TF_CPP_MIN_LOG_LEVEL=3

 # tests
@@ -67,7 +67,7 @@
     "gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
     "loss_masking": true, // enable / disable loss masking against the sequence padding.
     "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
-    "apex_amp_level": null,
+    "mixed_precision": false,

     // VALIDATION
     "run_eval": true,
@@ -75,14 +75,15 @@
     "test_sentences_file": null, // set a file to load sentences to be used for testing. If it is null then we use default english sentences.

     // LOSS SETTINGS
-    "loss_masking": false, // enable / disable loss masking against the sequence padding.
-    "decoder_loss_alpha": 0.5, // decoder loss weight. If > 0, it is enabled
-    "postnet_loss_alpha": 0.25, // postnet loss weight. If > 0, it is enabled
-    "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
-    "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
-    "decoder_ssim_alpha": 0.5, // differential spectral loss weight. If > 0, it is enabled
-    "postnet_ssim_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
+    "loss_masking": true, // enable / disable loss masking against the sequence padding.
+    "decoder_loss_alpha": 0.5, // original decoder loss weight. If > 0, it is enabled
+    "postnet_loss_alpha": 0.25, // original postnet loss weight. If > 0, it is enabled
+    "postnet_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
+    "decoder_diff_spec_alpha": 0.25, // differential spectral loss weight. If > 0, it is enabled
+    "decoder_ssim_alpha": 0.5, // decoder ssim loss weight. If > 0, it is enabled
+    "postnet_ssim_alpha": 0.25, // postnet ssim loss weight. If > 0, it is enabled
+    "ga_alpha": 5.0, // weight for guided attention loss. If > 0, guided attention is enabled.
     "stopnet_pos_weight": 15.0, // pos class weight for stopnet loss since there are way more negative samples than positive samples.

     // OPTIMIZER
     "noam_schedule": false, // use noam warmup and lr schedule.
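As a rough picture of how alpha weights like these usually enter the training objective, each enabled loss term is scaled by its weight and summed. The arithmetic below is a hypothetical sketch with dummy loss values, not the repo's actual TacotronLoss:

# Dummy loss values, weighted with the config's alphas (hypothetical sketch).
decoder_loss_alpha, postnet_loss_alpha, ga_alpha = 0.5, 0.25, 5.0
decoder_loss, postnet_loss, attention_loss = 1.2, 0.9, 0.01
total_loss = (decoder_loss_alpha * decoder_loss      # 0.600
              + postnet_loss_alpha * postnet_loss    # 0.225
              + ga_alpha * attention_loss)           # 0.050
print(total_loss)  # 0.875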
@@ -55,7 +55,8 @@
         [1, 2, 1, 2],
         [1, 2, 4, 8],
         [1, 2, 4, 8],
-        [1, 2, 4, 8]]
+        [1, 2, 4, 8]],
+        "use_weight_norm": true
     },

     // DATASET
@@ -59,6 +59,19 @@
     "data_path": "tests/data/ljspeech/wavs/", // path containing training wav files
     "feature_path": null, // path containing computed features from wav files if null compute them

+    // MODEL PARAMETERS
+    "wavernn_model_params": {
+        "rnn_dims": 512,
+        "fc_dims": 512,
+        "compute_dims": 128,
+        "res_out_dims": 128,
+        "num_res_blocks": 10,
+        "use_aux_net": true,
+        "use_upsample_net": true,
+        "upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length
+    },
+    "mixed_precision": false,
+
     // TRAINING
     "batch_size": 4, // Batch size for training. Lower values than 32 might cause hard to learn attention.
     "epochs": 1, // total number of epochs to train.
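The comment on upsample_factors is load-bearing: WaveRNN's upsample network expands mel frames into audio samples, so the factors must multiply to exactly hop_length, and 4 * 8 * 8 = 256 suggests a hop length of 256 in this test config (an assumption here). A quick sanity check one could run against such a config:

from functools import reduce

upsample_factors = [4, 8, 8]
hop_length = 256  # assumed audio setting for this test config
product = reduce(lambda a, b: a * b, upsample_factors)
assert product == hop_length, "upsample_factors must factorise hop_length"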
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
@@ -294,6 +294,7 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
         mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
         linear_spec = torch.rand(8, 30, c.audio['fft_size']).to(device)
         mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        mel_lengths[-1] = mel_spec.size(1)
         stop_targets = torch.zeros(8, 30, 1).float().to(device)
         speaker_embeddings = torch.rand(8, 55).to(device)
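The added mel_lengths[-1] = mel_spec.size(1) makes at least one item in the dummy batch span the full padded length, which sequence-masking code typically assumes (max(lengths) equals the padded T). A minimal sketch of that masking idiom; the helper name and shapes here are illustrative, not the repo's exact code:

import torch

def sequence_mask(lengths, max_len):
    # [B, T] boolean mask; True where a frame is real, False where padding.
    return torch.arange(max_len)[None, :] < lengths[:, None]

lengths = torch.tensor([25, 30])  # last item spans the full 30 frames
mask = sequence_mask(lengths, max_len=30)
print(mask.shape, mask[0].sum().item())  # torch.Size([2, 30]) 25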
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # create run dir
@@ -1,9 +1,9 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # create run dir
-mkdir $BASEDIR/train_outputs
+mkdir -p $BASEDIR/train_outputs
 # run training
 CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavegrad.py --config_path $BASEDIR/inputs/test_vocoder_wavegrad.json
 # find the training folder
@@ -27,5 +27,5 @@ def test_wavernn():
     dummy_y = torch.rand((80, y_size))
     output = model(dummy_x, dummy_m)
     assert np.all(output.shape == (2, 1280, 4 * 256)), output.shape
-    output = model.generate(dummy_y, True, 5500, 550, False)
+    output = model.inference(dummy_y, True, 5500, 550)
     assert np.all(output.shape == (256 * (y_size - 1),))
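The shape assertion follows directly from the wave_len arithmetic in the WaveRNN hunk above: the synthesized waveform spans (frames - 1) * hop_length samples, and the 256 in the assert is evidently the test's hop length. A toy check of that arithmetic, with the frame count chosen for illustration:

hop_length = 256   # matches the 256 in the test's shape assertion
y_size = 129       # hypothetical mel frame count
expected_samples = (y_size - 1) * hop_length
print(expected_samples)  # 32768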
@@ -1,9 +1,9 @@
 #!/usr/bin/env bash
 set -xe
 BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # create run dir
-mkdir $BASEDIR/train_outputs
+mkdir -p $BASEDIR/train_outputs
 # run training
 CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder_wavernn.py --config_path $BASEDIR/inputs/test_vocoder_wavernn_config.json
 # find the training folder