mirror of https://github.com/coqui-ai/TTS.git
Fix minor bugs and make the code trainable
parent ef6ff4e95c
commit b533474e3b
@@ -31,7 +31,7 @@
     "symmetric_norm": true, // move normalization to range [-1, 1]
     "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
     "clip_norm": true, // clip normalized values into the range.
-    "stats_path": "/home/erogol/Data/libritts/LibriTTS/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based normalization is used and other normalization params are ignored
+    "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based normalization is used and other normalization params are ignored
 },

 // DISTRIBUTED TRAINING
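For readers unfamiliar with the stats_path option: when it points at a scale_stats.npy file, mel spectrograms are standardized with precomputed per-bin mean and std instead of the symmetric_norm / max_norm scaling. A minimal sketch of that behavior follows; the file layout and the key names ("mel_mean", "mel_std") are assumptions for illustration, not the repo's actual format.

# Sketch of the mean-std normalization that "stats_path" enables.
# The stats file layout and key names below are assumed, not the repo's.
import numpy as np

def load_stats(stats_path):
    # scale_stats.npy is assumed to hold a dict of per-bin statistics.
    stats = np.load(stats_path, allow_pickle=True).item()
    return stats["mel_mean"], stats["mel_std"]  # hypothetical keys

def normalize_mel(mel, mel_mean, mel_std):
    # mel: (n_mels, T). Standardize each frequency bin; this replaces the
    # symmetric_norm / max_norm scaling, which is why those params are
    # ignored once stats_path is set.
    return (mel - mel_mean[:, None]) / mel_std[:, None]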
@@ -44,11 +44,11 @@
     "use_pqmf": false,

     // LOSS PARAMETERS
-    "use_stft_loss": false,
+    "use_stft_loss": true,
     "use_subband_stft_loss": false,
     "use_mse_gan_loss": true,
     "use_hinge_gan_loss": false,
-    "use_feat_match_loss": false, // use only with melgan discriminators
+    "use_feat_match_loss": true, // use only with melgan discriminators

     // loss weights
     "stft_loss_weight": 0.5,
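The change above switches on the STFT loss, weighted by stft_loss_weight just below. A minimal sketch of the usual formulation, spectral convergence plus log-STFT-magnitude L1 as in Parallel WaveGAN, is shown here; it is illustrative and not necessarily the repo's exact implementation, which is typically multi-resolution over several FFT sizes.

# Sketch of a single-resolution STFT loss (spectral convergence + log magnitude).
# Illustrative only; production versions usually sum this over multiple FFT sizes.
import torch
import torch.nn.functional as F

def stft_magnitude(x, n_fft=1024, hop_length=256, win_length=1024):
    # x: (B, T) waveform. The clamp avoids log(0) in the loss below.
    window = torch.hann_window(win_length, device=x.device)
    spec = torch.stft(x, n_fft, hop_length, win_length, window=window,
                      return_complex=True)
    return spec.abs().clamp(min=1e-7)

def stft_loss(y_hat, y):
    mag_hat, mag = stft_magnitude(y_hat), stft_magnitude(y)
    sc = torch.norm(mag - mag_hat, p="fro") / torch.norm(mag, p="fro")
    log_mag = F.l1_loss(torch.log(mag_hat), torch.log(mag))
    return sc + log_mag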
@@ -67,14 +67,14 @@
     "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch

     // DISCRIMINATOR
-    "discriminator_model": "hifigan_mpd_discriminator",
+    "discriminator_model": "multi_period_discriminator",
     "discriminator_model_params": {
         "peroids": [2, 3, 5, 7, 11],
         "base_channels": 16,
         "max_channels": 512,
         "downsample_factors": [4, 4, 4]
     },
-    "steps_to_start_discriminator": 1, // steps required to start GAN training.
+    "steps_to_start_discriminator": 0, // steps required to start GAN training.

     // GENERATOR
     "generator_model": "hifigan_generator",
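The renamed multi_period_discriminator follows HiFi-GAN's multi-period design: one sub-discriminator per entry in peroids (the key's spelling is the config's own), each of which folds the 1D waveform into a 2D tensor with one column per phase of its period before applying 2D convolutions. A minimal sketch of that folding is below; the layer widths and strides are assumptions for illustration, not the repo's exact architecture.

# Sketch of the period folding at the heart of a HiFi-GAN multi-period
# discriminator; channel progression and strides here are assumptions.
import torch
import torch.nn.functional as F
from torch import nn

class PeriodSubDiscriminator(nn.Module):
    def __init__(self, period, base_channels=16, max_channels=512):
        super().__init__()
        self.period = period
        chans = [1, base_channels, base_channels * 4, max_channels]
        # Stacked 2D convs that stride only over time (the height axis),
        # never across the period (width) axis.
        self.convs = nn.ModuleList([
            nn.Conv2d(chans[i], chans[i + 1], (5, 1), stride=(3, 1), padding=(2, 0))
            for i in range(len(chans) - 1)
        ])
        self.out = nn.Conv2d(max_channels, 1, (3, 1), padding=(1, 0))

    def forward(self, x):            # x: (B, 1, T) waveform
        b, c, t = x.shape
        if t % self.period != 0:     # right-pad so T is a multiple of the period
            pad = self.period - t % self.period
            x = F.pad(x, (0, pad), mode="reflect")
            t = t + pad
        # Fold into 2D: one column per phase of the period.
        x = x.view(b, c, t // self.period, self.period)
        for conv in self.convs:
            x = F.leaky_relu(conv(x), 0.1)
        return self.out(x)           # per-patch real/fake logits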
@@ -87,7 +87,7 @@
     },

     // DATASET
-    "data_path": "/home/erogol/Data/libritts/LibriTTS/train-clean-360/",
+    "data_path": "/workspace/LJSpeech-1.1/",
     "feature_path": null,
     "seq_len": 16384,
     "pad_short": 2000,
@@ -98,7 +98,7 @@
     "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.

     // TRAINING
-    "batch_size": 48, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
+    "batch_size": 16, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.

     // VALIDATION
     "run_eval": true,
@@ -136,7 +136,7 @@
     "eval_split_size": 10,

     // PATHS
-    "output_path": "/home/erogol/Models/"
+    "output_path": "/workspace/Models/"
 }
@@ -3,11 +3,11 @@ from torch import nn
 from TTS.vocoder.layers.hifigan import MRF


-class Generator(nn.Module):
+class HifiganGenerator(nn.Module):

     def __init__(self, in_channels=80, out_channels=1, base_channels=512, upsample_kernel=[16, 16, 4, 4],
                  resblock_kernel_sizes=[3, 7, 11], resblock_dilation_sizes=[1, 3, 5]):
-        super(Generator, self).__init__()
+        super(HifiganGenerator, self).__init__()

         self.inference_padding = 2
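With this rename, the class name now matches the "generator_model": "hifigan_generator" entry in the config above. A minimal usage sketch follows, using the defaults from the signature shown in the diff; the import path and the forward interface (mel spectrogram in, waveform out) are assumptions based on standard HiFi-GAN usage, not confirmed by the diff.

# Usage sketch for the renamed class (import path and forward interface assumed).
import torch
from TTS.vocoder.models.hifigan_generator import HifiganGenerator  # assumed path

model = HifiganGenerator(in_channels=80)  # other args keep the defaults above
mel = torch.randn(4, 80, 32)              # (batch, mel bins, frames)
wav = model(mel)                          # expected: (4, 1, 32 * total_upsampling)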