mirror of https://github.com/coqui-ai/TTS.git
wavegrad config updates
This commit is contained in:
parent
c8a4c771a8
commit
5b5b9fcfdd
|
@ -30,11 +30,11 @@
|
||||||
"symmetric_norm": true, // move normalization to range [-1, 1]
|
"symmetric_norm": true, // move normalization to range [-1, 1]
|
||||||
"max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
|
"max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
|
||||||
"clip_norm": true, // clip normalized values into the range.
|
"clip_norm": true, // clip normalized values into the range.
|
||||||
"stats_path": "/home/erogol/Data/libritts/LibriTTS/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based normalization is used and other normalization params are ignored
|
"stats_path": "/home/erogol/Data/libritts/LibriTTS/scale_stats_wavegrad.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based normalization is used and other normalization params are ignored
|
||||||
},
|
},
|
||||||
|
|
||||||
// DISTRIBUTED TRAINING
|
// DISTRIBUTED TRAINING
|
||||||
"apex_amp_level": null, // APEX amp optimization level. "O1" is currently supported.
|
"apex_amp_level": "O1", // APEX amp optimization level. "O1" is currently supported.
|
||||||
"distributed":{
|
"distributed":{
|
||||||
"backend": "nccl",
|
"backend": "nccl",
|
||||||
"url": "tcp:\/\/localhost:54322"
|
"url": "tcp:\/\/localhost:54322"
|
||||||
|
@ -45,8 +45,8 @@
|
||||||
// MODEL PARAMETERS
|
// MODEL PARAMETERS
|
||||||
"generator_model": "wavegrad",
|
"generator_model": "wavegrad",
|
||||||
"model_params":{
|
"model_params":{
|
||||||
"x_conv_channels":32,
|
"y_conv_channels":32,
|
||||||
"c_conv_channels":768,
|
"x_conv_channels":768,
|
||||||
"ublock_out_channels": [512, 512, 256, 128, 128],
|
"ublock_out_channels": [512, 512, 256, 128, 128],
|
||||||
"dblock_out_channels": [128, 128, 256, 512],
|
"dblock_out_channels": [128, 128, 256, 512],
|
||||||
"upsample_factors": [4, 4, 4, 2, 2],
|
"upsample_factors": [4, 4, 4, 2, 2],
|
||||||
|
@ -62,15 +62,15 @@
|
||||||
"data_path": "/home/erogol/Data/libritts/LibriTTS/train-clean-360/", // root data path. It finds all wav files recursively from there.
|
"data_path": "/home/erogol/Data/libritts/LibriTTS/train-clean-360/", // root data path. It finds all wav files recursively from there.
|
||||||
"feature_path": null, // if you use precomputed features
|
"feature_path": null, // if you use precomputed features
|
||||||
"seq_len": 6144, // 24 * hop_length
|
"seq_len": 6144, // 24 * hop_length
|
||||||
"pad_short": 2000, // additional padding for short wavs
|
"pad_short": 0, // additional padding for short wavs
|
||||||
"conv_pad": 0, // additional padding against convolutions applied to spectrograms
|
"conv_pad": 0, // additional padding against convolutions applied to spectrograms
|
||||||
"use_noise_augment": false, // add noise to the audio signal for augmentation
|
"use_noise_augment": false, // add noise to the audio signal for augmentation
|
||||||
"use_cache": true, // use in memory cache to keep the computed features. This might cause OOM.
|
"use_cache": false, // use in memory cache to keep the computed features. This might cause OOM.
|
||||||
|
|
||||||
"reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.
|
"reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.
|
||||||
|
|
||||||
// TRAINING
|
// TRAINING
|
||||||
"batch_size": 64, // Batch size for training.
|
"batch_size": 96, // Batch size for training.
|
||||||
"train_noise_schedule":{
|
"train_noise_schedule":{
|
||||||
"min_val": 1e-6,
|
"min_val": 1e-6,
|
||||||
"max_val": 1e-2,
|
"max_val": 1e-2,
|
||||||
|
@ -87,7 +87,7 @@
|
||||||
|
|
||||||
// OPTIMIZER
|
// OPTIMIZER
|
||||||
"epochs": 10000, // total number of epochs to train.
|
"epochs": 10000, // total number of epochs to train.
|
||||||
"clip_grad": 1, // Generator gradient clipping threshold. Apply gradient clipping if > 0
|
"clip_grad": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0
|
||||||
"lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
|
"lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
|
||||||
"lr_scheduler_params": {
|
"lr_scheduler_params": {
|
||||||
"gamma": 0.5,
|
"gamma": 0.5,
|
||||||
|
@ -96,16 +96,16 @@
|
||||||
"lr": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate.
|
"lr": 1e-4, // Initial learning rate. If Noam decay is active, maximum learning rate.
|
||||||
|
|
||||||
// TENSORBOARD and LOGGING
|
// TENSORBOARD and LOGGING
|
||||||
"print_step": 25, // Number of steps to log training on console.
|
"print_step": 50, // Number of steps to log training on console.
|
||||||
"print_eval": false, // If True, it prints loss values for each step in eval run.
|
"print_eval": false, // If True, it prints loss values for each step in eval run.
|
||||||
"save_step": 10000, // Number of training steps expected to plot training stats on TB and save model checkpoints.
|
"save_step": 10000, // Number of training steps expected to plot training stats on TB and save model checkpoints.
|
||||||
"checkpoint": true, // If true, it saves checkpoints per "save_step"
|
"checkpoint": true, // If true, it saves checkpoints per "save_step"
|
||||||
"tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
|
"tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
|
||||||
|
|
||||||
// DATA LOADING
|
// DATA LOADING
|
||||||
"num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
|
"num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
|
||||||
"num_val_loader_workers": 4, // number of evaluation data loader processes.
|
"num_val_loader_workers": 4, // number of evaluation data loader processes.
|
||||||
"eval_split_size": 10,
|
"eval_split_size": 256,
|
||||||
|
|
||||||
// PATHS
|
// PATHS
|
||||||
"output_path": "/home/erogol/Models/LJSpeech/"
|
"output_path": "/home/erogol/Models/LJSpeech/"
|
||||||
|
|
Loading…
Reference in New Issue