Remove minor bugs and make code trainable

rishikksh20 2021-03-04 00:24:32 +05:30 committed by Eren Gölge
parent ef6ff4e95c
commit b533474e3b
2 changed files with 10 additions and 10 deletions

File 1 of 2: vocoder training config (JSON)

@@ -31,7 +31,7 @@
     "symmetric_norm": true, // move normalization to range [-1, 1]
     "max_norm": 4.0, // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
     "clip_norm": true, // clip normalized values into the range.
-    "stats_path": "/home/erogol/Data/libritts/LibriTTS/scale_stats.npy" // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based normalization is used and other normalization params are ignored
+    "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based normalization is used and other normalization params are ignored
     },
     // DISTRIBUTED TRAINING
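Nulling `stats_path` is part of what makes the stock config trainable: the old value pointed at a stats file computed for one specific LibriTTS setup, and, per the comment above, a defined `stats_path` switches to mean-std normalization and ignores the other normalization parameters. A minimal sketch of the two modes, assuming the stats file stores `mel_mean`/`mel_std` keys (an assumption about the file layout, not the repo's exact code):

```python
import numpy as np

def normalize_mel(mel, stats_path=None, max_norm=4.0, symmetric_norm=True, clip_norm=True):
    # Sketch only; the repo's audio processor implements the real logic.
    if stats_path is not None:
        # mean-std normalization from precomputed stats ('compute_statistics.py');
        # the "mel_mean"/"mel_std" key names are assumptions.
        stats = np.load(stats_path, allow_pickle=True).item()
        return (mel - stats["mel_mean"]) / stats["mel_std"]
    # min-max style scaling; assumes `mel` is already mapped into [0, 1]
    mel = mel * 2 * max_norm - max_norm if symmetric_norm else mel * max_norm
    if clip_norm:
        mel = np.clip(mel, -max_norm if symmetric_norm else 0.0, max_norm)
    return mel
```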
@@ -44,11 +44,11 @@
     "use_pqmf": false,
     // LOSS PARAMETERS
-    "use_stft_loss": false,
+    "use_stft_loss": true,
     "use_subband_stft_loss": false,
     "use_mse_gan_loss": true,
     "use_hinge_gan_loss": false,
-    "use_feat_match_loss": false, // use only with melgan discriminators
+    "use_feat_match_loss": true, // use only with melgan discriminators
     // loss weights
     "stft_loss_weight": 0.5,
@@ -67,14 +67,14 @@
     "target_loss": "avg_G_loss", // loss value to pick the best model to save after each epoch
     // DISCRIMINATOR
-    "discriminator_model": "hifigan_mpd_discriminator",
+    "discriminator_model": "multi_period_discriminator",
     "discriminator_model_params":{
         "peroids": [2, 3, 5, 7, 11],
         "base_channels": 16,
         "max_channels":512,
         "downsample_factors":[4, 4, 4]
     },
-    "steps_to_start_discriminator": 1, // steps required to start GAN training.
+    "steps_to_start_discriminator": 0, // steps required to start GAN training.
     // GENERATOR
     "generator_model": "hifigan_generator",
@@ -87,7 +87,7 @@
     },
     // DATASET
-    "data_path": "/home/erogol/Data/libritts/LibriTTS/train-clean-360/",
+    "data_path": "/workspace/LJSpeech-1.1/",
     "feature_path": null,
     "seq_len": 16384,
     "pad_short": 2000,
@@ -98,7 +98,7 @@
     "reinit_layers": [], // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.
     // TRAINING
-    "batch_size": 48, // Batch size for training. Values lower than 32 might make attention hard to learn. It is overwritten by 'gradual_training'.
+    "batch_size": 16, // Batch size for training. Values lower than 32 might make attention hard to learn. It is overwritten by 'gradual_training'.
     // VALIDATION
     "run_eval": true,
@@ -136,7 +136,7 @@
     "eval_split_size": 10,
     // PATHS
-    "output_path": "/home/erogol/Models/"
+    "output_path": "/workspace/Models/"
 }

File 2 of 2: HiFiGAN generator model (Python)

@@ -3,11 +3,11 @@ from torch import nn
 from TTS.vocoder.layers.hifigan import MRF

-class Generator(nn.Module):
+class HifiganGenerator(nn.Module):
     def __init__(self, in_channels=80, out_channels=1, base_channels=512, upsample_kernel=[16, 16, 4, 4],
                  resblock_kernel_sizes=[3, 7, 11], resblock_dilation_sizes=[1, 3, 5]):
-        super(Generator, self).__init__()
+        super(HifiganGenerator, self).__init__()
         self.inference_padding = 2
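The rename is the bug fix that pairs with `"generator_model": "hifigan_generator"` in the config: model strings like this are commonly resolved by CamelCasing them into a class name, so the class must be called `HifiganGenerator`, and a class named plain `Generator` would never be found. A sketch of that kind of by-name lookup (the module path and helper names are illustrative, not the repo's exact code):

```python
import importlib

def to_camel(name):
    # "hifigan_generator" -> "HifiganGenerator"
    return "".join(part.title() for part in name.split("_"))

def setup_generator(model_name, **model_params):
    # Resolve the config string to a class and instantiate it, e.g.
    # setup_generator("hifigan_generator", in_channels=80)
    module = importlib.import_module("TTS.vocoder.models." + model_name)
    return getattr(module, to_camel(model_name))(**model_params)
```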