From a14d7bc5dbdaa8eb9ccc3904e1294ceeaf85e9bc Mon Sep 17 00:00:00 2001
From: Eren Gölge
Date: Mon, 5 Apr 2021 11:31:58 +0200
Subject: [PATCH] hifigan config update

---
 TTS/vocoder/configs/modified_hifigan.json | 50 +++++++++++------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/TTS/vocoder/configs/modified_hifigan.json b/TTS/vocoder/configs/modified_hifigan.json
index 77b1cb52..c50a575b 100644
--- a/TTS/vocoder/configs/modified_hifigan.json
+++ b/TTS/vocoder/configs/modified_hifigan.json
@@ -67,13 +67,13 @@
     "target_loss": "avg_G_loss",  // loss value to pick the best model to save after each epoch
 
     // DISCRIMINATOR
-    "discriminator_model": "multi_period_discriminator",
-    "discriminator_model_params":{
-        "peroids": [2, 3, 5, 7, 11],
-        "base_channels": 16,
-        "max_channels":512,
-        "downsample_factors":[4, 4, 4]
-    },
+    "discriminator_model": "hifigan_discriminator",
+    //"discriminator_model_params":{
+    //    "peroids": [2, 3, 5, 7, 11],
+    //    "base_channels": 16,
+    //    "max_channels":512,
+    //    "downsample_factors":[4, 4, 4]
+    //},
     "steps_to_start_discriminator": 0,  // steps required to start GAN trainining.
 
     // GENERATOR
@@ -83,18 +83,18 @@
     "upsample_kernel_sizes": [16,16,4,4],
     "upsample_initial_channel": 512,
     "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [1,3,5]
+    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+    "resblock_type": "1"
     },
 
     // DATASET
-    "data_path": "/workspace/LJSpeech-1.1/",
-    "feature_path": null,
+    "data_path": "/home/erogol/gdrive/Datasets/non-binary-voice-files/vo_voice_quality_transformation/",
+    "feature_path": "/home/erogol/gdrive/Datasets/non-binary-voice-files/tacotron-DCA/",
     "seq_len": 16384,
     "pad_short": 2000,
     "conv_pad": 0,
     "use_noise_augment": false,
     "use_cache": true,
-
     "reinit_layers": [],  // give a list of layer names to restore from the given checkpoint. If not defined, it reloads all heuristically matching layers.
 
     // TRAINING
@@ -110,18 +110,18 @@
     "wd": 0.0,  // Weight decay weight.
     "gen_clip_grad": -1,  // Generator gradient clipping threshold. Apply gradient clipping if > 0
     "disc_clip_grad": -1,  // Discriminator gradient clipping threshold.
-    "lr_scheduler_gen": "ExponentialLR",  // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
-    "lr_scheduler_gen_params": {
-        "gamma": 0.999,
-        "last_epoch": -1
-    },
-    "lr_scheduler_disc": "ExponentialLR",  // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
-    "lr_scheduler_disc_params": {
-        "gamma": 0.999,
-        "last_epoch": -1
-    },
-    "lr_gen": 0.0002,  // Initial learning rate. If Noam decay is active, maximum learning rate.
-    "lr_disc": 0.0002,
+    // "lr_scheduler_gen": "ExponentialLR",  // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+    // "lr_scheduler_gen_params": {
+    //     "gamma": 0.999,
+    //     "last_epoch": -1
+    // },
+    // "lr_scheduler_disc": "ExponentialLR",  // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+    // "lr_scheduler_disc_params": {
+    //     "gamma": 0.999,
+    //     "last_epoch": -1
+    // },
+    "lr_gen": 0.00001,  // Initial learning rate. If Noam decay is active, maximum learning rate.
+    "lr_disc": 0.00001,
 
     // TENSORBOARD and LOGGING
     "print_step": 25,  // Number of steps to log traning on console.
@@ -131,12 +131,12 @@
     "tb_model_param_stats": false,  // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
 
 
     // DATA LOADING
-    "num_loader_workers": 4,  // number of training data loader processes. Don't set it too big. 4-8 are good values.
+    "num_loader_workers": 8,  // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "num_val_loader_workers": 4,  // number of evaluation data loader processes.
     "eval_split_size": 10,
 
 
     // PATHS
-    "output_path": "/workspace/Models/"
+    "output_path": "/home/erogol/gdrive/Trainings/sam/"
 }
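
Note on the generator change: "resblock_dilation_sizes" becomes a list of lists because a HiFiGAN-style generator builds one residual block per entry of "resblock_kernel_sizes", each with its own dilation schedule, and "resblock_type": "1" selects the larger residual block variant from the HiFiGAN paper. The Python sketch below shows how one (kernel size, dilation list) pair can expand into a stack of dilated convolutions; the class name and exact layer layout are illustrative assumptions, not the repository's actual implementation.

    # Minimal sketch of a HiFiGAN "type 1"-style residual block.
    # ASSUMPTION: layer layout follows the paper's dilated/plain conv pairs;
    # this is not the TTS repo's actual class.
    import torch
    from torch import nn

    class ResBlock1Sketch(nn.Module):
        def __init__(self, channels, kernel_size=3, dilations=(1, 3, 5)):
            super().__init__()
            self.convs = nn.ModuleList()
            for d in dilations:
                pad_d = (kernel_size - 1) * d // 2  # "same" padding, dilated conv
                pad_1 = (kernel_size - 1) // 2      # "same" padding, plain conv
                self.convs.append(nn.Sequential(
                    nn.LeakyReLU(0.1),
                    nn.Conv1d(channels, channels, kernel_size, dilation=d, padding=pad_d),
                    nn.LeakyReLU(0.1),
                    nn.Conv1d(channels, channels, kernel_size, dilation=1, padding=pad_1),
                ))

        def forward(self, x):
            for conv in self.convs:
                x = x + conv(x)  # residual connection per conv pair
            return x

    # Each kernel size pairs with its own dilation list, which is why the
    # config field is now nested: one inner list per kernel size.
    kernel_sizes = [3, 7, 11]
    dilation_sizes = [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
    blocks = [ResBlock1Sketch(512, k, d) for k, d in zip(kernel_sizes, dilation_sizes)]

The old flat value [1,3,5] does not match this per-kernel structure, which is what the patch corrects.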
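A second note for anyone consuming this file programmatically: the config is JSON with //-style line comments (including the blocks commented out above), so a strict json.loads rejects the raw text. Below is a hedged loader sketch, assuming no string value in the config contains "//" (true for the paths used here); load_commented_json is a made-up helper name, not the repository's loader.

    # Sketch: strip //-comments, then parse as ordinary JSON.
    # ASSUMPTION: "//" never occurs inside a JSON string value in this file.
    import json
    import re

    def load_commented_json(path):
        with open(path, encoding="utf-8") as f:
            text = f.read()
        text = re.sub(r"//[^\n]*", "", text)  # drop everything from // to end of line
        return json.loads(text)

    config = load_commented_json("TTS/vocoder/configs/modified_hifigan.json")
    print(config["lr_gen"], config["num_loader_workers"])  # -> 1e-05 8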