From dc166b42e37a20ab950c3d39cfacb33444dca102 Mon Sep 17 00:00:00 2001
From: erogol
Date: Wed, 20 May 2020 11:55:32 +0200
Subject: [PATCH] update config.json

---
 config.json       | 16 ++++++++--------
 utils/training.py |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/config.json b/config.json
index ffc8fee5..1180c12a 100644
--- a/config.json
+++ b/config.json
@@ -34,7 +34,7 @@
     "signal_norm": true,    // normalize spec values. Mean-Var normalization if 'stats_path' is defined otherwise range normalization defined by the other params.
     "min_level_db": -100,   // lower bound for normalization
     "symmetric_norm": true, // move normalization to range [-1, 1]
-    "max_norm": 1.0,        // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
+    "max_norm": 4.0,        // scale normalization to range [-max_norm, max_norm] or [0, max_norm]
     "clip_norm": true,      // clip normalized values into the range.
     "stats_path": null      // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored
     },
@@ -74,15 +74,15 @@

     // OPTIMIZER
     "noam_schedule": false,        // use noam warmup and lr schedule.
-    "grad_clip": 1.0,              // upper limit for gradients for clipping.
+    "grad_clip": 1.0,              // upper limit for gradients for clipping.
     "epochs": 1000,                // total number of epochs to train.
     "lr": 0.0001,                  // Initial learning rate. If Noam decay is active, maximum learning rate.
-    "wd": 0.000001,                // Weight decay weight.
+    "wd": 0.000001,                // Weight decay weight.
     "warmup_steps": 4000,          // Noam decay steps to increase the learning rate from 0 to "lr"
-    "seq_len_norm": false,         // Normalize eash sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has skewed distribution of sequence lengths.
+    "seq_len_norm": false,         // Normalize eash sample loss with its length to alleviate imbalanced datasets. Use it if your dataset is small or has skewed distribution of sequence lengths.

     // TACOTRON PRENET
-    "memory_size": -1,             // ONLY TACOTRON - size of the memory queue used fro storing last decoder predictions for auto-regression. If < 0, memory queue is disabled and decoder only uses the last prediction frame.
+    "memory_size": -1,             // ONLY TACOTRON - size of the memory queue used fro storing last decoder predictions for auto-regression. If < 0, memory queue is disabled and decoder only uses the last prediction frame.
     "prenet_type": "original",     // "original" or "bn".
     "prenet_dropout": true,        // enable/disable dropout at prenet.

@@ -91,15 +91,15 @@
     "attention_heads": 4,          // number of attention heads (only for 'graves')
     "attention_norm": "sigmoid",   // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
     "windowing": false,            // Enables attention windowing. Used only in eval mode.
-    "use_forward_attn": false,     // if it uses forward attention. In general, it aligns faster.
+    "use_forward_attn": false,     // if it uses forward attention. In general, it aligns faster.
     "forward_attn_mask": false,    // Additional masking forcing monotonicity only in eval mode.
     "transition_agent": false,     // enable/disable transition agent of forward attention.
-    "location_attn": true,         // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
+    "location_attn": true,         // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
     "bidirectional_decoder": false, // use https://arxiv.org/abs/1907.09006. Use it, if attention does not work well with your dataset.

     // STOPNET
     "stopnet": true,               // Train stopnet predicting the end of synthesis.
-    "separate_stopnet": true,      // Train stopnet seperately if 'stopnet==true'. It prevents stopnet loss to influence the rest of the model. It causes a better model, but it trains SLOWER.
+    "separate_stopnet": true,      // Train stopnet seperately if 'stopnet==true'. It prevents stopnet loss to influence the rest of the model. It causes a better model, but it trains SLOWER.

     // TENSORBOARD and LOGGING
     "print_step": 25,              // Number of steps to log traning on console.
diff --git a/utils/training.py b/utils/training.py
index 6739132e..ebf8fd13 100644
--- a/utils/training.py
+++ b/utils/training.py
@@ -9,7 +9,7 @@ def check_update(model, grad_clip, ignore_stopnet=False):
         grad_norm = torch.nn.utils.clip_grad_norm_([param for name, param in model.named_parameters() if 'stopnet' not in name], grad_clip)
     else:
         grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
-    if np.isinf(grad_norm):
+    if torch.isinf(grad_norm):
         print(" | > Gradient is INF !!")
         skip_flag = True
     return grad_norm, skip_flag
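
Note on the config.json hunk: with "signal_norm" enabled and no "stats_path", spectrograms are range-normalized, and "max_norm" sets the width of the target interval, so raising it from 1.0 to 4.0 moves the feature range from [-1, 1] to [-4, 4] when "symmetric_norm" is true. The snippet below is a minimal sketch of that mapping as described by the config comments; it is not the project's actual AudioProcessor code, and `normalize_spec` is a hypothetical helper name.

import numpy as np

# Sketch of the range normalization described by the config comments
# (assumed helper, not the project's actual AudioProcessor implementation).
def normalize_spec(S_db, min_level_db=-100.0, max_norm=4.0,
                   symmetric_norm=True, clip_norm=True):
    # Map a dB-scale spectrogram from [min_level_db, 0] to [0, 1].
    S = (S_db - min_level_db) / (-min_level_db)
    if symmetric_norm:
        S = 2 * max_norm * S - max_norm          # [0, 1] -> [-max_norm, max_norm]
        return np.clip(S, -max_norm, max_norm) if clip_norm else S
    S = max_norm * S                             # [0, 1] -> [0, max_norm]
    return np.clip(S, 0.0, max_norm) if clip_norm else S

# Example: a -100 dB frame maps to -4.0 and a 0 dB peak maps to +4.0.
print(normalize_spec(np.array([-100.0, -50.0, 0.0])))   # -> [-4.  0.  4.]

Since normalization parameters must match between training and synthesis, checkpoints trained with the old max_norm of 1.0 should keep using a config with that value.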
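Note on the utils/training.py hunk: torch.nn.utils.clip_grad_norm_ returns the total gradient norm as a tensor in recent PyTorch releases, so checking it with torch.isinf avoids pushing a tensor through NumPy. Reassembled from the hunk above, the patched function reads roughly as follows; the lines outside the hunk (the docstring and the skip_flag initialisation) are assumptions, not verbatim repository content.

import torch


def check_update(model, grad_clip, ignore_stopnet=False):
    """Clip gradients and flag the update for skipping when the norm is infinite."""
    skip_flag = False  # assumed initialisation; this line is outside the hunk
    if ignore_stopnet:
        # Clip every parameter except the stopnet ones.
        grad_norm = torch.nn.utils.clip_grad_norm_(
            [param for name, param in model.named_parameters() if 'stopnet' not in name],
            grad_clip)
    else:
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    if torch.isinf(grad_norm):  # grad_norm is a tensor, so torch.isinf rather than np.isinf
        print(" | > Gradient is INF !!")
        skip_flag = True
    return grad_norm, skip_flag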