diff --git a/config.json b/config.json
index 926f48bf..1e358d53 100644
--- a/config.json
+++ b/config.json
@@ -39,6 +39,7 @@
     "eval_batch_size":16,
     "r": 7,                 // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
     "gradual_training": [[0, 7, 64], [1, 5, 64], [50000, 3, 32], [130000, 2, 16], [290000, 1, 8]], // ONLY TACOTRON - set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled.
+    "loss_masking": true,   // enable / disable loss masking against the sequence padding.
 
     // VALIDATION
     "run_eval": true,
@@ -66,8 +67,6 @@
     "transition_agent": false,      // enable/disable transition agent of forward attention.
     "location_attn": true,          // enable_disable location sensitive attention. It is enabled for TACOTRON by default.
     "bidirectional_decoder": false, // use https://arxiv.org/abs/1907.09006. Use it, if attention does not work well with your dataset.
-    "loss_masking": true,           // enable / disable loss masking against the sequence padding.
-    "enable_eos_bos_chars": false,  // enable/disable beginning of sentence and end of sentence chars.
 
     // STOPNET
     "stopnet": true,    // Train stopnet predicting the end of synthesis.
@@ -81,6 +80,7 @@
 
     // DATA LOADING
     "text_cleaner": "phoneme_cleaners",
+    "enable_eos_bos_chars": false,  // enable/disable beginning of sentence and end of sentence chars.
     "num_loader_workers": 4,        // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "num_val_loader_workers": 4,    // number of evaluation data loader processes.
     "batch_group_size": 0,  //Number of batches to shuffle after bucketing.
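For context on the key being moved, here is a minimal PyTorch sketch of what `loss_masking` against sequence padding amounts to: frames beyond each sample's true length are zeroed out of the loss so padding never contributes to the gradient. This is an illustration under assumed tensor shapes, not the repository's actual loss code; `masked_l1_loss` is a hypothetical helper name.

```python
import torch

def masked_l1_loss(pred, target, lengths):
    """L1 loss averaged over valid (non-padded) frames only.

    pred, target: (batch, max_len, feat_dim) decoder outputs and ground truth
    lengths:      (batch,) number of valid frames per sample
    """
    max_len = target.size(1)
    # mask[b, t] is True while t < lengths[b], i.e. for real (non-padding) frames
    mask = torch.arange(max_len, device=target.device)[None, :] < lengths[:, None]
    mask = mask.unsqueeze(-1).float()  # (batch, max_len, 1), broadcasts over feat_dim
    loss = torch.abs(pred - target) * mask
    # normalize by the number of unmasked elements, not by the padded tensor size
    return loss.sum() / (mask.sum() * target.size(-1))
```

With `"loss_masking": false`, the same loss would instead be averaged over every element, padding included, which biases training on batches of mixed-length sequences.

Similarly, the `gradual_training` context line documents a `[first_step, r, batch_size]` schedule. A hedged sketch of how such a schedule could be resolved at a given global step (the function name is illustrative, not the repo's API):

```python
def resolve_gradual_training(schedule, global_step):
    """Return (r, batch_size) from the last schedule entry already reached.

    schedule: list of [first_step, r, batch_size], ascending in first_step.
    """
    r, batch_size = schedule[0][1], schedule[0][2]
    for first_step, new_r, new_batch_size in schedule:
        if global_step < first_step:
            break
        r, batch_size = new_r, new_batch_size
    return r, batch_size

# e.g. with the schedule from the diff, step 60000 resolves to r=3, batch_size=32
```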