From 0becef4b58d34440737e65e37562b0b40d1d9054 Mon Sep 17 00:00:00 2001
From: erogol
Date: Mon, 26 Oct 2020 17:16:16 +0100
Subject: [PATCH] small updates

---
 TTS/bin/train_wavernn_vocoder.py        |  1 -
 TTS/vocoder/configs/wavernn_config.json | 30 ++++++++++++-------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/TTS/bin/train_wavernn_vocoder.py b/TTS/bin/train_wavernn_vocoder.py
index 61664a65..90e30256 100644
--- a/TTS/bin/train_wavernn_vocoder.py
+++ b/TTS/bin/train_wavernn_vocoder.py
@@ -95,7 +95,6 @@ def train(model, optimizer, criterion, scheduler, ap, global_step, epoch):
     end_time = time.time()
     c_logger.print_train_start()
     # train loop
-    print(" > Training", flush=True)
     for num_iter, data in enumerate(data_loader):
         start_time = time.time()
         x_input, mels, y_coarse = format_data(data)
diff --git a/TTS/vocoder/configs/wavernn_config.json b/TTS/vocoder/configs/wavernn_config.json
index 9a9fbdae..8f290b80 100644
--- a/TTS/vocoder/configs/wavernn_config.json
+++ b/TTS/vocoder/configs/wavernn_config.json
@@ -1,7 +1,7 @@
 {
     "run_name": "wavernn_test",
     "run_description": "wavernn_test training",
-    
+
 // AUDIO PARAMETERS
     "audio": {
         "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame.
@@ -29,7 +29,7 @@
         "clip_norm": true, // clip normalized values into the range.
         "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored
     },
-    
+
 // Generating / Synthesizing
     "batched": true,
     "target_samples": 11000, // target number of samples to be generated in each batch entry
@@ -39,11 +39,11 @@
 //    "backend": "nccl",
 //    "url": "tcp:\/\/localhost:54321"
 //    },
-    
-// MODEL MODE 
-    "mode": 10, // mold [string], gauss [string], bits [int]
+
+// MODEL MODE
+    "mode": "mold", // mold [string], gauss [string], bits [int]
     "mulaw": true, // apply mulaw if mode is bits
-    
+
 // MODEL PARAMETERS
     "wavernn_model_params": {
         "rnn_dims": 512,
@@ -55,14 +55,14 @@
         "use_upsample_net": true,
         "upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length
     },
-    
+
 // DATASET
     //"use_gta": true, // use computed gta features from the tts model
-    "data_path": "/media/alexander/LinuxFS/SpeechData/GothicSpeech/NPC_Speech", // path containing training wav files
+    "data_path": "/home/erogol/Data/LJSpeech-1.1/wavs/", // path containing training wav files
     "feature_path": null, // path containing computed features from wav files if null compute them
     "seq_len": 1280, // has to be devideable by hop_length
     "padding": 2, // pad the input for resnet to see wider input length
-    
+
 // TRAINING
     "batch_size": 64, // Batch size for training.
     "epochs": 10000, // total number of epochs to train.
@@ -70,7 +70,7 @@
 // VALIDATION
     "run_eval": true,
     "test_every_epochs": 10, // Test after set number of epochs (Test every 10 epochs for example)
-    
+
 // OPTIMIZER
     "grad_clip": 4, // apply gradient clipping if > 0
     "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
@@ -79,19 +79,19 @@
         "milestones": [200000, 400000, 600000]
     },
     "lr": 1e-4, // initial learning rate
-    
+
 // TENSORBOARD and LOGGING
     "print_step": 25, // Number of steps to log traning on console.
     "print_eval": false, // If True, it prints loss values for each step in eval run.
     "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints.
     "checkpoint": true, // If true, it saves checkpoints per "save_step"
     "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
-    
+
 // DATA LOADING
     "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "num_val_loader_workers": 4, // number of evaluation data loader processes.
-    "eval_split_size": 50, // number of samples for testing 
-    
+    "eval_split_size": 50, // number of samples for testing
+
 // PATHS
-    "output_path": "output/training/path"
+    "output_path": "/home/erogol/Models/LJSpeech/"
 }