small updates

erogol 2020-10-26 17:16:16 +01:00
parent 2ee47e9568
commit 0becef4b58
2 changed files with 15 additions and 16 deletions

View File

@ -95,7 +95,6 @@ def train(model, optimizer, criterion, scheduler, ap, global_step, epoch):
end_time = time.time()
c_logger.print_train_start()
# train loop
- print(" > Training", flush=True)
for num_iter, data in enumerate(data_loader):
    start_time = time.time()
    x_input, mels, y_coarse = format_data(data)

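For context, a minimal sketch of what one iteration of this train loop typically does with the `format_data` outputs, assuming the usual PyTorch pattern; `y_hat` and the exact model/criterion signatures here are illustrative, not the repo's actual code:

```python
import torch

def train_step(model, optimizer, criterion, x_input, mels, y_coarse, grad_clip=4.0):
    # forward pass: predict waveform (distribution parameters) from inputs
    y_hat = model(x_input, mels)
    loss = criterion(y_hat, y_coarse)
    optimizer.zero_grad()
    loss.backward()
    if grad_clip > 0:  # mirrors "grad_clip": 4 in the config file below
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer.step()
    return loss.item()
```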
View File

@ -1,7 +1,7 @@
{
"run_name": "wavernn_test",
"run_description": "wavernn_test training",
// AUDIO PARAMETERS
"audio": {
"fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame.
@ -29,7 +29,7 @@
"clip_norm": true, // clip normalized values into the range.
"stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored
},
// Generating / Synthesizing
"batched": true,
"target_samples": 11000, // target number of samples to be generated in each batch entry
@ -39,11 +39,11 @@
// "backend": "nccl",
// "url": "tcp:\/\/localhost:54321"
// },
- // MODEL MODE
- "mode": 10, // mold [string], gauss [string], bits [int]
+ // MODEL MODE
+ "mode": "mold", // mold [string], gauss [string], bits [int]
"mulaw": true, // apply mulaw if mode is bits
// MODEL PARAMETERS
"wavernn_model_params": {
"rnn_dims": 512,
@ -55,14 +55,14 @@
"use_upsample_net": true,
"upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length
},
// DATASET
//"use_gta": true, // use computed gta features from the tts model
"data_path": "/media/alexander/LinuxFS/SpeechData/GothicSpeech/NPC_Speech", // path containing training wav files
"data_path": "/home/erogol/Data/LJSpeech-1.1/wavs/", // path containing training wav files
"feature_path": null, // path containing computed features from wav files if null compute them
"seq_len": 1280, // has to be devideable by hop_length
"padding": 2, // pad the input for resnet to see wider input length
// TRAINING
"batch_size": 64, // Batch size for training.
"epochs": 10000, // total number of epochs to train.
@ -70,7 +70,7 @@
// VALIDATION
"run_eval": true,
"test_every_epochs": 10, // Test after set number of epochs (Test every 10 epochs for example)
// OPTIMIZER
"grad_clip": 4, // apply gradient clipping if > 0
"lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
@ -79,19 +79,19 @@
"milestones": [200000, 400000, 600000]
},
"lr": 1e-4, // initial learning rate
// TENSORBOARD and LOGGING
"print_step": 25, // Number of steps to log traning on console.
"print_eval": false, // If True, it prints loss values for each step in eval run.
"save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints.
"checkpoint": true, // If true, it saves checkpoints per "save_step"
"tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
// DATA LOADING
"num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 4, // number of evaluation data loader processes.
"eval_split_size": 50, // number of samples for testing
"eval_split_size": 50, // number of samples for testing
// PATHS
"output_path": "output/training/path"
"output_path": "/home/erogol/Models/LJSpeech/"
}
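For reference, a sketch of how the `lr_scheduler` entries map onto PyTorch: `MultiStepLR` multiplies the learning rate by `gamma` at each milestone step. The model and `gamma=0.5` below are stand-ins; only `"milestones"` is visible in the hunk above.

```python
import torch

model = torch.nn.Linear(10, 10)  # stand-in model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[200000, 400000, 600000], gamma=0.5)

for step in range(1000):  # step the scheduler once per training step
    optimizer.step()
    scheduler.step()
```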