From 0becef4b58d34440737e65e37562b0b40d1d9054 Mon Sep 17 00:00:00 2001
From: erogol
Date: Mon, 26 Oct 2020 17:16:16 +0100
Subject: [PATCH] small updates

---
 TTS/bin/train_wavernn_vocoder.py        |  1 -
 TTS/vocoder/configs/wavernn_config.json | 30 ++++++++++++-------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/TTS/bin/train_wavernn_vocoder.py b/TTS/bin/train_wavernn_vocoder.py
index 61664a65..90e30256 100644
--- a/TTS/bin/train_wavernn_vocoder.py
+++ b/TTS/bin/train_wavernn_vocoder.py
@@ -95,7 +95,6 @@ def train(model, optimizer, criterion, scheduler, ap, global_step, epoch):
     end_time = time.time()
     c_logger.print_train_start()
     # train loop
-    print(" > Training", flush=True)
     for num_iter, data in enumerate(data_loader):
         start_time = time.time()
         x_input, mels, y_coarse = format_data(data)
diff --git a/TTS/vocoder/configs/wavernn_config.json b/TTS/vocoder/configs/wavernn_config.json
index 9a9fbdae..8f290b80 100644
--- a/TTS/vocoder/configs/wavernn_config.json
+++ b/TTS/vocoder/configs/wavernn_config.json
@@ -1,7 +1,7 @@
 {
     "run_name": "wavernn_test",
     "run_description": "wavernn_test training",
-    
+
 // AUDIO PARAMETERS
     "audio": {
         "fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame.
@@ -29,7 +29,7 @@
         "clip_norm": true, // clip normalized values into the range.
         "stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored
     },
-    
+
 // Generating / Synthesizing
     "batched": true,
     "target_samples": 11000, // target number of samples to be generated in each batch entry
@@ -39,11 +39,11 @@
 //    "backend": "nccl",
 //    "url": "tcp:\/\/localhost:54321"
 //    },
-    
-// MODEL MODE 
-    "mode": 10, // mold [string], gauss [string], bits [int]
+
+// MODEL MODE
+    "mode": "mold", // mold [string], gauss [string], bits [int]
     "mulaw": true, // apply mulaw if mode is bits
-    
+
 // MODEL PARAMETERS
     "wavernn_model_params": {
         "rnn_dims": 512,
@@ -55,14 +55,14 @@
         "use_upsample_net": true,
         "upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length
     },
-    
+
 // DATASET
     //"use_gta": true, // use computed gta features from the tts model
-    "data_path": "/media/alexander/LinuxFS/SpeechData/GothicSpeech/NPC_Speech", // path containing training wav files
+    "data_path": "/home/erogol/Data/LJSpeech-1.1/wavs/", // path containing training wav files
     "feature_path": null, // path containing computed features from wav files if null compute them
     "seq_len": 1280, // has to be devideable by hop_length
     "padding": 2, // pad the input for resnet to see wider input length
-    
+
 // TRAINING
     "batch_size": 64, // Batch size for training.
     "epochs": 10000, // total number of epochs to train.
@@ -70,7 +70,7 @@
 // VALIDATION
     "run_eval": true,
     "test_every_epochs": 10, // Test after set number of epochs (Test every 10 epochs for example)
-    
+
 // OPTIMIZER
     "grad_clip": 4, // apply gradient clipping if > 0
     "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
@@ -79,19 +79,19 @@
         "milestones": [200000, 400000, 600000]
     },
     "lr": 1e-4, // initial learning rate
-    
+
 // TENSORBOARD and LOGGING
     "print_step": 25, // Number of steps to log traning on console.
     "print_eval": false, // If True, it prints loss values for each step in eval run.
     "save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints.
     "checkpoint": true, // If true, it saves checkpoints per "save_step"
     "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
-    
+
 // DATA LOADING
     "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "num_val_loader_workers": 4, // number of evaluation data loader processes.
-    "eval_split_size": 50, // number of samples for testing 
-    
+    "eval_split_size": 50, // number of samples for testing
+
 // PATHS
-    "output_path": "output/training/path"
+    "output_path": "/home/erogol/Models/LJSpeech/"
 }