small updates

erogol 2020-10-26 17:16:16 +01:00
parent 2ee47e9568
commit 0becef4b58
2 changed files with 15 additions and 16 deletions

View File

@ -95,7 +95,6 @@ def train(model, optimizer, criterion, scheduler, ap, global_step, epoch):
end_time = time.time()
c_logger.print_train_start()
# train loop
- print(" > Training", flush=True)
for num_iter, data in enumerate(data_loader):
    start_time = time.time()
    x_input, mels, y_coarse = format_data(data)

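For context, a minimal sketch of what one iteration of this train loop typically does with the `format_data` outputs, assuming the usual PyTorch pattern; `y_hat` and the exact model/criterion signatures here are illustrative, not the repo's actual code:

```python
import torch

def train_step(model, optimizer, criterion, x_input, mels, y_coarse, grad_clip=4.0):
    # forward pass: predict waveform (distribution parameters) from inputs
    y_hat = model(x_input, mels)
    loss = criterion(y_hat, y_coarse)
    optimizer.zero_grad()
    loss.backward()
    if grad_clip > 0:  # mirrors "grad_clip": 4 in the config file below
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer.step()
    return loss.item()
```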
View File

@ -1,7 +1,7 @@
{
"run_name": "wavernn_test",
"run_description": "wavernn_test training",
// AUDIO PARAMETERS
"audio": {
"fft_size": 1024, // number of stft frequency levels. Size of the linear spectogram frame.
@ -29,7 +29,7 @@
"clip_norm": true, // clip normalized values into the range.
"stats_path": null // DO NOT USE WITH MULTI_SPEAKER MODEL. scaler stats file computed by 'compute_statistics.py'. If it is defined, mean-std based notmalization is used and other normalization params are ignored
},
// Generating / Synthesizing
"batched": true,
"target_samples": 11000, // target number of samples to be generated in each batch entry
@ -39,11 +39,11 @@
// "backend": "nccl",
// "url": "tcp:\/\/localhost:54321"
// },
- // MODEL MODE
- "mode": 10, // mold [string], gauss [string], bits [int]
+ // MODEL MODE
+ "mode": "mold", // mold [string], gauss [string], bits [int]
"mulaw": true, // apply mulaw if mode is bits
// MODEL PARAMETERS
"wavernn_model_params": {
"rnn_dims": 512,
@ -55,14 +55,14 @@
"use_upsample_net": true,
"upsample_factors": [4, 8, 8] // this needs to correctly factorise hop_length
},
// DATASET
//"use_gta": true, // use computed gta features from the tts model
"data_path": "/media/alexander/LinuxFS/SpeechData/GothicSpeech/NPC_Speech", // path containing training wav files
"data_path": "/home/erogol/Data/LJSpeech-1.1/wavs/", // path containing training wav files
"feature_path": null, // path containing computed features from wav files if null compute them
"seq_len": 1280, // has to be devideable by hop_length
"padding": 2, // pad the input for resnet to see wider input length
// TRAINING
"batch_size": 64, // Batch size for training.
"epochs": 10000, // total number of epochs to train.
@ -70,7 +70,7 @@
// VALIDATION
"run_eval": true,
"test_every_epochs": 10, // Test after set number of epochs (Test every 10 epochs for example)
// OPTIMIZER
"grad_clip": 4, // apply gradient clipping if > 0
"lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
@ -79,19 +79,19 @@
"milestones": [200000, 400000, 600000]
},
"lr": 1e-4, // initial learning rate
// TENSORBOARD and LOGGING
"print_step": 25, // Number of steps to log traning on console.
"print_eval": false, // If True, it prints loss values for each step in eval run.
"save_step": 25000, // Number of training steps expected to plot training stats on TB and save model checkpoints.
"checkpoint": true, // If true, it saves checkpoints per "save_step"
"tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
// DATA LOADING
"num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
"num_val_loader_workers": 4, // number of evaluation data loader processes.
"eval_split_size": 50, // number of samples for testing
"eval_split_size": 50, // number of samples for testing
// PATHS
"output_path": "output/training/path"
"output_path": "/home/erogol/Models/LJSpeech/"
}
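For reference, a sketch of how the `lr_scheduler` entries map onto PyTorch: `MultiStepLR` multiplies the learning rate by `gamma` at each milestone step. The model and `gamma=0.5` below are stand-ins; only `"milestones"` is visible in the hunk above.

```python
import torch

model = torch.nn.Linear(10, 10)  # stand-in model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[200000, 400000, 600000], gamma=0.5)

for step in range(1000):  # step the scheduler once per training step
    optimizer.step()
    scheduler.step()
```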