From 9915220539af1c58dac2d1a93b25caeb8d898751 Mon Sep 17 00:00:00 2001
From: Eren Golge
Date: Thu, 18 Apr 2019 17:34:54 +0200
Subject: [PATCH] config update

---
 .compute            | 10 +++++++---
 config_cluster.json | 18 +++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/.compute b/.compute
index c559bcab..3e21a5bf 100644
--- a/.compute
+++ b/.compute
@@ -1,7 +1,11 @@
 #!/bin/bash
-ls ${SHARED_DIR}/data/
+# ls ${USER_DIR}/MozillaDataset/Mozilla/batch1/wavs_no_processing
+yes | apt-get install sox
+soxi /data/rw/home/MozillaDataset/Mozilla/batch18/wavs_no_processing/18_167.wav
 pip3 install https://download.pytorch.org/whl/cu100/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl
 yes | apt-get install espeak
 python3 setup.py develop
-# python3 distribute.py --config_path config_cluster.json --data_path ${SHARED_DIR}/data/Blizzard/Nancy/ --restore_path ${USER_DIR}/best_model_4467.pth.tar
-python3 distribute.py --config_path config_cluster.json --data_path ${USER_DIR}/Mozilla/
\ No newline at end of file
+# wget https://www.dropbox.com/s/evaouukiwb7krz8/MozillaDataset.tar.gz?dl=0 -O ${USER_DIR}/MozillaDataset.tar.gz
+# tar -xzvf ${USER_DIR}/MozillaDataset.tar.gz --no-same-owner -C ${USER_DIR}
+# python3 distribute.py --config_path config_cluster.json --data_path ${USER_DIR}/MozillaDataset/Mozilla/ --restore_path ${USER_DIR}/best_model_4583.pth.tar
+python3 distribute.py --config_path config_cluster.json --data_path ${USER_DIR}/MozillaDataset/Mozilla/
\ No newline at end of file
diff --git a/config_cluster.json b/config_cluster.json
index 59d9f52f..47e9569e 100644
--- a/config_cluster.json
+++ b/config_cluster.json
@@ -1,6 +1,6 @@
 {
     "run_name": "mozilla-fattn",
-    "run_description": "Finetune 4583, Mozilla with 0 batch group size and fattn",
+    "run_description": "Finetune 4583, Mozilla with 4 batch group size and fattn, batch size 16->24",
 
     "audio":{
         // Audio processing parameters
@@ -40,13 +40,13 @@
     "windowing": false, // Enables attention windowing. Used only in eval mode.
     "memory_size": 5, // ONLY TACOTRON - memory queue size used to queue network predictions to feed autoregressive connection. Useful if r < 5.
     "attention_norm": "softmax", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
-    "prenet_type": "original", // ONLY TACOTRON2 - "original" or "bn".
+    "prenet_type": "bn", // ONLY TACOTRON2 - "original" or "bn".
     "use_forward_attn": true, // ONLY TACOTRON2 - if it uses forward attention. In general, it aligns faster.
     "transition_agent": false, // ONLY TACOTRON2 - enable/disable transition agent of forward attention.
     "loss_masking": false, // enable / disable loss masking against the sequence padding.
     "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
 
-    "batch_size": 16, // Batch size for training. Lower values than 32 might cause hard to learn attention.
+    "batch_size": 2, // Batch size for training. Lower values than 32 might cause hard to learn attention.
     "eval_batch_size":16,
     "r": 1, // Number of frames to predict for step.
     "wd": 0.000001, // Weight decay weight.
@@ -54,10 +54,10 @@
     "save_step": 1000, // Number of training steps expected to save traning stats and checkpoints.
     "print_step": 10, // Number of steps to log traning on console.
     "tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
-    "batch_group_size": 0, //Number of batches to shuffle after bucketing.
+    "batch_group_size": 4, //Number of batches to shuffle after bucketing.
 
     "run_eval": false,
-    "test_delay_epochs": 10, //Until attention is aligned, testing only wastes computation time.
+    "test_delay_epochs": 5, //Until attention is aligned, testing only wastes computation time.
     "data_path": "/media/erogol/data_ssd/Data/LJSpeech-1.1", // DATASET-RELATED: can overwritten from command argument
     "meta_file_train": "prompts_train.data", // DATASET-RELATED: metafile for training dataloader.
     "meta_file_val": "prompts_val.data", // DATASET-RELATED: metafile for evaluation dataloader.
@@ -65,10 +65,10 @@
     "min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 150, // DATASET-RELATED: maximum text length
     "output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
-    "num_loader_workers": 8, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 4, // number of evaluation data loader processes.
-    "phoneme_cache_path": "nancy_us_phonemes", // phoneme computation is slow, therefore, it caches results in the given folder.
+    "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
+    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "phoneme_cache_path": "mozilla_us_phonemes", // phoneme computation is slow, therefore, it caches results in the given folder.
     "use_phonemes": true, // use phonemes instead of raw characters. It is suggested for better pronounciation.
     "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
     "text_cleaner": "phoneme_cleaners"
-}
\ No newline at end of file
+}