diff --git a/tests/inputs/test_align_tts.json b/tests/inputs/test_align_tts.json index 964cc66d..a0d677ad 100644 --- a/tests/inputs/test_align_tts.json +++ b/tests/inputs/test_align_tts.json @@ -123,7 +123,7 @@ "text_cleaner": "english_cleaners", "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. "batch_group_size": 0, //Number of batches to shuffle after bucketing. "min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training "max_seq_len": 300, // DATASET-RELATED: maximum text length @@ -140,8 +140,8 @@ // MULTI-SPEAKER and GST "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. - "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 - "external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + "use_d_vector_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558 + "d_vector_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_d_vector_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports 
external embeddings such as those used at: https://arxiv.org/abs/1806.04558 // DATASETS diff --git a/tests/inputs/test_glow_tts.json b/tests/inputs/test_glow_tts.json index 64cc3822..6dd86057 100644 --- a/tests/inputs/test_glow_tts.json +++ b/tests/inputs/test_glow_tts.json @@ -115,7 +115,7 @@ "text_cleaner": "phoneme_cleaners", "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. "batch_group_size": 0, //Number of batches to shuffle after bucketing. "min_seq_len": 3, // DATASET-RELATED: minimum text length to use in training "max_seq_len": 500, // DATASET-RELATED: maximum text length @@ -132,8 +132,8 @@ "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages // MULTI-SPEAKER and GST - "use_external_speaker_embedding_file": false, - "external_speaker_embedding_file": null, + "use_d_vector_file": false, + "d_vector_file": null, "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. // DATASETS diff --git a/tests/inputs/test_speedy_speech.json b/tests/inputs/test_speedy_speech.json index a29fc992..02783d21 100644 --- a/tests/inputs/test_speedy_speech.json +++ b/tests/inputs/test_speedy_speech.json @@ -120,7 +120,7 @@ "text_cleaner": "english_cleaners", "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. 
"batch_group_size": 0, //Number of batches to shuffle after bucketing. "min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training "max_seq_len": 300, // DATASET-RELATED: maximum text length @@ -137,8 +137,8 @@ // MULTI-SPEAKER and GST "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. - "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 - "external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs /1806.04558 + "use_d_vector_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558 + "d_vector_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_d_vector_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558 // DATASETS diff --git a/tests/inputs/test_tacotron2_config.json b/tests/inputs/test_tacotron2_config.json index cc2c1bb5..6c82891d 100644 --- a/tests/inputs/test_tacotron2_config.json +++ b/tests/inputs/test_tacotron2_config.json @@ -130,7 +130,7 @@ "text_cleaner": "phoneme_cleaners", "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. 
- "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. "batch_group_size": 0, //Number of batches to shuffle after bucketing. "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training "max_seq_len": 153, // DATASET-RELATED: maximum text length @@ -145,8 +145,8 @@ "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages // MULTI-SPEAKER and GST - "use_external_speaker_embedding_file": false, - "external_speaker_embedding_file": null, + "use_d_vector_file": false, + "d_vector_file": null, "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. "use_gst": true, // use global style tokens "gst": { // gst parameter if gst is enabled diff --git a/tests/inputs/test_tacotron_bd_config.json b/tests/inputs/test_tacotron_bd_config.json index 9d2935aa..fbf3c001 100644 --- a/tests/inputs/test_tacotron_bd_config.json +++ b/tests/inputs/test_tacotron_bd_config.json @@ -130,7 +130,7 @@ "text_cleaner": "phoneme_cleaners", "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. "batch_group_size": 0, //Number of batches to shuffle after bucketing. 
"min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training "max_seq_len": 153, // DATASET-RELATED: maximum text length @@ -145,8 +145,8 @@ "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages // MULTI-SPEAKER and GST - "use_external_speaker_embedding_file": false, - "external_speaker_embedding_file": null, + "use_d_vector_file": false, + "d_vector_file": null, "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. "use_gst": true, // use global style tokens "gst": { // gst parameter if gst is enabled diff --git a/tests/inputs/test_tacotron_config.json b/tests/inputs/test_tacotron_config.json index c8fae623..b60ed35e 100644 --- a/tests/inputs/test_tacotron_config.json +++ b/tests/inputs/test_tacotron_config.json @@ -130,7 +130,7 @@ "text_cleaner": "phoneme_cleaners", "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars. "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. "batch_group_size": 0, //Number of batches to shuffle after bucketing. "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training "max_seq_len": 153, // DATASET-RELATED: maximum text length @@ -145,8 +145,8 @@ "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages // MULTI-SPEAKER and GST - "use_external_speaker_embedding_file": false, - "external_speaker_embedding_file": null, + "use_d_vector_file": false, + "d_vector_file": null, "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning. 
"use_gst": true, // use global style tokens "gst": { // gst parameter if gst is enabled diff --git a/tests/inputs/test_vocoder_multiband_melgan_config.json b/tests/inputs/test_vocoder_multiband_melgan_config.json index 794a3fcc..b8b192e4 100644 --- a/tests/inputs/test_vocoder_multiband_melgan_config.json +++ b/tests/inputs/test_vocoder_multiband_melgan_config.json @@ -157,7 +157,7 @@ // DATA LOADING "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. "eval_split_size": 10, // PATHS diff --git a/tests/inputs/test_vocoder_wavegrad.json b/tests/inputs/test_vocoder_wavegrad.json index f6208e8d..6378c07a 100644 --- a/tests/inputs/test_vocoder_wavegrad.json +++ b/tests/inputs/test_vocoder_wavegrad.json @@ -88,7 +88,7 @@ // OPTIMIZER "epochs": 1, // total number of epochs to train. - "clip_grad": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0 + "grad_clip": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0 "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate "lr_scheduler_params": { "gamma": 0.5, @@ -107,7 +107,7 @@ // DATA LOADING "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. 
"eval_split_size": 4, // PATHS diff --git a/tests/inputs/test_vocoder_wavernn_config.json b/tests/inputs/test_vocoder_wavernn_config.json index decafa70..ee4e5f8e 100644 --- a/tests/inputs/test_vocoder_wavernn_config.json +++ b/tests/inputs/test_vocoder_wavernn_config.json @@ -55,7 +55,7 @@ "padding": 2, // pad the input for resnet to see wider input length // GENERATOR - for backward compatibility - "generator_model": "WaveRNN", + "generator_model": "Wavernn", // DATASET //"use_gta": true, // use computed gta features from the tts model @@ -103,7 +103,7 @@ // DATA LOADING "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values. - "num_val_loader_workers": 0, // number of evaluation data loader processes. + "num_eval_loader_workers": 0, // number of evaluation data loader processes. "eval_split_size": 10, // number of samples for testing // PATHS