mirror of https://github.com/coqui-ai/TTS.git
Update model test configs
parent acd96a4940
commit 76e590f33c
@@ -123,7 +123,7 @@
     "text_cleaner": "english_cleaners",
     "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "batch_group_size": 0, //Number of batches to shuffle after bucketing.
     "min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 300, // DATASET-RELATED: maximum text length
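The rename above turns `num_val_loader_workers` into `num_eval_loader_workers`; both worker counts end up in PyTorch `DataLoader` construction. A minimal sketch of how such config keys are typically consumed (the helper name and batch-size default are illustrative, not from this diff):

    from torch.utils.data import DataLoader

    def make_loader(dataset, config, is_eval=False):
        # Pick the worker count from the renamed config keys; 0 keeps data
        # loading in the main process, as the test configs above do.
        workers = config["num_eval_loader_workers"] if is_eval else config["num_loader_workers"]
        return DataLoader(dataset, batch_size=config.get("batch_size", 32),
                          shuffle=not is_eval, num_workers=workers)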
@@ -140,8 +140,8 @@
 
     // MULTI-SPEAKER and GST
     "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
-    "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
-    "external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
+    "use_d_vector_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
+    "d_vector_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_d_vector_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
 
 
     // DATASETS
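`d_vector_file` points at a JSON file of precomputed per-sample speaker embeddings (d-vectors, see https://arxiv.org/abs/1806.04558), used in place of a learned `nn.Embedding` table. A hedged sketch of loading such a file; the schema shown is an assumption, not taken from this diff:

    import json

    def load_d_vectors(path):
        # Assumed layout: {clip_id: {"name": speaker_name, "embedding": [floats]}}.
        # The real speakers.json schema may differ; this is illustrative only.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        return {clip: entry["embedding"] for clip, entry in data.items()}

    # d_vectors = load_d_vectors("/home/erogol/Data/libritts/speakers.json")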
@@ -115,7 +115,7 @@
     "text_cleaner": "phoneme_cleaners",
     "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "batch_group_size": 0, //Number of batches to shuffle after bucketing.
     "min_seq_len": 3, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 500, // DATASET-RELATED: maximum text length
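`min_seq_len` and `max_seq_len` act as dataset-side bounds on text length. A minimal sketch of the filter they imply (the helper and sample layout are hypothetical):

    def filter_by_length(samples, min_seq_len=3, max_seq_len=500):
        # Keep only samples whose text length falls inside the configured
        # bounds; everything else is dropped before batching.
        return [s for s in samples if min_seq_len <= len(s["text"]) <= max_seq_len]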
@@ -132,8 +132,8 @@
     "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
 
     // MULTI-SPEAKER and GST
-    "use_external_speaker_embedding_file": false,
-    "external_speaker_embedding_file": null,
+    "use_d_vector_file": false,
+    "d_vector_file": null,
     "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
 
     // DATASETS
@@ -120,7 +120,7 @@
     "text_cleaner": "english_cleaners",
     "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "batch_group_size": 0, //Number of batches to shuffle after bucketing.
     "min_seq_len": 2, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 300, // DATASET-RELATED: maximum text length
@@ -137,8 +137,8 @@
 
     // MULTI-SPEAKER and GST
     "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
-    "use_external_speaker_embedding_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
-    "external_speaker_embedding_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_external_speaker_embedding_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
+    "use_d_vector_file": false, // if true, forces the model to use external embedding per sample instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
+    "d_vector_file": "/home/erogol/Data/libritts/speakers.json", // if not null and use_d_vector_file is true, it is used to load a specific embedding file and thus uses these embeddings instead of nn.embeddings, that is, it supports external embeddings such as those used at: https://arxiv.org/abs/1806.04558
 
 
     // DATASETS
@@ -130,7 +130,7 @@
     "text_cleaner": "phoneme_cleaners",
     "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
    "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "batch_group_size": 0, //Number of batches to shuffle after bucketing.
     "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 153, // DATASET-RELATED: maximum text length
@@ -145,8 +145,8 @@
     "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
 
     // MULTI-SPEAKER and GST
-    "use_external_speaker_embedding_file": false,
-    "external_speaker_embedding_file": null,
+    "use_d_vector_file": false,
+    "d_vector_file": null,
     "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
     "use_gst": true, // use global style tokens
     "gst": { // gst parameter if gst is enabled
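`use_gst` switches on Global Style Tokens (https://arxiv.org/abs/1803.09017): a reference embedding attends over a small bank of learned tokens, and the weighted sum conditions the decoder on style. A simplified single-head sketch; all dimensions and names here are assumptions, not values from the `gst` block:

    import torch
    import torch.nn as nn

    class StyleTokenLayer(nn.Module):
        # Illustrative only; the real GST layer uses multi-head attention.
        def __init__(self, num_tokens=10, token_dim=256, ref_dim=128):
            super().__init__()
            self.tokens = nn.Parameter(torch.randn(num_tokens, token_dim))
            self.query = nn.Linear(ref_dim, token_dim)

        def forward(self, ref_embedding):                  # (batch, ref_dim)
            q = self.query(ref_embedding)                  # (batch, token_dim)
            scores = torch.softmax(q @ self.tokens.t(), dim=-1)
            return scores @ torch.tanh(self.tokens)        # style vector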
@@ -130,7 +130,7 @@
     "text_cleaner": "phoneme_cleaners",
     "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "batch_group_size": 0, //Number of batches to shuffle after bucketing.
     "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 153, // DATASET-RELATED: maximum text length
@@ -145,8 +145,8 @@
     "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
 
     // MULTI-SPEAKER and GST
-    "use_external_speaker_embedding_file": false,
-    "external_speaker_embedding_file": null,
+    "use_d_vector_file": false,
+    "d_vector_file": null,
     "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
     "use_gst": true, // use global style tokens
     "gst": { // gst parameter if gst is enabled
@@ -130,7 +130,7 @@
     "text_cleaner": "phoneme_cleaners",
     "enable_eos_bos_chars": false, // enable/disable beginning of sentence and end of sentence chars.
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "batch_group_size": 0, //Number of batches to shuffle after bucketing.
     "min_seq_len": 6, // DATASET-RELATED: minimum text length to use in training
     "max_seq_len": 153, // DATASET-RELATED: maximum text length
@@ -145,8 +145,8 @@
     "phoneme_language": "en-us", // depending on your target language, pick one from https://github.com/bootphon/phonemizer#languages
 
     // MULTI-SPEAKER and GST
-    "use_external_speaker_embedding_file": false,
-    "external_speaker_embedding_file": null,
+    "use_d_vector_file": false,
+    "d_vector_file": null,
     "use_speaker_embedding": false, // use speaker embedding to enable multi-speaker learning.
     "use_gst": true, // use global style tokens
     "gst": { // gst parameter if gst is enabled
@@ -157,7 +157,7 @@
 
     // DATA LOADING
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
    "eval_split_size": 10,
 
     // PATHS
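`eval_split_size` is a sample count: when no separate evaluation set is provided, that many items are held out from the training data. A hedged sketch of such a split (the helper is hypothetical, and the real loader may shuffle before splitting):

    def split_samples(samples, eval_split_size=10):
        # Hold out the first eval_split_size items for evaluation,
        # train on the remainder.
        return samples[eval_split_size:], samples[:eval_split_size]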
@@ -88,7 +88,7 @@
 
     // OPTIMIZER
     "epochs": 1, // total number of epochs to train.
-    "clip_grad": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0
+    "grad_clip": 1.0, // Generator gradient clipping threshold. Apply gradient clipping if > 0
     "lr_scheduler": "MultiStepLR", // one of the schedulers from https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
     "lr_scheduler_params": {
         "gamma": 0.5,
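`grad_clip` and `lr_scheduler` map directly onto standard PyTorch utilities. A minimal sketch of a training step wiring them together; the model, loss, and milestones are placeholders, and only `gamma` comes from the config above:

    import torch
    import torch.nn as nn
    from torch.optim.lr_scheduler import MultiStepLR

    model = nn.Linear(10, 1)  # stand-in for the real generator
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = MultiStepLR(optimizer, milestones=[50, 100], gamma=0.5)

    def train_step(batch, target, grad_clip=1.0):
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(model(batch), target)
        loss.backward()
        if grad_clip > 0:  # "apply gradient clipping if > 0"
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()

    # scheduler.step() is then called once per epoch.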
@@ -107,7 +107,7 @@
 
     // DATA LOADING
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "eval_split_size": 4,
 
     // PATHS
@@ -55,7 +55,7 @@
     "padding": 2, // pad the input for resnet to see wider input length
 
     // GENERATOR - for backward compatibility
-    "generator_model": "WaveRNN",
+    "generator_model": "Wavernn",
 
     // DATASET
     //"use_gta": true, // use computed gta features from the tts model
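The `generator_model` string is resolved to a vocoder class by name, so its capitalization has to match the class exactly; the change from "WaveRNN" to "Wavernn" tracks such a class name. A hedged sketch of that kind of lookup (the module path is an assumption):

    import importlib

    def find_generator(config):
        # Resolve the generator class from its config name; a capitalization
        # mismatch ("WaveRNN" vs. "Wavernn") would make getattr fail here.
        module = importlib.import_module("TTS.vocoder.models.wavernn")  # assumed path
        return getattr(module, config["generator_model"])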
@@ -103,7 +103,7 @@
 
     // DATA LOADING
     "num_loader_workers": 0, // number of training data loader processes. Don't set it too big. 4-8 are good values.
-    "num_val_loader_workers": 0, // number of evaluation data loader processes.
+    "num_eval_loader_workers": 0, // number of evaluation data loader processes.
     "eval_split_size": 10, // number of samples for testing
 
     // PATHS