mirror of https://github.com/coqui-ai/TTS.git
config fixes and enable graves attention
This commit is contained in:
parent
9809fda144
commit
2996d63145
20
config.json
20
config.json
|
@ -23,8 +23,8 @@
|
||||||
"clip_norm": true, // clip normalized values into the range.
|
"clip_norm": true, // clip normalized values into the range.
|
||||||
"mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
|
"mel_fmin": 0.0, // minimum freq level for mel-spec. ~50 for male and ~95 for female voices. Tune for dataset!!
|
||||||
"mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!!
|
"mel_fmax": 8000.0, // maximum freq level for mel-spec. Tune for dataset!!
|
||||||
"do_trim_silence": true // enable trimming of silence from audio as you load it. LJspeech (false), TWEB (false), Nancy (true)
|
"do_trim_silence": true, // enable trimming of silence from audio as you load it. LJspeech (false), TWEB (false), Nancy (true)
|
||||||
"trim_db": 60, // threshold for trimming silence. Set this according to your dataset.
|
"trim_db": 60 // threshold for trimming silence. Set this according to your dataset.
|
||||||
},
|
},
|
||||||
|
|
||||||
// DISTRIBUTED TRAINING
|
// DISTRIBUTED TRAINING
|
||||||
|
@ -62,14 +62,14 @@
|
||||||
"prenet_dropout": true, // enable/disable dropout at prenet.
|
"prenet_dropout": true, // enable/disable dropout at prenet.
|
||||||
|
|
||||||
// ATTENTION
|
// ATTENTION
|
||||||
"attention_type": "original", // 'original' or 'graves'
|
"attention_type": "graves", // 'original' or 'graves'
|
||||||
"attention_heads": 5, // number of attention heads (only for 'graves')
|
"attention_heads": 4, // number of attention heads (only for 'graves')
|
||||||
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
"attention_norm": "sigmoid", // softmax or sigmoid. Suggested to use softmax for Tacotron2 and sigmoid for Tacotron.
|
||||||
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
"windowing": false, // Enables attention windowing. Used only in eval mode.
|
||||||
"use_forward_attn": false, // if it uses forward attention. In general, it aligns faster.
|
"use_forward_attn": false, // if it uses forward attention. In general, it aligns faster.
|
||||||
"forward_attn_mask": false, // Additional masking forcing monotonicity only in eval mode.
|
"forward_attn_mask": false, // Additional masking forcing monotonicity only in eval mode.
|
||||||
"transition_agent": false, // enable/disable transition agent of forward attention.
|
"transition_agent": false, // enable/disable transition agent of forward attention.
|
||||||
"location_attn": true, // enable/disable location sensitive attention. It is enabled for TACOTRON by default.
|
"location_attn": false, // enable/disable location sensitive attention. It is enabled for TACOTRON by default.
|
||||||
"bidirectional_decoder": false, // use https://arxiv.org/abs/1907.09006. Use it, if attention does not work well with your dataset.
|
"bidirectional_decoder": false, // use https://arxiv.org/abs/1907.09006. Use it, if attention does not work well with your dataset.
|
||||||
|
|
||||||
// STOPNET
|
// STOPNET
|
||||||
|
@ -92,8 +92,8 @@
|
||||||
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
||||||
|
|
||||||
// PATHS
|
// PATHS
|
||||||
"output_path": "/data5/rw/pit/keep/", // DATASET-RELATED: output path for all training outputs.
|
// "output_path": "/data5/rw/pit/keep/", // DATASET-RELATED: output path for all training outputs.
|
||||||
// "output_path": "/media/erogol/data_ssd/Models/runs/",
|
"output_path": "/home/erogol/Models/LJSpeech/",
|
||||||
|
|
||||||
// PHONEMES
|
// PHONEMES
|
||||||
"phoneme_cache_path": "mozilla_us_phonemes", // phoneme computation is slow, therefore, it caches results in the given folder.
|
"phoneme_cache_path": "mozilla_us_phonemes", // phoneme computation is slow, therefore, it caches results in the given folder.
|
||||||
|
@ -110,10 +110,10 @@
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"name": "ljspeech",
|
"name": "ljspeech",
|
||||||
"path": "/root/LJSpeech-1.1/",
|
"path": "/home/erogol/Data/LJSpeech-1.1/",
|
||||||
// "path": "/home/erogol/Data/LJSpeech-1.1",
|
// "path": "/home/erogol/Data/LJSpeech-1.1",
|
||||||
"meta_file_train": "metadata_train.csv",
|
"meta_file_train": "metadata.csv",
|
||||||
"meta_file_val": "metadata_val.csv"
|
"meta_file_val": null
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue