update configs for tts models to match the field types with the expected
values
This commit is contained in:
Eren Gölge 2021-05-12 00:56:25 +02:00
parent 8058aaa304
commit 0213e1cbf4
4 changed files with 5 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
from typing import List, Union
from coqpit import MISSING, Coqpit, check_argument from coqpit import MISSING, Coqpit, check_argument
@@ -137,7 +137,7 @@ class BaseAudioConfig(Coqpit):
class BaseDatasetConfig(Coqpit): class BaseDatasetConfig(Coqpit):
name: str = None name: str = None
path: str = None path: str = None
meta_file_train: str = None meta_file_train: Union[str, List] = None # TODO: don't take ignored speakers for multi-speaker datasets over this. This is Union for SC-Glow compat.
meta_file_val: str = None meta_file_val: str = None
meta_file_attn_mask: str = None meta_file_attn_mask: str = None

View File

@@ -14,7 +14,7 @@ class TacotronConfig(BaseTTSConfig):
gst_style_input: str = None gst_style_input: str = None
# model specific params # model specific params
r: int = 2 r: int = 2
gradual_training: List = None gradual_training: List[List] = None
memory_size: int = -1 memory_size: int = -1
prenet_type: str = "original" prenet_type: str = "original"
prenet_dropout: bool = True prenet_dropout: bool = True

View File

@@ -64,7 +64,7 @@
"batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. "batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
"eval_batch_size":1, "eval_batch_size":1,
"r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. "r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed. "gradual_training": [[0, 7, 4], [1, 5, 2]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
"loss_masking": true, // enable / disable loss masking against the sequence padding. "loss_masking": true, // enable / disable loss masking against the sequence padding.
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"mixed_precision": false, "mixed_precision": false,

View File

@@ -64,7 +64,7 @@
"batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. "batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
"eval_batch_size":1, "eval_batch_size":1,
"r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. "r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed. "gradual_training": [[0, 7, 4], [1, 5, 2]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
"loss_masking": true, // enable / disable loss masking against the sequence padding. "loss_masking": true, // enable / disable loss masking against the sequence padding.
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"mixed_precision": false, "mixed_precision": false,