update configs for tts models to match the field types with the expected
values
This commit is contained in:
Eren Gölge 2021-05-12 00:56:25 +02:00
parent 8058aaa304
commit 0213e1cbf4
4 changed files with 5 additions and 5 deletions

View File

@@ -1,5 +1,5 @@
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
from typing import List, Union
from coqpit import MISSING, Coqpit, check_argument from coqpit import MISSING, Coqpit, check_argument
@@ -137,7 +137,7 @@ class BaseAudioConfig(Coqpit):
class BaseDatasetConfig(Coqpit): class BaseDatasetConfig(Coqpit):
name: str = None name: str = None
path: str = None path: str = None
meta_file_train: str = None meta_file_train: Union[str, List] = None # TODO: don't take ignored speakers for multi-speaker datasets over this. This is Union for SC-Glow compat.
meta_file_val: str = None meta_file_val: str = None
meta_file_attn_mask: str = None meta_file_attn_mask: str = None

View File

@@ -14,7 +14,7 @@ class TacotronConfig(BaseTTSConfig):
gst_style_input: str = None gst_style_input: str = None
# model specific params # model specific params
r: int = 2 r: int = 2
gradual_training: List = None gradual_training: List[List] = None
memory_size: int = -1 memory_size: int = -1
prenet_type: str = "original" prenet_type: str = "original"
prenet_dropout: bool = True prenet_dropout: bool = True

View File

@@ -64,7 +64,7 @@
"batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. "batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
"eval_batch_size":1, "eval_batch_size":1,
"r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. "r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed. "gradual_training": [[0, 7, 4], [1, 5, 2]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
"loss_masking": true, // enable / disable loss masking against the sequence padding. "loss_masking": true, // enable / disable loss masking against the sequence padding.
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"mixed_precision": false, "mixed_precision": false,

View File

@@ -64,7 +64,7 @@
"batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'. "batch_size": 1, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
"eval_batch_size":1, "eval_batch_size":1,
"r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled. "r": 7, // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed. "gradual_training": [[0, 7, 4], [1, 5, 2]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
"loss_masking": true, // enable / disable loss masking against the sequence padding. "loss_masking": true, // enable / disable loss masking against the sequence padding.
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"mixed_precision": false, "mixed_precision": false,