From 0213e1cbf424f1b5ed1299a47c9d24062e1410df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Wed, 12 May 2021 00:56:25 +0200
Subject: [PATCH] update TTS model configs so the field types match the
 expected values

---
 TTS/config/shared_configs.py            | 4 ++--
 TTS/tts/configs/tacotron_config.py      | 2 +-
 tests/inputs/test_tacotron2_config.json | 2 +-
 tests/inputs/test_tacotron_config.json  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/TTS/config/shared_configs.py b/TTS/config/shared_configs.py
index b10cc9bf..153b3279 100644
--- a/TTS/config/shared_configs.py
+++ b/TTS/config/shared_configs.py
@@ -1,5 +1,5 @@
 from dataclasses import asdict, dataclass
-
+from typing import List, Union
 from coqpit import MISSING, Coqpit, check_argument
 
 
@@ -137,7 +137,7 @@ class BaseAudioConfig(Coqpit):
 class BaseDatasetConfig(Coqpit):
     name: str = None
     path: str = None
-    meta_file_train: str = None
+    meta_file_train: Union[str, List] = None  # TODO: stop passing ignored speakers for multi-speaker datasets through this field. It is a Union only for SC-Glow compatibility.
     meta_file_val: str = None
     meta_file_attn_mask: str = None
 
diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py
index 6f08e89f..5c509927 100644
--- a/TTS/tts/configs/tacotron_config.py
+++ b/TTS/tts/configs/tacotron_config.py
@@ -14,7 +14,7 @@ class TacotronConfig(BaseTTSConfig):
     gst_style_input: str = None
     # model specific params
     r: int = 2
-    gradual_training: List = None
+    gradual_training: List[List] = None
     memory_size: int = -1
     prenet_type: str = "original"
     prenet_dropout: bool = True
diff --git a/tests/inputs/test_tacotron2_config.json b/tests/inputs/test_tacotron2_config.json
index 779f925d..2bf1f840 100644
--- a/tests/inputs/test_tacotron2_config.json
+++ b/tests/inputs/test_tacotron2_config.json
@@ -64,7 +64,7 @@
     "batch_size": 1,       // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
     "eval_batch_size":1,
     "r": 7,                 // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
-    "gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceeed.
+    "gradual_training": [[0, 7, 4], [1, 5, 2]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
     "loss_masking": true,         // enable / disable loss masking against the sequence padding.
     "ga_alpha": 10.0,        // weight for guided attention loss. If > 0, guided attention is enabled.
     "mixed_precision": false,
diff --git a/tests/inputs/test_tacotron_config.json b/tests/inputs/test_tacotron_config.json
index a2fdd690..12da4762 100644
--- a/tests/inputs/test_tacotron_config.json
+++ b/tests/inputs/test_tacotron_config.json
@@ -64,7 +64,7 @@
     "batch_size": 1,       // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
     "eval_batch_size":1,
     "r": 7,                 // Number of decoder frames to predict per iteration. Set the initial values if gradual training is enabled.
-    "gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceeed.
+    "gradual_training": [[0, 7, 4], [1, 5, 2]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
     "loss_masking": true,         // enable / disable loss masking against the sequence padding.
     "ga_alpha": 10.0,        // weight for guided attention loss. If > 0, guided attention is enabled.
     "mixed_precision": false,