From 623ea41634a795c54d84dabb9ad6087f9619200f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <erogol@hotmail.com>
Date: Thu, 14 Sep 2023 15:21:48 +0200
Subject: [PATCH] Fix model tests (#2943)

---
 TTS/.models.json                              |   5 +-
 TTS/config/__init__.py                        |   2 +-
 TTS/utils/manage.py                           |  27 +++
 TTS/utils/synthesizer.py                      |   2 +-
 .../multilingual/cml_yourtts/train_yourtts.py | 189 +++++-------------
 tests/zoo_tests/test_models.py                |  42 ++--
 6 files changed, 106 insertions(+), 161 deletions(-)

diff --git a/TTS/.models.json b/TTS/.models.json
index 07ef3902..1eaaab71 100644
--- a/TTS/.models.json
+++ b/TTS/.models.json
@@ -11,8 +11,9 @@
                     ],
                     "default_vocoder": null,
                     "commit": "e9a1953e",
-                    "license": "Coqui Community Model License",
-                    "contact": "info@coqui.ai"
+                    "license": "CPML",
+                    "contact": "info@coqui.ai",
+                    "tos_required": true
                 },
                 "your_tts": {
                     "description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
diff --git a/TTS/config/__init__.py b/TTS/config/__init__.py
index b9200cd0..25b4baef 100644
--- a/TTS/config/__init__.py
+++ b/TTS/config/__init__.py
@@ -41,6 +41,7 @@ def register_config(model_name: str) -> Coqpit:
     # TODO: fix this
     if model_name == "xtts":
         from TTS.tts.configs.xtts_config import XttsConfig
+
         config_class = XttsConfig
     paths = ["TTS.tts.configs", "TTS.vocoder.configs", "TTS.encoder.configs", "TTS.vc.configs"]
     for path in paths:
@@ -96,7 +97,6 @@ def load_config(config_path: str) -> Coqpit:
         raise TypeError(f" [!] Unknown config file type {ext}")
     config_dict.update(data)
     model_name = _process_model_name(config_dict)
-    breakpoint
     config_class = register_config(model_name.lower())
     config = config_class()
     config.from_dict(config_dict)
diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
index ed48758f..79eed828 100644
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -21,6 +21,7 @@ LICENSE_URLS = {
     "apache 2.0": "https://choosealicense.com/licenses/apache-2.0/",
     "apache2": "https://choosealicense.com/licenses/apache-2.0/",
     "cc-by-sa 4.0": "https://creativecommons.org/licenses/by-sa/4.0/",
+    "cpml": "https://coqui.ai/cpml.txt"
 }
 
 
@@ -295,6 +296,29 @@ class ModelManager(object):
         model_item = self.set_model_url(model_item)
         return model_item, model_full_name, model
 
+    def ask_tos(self, model_full_path):
+        """Prompt for terms-of-service consent and record it in `tos_agreed.txt`; raise if declined."""
+        tos_path = os.path.join(model_full_path, "tos_agreed.txt")
+        if not os.path.exists(tos_path):
+            print(" > You must agree to the terms of service to use this model.")
+            print(" | > Please see the terms of service at https://coqui.ai/cpml.txt")
+            print(' | > "I have read, understood and agreed to the Terms and Conditions." - [y/n]')
+            answer = input(" | | > ")
+            if answer.strip().lower() == "y":
+                with open(tos_path, "w", encoding="utf-8") as f:
+                    f.write("I have read, understood and agreed to the Terms and Conditions.")
+            else:
+                raise Exception("You must agree to the terms of service to use this model.")
+
+    def tos_agreed(self, model_item, model_full_path):
+        """Check whether the terms-of-service requirement of a model is satisfied.
+
+        Returns True when the model does not require it or "tos_agreed.txt" already exists.
+        """
+        if not model_item.get("tos_required"):
+            return True
+        return os.path.exists(os.path.join(model_full_path, "tos_agreed.txt"))
+
     def download_model(self, model_name):
         """Download model files given the full model name.
         Model name is in the format
@@ -316,6 +340,9 @@ class ModelManager(object):
             print(f" > {model_name} is already downloaded.")
         else:
             os.makedirs(output_path, exist_ok=True)
+            # handle TOS: ask for agreement before downloading if the model requires it
+            if not self.tos_agreed(model_item, output_path):
+                self.ask_tos(output_path)
             print(f" > Downloading model to {output_path}")
             try:
                 if "fairseq" in model_name:
diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
index e6f35460..24a078f5 100644
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@@ -338,7 +338,7 @@ class Synthesizer(nn.Module):
 
             elif language_name and isinstance(language_name, str):
                 try:
-                    language_id = self.tts_model.language_manager.name_to_id[language_id]
+                    language_id = self.tts_model.language_manager.name_to_id[language_name]
                 except KeyError as e:
                     raise ValueError(
                         f" [!] Looks like you use a multi-lingual model. "
diff --git a/recipes/multilingual/cml_yourtts/train_yourtts.py b/recipes/multilingual/cml_yourtts/train_yourtts.py
index 8c4ec581..25a2fd0a 100644
--- a/recipes/multilingual/cml_yourtts/train_yourtts.py
+++ b/recipes/multilingual/cml_yourtts/train_yourtts.py
@@ -27,7 +27,7 @@ RUN_NAME = "YourTTS-CML-TTS"
 OUT_PATH = os.path.dirname(os.path.abspath(__file__))  # "/raid/coqui/Checkpoints/original-YourTTS/"
 
 # If you want to do transfer learning and speedup your training you can set here the path to the CML-TTS available checkpoint that cam be downloaded here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
-RESTORE_PATH = "/raid/edresson/CML_YourTTS/checkpoints_yourtts_cml_tts_dataset/best_model.pth" # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
+RESTORE_PATH = "/raid/edresson/CML_YourTTS/checkpoints_yourtts_cml_tts_dataset/best_model.pth"  # Download the checkpoint here:  https://drive.google.com/u/2/uc?id=1yDCSJ1pFZQTHhL09GMbOrdjcPULApa0p
 
 # This paramter is useful to debug, it skips the training epochs and just do the evaluation  and produce the test sentences
 SKIP_TRAIN_EPOCH = False
@@ -47,7 +47,7 @@ MAX_AUDIO_LEN_IN_SECONDS = float("inf")
 CML_DATASET_PATH = "./datasets/CML-TTS-Dataset/"
 
 
-### Download LibriTTS dataset 
+### Download LibriTTS dataset
 # it will automatic download the dataset, if you have problems you can comment it and manually donwload and extract it ! Download link: https://www.openslr.org/resources/60/train-clean-360.tar.gz
 LIBRITTS_DOWNLOAD_PATH = "./datasets/LibriTTS/"
 # Check if LibriTTS dataset is not already downloaded, if not download it
@@ -62,7 +62,7 @@ libritts_config = BaseDatasetConfig(
     meta_file_train="",
     meta_file_val="",
     path=os.path.join(LIBRITTS_DOWNLOAD_PATH, "train-clean-360/"),
-    language="en"
+    language="en",
 )
 
 # init CML-TTS configs
@@ -71,8 +71,8 @@ pt_config = BaseDatasetConfig(
     dataset_name="cml_tts",
     meta_file_train="train.csv",
     meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_portuguese_v0.1/"),
-    language="pt-br"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_portuguese_v0.1/"),
+    language="pt-br",
 )
 
 pl_config = BaseDatasetConfig(
@@ -80,8 +80,8 @@ pl_config = BaseDatasetConfig(
     dataset_name="cml_tts",
     meta_file_train="train.csv",
     meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_polish_v0.1/"),
-    language="pl"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_polish_v0.1/"),
+    language="pl",
 )
 
 it_config = BaseDatasetConfig(
@@ -89,8 +89,8 @@ it_config = BaseDatasetConfig(
     dataset_name="cml_tts",
     meta_file_train="train.csv",
     meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_italian_v0.1/"),
-    language="it"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_italian_v0.1/"),
+    language="it",
 )
 
 fr_config = BaseDatasetConfig(
@@ -98,8 +98,8 @@ fr_config = BaseDatasetConfig(
     dataset_name="cml_tts",
     meta_file_train="train.csv",
     meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_french_v0.1/"),
-    language="fr"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_french_v0.1/"),
+    language="fr",
 )
 
 du_config = BaseDatasetConfig(
@@ -107,8 +107,8 @@ du_config = BaseDatasetConfig(
     dataset_name="cml_tts",
     meta_file_train="train.csv",
     meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_dutch_v0.1/"),
-    language="du"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_dutch_v0.1/"),
+    language="du",
 )
 
 ge_config = BaseDatasetConfig(
@@ -116,8 +116,8 @@ ge_config = BaseDatasetConfig(
     dataset_name="cml_tts",
     meta_file_train="train.csv",
     meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_german_v0.1/"),
-    language="ge"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_german_v0.1/"),
+    language="ge",
 )
 
 sp_config = BaseDatasetConfig(
@@ -125,8 +125,8 @@ sp_config = BaseDatasetConfig(
     dataset_name="cml_tts",
     meta_file_train="train.csv",
     meta_file_val="",
-    path=os.path.join(CML_DATASET_PATH,"cml_tts_dataset_spanish_v0.1/"),
-    language="sp"
+    path=os.path.join(CML_DATASET_PATH, "cml_tts_dataset_spanish_v0.1/"),
+    language="sp",
 )
 
 # Add here all datasets configs Note: If you want to add new datasets, just add them here and it will automatically compute the speaker embeddings (d-vectors) for this new dataset :)
@@ -247,150 +247,55 @@ config = VitsConfig(
     max_audio_len=SAMPLE_RATE * MAX_AUDIO_LEN_IN_SECONDS,
     mixed_precision=False,
     test_sentences=[
-        [
-            "Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.",
-            "9351",
-            None,
-            "pt-br"
-        ],
-        [
-            "Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.",
-            "12249",
-            None,
-            "pt-br"
-        ],
+        ["Voc\u00ea ter\u00e1 a vista do topo da montanha que voc\u00ea escalar.", "9351", None, "pt-br"],
+        ["Quando voc\u00ea n\u00e3o corre nenhum risco, voc\u00ea arrisca tudo.", "12249", None, "pt-br"],
         [
             "S\u00e3o necess\u00e1rios muitos anos de trabalho para ter sucesso da noite para o dia.",
             "2961",
             None,
-            "pt-br"
-        ],
-        [
-            "You'll have the view of the top of the mountain that you climb.",
-            "LTTS_6574",
-            None,
-            "en"
-        ],
-        [
-            "When you don\u2019t take any risks, you risk everything.",
-            "LTTS_6206",
-            None,
-            "en"
-        ],
-        [
-            "Are necessary too many years of work to succeed overnight.",
-            "LTTS_5717",
-            None,
-            "en"
-        ],
-        [
-            "Je hebt uitzicht op de top van de berg die je beklimt.",
-            "960",
-            None,
-            "du"
-        ],
-        [
-            "Als je geen risico neemt, riskeer je alles.",
-            "2450",
-            None,
-            "du"
-        ],
-        [
-            "Zijn te veel jaren werk nodig om van de ene op de andere dag te slagen.",
-            "10984",
-            None,
-            "du"
-        ],
-        [
-            "Vous aurez la vue sur le sommet de la montagne que vous gravirez.",
-            "6381",
-            None,
-            "fr"
-        ],
-        [
-            "Quand tu ne prends aucun risque, tu risques tout.",
-            "2825",
-            None,
-            "fr"
+            "pt-br",
         ],
+        ["You'll have the view of the top of the mountain that you climb.", "LTTS_6574", None, "en"],
+        ["When you don\u2019t take any risks, you risk everything.", "LTTS_6206", None, "en"],
+        ["Are necessary too many years of work to succeed overnight.", "LTTS_5717", None, "en"],
+        ["Je hebt uitzicht op de top van de berg die je beklimt.", "960", None, "du"],
+        ["Als je geen risico neemt, riskeer je alles.", "2450", None, "du"],
+        ["Zijn te veel jaren werk nodig om van de ene op de andere dag te slagen.", "10984", None, "du"],
+        ["Vous aurez la vue sur le sommet de la montagne que vous gravirez.", "6381", None, "fr"],
+        ["Quand tu ne prends aucun risque, tu risques tout.", "2825", None, "fr"],
         [
             "Sont n\u00e9cessaires trop d'ann\u00e9es de travail pour r\u00e9ussir du jour au lendemain.",
             "1844",
             None,
-            "fr"
-        ],
-        [
-            "Sie haben die Aussicht auf die Spitze des Berges, den Sie erklimmen.",
-            "2314",
-            None,
-            "ge"
-        ],
-        [
-            "Wer nichts riskiert, riskiert alles.",
-            "7483",
-            None,
-            "ge"
-        ],
-        [
-            "Es sind zu viele Jahre Arbeit notwendig, um \u00fcber Nacht erfolgreich zu sein.",
-            "12461",
-            None,
-            "ge"
-        ],
-        [
-            "Avrai la vista della cima della montagna che sali.",
-            "4998",
-            None,
-            "it"
-        ],
-        [
-            "Quando non corri alcun rischio, rischi tutto.",
-            "6744",
-            None,
-            "it"
-        ],
-        [
-            "Are necessary too many years of work to succeed overnight.",
-            "1157",
-            None,
-            "it"
+            "fr",
         ],
+        ["Sie haben die Aussicht auf die Spitze des Berges, den Sie erklimmen.", "2314", None, "ge"],
+        ["Wer nichts riskiert, riskiert alles.", "7483", None, "ge"],
+        ["Es sind zu viele Jahre Arbeit notwendig, um \u00fcber Nacht erfolgreich zu sein.", "12461", None, "ge"],
+        ["Avrai la vista della cima della montagna che sali.", "4998", None, "it"],
+        ["Quando non corri alcun rischio, rischi tutto.", "6744", None, "it"],
+        ["Are necessary too many years of work to succeed overnight.", "1157", None, "it"],
         [
             "B\u0119dziesz mie\u0107 widok na szczyt g\u00f3ry, na kt\u00f3r\u0105 si\u0119 wspinasz.",
             "7014",
             None,
-            "pl"
-        ],
-        [
-            "Kiedy nie podejmujesz \u017cadnego ryzyka, ryzykujesz wszystko.",
-            "3492",
-            None,
-            "pl"
+            "pl",
         ],
+        ["Kiedy nie podejmujesz \u017cadnego ryzyka, ryzykujesz wszystko.", "3492", None, "pl"],
         [
             "Potrzebne s\u0105 zbyt wiele lat pracy, aby odnie\u015b\u0107 sukces z dnia na dzie\u0144.",
             "1890",
             None,
-            "pl"
-        ],
-        [
-            "Tendr\u00e1s la vista de la cima de la monta\u00f1a que subes",
-            "101",
-            None,
-            "sp"
-        ],
-        [
-            "Cuando no te arriesgas, lo arriesgas todo.",
-            "5922",
-            None,
-            "sp"
+            "pl",
         ],
+        ["Tendr\u00e1s la vista de la cima de la monta\u00f1a que subes", "101", None, "sp"],
+        ["Cuando no te arriesgas, lo arriesgas todo.", "5922", None, "sp"],
         [
             "Son necesarios demasiados a\u00f1os de trabajo para triunfar de la noche a la ma\u00f1ana.",
             "10246",
             None,
-            "sp"
-        ]
+            "sp",
+        ],
     ],
     # Enable the weighted sampler
     use_weighted_sampler=True,
@@ -399,10 +304,10 @@ config = VitsConfig(
     weighted_sampler_attrs={"language": 1.0},
     weighted_sampler_multipliers={
         # "speaker_name": {
-            # you can force the batching scheme to give a higher weight to a certain speaker and then this speaker will appears more frequently on the batch.
-            # It will speedup the speaker adaptation process. Considering the CML train dataset and "new_speaker" as the speaker name of the speaker that you want to adapt.
-            # The line above will make the balancer consider the "new_speaker" as 106 speakers so 1/4 of the number of speakers present on CML dataset.
-            # 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
+        # you can force the batching scheme to give a higher weight to a certain speaker and then this speaker will appears more frequently on the batch.
+        # It will speedup the speaker adaptation process. Considering the CML train dataset and "new_speaker" as the speaker name of the speaker that you want to adapt.
+        # The line above will make the balancer consider the "new_speaker" as 106 speakers so 1/4 of the number of speakers present on CML dataset.
+        # 'new_speaker': 106, # (CML tot. train speaker)/4 = (424/4) = 106
         # }
     },
     # It defines the Speaker Consistency Loss (SCL) α to 9 like the YourTTS paper
@@ -414,7 +319,7 @@ train_samples, eval_samples = load_tts_samples(
     config.datasets,
     eval_split=True,
     eval_split_max_size=config.eval_split_max_size,
-    eval_split_size=config.eval_split_size
+    eval_split_size=config.eval_split_size,
 )
 
 # Init the model
diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py
index d3a83980..e6ed18e1 100644
--- a/tests/zoo_tests/test_models.py
+++ b/tests/zoo_tests/test_models.py
@@ -10,12 +10,15 @@ from TTS.utils.generic_utils import get_user_data_dir
 from TTS.utils.manage import ModelManager
 
 
+MODELS_WITH_SEP_TESTS = ["bark", "xtts"]
+
+
 def run_models(offset=0, step=1):
     """Check if all the models are downloadable and tts models run correctly."""
     print(" > Run synthesizer with all the models.")
     output_path = os.path.join(get_tests_output_path(), "output.wav")
     manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False)
-    model_names = [name for name in manager.list_models() if "bark" not in name]
+    model_names = [name for name in manager.list_models() if all(tag not in name for tag in MODELS_WITH_SEP_TESTS)]
     for model_name in model_names[offset::step]:
         print(f"\n > Run - {model_name}")
         model_path, _, _ = manager.download_model(model_name)
@@ -63,20 +66,15 @@ def run_models(offset=0, step=1):
             manager.download_model(model_name)
         print(f" | > OK: {model_name}")
 
-    # folders = glob.glob(os.path.join(manager.output_prefix, "*"))
-    # assert len(folders) == len(model_names) // step
 
-
-def test_models_offset_0_step_3():
-    run_models(offset=0, step=3)
-
-
-def test_models_offset_1_step_3():
-    run_models(offset=1, step=3)
-
-
-def test_models_offset_2_step_3():
-    run_models(offset=2, step=3)
+def test_xtts():
+    out_wav = os.path.join(get_tests_output_path(), "output.wav")
+    ref_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0001.wav")
+    run_cli(  # NOTE(review): `yes` presumably answers the terms-of-service prompt - confirm
+        "yes | tts --model_name  tts_models/multilingual/multi-dataset/xtts_v1 "
+        f'--text "This is an example." --out_path "{out_wav}" --progress_bar False --use_cuda True '
+        f'--speaker_wav "{ref_wav}" --language_idx "en"'
+    )
 
 
 def test_bark():
@@ -84,7 +82,7 @@ def test_bark():
     output_path = os.path.join(get_tests_output_path(), "output.wav")
     run_cli(
         f" tts --model_name  tts_models/multilingual/multi-dataset/bark "
-        f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
+        f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True'
     )
 
 
@@ -99,3 +97,17 @@ def test_voice_conversion():
         f"tts --model_name  {model_name}"
         f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --progress_bar False"
     )
+
+
+# Each wrapper below runs every third model of the zoo (see `offset`/`step` in `run_models`),
+# so the tests can be split into different actions on Github.
+def test_models_offset_0_step_3():
+    run_models(offset=0, step=3)
+
+
+def test_models_offset_1_step_3():
+    run_models(offset=1, step=3)
+
+
+def test_models_offset_2_step_3():
+    run_models(offset=2, step=3)
\ No newline at end of file