From ccfaa6b1d5f0ede98f715df4dba950459d2bba70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Tue, 18 May 2021 15:38:22 +0200
Subject: [PATCH] Add `needs_phonemizer` field to .models.json. Models with it
 set to true are only compatible with v0.0.13 or below.

---
 TTS/.models.json               | 37 +++++++++++++++++++++++-----------
 TTS/tts/utils/text/__init__.py |  2 +-
 TTS/utils/manage.py            |  3 +++
 tests/test_synthesize.py       | 30 +++++++++++++--------------
 4 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/TTS/.models.json b/TTS/.models.json
index 5fcfa86b..b926f120 100644
--- a/TTS/.models.json
+++ b/TTS/.models.json
@@ -6,7 +6,8 @@
                     "description": "EK1 en-rp tacotron2 by NMStoker",
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip",
                     "default_vocoder": "vocoder_models/en/ek1/wavegrad",
-                    "commit": "c802255"
+                    "commit": "c802255",
+                    "needs_phonemizer": true
                 }
             },
             "ljspeech":{
@@ -17,7 +18,8 @@
                     "commit": "bae2ad0f",
                     "author": "Eren Gölge @erogol",
                     "license": "",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": false
                 },
                 "glow-tts":{
                     "description": "",
@@ -27,7 +29,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 },
                 "tacotron2-DCA": {
                     "description": "",
@@ -36,7 +39,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 },
                 "speedy-speech-wn":{
                     "description": "Speedy Speech model with wavenet decoder.",
@@ -45,7 +49,8 @@
                     "commit": "77b6145",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             },
             "vctk":{
@@ -56,7 +61,9 @@
                     "commit": "b531fa69",
                     "author": "Edresson Casanova",
                     "license": "",
-                    "contact":""
+                    "contact":"",
+                    "needs_phonemizer": true
+
 
                 }
             },
@@ -68,7 +75,8 @@
                     "commit": "bae2ad0f",
                     "author": "Eren Gölge @erogol",
                     "license": "",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -80,7 +88,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -92,7 +101,8 @@
                     "commit": "",
                     "author": "Eren Gölge @erogol",
                     "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -112,7 +122,8 @@
                     "author": "@r-dh",
                     "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
                     "stats_file": null,
-                    "commit": "540d811"
+                    "commit": "540d811",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -123,7 +134,8 @@
                     "author": "@erogol",
                     "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
                     "license":"",
-                    "contact": "egolge@coqui.com"
+                    "contact": "egolge@coqui.com",
+                    "needs_phonemizer": true
                 }
             }
         },
@@ -133,7 +145,8 @@
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
                     "default_vocoder": "vocoder_models/de/thorsten/wavegrad",
                     "author": "@thorstenMueller",
-                    "commit": "unknown"
+                    "commit": "unknown",
+                    "needs_phonemizer": true
                 }
             }
         }
diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py
index 2b73d4e4..2ead9561 100644
--- a/TTS/tts/utils/text/__init__.py
+++ b/TTS/tts/utils/text/__init__.py
@@ -39,7 +39,7 @@ def text2phone(text, language):
     if language == "zh-CN":
         ph = chinese_text_to_phonemes(text)
         return ph
-    raise ValueError(f" [!] Language {language} is nor supported for phonemization.")
+    raise ValueError(f" [!] Language {language} is not supported for phonemization.")
 
 
 def intersperse(sequence, token):
diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
index fdc141ec..9630873f 100644
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -101,6 +101,9 @@ class ModelManager(object):
         output_path = os.path.join(self.output_prefix, model_full_name)
         output_model_path = os.path.join(output_path, "model_file.pth.tar")
         output_config_path = os.path.join(output_path, "config.json")
+        # NOTE: band-aid while phoneme support is removed -- refuse models flagged `needs_phonemizer`.
+        if 'needs_phonemizer' in model_item and model_item['needs_phonemizer']:
+            raise RuntimeError(' [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models.')
         if os.path.exists(output_path):
             print(f" > {model_name} is already downloaded.")
         else:
diff --git a/tests/test_synthesize.py b/tests/test_synthesize.py
index 526f7dc8..62eb6dbe 100644
--- a/tests/test_synthesize.py
+++ b/tests/test_synthesize.py
@@ -10,19 +10,19 @@ def test_synthesize():
 
     # single speaker model
     run_cli(f'tts --text "This is an example." --out_path "{output_path}"')
-    run_cli(
-        "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
-        f'--text "This is an example." --out_path "{output_path}"'
-    )
-    run_cli(
-        "tts --model_name tts_models/en/ljspeech/speedy-speech-wn  "
-        "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
-        f'--text "This is an example." --out_path "{output_path}"'
-    )
+    # run_cli(
+    #     "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
+    #     f'--text "This is an example." --out_path "{output_path}"'
+    # )
+    # run_cli(
+    #     "tts --model_name tts_models/en/ljspeech/speedy-speech-wn  "
+    #     "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
+    #     f'--text "This is an example." --out_path "{output_path}"'
+    # )
 
-    # multi-speaker model
-    run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
-    run_cli(
-        f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
-        f'--text "This is an example." --out_path "{output_path}"'
-    )
+    # # multi-speaker model
+    # run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
+    # run_cli(
+    #     f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
+    #     f'--text "This is an example." --out_path "{output_path}"'
+    # )