Add `needs_phonemizer` field to `.models.json`

If the field is set to true, the model is only compatible with v0.0.13 or below.
2021-05-18 15:38:22 +02:00 · 2021-05-18 15:38:22 +02:00 · ccfaa6b1d5
parent a14fcf2a13
commit ccfaa6b1d5
4 changed files with 44 additions and 28 deletions
--- a/TTS/.models.json
+++ b/TTS/.models.json
@@ -6,7 +6,8 @@
                    "description": "EK1 en-rp tacotron2 by NMStoker",
                    "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip",
                    "default_vocoder": "vocoder_models/en/ek1/wavegrad",
-                    "commit": "c802255"
+                    "commit": "c802255",
+                    "needs_phonemizer": true
                }
            },
            "ljspeech":{
@@ -17,7 +18,8 @@
                    "commit": "bae2ad0f",
                    "author": "Eren Gölge @erogol",
                    "license": "",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": false
                },
                "glow-tts":{
                    "description": "",
@@ -27,7 +29,8 @@
                    "commit": "",
                    "author": "Eren Gölge @erogol",
                    "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                },
                "tacotron2-DCA": {
                    "description": "",
@@ -36,7 +39,8 @@
                    "commit": "",
                    "author": "Eren Gölge @erogol",
                    "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                },
                "speedy-speech-wn":{
                    "description": "Speedy Speech model with wavenet decoder.",
@@ -45,7 +49,8 @@
                    "commit": "77b6145",
                    "author": "Eren Gölge @erogol",
                    "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                }
            },
            "vctk":{
@@ -56,7 +61,9 @@
                    "commit": "b531fa69",
                    "author": "Edresson Casanova",
                    "license": "",
-                    "contact":""
+                    "contact":"",
+                    "needs_phonemizer": true
+

                }
            },
@@ -68,7 +75,8 @@
                    "commit": "bae2ad0f",
                    "author": "Eren Gölge @erogol",
                    "license": "",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                }
            }
        },
@@ -80,7 +88,8 @@
                    "commit": "",
                    "author": "Eren Gölge @erogol",
                    "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                }
            }
        },
@@ -92,7 +101,8 @@
                    "commit": "",
                    "author": "Eren Gölge @erogol",
                    "license": "MPL",
-                    "contact":"egolge@coqui.com"
+                    "contact":"egolge@coqui.com",
+                    "needs_phonemizer": true
                }
            }
        },
@@ -112,7 +122,8 @@
                    "author": "@r-dh",
                    "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
                    "stats_file": null,
-                    "commit": "540d811"
+                    "commit": "540d811",
+                    "needs_phonemizer": true
                }
            }
        },
@@ -123,7 +134,8 @@
                    "author": "@erogol",
                    "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
                    "license":"",
-                    "contact": "egolge@coqui.com"
+                    "contact": "egolge@coqui.com",
+                    "needs_phonemizer": true
                }
            }
        },
@@ -133,7 +145,8 @@
                    "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
                    "default_vocoder": "vocoder_models/de/thorsten/wavegrad",
                    "author": "@thorstenMueller",
-                    "commit": "unknown"
+                    "commit": "unknown",
+                    "needs_phonemizer": true
                }
            }
        }
--- a/TTS/tts/utils/text/__init__.py
+++ b/TTS/tts/utils/text/__init__.py
@@ -39,7 +39,7 @@ def text2phone(text, language):
    if language == "zh-CN":
        ph = chinese_text_to_phonemes(text)
        return ph
-    raise ValueError(f" [!] Language {language} is nor supported for phonemization.")
+    raise ValueError(f" [!] Language {language} is not supported for phonemization.")


 def intersperse(sequence, token):
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -101,6 +101,9 @@ class ModelManager(object):
        output_path = os.path.join(self.output_prefix, model_full_name)
        output_model_path = os.path.join(output_path, "model_file.pth.tar")
        output_config_path = os.path.join(output_path, "config.json")
+        # NOTE : band-aid for removing phoneme support
+        if 'needs_phonemizer' in model_item and model_item['needs_phonemizer']:
+            raise RuntimeError(' [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models.')
        if os.path.exists(output_path):
            print(f" > {model_name} is already downloaded.")
        else:
--- a/tests/test_synthesize.py
+++ b/tests/test_synthesize.py
@@ -10,19 +10,19 @@ def test_synthesize():

    # single speaker model
    run_cli(f'tts --text "This is an example." --out_path "{output_path}"')
-    run_cli(
-        "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
-        f'--text "This is an example." --out_path "{output_path}"'
-    )
-    run_cli(
-        "tts --model_name tts_models/en/ljspeech/speedy-speech-wn  "
-        "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
-        f'--text "This is an example." --out_path "{output_path}"'
-    )
+    # run_cli(
+    #     "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
+    #     f'--text "This is an example." --out_path "{output_path}"'
+    # )
+    # run_cli(
+    #     "tts --model_name tts_models/en/ljspeech/speedy-speech-wn  "
+    #     "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
+    #     f'--text "This is an example." --out_path "{output_path}"'
+    # )

-    # multi-speaker model
-    run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
-    run_cli(
-        f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
-        f'--text "This is an example." --out_path "{output_path}"'
-    )
+    # # multi-speaker model
+    # run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
+    # run_cli(
+    #     f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
+    #     f'--text "This is an example." --out_path "{output_path}"'
+    # )