From ccfaa6b1d5f0ede98f715df4dba950459d2bba70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Tue, 18 May 2021 15:38:22 +0200 Subject: [PATCH] add `needs_phonemizer` field to models.json. If set true these models are only compatible with v0.0.13 or below. --- TTS/.models.json | 37 +++++++++++++++++++++++----------- TTS/tts/utils/text/__init__.py | 2 +- TTS/utils/manage.py | 3 +++ tests/test_synthesize.py | 30 +++++++++++++-------------- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/TTS/.models.json b/TTS/.models.json index 5fcfa86b..b926f120 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -6,7 +6,8 @@ "description": "EK1 en-rp tacotron2 by NMStoker", "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip", "default_vocoder": "vocoder_models/en/ek1/wavegrad", - "commit": "c802255" + "commit": "c802255", + "needs_phonemizer": true } }, "ljspeech":{ @@ -17,7 +18,8 @@ "commit": "bae2ad0f", "author": "Eren Gölge @erogol", "license": "", - "contact":"egolge@coqui.com" + "contact":"egolge@coqui.com", + "needs_phonemizer": false }, "glow-tts":{ "description": "", @@ -27,7 +29,8 @@ "commit": "", "author": "Eren Gölge @erogol", "license": "MPL", - "contact":"egolge@coqui.com" + "contact":"egolge@coqui.com", + "needs_phonemizer": true }, "tacotron2-DCA": { "description": "", @@ -36,7 +39,8 @@ "commit": "", "author": "Eren Gölge @erogol", "license": "MPL", - "contact":"egolge@coqui.com" + "contact":"egolge@coqui.com", + "needs_phonemizer": true }, "speedy-speech-wn":{ "description": "Speedy Speech model with wavenet decoder.", @@ -45,7 +49,8 @@ "commit": "77b6145", "author": "Eren Gölge @erogol", "license": "MPL", - "contact":"egolge@coqui.com" + "contact":"egolge@coqui.com", + "needs_phonemizer": true } }, "vctk":{ @@ -56,7 +61,9 @@ "commit": "b531fa69", "author": "Edresson Casanova", "license": "", - "contact":"" + "contact":"", + "needs_phonemizer": true + } }, @@ -68,7 +75,8 @@ "commit": "bae2ad0f", "author": "Eren Gölge @erogol", "license": "", - "contact":"egolge@coqui.com" + "contact":"egolge@coqui.com", + "needs_phonemizer": true } } }, @@ -80,7 +88,8 @@ "commit": "", "author": "Eren Gölge @erogol", "license": "MPL", - "contact":"egolge@coqui.com" + "contact":"egolge@coqui.com", + "needs_phonemizer": true } } }, @@ -92,7 +101,8 @@ "commit": "", "author": "Eren Gölge @erogol", "license": "MPL", - "contact":"egolge@coqui.com" + "contact":"egolge@coqui.com", + "needs_phonemizer": true } } }, @@ -112,7 +122,8 @@ "author": "@r-dh", "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan", "stats_file": null, - "commit": "540d811" + "commit": "540d811", + "needs_phonemizer": true } } }, @@ -123,7 +134,8 @@ "author": "@erogol", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "license":"", - "contact": "egolge@coqui.com" + "contact": "egolge@coqui.com", + "needs_phonemizer": true } } }, @@ -133,7 +145,8 @@ "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip", "default_vocoder": "vocoder_models/de/thorsten/wavegrad", "author": "@thorstenMueller", - "commit": "unknown" + "commit": "unknown", + "needs_phonemizer": true } } } diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py index 2b73d4e4..2ead9561 100644 --- a/TTS/tts/utils/text/__init__.py +++ b/TTS/tts/utils/text/__init__.py @@ -39,7 +39,7 @@ def text2phone(text, language): if language == "zh-CN": ph = chinese_text_to_phonemes(text) return ph - raise ValueError(f" [!] Language {language} is nor supported for phonemization.") + raise ValueError(f" [!] Language {language} is not supported for phonemization.") def intersperse(sequence, token): diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index fdc141ec..9630873f 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -101,6 +101,9 @@ class ModelManager(object): output_path = os.path.join(self.output_prefix, model_full_name) output_model_path = os.path.join(output_path, "model_file.pth.tar") output_config_path = os.path.join(output_path, "config.json") + # NOTE : band-aid for removing phoneme support + if 'needs_phonemizer' in model_item and model_item['needs_phonemizer']: + raise RuntimeError(' [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models.') if os.path.exists(output_path): print(f" > {model_name} is already downloaded.") else: diff --git a/tests/test_synthesize.py b/tests/test_synthesize.py index 526f7dc8..62eb6dbe 100644 --- a/tests/test_synthesize.py +++ b/tests/test_synthesize.py @@ -10,19 +10,19 @@ def test_synthesize(): # single speaker model run_cli(f'tts --text "This is an example." --out_path "{output_path}"') - run_cli( - "tts --model_name tts_models/en/ljspeech/speedy-speech-wn " - f'--text "This is an example." --out_path "{output_path}"' - ) - run_cli( - "tts --model_name tts_models/en/ljspeech/speedy-speech-wn " - "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan " - f'--text "This is an example." --out_path "{output_path}"' - ) + # run_cli( + # "tts --model_name tts_models/en/ljspeech/speedy-speech-wn " + # f'--text "This is an example." --out_path "{output_path}"' + # ) + # run_cli( + # "tts --model_name tts_models/en/ljspeech/speedy-speech-wn " + # "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan " + # f'--text "This is an example." --out_path "{output_path}"' + # ) - # multi-speaker model - run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs") - run_cli( - f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" ' - f'--text "This is an example." --out_path "{output_path}"' - ) + # # multi-speaker model + # run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs") + # run_cli( + # f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" ' + # f'--text "This is an example." --out_path "{output_path}"' + # )