Add a `needs_phonemizer` field to models.json. If set to true, these models

are only compatible with v0.0.13 or below.
This commit is contained in:
Eren Gölge 2021-05-18 15:38:22 +02:00
parent a14fcf2a13
commit ccfaa6b1d5
4 changed files with 44 additions and 28 deletions

View File

@ -6,7 +6,8 @@
"description": "EK1 en-rp tacotron2 by NMStoker", "description": "EK1 en-rp tacotron2 by NMStoker",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip", "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip",
"default_vocoder": "vocoder_models/en/ek1/wavegrad", "default_vocoder": "vocoder_models/en/ek1/wavegrad",
"commit": "c802255" "commit": "c802255",
"needs_phonemizer": true
} }
}, },
"ljspeech":{ "ljspeech":{
@ -17,7 +18,8 @@
"commit": "bae2ad0f", "commit": "bae2ad0f",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "", "license": "",
"contact":"egolge@coqui.com" "contact":"egolge@coqui.com",
"needs_phonemizer": false
}, },
"glow-tts":{ "glow-tts":{
"description": "", "description": "",
@ -27,7 +29,8 @@
"commit": "", "commit": "",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
"contact":"egolge@coqui.com" "contact":"egolge@coqui.com",
"needs_phonemizer": true
}, },
"tacotron2-DCA": { "tacotron2-DCA": {
"description": "", "description": "",
@ -36,7 +39,8 @@
"commit": "", "commit": "",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
"contact":"egolge@coqui.com" "contact":"egolge@coqui.com",
"needs_phonemizer": true
}, },
"speedy-speech-wn":{ "speedy-speech-wn":{
"description": "Speedy Speech model with wavenet decoder.", "description": "Speedy Speech model with wavenet decoder.",
@ -45,7 +49,8 @@
"commit": "77b6145", "commit": "77b6145",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
"contact":"egolge@coqui.com" "contact":"egolge@coqui.com",
"needs_phonemizer": true
} }
}, },
"vctk":{ "vctk":{
@ -56,7 +61,9 @@
"commit": "b531fa69", "commit": "b531fa69",
"author": "Edresson Casanova", "author": "Edresson Casanova",
"license": "", "license": "",
"contact":"" "contact":"",
"needs_phonemizer": true
} }
}, },
@ -68,7 +75,8 @@
"commit": "bae2ad0f", "commit": "bae2ad0f",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "", "license": "",
"contact":"egolge@coqui.com" "contact":"egolge@coqui.com",
"needs_phonemizer": true
} }
} }
}, },
@ -80,7 +88,8 @@
"commit": "", "commit": "",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
"contact":"egolge@coqui.com" "contact":"egolge@coqui.com",
"needs_phonemizer": true
} }
} }
}, },
@ -92,7 +101,8 @@
"commit": "", "commit": "",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
"contact":"egolge@coqui.com" "contact":"egolge@coqui.com",
"needs_phonemizer": true
} }
} }
}, },
@ -112,7 +122,8 @@
"author": "@r-dh", "author": "@r-dh",
"default_vocoder": "vocoder_models/nl/mai/parallel-wavegan", "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
"stats_file": null, "stats_file": null,
"commit": "540d811" "commit": "540d811",
"needs_phonemizer": true
} }
} }
}, },
@ -123,7 +134,8 @@
"author": "@erogol", "author": "@erogol",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"license":"", "license":"",
"contact": "egolge@coqui.com" "contact": "egolge@coqui.com",
"needs_phonemizer": true
} }
} }
}, },
@ -133,7 +145,8 @@
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip", "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/de/thorsten/wavegrad", "default_vocoder": "vocoder_models/de/thorsten/wavegrad",
"author": "@thorstenMueller", "author": "@thorstenMueller",
"commit": "unknown" "commit": "unknown",
"needs_phonemizer": true
} }
} }
} }

View File

@ -39,7 +39,7 @@ def text2phone(text, language):
if language == "zh-CN": if language == "zh-CN":
ph = chinese_text_to_phonemes(text) ph = chinese_text_to_phonemes(text)
return ph return ph
raise ValueError(f" [!] Language {language} is nor supported for phonemization.") raise ValueError(f" [!] Language {language} is not supported for phonemization.")
def intersperse(sequence, token): def intersperse(sequence, token):

View File

@ -101,6 +101,9 @@ class ModelManager(object):
output_path = os.path.join(self.output_prefix, model_full_name) output_path = os.path.join(self.output_prefix, model_full_name)
output_model_path = os.path.join(output_path, "model_file.pth.tar") output_model_path = os.path.join(output_path, "model_file.pth.tar")
output_config_path = os.path.join(output_path, "config.json") output_config_path = os.path.join(output_path, "config.json")
# NOTE : band-aid for removing phoneme support
if 'needs_phonemizer' in model_item and model_item['needs_phonemizer']:
raise RuntimeError(' [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models.')
if os.path.exists(output_path): if os.path.exists(output_path):
print(f" > {model_name} is already downloaded.") print(f" > {model_name} is already downloaded.")
else: else:

View File

@ -10,19 +10,19 @@ def test_synthesize():
# single speaker model # single speaker model
run_cli(f'tts --text "This is an example." --out_path "{output_path}"') run_cli(f'tts --text "This is an example." --out_path "{output_path}"')
run_cli( # run_cli(
"tts --model_name tts_models/en/ljspeech/speedy-speech-wn " # "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
f'--text "This is an example." --out_path "{output_path}"' # f'--text "This is an example." --out_path "{output_path}"'
) # )
run_cli( # run_cli(
"tts --model_name tts_models/en/ljspeech/speedy-speech-wn " # "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
"--vocoder_name vocoder_models/en/ljspeech/multiband-melgan " # "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
f'--text "This is an example." --out_path "{output_path}"' # f'--text "This is an example." --out_path "{output_path}"'
) # )
# multi-speaker model # # multi-speaker model
run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs") # run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
run_cli( # run_cli(
f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" ' # f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
f'--text "This is an example." --out_path "{output_path}"' # f'--text "This is an example." --out_path "{output_path}"'
) # )