Add `needs_phonemizer` field to models.json. If set to true, these models

are only compatible with v0.0.13 or below.
This commit is contained in:
Eren Gölge 2021-05-18 15:38:22 +02:00
parent a14fcf2a13
commit ccfaa6b1d5
4 changed files with 44 additions and 28 deletions

View File

@ -6,7 +6,8 @@
"description": "EK1 en-rp tacotron2 by NMStoker",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--en--ek1--tacotron2.zip",
"default_vocoder": "vocoder_models/en/ek1/wavegrad",
"commit": "c802255"
"commit": "c802255",
"needs_phonemizer": true
}
},
"ljspeech":{
@ -17,7 +18,8 @@
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
"license": "",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": false
},
"glow-tts":{
"description": "",
@ -27,7 +29,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
},
"tacotron2-DCA": {
"description": "",
@ -36,7 +39,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
},
"speedy-speech-wn":{
"description": "Speedy Speech model with wavenet decoder.",
@ -45,7 +49,8 @@
"commit": "77b6145",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
},
"vctk":{
@ -56,7 +61,9 @@
"commit": "b531fa69",
"author": "Edresson Casanova",
"license": "",
"contact":""
"contact":"",
"needs_phonemizer": true
}
},
@ -68,7 +75,8 @@
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
"license": "",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
}
},
@ -80,7 +88,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
}
},
@ -92,7 +101,8 @@
"commit": "",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact":"egolge@coqui.com"
"contact":"egolge@coqui.com",
"needs_phonemizer": true
}
}
},
@ -112,7 +122,8 @@
"author": "@r-dh",
"default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
"stats_file": null,
"commit": "540d811"
"commit": "540d811",
"needs_phonemizer": true
}
}
},
@ -123,7 +134,8 @@
"author": "@erogol",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"license":"",
"contact": "egolge@coqui.com"
"contact": "egolge@coqui.com",
"needs_phonemizer": true
}
}
},
@ -133,7 +145,8 @@
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/de/thorsten/wavegrad",
"author": "@thorstenMueller",
"commit": "unknown"
"commit": "unknown",
"needs_phonemizer": true
}
}
}

View File

@ -39,7 +39,7 @@ def text2phone(text, language):
if language == "zh-CN":
ph = chinese_text_to_phonemes(text)
return ph
raise ValueError(f" [!] Language {language} is nor supported for phonemization.")
raise ValueError(f" [!] Language {language} is not supported for phonemization.")
def intersperse(sequence, token):

View File

@ -101,6 +101,9 @@ class ModelManager(object):
output_path = os.path.join(self.output_prefix, model_full_name)
output_model_path = os.path.join(output_path, "model_file.pth.tar")
output_config_path = os.path.join(output_path, "config.json")
# NOTE : band-aid for removing phoneme support
if 'needs_phonemizer' in model_item and model_item['needs_phonemizer']:
raise RuntimeError(' [!] Use 🐸TTS <= v0.0.13 for this model. Current version does not support phoneme based models.')
if os.path.exists(output_path):
print(f" > {model_name} is already downloaded.")
else:

View File

@ -10,19 +10,19 @@ def test_synthesize():
# single speaker model
run_cli(f'tts --text "This is an example." --out_path "{output_path}"')
run_cli(
"tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
f'--text "This is an example." --out_path "{output_path}"'
)
run_cli(
"tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
"--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
f'--text "This is an example." --out_path "{output_path}"'
)
# run_cli(
# "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
# f'--text "This is an example." --out_path "{output_path}"'
# )
# run_cli(
# "tts --model_name tts_models/en/ljspeech/speedy-speech-wn "
# "--vocoder_name vocoder_models/en/ljspeech/multiband-melgan "
# f'--text "This is an example." --out_path "{output_path}"'
# )
# multi-speaker model
run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
run_cli(
f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
f'--text "This is an example." --out_path "{output_path}"'
)
# # multi-speaker model
# run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
# run_cli(
# f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
# f'--text "This is an example." --out_path "{output_path}"'
# )