* Add Ukrainian LADA (female) voice

* Add ca and fa models

* Add pth files to manager

* Bump up to v0.10.1

Co-authored-by: Yehor Smoliakov <yehors@ukr.net>
This commit is contained in:
Eren Gölge 2022-12-26 15:46:21 +01:00 committed by GitHub
parent a04db8d632
commit a31af762e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 33 additions and 2 deletions

View File

@ -617,6 +617,30 @@
"license": "bsd-3-clause"
}
}
},
"ca": {
"custom": {
"vits":{
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--ca--custom--vits.zip",
"default_vocoder": null,
"commit": null,
"description": "It is trained from scratch with 101460 utterances from 257 speakers, approx. 138 hours of speech. We used three datasets:\nFestcat and Google Catalan TTS (both TTS datasets) and also a part of Common Voice 8. It is trained with TTS v0.8.0.\nhttps://github.com/coqui-ai/TTS/discussions/930#discussioncomment-4466345",
"author": "@gullabi",
"license": "CC-BY-4.0"
}
}
},
"fa":{
"custom":{
"glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--fa--custom--glow-tts.zip",
"default_vocoder": null,
"commit": null,
"description": "persian-tts-female-glow_tts model for text to speech purposes. Single-speaker female voice trained on persian-tts-dataset-famale. \nThis model has no compatible vocoder, thus the output quality is not very good. \nDataset: https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale.",
"author": "@karim23657",
"license": "CC-BY-4.0"
}
}
}
},
"vocoder_models": {

View File

@ -1 +1 @@
0.10.0
0.10.1

View File

@ -298,7 +298,9 @@ class ModelManager(object):
"""
output_stats_path = os.path.join(output_path, "scale_stats.npy")
output_d_vector_file_path = os.path.join(output_path, "speakers.json")
output_d_vector_file_pth_path = os.path.join(output_path, "speakers.pth")
output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json")
output_speaker_ids_file_pth_path = os.path.join(output_path, "speaker_ids.pth")
speaker_encoder_config_path = os.path.join(output_path, "config_se.json")
speaker_encoder_model_path = self._find_speaker_encoder(output_path)
@ -307,11 +309,15 @@ class ModelManager(object):
# update the speakers.json file path in the model config.json to the current path
self._update_path("d_vector_file", output_d_vector_file_path, config_path)
self._update_path("d_vector_file", output_d_vector_file_pth_path, config_path)
self._update_path("model_args.d_vector_file", output_d_vector_file_path, config_path)
self._update_path("model_args.d_vector_file", output_d_vector_file_pth_path, config_path)
# update the speaker_ids.json file path in the model config.json to the current path
self._update_path("speakers_file", output_speaker_ids_file_path, config_path)
self._update_path("speakers_file", output_speaker_ids_file_pth_path, config_path)
self._update_path("model_args.speakers_file", output_speaker_ids_file_path, config_path)
self._update_path("model_args.speakers_file", output_speaker_ids_file_pth_path, config_path)
# update the speaker_encoder file path in the model config.json to the current path
self._update_path("speaker_encoder_model_path", speaker_encoder_model_path, config_path)

View File

@ -12,5 +12,6 @@ Some of the known public datasets that we successfully applied 🐸TTS:
- [German - Thorsten OGVD](https://github.com/thorstenMueller/deep-learning-german-tts)
- [Japanese - Kokoro](https://www.kaggle.com/kaiida/kokoro-speech-dataset-v11-small/version/1)
- [Chinese](https://www.data-baker.com/data/index/source/)
- [Ukrainian - LADA](https://github.com/egorsmkv/ukrainian-tts-datasets/tree/main/lada)
Let us know if you use 🐸TTS on a different dataset.
Let us know if you use 🐸TTS on a different dataset.