Gateway for TTS models

This commit is contained in:
Eren Gölge 2021-10-26 13:04:51 +02:00
parent 00becf2671
commit 7c10574931
2 changed files with 33 additions and 33 deletions

View File

@ -19,6 +19,8 @@
📄 [Text-to-Speech paper collection](https://github.com/erogol/TTS-papers) 📄 [Text-to-Speech paper collection](https://github.com/erogol/TTS-papers)
<img src="https://static.scarf.sh/a.png?x-pxid=cf317fe7-2188-4721-bc01-124bb5d5dbb2" />
## 💬 Where to ask questions ## 💬 Where to ask questions
Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly so that more people can benefit from it. Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly so that more people can benefit from it.
@ -154,5 +156,3 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
|- vocoder/ (Vocoder models.) |- vocoder/ (Vocoder models.)
|- (same) |- (same)
``` ```
<img src="https://static.scarf.sh/a.png?x-pxid=503c242f-a253-4fb8-8071-ce1dc1e89999" />

View File

@ -4,7 +4,7 @@
"ek1": { "ek1": {
"tacotron2": { "tacotron2": {
"description": "EK1 en-rp tacotron2 by NMStoker", "description": "EK1 en-rp tacotron2 by NMStoker",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.0/tts_models--en--ek1--tacotron2.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.0/tts_models--en--ek1--tacotron2.zip",
"default_vocoder": "vocoder_models/en/ek1/wavegrad", "default_vocoder": "vocoder_models/en/ek1/wavegrad",
"commit": "c802255" "commit": "c802255"
} }
@ -12,7 +12,7 @@
"ljspeech": { "ljspeech": {
"tacotron2-DDC": { "tacotron2-DDC": {
"description": "Tacotron2 with Double Decoder Consistency.", "description": "Tacotron2 with Double Decoder Consistency.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/tts_models--en--ljspeech--tacotron2-DDC.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/tts_models--en--ljspeech--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "bae2ad0f", "commit": "bae2ad0f",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -21,7 +21,7 @@
}, },
"tacotron2-DDC_ph": { "tacotron2-DDC_ph": {
"description": "Tacotron2 with Double Decoder Consistency with phonemes.", "description": "Tacotron2 with Double Decoder Consistency with phonemes.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--ljspeech--tacotronDDC_ph.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/tts_models--en--ljspeech--tacotronDDC_ph.zip",
"default_vocoder": "vocoder_models/en/ljspeech/univnet", "default_vocoder": "vocoder_models/en/ljspeech/univnet",
"commit": "3900448", "commit": "3900448",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -30,7 +30,7 @@
}, },
"glow-tts": { "glow-tts": {
"description": "", "description": "",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--en--ljspeech--glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--en--ljspeech--glow-tts.zip",
"stats_file": null, "stats_file": null,
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan", "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
"commit": "", "commit": "",
@ -40,7 +40,7 @@
}, },
"speedy-speech": { "speedy-speech": {
"description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.", "description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.3.0/tts_models--en--ljspeech--speedy_speech.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.3.0/tts_models--en--ljspeech--speedy_speech.zip",
"stats_file": null, "stats_file": null,
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "4581e3d", "commit": "4581e3d",
@ -50,7 +50,7 @@
}, },
"tacotron2-DCA": { "tacotron2-DCA": {
"description": "", "description": "",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--en--ljspeech--tacotron2-DCA.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--en--ljspeech--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan", "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
"commit": "", "commit": "",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -59,7 +59,7 @@
}, },
"vits": { "vits": {
"description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.", "description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--ljspeech--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/tts_models--en--ljspeech--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": "3900448", "commit": "3900448",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -68,7 +68,7 @@
}, },
"fast_pitch": { "fast_pitch": {
"description": "FastPitch model trained on LJSpeech using the Aligner Network", "description": "FastPitch model trained on LJSpeech using the Aligner Network",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.2/tts_models--en--ljspeech--fast_pitch.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.2/tts_models--en--ljspeech--fast_pitch.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "b27b3ba", "commit": "b27b3ba",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -79,7 +79,7 @@
"vctk": { "vctk": {
"sc-glow-tts": { "sc-glow-tts": {
"description": "Multi-Speaker Transformers based SC-Glow model from https://arxiv.org/abs/2104.05557.", "description": "Multi-Speaker Transformers based SC-Glow model from https://arxiv.org/abs/2104.05557.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.0/tts_models--en--vctk--sc-glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.0/tts_models--en--vctk--sc-glow-tts.zip",
"default_vocoder": "vocoder_models/en/vctk/hifigan_v2", "default_vocoder": "vocoder_models/en/vctk/hifigan_v2",
"commit": "b531fa69", "commit": "b531fa69",
"author": "Edresson Casanova", "author": "Edresson Casanova",
@ -88,7 +88,7 @@
}, },
"vits": { "vits": {
"description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.", "description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--vctk--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/tts_models--en--vctk--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": "3900448", "commit": "3900448",
"author": "Eren @erogol", "author": "Eren @erogol",
@ -97,7 +97,7 @@
}, },
"fast_pitch":{ "fast_pitch":{
"description": "FastPitch model trained on VCTK dataseset.", "description": "FastPitch model trained on VCTK dataseset.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.4.0/tts_models--en--vctk--fast_pitch.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.4.0/tts_models--en--vctk--fast_pitch.zip",
"default_vocoder": "vocoder_models/en/vctk/hifigan_v2", "default_vocoder": "vocoder_models/en/vctk/hifigan_v2",
"commit": "bdab788d", "commit": "bdab788d",
"author": "Eren @erogol", "author": "Eren @erogol",
@ -108,7 +108,7 @@
"sam": { "sam": {
"tacotron-DDC": { "tacotron-DDC": {
"description": "Tacotron2 with Double Decoder Consistency trained with Aceenture's Sam dataset.", "description": "Tacotron2 with Double Decoder Consistency trained with Aceenture's Sam dataset.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.13/tts_models--en--sam--tacotron_DDC.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.13/tts_models--en--sam--tacotron_DDC.zip",
"default_vocoder": "vocoder_models/en/sam/hifigan_v2", "default_vocoder": "vocoder_models/en/sam/hifigan_v2",
"commit": "bae2ad0f", "commit": "bae2ad0f",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -120,7 +120,7 @@
"es": { "es": {
"mai": { "mai": {
"tacotron2-DDC": { "tacotron2-DDC": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--es--mai--tacotron2-DDC.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--es--mai--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"commit": "", "commit": "",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -132,7 +132,7 @@
"fr": { "fr": {
"mai": { "mai": {
"tacotron2-DDC": { "tacotron2-DDC": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--fr--mai--tacotron2-DDC.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--fr--mai--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"commit": "", "commit": "",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
@ -144,7 +144,7 @@
"uk":{ "uk":{
"mai": { "mai": {
"glow-tts": { "glow-tts": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.4.0/tts_models--uk--mailabs--glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.4.0/tts_models--uk--mailabs--glow-tts.zip",
"author":"@robinhad", "author":"@robinhad",
"commit": "bdab788d", "commit": "bdab788d",
"license": "MIT", "license": "MIT",
@ -155,7 +155,7 @@
"zh-CN": { "zh-CN": {
"baker": { "baker": {
"tacotron2-DDC-GST": { "tacotron2-DDC-GST": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip",
"commit": "unknown", "commit": "unknown",
"author": "@kirianguiller", "author": "@kirianguiller",
"default_vocoder": null "default_vocoder": null
@ -165,7 +165,7 @@
"nl": { "nl": {
"mai": { "mai": {
"tacotron2-DDC": { "tacotron2-DDC": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--nl--mai--tacotron2-DDC.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/tts_models--nl--mai--tacotron2-DDC.zip",
"author": "@r-dh", "author": "@r-dh",
"default_vocoder": "vocoder_models/nl/mai/parallel-wavegan", "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
"stats_file": null, "stats_file": null,
@ -176,7 +176,7 @@
"de": { "de": {
"thorsten": { "thorsten": {
"tacotron2-DCA": { "tacotron2-DCA": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/de/thorsten/fullband-melgan", "default_vocoder": "vocoder_models/de/thorsten/fullband-melgan",
"author": "@thorstenMueller", "author": "@thorstenMueller",
"commit": "unknown" "commit": "unknown"
@ -186,7 +186,7 @@
"ja": { "ja": {
"kokoro": { "kokoro": {
"tacotron2-DDC": { "tacotron2-DDC": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.15/tts_models--jp--kokoro--tacotron2-DDC.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.15/tts_models--jp--kokoro--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1", "default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1",
"description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.", "description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.",
"author": "@kaiidams", "author": "@kaiidams",
@ -199,14 +199,14 @@
"universal": { "universal": {
"libri-tts": { "libri-tts": {
"wavegrad": { "wavegrad": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/vocoder_models--universal--libri-tts--wavegrad.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--universal--libri-tts--wavegrad.zip",
"commit": "ea976b0", "commit": "ea976b0",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
"contact": "egolge@coqui.com" "contact": "egolge@coqui.com"
}, },
"fullband-melgan": { "fullband-melgan": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/vocoder_models--universal--libri-tts--fullband-melgan.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--universal--libri-tts--fullband-melgan.zip",
"commit": "4132240", "commit": "4132240",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
@ -218,13 +218,13 @@
"ek1": { "ek1": {
"wavegrad": { "wavegrad": {
"description": "EK1 en-rp wavegrad by NMStoker", "description": "EK1 en-rp wavegrad by NMStoker",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/vocoder_models--en--ek1--wavegrad.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/vocoder_models--en--ek1--wavegrad.zip",
"commit": "c802255" "commit": "c802255"
} }
}, },
"ljspeech": { "ljspeech": {
"multiband-melgan": { "multiband-melgan": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/vocoder_models--en--ljspeech--mulitband-melgan.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--en--ljspeech--mulitband-melgan.zip",
"commit": "ea976b0", "commit": "ea976b0",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "MPL", "license": "MPL",
@ -232,7 +232,7 @@
}, },
"hifigan_v2": { "hifigan_v2": {
"description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.", "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/vocoder_model--en--ljspeech-hifigan_v2.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/vocoder_model--en--ljspeech-hifigan_v2.zip",
"commit": "bae2ad0f", "commit": "bae2ad0f",
"author": "@erogol", "author": "@erogol",
"license": "", "license": "",
@ -240,7 +240,7 @@
}, },
"univnet": { "univnet": {
"description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.", "description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.3.0/vocoder_models--en--ljspeech--univnet_v2.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.3.0/vocoder_models--en--ljspeech--univnet_v2.zip",
"commit": "4581e3d", "commit": "4581e3d",
"author": "Eren @erogol", "author": "Eren @erogol",
"license": "TBD", "license": "TBD",
@ -250,7 +250,7 @@
"vctk": { "vctk": {
"hifigan_v2": { "hifigan_v2": {
"description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts", "description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/vocoder_model--en--vctk--hifigan_v2.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/vocoder_model--en--vctk--hifigan_v2.zip",
"commit": "2f07160", "commit": "2f07160",
"author": "Edresson Casanova", "author": "Edresson Casanova",
"license": "", "license": "",
@ -260,7 +260,7 @@
"sam": { "sam": {
"hifigan_v2": { "hifigan_v2": {
"description": "Finetuned and intended to be used with tts_models/en/sam/tacotron_DDC", "description": "Finetuned and intended to be used with tts_models/en/sam/tacotron_DDC",
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.13/vocoder_models--en--sam--hifigan_v2.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.13/vocoder_models--en--sam--hifigan_v2.zip",
"commit": "2f07160", "commit": "2f07160",
"author": "Eren Gölge @erogol", "author": "Eren Gölge @erogol",
"license": "", "license": "",
@ -271,7 +271,7 @@
"nl": { "nl": {
"mai": { "mai": {
"parallel-wavegan": { "parallel-wavegan": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/vocoder_models--nl--mai--parallel-wavegan.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/vocoder_models--nl--mai--parallel-wavegan.zip",
"author": "@r-dh", "author": "@r-dh",
"commit": "unknown" "commit": "unknown"
} }
@ -280,12 +280,12 @@
"de": { "de": {
"thorsten": { "thorsten": {
"wavegrad": { "wavegrad": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/vocoder_models--de--thorsten--wavegrad.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.11/vocoder_models--de--thorsten--wavegrad.zip",
"author": "@thorstenMueller", "author": "@thorstenMueller",
"commit": "unknown" "commit": "unknown"
}, },
"fullband-melgan": { "fullband-melgan": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.3/vocoder_models--de--thorsten--fullband-melgan.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.3/vocoder_models--de--thorsten--fullband-melgan.zip",
"author": "@thorstenMueller", "author": "@thorstenMueller",
"commit": "unknown" "commit": "unknown"
} }
@ -294,7 +294,7 @@
"ja": { "ja": {
"kokoro": { "kokoro": {
"hifigan_v1": { "hifigan_v1": {
"github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/vocoder_models--ja--kokoro--hifigan_v1.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/vocoder_models--ja--kokoro--hifigan_v1.zip",
"description": "HifiGAN model trained for kokoro dataset by @kaiidams", "description": "HifiGAN model trained for kokoro dataset by @kaiidams",
"author": "@kaiidams", "author": "@kaiidams",
"commit": "3900448" "commit": "3900448"