Add bark model

This commit is contained in:
Eren Gölge 2023-06-19 14:16:06 +02:00
parent 2364c38d16
commit 37b708dac7
5 changed files with 100 additions and 81 deletions

View File

@ -1,20 +1,33 @@
{ {
"tts_models": { "tts_models": {
"multilingual":{ "multilingual": {
"multi-dataset":{ "multi-dataset": {
"your_tts":{ "your_tts": {
"description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418", "description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": "e9a1953e", "commit": "e9a1953e",
"license": "CC BY-NC-ND 4.0", "license": "CC BY-NC-ND 4.0",
"contact": "egolge@coqui.ai" "contact": "egolge@coqui.ai"
},
"bark": {
"description": "🐶 Bark TTS model released by suno-ai. You can find the original implementation in https://github.com/suno-ai/bark.",
"hf_url": [
"https://coqui.gateway.scarf.sh/bark/coarse_2.pt",
"https://coqui.gateway.scarf.sh/bark/fine_2.pt",
"https://coqui.gateway.scarf.sh/bark/text_2.pt",
"https://coqui.gateway.scarf.sh/bark/config.json"
],
"default_vocoder": null,
"commit": "e9a1953e",
"license": "MIT",
"contact": "https://www.suno.ai/"
} }
} }
}, },
"bg": { "bg": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -25,7 +38,7 @@
}, },
"cs": { "cs": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -36,7 +49,7 @@
}, },
"da": { "da": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -47,7 +60,7 @@
}, },
"et": { "et": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -58,7 +71,7 @@
}, },
"ga": { "ga": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -180,7 +193,7 @@
"license": "apache 2.0", "license": "apache 2.0",
"contact": "egolge@coqui.ai" "contact": "egolge@coqui.ai"
}, },
"fast_pitch":{ "fast_pitch": {
"description": "FastPitch model trained on VCTK dataseset.", "description": "FastPitch model trained on VCTK dataseset.",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip",
"default_vocoder": null, "default_vocoder": null,
@ -220,12 +233,12 @@
"license": "apache 2.0", "license": "apache 2.0",
"contact": "adamfroghyar@gmail.com" "contact": "adamfroghyar@gmail.com"
} }
}, },
"multi-dataset":{ "multi-dataset": {
"tortoise-v2":{ "tortoise-v2": {
"description": "Tortoise tts model https://github.com/neonbjb/tortoise-tts", "description": "Tortoise tts model https://github.com/neonbjb/tortoise-tts",
"github_rls_url": ["https://coqui.gateway.scarf.sh/v0.14.1_models/autoregressive.pth", "github_rls_url": [
"https://coqui.gateway.scarf.sh/v0.14.1_models/autoregressive.pth",
"https://coqui.gateway.scarf.sh/v0.14.1_models/clvp2.pth", "https://coqui.gateway.scarf.sh/v0.14.1_models/clvp2.pth",
"https://coqui.gateway.scarf.sh/v0.14.1_models/cvvp.pth", "https://coqui.gateway.scarf.sh/v0.14.1_models/cvvp.pth",
"https://coqui.gateway.scarf.sh/v0.14.1_models/diffusion_decoder.pth", "https://coqui.gateway.scarf.sh/v0.14.1_models/diffusion_decoder.pth",
@ -242,7 +255,7 @@
} }
}, },
"jenny": { "jenny": {
"jenny":{ "jenny": {
"description": "VITS model trained with Jenny(Dioco) dataset. Named as Jenny as demanded by the license. Original URL for the model https://www.kaggle.com/datasets/noml4u/tts-models--en--jenny-dioco--vits", "description": "VITS model trained with Jenny(Dioco) dataset. Named as Jenny as demanded by the license. Original URL for the model https://www.kaggle.com/datasets/noml4u/tts-models--en--jenny-dioco--vits",
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.14.0_models/tts_models--en--jenny--jenny.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.14.0_models/tts_models--en--jenny--jenny.zip",
"default_vocoder": null, "default_vocoder": null,
@ -263,8 +276,8 @@
"contact": "egolge@coqui.com" "contact": "egolge@coqui.com"
} }
}, },
"css10":{ "css10": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -284,8 +297,8 @@
"contact": "egolge@coqui.com" "contact": "egolge@coqui.com"
} }
}, },
"css10":{ "css10": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -294,17 +307,17 @@
} }
} }
}, },
"uk":{ "uk": {
"mai": { "mai": {
"glow-tts": { "glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--uk--mai--glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--uk--mai--glow-tts.zip",
"author":"@robinhad", "author": "@robinhad",
"commit": "bdab788d", "commit": "bdab788d",
"license": "MIT", "license": "MIT",
"contact": "", "contact": "",
"default_vocoder": "vocoder_models/uk/mai/multiband-melgan" "default_vocoder": "vocoder_models/uk/mai/multiband-melgan"
}, },
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -335,8 +348,8 @@
"commit": "540d811" "commit": "540d811"
} }
}, },
"css10":{ "css10": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -371,7 +384,7 @@
} }
}, },
"css10": { "css10": {
"vits-neon":{ "vits-neon": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"author": "@NeonGeckoCom", "author": "@NeonGeckoCom",
@ -392,9 +405,9 @@
} }
} }
}, },
"tr":{ "tr": {
"common-voice": { "common-voice": {
"glow-tts":{ "glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip",
"default_vocoder": "vocoder_models/tr/common-voice/hifigan", "default_vocoder": "vocoder_models/tr/common-voice/hifigan",
"license": "MIT", "license": "MIT",
@ -406,7 +419,7 @@
}, },
"it": { "it": {
"mai_female": { "mai_female": {
"glow-tts":{ "glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip",
"default_vocoder": null, "default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
@ -414,7 +427,7 @@
"license": "apache 2.0", "license": "apache 2.0",
"commit": null "commit": null
}, },
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
@ -424,7 +437,7 @@
} }
}, },
"mai_male": { "mai_male": {
"glow-tts":{ "glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip",
"default_vocoder": null, "default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
@ -432,7 +445,7 @@
"license": "apache 2.0", "license": "apache 2.0",
"commit": null "commit": null
}, },
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
@ -444,7 +457,7 @@
}, },
"ewe": { "ewe": {
"openbible": { "openbible": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--ewe--openbible--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--ewe--openbible--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"license": "CC-BY-SA 4.0", "license": "CC-BY-SA 4.0",
@ -456,7 +469,7 @@
}, },
"hau": { "hau": {
"openbible": { "openbible": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--hau--openbible--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--hau--openbible--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"license": "CC-BY-SA 4.0", "license": "CC-BY-SA 4.0",
@ -468,7 +481,7 @@
}, },
"lin": { "lin": {
"openbible": { "openbible": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--lin--openbible--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--lin--openbible--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"license": "CC-BY-SA 4.0", "license": "CC-BY-SA 4.0",
@ -480,7 +493,7 @@
}, },
"tw_akuapem": { "tw_akuapem": {
"openbible": { "openbible": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"license": "CC-BY-SA 4.0", "license": "CC-BY-SA 4.0",
@ -492,7 +505,7 @@
}, },
"tw_asante": { "tw_asante": {
"openbible": { "openbible": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"license": "CC-BY-SA 4.0", "license": "CC-BY-SA 4.0",
@ -504,7 +517,7 @@
}, },
"yor": { "yor": {
"openbible": { "openbible": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--yor--openbible--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--yor--openbible--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"license": "CC-BY-SA 4.0", "license": "CC-BY-SA 4.0",
@ -538,7 +551,7 @@
}, },
"fi": { "fi": {
"css10": { "css10": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -549,7 +562,7 @@
}, },
"hr": { "hr": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -560,7 +573,7 @@
}, },
"lt": { "lt": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -571,7 +584,7 @@
}, },
"lv": { "lv": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -582,7 +595,7 @@
}, },
"mt": { "mt": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -593,7 +606,7 @@
}, },
"pl": { "pl": {
"mai_female": { "mai_female": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -604,7 +617,7 @@
}, },
"pt": { "pt": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -615,7 +628,7 @@
}, },
"ro": { "ro": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -626,7 +639,7 @@
}, },
"sk": { "sk": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -637,7 +650,7 @@
}, },
"sl": { "sl": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -648,7 +661,7 @@
}, },
"sv": { "sv": {
"cv": { "cv": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -659,7 +672,7 @@
}, },
"ca": { "ca": {
"custom": { "custom": {
"vits":{ "vits": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--ca--custom--vits.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--ca--custom--vits.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
@ -669,8 +682,8 @@
} }
} }
}, },
"fa":{ "fa": {
"custom":{ "custom": {
"glow-tts": { "glow-tts": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--fa--custom--glow-tts.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--fa--custom--glow-tts.zip",
"default_vocoder": null, "default_vocoder": null,
@ -681,18 +694,18 @@
} }
} }
}, },
"bn":{ "bn": {
"custom":{ "custom": {
"vits-male":{ "vits-male": {
"github_rls_url":"https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_male.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_male.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
"description": "Single speaker Bangla male model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", "description": "Single speaker Bangla male model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts",
"author": "@mobassir94", "author": "@mobassir94",
"license": "Apache 2.0" "license": "Apache 2.0"
}, },
"vits-female":{ "vits-female": {
"github_rls_url":"https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_female.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_female.zip",
"default_vocoder": null, "default_vocoder": null,
"commit": null, "commit": null,
"description": "Single speaker Bangla female model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", "description": "Single speaker Bangla female model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts",
@ -834,16 +847,16 @@
"mai": { "mai": {
"multiband-melgan": { "multiband-melgan": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip",
"author":"@robinhad", "author": "@robinhad",
"commit": "bdab788d", "commit": "bdab788d",
"license": "MIT", "license": "MIT",
"contact": "" "contact": ""
} }
} }
}, },
"tr":{ "tr": {
"common-voice": { "common-voice": {
"hifigan":{ "hifigan": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip",
"description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.", "description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.",
"author": "Fatih Akademi", "author": "Fatih Akademi",
@ -853,10 +866,10 @@
} }
} }
}, },
"voice_conversion_models":{ "voice_conversion_models": {
"multilingual":{ "multilingual": {
"vctk":{ "vctk": {
"freevc24":{ "freevc24": {
"github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip", "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip",
"description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC", "description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC",
"author": "Jing-Yi Li @OlaWod", "author": "Jing-Yi Li @OlaWod",

View File

@ -342,7 +342,7 @@ class TTS:
def download_model_by_name(self, model_name: str): def download_model_by_name(self, model_name: str):
model_path, config_path, model_item = self.manager.download_model(model_name) model_path, config_path, model_item = self.manager.download_model(model_name)
if isinstance(model_item["github_rls_url"], list): if isinstance(model_item["model_url"], list):
# return model directory if there are multiple files # return model directory if there are multiple files
# we assume that the model knows how to load itself # we assume that the model knows how to load itself
return None, None, None, None, model_path return None, None, None, None, model_path
@ -580,6 +580,8 @@ class TTS:
Speed factor to use for 🐸Coqui Studio models, between 0.0 and 2.0. Defaults to None. Speed factor to use for 🐸Coqui Studio models, between 0.0 and 2.0. Defaults to None.
file_path (str, optional): file_path (str, optional):
Output file path. Defaults to "output.wav". Output file path. Defaults to "output.wav".
kwargs (dict, optional):
Additional arguments for the model.
""" """
self._check_arguments(speaker=speaker, language=language, speaker_wav=speaker_wav, **kwargs) self._check_arguments(speaker=speaker, language=language, speaker_wav=speaker_wav, **kwargs)

View File

@ -5,11 +5,14 @@ from typing import Dict
from TTS.tts.configs.shared_configs import BaseTTSConfig from TTS.tts.configs.shared_configs import BaseTTSConfig
from TTS.tts.layers.bark.model import GPTConfig from TTS.tts.layers.bark.model import GPTConfig
from TTS.tts.layers.bark.model_fine import FineGPTConfig from TTS.tts.layers.bark.model_fine import FineGPTConfig
from TTS.tts.models.bark import BarkAudioConfig
from TTS.utils.generic_utils import get_user_data_dir from TTS.utils.generic_utils import get_user_data_dir
@dataclass @dataclass
class BarkConfig(BaseTTSConfig): class BarkConfig(BaseTTSConfig):
model: str = "bark"
audio: BarkAudioConfig = BarkAudioConfig()
num_chars: int = 0 num_chars: int = 0
semantic_config: GPTConfig = GPTConfig() semantic_config: GPTConfig = GPTConfig()
fine_config: FineGPTConfig = FineGPTConfig() fine_config: FineGPTConfig = FineGPTConfig()
@ -31,7 +34,7 @@ class BarkConfig(BaseTTSConfig):
COARSE_SEMANTIC_PAD_TOKEN: int = 12_048 COARSE_SEMANTIC_PAD_TOKEN: int = 12_048
COARSE_INFER_TOKEN: int = 12_050 COARSE_INFER_TOKEN: int = 12_050
REMOTE_BASE_URL = "https://dl.suno-models.io/bark/models/v0/" REMOTE_BASE_URL = "https://huggingface.co/erogol/bark/tree/main/"
REMOTE_MODEL_PATHS: Dict = None REMOTE_MODEL_PATHS: Dict = None
LOCAL_MODEL_PATHS: Dict = None LOCAL_MODEL_PATHS: Dict = None
SMALL_REMOTE_MODEL_PATHS: Dict = None SMALL_REMOTE_MODEL_PATHS: Dict = None

View File

@ -52,7 +52,7 @@ def load_voice(voice: str, extra_voice_dirs: List[str] = []):
return semantic, coarse, fine return semantic, coarse, fine
if voice == "random": if voice == "random":
return None, None return None, None, None
voices = get_voices(extra_voice_dirs) voices = get_voices(extra_voice_dirs)
try: try:
@ -183,7 +183,7 @@ def generate_text_semantic(
assert isinstance(text, str) assert isinstance(text, str)
text = _normalize_whitespace(text) text = _normalize_whitespace(text)
assert len(text.strip()) > 0 assert len(text.strip()) > 0
if history_prompt is not None or base is not None: if all(v is not None for v in history_prompt) or base is not None:
if history_prompt is not None: if history_prompt is not None:
semantic_history = history_prompt[0] semantic_history = history_prompt[0]
if base is not None: if base is not None:
@ -327,7 +327,7 @@ def generate_coarse(
model.config.COARSE_RATE_HZ / model.config.SEMANTIC_RATE_HZ * model.config.N_COARSE_CODEBOOKS model.config.COARSE_RATE_HZ / model.config.SEMANTIC_RATE_HZ * model.config.N_COARSE_CODEBOOKS
) )
max_semantic_history = int(np.floor(max_coarse_history / semantic_to_coarse_ratio)) max_semantic_history = int(np.floor(max_coarse_history / semantic_to_coarse_ratio))
if history_prompt is not None or base is not None: if all(v is not None for v in history_prompt) or base is not None:
if history_prompt is not None: if history_prompt is not None:
x_history = history_prompt x_history = history_prompt
x_semantic_history = x_history[0] x_semantic_history = x_history[0]
@ -477,7 +477,7 @@ def generate_fine(
and x_coarse_gen.min() >= 0 and x_coarse_gen.min() >= 0
and x_coarse_gen.max() <= model.config.CODEBOOK_SIZE - 1 and x_coarse_gen.max() <= model.config.CODEBOOK_SIZE - 1
) )
if history_prompt is not None or base is not None: if all(v is not None for v in history_prompt) or base is not None:
if history_prompt is not None: if history_prompt is not None:
x_fine_history = history_prompt[2] x_fine_history = history_prompt[2]
if base is not None: if base is not None:
@ -572,4 +572,4 @@ def codec_decode(fine_tokens, model):
emb = model.encodec.quantizer.decode(arr) emb = model.encodec.quantizer.decode(arr)
out = model.encodec.decoder(emb) out = model.encodec.decoder(emb)
audio_arr = out.detach().cpu().numpy().squeeze() audio_arr = out.detach().cpu().numpy().squeeze()
save_wav(path="test.wav", wav=audio_arr, sample_rate=model.config.sample_rate) return audio_arr

View File

@ -4,6 +4,7 @@ Much of this code is adapted from Andrej Karpathy's NanoGPT
""" """
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from coqpit import Coqpit
import torch import torch
import torch.nn as nn import torch.nn as nn
@ -131,7 +132,7 @@ class Block(nn.Module):
@dataclass @dataclass
class GPTConfig: class GPTConfig(Coqpit):
block_size: int = 1024 block_size: int = 1024
input_vocab_size: int = 10_048 input_vocab_size: int = 10_048
output_vocab_size: int = 10_048 output_vocab_size: int = 10_048