mirror of https://github.com/coqui-ai/TTS.git
Move hifigan to XTTS_v1.1
This commit is contained in:
parent
0520697b5f
commit
771360d4a2
|
@ -4,6 +4,19 @@
|
||||||
"multi-dataset": {
|
"multi-dataset": {
|
||||||
"xtts_v1": {
|
"xtts_v1": {
|
||||||
"description": "XTTS-v1 by Coqui with 13 languages and cross-language voice cloning.",
|
"description": "XTTS-v1 by Coqui with 13 languages and cross-language voice cloning.",
|
||||||
|
"hf_url": [
|
||||||
|
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/model.pth",
|
||||||
|
"https://huggingface.co/coqui/XTTS-v1/resolve/xtts_v1/config.json",
|
||||||
|
"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/vocab.json"
|
||||||
|
],
|
||||||
|
"default_vocoder": null,
|
||||||
|
"commit": "e5140314",
|
||||||
|
"license": "CPML",
|
||||||
|
"contact": "info@coqui.ai",
|
||||||
|
"tos_required": true
|
||||||
|
},
|
||||||
|
"xtts_v1.1": {
|
||||||
|
"description": "XTTS-v1.1 by Coqui with 13 languages and cross-language voice cloning with faster inference and streaming support.",
|
||||||
"hf_url": [
|
"hf_url": [
|
||||||
"https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth",
|
"https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth",
|
||||||
"https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/config.json",
|
"https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/config.json",
|
||||||
|
|
|
@ -203,7 +203,7 @@ class XttsArgs(Coqpit):
|
||||||
clvp_checkpoint (str, optional): The checkpoint for the ConditionalLatentVariablePerseq model. Defaults to None.
|
clvp_checkpoint (str, optional): The checkpoint for the ConditionalLatentVariablePerseq model. Defaults to None.
|
||||||
decoder_checkpoint (str, optional): The checkpoint for the DiffTTS model. Defaults to None.
|
decoder_checkpoint (str, optional): The checkpoint for the DiffTTS model. Defaults to None.
|
||||||
num_chars (int, optional): The maximum number of characters to generate. Defaults to 255.
|
num_chars (int, optional): The maximum number of characters to generate. Defaults to 255.
|
||||||
use_hifigan (bool, optional): Whether to use hifigan or diffusion + univnet as a decoder. Defaults to True.
|
use_hifigan (bool, optional): Whether to use hifigan or diffusion + univnet as a decoder. Defaults to False.
|
||||||
|
|
||||||
For GPT model:
|
For GPT model:
|
||||||
ar_max_audio_tokens (int, optional): The maximum mel tokens for the autoregressive model. Defaults to 604.
|
ar_max_audio_tokens (int, optional): The maximum mel tokens for the autoregressive model. Defaults to 604.
|
||||||
|
@ -238,7 +238,7 @@ class XttsArgs(Coqpit):
|
||||||
clvp_checkpoint: str = None
|
clvp_checkpoint: str = None
|
||||||
decoder_checkpoint: str = None
|
decoder_checkpoint: str = None
|
||||||
num_chars: int = 255
|
num_chars: int = 255
|
||||||
use_hifigan: bool = True
|
use_hifigan: bool = False
|
||||||
|
|
||||||
# XTTS GPT Encoder params
|
# XTTS GPT Encoder params
|
||||||
tokenizer_file: str = ""
|
tokenizer_file: str = ""
|
||||||
|
|
|
@ -33,7 +33,7 @@ You can also mail us at info@coqui.ai.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from TTS.api import TTS
|
from TTS.api import TTS
|
||||||
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
|
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1.1", gpu=True)
|
||||||
|
|
||||||
# generate speech by cloning a voice using default settings
|
# generate speech by cloning a voice using default settings
|
||||||
tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
|
tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
|
||||||
|
@ -45,7 +45,7 @@ tts.tts_to_file(text="It took me quite a long time to develop a voice, and now t
|
||||||
#### 🐸TTS Command line
|
#### 🐸TTS Command line
|
||||||
|
|
||||||
```console
|
```console
|
||||||
tts --model_name tts_models/multilingual/multi-dataset/xtts_v1 \
|
tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 \
|
||||||
--text "Bugün okula gitmek istemiyorum." \
|
--text "Bugün okula gitmek istemiyorum." \
|
||||||
--speaker_wav /path/to/target/speaker.wav \
|
--speaker_wav /path/to/target/speaker.wav \
|
||||||
--language_idx tr \
|
--language_idx tr \
|
||||||
|
|
Loading…
Reference in New Issue