diff --git a/TTS/.models.json b/TTS/.models.json index ba7b5f62..7332ccb3 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -4,6 +4,19 @@ "multi-dataset": { "xtts_v1": { "description": "XTTS-v1 by Coqui with 13 languages and cross-language voice cloning.", + "hf_url": [ + "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/model.pth", + "https://huggingface.co/coqui/XTTS-v1/resolve/xtts_v1/config.json", + "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/vocab.json" + ], + "default_vocoder": null, + "commit": "e5140314", + "license": "CPML", + "contact": "info@coqui.ai", + "tos_required": true + }, + "xtts_v1.1": { + "description": "XTTS-v1.1 by Coqui with 13 languages and cross-language voice cloning with faster inference and streaming support.", "hf_url": [ "https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/model.pth", "https://huggingface.co/coqui/XTTS-v1/resolve/hifigan/config.json", diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 2b480744..0d20c610 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -203,7 +203,7 @@ class XttsArgs(Coqpit): clvp_checkpoint (str, optional): The checkpoint for the ConditionalLatentVariablePerseq model. Defaults to None. decoder_checkpoint (str, optional): The checkpoint for the DiffTTS model. Defaults to None. num_chars (int, optional): The maximum number of characters to generate. Defaults to 255. - use_hifigan (bool, optional): Whether to use hifigan or diffusion + univnet as a decoder. Defaults to True. + use_hifigan (bool, optional): Whether to use hifigan or diffusion + univnet as a decoder. Defaults to False. For GPT model: ar_max_audio_tokens (int, optional): The maximum mel tokens for the autoregressive model. Defaults to 604. @@ -238,7 +238,7 @@ class XttsArgs(Coqpit): clvp_checkpoint: str = None decoder_checkpoint: str = None num_chars: int = 255 - use_hifigan: bool = True + use_hifigan: bool = False # XTTS GPT Encoder params tokenizer_file: str = "" diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index ff6bcf97..04d0f72f 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -33,7 +33,7 @@ You can also mail us at info@coqui.ai. ```python from TTS.api import TTS -tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True) +tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1.1", gpu=True) # generate speech by cloning a voice using default settings tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", @@ -45,7 +45,7 @@ tts.tts_to_file(text="It took me quite a long time to develop a voice, and now t #### 🐸TTS Command line ```console - tts --model_name tts_models/multilingual/multi-dataset/xtts_v1 \ + tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 \ --text "Bugün okula gitmek istemiyorum." \ --speaker_wav /path/to/target/speaker.wav \ --language_idx tr \