diff --git a/docs/source/models/tortoise.md b/docs/source/models/tortoise.md index c49a0fcb..d82cab81 100644 --- a/docs/source/models/tortoise.md +++ b/docs/source/models/tortoise.md @@ -1,7 +1,7 @@ # Tortoise 🐢 Tortoise is a very expressive TTS system with impressive voice cloning capabilities. It is based on a GPT-like autoregressive acoustic model that converts input text to discretized acoustic tokens, a diffusion model that converts these tokens to mel-spectrogram frames and a Univnet vocoder to convert the spectrograms to -the final audio signal. The important downside is that Tortoise is very slow compared to the parallel TTS models like VITS. +the final audio signal. The important downside is that Tortoise is very slow compared to the parallel TTS models like VITS. Big thanks to 👑[@manmay-nakhashi](https://github.com/manmay-nakhashi) who helped us implement Tortoise in 🐸TTS. @@ -12,7 +12,7 @@ from TTS.tts.configs.tortoise_config import TortoiseConfig from TTS.tts.models.tortoise import Tortoise config = TortoiseConfig() -model = Tortoise.inif_from_config(config) +model = Tortoise.init_from_config(config) model.load_checkpoint(config, checkpoint_dir="paths/to/models_dir/", eval=True) # with random speaker @@ -29,8 +29,8 @@ from TTS.api import TTS tts = TTS("tts_models/en/multi-dataset/tortoise-v2") # cloning `lj` voice from `TTS/tts/utils/assets/tortoise/voices/lj` -# with custom inference settings overriding defaults. -tts.tts_to_file(text="Hello, my name is Manmay , how are you?", +# with custom inference settings overriding defaults. 
+tts.tts_to_file(text="Hello, my name is Manmay , how are you?", file_path="output.wav", voice_dir="TTS/tts/utils/assets/tortoise/voices/", speaker="lj", @@ -38,14 +38,14 @@ tts.tts_to_file(text="Hello, my name is Manmay , how are you?", diffusion_iterations=10) # Using presets with the same voice -tts.tts_to_file(text="Hello, my name is Manmay , how are you?", +tts.tts_to_file(text="Hello, my name is Manmay , how are you?", file_path="output.wav", voice_dir="TTS/tts/utils/assets/tortoise/voices/", speaker="lj", preset="ultra_fast") # Random voice generation -tts.tts_to_file(text="Hello, my name is Manmay , how are you?", +tts.tts_to_file(text="Hello, my name is Manmay , how are you?", file_path="output.wav") ``` @@ -54,7 +54,7 @@ Using 🐸TTS Command line: ```console # cloning the `lj` voice tts --model_name tts_models/en/multi-dataset/tortoise-v2 \ ---text "This is an example." \ +--text "This is an example." \ --out_path "/data/speech_synth/coqui-tts/TTS/tests/outputs/output.wav" \ --voice_dir TTS/tts/utils/assets/tortoise/voices/ \ --speaker_idx "lj" \