From 1f9dda65adae5a6327b8c7c210d99aaae89983f6 Mon Sep 17 00:00:00 2001
From: Enno Hermann
Date: Tue, 17 Dec 2024 10:41:49 +0100
Subject: [PATCH] docs(xtts): show manual inference with default speakers

---
 docs/source/models/xtts.md | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md
index 96f5bb7c..91d4b407 100644
--- a/docs/source/models/xtts.md
+++ b/docs/source/models/xtts.md
@@ -163,12 +163,13 @@
 from TTS.api import TTS
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
 
 # generate speech by cloning a voice using default settings
-tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
-                file_path="output.wav",
-                speaker="Ana Florence",
-                language="en",
-                split_sentences=True
-                )
+tts.tts_to_file(
+    text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+    file_path="output.wav",
+    speaker="Ana Florence",
+    language="en",
+    split_sentences=True
+)
 ```
 
@@ -230,6 +231,11 @@ out = model.inference(
 
 torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
 ```
+You can also use one of the default Coqui speakers:
+
+```python
+gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()
+```
 
 #### Streaming manually
 
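
For context, here is how the new default-speaker line slots into the full manual-inference example that the second hunk's context lines come from (`out = model.inference(`, `torchaudio.save(...)`). This is a minimal sketch, not part of the patch: the checkpoint-loading calls follow the pattern documented elsewhere in `xtts.md`, and the config/checkpoint paths and `temperature` value are placeholders.

```python
import torch
import torchaudio

from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

# Load XTTS v2 manually (paths below are placeholders for a downloaded checkpoint).
config = XttsConfig()
config.load_json("/path/to/xtts/config.json")
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir="/path/to/xtts/", eval=True)
model.cuda()

# The line added by this patch: reuse the conditioning latents of a bundled
# default speaker instead of computing them from a reference clip.
gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()

out = model.inference(
    "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
    "en",
    gpt_cond_latent,
    speaker_embedding,
    temperature=0.7,  # add custom parameters here
)
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
```

Note that the `.values()` unpacking assumes each entry in `speaker_manager.speakers` is a two-item mapping ordered as GPT conditioning latent first, speaker embedding second; if that layout ever changed, indexing the keys explicitly would be safer.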