docs(xtts): show manual inference with default speakers

Enno Hermann 2024-12-17 10:41:49 +01:00
parent 9d5fc60a5d
commit 1f9dda65ad
1 changed file with 12 additions and 6 deletions


@@ -163,12 +163,13 @@ from TTS.api import TTS
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
 # generate speech by cloning a voice using default settings
-tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
-                file_path="output.wav",
-                speaker="Ana Florence",
-                language="en",
-                split_sentences=True
-                )
+tts.tts_to_file(
+    text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+    file_path="output.wav",
+    speaker="Ana Florence",
+    language="en",
+    split_sentences=True
+)
 ```
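
The default speaker names accepted by `speaker=` ship with the XTTS-v2 checkpoint. As a minimal sketch (assuming the high-level API's `TTS.speakers` property exposes the model's speaker names, which is not shown in this diff), they can be listed like this:

```python
from TTS.api import TTS

tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")

# Print the built-in speaker names; "Ana Florence" used above is one of them.
# (Assumption: TTS.speakers returns the loaded model's speaker names.)
print(tts.speakers)
```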
@@ -230,6 +231,11 @@ out = model.inference(
 torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
 ```
+
+You can also use the Coqui speakers:
+```python
+gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()
+```
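
The retrieved `gpt_cond_latent` and `speaker_embedding` can then be fed to `model.inference()` exactly like the latents computed from a reference clip earlier in this file; a short sketch mirroring that call:

```python
# Sketch: reuse a default speaker's pre-computed latents for manual
# inference, following the model.inference() call shown above.
out = model.inference(
    "It took me quite a long time to develop a voice.",
    "en",
    gpt_cond_latent,
    speaker_embedding,
)
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
```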
#### Streaming manually