fix readme (#3071)

* fix readme

* fix inference.md
This commit is contained in:
Julian Weber 2023-10-17 10:27:11 +02:00 committed by GitHub
parent dcce1644b7
commit d21f15cc85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 14 deletions

View File

@@ -198,17 +198,18 @@ from TTS.api import TTS
# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"
# List available 🐸TTS models and choose the first one
model_name = TTS().list_models()[0]
# List available 🐸TTS models
print(TTS().list_models())
# Init TTS
tts = TTS(model_name).to(device)
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1").to(device)
# Run TTS
# ❗ Since this model is multi-speaker and multi-lingual, we must set the target speaker and the language
# Text to speech with a numpy output
wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
# ❗ Since this model is a multi-lingual voice cloning model, we must set the target speaker_wav and language
# Text to speech with a list of amplitude values as output
wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en")
# Text to speech to a file
tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")
tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav")
```
#### Running a single speaker model

View File

@@ -114,18 +114,24 @@ tts-server --model_name "<type>/<language>/<dataset>/<model_name>" \
You can run a multi-speaker and multi-lingual model in Python as follows:
```python
import torch
from TTS.api import TTS
# List available 🐸TTS models and choose the first one
model_name = TTS().list_models()[0]
# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"
# List available 🐸TTS models
print(TTS().list_models())
# Init TTS
tts = TTS(model_name)
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1").to(device)
# Run TTS
# ❗ Since this model is multi-speaker and multi-lingual, we must set the target speaker and the language
# Text to speech with a numpy output
wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
# ❗ Since this model is a multi-lingual voice cloning model, we must set the target speaker_wav and language
# Text to speech with a list of amplitude values as output
wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en")
# Text to speech to a file
tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")
tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav")
```
#### Here is an example for a single speaker model.