diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 31d057cd..9e566e87 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -458,12 +458,12 @@ class VitsArgs(Coqpit): TTS_part_sample_rate (int): If not None this sample rate will be used for training the Posterior Encoder, flow, text_encoder and duration predictor. The decoder part (vocoder) will be - trained with the `config.audio.audio.sample_rate`. Defaults to None. + trained with the `config.audio.sample_rate`. Defaults to None. interpolate_z (bool): If `TTS_part_sample_rate` not None and this parameter True the nearest interpolation will be used to upsampling the latent variable z with the sampling rate `TTS_part_sample_rate` - to the `config.audio.audio.sample_rate`. If it is False you will need to add extra + to the `config.audio.sample_rate`. If it is False you will need to add extra `upsample_rates_decoder` to match the shape. Defaults to True. """