From ffc269eaf4b679d92624e7513c24971e7f7ab54e Mon Sep 17 00:00:00 2001 From: WeberJulian Date: Tue, 23 Nov 2021 14:50:19 +0100 Subject: [PATCH] Update docstring --- TTS/tts/models/vits.py | 18 +++++++++++++++++- TTS/tts/utils/synthesis.py | 6 ++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 6b1dd325..a9d00213 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -167,6 +167,20 @@ class VitsArgs(Coqpit): speaker_encoder_model_path (str): Path to the file speaker encoder checkpoint file, to use for SCL. Defaults to "". + freeze_encoder (bool): + Freeze the encoder weights during training. Defaults to False. + + freeze_DP (bool): + Freeze the duration predictor weights during training. Defaults to False. + + freeze_PE (bool): + Freeze the posterior encoder weights during training. Defaults to False. + + freeze_flow_encoder (bool): + Freeze the flow encoder weights during training. Defaults to False. + + freeze_waveform_decoder (bool): + Freeze the waveform decoder weights during training. Defaults to False. """ num_chars: int = 100 @@ -555,7 +569,8 @@ class Vits(BaseTTS): x_lengths (torch.tensor): Batch of input character sequence lengths. y (torch.tensor): Batch of input spectrograms. y_lengths (torch.tensor): Batch of input spectrogram lengths. - aux_input (dict, optional): Auxiliary inputs for multi-speaker training. Defaults to {"d_vectors": None, "speaker_ids": None}. + aux_input (dict, optional): Auxiliary inputs for multi-speaker and multi-lingual training. + Defaults to {"d_vectors": None, "speaker_ids": None, "language_ids": None}. Returns: Dict: model outputs keyed by the output name. 
@@ -567,6 +582,7 @@ class Vits(BaseTTS): - y_lengths: :math:`[B]` - d_vectors: :math:`[B, C, 1]` - speaker_ids: :math:`[B]` + - language_ids: :math:`[B]` """ outputs = {} sid, g, lid = self._set_cond_input(aux_input) diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py index 102914c5..24b747be 100644 --- a/TTS/tts/utils/synthesis.py +++ b/TTS/tts/utils/synthesis.py @@ -249,6 +249,12 @@ def synthesis( d_vector (torch.Tensor): d-vector for multi-speaker models in share :math:`[1, D]`. Defaults to None. + language_id (int): + Language ID passed to the language embedding layer in multi-lingual model. Defaults to None. + + language_name (str): + Language name corresponding to the language code used by the phonemizer. Defaults to None. + backend (str): tf or torch. Defaults to "torch". """