mirror of https://github.com/coqui-ai/TTS.git
Update docstring
parent 06d89f93a8
commit ffc269eaf4
@@ -167,6 +167,20 @@ class VitsArgs(Coqpit):
         speaker_encoder_model_path (str):
             Path to the speaker encoder checkpoint file, to use for SCL. Defaults to "".

+        freeze_encoder (bool):
+            Freeze the encoder weights during training. Defaults to False.
+
+        freeze_DP (bool):
+            Freeze the duration predictor weights during training. Defaults to False.
+
+        freeze_PE (bool):
+            Freeze the posterior encoder weights during training. Defaults to False.
+
+        freeze_flow_encoder (bool):
+            Freeze the flow encoder weights during training. Defaults to False.
+
+        freeze_waveform_decoder (bool):
+            Freeze the waveform decoder weights during training. Defaults to False.
     """

     num_chars: int = 100
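As a usage note (not part of the commit), the sketch below shows how the VitsArgs fields documented above might be set for speaker-consistency-loss (SCL) fine-tuning. The field names are taken from the docstring and the checkpoint path is a hypothetical placeholder; verify both against the installed TTS version.

# Sketch only: field names follow the docstring above; the path is hypothetical.
from TTS.tts.models.vits import VitsArgs

model_args = VitsArgs(
    num_chars=100,
    # speaker encoder checkpoint used for SCL
    speaker_encoder_model_path="/path/to/se_checkpoint.pth.tar",
    # freeze selected sub-networks while fine-tuning the rest
    freeze_encoder=True,
    freeze_DP=True,
    freeze_PE=False,
    freeze_flow_encoder=False,
    freeze_waveform_decoder=False,
)
print(model_args.freeze_encoder)  # True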
@@ -555,7 +569,8 @@ class Vits(BaseTTS):
             x_lengths (torch.tensor): Batch of input character sequence lengths.
             y (torch.tensor): Batch of input spectrograms.
             y_lengths (torch.tensor): Batch of input spectrogram lengths.
-            aux_input (dict, optional): Auxiliary inputs for multi-speaker training. Defaults to {"d_vectors": None, "speaker_ids": None}.
+            aux_input (dict, optional): Auxiliary inputs for multi-speaker and multi-lingual training.
+                Defaults to {"d_vectors": None, "speaker_ids": None, "language_ids": None}.

         Returns:
             Dict: model outputs keyed by the output name.
@@ -567,6 +582,7 @@ class Vits(BaseTTS):
             - y_lengths: :math:`[B]`
             - d_vectors: :math:`[B, C, 1]`
             - speaker_ids: :math:`[B]`
+            - language_ids: :math:`[B]`
         """
         outputs = {}
         sid, g, lid = self._set_cond_input(aux_input)
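For reference (not part of the commit), the sketch below assembles an aux_input dict with the tensor shapes listed in the forward() docstring above. The batch size, channel counts, and the shapes assumed for x and y are placeholders, and the forward call itself is left commented because it needs a fully configured Vits instance.

import torch

B, T_seq, C_spec, T_spec, D = 2, 30, 80, 120, 256  # placeholder sizes

x = torch.randint(0, 100, (B, T_seq))          # batch of token IDs (assumed shape [B, T_seq])
x_lengths = torch.full((B,), T_seq)            # [B]
y = torch.randn(B, C_spec, T_spec)             # batch of spectrograms (assumed shape [B, C, T_spec])
y_lengths = torch.full((B,), T_spec)           # [B]

aux_input = {
    "d_vectors": torch.randn(B, D, 1),         # [B, C, 1], as documented above
    "speaker_ids": torch.randint(0, 4, (B,)),  # [B]
    "language_ids": torch.randint(0, 2, (B,)), # [B]
}

# With a configured multi-speaker, multi-lingual Vits instance `model`:
# outputs = model.forward(x, x_lengths, y, y_lengths, aux_input=aux_input)
# `outputs` is a dict keyed by output name, as the docstring states.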
@@ -249,6 +249,12 @@ def synthesis(
         d_vector (torch.Tensor):
             d-vector for multi-speaker models in shape :math:`[1, D]`. Defaults to None.

+        language_id (int):
+            Language ID passed to the language embedding layer in a multi-lingual model. Defaults to None.
+
+        language_name (str):
+            Language name corresponding to the language code used by the phonemizer. Defaults to None.
+
         backend (str):
             tf or torch. Defaults to "torch".
     """
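For reference (not part of the commit), the sketch below shows how the newly documented synthesis() parameters might be passed. The import path is assumed from the TTS package layout; the surrounding positional arguments (model, text, config) vary by version, so the call is left commented and every concrete value is a placeholder.

import torch
from TTS.tts.utils.synthesis import synthesis  # module path assumed

d_vector = torch.randn(1, 256)  # speaker d-vector in shape [1, D]; D=256 is a placeholder

# outputs = synthesis(
#     model,                     # trained multi-speaker / multi-lingual model
#     "Bonjour tout le monde.",  # input text
#     config,                    # the model's config object
#     use_cuda=False,
#     d_vector=d_vector,         # [1, D], defaults to None
#     language_id=1,             # index into the language embedding layer
#     language_name="fr-fr",     # language code used by the phonemizer
#     backend="torch",           # "tf" or "torch"
# )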