docs(bark): update docstrings and type hints

2024-05-15 22:56:55 +02:00 · 2024-05-15 22:56:55 +02:00 · 018f1e6453
parent 59a6c9fdf2
commit 018f1e6453
2 changed files with 10 additions and 6 deletions
--- a/TTS/tts/layers/bark/inference_funcs.py
+++ b/TTS/tts/layers/bark/inference_funcs.py
@ -2,10 +2,11 @@ import logging
 import os
 import re
 from glob import glob
-from typing import Dict, List
+from typing import Dict, List, Optional, Tuple

 import librosa
 import numpy as np
+import numpy.typing as npt
 import torch
 import torchaudio
 import tqdm
@ -48,7 +49,7 @@ def get_voices(extra_voice_dirs: List[str] = []):  # pylint: disable=dangerous-d
    return voices


-def load_npz(npz_file):
+def load_npz(npz_file: str) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]:
    x_history = np.load(npz_file)
    semantic = x_history["semantic_prompt"]
    coarse = x_history["coarse_prompt"]
@ -56,7 +57,11 @@ def load_npz(npz_file):
    return semantic, coarse, fine


-def load_voice(model, voice: str, extra_voice_dirs: List[str] = []):  # pylint: disable=dangerous-default-value
+def load_voice(
+    model, voice: str, extra_voice_dirs: List[str] = []
+) -> Tuple[
+    Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]]
+]:  # pylint: disable=dangerous-default-value
    if voice == "random":
        return None, None, None

@ -107,11 +112,10 @@ def generate_voice(
    model,
    output_path,
 ):
-    """Generate a new voice from a given audio and text prompt.
+    """Generate a new voice from a given audio.

    Args:
        audio (np.ndarray): The audio to use as a base for the new voice.
-        text (str): Transcription of the audio you are clonning.
        model (BarkModel): The BarkModel to use for generating the new voice.
        output_path (str): The path to save the generated voice to.
    """
--- a/TTS/tts/models/bark.py
+++ b/TTS/tts/models/bark.py
@ -164,7 +164,7 @@ class Bark(BaseTTS):
        return audio_arr, [x_semantic, c, f]

    def generate_voice(self, audio, speaker_id, voice_dir):
-        """Generate a voice from the given audio and text.
+        """Generate a voice from the given audio.

        Args:
            audio (str): Path to the audio file.