docs(bark): update docstrings and type hints

This commit is contained in:
Enno Hermann 2024-05-15 22:56:55 +02:00
parent 59a6c9fdf2
commit 018f1e6453
2 changed files with 10 additions and 6 deletions

View File

@@ -2,10 +2,11 @@ import logging
import os import os
import re import re
from glob import glob from glob import glob
from typing import Dict, List from typing import Dict, List, Optional, Tuple
import librosa import librosa
import numpy as np import numpy as np
import numpy.typing as npt
import torch import torch
import torchaudio import torchaudio
import tqdm import tqdm
@@ -48,7 +49,7 @@ def get_voices(extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-d
return voices return voices
def load_npz(npz_file): def load_npz(npz_file: str) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]:
x_history = np.load(npz_file) x_history = np.load(npz_file)
semantic = x_history["semantic_prompt"] semantic = x_history["semantic_prompt"]
coarse = x_history["coarse_prompt"] coarse = x_history["coarse_prompt"]
@@ -56,7 +57,11 @@ def load_npz(npz_file):
return semantic, coarse, fine return semantic, coarse, fine
def load_voice(model, voice: str, extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-default-value def load_voice(
model, voice: str, extra_voice_dirs: List[str] = []
) -> Tuple[
Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]]
]: # pylint: disable=dangerous-default-value
if voice == "random": if voice == "random":
return None, None, None return None, None, None
@@ -107,11 +112,10 @@ def generate_voice(
model, model,
output_path, output_path,
): ):
"""Generate a new voice from a given audio and text prompt. """Generate a new voice from a given audio.
Args: Args:
audio (np.ndarray): The audio to use as a base for the new voice. audio (np.ndarray): The audio to use as a base for the new voice.
text (str): Transcription of the audio you are cloning.
model (BarkModel): The BarkModel to use for generating the new voice. model (BarkModel): The BarkModel to use for generating the new voice.
output_path (str): The path to save the generated voice to. output_path (str): The path to save the generated voice to.
""" """

View File

@@ -164,7 +164,7 @@ class Bark(BaseTTS):
return audio_arr, [x_semantic, c, f] return audio_arr, [x_semantic, c, f]
def generate_voice(self, audio, speaker_id, voice_dir): def generate_voice(self, audio, speaker_id, voice_dir):
"""Generate a voice from the given audio and text. """Generate a voice from the given audio.
Args: Args:
audio (str): Path to the audio file. audio (str): Path to the audio file.