docs(bark): update docstrings and type hints

This commit is contained in:
Enno Hermann 2024-05-15 22:56:55 +02:00
parent 59a6c9fdf2
commit 018f1e6453
2 changed files with 10 additions and 6 deletions

View File

@ -2,10 +2,11 @@ import logging
import os
import re
from glob import glob
from typing import Dict, List
from typing import Dict, List, Optional, Tuple
import librosa
import numpy as np
import numpy.typing as npt
import torch
import torchaudio
import tqdm
@ -48,7 +49,7 @@ def get_voices(extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-d
return voices
def load_npz(npz_file):
def load_npz(npz_file: str) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]:
x_history = np.load(npz_file)
semantic = x_history["semantic_prompt"]
coarse = x_history["coarse_prompt"]
@ -56,7 +57,11 @@ def load_npz(npz_file):
return semantic, coarse, fine
def load_voice(model, voice: str, extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-default-value
def load_voice(
model, voice: str, extra_voice_dirs: List[str] = []
) -> Tuple[
Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]]
]: # pylint: disable=dangerous-default-value
if voice == "random":
return None, None, None
@ -107,11 +112,10 @@ def generate_voice(
model,
output_path,
):
"""Generate a new voice from a given audio and text prompt.
"""Generate a new voice from a given audio.
Args:
audio (np.ndarray): The audio to use as a base for the new voice.
text (str): Transcription of the audio you are clonning.
model (BarkModel): The BarkModel to use for generating the new voice.
output_path (str): The path to save the generated voice to.
"""

View File

@ -164,7 +164,7 @@ class Bark(BaseTTS):
return audio_arr, [x_semantic, c, f]
def generate_voice(self, audio, speaker_id, voice_dir):
"""Generate a voice from the given audio and text.
"""Generate a voice from the given audio.
Args:
audio (str): Path to the audio file.