From 59a6c9fdf295e71d201efe95fbeeca9718a99bc7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 15 May 2024 22:56:28 +0200 Subject: [PATCH 1/2] fix(bark): add missing argument for load_voice() Fixes https://github.com/coqui-ai/TTS/issues/2795 --- TTS/tts/models/bark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py index 833a9093..797ebb08 100644 --- a/TTS/tts/models/bark.py +++ b/TTS/tts/models/bark.py @@ -174,7 +174,7 @@ class Bark(BaseTTS): if voice_dir is not None: voice_dirs = [voice_dir] try: - _ = load_voice(speaker_id, voice_dirs) + _ = load_voice(self, speaker_id, voice_dirs) except (KeyError, FileNotFoundError): output_path = os.path.join(voice_dir, speaker_id + ".npz") os.makedirs(voice_dir, exist_ok=True) From 018f1e6453a88f7c8d26de1e682159c1f0aa446f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 15 May 2024 22:56:55 +0200 Subject: [PATCH 2/2] docs(bark): update docstrings and type hints --- TTS/tts/layers/bark/inference_funcs.py | 14 +++++++++----- TTS/tts/models/bark.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/TTS/tts/layers/bark/inference_funcs.py b/TTS/tts/layers/bark/inference_funcs.py index f3d3fee9..b2875c7a 100644 --- a/TTS/tts/layers/bark/inference_funcs.py +++ b/TTS/tts/layers/bark/inference_funcs.py @@ -2,10 +2,11 @@ import logging import os import re from glob import glob -from typing import Dict, List +from typing import Dict, List, Optional, Tuple import librosa import numpy as np +import numpy.typing as npt import torch import torchaudio import tqdm @@ -48,7 +49,7 @@ def get_voices(extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-d return voices -def load_npz(npz_file): +def load_npz(npz_file: str) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]: x_history = np.load(npz_file) semantic = x_history["semantic_prompt"] coarse = x_history["coarse_prompt"] @@ -56,7 +57,11 @@ def load_npz(npz_file): return semantic, coarse, fine -def load_voice(model, voice: str, extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-default-value +def load_voice( + model, voice: str, extra_voice_dirs: List[str] = [] +) -> Tuple[ + Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]] +]: # pylint: disable=dangerous-default-value if voice == "random": return None, None, None @@ -107,11 +112,10 @@ def generate_voice( model, output_path, ): - """Generate a new voice from a given audio and text prompt. + """Generate a new voice from a given audio. Args: audio (np.ndarray): The audio to use as a base for the new voice. - text (str): Transcription of the audio you are clonning. model (BarkModel): The BarkModel to use for generating the new voice. output_path (str): The path to save the generated voice to. """ diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py index 797ebb08..cdfb5efa 100644 --- a/TTS/tts/models/bark.py +++ b/TTS/tts/models/bark.py @@ -164,7 +164,7 @@ class Bark(BaseTTS): return audio_arr, [x_semantic, c, f] def generate_voice(self, audio, speaker_id, voice_dir): - """Generate a voice from the given audio and text. + """Generate a voice from the given audio. Args: audio (str): Path to the audio file.