From 59a6c9fdf295e71d201efe95fbeeca9718a99bc7 Mon Sep 17 00:00:00 2001
From: Enno Hermann <enno.hermann@idiap.ch>
Date: Wed, 15 May 2024 22:56:28 +0200
Subject: [PATCH 1/2] fix(bark): add missing argument for load_voice()

Fixes https://github.com/coqui-ai/TTS/issues/2795
---
 TTS/tts/models/bark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py
index 833a9093..797ebb08 100644
--- a/TTS/tts/models/bark.py
+++ b/TTS/tts/models/bark.py
@@ -174,7 +174,7 @@ class Bark(BaseTTS):
         if voice_dir is not None:
             voice_dirs = [voice_dir]
             try:
-                _ = load_voice(speaker_id, voice_dirs)
+                _ = load_voice(self, speaker_id, voice_dirs)
             except (KeyError, FileNotFoundError):
                 output_path = os.path.join(voice_dir, speaker_id + ".npz")
                 os.makedirs(voice_dir, exist_ok=True)

From 018f1e6453a88f7c8d26de1e682159c1f0aa446f Mon Sep 17 00:00:00 2001
From: Enno Hermann <enno.hermann@idiap.ch>
Date: Wed, 15 May 2024 22:56:55 +0200
Subject: [PATCH 2/2] docs(bark): update docstrings and type hints

---
 TTS/tts/layers/bark/inference_funcs.py | 14 +++++++++-----
 TTS/tts/models/bark.py                 |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/TTS/tts/layers/bark/inference_funcs.py b/TTS/tts/layers/bark/inference_funcs.py
index f3d3fee9..b2875c7a 100644
--- a/TTS/tts/layers/bark/inference_funcs.py
+++ b/TTS/tts/layers/bark/inference_funcs.py
@@ -2,10 +2,11 @@ import logging
 import os
 import re
 from glob import glob
-from typing import Dict, List
+from typing import Dict, List, Optional, Tuple
 
 import librosa
 import numpy as np
+import numpy.typing as npt
 import torch
 import torchaudio
 import tqdm
@@ -48,7 +49,7 @@ def get_voices(extra_voice_dirs: List[str] = []):  # pylint: disable=dangerous-d
     return voices
 
 
-def load_npz(npz_file):
+def load_npz(npz_file: str) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]:
     x_history = np.load(npz_file)
     semantic = x_history["semantic_prompt"]
     coarse = x_history["coarse_prompt"]
@@ -56,7 +57,11 @@ def load_npz(npz_file):
     return semantic, coarse, fine
 
 
-def load_voice(model, voice: str, extra_voice_dirs: List[str] = []):  # pylint: disable=dangerous-default-value
+def load_voice(
+    model, voice: str, extra_voice_dirs: List[str] = []
+) -> Tuple[
+    Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]]
+]:  # pylint: disable=dangerous-default-value
     if voice == "random":
         return None, None, None
 
@@ -107,11 +112,10 @@ def generate_voice(
     model,
     output_path,
 ):
-    """Generate a new voice from a given audio and text prompt.
+    """Generate a new voice from a given audio.
 
     Args:
         audio (np.ndarray): The audio to use as a base for the new voice.
-        text (str): Transcription of the audio you are clonning.
         model (BarkModel): The BarkModel to use for generating the new voice.
         output_path (str): The path to save the generated voice to.
     """
diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py
index 797ebb08..cdfb5efa 100644
--- a/TTS/tts/models/bark.py
+++ b/TTS/tts/models/bark.py
@@ -164,7 +164,7 @@ class Bark(BaseTTS):
         return audio_arr, [x_semantic, c, f]
 
     def generate_voice(self, audio, speaker_id, voice_dir):
-        """Generate a voice from the given audio and text.
+        """Generate a voice from the given audio.
 
         Args:
             audio (str): Path to the audio file.