From 03c347b7f3f5af29027ad1919c809d4e6cf434c8 Mon Sep 17 00:00:00 2001
From: Eren Gölge <egolge@coqui.ai>
Date: Wed, 21 Jun 2023 11:58:18 +0200
Subject: [PATCH] Update Bark Config

---
 TTS/bin/synthesize.py          |  2 +-
 TTS/tts/configs/bark_config.py | 39 ++++++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
index 8a7e178d..0334c023 100755
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@@ -356,7 +356,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
             vc_config_path = config_path
 
         # tts model with multiple files to be loaded from the directory path
-        if isinstance(model_item["github_rls_url"], list):
+        if isinstance(model_item["model_url"], list):
             model_dir = model_path
             tts_path = None
             tts_config_path = None
diff --git a/TTS/tts/configs/bark_config.py b/TTS/tts/configs/bark_config.py
index 57ccf2d0..943f3dea 100644
--- a/TTS/tts/configs/bark_config.py
+++ b/TTS/tts/configs/bark_config.py
@@ -1,5 +1,5 @@
 import os
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from typing import Dict
 
 from TTS.tts.configs.shared_configs import BaseTTSConfig
@@ -11,6 +11,40 @@ from TTS.utils.generic_utils import get_user_data_dir
 
 @dataclass
 class BarkConfig(BaseTTSConfig):
+    """Bark TTS configuration.
+
+    Args:
+        model (str): model name that registers the model.
+        audio (BarkAudioConfig): audio configuration. Defaults to BarkAudioConfig().
+        num_chars (int): number of characters in the alphabet. Defaults to 0.
+        semantic_config (GPTConfig): semantic configuration. Defaults to GPTConfig().
+        fine_config (FineGPTConfig): fine configuration. Defaults to FineGPTConfig().
+        coarse_config (GPTConfig): coarse configuration. Defaults to GPTConfig().
+        CONTEXT_WINDOW_SIZE (int): GPT context window size. Defaults to 1024.
+        SEMANTIC_RATE_HZ (float): semantic tokens rate in Hz. Defaults to 49.9.
+        SEMANTIC_VOCAB_SIZE (int): semantic vocabulary size. Defaults to 10_000.
+        CODEBOOK_SIZE (int): encodec codebook size. Defaults to 1024.
+        N_COARSE_CODEBOOKS (int): number of coarse codebooks. Defaults to 2.
+        N_FINE_CODEBOOKS (int): number of fine codebooks. Defaults to 8.
+        COARSE_RATE_HZ (int): coarse tokens rate in Hz. Defaults to 75.
+        SAMPLE_RATE (int): sample rate. Defaults to 24_000.
+        USE_SMALLER_MODELS (bool): use smaller models. Defaults to False.
+        TEXT_ENCODING_OFFSET (int): text encoding offset. Defaults to 10_048.
+        SEMANTIC_PAD_TOKEN (int): semantic pad token. Defaults to 10_000.
+        TEXT_PAD_TOKEN (int): text pad token. Defaults to 10_048.
+        TEXT_EOS_TOKEN (int): text end of sentence token. Defaults to 10_049.
+        TEXT_SOS_TOKEN (int): text start of sentence token. Defaults to 10_050.
+        SEMANTIC_INFER_TOKEN (int): semantic infer token. Defaults to 10_051.
+        COARSE_SEMANTIC_PAD_TOKEN (int): coarse semantic pad token. Defaults to 12_048.
+        COARSE_INFER_TOKEN (int): coarse infer token. Defaults to 12_050.
+        REMOTE_BASE_URL (str): remote base url. Defaults to "https://huggingface.co/erogol/bark/tree".
+        REMOTE_MODEL_PATHS (Dict): remote model paths. Defaults to None.
+        LOCAL_MODEL_PATHS (Dict): local model paths. Defaults to None.
+        SMALL_REMOTE_MODEL_PATHS (Dict): small remote model paths. Defaults to None.
+        CACHE_DIR (str): local cache directory. Defaults to get_user_data_dir("tts/suno/bark_v0").
+        DEF_SPEAKER_DIR (str): default speaker directory to store speaker values for voice cloning. Defaults to get_user_data_dir("tts/bark_v0/speakers").
+    """
+
     model: str = "bark"
     audio: BarkAudioConfig = BarkAudioConfig()
     num_chars: int = 0
@@ -39,6 +73,7 @@ class BarkConfig(BaseTTSConfig):
     LOCAL_MODEL_PATHS: Dict = None
     SMALL_REMOTE_MODEL_PATHS: Dict = None
     CACHE_DIR: str = str(get_user_data_dir("tts/suno/bark_v0"))
+    DEF_SPEAKER_DIR: str = str(get_user_data_dir("tts/bark_v0/speakers"))
 
     def __post_init__(self):
         self.REMOTE_MODEL_PATHS = {
@@ -67,4 +102,4 @@ class BarkConfig(BaseTTSConfig):
             "coarse": {"path": os.path.join(self.REMOTE_BASE_URL, "coarse.pt")},
             "fine": {"path": os.path.join(self.REMOTE_BASE_URL, "fine.pt")},
         }
-        self.sample_rate = self.SAMPLE_RATE
+        self.sample_rate = self.SAMPLE_RATE  # pylint: disable=attribute-defined-outside-init