mirror of https://github.com/coqui-ai/TTS.git
Update Bark Config
This commit is contained in:
parent
695e862aad
commit
03c347b7f3
|
@ -356,7 +356,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
|
||||||
vc_config_path = config_path
|
vc_config_path = config_path
|
||||||
|
|
||||||
# tts model with multiple files to be loaded from the directory path
|
# tts model with multiple files to be loaded from the directory path
|
||||||
if isinstance(model_item["github_rls_url"], list):
|
if isinstance(model_item["model_url"], list):
|
||||||
model_dir = model_path
|
model_dir = model_path
|
||||||
tts_path = None
|
tts_path = None
|
||||||
tts_config_path = None
|
tts_config_path = None
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import os
|
import os
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
from TTS.tts.configs.shared_configs import BaseTTSConfig
|
from TTS.tts.configs.shared_configs import BaseTTSConfig
|
||||||
|
@ -11,6 +11,40 @@ from TTS.utils.generic_utils import get_user_data_dir
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BarkConfig(BaseTTSConfig):
|
class BarkConfig(BaseTTSConfig):
|
||||||
|
""" Bark TTS configuration
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model (str): model name that registers the model.
|
||||||
|
audio (BarkAudioConfig): audio configuration. Defaults to BarkAudioConfig().
|
||||||
|
num_chars (int): number of characters in the alphabet. Defaults to 0.
|
||||||
|
semantic_config (GPTConfig): semantic configuration. Defaults to GPTConfig().
|
||||||
|
fine_config (FineGPTConfig): fine configuration. Defaults to FineGPTConfig().
|
||||||
|
coarse_config (GPTConfig): coarse configuration. Defaults to GPTConfig().
|
||||||
|
CONTEXT_WINDOW_SIZE (int): GPT context window size. Defaults to 1024.
|
||||||
|
SEMANTIC_RATE_HZ (float): semantic tokens rate in Hz. Defaults to 49.9.
|
||||||
|
SEMANTIC_VOCAB_SIZE (int): semantic vocabulary size. Defaults to 10_000.
|
||||||
|
CODEBOOK_SIZE (int): encodec codebook size. Defaults to 1024.
|
||||||
|
N_COARSE_CODEBOOKS (int): number of coarse codebooks. Defaults to 2.
|
||||||
|
N_FINE_CODEBOOKS (int): number of fine codebooks. Defaults to 8.
|
||||||
|
COARSE_RATE_HZ (int): coarse tokens rate in Hz. Defaults to 75.
|
||||||
|
SAMPLE_RATE (int): sample rate. Defaults to 24_000.
|
||||||
|
USE_SMALLER_MODELS (bool): use smaller models. Defaults to False.
|
||||||
|
TEXT_ENCODING_OFFSET (int): text encoding offset. Defaults to 10_048.
|
||||||
|
SEMANTIC_PAD_TOKEN (int): semantic pad token. Defaults to 10_000.
|
||||||
|
TEXT_PAD_TOKEN (int): text pad token. Defaults to 10_048.
|
||||||
|
TEXT_EOS_TOKEN (int): text end of sentence token. Defaults to 10_049.
|
||||||
|
TEXT_SOS_TOKEN (int): text start of sentence token. Defaults to 10_050.
|
||||||
|
SEMANTIC_INFER_TOKEN (int): semantic infer token. Defaults to 10_051.
|
||||||
|
COARSE_SEMANTIC_PAD_TOKEN (int): coarse semantic pad token. Defaults to 12_048.
|
||||||
|
COARSE_INFER_TOKEN (int): coarse infer token. Defaults to 12_050.
|
||||||
|
REMOTE_BASE_URL (str): remote base url. Defaults to "https://huggingface.co/erogol/bark/tree".
|
||||||
|
REMOTE_MODEL_PATHS (Dict): remote model paths. Defaults to None.
|
||||||
|
LOCAL_MODEL_PATHS (Dict): local model paths. Defaults to None.
|
||||||
|
SMALL_REMOTE_MODEL_PATHS (Dict): small remote model paths. Defaults to None.
|
||||||
|
CACHE_DIR (str): local cache directory. Defaults to get_user_data_dir("tts/suno/bark_v0").
|
||||||
|
DEF_SPEAKER_DIR (str): default speaker directory to store speaker values for voice cloning. Defaults to get_user_data_dir("tts/bark_v0/speakers").
|
||||||
|
"""
|
||||||
|
|
||||||
model: str = "bark"
|
model: str = "bark"
|
||||||
audio: BarkAudioConfig = BarkAudioConfig()
|
audio: BarkAudioConfig = BarkAudioConfig()
|
||||||
num_chars: int = 0
|
num_chars: int = 0
|
||||||
|
@ -39,6 +73,7 @@ class BarkConfig(BaseTTSConfig):
|
||||||
LOCAL_MODEL_PATHS: Dict = None
|
LOCAL_MODEL_PATHS: Dict = None
|
||||||
SMALL_REMOTE_MODEL_PATHS: Dict = None
|
SMALL_REMOTE_MODEL_PATHS: Dict = None
|
||||||
CACHE_DIR: str = str(get_user_data_dir("tts/suno/bark_v0"))
|
CACHE_DIR: str = str(get_user_data_dir("tts/suno/bark_v0"))
|
||||||
|
DEF_SPEAKER_DIR: str = str(get_user_data_dir("tts/bark_v0/speakers"))
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
self.REMOTE_MODEL_PATHS = {
|
self.REMOTE_MODEL_PATHS = {
|
||||||
|
@ -67,4 +102,4 @@ class BarkConfig(BaseTTSConfig):
|
||||||
"coarse": {"path": os.path.join(self.REMOTE_BASE_URL, "coarse.pt")},
|
"coarse": {"path": os.path.join(self.REMOTE_BASE_URL, "coarse.pt")},
|
||||||
"fine": {"path": os.path.join(self.REMOTE_BASE_URL, "fine.pt")},
|
"fine": {"path": os.path.join(self.REMOTE_BASE_URL, "fine.pt")},
|
||||||
}
|
}
|
||||||
self.sample_rate = self.SAMPLE_RATE
|
self.sample_rate = self.SAMPLE_RATE # pylint: disable=attribute-defined-outside-init
|
||||||
|
|
Loading…
Reference in New Issue