Update config fields for phonemizer

This commit is contained in:
Eren Gölge 2021-11-24 18:44:18 +01:00
parent d2525abe8c
commit 3eca5ad060
1 changed file with 21 additions and 19 deletions

View File

@ -50,7 +50,7 @@ class GSTConfig(Coqpit):
@dataclass @dataclass
class CharactersConfig(Coqpit): class CharactersConfig(Coqpit):
"""Defines character or phoneme set used by the model """Defines arguments for the `BaseCharacters` and its subclasses.
Args: Args:
pad (str): pad (str):
@ -62,6 +62,9 @@ class CharactersConfig(Coqpit):
bos (str): bos (str):
characters showing the beginning of a sentence. Defaults to None. characters showing the beginning of a sentence. Defaults to None.
blank (str):
Optional character used between characters by some models for better prosody. Defaults to `_blank`.
characters (str): characters (str):
character set used by the model. Characters not in this list are ignored when converting input text to character set used by the model. Characters not in this list are ignored when converting input text to
a list of sequence IDs. Defaults to None. a list of sequence IDs. Defaults to None.
@ -70,32 +73,26 @@ class CharactersConfig(Coqpit):
characters considered as punctuation when parsing the input sentence. Defaults to None. characters considered as punctuation when parsing the input sentence. Defaults to None.
phonemes (str): phonemes (str):
characters considered as parsing phonemes. Defaults to None. characters considered as parsing phonemes. This is only for backwards compat. Use `characters` for new
models. Defaults to None.
unique (bool): is_unique (bool):
remove any duplicate characters in the character lists. It is a bandaid for compatibility with the old remove any duplicate characters in the character lists. It is a bandaid for compatibility with the old
models trained with character lists with duplicates. models trained with character lists with duplicates.
is_sorted (bool):
Sort the characters in alphabetical order. Defaults to True.
""" """
pad: str = None pad: str = None
eos: str = None eos: str = None
bos: str = None bos: str = None
blank: str = None
characters: str = None characters: str = None
punctuations: str = None punctuations: str = None
phonemes: str = None phonemes: str = None
unique: bool = True # for backwards compatibility of models trained with char sets with duplicates is_unique: bool = True # for backwards compatibility of models trained with char sets with duplicates
is_sorted: bool = True
def check_values(
self,
):
"""Check config fields"""
c = asdict(self)
check_argument("pad", c, prerequest="characters", restricted=True)
check_argument("eos", c, prerequest="characters", restricted=True)
check_argument("bos", c, prerequest="characters", restricted=True)
check_argument("characters", c, prerequest="characters", restricted=True)
check_argument("phonemes", c, restricted=True)
check_argument("punctuations", c, prerequest="characters", restricted=True)
@dataclass @dataclass
@ -110,8 +107,13 @@ class BaseTTSConfig(BaseTrainingConfig):
use_phonemes (bool): use_phonemes (bool):
enable / disable phoneme use. enable / disable phoneme use.
use_espeak_phonemes (bool): phonemizer (str):
enable / disable eSpeak-compatible phonemes (only if use_phonemes = `True`). Name of the phonemizer to use. If set to None, the phonemizer will be selected by `phoneme_language`.
Defaults to None.
phoneme_language (str):
Language code for the phonemizer. You can check the list of supported languages by running
`python TTS/tts/utils/text/phonemizers/__init__.py`. Defaults to None.
compute_input_seq_cache (bool): compute_input_seq_cache (bool):
enable / disable precomputation of the phoneme sequences. At the expense of some delay at the beginning of enable / disable precomputation of the phoneme sequences. At the expense of some delay at the beginning of
@ -195,7 +197,7 @@ class BaseTTSConfig(BaseTrainingConfig):
audio: BaseAudioConfig = field(default_factory=BaseAudioConfig) audio: BaseAudioConfig = field(default_factory=BaseAudioConfig)
# phoneme settings # phoneme settings
use_phonemes: bool = False use_phonemes: bool = False
use_espeak_phonemes: bool = True phonemizer: str = None
phoneme_language: str = None phoneme_language: str = None
compute_input_seq_cache: bool = False compute_input_seq_cache: bool = False
text_cleaner: str = None text_cleaner: str = None