mirror of https://github.com/coqui-ai/TTS.git
Allow None pad and blank tokens
This commit is contained in:
parent
c9972e6f14
commit
8649d4fd36
|
@ -57,8 +57,8 @@ class TTSTokenizer:
|
||||||
@characters.setter
|
@characters.setter
|
||||||
def characters(self, new_characters):
|
def characters(self, new_characters):
|
||||||
self._characters = new_characters
|
self._characters = new_characters
|
||||||
self.pad_id = self.characters.char_to_id(self.characters.pad)
|
self.pad_id = self.characters.char_to_id(self.characters.pad) if self.characters.pad else None
|
||||||
self.blank_id = self.characters.char_to_id(self.characters.blank)
|
self.blank_id = self.characters.char_to_id(self.characters.blank) if self.characters.blank else None
|
||||||
|
|
||||||
def encode(self, text: str) -> List[int]:
|
def encode(self, text: str) -> List[int]:
|
||||||
"""Encodes a string of text as a sequence of IDs."""
|
"""Encodes a string of text as a sequence of IDs."""
|
||||||
|
@ -82,7 +82,7 @@ class TTSTokenizer:
|
||||||
text += self.characters.id_to_char(token_id)
|
text += self.characters.id_to_char(token_id)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def text_to_ids(self, text: str, language: str = None) -> List[int]:
|
def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: disable=unused-argument
|
||||||
"""Converts a string of text to a sequence of token IDs.
|
"""Converts a string of text to a sequence of token IDs.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -137,32 +137,50 @@ class TTSTokenizer:
|
||||||
print(f"{indent}| > {char}")
|
print(f"{indent}| > {char}")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def init_from_config(config: "Coqpit"):
|
def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None):
|
||||||
"""Init Tokenizer object from config
|
"""Init Tokenizer object from config
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
config (Coqpit): Coqpit model config.
|
config (Coqpit): Coqpit model config.
|
||||||
|
characters (BaseCharacters): Defines the model character set. If not set, use the default options based on
|
||||||
|
the config values. Defaults to None.
|
||||||
"""
|
"""
|
||||||
# init cleaners
|
# init cleaners
|
||||||
if isinstance(config.text_cleaner, (str, list)):
|
if isinstance(config.text_cleaner, (str, list)):
|
||||||
text_cleaner = getattr(cleaners, config.text_cleaner)
|
text_cleaner = getattr(cleaners, config.text_cleaner)
|
||||||
|
|
||||||
|
# init characters
|
||||||
|
if characters is None:
|
||||||
|
if config.use_phonemes:
|
||||||
|
# init phoneme set
|
||||||
|
characters, new_config = IPAPhonemes().init_from_config(config)
|
||||||
|
else:
|
||||||
|
# init character set
|
||||||
|
characters, new_config = Graphemes().init_from_config(config)
|
||||||
|
else:
|
||||||
|
characters, new_config = characters.init_from_config(config)
|
||||||
|
|
||||||
|
# init phonemizer
|
||||||
phonemizer = None
|
phonemizer = None
|
||||||
if config.use_phonemes:
|
if config.use_phonemes:
|
||||||
# init phoneme set
|
|
||||||
characters = IPAPhonemes().init_from_config(config)
|
|
||||||
phonemizer_kwargs = {"language": config.phoneme_language}
|
phonemizer_kwargs = {"language": config.phoneme_language}
|
||||||
|
|
||||||
# init phonemizer
|
|
||||||
if "phonemizer" in config and config.phonemizer:
|
if "phonemizer" in config and config.phonemizer:
|
||||||
phonemizer = get_phonemizer_by_name(config.phonemizer, **phonemizer_kwargs)
|
phonemizer = get_phonemizer_by_name(config.phonemizer, **phonemizer_kwargs)
|
||||||
else:
|
else:
|
||||||
phonemizer = get_phonemizer_by_name(
|
try:
|
||||||
DEF_LANG_TO_PHONEMIZER[config.phoneme_language], **phonemizer_kwargs
|
phonemizer = get_phonemizer_by_name(
|
||||||
)
|
DEF_LANG_TO_PHONEMIZER[config.phoneme_language], **phonemizer_kwargs
|
||||||
else:
|
)
|
||||||
# init character set
|
except KeyError as e:
|
||||||
characters = Graphemes().init_from_config(config)
|
raise ValueError(
|
||||||
return TTSTokenizer(
|
f"""No phonemizer found for language {config.phoneme_language}.
|
||||||
config.use_phonemes, text_cleaner, characters, phonemizer, config.add_blank, config.enable_eos_bos_chars
|
You may need to install a third party library for this language."""
|
||||||
|
) from e
|
||||||
|
|
||||||
|
return (
|
||||||
|
TTSTokenizer(
|
||||||
|
config.use_phonemes, text_cleaner, characters, phonemizer, config.add_blank, config.enable_eos_bos_chars
|
||||||
|
),
|
||||||
|
new_config,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue