mirror of https://github.com/coqui-ai/TTS.git
Change tokenization order
This commit is contained in:
parent
d39878eac0
commit
2e3d61b0f8
|
@ -108,11 +108,12 @@ class TTSTokenizer:
|
||||||
text = self.text_cleaner(text)
|
text = self.text_cleaner(text)
|
||||||
if self.use_phonemes:
|
if self.use_phonemes:
|
||||||
text = self.phonemizer.phonemize(text, separator="", language=language)
|
text = self.phonemizer.phonemize(text, separator="", language=language)
|
||||||
|
text = self.encode(text)
|
||||||
if self.add_blank:
|
if self.add_blank:
|
||||||
text = self.intersperse_blank_char(text, True)
|
text = self.intersperse_blank_char(text, True)
|
||||||
if self.use_eos_bos:
|
if self.use_eos_bos:
|
||||||
text = self.pad_with_bos_eos(text)
|
text = self.pad_with_bos_eos(text)
|
||||||
return self.encode(text)
|
return text
|
||||||
|
|
||||||
def ids_to_text(self, id_sequence: List[int]) -> str:
|
def ids_to_text(self, id_sequence: List[int]) -> str:
|
||||||
"""Converts a sequence of token IDs to a string of text."""
|
"""Converts a sequence of token IDs to a string of text."""
|
||||||
|
@ -120,14 +121,14 @@ class TTSTokenizer:
|
||||||
|
|
||||||
def pad_with_bos_eos(self, char_sequence: List[str]):
|
def pad_with_bos_eos(self, char_sequence: List[str]):
|
||||||
"""Pads a sequence with the special BOS and EOS characters."""
|
"""Pads a sequence with the special BOS and EOS characters."""
|
||||||
return [self.characters.bos] + list(char_sequence) + [self.characters.eos]
|
return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id]
|
||||||
|
|
||||||
def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False):
|
def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False):
|
||||||
"""Intersperses the blank character between characters in a sequence.
|
"""Intersperses the blank character between characters in a sequence.
|
||||||
|
|
||||||
Use the ```blank``` character if defined else use the ```pad``` character.
|
Use the ```blank``` character if defined else use the ```pad``` character.
|
||||||
"""
|
"""
|
||||||
char_to_use = self.characters.blank if use_blank_char else self.characters.pad
|
char_to_use = self.characters.blank_id if use_blank_char else self.characters.pad
|
||||||
result = [char_to_use] * (len(char_sequence) * 2 + 1)
|
result = [char_to_use] * (len(char_sequence) * 2 + 1)
|
||||||
result[1::2] = char_sequence
|
result[1::2] = char_sequence
|
||||||
return result
|
return result
|
||||||
|
|
Loading…
Reference in New Issue