mirror of https://github.com/coqui-ai/TTS.git
Fix Punctuation
This commit is contained in:
parent
ff7c385838
commit
d8bdeb8b8f
|
@ -74,36 +74,36 @@ phonemes = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
|
||||||
class BaseCharacters:
|
class BaseCharacters:
|
||||||
"""🐸BaseCharacters class
|
"""🐸BaseCharacters class
|
||||||
|
|
||||||
Every new character class should inherit from this.
|
Every new character class should inherit from this.
|
||||||
|
|
||||||
Characters are ordered as follows ```[PAD, EOS, BOS, BLANK, CHARACTERS, PUNCTUATIONS]```.
|
Characters are ordered as follows ```[PAD, EOS, BOS, BLANK, CHARACTERS, PUNCTUATIONS]```.
|
||||||
|
|
||||||
If you need a custom order, you need to inherit from this class and override the ```_create_vocab``` method.
|
If you need a custom order, you need to inherit from this class and override the ```_create_vocab``` method.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
characters (str):
|
characters (str):
|
||||||
Main set of characters to be used in the vocabulary.
|
Main set of characters to be used in the vocabulary.
|
||||||
|
|
||||||
punctuations (str):
|
punctuations (str):
|
||||||
Characters to be treated as punctuation.
|
Characters to be treated as punctuation.
|
||||||
|
|
||||||
pad (str):
|
pad (str):
|
||||||
Special padding character that would be ignored by the model.
|
Special padding character that would be ignored by the model.
|
||||||
|
|
||||||
eos (str):
|
eos (str):
|
||||||
End of the sentence character.
|
End of the sentence character.
|
||||||
|
|
||||||
bos (str):
|
bos (str):
|
||||||
Beginning of the sentence character.
|
Beginning of the sentence character.
|
||||||
|
|
||||||
blank (str):
|
blank (str):
|
||||||
Optional character used between characters by some models for better prosody.
|
Optional character used between characters by some models for better prosody.
|
||||||
|
|
||||||
is_unique (bool):
|
is_unique (bool):
|
||||||
Remove duplicates from the provided characters. Defaults to True.
|
Remove duplicates from the provided characters. Defaults to True.
|
||||||
el
|
el
|
||||||
is_sorted (bool):
|
is_sorted (bool):
|
||||||
Sort the characters in alphabetical order. Only applies to `self.characters`. Defaults to True.
|
Sort the characters in alphabetical order. Only applies to `self.characters`. Defaults to True.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -196,10 +196,10 @@ el
|
||||||
if self.is_sorted:
|
if self.is_sorted:
|
||||||
_vocab = sorted(_vocab)
|
_vocab = sorted(_vocab)
|
||||||
_vocab = list(_vocab)
|
_vocab = list(_vocab)
|
||||||
_vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab
|
_vocab = [self._blank] + _vocab if self._blank is not None and len(self._blank) > 0 else _vocab
|
||||||
_vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab
|
_vocab = [self._bos] + _vocab if self._bos is not None and len(self._bos) > 0 else _vocab
|
||||||
_vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab
|
_vocab = [self._eos] + _vocab if self._eos is not None and len(self._eos) > 0 else _vocab
|
||||||
_vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
|
_vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
|
||||||
self._vocab = _vocab + list(self._punctuations)
|
self._vocab = _vocab + list(self._punctuations)
|
||||||
self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
|
self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
|
||||||
self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)}
|
self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)}
|
||||||
|
@ -214,7 +214,7 @@ el
|
||||||
def id_to_char(self, idx: int) -> str:
|
def id_to_char(self, idx: int) -> str:
|
||||||
return self._id_to_char[idx]
|
return self._id_to_char[idx]
|
||||||
|
|
||||||
def print_log(self, level:int=0):
|
def print_log(self, level: int = 0):
|
||||||
"""
|
"""
|
||||||
Prints the vocabulary in a nice format.
|
Prints the vocabulary in a nice format.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -91,10 +91,13 @@ class Punctuation:
|
||||||
puncs.append(_PUNC_IDX(match.group(), position))
|
puncs.append(_PUNC_IDX(match.group(), position))
|
||||||
# convert str text to a List[str], each item is separated by a punctuation
|
# convert str text to a List[str], each item is separated by a punctuation
|
||||||
splitted_text = []
|
splitted_text = []
|
||||||
for punc in puncs:
|
for idx, punc in enumerate(puncs):
|
||||||
split = text.split(punc.punc)
|
split = text.split(punc.punc)
|
||||||
prefix, suffix = split[0], punc.punc.join(split[1:])
|
prefix, suffix = split[0], punc.punc.join(split[1:])
|
||||||
splitted_text.append(prefix)
|
splitted_text.append(prefix)
|
||||||
|
# if the text does not end with a punctuation, add it to the last item
|
||||||
|
if idx == len(puncs) - 1 and len(suffix) > 0:
|
||||||
|
splitted_text.append(suffix)
|
||||||
text = suffix
|
text = suffix
|
||||||
return splitted_text, puncs
|
return splitted_text, puncs
|
||||||
|
|
||||||
|
@ -126,7 +129,7 @@ class Punctuation:
|
||||||
current = puncs[0]
|
current = puncs[0]
|
||||||
|
|
||||||
if current.position == PuncPosition.BEGIN:
|
if current.position == PuncPosition.BEGIN:
|
||||||
return cls._restore([current.mark + text[0]] + text[1:], puncs[1:], num)
|
return cls._restore([current.punc + text[0]] + text[1:], puncs[1:], num)
|
||||||
|
|
||||||
if current.position == PuncPosition.END:
|
if current.position == PuncPosition.END:
|
||||||
return [text[0] + current.punc] + cls._restore(text[1:], puncs[1:], num + 1)
|
return [text[0] + current.punc] + cls._restore(text[1:], puncs[1:], num + 1)
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
from typing import Callable, Dict, List, Union
|
from typing import Callable, Dict, List, Union
|
||||||
|
|
||||||
from TTS.tts.utils.text import cleaners
|
from TTS.tts.utils.text import cleaners
|
||||||
from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name
|
|
||||||
from TTS.tts.utils.text.characters import Graphemes, IPAPhonemes
|
from TTS.tts.utils.text.characters import Graphemes, IPAPhonemes
|
||||||
|
from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name
|
||||||
|
|
||||||
|
|
||||||
class TTSTokenizer:
|
class TTSTokenizer:
|
||||||
|
|
Loading…
Reference in New Issue