mirror of https://github.com/coqui-ai/TTS.git
fix(punctuation): correctly handle initial punctuation
Stripping and restoring initial punctuation did not work correctly: splitting the text on a leading punctuation mark inserted an extra empty string into the text list (because `".A".split(".")` => `["", "A"]`). An initial empty string is now skipped, and relevant test cases have been added. Fixes #3333.
This commit is contained in:
parent
87974f917a
commit
45200d15c9
|
@ -106,11 +106,14 @@ class Punctuation:
|
|||
for idx, punc in enumerate(puncs):
|
||||
split = text.split(punc.punc)
|
||||
prefix, suffix = split[0], punc.punc.join(split[1:])
|
||||
text = suffix
|
||||
if prefix == "":
|
||||
# We don't want to insert an empty string in case of initial punctuation
|
||||
continue
|
||||
splitted_text.append(prefix)
|
||||
# if the text does not end with punctuation, append the remaining text as the last item
|
||||
if idx == len(puncs) - 1 and len(suffix) > 0:
|
||||
splitted_text.append(suffix)
|
||||
text = suffix
|
||||
return splitted_text, puncs
|
||||
|
||||
@classmethod
|
||||
|
|
|
@ -13,6 +13,9 @@ class PunctuationTest(unittest.TestCase):
|
|||
("This, is my text to be striped from text", "This is my text to be striped from text"),
|
||||
(".", ""),
|
||||
(" . ", ""),
|
||||
("!!! Attention !!!", "Attention"),
|
||||
("!!! Attention !!! This is just a ... test.", "Attention This is just a test"),
|
||||
("!!! Attention! This is just a ... test.", "Attention This is just a test"),
|
||||
]
|
||||
|
||||
def test_get_set_puncs(self):
|
||||
|
|
Loading…
Reference in New Issue