Fix tokenizer for punc only (#1717)

This commit is contained in:
WeberJulian 2022-07-06 22:59:41 +02:00 committed by GitHub
parent 9e00e31e37
commit 5cef6facb0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 1 deletions

View File

@ -137,7 +137,7 @@ class Punctuation:
# nothing has been phonemized, return the puncs alone
if not text:
return ["".join(m.mark for m in puncs)]
return ["".join(m.punc for m in puncs)]
current = puncs[0]

View File

@ -30,6 +30,13 @@ class TestTTSTokenizer(unittest.TestCase):
test_hat = self.tokenizer_ph.ids_to_text(ids)
self.assertEqual(text_ph, test_hat)
def test_text_to_ids_phonemes_punctuation(self):
    """Check the ids round trip for the input "...".

    The string is phonemized with an empty separator to obtain the expected
    text, then encoded to ids and decoded back; both results must be equal.
    """
    text = "..."
    expected = self.ph.phonemize(text, separator="")
    # Encode and immediately decode; the intermediate ids are not inspected.
    decoded = self.tokenizer_ph.ids_to_text(self.tokenizer_ph.text_to_ids(text))
    self.assertEqual(expected, decoded)
def test_text_to_ids_phonemes_with_eos_bos(self):
text = "Bu bir Örnek."
self.tokenizer_ph.use_eos_bos = True