Preserve the [ja] token during text processing

This commit is contained in:
Julian Weber 2023-10-21 11:26:03 +02:00 committed by GitHub
parent c7a16042e3
commit dad6a7b0b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 3 additions and 0 deletions

View File

@@ -483,10 +483,13 @@ class VoiceBpeTokenizer:
if lang == "zh-cn":
txt = chinese_transliterate(txt)
elif lang == "ja":
assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
txt = txt[4:]
if self.katsu is None:
import cutlet
self.katsu = cutlet.Cutlet()
txt = japanese_cleaners(txt, self.katsu)
txt = "[ja]" + txt
else:
raise NotImplementedError()
return txt