Fixing English tokenization

This commit is contained in:
WeberJulian 2023-10-06 17:32:21 +02:00
parent 3063846aa7
commit 1ec341857e
1 changed file with 3 additions and 2 deletions

View File

@@ -223,8 +223,9 @@ class VoiceBpeTokenizer:
results = kks.convert(txt)
txt = " ".join([result["kana"] for result in results])
txt = basic_cleaners(txt)
elif lang == "en":
txt = english_cleaners(txt)
# elif lang == "en":
# txt = english_cleaners(txt)
# The English cleaner removes the language tag [en], so it is disabled here.
elif lang == "ar":
txt = arabic_cleaners(txt)
elif lang == "zh-cn":