mirror of https://github.com/coqui-ai/TTS.git
Move lang token add to tokenizer
This commit is contained in:
parent 6fa46d197d
commit c1133724a1
@@ -483,13 +483,10 @@ class VoiceBpeTokenizer:
         if lang == "zh-cn":
             txt = chinese_transliterate(txt)
         elif lang == "ja":
-            assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
-            txt = txt[4:]
             if self.katsu is None:
                 import cutlet
                 self.katsu = cutlet.Cutlet()
             txt = japanese_cleaners(txt, self.katsu)
-            txt = "[ja]" + txt
         else:
             raise NotImplementedError()
         return txt
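The Japanese branch keeps its cutlet dependency; this hunk only drops the requirement that callers pass in (and get back) a leading [ja] token. A minimal sketch of the romaji step that the japanese_cleaners helper is assumed to build on, with cutlet installed:

import cutlet

# Mirrors the lazy `self.katsu` initialization in the hunk above.
katsu = cutlet.Cutlet()

# Cutlet.romaji() transliterates Japanese text to romaji; japanese_cleaners()
# is assumed to wrap a call like this before further cleaning.
print(katsu.romaji("音声合成"))  # e.g. "Onsei gousei"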
@@ -497,6 +494,7 @@ class VoiceBpeTokenizer:
     def encode(self, txt, lang):
         if self.preprocess:
             txt = self.preprocess_text(txt, lang)
+        txt = f"[{lang}]{txt}"
         txt = txt.replace(" ", "[SPACE]")
         return self.tokenizer.encode(txt).ids
 
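With this hunk, encode owns the language token: call sites pass raw text plus a language code, and the tokenizer prepends [{lang}] and maps spaces to [SPACE] before BPE. A minimal usage sketch, assuming the tokenizer is built from a vocab file (the path is illustrative):

from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer

tokenizer = VoiceBpeTokenizer(vocab_file="vocab.json")  # illustrative path

# Callers no longer prepend "[en]" themselves; encode() adds it.
ids = tokenizer.encode("hello world", lang="en")
# After optional per-language preprocessing, the string handed to the BPE
# model becomes "[en]hello[SPACE]world".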
@@ -610,7 +610,7 @@ class Xtts(BaseTTS):
         decoder="hifigan",
         **hf_generate_kwargs,
     ):
-        text = f"[{language}]{text.strip().lower()}"
+        text = text.strip().lower()
         text_tokens = torch.IntTensor(self.tokenizer.encode(text, lang=language)).unsqueeze(0).to(self.device)
 
         assert (
@@ -722,7 +722,7 @@ class Xtts(BaseTTS):
         assert hasattr(
             self, "hifigan_decoder"
         ), "`inference_stream` requires use_hifigan to be set to true in the config.model_args, diffusion is too slow to stream."
-        text = f"[{language}]{text.strip().lower()}"
+        text = text.strip().lower()
         text_tokens = torch.IntTensor(self.tokenizer.encode(text, lang=language)).unsqueeze(0).to(self.device)
 
         fake_inputs = self.gpt.compute_embeddings(
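Both inference paths now share the same, simpler text preparation: normalize the string and let the tokenizer add the language token exactly once. A hypothetical helper summarizing the shared lines above (the name and signature are illustrative, not part of the repo):

import torch

def prepare_text_tokens(tokenizer, text, language, device):
    # Normalize, encode (the tokenizer prepends "[{language}]"), then batch.
    text = text.strip().lower()
    ids = tokenizer.encode(text, lang=language)
    return torch.IntTensor(ids).unsqueeze(0).to(device)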