Add get_number_tokens

This commit is contained in:
WeberJulian 2023-10-20 09:26:07 -03:00
parent 478fe0b28d
commit 202d7477cb
2 changed files with 4 additions and 1 deletions

View File

@ -509,3 +509,6 @@ class VoiceBpeTokenizer:
def __len__(self): def __len__(self):
return self.tokenizer.get_vocab_size() return self.tokenizer.get_vocab_size()
def get_number_tokens(self):
    """Return the size of the token-id space: highest vocabulary id plus one.

    The value is derived from the largest id found in
    ``self.tokenizer.get_vocab()`` rather than from the number of entries,
    so every id in the vocabulary is strictly below the result even when
    the ids are not contiguous.

    Returns:
        int: ``max(id) + 1``, or ``0`` when the vocabulary is empty.
    """
    token_ids = self.tokenizer.get_vocab().values()
    # ``default=-1`` makes an empty vocabulary yield 0 instead of letting
    # ``max()`` raise ValueError on an empty iterable.
    return max(token_ids, default=-1) + 1

View File

@ -312,7 +312,7 @@ class Xtts(BaseTTS):
def init_models(self): def init_models(self):
"""Initialize the models. We do it here since we need to load the tokenizer first.""" """Initialize the models. We do it here since we need to load the tokenizer first."""
if self.tokenizer.tokenizer is not None: if self.tokenizer.tokenizer is not None:
self.args.gpt_number_text_tokens = max(self.tokenizer.tokenizer.get_vocab().values()) + 1 self.args.gpt_number_text_tokens = self.tokenizer.get_number_tokens()
self.args.gpt_start_text_token = self.tokenizer.tokenizer.token_to_id("[START]") self.args.gpt_start_text_token = self.tokenizer.tokenizer.token_to_id("[START]")
self.args.gpt_stop_text_token = self.tokenizer.tokenizer.token_to_id("[STOP]") self.args.gpt_stop_text_token = self.tokenizer.tokenizer.token_to_id("[STOP]")