Fix multilingual recipe (#1354)

This commit is contained in:
Edresson Casanova 2022-03-09 12:18:17 -03:00 committed by GitHub
parent c670365507
commit d792b78703
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 21 additions and 10 deletions

View File

@ -6,9 +6,11 @@ from trainer import Trainer, TrainerArgs
from TTS.config.shared_configs import BaseAudioConfig from TTS.config.shared_configs import BaseAudioConfig
from TTS.tts.configs.shared_configs import BaseDatasetConfig from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.configs.vits_config import VitsConfig from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import CharactersConfig
from TTS.tts.datasets import load_tts_samples from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.vits import Vits, VitsArgs from TTS.tts.models.vits import Vits, VitsArgs
from TTS.tts.utils.languages import LanguageManager from TTS.tts.utils.languages import LanguageManager
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.speakers import SpeakerManager
from TTS.utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
@ -73,15 +75,16 @@ config = VitsConfig(
max_audio_len=160000, max_audio_len=160000,
output_path=output_path, output_path=output_path,
datasets=dataset_config, datasets=dataset_config,
characters={ characters=CharactersConfig(
"pad": "_", characters_class="TTS.tts.models.vits.VitsCharacters",
"eos": "&", pad="<PAD>",
"bos": "*", eos="<EOS>",
"characters": "'(),-.:;¿?abcdefghijklmnopqrstuvwxyzµßàáâäåæçèéêëìíîïñòóôöùúûüąćęłńœśşźżƒабвгдежзийклмнопрстуфхцчшщъыьэюяёєіїґӧ «°±µ»$%&‘’‚“`”„", bos="<BOS>",
"punctuations": "'(),-.:;¿? ", blank="<BLNK>",
"phonemes": None, characters="'(),-.:;¿?abcdefghijklmnopqrstuvwxyzµßàáâäåæçèéêëìíîïñòóôöùúûüąćęłńœśşźżƒабвгдежзийклмнопрстуфхцчшщъыьэюяёєіїґӧ «°±µ»$%&‘’‚“`”„",
"unique": True, punctuations="'(),-.:;¿? ",
}, phonemes=None,
),
test_sentences=[ test_sentences=[
[ [
"It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
@ -100,6 +103,9 @@ config = VitsConfig(
], ],
) )
# force the conversion of the custom characters to a config attribute
config.from_dict(config.to_dict())
# init audio processor # init audio processor
ap = AudioProcessor(**config.audio.to_dict()) ap = AudioProcessor(**config.audio.to_dict())
@ -115,8 +121,13 @@ config.model_args.num_speakers = speaker_manager.num_speakers
language_manager = LanguageManager(config=config) language_manager = LanguageManager(config=config)
config.model_args.num_languages = language_manager.num_languages config.model_args.num_languages = language_manager.num_languages
# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# config is updated with the default characters if not defined in the config.
tokenizer, config = TTSTokenizer.init_from_config(config)
# init model # init model
model = Vits(config, speaker_manager, language_manager) model = Vits(config, ap, tokenizer, speaker_manager, language_manager)
# init the trainer and 🚀 # init the trainer and 🚀
trainer = Trainer( trainer = Trainer(