diff --git a/recipes/vctk/delightful_tts/train_delightful_tts.py b/recipes/vctk/delightful_tts/train_delightful_tts.py index 19803035..e03ed2b7 100644 --- a/recipes/vctk/delightful_tts/train_delightful_tts.py +++ b/recipes/vctk/delightful_tts/train_delightful_tts.py @@ -1,22 +1,20 @@ import os -from clearml import Task from trainer import Trainer, TrainerArgs from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.configs.delightful_tts_config import DelightfulTtsAudioConfig, DelightfulTTSConfig from TTS.tts.datasets import load_tts_samples -from TTS.tts.models.delightful_tts import DelightfulTtsArgs, DelightfulTTSE2e, VocoderConfig +from TTS.tts.models.delightful_tts import DelightfulTtsArgs, DelightfulTTS, VocoderConfig from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.utils.audio.processor import AudioProcessor -task = Task.init(project_name="delightful-tts", task_name="vctk") data_path = "/raid/datasets/vctk_v092_48khz_removed_silence_silero_vad" output_path = os.path.dirname(os.path.abspath(__file__)) -dataset_config = BaseDatasetConfig(dataset_name="vctk", meta_file_train="", path=data_path, language="en-us") +dataset_config = BaseDatasetConfig(dataset_name="vctk", formatter="vctk", meta_file_train="", path=data_path, language="en-us") audio_config = DelightfulTtsAudioConfig() @@ -25,7 +23,7 @@ model_args = DelightfulTtsArgs() vocoder_config = VocoderConfig() something_tts_config = DelightfulTTSConfig( - run_name="delightful_tts_e2e_ljspeech", + run_name="delightful_tts_vctk", run_description="Train like in delightful tts paper.", model_args=model_args, audio=audio_config, @@ -75,7 +73,7 @@ speaker_manager.set_ids_from_data(train_samples + eval_samples, parse_key="speak config.model_args.num_speakers = speaker_manager.num_speakers -model = DelightfulTTSE2e( +model = DelightfulTTS( ap=ap, config=config, tokenizer=tokenizer, speaker_manager=speaker_manager, emotion_manager=None )