diff --git a/recipes/ljspeech/align_tts/train_aligntts.py b/recipes/ljspeech/align_tts/train_aligntts.py new file mode 100644 index 00000000..4a4f86c4 --- /dev/null +++ b/recipes/ljspeech/align_tts/train_aligntts.py @@ -0,0 +1,30 @@ +import os + +from TTS.tts.configs import AlignTTSConfig +from TTS.tts.configs import BaseDatasetConfig +from TTS.trainer import init_training, Trainer, TrainingArgs + + +output_path = os.path.dirname(os.path.abspath(__file__)) +dataset_config = BaseDatasetConfig(name="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/")) +config = AlignTTSConfig( + batch_size=32, + eval_batch_size=16, + num_loader_workers=4, + num_eval_loader_workers=4, + run_eval=True, + test_delay_epochs=-1, + epochs=1000, + text_cleaner="english_cleaners", + use_phonemes=False, + phoneme_language="en-us", + phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), + print_step=25, + print_eval=True, + mixed_precision=False, + output_path=output_path, + datasets=[dataset_config] +) +args, config, output_path, _, c_logger, tb_logger = init_training(TrainingArgs(), config) +trainer = Trainer(args, config, output_path, c_logger, tb_logger) +trainer.fit()