Update VITS VCTK recipe

This commit is contained in:
Eren Gölge 2022-05-03 13:44:32 +02:00 committed by Eren Gölge
parent 463d6f1b22
commit b73b445278
1 changed file with 4 additions and 13 deletions

View File

@ -2,11 +2,10 @@ import os
from trainer import Trainer, TrainerArgs from trainer import Trainer, TrainerArgs
from TTS.config.shared_configs import BaseAudioConfig
from TTS.tts.configs.shared_configs import BaseDatasetConfig from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.configs.vits_config import VitsConfig from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.datasets import load_tts_samples from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.vits import Vits, VitsArgs from TTS.tts.models.vits import Vits, VitsArgs, VitsAudioConfig
from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
@ -17,22 +16,13 @@ dataset_config = BaseDatasetConfig(
) )
audio_config = BaseAudioConfig( audio_config = VitsAudioConfig(
sample_rate=22050, sample_rate=22050,
win_length=1024, win_length=1024,
hop_length=256, hop_length=256,
num_mels=80, num_mels=80,
preemphasis=0.0,
ref_level_db=20,
log_func="np.log",
do_trim_silence=True,
trim_db=23.0,
mel_fmin=0, mel_fmin=0,
mel_fmax=None, mel_fmax=None
spec_gain=1.0,
signal_norm=False,
do_amp_to_db_linear=False,
resample=True,
) )
vitsArgs = VitsArgs( vitsArgs = VitsArgs(
@ -62,6 +52,7 @@ config = VitsConfig(
max_text_len=325, # change this if you have a larger VRAM than 16GB max_text_len=325, # change this if you have a larger VRAM than 16GB
output_path=output_path, output_path=output_path,
datasets=[dataset_config], datasets=[dataset_config],
cudnn_benchmark=False
) )
# INITIALIZE THE AUDIO PROCESSOR # INITIALIZE THE AUDIO PROCESSOR