From d832d6b08a0ef5be9fd26ff0b11e2491f512d6b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Tue, 3 May 2022 13:53:06 +0200 Subject: [PATCH] Make style --- TTS/tts/models/vits.py | 2 ++ recipes/ljspeech/vits_tts/train_vits.py | 9 ++------- recipes/vctk/vits/train_vits.py | 9 ++------- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 534fa96d..f58884a0 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -204,6 +204,7 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm # CONFIGS ############################# + class VitsAudioConfig(Coqpit): sample_rate: int = 22050 win_length: int = 1024 @@ -212,6 +213,7 @@ class VitsAudioConfig(Coqpit): mel_fmin: int = 0 mel_fmax: int = None + ############################## # DATASET ############################## diff --git a/recipes/ljspeech/vits_tts/train_vits.py b/recipes/ljspeech/vits_tts/train_vits.py index b059fee4..94e230a1 100644 --- a/recipes/ljspeech/vits_tts/train_vits.py +++ b/recipes/ljspeech/vits_tts/train_vits.py @@ -14,12 +14,7 @@ dataset_config = BaseDatasetConfig( name="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/") ) audio_config = VitsAudioConfig( - sample_rate=22050, - win_length=1024, - hop_length=256, - num_mels=80, - mel_fmin=0, - mel_fmax=None + sample_rate=22050, win_length=1024, hop_length=256, num_mels=80, mel_fmin=0, mel_fmax=None ) config = VitsConfig( @@ -43,7 +38,7 @@ config = VitsConfig( mixed_precision=True, output_path=output_path, datasets=[dataset_config], - cudnn_benchmark=False + cudnn_benchmark=False, ) # INITIALIZE THE AUDIO PROCESSOR diff --git a/recipes/vctk/vits/train_vits.py b/recipes/vctk/vits/train_vits.py index 1bd88509..814d0989 100644 --- a/recipes/vctk/vits/train_vits.py +++ b/recipes/vctk/vits/train_vits.py @@ -17,12 +17,7 @@ dataset_config = BaseDatasetConfig( audio_config = VitsAudioConfig( - sample_rate=22050, - win_length=1024, - hop_length=256, - num_mels=80, - mel_fmin=0, - mel_fmax=None + sample_rate=22050, win_length=1024, hop_length=256, num_mels=80, mel_fmin=0, mel_fmax=None ) vitsArgs = VitsArgs( @@ -52,7 +47,7 @@ config = VitsConfig( max_text_len=325, # change this if you have a larger VRAM than 16GB output_path=output_path, datasets=[dataset_config], - cudnn_benchmark=False + cudnn_benchmark=False, ) # INITIALIZE THE AUDIO PROCESSOR