Make style

2022-05-03 13:53:06 +02:00 · 2022-05-03 13:53:06 +02:00 · d832d6b08a
parent b73b445278
commit d832d6b08a
3 changed files with 6 additions and 14 deletions
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -204,6 +204,7 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm
 # CONFIGS
 #############################
 class VitsAudioConfig(Coqpit):
    sample_rate: int = 22050
    win_length: int = 1024
@@ -212,6 +213,7 @@ class VitsAudioConfig(Coqpit):
    mel_fmin: int = 0
    mel_fmax: int = None
 ##############################
 # DATASET
 ##############################
--- a/recipes/ljspeech/vits_tts/train_vits.py
+++ b/recipes/ljspeech/vits_tts/train_vits.py
@@ -14,12 +14,7 @@ dataset_config = BaseDatasetConfig(
    name="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/")
 )
 audio_config = VitsAudioConfig(
-    sample_rate=22050,
+    sample_rate=22050, win_length=1024, hop_length=256, num_mels=80, mel_fmin=0, mel_fmax=None
-    win_length=1024,
-    hop_length=256,
-    num_mels=80,
-    mel_fmin=0,
-    mel_fmax=None
 )
 config = VitsConfig(
@@ -43,7 +38,7 @@ config = VitsConfig(
    mixed_precision=True,
    output_path=output_path,
    datasets=[dataset_config],
-    cudnn_benchmark=False
+    cudnn_benchmark=False,
 )
 # INITIALIZE THE AUDIO PROCESSOR
--- a/recipes/vctk/vits/train_vits.py
+++ b/recipes/vctk/vits/train_vits.py
@@ -17,12 +17,7 @@ dataset_config = BaseDatasetConfig(
 audio_config = VitsAudioConfig(
-    sample_rate=22050,
+    sample_rate=22050, win_length=1024, hop_length=256, num_mels=80, mel_fmin=0, mel_fmax=None
-    win_length=1024,
-    hop_length=256,
-    num_mels=80,
-    mel_fmin=0,
-    mel_fmax=None
 )
 vitsArgs = VitsArgs(
@@ -52,7 +47,7 @@ config = VitsConfig(
    max_text_len=325,  # change this if you have a larger VRAM than 16GB
    output_path=output_path,
    datasets=[dataset_config],
-    cudnn_benchmark=False
+    cudnn_benchmark=False,
 )
 # INITIALIZE THE AUDIO PROCESSOR