Implement VitsAudioConfig

This commit is contained in:
Eren Gölge 2022-05-03 13:41:51 +02:00 committed by Eren Gölge
parent c44e39d9d6
commit 0ef6bf2c28
2 changed files with 17 additions and 1 deletions

View File

@ -2,7 +2,7 @@ from dataclasses import dataclass, field
from typing import List
from TTS.tts.configs.shared_configs import BaseTTSConfig
from TTS.tts.models.vits import VitsArgs
from TTS.tts.models.vits import VitsArgs, VitsAudioConfig
@dataclass
@ -16,6 +16,9 @@ class VitsConfig(BaseTTSConfig):
model_args (VitsArgs):
Model architecture arguments. Defaults to `VitsArgs()`.
audio (VitsAudioConfig):
Audio processing configuration. Defaults to `VitsAudioConfig()`.
grad_clip (List):
Gradient clipping thresholds for each optimizer. Defaults to `[1000.0, 1000.0]`.
@ -94,6 +97,7 @@ class VitsConfig(BaseTTSConfig):
model: str = "vits"
# model specific params
model_args: VitsArgs = field(default_factory=VitsArgs)
# Use a factory so each VitsConfig gets its own VitsAudioConfig; a bare
# `VitsAudioConfig()` default is created once at class-definition time and
# shared (and mutated) across every config instance.
audio: VitsAudioConfig = field(default_factory=VitsAudioConfig)
# optimizer
grad_clip: List[float] = field(default_factory=lambda: [1000, 1000])

View File

@ -200,6 +200,18 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm
return spec
#############################
# CONFIGS
#############################
# NOTE(review): imported next to the class because this module's import header
# is outside the reviewed hunk; re-importing an already imported name is
# harmless. Move it to the top-of-file imports if it is missing there.
from dataclasses import dataclass


@dataclass
class VitsAudioConfig(Coqpit):
    """Audio processing parameters used by the VITS model.

    The class is decorated with ``@dataclass`` so the annotated attributes
    below are registered as dataclass fields. Without the decorator they are
    plain class attributes: they cannot be overridden through the constructor
    and are invisible to dataclass field introspection.

    Attributes:
        sample_rate: Audio sampling rate (presumably in Hz). Defaults to 22050.
        win_length: Analysis window length (presumably in samples).
            Defaults to 1024.
        hop_length: Hop between consecutive frames (presumably in samples).
            Defaults to 256.
        num_mels: Number of mel bands. Defaults to 80.
        mel_fmin: Lower frequency bound of the mel filterbank. Defaults to 0.
        mel_fmax: Upper frequency bound of the mel filterbank. Defaults to
            ``None``; presumably "no explicit upper bound" -- confirm against
            the mel-spectrogram code that consumes it.
    """

    sample_rate: int = 22050
    win_length: int = 1024
    hop_length: int = 256
    num_mels: int = 80
    mel_fmin: int = 0
    # Annotation kept as in the original; the default is None, not an int.
    mel_fmax: int = None
##############################
# DATASET
##############################