mirror of https://github.com/coqui-ai/TTS.git
Implement VitsAudioConfig
parent c44e39d9d6
commit 0ef6bf2c28
TTS/tts/configs/vits_config.py
@@ -2,7 +2,7 @@ from dataclasses import dataclass, field
 from typing import List
 
 from TTS.tts.configs.shared_configs import BaseTTSConfig
-from TTS.tts.models.vits import VitsArgs
+from TTS.tts.models.vits import VitsArgs, VitsAudioConfig
 
 
 @dataclass
@@ -16,6 +16,9 @@ class VitsConfig(BaseTTSConfig):
         model_args (VitsArgs):
             Model architecture arguments. Defaults to `VitsArgs()`.
 
+        audio (VitsAudioConfig):
+            Audio processing configuration. Defaults to `VitsAudioConfig()`.
+
         grad_clip (List):
             Gradient clipping thresholds for each optimizer. Defaults to `[1000.0, 1000.0]`.
 
@@ -94,6 +97,7 @@ class VitsConfig(BaseTTSConfig):
     model: str = "vits"
     # model specific params
     model_args: VitsArgs = field(default_factory=VitsArgs)
+    audio: VitsAudioConfig = VitsAudioConfig()
 
     # optimizer
     grad_clip: List[float] = field(default_factory=lambda: [1000, 1000])
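A minimal usage sketch (not part of the commit) of what the new field enables: audio settings can now be passed to VitsConfig explicitly instead of relying on the defaults. It assumes the config module path TTS.tts.configs.vits_config and that VitsAudioConfig accepts its fields as keyword arguments, as in the released library; the override values are made up for illustration.

from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import VitsAudioConfig

# hypothetical overrides; 22050 Hz / win 1024 / hop 256 are the class defaults
audio_config = VitsAudioConfig(sample_rate=16000, win_length=1024, hop_length=256)
config = VitsConfig(audio=audio_config)
print(config.audio.sample_rate)  # -> 16000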
TTS/tts/models/vits.py
@@ -200,6 +200,18 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fmax
     return spec
 
 
+#############################
+# CONFIGS
+#############################
+
+class VitsAudioConfig(Coqpit):
+    sample_rate: int = 22050
+    win_length: int = 1024
+    hop_length: int = 256
+    num_mels: int = 80
+    mel_fmin: int = 0
+    mel_fmax: int = None
+
 ##############################
 # DATASET
 ##############################
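Sketch only, not part of the commit: the new VitsAudioConfig fields line up with the wav_to_mel arguments visible in the hunk header above. Since the diff adds no separate fft_size field, n_fft is derived from win_length here as an assumption.

from TTS.tts.models.vits import VitsAudioConfig

cfg = VitsAudioConfig()  # defaults: 22050 Hz, win 1024, hop 256, 80 mel bands
mel_kwargs = dict(
    n_fft=cfg.win_length,        # assumption: FFT size taken from win_length
    num_mels=cfg.num_mels,
    sample_rate=cfg.sample_rate,
    hop_length=cfg.hop_length,
    win_length=cfg.win_length,
    fmin=cfg.mel_fmin,
    fmax=cfg.mel_fmax,
)
# mel = wav_to_mel(waveform, **mel_kwargs)  # waveform shaped as wav_to_mel expects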