# Mirror of https://github.com/coqui-ai/TTS.git (51 lines, 1.6 KiB, Python)
from dataclasses import asdict, dataclass, field
|
|
|
|
from .shared_configs import BaseVocoderConfig
|
|
|
|
|
|
@dataclass
class WavernnConfig(BaseVocoderConfig):
    """Defines parameters for Wavernn vocoder.

    The fields are grouped into model-specific parameters, inference
    settings, training overrides and optimizer overrides. Dict-valued
    defaults are built through ``field(default_factory=...)`` so that a fresh
    dict is created for each instance instead of being shared.
    """

    model: str = "wavernn"

    # Model specific params
    # NOTE(review): `mode` is annotated as `str`, yet the option list below
    # mentions an int-valued "bits" setting - confirm the intended type.
    mode: str = "mold"  # mold [string], gauss [string], bits [int]
    mulaw: bool = True  # apply mulaw if mode is bits
    generator_model: str = "WaveRNN"
    wavernn_model_params: dict = field(
        default_factory=lambda: {
            "rnn_dims": 512,
            "fc_dims": 512,
            "compute_dims": 128,
            "res_out_dims": 128,
            "num_res_blocks": 10,
            "use_aux_net": True,
            "use_upsample_net": True,
            # The product of the factors (4 * 8 * 8 = 256) needs to correctly
            # factorise hop_length.
            "upsample_factors": [4, 8, 8],
        }
    )

    # Inference
    batched: bool = True
    target_samples: int = 11000
    overlap_samples: int = 550

    # Training - overrides
    # NOTE(review): presumably these replace defaults declared on
    # BaseVocoderConfig - confirm against the parent class.
    epochs: int = 10000
    batch_size: int = 256
    seq_len: int = 1280
    padding: int = 2
    use_noise_augment: bool = False
    use_cache: bool = True
    # NOTE(review): no discriminator is configured in this class - confirm
    # whether this value is actually consumed for WaveRNN training.
    steps_to_start_discriminator: int = 200000
    mixed_precision: bool = True
    eval_split_size: int = 50
    test_every_epochs: int = 10  # number of epochs to wait until the next test run (synthesizing a full audio clip).

    # optimizer overrides
    grad_clip: float = 4.0
    lr: float = 1e-4  # Initial learning rate.
    lr_scheduler: str = "MultiStepLR"  # one of the schedulers from https://pytorch.org/docs/stable/optim.html
    lr_scheduler_params: dict = field(default_factory=lambda: {"gamma": 0.5, "milestones": [200000, 400000, 600000]})