# Source: mirror of https://github.com/coqui-ai/TTS.git (Python, 63 lines, 1.7 KiB)
from dataclasses import dataclass
|
|
from typing import List
|
|
|
|
from .shared_configs import BaseTTSConfig, GSTConfig
|
|
|
|
|
|
@dataclass
class TacotronConfig(BaseTTSConfig):
    """Defines parameters for Tacotron based models.

    Every attribute declared here is a dataclass field with a default value,
    added on top of whatever ``BaseTTSConfig`` already provides. The fields are
    grouped by the section comments below; their declaration order is kept
    stable because it determines the order of the generated ``__init__``
    arguments.

    NOTE(review): several fields (``gst``, ``gst_style_input``,
    ``gradual_training``, ``attention_heads``) default to ``None`` although
    their annotations are not ``Optional``. The annotations are left as-is
    because the base config class is not visible here and may inspect them at
    runtime -- confirm before tightening them.
    """

    model: str = "tacotron"
    use_gst: bool = False
    gst: GSTConfig = None
    gst_style_input: str = None
    # model specific params
    r: int = 2
    gradual_training: List = None
    memory_size: int = -1
    prenet_type: str = "original"
    prenet_dropout: bool = True
    prenet_dropout_at_inference: bool = False
    stopnet: bool = True
    separate_stopnet: bool = True
    stopnet_pos_weight: float = 10.0

    # attention layers
    attention_type: str = "original"
    attention_heads: int = None
    attention_norm: str = "sigmoid"
    windowing: bool = False
    use_forward_attn: bool = False
    forward_attn_mask: bool = False
    transition_agent: bool = False
    location_attn: bool = True

    # advanced methods
    bidirectional_decoder: bool = False
    double_decoder_consistency: bool = False
    ddc_r: int = 6

    # multi-speaker settings
    use_speaker_embedding: bool = False
    use_external_speaker_embedding_file: bool = False
    # NOTE(review): annotated as ``str`` but defaults to ``False``. The default
    # is kept unchanged for backward compatibility; presumably ``None`` or a
    # file path was intended -- verify against the callers before changing it.
    external_speaker_embedding_file: str = False

    # optimizer parameters
    noam_schedule: bool = False
    warmup_steps: int = 4000
    lr: float = 1e-4
    wd: float = 1e-6
    grad_clip: float = 5.0
    seq_len_norm: bool = False
    loss_masking: bool = True

    # loss params
    decoder_loss_alpha: float = 0.25
    postnet_loss_alpha: float = 0.25
    postnet_diff_spec_alpha: float = 0.25
    decoder_diff_spec_alpha: float = 0.25
    decoder_ssim_alpha: float = 0.25
    postnet_ssim_alpha: float = 0.25
    ga_alpha: float = 5.0