import torch
from torch import nn

from ..generic.normalization import LayerNorm


class DurationPredictor(nn.Module):
    """Duration predictor used by Glow-TTS.

    The network is two identical feature blocks followed by a point-wise
    projection down to a single channel::

        [2 x (conv1d_kxk -> relu -> layer_norm -> dropout)] -> conv1d_1x1 -> durs

    The input of every convolution and the final output are multiplied by
    ``x_mask``.

    Args:
        in_channels (int): Channel count of the input tensor.
        hidden_channels (int): Channel count produced by both conv blocks.
        kernel_size (int): Kernel width of the two conv blocks. The padding is
            ``kernel_size // 2``, which keeps the time axis length unchanged
            only when ``kernel_size`` is odd.
        dropout_p (float): Dropout probability applied at the end of each
            conv block.
    """

    def __init__(self, in_channels, hidden_channels, kernel_size, dropout_p):
        super().__init__()

        # Keep the constructor arguments around for introspection. Note that
        # ``hidden_channels`` is exposed under the name ``filter_channels``.
        self.in_channels = in_channels
        self.filter_channels = hidden_channels
        self.kernel_size = kernel_size
        self.dropout_p = dropout_p

        # Padding shared by both kxk convolutions.
        same_pad = kernel_size // 2

        # One dropout module is reused after both conv blocks.
        self.drop = nn.Dropout(dropout_p)

        # Block 1: in_channels -> hidden_channels.
        self.conv_1 = nn.Conv1d(
            in_channels, hidden_channels, kernel_size, padding=same_pad
        )
        self.norm_1 = LayerNorm(hidden_channels)

        # Block 2: hidden_channels -> hidden_channels.
        self.conv_2 = nn.Conv1d(
            hidden_channels, hidden_channels, kernel_size, padding=same_pad
        )
        self.norm_2 = LayerNorm(hidden_channels)

        # Output head: 1x1 convolution down to a single channel.
        self.proj = nn.Conv1d(hidden_channels, 1, 1)

    def forward(self, x, x_mask):
        """Run the predictor on a masked input sequence.

        Shapes:
            - x: :math:`[B, C, T]`
            - x_mask: :math:`[B, 1, T]`
            - output: :math:`[B, 1, T]`
        """
        blocks = (
            (self.conv_1, self.norm_1),
            (self.conv_2, self.norm_2),
        )
        for conv, norm in blocks:
            # Mask before the convolution, then relu -> layer norm -> dropout.
            x = self.drop(norm(torch.relu(conv(x * x_mask))))

        # Mask on both sides of the 1x1 projection.
        return self.proj(x * x_mask) * x_mask