import logging
import os

# the env var must be set before TensorFlow is imported for the C++ log filter to apply
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # FATAL
logging.getLogger("tensorflow").setLevel(logging.FATAL)

import tensorflow as tf

from TTS.vocoder.tf.layers.melgan import ReflectionPad1d, ResidualStack


# pylint: disable=too-many-ancestors
# pylint: disable=abstract-method
class MelganGenerator(tf.keras.models.Model):
    """MelGAN generator. TF implementation dedicated to inference, with no weight norm."""

    def __init__(
        self,
        in_channels=80,
        out_channels=1,
        proj_kernel=7,
        base_channels=512,
        upsample_factors=(8, 8, 2, 2),
        res_kernel=3,
        num_res_blocks=3,
    ):
        super().__init__()

        self.in_channels = in_channels

        # assert model parameters
        assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number."

        # setup additional model parameters
        base_padding = (proj_kernel - 1) // 2
        act_slope = 0.2
        self.inference_padding = 2

        # initial layer
        self.initial_layer = [
            ReflectionPad1d(base_padding),
            tf.keras.layers.Conv2D(
                filters=base_channels, kernel_size=(proj_kernel, 1), strides=1, padding="valid", use_bias=True, name="1"
            ),
        ]
        num_layers = 3  # count number of layers for layer naming

        # upsampling layers and residual stacks
        self.upsample_layers = []
        for idx, upsample_factor in enumerate(upsample_factors):
            layer_out_channels = base_channels // (2 ** (idx + 1))
            layer_filter_size = upsample_factor * 2
            layer_stride = upsample_factor
            # layer_output_padding = upsample_factor % 2
            self.upsample_layers += [
                tf.keras.layers.LeakyReLU(act_slope),
                tf.keras.layers.Conv2DTranspose(
                    filters=layer_out_channels,
                    kernel_size=(layer_filter_size, 1),
                    strides=(layer_stride, 1),
                    padding="same",
                    # output_padding=layer_output_padding,
                    use_bias=True,
                    name=f"{num_layers}",
                ),
                ResidualStack(
                    channels=layer_out_channels,
                    num_res_blocks=num_res_blocks,
                    kernel_size=res_kernel,
                    name=f"layers.{num_layers + 1}",
                ),
            ]
            num_layers += num_res_blocks - 1

        self.upsample_layers += [tf.keras.layers.LeakyReLU(act_slope)]

        # final layer
        self.final_layers = [
            ReflectionPad1d(base_padding),
            tf.keras.layers.Conv2D(
                filters=out_channels, kernel_size=(proj_kernel, 1), use_bias=True, name=f"layers.{num_layers + 1}"
            ),
            tf.keras.layers.Activation("tanh"),
        ]

        # self.model_layers = tf.keras.models.Sequential(self.initial_layer + self.upsample_layers + self.final_layers, name="layers")
        self.model_layers = self.initial_layer + self.upsample_layers + self.final_layers

    @tf.function(experimental_relax_shapes=True)
    def call(self, c, training=False):
        """
        c : :math:`[B, C, T]`
        """
        if training:
            raise NotImplementedError()
        return self.inference(c)

    def inference(self, c):
        # [B, C, T] -> [B, T, 1, C] so the 1D signal runs through 2D conv layers
        c = tf.transpose(c, perm=[0, 2, 1])
        c = tf.expand_dims(c, 2)
        # FIXME: TF has no replicate padding as in Torch
        # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT")
        o = c
        for layer in self.model_layers:
            o = layer(o)
        # o = self.model_layers(c)
        # [B, T, 1, C_out] -> [B, C_out, 1, T] -> [B, C_out, T]
        o = tf.transpose(o, perm=[0, 3, 2, 1])
        return o[:, :, 0, :]

    def build_inference(self):
        # trace the model once with a dummy input to build its weights
        x = tf.random.uniform((1, self.in_channels, 4), dtype=tf.float32)
        self(x, training=False)

    @tf.function(
        experimental_relax_shapes=True,
        input_signature=[
            tf.TensorSpec([1, None, None], dtype=tf.float32),
        ],
    )
    def inference_tflite(self, c):
        c = tf.transpose(c, perm=[0, 2, 1])
        c = tf.expand_dims(c, 2)
        # FIXME: TF has no replicate padding as in Torch
        # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT")
        o = c
        for layer in self.model_layers:
            o = layer(o)
        # o = self.model_layers(c)
        o = tf.transpose(o, perm=[0, 3, 2, 1])
        return o[:, :, 0, :]
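

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): instantiate the
# generator with its defaults, trace it once, and vocode a dummy mel input.
# Shapes follow the module's own conventions: [B, C, T] in, [B, 1, T * 256] out
# for the default upsample_factors, since 8 * 8 * 2 * 2 = 256. The frame count
# (50) is an arbitrary illustrative value.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    model = MelganGenerator()
    model.build_inference()  # build weights by tracing with a dummy input

    mel = tf.random.uniform((1, 80, 50), dtype=tf.float32)  # [B, C, T]
    wav = model.inference(mel)
    print(wav.shape)  # should be (1, 1, 12800): 50 frames * 256x upsampling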