# mirror of https://github.com/coqui-ai/TTS.git
import logging
import os

# Silence TensorFlow's C++ logging; the env var must be set before tensorflow is imported.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # FATAL
logging.getLogger("tensorflow").setLevel(logging.FATAL)

import tensorflow as tf

from TTS.vocoder.tf.layers.melgan import ReflectionPad1d, ResidualStack


# pylint: disable=too-many-ancestors
# pylint: disable=abstract-method
class MelganGenerator(tf.keras.models.Model):
    """MelGAN generator TF implementation, dedicated to inference with no
    weight norm."""

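    # Note on data layout: the model emulates 1D convolutions over time with
    # width-1 2D layers. Inputs are reshaped from [B, C, T] to [B, T, 1, C]
    # (channels-last), so Conv2D/Conv2DTranspose with kernel_size=(k, 1)
    # operate along the time axis only.
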
    def __init__(
        self,
        in_channels=80,
        out_channels=1,
        proj_kernel=7,
        base_channels=512,
        upsample_factors=(8, 8, 2, 2),
        res_kernel=3,
        num_res_blocks=3,
    ):
        super().__init__()

        self.in_channels = in_channels

        # assert model parameters
        assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number."

        # setup additional model parameters
        base_padding = (proj_kernel - 1) // 2
        act_slope = 0.2
        self.inference_padding = 2

        # initial layer
        self.initial_layer = [
            ReflectionPad1d(base_padding),
            tf.keras.layers.Conv2D(
                filters=base_channels, kernel_size=(proj_kernel, 1), strides=1, padding="valid", use_bias=True, name="1"
            ),
        ]
        num_layers = 3  # count number of layers for layer naming

        # upsampling layers and residual stacks
        self.upsample_layers = []
        for idx, upsample_factor in enumerate(upsample_factors):
            layer_out_channels = base_channels // (2 ** (idx + 1))
            layer_filter_size = upsample_factor * 2
            layer_stride = upsample_factor
            # layer_output_padding = upsample_factor % 2
            self.upsample_layers += [
                tf.keras.layers.LeakyReLU(act_slope),
                tf.keras.layers.Conv2DTranspose(
                    filters=layer_out_channels,
                    kernel_size=(layer_filter_size, 1),
                    strides=(layer_stride, 1),
                    padding="same",
                    # output_padding=layer_output_padding,
                    use_bias=True,
                    name=f"{num_layers}",
                ),
                ResidualStack(
                    channels=layer_out_channels,
                    num_res_blocks=num_res_blocks,
                    kernel_size=res_kernel,
                    name=f"layers.{num_layers + 1}",
                ),
            ]
            num_layers += num_res_blocks - 1

        self.upsample_layers += [tf.keras.layers.LeakyReLU(act_slope)]
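
        # Each stage upsamples time by `upsample_factor` (the stride of the
        # transposed conv) and halves the channel count; with the default
        # upsample_factors=(8, 8, 2, 2) the overall hop size is
        # 8 * 8 * 2 * 2 = 256 output samples per input frame.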

        # final layer
        self.final_layers = [
            ReflectionPad1d(base_padding),
            tf.keras.layers.Conv2D(
                filters=out_channels, kernel_size=(proj_kernel, 1), use_bias=True, name=f"layers.{num_layers + 1}"
            ),
            tf.keras.layers.Activation("tanh"),
        ]

        # self.model_layers = tf.keras.models.Sequential(self.initial_layer + self.upsample_layers + self.final_layers, name="layers")
        self.model_layers = self.initial_layer + self.upsample_layers + self.final_layers
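        # The explicit "layers.N" names likely mirror the indices of the
        # corresponding PyTorch nn.Sequential modules, so that converted
        # torch checkpoints can be mapped onto these variables by name.
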
    @tf.function(experimental_relax_shapes=True)
    def call(self, c, training=False):
        """
        c : :math:`[B, C, T]`
        """
        if training:
            raise NotImplementedError()
        return self.inference(c)

    def inference(self, c):
        c = tf.transpose(c, perm=[0, 2, 1])
        c = tf.expand_dims(c, 2)
        # FIXME: TF has no replicate padding as in Torch
        # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT")
        o = c
        for layer in self.model_layers:
            o = layer(o)
        # o = self.model_layers(c)
        o = tf.transpose(o, perm=[0, 3, 2, 1])
        return o[:, :, 0, :]

    def build_inference(self):
        x = tf.random.uniform((1, self.in_channels, 4), dtype=tf.float32)
        self(x, training=False)
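
    # As a subclassed Keras model, variables are only created on the first
    # forward pass; build_inference() above triggers that with a dummy input,
    # e.g. before loading converted weights.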

    @tf.function(
        experimental_relax_shapes=True,
        input_signature=[
            tf.TensorSpec([1, None, None], dtype=tf.float32),
        ],
    )
    def inference_tflite(self, c):
        c = tf.transpose(c, perm=[0, 2, 1])
        c = tf.expand_dims(c, 2)
        # FIXME: TF has no replicate padding as in Torch
        # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT")
        o = c
        for layer in self.model_layers:
            o = layer(o)
        # o = self.model_layers(c)
        o = tf.transpose(o, perm=[0, 3, 2, 1])
        return o[:, :, 0, :]
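
# Minimal usage sketch (illustrative only; shapes and the output path are
# assumptions, not part of the original module):
if __name__ == "__main__":
    model = MelganGenerator()
    model.build_inference()  # trace once so the variables exist

    # Vocode a random 50-frame, 80-band mel spectrogram. With the default
    # upsample_factors, 50 frames should map to 50 * 256 = 12800 samples.
    mel = tf.random.uniform((1, 80, 50), dtype=tf.float32)  # [B, C, T]
    wav = model.inference(mel)
    print(wav.shape)  # expected: (1, 1, 12800)

    # Export the fixed-signature serving function to TFLite; converter flags
    # may need adjusting for your TensorFlow version.
    concrete_func = model.inference_tflite.get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()
    with open("melgan_generator.tflite", "wb") as f:  # output path is illustrative
        f.write(tflite_model)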