From e386caa07107a77c7dfb2301c8ba60f05d0211da Mon Sep 17 00:00:00 2001 From: erogol Date: Tue, 4 Aug 2020 14:07:47 +0200 Subject: [PATCH] mass linter fix --- .../bin/convert_melgan_tflite.py | 1 - .../bin/convert_melgan_torch_to_tf.py | 1 - .../bin/convert_tacotron2_tflite.py | 2 +- .../bin/convert_tacotron2_torch_to_tf.py | 2 +- mozilla_voice_tts/bin/synthesize.py | 2 - mozilla_voice_tts/bin/train_encoder.py | 13 +++--- mozilla_voice_tts/bin/train_tts.py | 43 +++++++++++-------- mozilla_voice_tts/bin/train_vocoder.py | 23 ++++++---- mozilla_voice_tts/server/synthesizer.py | 1 - mozilla_voice_tts/speaker_encoder/README.md | 2 +- .../speaker_encoder/generic_utils.py | 2 +- mozilla_voice_tts/speaker_encoder/model.py | 1 - mozilla_voice_tts/tts/layers/common_layers.py | 8 ++-- mozilla_voice_tts/tts/layers/gst_layers.py | 2 +- mozilla_voice_tts/tts/layers/losses.py | 1 - mozilla_voice_tts/tts/layers/tacotron.py | 11 +++-- mozilla_voice_tts/tts/layers/tacotron2.py | 11 ++--- .../tts/tf/layers/common_layers.py | 3 ++ mozilla_voice_tts/tts/tf/layers/tacotron2.py | 4 +- mozilla_voice_tts/tts/tf/models/tacotron2.py | 3 +- .../tts/tf/utils/convert_torch_to_tf_utils.py | 3 ++ .../tts/tf/utils/generic_utils.py | 1 - mozilla_voice_tts/tts/tf/utils/io.py | 1 - mozilla_voice_tts/tts/tf/utils/tflite.py | 2 +- mozilla_voice_tts/tts/utils/data.py | 1 - mozilla_voice_tts/tts/utils/distribute.py | 12 ++---- mozilla_voice_tts/tts/utils/io.py | 2 +- mozilla_voice_tts/tts/utils/measures.py | 3 -- mozilla_voice_tts/tts/utils/speakers.py | 2 - mozilla_voice_tts/tts/utils/text/__init__.py | 1 + mozilla_voice_tts/tts/utils/visual.py | 4 +- mozilla_voice_tts/utils/audio.py | 6 +-- mozilla_voice_tts/utils/generic_utils.py | 2 +- mozilla_voice_tts/utils/io.py | 2 +- mozilla_voice_tts/utils/radam.py | 4 +- mozilla_voice_tts/utils/tensorboard_logger.py | 2 +- mozilla_voice_tts/vocoder/layers/losses.py | 2 +- .../vocoder/models/melgan_generator.py | 1 - .../models/melgan_multiscale_discriminator.py | 2 +- mozilla_voice_tts/vocoder/tf/layers/melgan.py | 2 +- .../vocoder/tf/models/melgan_generator.py | 2 +- .../vocoder/tf/utils/generic_utils.py | 2 +- mozilla_voice_tts/vocoder/tf/utils/io.py | 2 +- mozilla_voice_tts/vocoder/tf/utils/tflite.py | 2 +- .../vocoder/utils/generic_utils.py | 5 +-- mozilla_voice_tts/vocoder/utils/io.py | 2 +- notebooks/dataset_analysis/analyze.py | 2 +- setup.py | 2 +- tests/generic_utils_text.py | 35 --------------- tests/test_audio.py | 9 ++-- tests/test_demo_server.py | 6 +-- tests/test_encoder.py | 6 +-- tests/test_layers.py | 6 +-- tests/test_loader.py | 19 ++++---- tests/test_preprocessors.py | 2 +- tests/test_tacotron2_model.py | 8 ++-- tests/test_tacotron2_tf_model.py | 15 ++++--- tests/test_text_processing.py | 14 +++--- tests/test_vocoder_melgan_discriminator.py | 2 +- tests/test_vocoder_melgan_generator.py | 1 - tests/test_vocoder_pqmf.py | 1 - tests/test_vocoder_tf_pqmf.py | 1 - 62 files changed, 153 insertions(+), 182 deletions(-) delete mode 100644 tests/generic_utils_text.py diff --git a/mozilla_voice_tts/bin/convert_melgan_tflite.py b/mozilla_voice_tts/bin/convert_melgan_tflite.py index 51ea34a7..1ede8e05 100644 --- a/mozilla_voice_tts/bin/convert_melgan_tflite.py +++ b/mozilla_voice_tts/bin/convert_melgan_tflite.py @@ -30,4 +30,3 @@ model = load_checkpoint(model, args.tf_model) # create tflite model tflite_model = convert_melgan_to_tflite(model, output_path=args.output_path) - diff --git a/mozilla_voice_tts/bin/convert_melgan_torch_to_tf.py b/mozilla_voice_tts/bin/convert_melgan_torch_to_tf.py index b7a8bc4b..364c9e54 100644 --- a/mozilla_voice_tts/bin/convert_melgan_torch_to_tf.py +++ b/mozilla_voice_tts/bin/convert_melgan_torch_to_tf.py @@ -114,4 +114,3 @@ assert compare_torch_tf(output_torch, output_tf) < 1e-5, compare_torch_tf( save_checkpoint(model_tf, checkpoint['step'], checkpoint['epoch'], args.output_path) print(' > Model conversion is successfully completed :).') - diff --git a/mozilla_voice_tts/bin/convert_tacotron2_tflite.py b/mozilla_voice_tts/bin/convert_tacotron2_tflite.py index 9303365f..c7c6891b 100644 --- a/mozilla_voice_tts/bin/convert_tacotron2_tflite.py +++ b/mozilla_voice_tts/bin/convert_tacotron2_tflite.py @@ -34,4 +34,4 @@ model = load_checkpoint(model, args.tf_model) model.decoder.set_max_decoder_steps(1000) # create tflite model -tflite_model = convert_tacotron2_to_tflite(model, output_path=args.output_path) \ No newline at end of file +tflite_model = convert_tacotron2_to_tflite(model, output_path=args.output_path) diff --git a/mozilla_voice_tts/bin/convert_tacotron2_torch_to_tf.py b/mozilla_voice_tts/bin/convert_tacotron2_torch_to_tf.py index 9e011af4..1c4db982 100644 --- a/mozilla_voice_tts/bin/convert_tacotron2_torch_to_tf.py +++ b/mozilla_voice_tts/bin/convert_tacotron2_torch_to_tf.py @@ -92,7 +92,7 @@ var_map = [ # %% # get tf_model graph -mel_pred = model_tf.build_inference() +model_tf.build_inference() # get tf variables tf_vars = model_tf.weights diff --git a/mozilla_voice_tts/bin/synthesize.py b/mozilla_voice_tts/bin/synthesize.py index c680d18b..6f139433 100644 --- a/mozilla_voice_tts/bin/synthesize.py +++ b/mozilla_voice_tts/bin/synthesize.py @@ -40,8 +40,6 @@ def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_id): if __name__ == "__main__": - global symbols, phonemes - parser = argparse.ArgumentParser() parser.add_argument('text', type=str, help='Text to generate speech.') parser.add_argument('config_path', diff --git a/mozilla_voice_tts/bin/train_encoder.py b/mozilla_voice_tts/bin/train_encoder.py index aa708e0f..d612ac6e 100644 --- a/mozilla_voice_tts/bin/train_encoder.py +++ b/mozilla_voice_tts/bin/train_encoder.py @@ -9,6 +9,8 @@ import traceback import torch from torch.utils.data import DataLoader + +from mozilla_voice_tts.generic_utils import count_parameters from mozilla_voice_tts.speaker_encoder.dataset import MyDataset from mozilla_voice_tts.speaker_encoder.generic_utils import save_best_model from mozilla_voice_tts.speaker_encoder.loss import GE2ELoss @@ -16,10 +18,9 @@ from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder from mozilla_voice_tts.speaker_encoder.visual import plot_embeddings from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data from mozilla_voice_tts.tts.utils.audio import AudioProcessor -from mozilla_voice_tts.tts.utils.generic_utils import (create_experiment_folder, - get_git_branch, - remove_experiment_folder, - set_init_dict) +from mozilla_voice_tts.tts.utils.generic_utils import ( + create_experiment_folder, get_git_branch, remove_experiment_folder, + set_init_dict) from mozilla_voice_tts.tts.utils.io import copy_config_file, load_config from mozilla_voice_tts.tts.utils.radam import RAdam from mozilla_voice_tts.tts.utils.tensorboard_logger import TensorboardLogger @@ -182,8 +183,8 @@ def main(args): # pylint: disable=redefined-outer-name meta_data_train, meta_data_eval = load_meta_data(c.datasets) global_step = args.restore_step - train_loss, global_step = train(model, criterion, optimizer, scheduler, ap, - global_step) + _, global_step = train(model, criterion, optimizer, scheduler, ap, + global_step) if __name__ == '__main__': diff --git a/mozilla_voice_tts/bin/train_tts.py b/mozilla_voice_tts/bin/train_tts.py index 3d176a85..719b926f 100644 --- a/mozilla_voice_tts/bin/train_tts.py +++ b/mozilla_voice_tts/bin/train_tts.py @@ -11,31 +11,40 @@ import traceback import numpy as np import torch from torch.utils.data import DataLoader + from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset from mozilla_voice_tts.tts.layers.losses import TacotronLoss from mozilla_voice_tts.tts.utils.distribute import (DistributedSampler, - apply_gradient_allreduce, - init_distributed, reduce_tensor) + apply_gradient_allreduce, + init_distributed, + reduce_tensor) from mozilla_voice_tts.tts.utils.generic_utils import check_config, setup_model from mozilla_voice_tts.tts.utils.io import save_best_model, save_checkpoint from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score -from mozilla_voice_tts.tts.utils.speakers import (get_speakers, load_speaker_mapping, - save_speaker_mapping) +from mozilla_voice_tts.tts.utils.speakers import (get_speakers, + load_speaker_mapping, + save_speaker_mapping) from mozilla_voice_tts.tts.utils.synthesis import synthesis -from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols +from mozilla_voice_tts.tts.utils.text.symbols import (make_symbols, phonemes, + symbols) from mozilla_voice_tts.tts.utils.visual import plot_alignment, plot_spectrogram from mozilla_voice_tts.utils.audio import AudioProcessor from mozilla_voice_tts.utils.console_logger import ConsoleLogger -from mozilla_voice_tts.utils.generic_utils import (KeepAverage, count_parameters, - create_experiment_folder, get_git_branch, - remove_experiment_folder, set_init_dict) +from mozilla_voice_tts.utils.generic_utils import (KeepAverage, + count_parameters, + create_experiment_folder, + get_git_branch, + remove_experiment_folder, + set_init_dict) from mozilla_voice_tts.utils.io import copy_config_file, load_config from mozilla_voice_tts.utils.radam import RAdam from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger -from mozilla_voice_tts.utils.training import (NoamLR, adam_weight_decay, check_update, - gradual_training_scheduler, set_weight_decay, - setup_torch_training_env) +from mozilla_voice_tts.utils.training import (NoamLR, adam_weight_decay, + check_update, + gradual_training_scheduler, + set_weight_decay, + setup_torch_training_env) use_cuda, num_gpus = setup_torch_training_env(True, False) @@ -47,7 +56,7 @@ def setup_loader(ap, r, is_val=False, verbose=False): dataset = MyDataset( r, c.text_cleaner, - compute_linear_spec=True if c.model.lower() == 'tacotron' else False, + compute_linear_spec=c.model.lower() == 'tacotron', meta_data=meta_data_eval if is_val else meta_data_train, ap=ap, tp=c.characters if 'characters' in c.keys() else None, @@ -156,7 +165,7 @@ def train(model, criterion, optimizer, optimizer_st, scheduler, decoder_backward_output = None alignments_backward = None - # set the alignment lengths wrt reduction factor for guided attention + # set the [alignment] lengths wrt reduction factor for guided attention if mel_lengths.max() % model.decoder.r != 0: alignment_lengths = (mel_lengths + (model.decoder.r - (mel_lengths.max() % model.decoder.r))) // model.decoder.r else: @@ -171,7 +180,7 @@ def train(model, criterion, optimizer, optimizer_st, scheduler, # backward pass if amp is not None: - with amp.scale_loss( loss_dict['loss'], optimizer) as scaled_loss: + with amp.scale_loss(loss_dict['loss'], optimizer) as scaled_loss: scaled_loss.backward() else: loss_dict['loss'].backward() @@ -425,7 +434,7 @@ def evaluate(model, criterion, ap, global_step, epoch): style_wav = c.get("style_wav_for_test") for idx, test_sentence in enumerate(test_sentences): try: - wav, alignment, decoder_output, postnet_output, stop_tokens, inputs = synthesis( + wav, alignment, decoder_output, postnet_output, stop_tokens, _ = synthesis( model, test_sentence, c, @@ -448,7 +457,7 @@ def evaluate(model, criterion, ap, global_step, epoch): postnet_output, ap, output_fig=False) test_figures['{}-alignment'.format(idx)] = plot_alignment( alignment, output_fig=False) - except: + except: #pylint: disable=bare-except print(" !! Error creating Test Sentence -", idx) traceback.print_exc() tb_logger.tb_test_audios(global_step, test_audios, @@ -531,7 +540,7 @@ def main(args): # pylint: disable=redefined-outer-name if c.reinit_layers: raise RuntimeError model.load_state_dict(checkpoint['model']) - except: + except KeyError: print(" > Partial model initialization.") model_dict = model.state_dict() model_dict = set_init_dict(model_dict, checkpoint['model'], c) diff --git a/mozilla_voice_tts/bin/train_vocoder.py b/mozilla_voice_tts/bin/train_vocoder.py index 42d0ac75..8db77cf2 100644 --- a/mozilla_voice_tts/bin/train_vocoder.py +++ b/mozilla_voice_tts/bin/train_vocoder.py @@ -8,23 +8,30 @@ from inspect import signature import torch from torch.utils.data import DataLoader + from mozilla_voice_tts.utils.audio import AudioProcessor from mozilla_voice_tts.utils.console_logger import ConsoleLogger -from mozilla_voice_tts.utils.generic_utils import (KeepAverage, count_parameters, - create_experiment_folder, get_git_branch, - remove_experiment_folder, set_init_dict) +from mozilla_voice_tts.utils.generic_utils import (KeepAverage, + count_parameters, + create_experiment_folder, + get_git_branch, + remove_experiment_folder, + set_init_dict) from mozilla_voice_tts.utils.io import copy_config_file, load_config from mozilla_voice_tts.utils.radam import RAdam from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger from mozilla_voice_tts.utils.training import setup_torch_training_env from mozilla_voice_tts.vocoder.datasets.gan_dataset import GANDataset -from mozilla_voice_tts.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data +from mozilla_voice_tts.vocoder.datasets.preprocess import (load_wav_data, + load_wav_feat_data) # from distribute import (DistributedSampler, apply_gradient_allreduce, # init_distributed, reduce_tensor) -from mozilla_voice_tts.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss -from mozilla_voice_tts.vocoder.utils.generic_utils import (check_config, plot_results, - setup_discriminator, - setup_generator) +from mozilla_voice_tts.vocoder.layers.losses import (DiscriminatorLoss, + GeneratorLoss) +from mozilla_voice_tts.vocoder.utils.generic_utils import (check_config, + plot_results, + setup_discriminator, + setup_generator) from mozilla_voice_tts.vocoder.utils.io import save_best_model, save_checkpoint use_cuda, num_gpus = setup_torch_training_env(True, True) diff --git a/mozilla_voice_tts/server/synthesizer.py b/mozilla_voice_tts/server/synthesizer.py index 76682a9b..e6854958 100644 --- a/mozilla_voice_tts/server/synthesizer.py +++ b/mozilla_voice_tts/server/synthesizer.py @@ -4,7 +4,6 @@ import time import numpy as np import torch -import yaml import pysbd from mozilla_voice_tts.utils.audio import AudioProcessor diff --git a/mozilla_voice_tts/speaker_encoder/README.md b/mozilla_voice_tts/speaker_encoder/README.md index b6f541f8..7706c7d7 100644 --- a/mozilla_voice_tts/speaker_encoder/README.md +++ b/mozilla_voice_tts/speaker_encoder/README.md @@ -10,7 +10,7 @@ Below is an example showing embedding results of various speakers. You can gener Download a pretrained model from [Released Models](https://github.com/mozilla/TTS/wiki/Released-Models) page. -To run the code, you need to follow the same flow as in TTS. +To run the code, you need to follow the same flow as in mozilla_voice_tts. - Define 'config.json' for your needs. Note that, audio parameters should match your TTS model. - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360``` diff --git a/mozilla_voice_tts/speaker_encoder/generic_utils.py b/mozilla_voice_tts/speaker_encoder/generic_utils.py index c568d129..f649ceb9 100644 --- a/mozilla_voice_tts/speaker_encoder/generic_utils.py +++ b/mozilla_voice_tts/speaker_encoder/generic_utils.py @@ -38,4 +38,4 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path, print("\n > BEST MODEL ({0:.5f}) : {1:}".format( model_loss, bestmodel_path)) torch.save(state, bestmodel_path) - return best_loss \ No newline at end of file + return best_loss diff --git a/mozilla_voice_tts/speaker_encoder/model.py b/mozilla_voice_tts/speaker_encoder/model.py index b3bd71ff..ca2abe31 100644 --- a/mozilla_voice_tts/speaker_encoder/model.py +++ b/mozilla_voice_tts/speaker_encoder/model.py @@ -85,4 +85,3 @@ class SpeakerEncoder(nn.Module): frames[cur_iter <= num_iters, :, :] ) return embed / num_iters - diff --git a/mozilla_voice_tts/tts/layers/common_layers.py b/mozilla_voice_tts/tts/layers/common_layers.py index d4cb63a1..d197bb86 100644 --- a/mozilla_voice_tts/tts/layers/common_layers.py +++ b/mozilla_voice_tts/tts/layers/common_layers.py @@ -1,6 +1,5 @@ import torch from torch import nn -from torch.autograd import Variable from torch.nn import functional as F @@ -52,6 +51,7 @@ class LinearBN(nn.Module): class Prenet(nn.Module): + # pylint: disable=dangerous-default-value def __init__(self, in_features, prenet_type="original", @@ -300,8 +300,8 @@ class OriginalAttention(nn.Module): def apply_forward_attention(self, alignment): # forward attention - fwd_shifted_alpha = F.pad(self.alpha[:, :-1].clone().to(alignment.device), - (1, 0, 0, 0)) + fwd_shifted_alpha = F.pad( + self.alpha[:, :-1].clone().to(alignment.device), (1, 0, 0, 0)) # compute transition potentials alpha = ((1 - self.u) * self.alpha + self.u * fwd_shifted_alpha @@ -309,7 +309,7 @@ class OriginalAttention(nn.Module): # force incremental alignment if not self.training and self.forward_attn_mask: _, n = fwd_shifted_alpha.max(1) - val, n2 = alpha.max(1) + val, _ = alpha.max(1) for b in range(alignment.shape[0]): alpha[b, n[b] + 3:] = 0 alpha[b, :( diff --git a/mozilla_voice_tts/tts/layers/gst_layers.py b/mozilla_voice_tts/tts/layers/gst_layers.py index 8058d5ed..01f90697 100644 --- a/mozilla_voice_tts/tts/layers/gst_layers.py +++ b/mozilla_voice_tts/tts/layers/gst_layers.py @@ -72,7 +72,7 @@ class ReferenceEncoder(nn.Module): # x: 3D tensor [batch_size, post_conv_width, # num_channels*post_conv_height] self.recurrence.flatten_parameters() - memory, out = self.recurrence(x) + _, out = self.recurrence(x) # out: 3D tensor [seq_len==1, batch_size, encoding_size=128] return out.squeeze(0) diff --git a/mozilla_voice_tts/tts/layers/losses.py b/mozilla_voice_tts/tts/layers/losses.py index a09b77b9..ac80ddbf 100644 --- a/mozilla_voice_tts/tts/layers/losses.py +++ b/mozilla_voice_tts/tts/layers/losses.py @@ -243,4 +243,3 @@ class TacotronLoss(torch.nn.Module): return_dict['loss'] = loss return return_dict - diff --git a/mozilla_voice_tts/tts/layers/tacotron.py b/mozilla_voice_tts/tts/layers/tacotron.py index 656d9575..2fc9e86a 100644 --- a/mozilla_voice_tts/tts/layers/tacotron.py +++ b/mozilla_voice_tts/tts/layers/tacotron.py @@ -1,7 +1,7 @@ # coding: utf-8 import torch from torch import nn -from .common_layers import Prenet, init_attn, Linear +from .common_layers import Prenet, init_attn class BatchNormConv1d(nn.Module): @@ -46,9 +46,9 @@ class BatchNormConv1d(nn.Module): # self.init_layers() def init_layers(self): - if type(self.activation) == torch.nn.ReLU: + if isinstance(self.activation, torch.nn.ReLU): w_gain = 'relu' - elif type(self.activation) == torch.nn.Tanh: + elif isinstance(self.activation, torch.nn.Tanh): w_gain = 'tanh' elif self.activation is None: w_gain = 'linear' @@ -117,7 +117,7 @@ class CBHG(nn.Module): - input: (B, C, T_in) - output: (B, T_in, C*2) """ - + #pylint: disable=dangerous-default-value def __init__(self, in_features, K=16, @@ -355,7 +355,6 @@ class Decoder(nn.Module): Initialization of decoder states """ B = inputs.size(0) - T = inputs.size(1) # go frame as zeros matrix if self.use_memory_queue: self.memory_input = torch.zeros(1, device=inputs.device).repeat(B, self.frame_channels * self.memory_size) @@ -496,7 +495,7 @@ class Decoder(nn.Module): if t > inputs.shape[1] / 4 and (stop_token > 0.6 or attention[:, -1].item() > 0.6): break - elif t > self.max_decoder_steps: + if t > self.max_decoder_steps: print(" | > Decoder stopped with 'max_decoder_steps") break return self._parse_outputs(outputs, attentions, stop_tokens) diff --git a/mozilla_voice_tts/tts/layers/tacotron2.py b/mozilla_voice_tts/tts/layers/tacotron2.py index 01a2f191..395a10ea 100644 --- a/mozilla_voice_tts/tts/layers/tacotron2.py +++ b/mozilla_voice_tts/tts/layers/tacotron2.py @@ -1,10 +1,11 @@ import torch -from torch.autograd import Variable from torch import nn from torch.nn import functional as F from .common_layers import init_attn, Prenet, Linear - +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg class ConvBNBlock(nn.Module): r"""Convolutions with Batch Normalization and non-linear activation. @@ -156,6 +157,7 @@ class Decoder(nn.Module): self.separate_stopnet = separate_stopnet self.max_decoder_steps = 1000 self.stop_threshold = 0.5 + self.speaker_embedding_dim = speaker_embedding_dim # model dimensions self.query_dim = 1024 @@ -211,8 +213,8 @@ class Decoder(nn.Module): def get_go_frame(self, inputs): B = inputs.size(0) - memory = torch.zeros(1, device=inputs.device).repeat(B, - self.frame_channels * self.r) + memory = torch.zeros(1, device=inputs.device).repeat( + B, self.frame_channels * self.r) return memory def _init_states(self, inputs, mask, keep_states=False): @@ -393,7 +395,6 @@ class Decoder(nn.Module): self.attention.init_win_idx() self.attention.init_states(inputs) outputs, stop_tokens, alignments, t = [], [], [], 0 - stop_flags = [True, False, False] while True: memory = self.prenet(self.memory_truncated) decoder_output, alignment, stop_token = self.decode(memory) diff --git a/mozilla_voice_tts/tts/tf/layers/common_layers.py b/mozilla_voice_tts/tts/tf/layers/common_layers.py index f2353a93..ad18b9fc 100644 --- a/mozilla_voice_tts/tts/tf/layers/common_layers.py +++ b/mozilla_voice_tts/tts/tf/layers/common_layers.py @@ -3,6 +3,9 @@ from tensorflow import keras from tensorflow.python.ops import math_ops # from tensorflow_addons.seq2seq import BahdanauAttention +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg class Linear(keras.layers.Layer): def __init__(self, units, use_bias, **kwargs): diff --git a/mozilla_voice_tts/tts/tf/layers/tacotron2.py b/mozilla_voice_tts/tts/tf/layers/tacotron2.py index 366535da..0dd0593e 100644 --- a/mozilla_voice_tts/tts/tf/layers/tacotron2.py +++ b/mozilla_voice_tts/tts/tf/layers/tacotron2.py @@ -4,7 +4,9 @@ from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list from mozilla_voice_tts.tts.tf.layers.common_layers import Prenet, Attention # from tensorflow_addons.seq2seq import AttentionWrapper - +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg class ConvBNBlock(keras.layers.Layer): def __init__(self, filters, kernel_size, activation, **kwargs): super(ConvBNBlock, self).__init__(**kwargs) diff --git a/mozilla_voice_tts/tts/tf/models/tacotron2.py b/mozilla_voice_tts/tts/tf/models/tacotron2.py index 8017b534..812fc634 100644 --- a/mozilla_voice_tts/tts/tf/models/tacotron2.py +++ b/mozilla_voice_tts/tts/tf/models/tacotron2.py @@ -5,7 +5,7 @@ from mozilla_voice_tts.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list -#pylint: disable=too-many-ancestors +#pylint: disable=too-many-ancestors, abstract-method class Tacotron2(keras.models.Model): def __init__(self, num_chars, @@ -105,4 +105,3 @@ class Tacotron2(keras.models.Model): # TODO: issue https://github.com/PyCQA/pylint/issues/3613 input_ids = tf.random.uniform(shape=[1, 4], maxval=10, dtype=tf.int32) #pylint: disable=unexpected-keyword-arg self(input_ids) - diff --git a/mozilla_voice_tts/tts/tf/utils/convert_torch_to_tf_utils.py b/mozilla_voice_tts/tts/tf/utils/convert_torch_to_tf_utils.py index e9e1e8a3..03b41803 100644 --- a/mozilla_voice_tts/tts/tf/utils/convert_torch_to_tf_utils.py +++ b/mozilla_voice_tts/tts/tf/utils/convert_torch_to_tf_utils.py @@ -1,6 +1,9 @@ import numpy as np import tensorflow as tf +# NOTE: linter has a problem with the current TF release +#pylint: disable=no-value-for-parameter +#pylint: disable=unexpected-keyword-arg def tf_create_dummy_inputs(): """ Create dummy inputs for TF Tacotron2 model """ diff --git a/mozilla_voice_tts/tts/tf/utils/generic_utils.py b/mozilla_voice_tts/tts/tf/utils/generic_utils.py index 509a00bd..f8131abd 100644 --- a/mozilla_voice_tts/tts/tf/utils/generic_utils.py +++ b/mozilla_voice_tts/tts/tf/utils/generic_utils.py @@ -1,4 +1,3 @@ -import os import datetime import importlib import pickle diff --git a/mozilla_voice_tts/tts/tf/utils/io.py b/mozilla_voice_tts/tts/tf/utils/io.py index 78a56de4..143422d2 100644 --- a/mozilla_voice_tts/tts/tf/utils/io.py +++ b/mozilla_voice_tts/tts/tf/utils/io.py @@ -39,4 +39,3 @@ def load_tflite_model(tflite_path): tflite_model = tf.lite.Interpreter(model_path=tflite_path) tflite_model.allocate_tensors() return tflite_model - diff --git a/mozilla_voice_tts/tts/tf/utils/tflite.py b/mozilla_voice_tts/tts/tf/utils/tflite.py index 5e684b30..b8daf254 100644 --- a/mozilla_voice_tts/tts/tf/utils/tflite.py +++ b/mozilla_voice_tts/tts/tf/utils/tflite.py @@ -28,4 +28,4 @@ def convert_tacotron2_to_tflite(model, def load_tflite_model(tflite_path): tflite_model = tf.lite.Interpreter(model_path=tflite_path) tflite_model.allocate_tensors() - return tflite_model \ No newline at end of file + return tflite_model diff --git a/mozilla_voice_tts/tts/utils/data.py b/mozilla_voice_tts/tts/utils/data.py index a83325cb..a75410b4 100644 --- a/mozilla_voice_tts/tts/utils/data.py +++ b/mozilla_voice_tts/tts/utils/data.py @@ -74,4 +74,3 @@ class StandardScaler(): X *= self.scale_ X += self.mean_ return X - diff --git a/mozilla_voice_tts/tts/utils/distribute.py b/mozilla_voice_tts/tts/utils/distribute.py index 6a3237ce..89d4efec 100644 --- a/mozilla_voice_tts/tts/utils/distribute.py +++ b/mozilla_voice_tts/tts/utils/distribute.py @@ -1,15 +1,11 @@ # edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py -import os, sys import math -import time -import subprocess -import argparse + import torch import torch.distributed as dist -from torch.utils.data.sampler import Sampler -from torch.autograd import Variable from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors -from mozilla_voice_tts.utils.generic_utils import create_experiment_folder +from torch.autograd import Variable +from torch.utils.data.sampler import Sampler class DistributedSampler(Sampler): @@ -108,7 +104,7 @@ def apply_gradient_allreduce(module): for param in list(module.parameters()): def allreduce_hook(*_): - Variable._execution_engine.queue_callback(allreduce_params) + Variable._execution_engine.queue_callback(allreduce_params) #pylint: disable=protected-access if param.requires_grad: param.register_hook(allreduce_hook) diff --git a/mozilla_voice_tts/tts/utils/io.py b/mozilla_voice_tts/tts/utils/io.py index f01c427c..da5c8b27 100644 --- a/mozilla_voice_tts/tts/utils/io.py +++ b/mozilla_voice_tts/tts/utils/io.py @@ -3,7 +3,7 @@ import torch import datetime -def load_checkpoint(model, checkpoint_path, use_cuda=False): +def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False): state = torch.load(checkpoint_path, map_location=torch.device('cpu')) model.load_state_dict(state['model']) if amp and 'amp' in state: diff --git a/mozilla_voice_tts/tts/utils/measures.py b/mozilla_voice_tts/tts/utils/measures.py index 01d25695..fdd31242 100644 --- a/mozilla_voice_tts/tts/utils/measures.py +++ b/mozilla_voice_tts/tts/utils/measures.py @@ -1,6 +1,3 @@ -import torch - - def alignment_diagonal_score(alignments, binary=False): """ Compute how diagonal alignment predictions are. It is useful diff --git a/mozilla_voice_tts/tts/utils/speakers.py b/mozilla_voice_tts/tts/utils/speakers.py index 4e1d151a..ff624b36 100644 --- a/mozilla_voice_tts/tts/utils/speakers.py +++ b/mozilla_voice_tts/tts/utils/speakers.py @@ -1,8 +1,6 @@ import os import json -from mozilla_voice_tts.tts.datasets.preprocess import get_preprocessor_by_name - def make_speakers_json_path(out_path): """Returns conventional speakers.json location.""" diff --git a/mozilla_voice_tts/tts/utils/text/__init__.py b/mozilla_voice_tts/tts/utils/text/__init__.py index e85dee03..191508be 100644 --- a/mozilla_voice_tts/tts/utils/text/__init__.py +++ b/mozilla_voice_tts/tts/utils/text/__init__.py @@ -8,6 +8,7 @@ from mozilla_voice_tts.tts.utils.text import cleaners from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \ _eos +# pylint: disable=unnecessary-comprehension # Mappings from symbol to numeric ID and vice versa: _symbol_to_id = {s: i for i, s in enumerate(symbols)} _id_to_symbol = {i: s for i, s in enumerate(symbols)} diff --git a/mozilla_voice_tts/tts/utils/visual.py b/mozilla_voice_tts/tts/utils/visual.py index 2026555b..17be49c7 100644 --- a/mozilla_voice_tts/tts/utils/visual.py +++ b/mozilla_voice_tts/tts/utils/visual.py @@ -41,7 +41,7 @@ def plot_spectrogram(spectrogram, ap=None, fig_size=(16, 10), output_fig=False): plt.colorbar() plt.tight_layout() if not output_fig: - plt.close() + plt.close() return fig @@ -97,4 +97,4 @@ def visualize(alignment, postnet_output, stop_tokens, text, hop_length, CONFIG, plt.close() if not output_fig: - plt.close() + plt.close() diff --git a/mozilla_voice_tts/utils/audio.py b/mozilla_voice_tts/utils/audio.py index a4e063dc..7fe7a91b 100644 --- a/mozilla_voice_tts/utils/audio.py +++ b/mozilla_voice_tts/utils/audio.py @@ -52,7 +52,7 @@ class AudioProcessor(object): self.mel_fmin = mel_fmin or 0 self.mel_fmax = mel_fmax self.spec_gain = float(spec_gain) - self.stft_pad_mode = 'reflect' + self.stft_pad_mode = stft_pad_mode self.max_norm = 1.0 if max_norm is None else float(max_norm) self.clip_norm = clip_norm self.do_trim_silence = do_trim_silence @@ -123,7 +123,7 @@ class AudioProcessor(object): if self.symmetric_norm: S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm if self.clip_norm: - S_norm = np.clip(S_norm, -self.max_norm, self.max_norm) + S_norm = np.clip(S_norm, -self.max_norm, self.max_norm) # pylint: disable=invalid-unary-operand-type return S_norm else: S_norm = self.max_norm * S_norm @@ -148,7 +148,7 @@ class AudioProcessor(object): raise RuntimeError(' [!] Mean-Var stats does not match the given feature dimensions.') if self.symmetric_norm: if self.clip_norm: - S_denorm = np.clip(S_denorm, -self.max_norm, self.max_norm) + S_denorm = np.clip(S_denorm, -self.max_norm, self.max_norm) #pylint: disable=invalid-unary-operand-type S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db return S_denorm + self.ref_level_db else: diff --git a/mozilla_voice_tts/utils/generic_utils.py b/mozilla_voice_tts/utils/generic_utils.py index 7332c047..478b4358 100644 --- a/mozilla_voice_tts/utils/generic_utils.py +++ b/mozilla_voice_tts/utils/generic_utils.py @@ -147,4 +147,4 @@ def check_argument(name, c, enum_list=None, max_val=None, min_val=None, restrict if enum_list: assert c[name].lower() in enum_list, f' [!] {name} is not a valid value' if val_type: - assert isinstance(c[name], val_type) or c[name] is None, f' [!] {name} has wrong type - {type(c[name])} vs {val_type}' \ No newline at end of file + assert isinstance(c[name], val_type) or c[name] is None, f' [!] {name} has wrong type - {type(c[name])} vs {val_type}' diff --git a/mozilla_voice_tts/utils/io.py b/mozilla_voice_tts/utils/io.py index f67fa33d..cda974a6 100644 --- a/mozilla_voice_tts/utils/io.py +++ b/mozilla_voice_tts/utils/io.py @@ -29,4 +29,4 @@ def copy_config_file(config_file, out_path, new_fields): config_lines.insert(1, new_line) config_out_file = open(out_path, "w") config_out_file.writelines(config_lines) - config_out_file.close() \ No newline at end of file + config_out_file.close() diff --git a/mozilla_voice_tts/utils/radam.py b/mozilla_voice_tts/utils/radam.py index 4724b705..58cec920 100644 --- a/mozilla_voice_tts/utils/radam.py +++ b/mozilla_voice_tts/utils/radam.py @@ -2,7 +2,7 @@ import math import torch -from torch.optim.optimizer import Optimizer, required +from torch.optim.optimizer import Optimizer class RAdam(Optimizer): @@ -25,7 +25,7 @@ class RAdam(Optimizer): defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, buffer=[[None, None, None] for _ in range(10)]) super(RAdam, self).__init__(params, defaults) - def __setstate__(self, state): + def __setstate__(self, state): # pylint: disable=useless-super-delegation super(RAdam, self).__setstate__(state) def step(self, closure=None): diff --git a/mozilla_voice_tts/utils/tensorboard_logger.py b/mozilla_voice_tts/utils/tensorboard_logger.py index cbf68ad6..4ee12d74 100644 --- a/mozilla_voice_tts/utils/tensorboard_logger.py +++ b/mozilla_voice_tts/utils/tensorboard_logger.py @@ -47,7 +47,7 @@ class TensorboardLogger(object): for key, value in audios.items(): try: self.writer.add_audio('{}/{}'.format(scope_name, key), value, step, sample_rate=sample_rate) - except: + except RuntimeError: traceback.print_exc() def tb_train_iter_stats(self, step, stats): diff --git a/mozilla_voice_tts/vocoder/layers/losses.py b/mozilla_voice_tts/vocoder/layers/losses.py index 431f7f45..e705b1e0 100644 --- a/mozilla_voice_tts/vocoder/layers/losses.py +++ b/mozilla_voice_tts/vocoder/layers/losses.py @@ -306,4 +306,4 @@ class DiscriminatorLoss(nn.Module): loss += hinge_D_loss return_dict['D_loss'] = loss - return return_dict \ No newline at end of file + return return_dict diff --git a/mozilla_voice_tts/vocoder/models/melgan_generator.py b/mozilla_voice_tts/vocoder/models/melgan_generator.py index 76c8adcb..4c35b1eb 100644 --- a/mozilla_voice_tts/vocoder/models/melgan_generator.py +++ b/mozilla_voice_tts/vocoder/models/melgan_generator.py @@ -95,4 +95,3 @@ class MelganGenerator(nn.Module): nn.utils.remove_weight_norm(layer) except ValueError: layer.remove_weight_norm() - diff --git a/mozilla_voice_tts/vocoder/models/melgan_multiscale_discriminator.py b/mozilla_voice_tts/vocoder/models/melgan_multiscale_discriminator.py index 885985d9..69adcc27 100644 --- a/mozilla_voice_tts/vocoder/models/melgan_multiscale_discriminator.py +++ b/mozilla_voice_tts/vocoder/models/melgan_multiscale_discriminator.py @@ -38,4 +38,4 @@ class MelganMultiscaleDiscriminator(nn.Module): scores.append(score) feats.append(feat) x = self.pooling(x) - return scores, feats \ No newline at end of file + return scores, feats diff --git a/mozilla_voice_tts/vocoder/tf/layers/melgan.py b/mozilla_voice_tts/vocoder/tf/layers/melgan.py index 3fad4c2a..f9806579 100644 --- a/mozilla_voice_tts/vocoder/tf/layers/melgan.py +++ b/mozilla_voice_tts/vocoder/tf/layers/melgan.py @@ -54,4 +54,4 @@ class ResidualStack(tf.keras.layers.Layer): for layer in block: x = layer(x) x += res - return x \ No newline at end of file + return x diff --git a/mozilla_voice_tts/vocoder/tf/models/melgan_generator.py b/mozilla_voice_tts/vocoder/tf/models/melgan_generator.py index 11712db4..60e870cc 100644 --- a/mozilla_voice_tts/vocoder/tf/models/melgan_generator.py +++ b/mozilla_voice_tts/vocoder/tf/models/melgan_generator.py @@ -125,4 +125,4 @@ class MelganGenerator(tf.keras.models.Model): o = layer(o) # o = self.model_layers(c) o = tf.transpose(o, perm=[0, 3, 2, 1]) - return o[:, :, 0, :] \ No newline at end of file + return o[:, :, 0, :] diff --git a/mozilla_voice_tts/vocoder/tf/utils/generic_utils.py b/mozilla_voice_tts/vocoder/tf/utils/generic_utils.py index 7ce50a85..6f1622dc 100644 --- a/mozilla_voice_tts/vocoder/tf/utils/generic_utils.py +++ b/mozilla_voice_tts/vocoder/tf/utils/generic_utils.py @@ -32,4 +32,4 @@ def setup_generator(c): upsample_factors=c.generator_model_params['upsample_factors'], res_kernel=3, num_res_blocks=c.generator_model_params['num_res_blocks']) - return model \ No newline at end of file + return model diff --git a/mozilla_voice_tts/vocoder/tf/utils/io.py b/mozilla_voice_tts/vocoder/tf/utils/io.py index d95d972c..c73c9cd8 100644 --- a/mozilla_voice_tts/vocoder/tf/utils/io.py +++ b/mozilla_voice_tts/vocoder/tf/utils/io.py @@ -24,4 +24,4 @@ def load_checkpoint(model, checkpoint_path): layer_name = tf_var.name chkp_var_value = chkp_var_dict[layer_name] tf.keras.backend.set_value(tf_var, chkp_var_value) - return model \ No newline at end of file + return model diff --git a/mozilla_voice_tts/vocoder/tf/utils/tflite.py b/mozilla_voice_tts/vocoder/tf/utils/tflite.py index d0637596..d62a081a 100644 --- a/mozilla_voice_tts/vocoder/tf/utils/tflite.py +++ b/mozilla_voice_tts/vocoder/tf/utils/tflite.py @@ -28,4 +28,4 @@ def convert_melgan_to_tflite(model, def load_tflite_model(tflite_path): tflite_model = tf.lite.Interpreter(model_path=tflite_path) tflite_model.allocate_tensors() - return tflite_model \ No newline at end of file + return tflite_model diff --git a/mozilla_voice_tts/vocoder/utils/generic_utils.py b/mozilla_voice_tts/vocoder/utils/generic_utils.py index 5d9d1c18..c6df4ca8 100644 --- a/mozilla_voice_tts/vocoder/utils/generic_utils.py +++ b/mozilla_voice_tts/vocoder/utils/generic_utils.py @@ -145,6 +145,5 @@ def setup_discriminator(c): ) return model - -def check_config(c): - pass \ No newline at end of file +# def check_config(c): + # pass diff --git a/mozilla_voice_tts/vocoder/utils/io.py b/mozilla_voice_tts/vocoder/utils/io.py index 9d350238..734714e0 100644 --- a/mozilla_voice_tts/vocoder/utils/io.py +++ b/mozilla_voice_tts/vocoder/utils/io.py @@ -60,4 +60,4 @@ def save_best_model(target_loss, best_loss, model, optimizer, scheduler, model_loss=target_loss, **kwargs) best_loss = target_loss - return best_loss \ No newline at end of file + return best_loss diff --git a/notebooks/dataset_analysis/analyze.py b/notebooks/dataset_analysis/analyze.py index f34605dd..161e2ae3 100644 --- a/notebooks/dataset_analysis/analyze.py +++ b/notebooks/dataset_analysis/analyze.py @@ -71,7 +71,7 @@ def process_meta_data(path): def get_data_points(meta_data): - x = [char_cnt for char_cnt in meta_data] + x = meta_data y_avg = [meta_data[d]['mean'] for d in meta_data] y_mode = [meta_data[d]['mode'] for d in meta_data] y_median = [meta_data[d]['median'] for d in meta_data] diff --git a/setup.py b/setup.py index 6c5dfe9c..c40f77e6 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ else: pass -class build_py(setuptools.command.build_py.build_py): +class build_py(setuptools.command.build_py.build_py): # pylint: disable=too-many-ancestors def run(self): self.create_version_file() setuptools.command.build_py.build_py.run(self) diff --git a/tests/generic_utils_text.py b/tests/generic_utils_text.py deleted file mode 100644 index a6826154..00000000 --- a/tests/generic_utils_text.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -import torch as T - -from mozilla_voice_tts.tts.utils.generic_utils import save_checkpoint, save_best_model -from mozilla_voice_tts.tts.layers.tacotron import Prenet - -OUT_PATH = '/tmp/test.pth.tar' - - -class ModelSavingTests(unittest.TestCase): - def save_checkpoint_test(self): - # create a dummy model - model = Prenet(128, out_features=[256, 128]) - model = T.nn.DataParallel(layer) #FIXME: undefined variable layer - - # save the model - save_checkpoint(model, None, 100, OUT_PATH, 1, 1) - - # load the model to CPU - model_dict = T.load( - MODEL_PATH, map_location=lambda storage, loc: storage) #FIXME: undefined variable MODEL_PATH - model.load_state_dict(model_dict['model']) - - def save_best_model_test(self): - # create a dummy model - model = Prenet(256, out_features=[256, 256]) - model = T.nn.DataParallel(layer) - - # save the model - save_best_model(model, None, 0, 100, OUT_PATH, 10, 1) - - # load the model to CPU - model_dict = T.load( - MODEL_PATH, map_location=lambda storage, loc: storage) - model.load_state_dict(model_dict['model']) diff --git a/tests/test_audio.py b/tests/test_audio.py index 6fa5eb16..6796c644 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -1,7 +1,8 @@ import os import unittest -from tests import get_tests_path, get_tests_input_path, get_tests_output_path +from tests import get_tests_input_path, get_tests_output_path, get_tests_path + from mozilla_voice_tts.utils.audio import AudioProcessor from mozilla_voice_tts.utils.io import load_config @@ -103,7 +104,7 @@ class TestAudio(unittest.TestCase): assert (x_old - x).sum() == 0 # check value range assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max() - assert x_norm.min() >= -self.ap.max_norm - 2, x_norm.min() + assert x_norm.min() >= -self.ap.max_norm - 2, x_norm.min() #pylint: disable=invalid-unary-operand-type assert x_norm.min() <= 0, x_norm.min() # check denorm. x_ = self.ap._denormalize(x_norm) @@ -120,7 +121,7 @@ class TestAudio(unittest.TestCase): assert (x_old - x).sum() == 0 # check value range assert x_norm.max() <= self.ap.max_norm, x_norm.max() - assert x_norm.min() >= -self.ap.max_norm, x_norm.min() + assert x_norm.min() >= -self.ap.max_norm, x_norm.min() #pylint: disable=invalid-unary-operand-type assert x_norm.min() <= 0, x_norm.min() # check denorm. x_ = self.ap._denormalize(x_norm) @@ -148,7 +149,7 @@ class TestAudio(unittest.TestCase): assert (x_old - x).sum() == 0 assert x_norm.max() <= self.ap.max_norm, x_norm.max() - assert x_norm.min() >= -self.ap.max_norm, x_norm.min() + assert x_norm.min() >= -self.ap.max_norm, x_norm.min() #pylint: disable=invalid-unary-operand-type assert x_norm.min() < 0, x_norm.min() x_ = self.ap._denormalize(x_norm) assert (x - x_).sum() < 1e-3 diff --git a/tests/test_demo_server.py b/tests/test_demo_server.py index a6da3f70..2ec15aba 100644 --- a/tests/test_demo_server.py +++ b/tests/test_demo_server.py @@ -1,13 +1,13 @@ import os import unittest -import torch as T +from tests import get_tests_input_path, get_tests_output_path from mozilla_voice_tts.server.synthesizer import Synthesizer -from tests import get_tests_input_path, get_tests_output_path -from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols from mozilla_voice_tts.tts.utils.generic_utils import setup_model from mozilla_voice_tts.tts.utils.io import save_checkpoint +from mozilla_voice_tts.tts.utils.text.symbols import (make_symbols, phonemes, + symbols) from mozilla_voice_tts.utils.io import load_config diff --git a/tests/test_encoder.py b/tests/test_encoder.py index 37a0340f..711ad195 100644 --- a/tests/test_encoder.py +++ b/tests/test_encoder.py @@ -1,13 +1,13 @@ import os import unittest + import torch as T +from tests import get_tests_input_path -from tests import get_tests_path, get_tests_input_path -from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder from mozilla_voice_tts.speaker_encoder.loss import GE2ELoss +from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder from mozilla_voice_tts.utils.io import load_config - file_path = get_tests_input_path() c = load_config(os.path.join(file_path, "test_config.json")) diff --git a/tests/test_layers.py b/tests/test_layers.py index 947ef9a0..bf036f5c 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -9,7 +9,7 @@ from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask class PrenetTests(unittest.TestCase): - def test_in_out(self): + def test_in_out(self): #pylint: disable=no-self-use layer = Prenet(128, out_features=[256, 128]) dummy_input = T.rand(4, 128) @@ -104,7 +104,7 @@ class DecoderTests(unittest.TestCase): class EncoderTests(unittest.TestCase): - def test_in_out(self): + def test_in_out(self): #pylint: disable=no-self-use layer = Encoder(128) dummy_input = T.rand(4, 8, 128) @@ -117,7 +117,7 @@ class EncoderTests(unittest.TestCase): class L1LossMaskedTests(unittest.TestCase): - def test_in_out(self): + def test_in_out(self): #pylint: disable=no-self-use # test input == target layer = L1LossMasked(seq_len_norm=False) dummy_input = T.ones(4, 8, 128).float() diff --git a/tests/test_loader.py b/tests/test_loader.py index 8884b1fe..9f084f8f 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -1,15 +1,16 @@ import os -import unittest import shutil -import torch -import numpy as np +import unittest -from tests import get_tests_path, get_tests_input_path, get_tests_output_path +import numpy as np +import torch +from tests import get_tests_input_path, get_tests_output_path from torch.utils.data import DataLoader -from mozilla_voice_tts.utils.io import load_config -from mozilla_voice_tts.utils.audio import AudioProcessor + from mozilla_voice_tts.tts.datasets import TTSDataset from mozilla_voice_tts.tts.datasets.preprocess import ljspeech +from mozilla_voice_tts.utils.audio import AudioProcessor +from mozilla_voice_tts.utils.io import load_config #pylint: disable=unused-variable @@ -32,7 +33,7 @@ class TestTTSDataset(unittest.TestCase): self.ap = AudioProcessor(**c.audio) def _create_dataloader(self, batch_size, r, bgs): - items = ljspeech(c.data_path,'metadata.csv') + items = ljspeech(c.data_path, 'metadata.csv') dataset = TTSDataset.MyDataset( r, c.text_cleaner, @@ -74,7 +75,7 @@ class TestTTSDataset(unittest.TestCase): assert check_count == 0, \ " !! Negative values in text_input: {}".format(check_count) # TODO: more assertion here - assert type(speaker_name[0]) is str + assert isinstance(speaker_name[0], str) assert linear_input.shape[0] == c.batch_size assert linear_input.shape[2] == self.ap.fft_size // 2 + 1 assert mel_input.shape[0] == c.batch_size @@ -82,7 +83,7 @@ class TestTTSDataset(unittest.TestCase): # check normalization ranges if self.ap.symmetric_norm: assert mel_input.max() <= self.ap.max_norm - assert mel_input.min() >= -self.ap.max_norm + assert mel_input.min() >= -self.ap.max_norm #pylint: disable=invalid-unary-operand-type assert mel_input.min() < 0 else: assert mel_input.max() <= self.ap.max_norm diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py index e861fb98..5c875ce6 100644 --- a/tests/test_preprocessors.py +++ b/tests/test_preprocessors.py @@ -7,7 +7,7 @@ from mozilla_voice_tts.tts.datasets.preprocess import common_voice class TestPreprocessors(unittest.TestCase): - def test_common_voice_preprocessor(self): + def test_common_voice_preprocessor(self): #pylint: disable=no-self-use root_path = get_tests_input_path() meta_file = "common_voice.tsv" items = common_voice(root_path, meta_file) diff --git a/tests/test_tacotron2_model.py b/tests/test_tacotron2_model.py index 030e0d13..2faccd75 100644 --- a/tests/test_tacotron2_model.py +++ b/tests/test_tacotron2_model.py @@ -20,8 +20,8 @@ c = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) class TacotronTrainTest(unittest.TestCase): - def test_train_step(self): - input = torch.randint(0, 24, (8, 128)).long().to(device) + def test_train_step(self): # pylint: disable=no-self-use + input_dummy = torch.randint(0, 24, (8, 128)).long().to(device) input_lengths = torch.randint(100, 128, (8, )).long().to(device) input_lengths = torch.sort(input_lengths, descending=True)[0] mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) @@ -34,7 +34,7 @@ class TacotronTrainTest(unittest.TestCase): for idx in mel_lengths: stop_targets[:, int(idx.item()):, 0] = 1.0 - stop_targets = stop_targets.view(input.shape[0], + stop_targets = stop_targets.view(input_dummy.shape[0], stop_targets.size(1) // c.r, -1) stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() @@ -51,7 +51,7 @@ class TacotronTrainTest(unittest.TestCase): optimizer = optim.Adam(model.parameters(), lr=c.lr) for i in range(5): mel_out, mel_postnet_out, align, stop_tokens = model.forward( - input, input_lengths, mel_spec, mel_lengths, speaker_ids) + input_dummy, input_lengths, mel_spec, mel_lengths, speaker_ids) assert torch.sigmoid(stop_tokens).data.max() <= 1.0 assert torch.sigmoid(stop_tokens).data.min() >= 0.0 optimizer.zero_grad() diff --git a/tests/test_tacotron2_tf_model.py b/tests/test_tacotron2_tf_model.py index 1a64c509..50853e9a 100644 --- a/tests/test_tacotron2_tf_model.py +++ b/tests/test_tacotron2_tf_model.py @@ -1,15 +1,19 @@ import os -import torch import unittest + import numpy as np import tensorflow as tf +import torch +from tests import get_tests_input_path + +from mozilla_voice_tts.tts.tf.models.tacotron2 import Tacotron2 +from mozilla_voice_tts.tts.tf.utils.tflite import (convert_tacotron2_to_tflite, + load_tflite_model) +from mozilla_voice_tts.utils.io import load_config + tf.get_logger().setLevel('INFO') -from tests import get_tests_path, get_tests_input_path, get_tests_output_path -from mozilla_voice_tts.utils.io import load_config -from mozilla_voice_tts.tts.tf.models.tacotron2 import Tacotron2 -from mozilla_voice_tts.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model #pylint: disable=unused-variable @@ -132,4 +136,3 @@ class TacotronTFTrainTest(unittest.TestCase): postnet_output = tflite_model.get_tensor(output_details[1]['index']) # remove tflite binary os.remove('test_tacotron2.tflite') - diff --git a/tests/test_text_processing.py b/tests/test_text_processing.py index 94222621..61c2a407 100644 --- a/tests/test_text_processing.py +++ b/tests/test_text_processing.py @@ -16,7 +16,7 @@ def test_phoneme_to_sequence(): lang = "en-us" sequence = phoneme_to_sequence(text, text_cleaner, lang) text_hat = sequence_to_phoneme(sequence) - sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) gt = "ɹiːsənt ɹɪsɜːtʃ æt hɑːɹvɚd hɐz ʃoʊn mɛdᵻteɪɾɪŋ fɔːɹ æz lɪɾəl æz eɪt wiːks kæn æktʃuːəli ɪnkɹiːs, ðə ɡɹeɪ mæɾɚɹ ɪnðə pɑːɹts ʌvðə bɹeɪn ɹɪspɑːnsəbəl fɔːɹ ɪmoʊʃənəl ɹɛɡjuːleɪʃən ænd lɜːnɪŋ!" assert text_hat == text_hat_with_params == gt @@ -25,7 +25,7 @@ def test_phoneme_to_sequence(): text = "Be a voice, not an! echo?" sequence = phoneme_to_sequence(text, text_cleaner, lang) text_hat = sequence_to_phoneme(sequence) - sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ?" print(text_hat) @@ -36,7 +36,7 @@ def test_phoneme_to_sequence(): text = "Be a voice, not an! echo" sequence = phoneme_to_sequence(text, text_cleaner, lang) text_hat = sequence_to_phoneme(sequence) - sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ" print(text_hat) @@ -47,7 +47,7 @@ def test_phoneme_to_sequence(): text = "Be a voice, not an echo!" sequence = phoneme_to_sequence(text, text_cleaner, lang) text_hat = sequence_to_phoneme(sequence) - sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) gt = "biː ɐ vɔɪs, nɑːt ɐn ɛkoʊ!" print(text_hat) @@ -58,7 +58,7 @@ def test_phoneme_to_sequence(): text = "Be a voice, not an! echo. " sequence = phoneme_to_sequence(text, text_cleaner, lang) text_hat = sequence_to_phoneme(sequence) - sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ." print(text_hat) @@ -69,7 +69,7 @@ def test_phoneme_to_sequence(): text = "Be a voice, not an! echo. " sequence = phoneme_to_sequence(text, text_cleaner, lang, True) text_hat = sequence_to_phoneme(sequence) - sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) gt = "^biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ.~" print(text_hat) @@ -80,7 +80,7 @@ def test_phoneme_to_sequence(): text = "_Be a _voice, not an! echo_" sequence = phoneme_to_sequence(text, text_cleaner, lang) text_hat = sequence_to_phoneme(sequence) - sequence_with_params = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) + _ = phoneme_to_sequence(text, text_cleaner, lang, tp=conf.characters) text_hat_with_params = sequence_to_phoneme(sequence, tp=conf.characters) gt = "biː ɐ vɔɪs, nɑːt ɐn! ɛkoʊ" print(text_hat) diff --git a/tests/test_vocoder_melgan_discriminator.py b/tests/test_vocoder_melgan_discriminator.py index 265ba777..feafa60b 100644 --- a/tests/test_vocoder_melgan_discriminator.py +++ b/tests/test_vocoder_melgan_discriminator.py @@ -23,4 +23,4 @@ def test_melgan_multi_scale_discriminator(): assert np.all(scores[0].shape == (4, 1, 64)) assert np.all(feats[0][0].shape == (4, 16, 4096)) assert np.all(feats[0][1].shape == (4, 64, 1024)) - assert np.all(feats[0][2].shape == (4, 256, 256)) \ No newline at end of file + assert np.all(feats[0][2].shape == (4, 256, 256)) diff --git a/tests/test_vocoder_melgan_generator.py b/tests/test_vocoder_melgan_generator.py index ae758c86..c9cf5e2d 100644 --- a/tests/test_vocoder_melgan_generator.py +++ b/tests/test_vocoder_melgan_generator.py @@ -11,4 +11,3 @@ def test_melgan_generator(): assert np.all(output.shape == (4, 1, 64 * 256)) output = model.inference(dummy_input) assert np.all(output.shape == (4, 1, (64 + 4) * 256)) - diff --git a/tests/test_vocoder_pqmf.py b/tests/test_vocoder_pqmf.py index eda52d32..485e2f2b 100644 --- a/tests/test_vocoder_pqmf.py +++ b/tests/test_vocoder_pqmf.py @@ -25,4 +25,3 @@ def test_pqmf(): print(w2_.min()) print(w2_.mean()) sf.write('pqmf_output.wav', w2_.flatten().detach(), sr) - diff --git a/tests/test_vocoder_tf_pqmf.py b/tests/test_vocoder_tf_pqmf.py index 3ea774a7..851c0fb0 100644 --- a/tests/test_vocoder_tf_pqmf.py +++ b/tests/test_vocoder_tf_pqmf.py @@ -26,4 +26,3 @@ def test_pqmf(): print(w2_.min()) print(w2_.mean()) sf.write('tf_pqmf_output.wav', w2_.flatten(), sr) -