diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py index 2ac725bf..0a4337da 100644 --- a/TTS/bin/compute_attention_masks.py +++ b/TTS/bin/compute_attention_masks.py @@ -1,12 +1,13 @@ import argparse import importlib import os +from argparse import RawTextHelpFormatter import numpy as np import torch from torch.utils.data import DataLoader from tqdm import tqdm -from argparse import RawTextHelpFormatter + from TTS.tts.datasets.TTSDataset import MyDataset from TTS.tts.utils.generic_utils import setup_model from TTS.tts.utils.io import load_checkpoint @@ -14,7 +15,6 @@ from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config - if __name__ == "__main__": # pylint: disable=bad-option-value parser = argparse.ArgumentParser( diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py index 36ecb0f0..c38e0e7e 100644 --- a/TTS/bin/compute_embeddings.py +++ b/TTS/bin/compute_embeddings.py @@ -3,14 +3,14 @@ import glob import os import numpy as np +import torch from tqdm import tqdm -import torch from TTS.speaker_encoder.model import SpeakerEncoder +from TTS.tts.datasets.preprocess import load_meta_data +from TTS.tts.utils.speakers import save_speaker_mapping from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config -from TTS.tts.utils.speakers import save_speaker_mapping -from TTS.tts.datasets.preprocess import load_meta_data parser = argparse.ArgumentParser( description='Compute embedding vectors for each wav file in a dataset. If "target_dataset" is defined, it generates "speakers.json" necessary for training a multi-speaker model.' diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py index ce224310..9e2b7415 100755 --- a/TTS/bin/compute_statistics.py +++ b/TTS/bin/compute_statistics.py @@ -1,16 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import os -import glob import argparse +import glob +import os import numpy as np from tqdm import tqdm from TTS.tts.datasets.preprocess import load_meta_data -from TTS.utils.io import load_config from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config def main(): diff --git a/TTS/bin/convert_melgan_tflite.py b/TTS/bin/convert_melgan_tflite.py index 06784abe..a3a3fb66 100644 --- a/TTS/bin/convert_melgan_tflite.py +++ b/TTS/bin/convert_melgan_tflite.py @@ -7,7 +7,6 @@ from TTS.vocoder.tf.utils.generic_utils import setup_generator from TTS.vocoder.tf.utils.io import load_checkpoint from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite - parser = argparse.ArgumentParser() parser.add_argument("--tf_model", type=str, help="Path to target torch model to be converted to TF.") parser.add_argument("--config_path", type=str, help="Path to config file of torch model.") diff --git a/TTS/bin/convert_melgan_torch_to_tf.py b/TTS/bin/convert_melgan_torch_to_tf.py index 176bb992..ba8e186a 100644 --- a/TTS/bin/convert_melgan_torch_to_tf.py +++ b/TTS/bin/convert_melgan_torch_to_tf.py @@ -1,17 +1,14 @@ import argparse -from difflib import SequenceMatcher import os +from difflib import SequenceMatcher import numpy as np import tensorflow as tf import torch from TTS.utils.io import load_config -from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import ( - compare_torch_tf, - convert_tf_name, - transfer_weights_torch_to_tf, -) +from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import (compare_torch_tf, convert_tf_name, + transfer_weights_torch_to_tf) from TTS.vocoder.tf.utils.generic_utils import setup_generator as setup_tf_generator from TTS.vocoder.tf.utils.io import save_checkpoint from TTS.vocoder.utils.generic_utils import setup_generator diff --git a/TTS/bin/convert_tacotron2_tflite.py b/TTS/bin/convert_tacotron2_tflite.py index 2a7926a8..327d0ae8 100644 --- a/TTS/bin/convert_tacotron2_tflite.py +++ b/TTS/bin/convert_tacotron2_tflite.py @@ -2,12 +2,11 @@ import argparse -from TTS.utils.io import load_config -from TTS.tts.utils.text.symbols import symbols, phonemes from TTS.tts.tf.utils.generic_utils import setup_model from TTS.tts.tf.utils.io import load_checkpoint from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite - +from TTS.tts.utils.text.symbols import phonemes, symbols +from TTS.utils.io import load_config parser = argparse.ArgumentParser() parser.add_argument("--tf_model", type=str, help="Path to target torch model to be converted to TF.") diff --git a/TTS/bin/convert_tacotron2_torch_to_tf.py b/TTS/bin/convert_tacotron2_torch_to_tf.py index b4aafa9e..d523d01e 100644 --- a/TTS/bin/convert_tacotron2_torch_to_tf.py +++ b/TTS/bin/convert_tacotron2_torch_to_tf.py @@ -1,12 +1,13 @@ import argparse -from difflib import SequenceMatcher import os import sys +from difflib import SequenceMatcher from pprint import pprint import numpy as np import tensorflow as tf import torch + from TTS.tts.tf.models.tacotron2 import Tacotron2 from TTS.tts.tf.utils.convert_torch_to_tf_utils import compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf from TTS.tts.tf.utils.generic_utils import save_checkpoint diff --git a/TTS/bin/distribute.py b/TTS/bin/distribute.py index 6b1c6fd6..0bd27275 100644 --- a/TTS/bin/distribute.py +++ b/TTS/bin/distribute.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import os -import sys -import pathlib -import time -import subprocess import argparse +import os +import pathlib +import subprocess +import sys +import time + import torch diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py index b7056e01..7891d65a 100644 --- a/TTS/bin/find_unique_chars.py +++ b/TTS/bin/find_unique_chars.py @@ -1,6 +1,6 @@ """Find all the unique characters in a dataset""" -import os import argparse +import os from argparse import RawTextHelpFormatter from TTS.tts.datasets.preprocess import get_preprocessor_by_name diff --git a/TTS/bin/resample.py b/TTS/bin/resample.py index 7d358d4d..f1e416b4 100644 --- a/TTS/bin/resample.py +++ b/TTS/bin/resample.py @@ -1,10 +1,11 @@ import argparse import glob import os -import librosa -from distutils.dir_util import copy_tree from argparse import RawTextHelpFormatter +from distutils.dir_util import copy_tree from multiprocessing import Pool + +import librosa from tqdm import tqdm diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index aca245bb..bec41a60 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -4,7 +4,6 @@ import argparse import sys from argparse import RawTextHelpFormatter - # pylint: disable=redefined-outer-name, unused-argument from pathlib import Path diff --git a/TTS/bin/train_align_tts.py b/TTS/bin/train_align_tts.py index 16940f1e..6f268ed3 100644 --- a/TTS/bin/train_align_tts.py +++ b/TTS/bin/train_align_tts.py @@ -12,6 +12,7 @@ import torch from torch.nn.parallel import DistributedDataParallel as DDP_th from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler + from TTS.tts.datasets.preprocess import load_meta_data from TTS.tts.datasets.TTSDataset import MyDataset from TTS.tts.layers.losses import AlignTTSLoss diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index a2d917ac..5a3f59af 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -9,6 +9,7 @@ import traceback import torch from torch.utils.data import DataLoader + from TTS.speaker_encoder.dataset import MyDataset from TTS.speaker_encoder.losses import AngleProtoLoss, GE2ELoss from TTS.speaker_encoder.model import SpeakerEncoder @@ -16,13 +17,8 @@ from TTS.speaker_encoder.utils.generic_utils import check_config_speaker_encoder from TTS.speaker_encoder.utils.visual import plot_embeddings from TTS.tts.datasets.preprocess import load_meta_data from TTS.utils.audio import AudioProcessor -from TTS.utils.generic_utils import ( - count_parameters, - create_experiment_folder, - get_git_branch, - remove_experiment_folder, - set_init_dict, -) +from TTS.utils.generic_utils import (count_parameters, create_experiment_folder, get_git_branch, + remove_experiment_folder, set_init_dict) from TTS.utils.io import copy_model_files, load_config from TTS.utils.radam import RAdam from TTS.utils.tensorboard_logger import TensorboardLogger diff --git a/TTS/bin/train_glow_tts.py b/TTS/bin/train_glow_tts.py index 01b62c14..ec27574c 100644 --- a/TTS/bin/train_glow_tts.py +++ b/TTS/bin/train_glow_tts.py @@ -8,13 +8,11 @@ import traceback from random import randrange import torch - # DISTRIBUTED from torch.nn.parallel import DistributedDataParallel as DDP_th from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler -from TTS.utils.arguments import parse_arguments, process_args from TTS.tts.datasets.preprocess import load_meta_data from TTS.tts.datasets.TTSDataset import MyDataset from TTS.tts.layers.losses import GlowTTSLoss @@ -25,6 +23,7 @@ from TTS.tts.utils.speakers import parse_speakers from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +from TTS.utils.arguments import parse_arguments, process_args from TTS.utils.audio import AudioProcessor from TTS.utils.distribute import init_distributed, reduce_tensor from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict diff --git a/TTS/bin/train_speedy_speech.py b/TTS/bin/train_speedy_speech.py index 7959c3c1..d27af5a8 100644 --- a/TTS/bin/train_speedy_speech.py +++ b/TTS/bin/train_speedy_speech.py @@ -5,16 +5,15 @@ import os import sys import time import traceback -import numpy as np from random import randrange +import numpy as np import torch -from TTS.utils.arguments import parse_arguments, process_args - # DISTRIBUTED from torch.nn.parallel import DistributedDataParallel as DDP_th from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler + from TTS.tts.datasets.preprocess import load_meta_data from TTS.tts.datasets.TTSDataset import MyDataset from TTS.tts.layers.losses import SpeedySpeechLoss @@ -25,6 +24,7 @@ from TTS.tts.utils.speakers import parse_speakers from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +from TTS.utils.arguments import parse_arguments, process_args from TTS.utils.audio import AudioProcessor from TTS.utils.distribute import init_distributed, reduce_tensor from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict diff --git a/TTS/bin/train_tacotron.py b/TTS/bin/train_tacotron.py index cf5552fc..aee13cac 100644 --- a/TTS/bin/train_tacotron.py +++ b/TTS/bin/train_tacotron.py @@ -10,7 +10,7 @@ from random import randrange import numpy as np import torch from torch.utils.data import DataLoader -from TTS.utils.arguments import parse_arguments, process_args + from TTS.tts.datasets.preprocess import load_meta_data from TTS.tts.datasets.TTSDataset import MyDataset from TTS.tts.layers.losses import TacotronLoss @@ -21,18 +21,13 @@ from TTS.tts.utils.speakers import parse_speakers from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +from TTS.utils.arguments import parse_arguments, process_args from TTS.utils.audio import AudioProcessor from TTS.utils.distribute import DistributedSampler, apply_gradient_allreduce, init_distributed, reduce_tensor from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict from TTS.utils.radam import RAdam -from TTS.utils.training import ( - NoamLR, - adam_weight_decay, - check_update, - gradual_training_scheduler, - set_weight_decay, - setup_torch_training_env, -) +from TTS.utils.training import (NoamLR, adam_weight_decay, check_update, gradual_training_scheduler, set_weight_decay, + setup_torch_training_env) use_cuda, num_gpus = setup_torch_training_env(True, False) diff --git a/TTS/bin/train_vocoder_gan.py b/TTS/bin/train_vocoder_gan.py index 628a1f4c..6546e6ed 100644 --- a/TTS/bin/train_vocoder_gan.py +++ b/TTS/bin/train_vocoder_gan.py @@ -9,13 +9,16 @@ import traceback from inspect import signature import torch +# DISTRIBUTED +from torch.nn.parallel import DistributedDataParallel as DDP_th from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler + from TTS.utils.arguments import parse_arguments, process_args from TTS.utils.audio import AudioProcessor +from TTS.utils.distribute import init_distributed from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict - from TTS.utils.radam import RAdam - from TTS.utils.training import setup_torch_training_env from TTS.vocoder.datasets.gan_dataset import GANDataset from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data @@ -23,11 +26,6 @@ from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss from TTS.vocoder.utils.generic_utils import plot_results, setup_discriminator, setup_generator from TTS.vocoder.utils.io import save_best_model, save_checkpoint -# DISTRIBUTED -from torch.nn.parallel import DistributedDataParallel as DDP_th -from torch.utils.data.distributed import DistributedSampler -from TTS.utils.distribute import init_distributed - use_cuda, num_gpus = setup_torch_training_env(True, True) diff --git a/TTS/bin/train_vocoder_wavegrad.py b/TTS/bin/train_vocoder_wavegrad.py index e5ade7b8..104e18de 100644 --- a/TTS/bin/train_vocoder_wavegrad.py +++ b/TTS/bin/train_vocoder_wavegrad.py @@ -5,15 +5,15 @@ import os import sys import time import traceback + import numpy as np - import torch - # DISTRIBUTED from torch.nn.parallel import DistributedDataParallel as DDP_th from torch.optim import Adam from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler + from TTS.utils.arguments import parse_arguments, process_args from TTS.utils.audio import AudioProcessor from TTS.utils.distribute import init_distributed diff --git a/TTS/bin/train_vocoder_wavernn.py b/TTS/bin/train_vocoder_wavernn.py index 25129883..3f6f5836 100644 --- a/TTS/bin/train_vocoder_wavernn.py +++ b/TTS/bin/train_vocoder_wavernn.py @@ -2,33 +2,29 @@ """Train WaveRNN vocoder model.""" import os -import sys -import traceback -import time import random +import sys +import time +import traceback import torch from torch.utils.data import DataLoader -# from torch.utils.data.distributed import DistributedSampler - -from TTS.utils.arguments import parse_arguments, process_args from TTS.tts.utils.visual import plot_spectrogram +from TTS.utils.arguments import parse_arguments, process_args from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict from TTS.utils.radam import RAdam from TTS.utils.training import setup_torch_training_env -from TTS.utils.generic_utils import ( - KeepAverage, - count_parameters, - remove_experiment_folder, - set_init_dict, -) -from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data +from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset from TTS.vocoder.utils.distribution import discretized_mix_logistic_loss, gaussian_loss from TTS.vocoder.utils.generic_utils import setup_generator from TTS.vocoder.utils.io import save_best_model, save_checkpoint +# from torch.utils.data.distributed import DistributedSampler + + use_cuda, num_gpus = setup_torch_training_env(True, True) diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py index d0f64214..a31d6c45 100644 --- a/TTS/bin/tune_wavegrad.py +++ b/TTS/bin/tune_wavegrad.py @@ -6,6 +6,7 @@ import numpy as np import torch from torch.utils.data import DataLoader from tqdm import tqdm + from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config from TTS.vocoder.datasets.preprocess import load_wav_data diff --git a/TTS/server/server.py b/TTS/server/server.py index 106c282c..9e533dc6 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -1,14 +1,15 @@ #!flask/bin/python import argparse +import io import os import sys -import io from pathlib import Path from flask import Flask, render_template, request, send_file -from TTS.utils.synthesizer import Synthesizer -from TTS.utils.manage import ModelManager + from TTS.utils.io import load_config +from TTS.utils.manage import ModelManager +from TTS.utils.synthesizer import Synthesizer def create_argparser(): diff --git a/TTS/speaker_encoder/utils/generic_utils.py b/TTS/speaker_encoder/utils/generic_utils.py index 38f9870d..c9bfa679 100644 --- a/TTS/speaker_encoder/utils/generic_utils.py +++ b/TTS/speaker_encoder/utils/generic_utils.py @@ -3,6 +3,7 @@ import os import re import torch + from TTS.speaker_encoder.model import SpeakerEncoder from TTS.utils.generic_utils import check_argument diff --git a/TTS/speaker_encoder/utils/prepare_voxceleb.py b/TTS/speaker_encoder/utils/prepare_voxceleb.py index 58ff9dad..7bcbaf95 100644 --- a/TTS/speaker_encoder/utils/prepare_voxceleb.py +++ b/TTS/speaker_encoder/utils/prepare_voxceleb.py @@ -19,15 +19,17 @@ # pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes """ voxceleb 1 & 2 """ +import hashlib import os +import subprocess import sys import zipfile -import subprocess -import hashlib -import pandas -from absl import logging -import tensorflow as tf + import soundfile as sf +import tensorflow as tf +from absl import logging + +import pandas gfile = tf.compat.v1.gfile diff --git a/TTS/speaker_encoder/utils/visual.py b/TTS/speaker_encoder/utils/visual.py index 68c48f12..4f40f68c 100644 --- a/TTS/speaker_encoder/utils/visual.py +++ b/TTS/speaker_encoder/utils/visual.py @@ -1,7 +1,7 @@ -import umap -import numpy as np import matplotlib import matplotlib.pyplot as plt +import numpy as np +import umap matplotlib.use("Agg") diff --git a/TTS/tts/datasets/TTSDataset.py b/TTS/tts/datasets/TTSDataset.py index 3c791625..b613e37c 100644 --- a/TTS/tts/datasets/TTSDataset.py +++ b/TTS/tts/datasets/TTSDataset.py @@ -7,6 +7,7 @@ import numpy as np import torch import tqdm from torch.utils.data import Dataset + from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor from TTS.tts.utils.text import pad_with_eos_bos, phoneme_to_sequence, text_to_sequence diff --git a/TTS/tts/datasets/preprocess.py b/TTS/tts/datasets/preprocess.py index 14c7d4c5..0f82814d 100644 --- a/TTS/tts/datasets/preprocess.py +++ b/TTS/tts/datasets/preprocess.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import List from tqdm import tqdm + from TTS.tts.utils.generic_utils import split_dataset #################### diff --git a/TTS/tts/layers/align_tts/duration_predictor.py b/TTS/tts/layers/align_tts/duration_predictor.py index 83916464..b2b83894 100644 --- a/TTS/tts/layers/align_tts/duration_predictor.py +++ b/TTS/tts/layers/align_tts/duration_predictor.py @@ -1,6 +1,7 @@ from torch import nn -from TTS.tts.layers.generic.transformer import FFTransformerBlock + from TTS.tts.layers.generic.pos_encoding import PositionalEncoding +from TTS.tts.layers.generic.transformer import FFTransformerBlock class DurationPredictor(nn.Module): diff --git a/TTS/tts/layers/feed_forward/decoder.py b/TTS/tts/layers/feed_forward/decoder.py index 7e145a6c..34c586aa 100644 --- a/TTS/tts/layers/feed_forward/decoder.py +++ b/TTS/tts/layers/feed_forward/decoder.py @@ -1,9 +1,10 @@ import torch from torch import nn -from TTS.tts.layers.generic.res_conv_bn import Conv1dBNBlock, ResidualConv1dBNBlock, Conv1dBN + +from TTS.tts.layers.generic.res_conv_bn import Conv1dBN, Conv1dBNBlock, ResidualConv1dBNBlock +from TTS.tts.layers.generic.transformer import FFTransformerBlock from TTS.tts.layers.generic.wavenet import WNBlocks from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer -from TTS.tts.layers.generic.transformer import FFTransformerBlock class WaveNetDecoder(nn.Module): diff --git a/TTS/tts/layers/feed_forward/encoder.py b/TTS/tts/layers/feed_forward/encoder.py index a50898a4..bb87ffbb 100644 --- a/TTS/tts/layers/feed_forward/encoder.py +++ b/TTS/tts/layers/feed_forward/encoder.py @@ -1,8 +1,8 @@ from torch import nn -from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock from TTS.tts.layers.generic.transformer import FFTransformerBlock +from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer class RelativePositionTransformerEncoder(nn.Module): diff --git a/TTS/tts/layers/generic/pos_encoding.py b/TTS/tts/layers/generic/pos_encoding.py index a1eaacea..46a0b516 100644 --- a/TTS/tts/layers/generic/pos_encoding.py +++ b/TTS/tts/layers/generic/pos_encoding.py @@ -1,6 +1,6 @@ -import torch import math +import torch from torch import nn diff --git a/TTS/tts/layers/glow_tts/decoder.py b/TTS/tts/layers/glow_tts/decoder.py index 3cfcf461..7b3f0ed1 100644 --- a/TTS/tts/layers/glow_tts/decoder.py +++ b/TTS/tts/layers/glow_tts/decoder.py @@ -1,8 +1,8 @@ import torch from torch import nn -from TTS.tts.layers.glow_tts.glow import InvConvNear, CouplingBlock from TTS.tts.layers.generic.normalization import ActNorm +from TTS.tts.layers.glow_tts.glow import CouplingBlock, InvConvNear def squeeze(x, x_mask=None, num_sqz=2): diff --git a/TTS/tts/layers/glow_tts/encoder.py b/TTS/tts/layers/glow_tts/encoder.py index e7c1205f..48bb3008 100644 --- a/TTS/tts/layers/glow_tts/encoder.py +++ b/TTS/tts/layers/glow_tts/encoder.py @@ -1,14 +1,15 @@ import math + import torch from torch import nn -from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer from TTS.tts.layers.generic.gated_conv import GatedConvBlock -from TTS.tts.utils.generic_utils import sequence_mask -from TTS.tts.layers.glow_tts.glow import ResidualConv1dLayerNormBlock -from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor -from TTS.tts.layers.generic.time_depth_sep_conv import TimeDepthSeparableConvBlock from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock +from TTS.tts.layers.generic.time_depth_sep_conv import TimeDepthSeparableConvBlock +from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor +from TTS.tts.layers.glow_tts.glow import ResidualConv1dLayerNormBlock +from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer +from TTS.tts.utils.generic_utils import sequence_mask class Encoder(nn.Module): diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py index d279ad77..18c491e3 100644 --- a/TTS/tts/layers/glow_tts/glow.py +++ b/TTS/tts/layers/glow_tts/glow.py @@ -1,6 +1,7 @@ import torch from torch import nn from torch.nn import functional as F + from TTS.tts.layers.generic.wavenet import WN from ..generic.normalization import LayerNorm diff --git a/TTS/tts/layers/glow_tts/monotonic_align/__init__.py b/TTS/tts/layers/glow_tts/monotonic_align/__init__.py index 9673e9a2..7be124f4 100644 --- a/TTS/tts/layers/glow_tts/monotonic_align/__init__.py +++ b/TTS/tts/layers/glow_tts/monotonic_align/__init__.py @@ -1,6 +1,7 @@ import numpy as np import torch from torch.nn import functional as F + from TTS.tts.utils.generic_utils import sequence_mask try: diff --git a/TTS/tts/layers/glow_tts/transformer.py b/TTS/tts/layers/glow_tts/transformer.py index 78b5b2f4..1a67d0ba 100644 --- a/TTS/tts/layers/glow_tts/transformer.py +++ b/TTS/tts/layers/glow_tts/transformer.py @@ -1,4 +1,5 @@ import math + import torch from torch import nn from torch.nn import functional as F diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index 00399514..729a21af 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -1,8 +1,10 @@ import math + import numpy as np import torch from torch import nn from torch.nn import functional + from TTS.tts.utils.generic_utils import sequence_mask from TTS.tts.utils.ssim import ssim diff --git a/TTS/tts/layers/tacotron/attentions.py b/TTS/tts/layers/tacotron/attentions.py index 320a8509..a01ccc49 100644 --- a/TTS/tts/layers/tacotron/attentions.py +++ b/TTS/tts/layers/tacotron/attentions.py @@ -1,9 +1,9 @@ import torch +from scipy.stats import betabinom from torch import nn from torch.nn import functional as F from TTS.tts.layers.tacotron.common_layers import Linear -from scipy.stats import betabinom class LocationLayer(nn.Module): diff --git a/TTS/tts/layers/tacotron/tacotron.py b/TTS/tts/layers/tacotron/tacotron.py index 5ff9ed1d..153af5b7 100644 --- a/TTS/tts/layers/tacotron/tacotron.py +++ b/TTS/tts/layers/tacotron/tacotron.py @@ -1,8 +1,9 @@ # coding: utf-8 import torch from torch import nn -from .common_layers import Prenet + from .attentions import init_attn +from .common_layers import Prenet class BatchNormConv1d(nn.Module): diff --git a/TTS/tts/layers/tacotron/tacotron2.py b/TTS/tts/layers/tacotron/tacotron2.py index 755598c6..df14aead 100644 --- a/TTS/tts/layers/tacotron/tacotron2.py +++ b/TTS/tts/layers/tacotron/tacotron2.py @@ -1,8 +1,10 @@ import torch from torch import nn from torch.nn import functional as F -from .common_layers import Prenet, Linear + from .attentions import init_attn +from .common_layers import Linear, Prenet + # NOTE: linter has a problem with the current TF release # pylint: disable=no-value-for-parameter diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py index 903b99c8..e097ac50 100644 --- a/TTS/tts/models/align_tts.py +++ b/TTS/tts/models/align_tts.py @@ -1,12 +1,13 @@ import torch import torch.nn as nn -from TTS.tts.layers.generic.pos_encoding import PositionalEncoding + +from TTS.tts.layers.align_tts.mdn import MDNBlock +from TTS.tts.layers.feed_forward.decoder import Decoder from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor +from TTS.tts.layers.feed_forward.encoder import Encoder +from TTS.tts.layers.generic.pos_encoding import PositionalEncoding from TTS.tts.layers.glow_tts.monotonic_align import generate_path, maximum_path from TTS.tts.utils.generic_utils import sequence_mask -from TTS.tts.layers.align_tts.mdn import MDNBlock -from TTS.tts.layers.feed_forward.encoder import Encoder -from TTS.tts.layers.feed_forward.decoder import Decoder class AlignTTS(nn.Module): diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index 77222cba..0717e2a8 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -1,12 +1,13 @@ import math + import torch from torch import nn from torch.nn import functional as F -from TTS.tts.layers.glow_tts.encoder import Encoder from TTS.tts.layers.glow_tts.decoder import Decoder +from TTS.tts.layers.glow_tts.encoder import Encoder +from TTS.tts.layers.glow_tts.monotonic_align import generate_path, maximum_path from TTS.tts.utils.generic_utils import sequence_mask -from TTS.tts.layers.glow_tts.monotonic_align import maximum_path, generate_path class GlowTTS(nn.Module): diff --git a/TTS/tts/models/speedy_speech.py b/TTS/tts/models/speedy_speech.py index 0bad9ede..9880b82b 100644 --- a/TTS/tts/models/speedy_speech.py +++ b/TTS/tts/models/speedy_speech.py @@ -1,11 +1,12 @@ import torch from torch import nn + from TTS.tts.layers.feed_forward.decoder import Decoder from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor from TTS.tts.layers.feed_forward.encoder import Encoder from TTS.tts.layers.generic.pos_encoding import PositionalEncoding -from TTS.tts.utils.generic_utils import sequence_mask from TTS.tts.layers.glow_tts.monotonic_align import generate_path +from TTS.tts.utils.generic_utils import sequence_mask class SpeedySpeech(nn.Module): diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py index 25386fca..c9750807 100644 --- a/TTS/tts/models/tacotron2.py +++ b/TTS/tts/models/tacotron2.py @@ -5,6 +5,7 @@ from TTS.tts.layers.tacotron.gst_layers import GST from TTS.tts.layers.tacotron.tacotron2 import Decoder, Encoder, Postnet from TTS.tts.models.tacotron_abstract import TacotronAbstract + # TODO: match function arguments with tacotron class Tacotron2(TacotronAbstract): """Tacotron2 as in https://arxiv.org/abs/1712.05884 diff --git a/TTS/tts/tf/layers/tacotron/tacotron2.py b/TTS/tts/tf/layers/tacotron/tacotron2.py index 3247a8c4..1fe679d2 100644 --- a/TTS/tts/tf/layers/tacotron/tacotron2.py +++ b/TTS/tts/tf/layers/tacotron/tacotron2.py @@ -1,7 +1,8 @@ import tensorflow as tf from tensorflow import keras + +from TTS.tts.tf.layers.tacotron.common_layers import Attention, Prenet from TTS.tts.tf.utils.tf_utils import shape_list -from TTS.tts.tf.layers.tacotron.common_layers import Prenet, Attention # NOTE: linter has a problem with the current TF release diff --git a/TTS/tts/tf/models/tacotron2.py b/TTS/tts/tf/models/tacotron2.py index 5a0c1977..9cc62070 100644 --- a/TTS/tts/tf/models/tacotron2.py +++ b/TTS/tts/tf/models/tacotron2.py @@ -1,7 +1,7 @@ import tensorflow as tf from tensorflow import keras -from TTS.tts.tf.layers.tacotron.tacotron2 import Encoder, Decoder, Postnet +from TTS.tts.tf.layers.tacotron.tacotron2 import Decoder, Encoder, Postnet from TTS.tts.tf.utils.tf_utils import shape_list diff --git a/TTS/tts/tf/utils/generic_utils.py b/TTS/tts/tf/utils/generic_utils.py index 8956b47e..5b8b4ce2 100644 --- a/TTS/tts/tf/utils/generic_utils.py +++ b/TTS/tts/tf/utils/generic_utils.py @@ -1,6 +1,7 @@ import datetime import importlib import pickle + import numpy as np import tensorflow as tf diff --git a/TTS/tts/tf/utils/io.py b/TTS/tts/tf/utils/io.py index 06c1c9fb..b2345b00 100644 --- a/TTS/tts/tf/utils/io.py +++ b/TTS/tts/tf/utils/io.py @@ -1,5 +1,6 @@ -import pickle import datetime +import pickle + import tensorflow as tf diff --git a/TTS/tts/utils/chinese_mandarin/numbers.py b/TTS/tts/utils/chinese_mandarin/numbers.py index adb21142..4787ea61 100644 --- a/TTS/tts/utils/chinese_mandarin/numbers.py +++ b/TTS/tts/utils/chinese_mandarin/numbers.py @@ -5,8 +5,8 @@ # This uses Python 3, but it's easy to port to Python 2 by changing # strings to u'xx'. -import re import itertools +import re def _num2chinese(num: str, big=False, simp=True, o=False, twoalt=False) -> str: diff --git a/TTS/tts/utils/chinese_mandarin/phonemizer.py b/TTS/tts/utils/chinese_mandarin/phonemizer.py index 7e46edf6..29cac160 100644 --- a/TTS/tts/utils/chinese_mandarin/phonemizer.py +++ b/TTS/tts/utils/chinese_mandarin/phonemizer.py @@ -1,13 +1,11 @@ from typing import List +import jieba import pypinyin from .pinyinToPhonemes import PINYIN_DICT -import jieba - - def _chinese_character_to_pinyin(text: str) -> List[str]: pinyins = pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, neutral_tone_with_five=True) pinyins_flat_list = [item for sublist in pinyins for item in sublist] diff --git a/TTS/tts/utils/generic_utils.py b/TTS/tts/utils/generic_utils.py index 6e566f10..8c23dd84 100644 --- a/TTS/tts/utils/generic_utils.py +++ b/TTS/tts/utils/generic_utils.py @@ -1,9 +1,10 @@ -import re -import torch import importlib -import numpy as np +import re from collections import Counter +import numpy as np +import torch + from TTS.utils.generic_utils import check_argument diff --git a/TTS/tts/utils/io.py b/TTS/tts/utils/io.py index 32b5c766..bb8432fa 100644 --- a/TTS/tts/utils/io.py +++ b/TTS/tts/utils/io.py @@ -1,8 +1,9 @@ -import os -import torch import datetime +import os import pickle as pickle_tts +import torch + from TTS.utils.io import RenamingUnpickler diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py index 224667dd..cb2827fd 100644 --- a/TTS/tts/utils/speakers.py +++ b/TTS/tts/utils/speakers.py @@ -1,5 +1,5 @@ -import os import json +import os def make_speakers_json_path(out_path): diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py index 4621961f..8dd33dc7 100644 --- a/TTS/tts/utils/synthesis.py +++ b/TTS/tts/utils/synthesis.py @@ -1,14 +1,16 @@ import os -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +import numpy as np import pkg_resources +import torch + +from .text import phoneme_to_sequence, text_to_sequence + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" installed = {pkg.key for pkg in pkg_resources.working_set} # pylint: disable=not-an-iterable if "tensorflow" in installed or "tensorflow-gpu" in installed: import tensorflow as tf -import torch -import numpy as np -from .text import text_to_sequence, phoneme_to_sequence def text_to_seqvec(text, CONFIG): diff --git a/TTS/tts/utils/text/__init__.py b/TTS/tts/utils/text/__init__.py index 6d1dc9a0..64d15b01 100644 --- a/TTS/tts/utils/text/__init__.py +++ b/TTS/tts/utils/text/__init__.py @@ -5,10 +5,10 @@ import re import phonemizer from packaging import version from phonemizer.phonemize import phonemize + +from TTS.tts.utils.chinese_mandarin.phonemizer import chinese_text_to_phonemes from TTS.tts.utils.text import cleaners from TTS.tts.utils.text.symbols import _bos, _eos, _punctuations, make_symbols, phonemes, symbols -from TTS.tts.utils.chinese_mandarin.phonemizer import chinese_text_to_phonemes - # pylint: disable=unnecessary-comprehension # Mappings from symbol to numeric ID and vice versa: diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index 555d01d1..d61738a6 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -11,12 +11,14 @@ hyperparameter. Some cleaners are English-specific. You'll typically want to use """ import re + from unidecode import unidecode -from .number_norm import normalize_numbers -from .abbreviations import abbreviations_en, abbreviations_fr -from .time import expand_time_english + from TTS.tts.utils.chinese_mandarin.numbers import replace_numbers_to_characters_in_text +from .abbreviations import abbreviations_en, abbreviations_fr +from .number_norm import normalize_numbers +from .time import expand_time_english # Regular expression matching whitespace: _whitespace_re = re.compile(r"\s+") diff --git a/TTS/tts/utils/text/number_norm.py b/TTS/tts/utils/text/number_norm.py index 4f648b42..e8377ede 100644 --- a/TTS/tts/utils/text/number_norm.py +++ b/TTS/tts/utils/text/number_norm.py @@ -1,9 +1,10 @@ """ from https://github.com/keithito/tacotron """ -import inflect import re from typing import Dict +import inflect + _inflect = inflect.engine() _comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])") _decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)") diff --git a/TTS/tts/utils/text/time.py b/TTS/tts/utils/text/time.py index a591434f..c8ac09e7 100644 --- a/TTS/tts/utils/text/time.py +++ b/TTS/tts/utils/text/time.py @@ -1,4 +1,5 @@ import re + import inflect _inflect = inflect.engine() diff --git a/TTS/tts/utils/visual.py b/TTS/tts/utils/visual.py index 97a8cd48..44732322 100644 --- a/TTS/tts/utils/visual.py +++ b/TTS/tts/utils/visual.py @@ -1,12 +1,13 @@ import librosa import matplotlib +import matplotlib.pyplot as plt import numpy as np import torch -matplotlib.use("Agg") -import matplotlib.pyplot as plt from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme +matplotlib.use("Agg") + def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False): if isinstance(alignment, torch.Tensor): diff --git a/TTS/utils/arguments.py b/TTS/utils/arguments.py index c688cd16..af0a1598 100644 --- a/TTS/utils/arguments.py +++ b/TTS/utils/arguments.py @@ -4,11 +4,12 @@ import argparse import glob +import json import os import re -import json import torch + from TTS.tts.utils.text.symbols import parse_symbols from TTS.utils.console_logger import ConsoleLogger from TTS.utils.generic_utils import create_experiment_folder, get_git_branch diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index 2c451d23..60af8f3e 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -1,12 +1,13 @@ import librosa -import soundfile as sf import numpy as np import scipy.io.wavfile import scipy.signal +import soundfile as sf + +from TTS.tts.utils.data import StandardScaler # import pyworld as pw -from TTS.tts.utils.data import StandardScaler # pylint: disable=too-many-public-methods class AudioProcessor(object): diff --git a/TTS/utils/console_logger.py b/TTS/utils/console_logger.py index a035fa4e..7d6e1968 100644 --- a/TTS/utils/console_logger.py +++ b/TTS/utils/console_logger.py @@ -1,6 +1,6 @@ import datetime -from TTS.utils.io import AttrDict +from TTS.utils.io import AttrDict tcolors = AttrDict( { diff --git a/TTS/utils/io.py b/TTS/utils/io.py index 846c6fc1..84493e07 100644 --- a/TTS/utils/io.py +++ b/TTS/utils/io.py @@ -1,10 +1,11 @@ -import os -import re import json -import yaml +import os import pickle as pickle_tts +import re from shutil import copyfile +import yaml + class RenamingUnpickler(pickle_tts.Unpickler): """Overload default pickler to solve module renaming problem""" diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index ad2dd8b9..f0a81227 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -7,6 +7,7 @@ from shutil import copyfile import gdown import requests + from TTS.utils.generic_utils import get_user_data_dir from TTS.utils.io import load_config diff --git a/TTS/utils/radam.py b/TTS/utils/radam.py index 37403929..b6c86fed 100644 --- a/TTS/utils/radam.py +++ b/TTS/utils/radam.py @@ -1,6 +1,7 @@ # from https://github.com/LiyuanLucasLiu/RAdam import math + import torch from torch.optim.optimizer import Optimizer diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index b8896ec4..fb15a16c 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -1,19 +1,18 @@ import time import numpy as np -import torch import pysbd +import torch -from TTS.utils.audio import AudioProcessor -from TTS.utils.io import load_config from TTS.tts.utils.generic_utils import setup_model from TTS.tts.utils.speakers import load_speaker_mapping -from TTS.vocoder.utils.generic_utils import setup_generator, interpolate_vocoder_input - # pylint: disable=unused-wildcard-import # pylint: disable=wildcard-import from TTS.tts.utils.synthesis import synthesis, trim_silence from TTS.tts.utils.text import make_symbols, phonemes, symbols +from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config +from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input, setup_generator class Synthesizer(object): diff --git a/TTS/utils/tensorboard_logger.py b/TTS/utils/tensorboard_logger.py index 769d47f5..3874a42b 100644 --- a/TTS/utils/tensorboard_logger.py +++ b/TTS/utils/tensorboard_logger.py @@ -1,4 +1,5 @@ import traceback + from tensorboardX import SummaryWriter diff --git a/TTS/utils/training.py b/TTS/utils/training.py index 56255100..37b32637 100644 --- a/TTS/utils/training.py +++ b/TTS/utils/training.py @@ -1,5 +1,5 @@ -import torch import numpy as np +import torch def setup_torch_training_env(cudnn_enable, cudnn_benchmark): diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py index 4010b628..e14d50ff 100644 --- a/TTS/vocoder/datasets/gan_dataset.py +++ b/TTS/vocoder/datasets/gan_dataset.py @@ -1,11 +1,12 @@ -import os import glob -import torch +import os import random -import numpy as np -from torch.utils.data import Dataset from multiprocessing import Manager +import numpy as np +import torch +from torch.utils.data import Dataset + class GANDataset(Dataset): """ diff --git a/TTS/vocoder/datasets/preprocess.py b/TTS/vocoder/datasets/preprocess.py index 2aa1d116..d99ee147 100644 --- a/TTS/vocoder/datasets/preprocess.py +++ b/TTS/vocoder/datasets/preprocess.py @@ -1,9 +1,9 @@ import glob import os from pathlib import Path -from tqdm import tqdm import numpy as np +from tqdm import tqdm def preprocess_wav_files(out_path, config, ap): diff --git a/TTS/vocoder/datasets/wavegrad_dataset.py b/TTS/vocoder/datasets/wavegrad_dataset.py index be2b5a8a..51767b56 100644 --- a/TTS/vocoder/datasets/wavegrad_dataset.py +++ b/TTS/vocoder/datasets/wavegrad_dataset.py @@ -1,11 +1,12 @@ -import os import glob -import torch +import os import random -import numpy as np -from torch.utils.data import Dataset from multiprocessing import Manager +import numpy as np +import torch +from torch.utils.data import Dataset + class WaveGradDataset(Dataset): """ diff --git a/TTS/vocoder/datasets/wavernn_dataset.py b/TTS/vocoder/datasets/wavernn_dataset.py index 4a554580..1596ea8f 100644 --- a/TTS/vocoder/datasets/wavernn_dataset.py +++ b/TTS/vocoder/datasets/wavernn_dataset.py @@ -1,5 +1,5 @@ -import torch import numpy as np +import torch from torch.utils.data import Dataset diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py index f988565b..2e204115 100644 --- a/TTS/vocoder/layers/losses.py +++ b/TTS/vocoder/layers/losses.py @@ -1,5 +1,5 @@ -import torch import librosa +import torch from torch import nn from torch.nn import functional as F diff --git a/TTS/vocoder/layers/pqmf.py b/TTS/vocoder/layers/pqmf.py index 5cfbf729..6253efbb 100644 --- a/TTS/vocoder/layers/pqmf.py +++ b/TTS/vocoder/layers/pqmf.py @@ -1,7 +1,6 @@ import numpy as np import torch import torch.nn.functional as F - from scipy import signal as sig diff --git a/TTS/vocoder/models/multiband_melgan_generator.py b/TTS/vocoder/models/multiband_melgan_generator.py index 0caadc09..25d65906 100644 --- a/TTS/vocoder/models/multiband_melgan_generator.py +++ b/TTS/vocoder/models/multiband_melgan_generator.py @@ -1,7 +1,7 @@ import torch -from TTS.vocoder.models.melgan_generator import MelganGenerator from TTS.vocoder.layers.pqmf import PQMF +from TTS.vocoder.models.melgan_generator import MelganGenerator class MultibandMelganGenerator(MelganGenerator): diff --git a/TTS/vocoder/models/parallel_wavegan_discriminator.py b/TTS/vocoder/models/parallel_wavegan_discriminator.py index 7414c233..9cc1061c 100644 --- a/TTS/vocoder/models/parallel_wavegan_discriminator.py +++ b/TTS/vocoder/models/parallel_wavegan_discriminator.py @@ -1,4 +1,5 @@ import math + import torch from torch import nn diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py index c9a84a0e..788856cc 100644 --- a/TTS/vocoder/models/parallel_wavegan_generator.py +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -1,4 +1,5 @@ import math + import numpy as np import torch diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py index ef8e8add..d19c2a5e 100644 --- a/TTS/vocoder/models/wavegrad.py +++ b/TTS/vocoder/models/wavegrad.py @@ -3,7 +3,7 @@ import torch from torch import nn from torch.nn.utils import weight_norm -from ..layers.wavegrad import DBlock, FiLM, UBlock, Conv1d +from ..layers.wavegrad import Conv1d, DBlock, FiLM, UBlock class Wavegrad(nn.Module): diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index ca4ea3f8..994244dc 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -1,16 +1,14 @@ import sys +import time + +import numpy as np import torch import torch.nn as nn -import numpy as np import torch.nn.functional as F -import time # fix this from TTS.utils.audio import AudioProcessor as ap -from TTS.vocoder.utils.distribution import ( - sample_from_gaussian, - sample_from_discretized_mix_logistic, -) +from TTS.vocoder.utils.distribution import sample_from_discretized_mix_logistic, sample_from_gaussian def stream(string, variables): diff --git a/TTS/vocoder/tf/layers/pqmf.py b/TTS/vocoder/tf/layers/pqmf.py index e1b5055a..81b666b9 100644 --- a/TTS/vocoder/tf/layers/pqmf.py +++ b/TTS/vocoder/tf/layers/pqmf.py @@ -1,6 +1,5 @@ import numpy as np import tensorflow as tf - from scipy import signal as sig diff --git a/TTS/vocoder/tf/models/melgan_generator.py b/TTS/vocoder/tf/models/melgan_generator.py index 0a8a0b73..1a0fa103 100644 --- a/TTS/vocoder/tf/models/melgan_generator.py +++ b/TTS/vocoder/tf/models/melgan_generator.py @@ -1,11 +1,13 @@ import logging import os +import tensorflow as tf + +from TTS.vocoder.tf.layers.melgan import ReflectionPad1d, ResidualStack + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # FATAL logging.getLogger("tensorflow").setLevel(logging.FATAL) -import tensorflow as tf -from TTS.vocoder.tf.layers.melgan import ResidualStack, ReflectionPad1d # pylint: disable=too-many-ancestors diff --git a/TTS/vocoder/tf/models/multiband_melgan_generator.py b/TTS/vocoder/tf/models/multiband_melgan_generator.py index 23836659..24d899b2 100644 --- a/TTS/vocoder/tf/models/multiband_melgan_generator.py +++ b/TTS/vocoder/tf/models/multiband_melgan_generator.py @@ -1,7 +1,8 @@ import tensorflow as tf -from TTS.vocoder.tf.models.melgan_generator import MelganGenerator from TTS.vocoder.tf.layers.pqmf import PQMF +from TTS.vocoder.tf.models.melgan_generator import MelganGenerator + # pylint: disable=too-many-ancestors # pylint: disable=abstract-method diff --git a/TTS/vocoder/tf/utils/generic_utils.py b/TTS/vocoder/tf/utils/generic_utils.py index 97cb9ae7..94364ab4 100644 --- a/TTS/vocoder/tf/utils/generic_utils.py +++ b/TTS/vocoder/tf/utils/generic_utils.py @@ -1,5 +1,5 @@ -import re import importlib +import re def to_camel(text): diff --git a/TTS/vocoder/tf/utils/io.py b/TTS/vocoder/tf/utils/io.py index 6ffa302c..4f69ad27 100644 --- a/TTS/vocoder/tf/utils/io.py +++ b/TTS/vocoder/tf/utils/io.py @@ -1,5 +1,6 @@ import datetime import pickle + import tensorflow as tf diff --git a/TTS/vocoder/utils/distribution.py b/TTS/vocoder/utils/distribution.py index 3f62b7ad..9b37aa12 100644 --- a/TTS/vocoder/utils/distribution.py +++ b/TTS/vocoder/utils/distribution.py @@ -1,8 +1,9 @@ -import numpy as np import math + +import numpy as np import torch -from torch.distributions.normal import Normal import torch.nn.functional as F +from torch.distributions.normal import Normal def gaussian_loss(y_hat, y, log_std_min=-7.0): diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py index 35102295..53de9d96 100644 --- a/TTS/vocoder/utils/generic_utils.py +++ b/TTS/vocoder/utils/generic_utils.py @@ -1,7 +1,8 @@ -import re -import torch import importlib +import re + import numpy as np +import torch from matplotlib import pyplot as plt from TTS.tts.utils.visual import plot_spectrogram diff --git a/TTS/vocoder/utils/io.py b/TTS/vocoder/utils/io.py index 8a5d144d..9c67535f 100644 --- a/TTS/vocoder/utils/io.py +++ b/TTS/vocoder/utils/io.py @@ -1,9 +1,10 @@ -import os -import glob -import torch import datetime +import glob +import os import pickle as pickle_tts +import torch + from TTS.utils.io import RenamingUnpickler diff --git a/hubconf.py b/hubconf.py index d8589c94..0dd25893 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,8 +1,8 @@ dependencies = ['torch', 'gdown', 'pysbd', 'phonemizer', 'unidecode', 'pypinyin'] # apt install espeak-ng import torch -from TTS.utils.synthesizer import Synthesizer from TTS.utils.manage import ModelManager +from TTS.utils.synthesizer import Synthesizer def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', vocoder_name=None, use_cuda=False): diff --git a/notebooks/dataset_analysis/analyze.py b/notebooks/dataset_analysis/analyze.py index 161e2ae3..66d008cd 100644 --- a/notebooks/dataset_analysis/analyze.py +++ b/notebooks/dataset_analysis/analyze.py @@ -1,14 +1,16 @@ # visualisation tools for mimic2 -import matplotlib.pyplot as plt -from statistics import stdev, mode, mean, median -from statistics import StatisticsError import argparse -import os import csv -import seaborn as sns +import os import random +from statistics import StatisticsError, mean, median, mode, stdev + +import matplotlib.pyplot as plt + +import seaborn as sns from text.cmudict import CMUDict + def get_audio_seconds(frames): return (frames*12.5)/1000 diff --git a/setup.py b/setup.py index de277655..68337644 100644 --- a/setup.py +++ b/setup.py @@ -8,9 +8,8 @@ from distutils.version import LooseVersion import numpy import setuptools.command.build_py import setuptools.command.develop -from setuptools import setup, Extension, find_packages from Cython.Build import cythonize - +from setuptools import Extension, find_packages, setup if LooseVersion(sys.version) < LooseVersion("3.6") or LooseVersion(sys.version) > LooseVersion("3.9"): raise RuntimeError( diff --git a/tests/test_audio.py b/tests/test_audio.py index e33a6d6c..75141730 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -2,7 +2,6 @@ import os import unittest from tests import get_tests_input_path, get_tests_output_path, get_tests_path - from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config diff --git a/tests/test_feed_forward_layers.py b/tests/test_feed_forward_layers.py index 7dd54e56..a19e808c 100644 --- a/tests/test_feed_forward_layers.py +++ b/tests/test_feed_forward_layers.py @@ -1,4 +1,5 @@ import torch + from TTS.tts.layers.feed_forward.decoder import Decoder from TTS.tts.layers.feed_forward.encoder import Encoder from TTS.tts.utils.generic_utils import sequence_mask diff --git a/tests/test_glow_tts.py b/tests/test_glow_tts.py index e9fdc761..66d594e2 100644 --- a/tests/test_glow_tts.py +++ b/tests/test_glow_tts.py @@ -3,13 +3,13 @@ import os import unittest import torch -from tests import get_tests_input_path from torch import optim +from tests import get_tests_input_path from TTS.tts.layers.losses import GlowTTSLoss from TTS.tts.models.glow_tts import GlowTTS -from TTS.utils.io import load_config from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config #pylint: disable=unused-variable diff --git a/tests/test_layers.py b/tests/test_layers.py index 582ca8be..9224c673 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -1,8 +1,9 @@ import unittest + import torch as T -from TTS.tts.layers.tacotron.tacotron import Prenet, CBHG, Decoder, Encoder from TTS.tts.layers.losses import L1LossMasked, SSIMLoss +from TTS.tts.layers.tacotron.tacotron import CBHG, Decoder, Encoder, Prenet from TTS.tts.utils.generic_utils import sequence_mask # pylint: disable=unused-variable diff --git a/tests/test_loader.py b/tests/test_loader.py index 439e8d35..e711cc03 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -4,9 +4,9 @@ import unittest import numpy as np import torch -from tests import get_tests_input_path, get_tests_output_path from torch.utils.data import DataLoader +from tests import get_tests_input_path, get_tests_output_path from TTS.tts.datasets import TTSDataset from TTS.tts.datasets.preprocess import ljspeech from TTS.utils.audio import AudioProcessor diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py index 8c7b16b0..c120018d 100644 --- a/tests/test_preprocessors.py +++ b/tests/test_preprocessors.py @@ -1,7 +1,7 @@ -import unittest import os -from tests import get_tests_input_path +import unittest +from tests import get_tests_input_path from TTS.tts.datasets.preprocess import common_voice diff --git a/tests/test_speaker_encoder.py b/tests/test_speaker_encoder.py index 4d4dbba1..77f3b54c 100644 --- a/tests/test_speaker_encoder.py +++ b/tests/test_speaker_encoder.py @@ -2,9 +2,9 @@ import os import unittest import torch as T -from tests import get_tests_input_path -from TTS.speaker_encoder.losses import GE2ELoss, AngleProtoLoss +from tests import get_tests_input_path +from TTS.speaker_encoder.losses import AngleProtoLoss, GE2ELoss from TTS.speaker_encoder.model import SpeakerEncoder from TTS.utils.io import load_config diff --git a/tests/test_speedy_speech_layers.py b/tests/test_speedy_speech_layers.py index 954d5eca..51a2450a 100644 --- a/tests/test_speedy_speech_layers.py +++ b/tests/test_speedy_speech_layers.py @@ -1,8 +1,8 @@ import torch -from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor -from TTS.tts.utils.generic_utils import sequence_mask -from TTS.tts.models.speedy_speech import SpeedySpeech +from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor +from TTS.tts.models.speedy_speech import SpeedySpeech +from TTS.tts.utils.generic_utils import sequence_mask use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") diff --git a/tests/test_symbols.py b/tests/test_symbols.py index 4e70b9d5..0c24f124 100644 --- a/tests/test_symbols.py +++ b/tests/test_symbols.py @@ -2,6 +2,7 @@ import unittest from TTS.tts.utils.text import phonemes + class SymbolsTest(unittest.TestCase): def test_uniqueness(self): #pylint: disable=no-self-use assert sorted(phonemes) == sorted(list(set(phonemes))), " {} vs {} ".format(len(phonemes), len(set(phonemes))) diff --git a/tests/test_synthesizer.py b/tests/test_synthesizer.py index b7d3febc..1c2c23b2 100644 --- a/tests/test_synthesizer.py +++ b/tests/test_synthesizer.py @@ -2,11 +2,11 @@ import os import unittest from tests import get_tests_input_path, get_tests_output_path -from TTS.utils.synthesizer import Synthesizer from TTS.tts.utils.generic_utils import setup_model from TTS.tts.utils.io import save_checkpoint from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from TTS.utils.io import load_config +from TTS.utils.synthesizer import Synthesizer class SynthesizerTest(unittest.TestCase): diff --git a/tests/test_tacotron2_model.py b/tests/test_tacotron2_model.py index 4ac07118..fb811eaa 100644 --- a/tests/test_tacotron2_model.py +++ b/tests/test_tacotron2_model.py @@ -3,13 +3,13 @@ import os import unittest import torch -from tests import get_tests_input_path from torch import nn, optim +from tests import get_tests_input_path from TTS.tts.layers.losses import MSELossMasked from TTS.tts.models.tacotron2 import Tacotron2 -from TTS.utils.io import load_config from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config #pylint: disable=unused-variable diff --git a/tests/test_tacotron2_tf_model.py b/tests/test_tacotron2_tf_model.py index b792cfa7..084b972d 100644 --- a/tests/test_tacotron2_tf_model.py +++ b/tests/test_tacotron2_tf_model.py @@ -4,10 +4,10 @@ import unittest import numpy as np import tensorflow as tf import torch + from tests import get_tests_input_path from TTS.tts.tf.models.tacotron2 import Tacotron2 -from TTS.tts.tf.utils.tflite import (convert_tacotron2_to_tflite, - load_tflite_model) +from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model from TTS.utils.io import load_config tf.get_logger().setLevel('INFO') diff --git a/tests/test_tacotron_model.py b/tests/test_tacotron_model.py index f8e88160..0af8dab4 100644 --- a/tests/test_tacotron_model.py +++ b/tests/test_tacotron_model.py @@ -3,13 +3,13 @@ import os import unittest import torch -from tests import get_tests_input_path from torch import nn, optim +from tests import get_tests_input_path from TTS.tts.layers.losses import L1LossMasked from TTS.tts.models.tacotron import Tacotron -from TTS.utils.io import load_config from TTS.utils.audio import AudioProcessor +from TTS.utils.io import load_config #pylint: disable=unused-variable diff --git a/tests/test_text_processing.py b/tests/test_text_processing.py index 774ac0aa..b8b74e28 100644 --- a/tests/test_text_processing.py +++ b/tests/test_text_processing.py @@ -1,10 +1,10 @@ import os + # pylint: disable=unused-wildcard-import # pylint: disable=wildcard-import # pylint: disable=unused-import -from tests import get_tests_input_path +from tests import get_tests_input_path, get_tests_path from TTS.tts.utils.text import * -from tests import get_tests_path from TTS.utils.io import load_config conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) diff --git a/tests/test_vocoder_gan_datasets.py b/tests/test_vocoder_gan_datasets.py index 99a25dcf..13b1b9d2 100644 --- a/tests/test_vocoder_gan_datasets.py +++ b/tests/test_vocoder_gan_datasets.py @@ -1,9 +1,9 @@ import os import numpy as np -from tests import get_tests_path, get_tests_input_path, get_tests_output_path from torch.utils.data import DataLoader +from tests import get_tests_input_path, get_tests_output_path, get_tests_path from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config from TTS.vocoder.datasets.gan_dataset import GANDataset diff --git a/tests/test_vocoder_losses.py b/tests/test_vocoder_losses.py index 4a18ee53..7b3c7017 100644 --- a/tests/test_vocoder_losses.py +++ b/tests/test_vocoder_losses.py @@ -1,11 +1,11 @@ import os import torch -from tests import get_tests_input_path, get_tests_output_path, get_tests_path +from tests import get_tests_input_path, get_tests_output_path, get_tests_path from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config -from TTS.vocoder.layers.losses import MultiScaleSTFTLoss, STFTLoss, TorchSTFT, MelganFeatureLoss +from TTS.vocoder.layers.losses import MelganFeatureLoss, MultiScaleSTFTLoss, STFTLoss, TorchSTFT TESTS_PATH = get_tests_path() diff --git a/tests/test_vocoder_melgan_generator.py b/tests/test_vocoder_melgan_generator.py index fedf6301..f4958de4 100644 --- a/tests/test_vocoder_melgan_generator.py +++ b/tests/test_vocoder_melgan_generator.py @@ -3,6 +3,7 @@ import torch from TTS.vocoder.models.melgan_generator import MelganGenerator + def test_melgan_generator(): model = MelganGenerator() print(model) diff --git a/tests/test_vocoder_parallel_wavegan_discriminator.py b/tests/test_vocoder_parallel_wavegan_discriminator.py index b496e216..6496d562 100644 --- a/tests/test_vocoder_parallel_wavegan_discriminator.py +++ b/tests/test_vocoder_parallel_wavegan_discriminator.py @@ -1,7 +1,8 @@ import numpy as np import torch -from TTS.vocoder.models.parallel_wavegan_discriminator import ParallelWaveganDiscriminator, ResidualParallelWaveganDiscriminator +from TTS.vocoder.models.parallel_wavegan_discriminator import (ParallelWaveganDiscriminator, + ResidualParallelWaveganDiscriminator) def test_pwgan_disciminator(): diff --git a/tests/test_vocoder_pqmf.py b/tests/test_vocoder_pqmf.py index 74da451f..3112df5a 100644 --- a/tests/test_vocoder_pqmf.py +++ b/tests/test_vocoder_pqmf.py @@ -1,13 +1,12 @@ import os -import torch import soundfile as sf +import torch from librosa.core import load -from tests import get_tests_path, get_tests_input_path, get_tests_output_path +from tests import get_tests_input_path, get_tests_output_path, get_tests_path from TTS.vocoder.layers.pqmf import PQMF - TESTS_PATH = get_tests_path() WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") diff --git a/tests/test_vocoder_rwd.py b/tests/test_vocoder_rwd.py index 424d3b49..82525e1b 100644 --- a/tests/test_vocoder_rwd.py +++ b/tests/test_vocoder_rwd.py @@ -1,5 +1,5 @@ -import torch import numpy as np +import torch from TTS.vocoder.models.random_window_discriminator import RandomWindowDiscriminator diff --git a/tests/test_vocoder_tf_pqmf.py b/tests/test_vocoder_tf_pqmf.py index 16c46b2a..28aebe5b 100644 --- a/tests/test_vocoder_tf_pqmf.py +++ b/tests/test_vocoder_tf_pqmf.py @@ -1,13 +1,12 @@ import os -import tensorflow as tf import soundfile as sf +import tensorflow as tf from librosa.core import load -from tests import get_tests_path, get_tests_input_path, get_tests_output_path +from tests import get_tests_input_path, get_tests_output_path, get_tests_path from TTS.vocoder.tf.layers.pqmf import PQMF - TESTS_PATH = get_tests_path() WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") diff --git a/tests/test_vocoder_wavernn.py b/tests/test_vocoder_wavernn.py index 2464cfa3..9c58fa1c 100644 --- a/tests/test_vocoder_wavernn.py +++ b/tests/test_vocoder_wavernn.py @@ -1,6 +1,8 @@ +import random + import numpy as np import torch -import random + from TTS.vocoder.models.wavernn import WaveRNN diff --git a/tests/test_vocoder_wavernn_datasets.py b/tests/test_vocoder_wavernn_datasets.py index a95e247a..6a2a3339 100644 --- a/tests/test_vocoder_wavernn_datasets.py +++ b/tests/test_vocoder_wavernn_datasets.py @@ -2,13 +2,13 @@ import os import shutil import numpy as np -from tests import get_tests_path, get_tests_input_path, get_tests_output_path from torch.utils.data import DataLoader +from tests import get_tests_input_path, get_tests_output_path, get_tests_path from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config -from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset from TTS.vocoder.datasets.preprocess import load_wav_feat_data, preprocess_wav_files +from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset file_path = os.path.dirname(os.path.realpath(__file__)) OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") diff --git a/tests/test_wavegrad_layers.py b/tests/test_wavegrad_layers.py index d81ae47d..6052e922 100644 --- a/tests/test_wavegrad_layers.py +++ b/tests/test_wavegrad_layers.py @@ -1,6 +1,6 @@ import torch -from TTS.vocoder.layers.wavegrad import PositionalEncoding, FiLM, UBlock, DBlock +from TTS.vocoder.layers.wavegrad import DBlock, FiLM, PositionalEncoding, UBlock from TTS.vocoder.models.wavegrad import Wavegrad diff --git a/tests/test_wavegrad_train.py b/tests/test_wavegrad_train.py index 45f75e3b..6c950c5a 100644 --- a/tests/test_wavegrad_train.py +++ b/tests/test_wavegrad_train.py @@ -3,6 +3,7 @@ import unittest import numpy as np import torch from torch import optim + from TTS.vocoder.models.wavegrad import Wavegrad #pylint: disable=unused-variable