mirror of https://github.com/coqui-ai/TTS.git
isort all imports
This commit is contained in:
parent 0e79fa86ad
commit e5b9607bc3
|
@ -1,12 +1,13 @@
|
||||||
import argparse
|
import argparse
|
||||||
import importlib
|
import importlib
|
||||||
import os
|
import os
|
||||||
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from argparse import RawTextHelpFormatter
|
|
||||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from TTS.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.io import load_checkpoint
|
from TTS.tts.utils.io import load_checkpoint
|
||||||
|
@ -14,7 +15,6 @@ from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# pylint: disable=bad-option-value
|
# pylint: disable=bad-option-value
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
|
|
|
@ -3,14 +3,14 @@ import glob
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import torch
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import torch
|
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
from TTS.speaker_encoder.model import SpeakerEncoder
|
||||||
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
|
from TTS.tts.utils.speakers import save_speaker_mapping
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
from TTS.tts.utils.speakers import save_speaker_mapping
|
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Compute embedding vectors for each wav file in a dataset. If "target_dataset" is defined, it generates "speakers.json" necessary for training a multi-speaker model.'
|
description='Compute embedding vectors for each wav file in a dataset. If "target_dataset" is defined, it generates "speakers.json" necessary for training a multi-speaker model.'
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import os
|
|
||||||
import glob
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.utils.io import load_config
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
@ -7,7 +7,6 @@ from TTS.vocoder.tf.utils.generic_utils import setup_generator
|
||||||
from TTS.vocoder.tf.utils.io import load_checkpoint
|
from TTS.vocoder.tf.utils.io import load_checkpoint
|
||||||
from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite
|
from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--tf_model", type=str, help="Path to target torch model to be converted to TF.")
|
parser.add_argument("--tf_model", type=str, help="Path to target torch model to be converted to TF.")
|
||||||
parser.add_argument("--config_path", type=str, help="Path to config file of torch model.")
|
parser.add_argument("--config_path", type=str, help="Path to config file of torch model.")
|
||||||
|
|
|
@ -1,17 +1,14 @@
|
||||||
import argparse
|
import argparse
|
||||||
from difflib import SequenceMatcher
|
|
||||||
import os
|
import os
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import (
|
from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import (compare_torch_tf, convert_tf_name,
|
||||||
compare_torch_tf,
|
transfer_weights_torch_to_tf)
|
||||||
convert_tf_name,
|
|
||||||
transfer_weights_torch_to_tf,
|
|
||||||
)
|
|
||||||
from TTS.vocoder.tf.utils.generic_utils import setup_generator as setup_tf_generator
|
from TTS.vocoder.tf.utils.generic_utils import setup_generator as setup_tf_generator
|
||||||
from TTS.vocoder.tf.utils.io import save_checkpoint
|
from TTS.vocoder.tf.utils.io import save_checkpoint
|
||||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
from TTS.vocoder.utils.generic_utils import setup_generator
|
||||||
|
|
|
@ -2,12 +2,11 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from TTS.utils.io import load_config
|
|
||||||
from TTS.tts.utils.text.symbols import symbols, phonemes
|
|
||||||
from TTS.tts.tf.utils.generic_utils import setup_model
|
from TTS.tts.tf.utils.generic_utils import setup_model
|
||||||
from TTS.tts.tf.utils.io import load_checkpoint
|
from TTS.tts.tf.utils.io import load_checkpoint
|
||||||
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite
|
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite
|
||||||
|
from TTS.tts.utils.text.symbols import phonemes, symbols
|
||||||
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--tf_model", type=str, help="Path to target torch model to be converted to TF.")
|
parser.add_argument("--tf_model", type=str, help="Path to target torch model to be converted to TF.")
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
import argparse
|
import argparse
|
||||||
from difflib import SequenceMatcher
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
from difflib import SequenceMatcher
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.tts.tf.models.tacotron2 import Tacotron2
|
from TTS.tts.tf.models.tacotron2 import Tacotron2
|
||||||
from TTS.tts.tf.utils.convert_torch_to_tf_utils import compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf
|
from TTS.tts.tf.utils.convert_torch_to_tf_utils import compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf
|
||||||
from TTS.tts.tf.utils.generic_utils import save_checkpoint
|
from TTS.tts.tf.utils.generic_utils import save_checkpoint
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import pathlib
|
|
||||||
import time
|
|
||||||
import subprocess
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
"""Find all the unique characters in a dataset"""
|
"""Find all the unique characters in a dataset"""
|
||||||
import os
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
from TTS.tts.datasets.preprocess import get_preprocessor_by_name
|
from TTS.tts.datasets.preprocess import get_preprocessor_by_name
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
import argparse
|
import argparse
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import librosa
|
|
||||||
from distutils.dir_util import copy_tree
|
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
from distutils.dir_util import copy_tree
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
|
|
||||||
|
import librosa
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
# pylint: disable=redefined-outer-name, unused-argument
|
# pylint: disable=redefined-outer-name, unused-argument
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@ import torch
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.data.distributed import DistributedSampler
|
from torch.utils.data.distributed import DistributedSampler
|
||||||
|
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||||
from TTS.tts.layers.losses import AlignTTSLoss
|
from TTS.tts.layers.losses import AlignTTSLoss
|
||||||
|
|
|
@ -9,6 +9,7 @@ import traceback
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
from TTS.speaker_encoder.dataset import MyDataset
|
from TTS.speaker_encoder.dataset import MyDataset
|
||||||
from TTS.speaker_encoder.losses import AngleProtoLoss, GE2ELoss
|
from TTS.speaker_encoder.losses import AngleProtoLoss, GE2ELoss
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
from TTS.speaker_encoder.model import SpeakerEncoder
|
||||||
|
@ -16,13 +17,8 @@ from TTS.speaker_encoder.utils.generic_utils import check_config_speaker_encoder
|
||||||
from TTS.speaker_encoder.utils.visual import plot_embeddings
|
from TTS.speaker_encoder.utils.visual import plot_embeddings
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.generic_utils import (
|
from TTS.utils.generic_utils import (count_parameters, create_experiment_folder, get_git_branch,
|
||||||
count_parameters,
|
remove_experiment_folder, set_init_dict)
|
||||||
create_experiment_folder,
|
|
||||||
get_git_branch,
|
|
||||||
remove_experiment_folder,
|
|
||||||
set_init_dict,
|
|
||||||
)
|
|
||||||
from TTS.utils.io import copy_model_files, load_config
|
from TTS.utils.io import copy_model_files, load_config
|
||||||
from TTS.utils.radam import RAdam
|
from TTS.utils.radam import RAdam
|
||||||
from TTS.utils.tensorboard_logger import TensorboardLogger
|
from TTS.utils.tensorboard_logger import TensorboardLogger
|
||||||
|
|
|
@ -8,13 +8,11 @@ import traceback
|
||||||
from random import randrange
|
from random import randrange
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
# DISTRIBUTED
|
# DISTRIBUTED
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.data.distributed import DistributedSampler
|
from torch.utils.data.distributed import DistributedSampler
|
||||||
|
|
||||||
from TTS.utils.arguments import parse_arguments, process_args
|
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||||
from TTS.tts.layers.losses import GlowTTSLoss
|
from TTS.tts.layers.losses import GlowTTSLoss
|
||||||
|
@ -25,6 +23,7 @@ from TTS.tts.utils.speakers import parse_speakers
|
||||||
from TTS.tts.utils.synthesis import synthesis
|
from TTS.tts.utils.synthesis import synthesis
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||||
|
from TTS.utils.arguments import parse_arguments, process_args
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.distribute import init_distributed, reduce_tensor
|
from TTS.utils.distribute import init_distributed, reduce_tensor
|
||||||
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
||||||
|
|
|
@ -5,16 +5,15 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
import numpy as np
|
|
||||||
from random import randrange
|
from random import randrange
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from TTS.utils.arguments import parse_arguments, process_args
|
|
||||||
|
|
||||||
# DISTRIBUTED
|
# DISTRIBUTED
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.data.distributed import DistributedSampler
|
from torch.utils.data.distributed import DistributedSampler
|
||||||
|
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||||
from TTS.tts.layers.losses import SpeedySpeechLoss
|
from TTS.tts.layers.losses import SpeedySpeechLoss
|
||||||
|
@ -25,6 +24,7 @@ from TTS.tts.utils.speakers import parse_speakers
|
||||||
from TTS.tts.utils.synthesis import synthesis
|
from TTS.tts.utils.synthesis import synthesis
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||||
|
from TTS.utils.arguments import parse_arguments, process_args
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.distribute import init_distributed, reduce_tensor
|
from TTS.utils.distribute import init_distributed, reduce_tensor
|
||||||
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
||||||
|
|
|
@ -10,7 +10,7 @@ from random import randrange
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from TTS.utils.arguments import parse_arguments, process_args
|
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||||
from TTS.tts.layers.losses import TacotronLoss
|
from TTS.tts.layers.losses import TacotronLoss
|
||||||
|
@ -21,18 +21,13 @@ from TTS.tts.utils.speakers import parse_speakers
|
||||||
from TTS.tts.utils.synthesis import synthesis
|
from TTS.tts.utils.synthesis import synthesis
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||||
|
from TTS.utils.arguments import parse_arguments, process_args
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.distribute import DistributedSampler, apply_gradient_allreduce, init_distributed, reduce_tensor
|
from TTS.utils.distribute import DistributedSampler, apply_gradient_allreduce, init_distributed, reduce_tensor
|
||||||
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
||||||
from TTS.utils.radam import RAdam
|
from TTS.utils.radam import RAdam
|
||||||
from TTS.utils.training import (
|
from TTS.utils.training import (NoamLR, adam_weight_decay, check_update, gradual_training_scheduler, set_weight_decay,
|
||||||
NoamLR,
|
setup_torch_training_env)
|
||||||
adam_weight_decay,
|
|
||||||
check_update,
|
|
||||||
gradual_training_scheduler,
|
|
||||||
set_weight_decay,
|
|
||||||
setup_torch_training_env,
|
|
||||||
)
|
|
||||||
|
|
||||||
use_cuda, num_gpus = setup_torch_training_env(True, False)
|
use_cuda, num_gpus = setup_torch_training_env(True, False)
|
||||||
|
|
||||||
|
|
|
@ -9,13 +9,16 @@ import traceback
|
||||||
from inspect import signature
|
from inspect import signature
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
# DISTRIBUTED
|
||||||
|
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
from torch.utils.data.distributed import DistributedSampler
|
||||||
|
|
||||||
from TTS.utils.arguments import parse_arguments, process_args
|
from TTS.utils.arguments import parse_arguments, process_args
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
from TTS.utils.distribute import init_distributed
|
||||||
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
||||||
|
|
||||||
from TTS.utils.radam import RAdam
|
from TTS.utils.radam import RAdam
|
||||||
|
|
||||||
from TTS.utils.training import setup_torch_training_env
|
from TTS.utils.training import setup_torch_training_env
|
||||||
from TTS.vocoder.datasets.gan_dataset import GANDataset
|
from TTS.vocoder.datasets.gan_dataset import GANDataset
|
||||||
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
||||||
|
@ -23,11 +26,6 @@ from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss
|
||||||
from TTS.vocoder.utils.generic_utils import plot_results, setup_discriminator, setup_generator
|
from TTS.vocoder.utils.generic_utils import plot_results, setup_discriminator, setup_generator
|
||||||
from TTS.vocoder.utils.io import save_best_model, save_checkpoint
|
from TTS.vocoder.utils.io import save_best_model, save_checkpoint
|
||||||
|
|
||||||
# DISTRIBUTED
|
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
|
||||||
from torch.utils.data.distributed import DistributedSampler
|
|
||||||
from TTS.utils.distribute import init_distributed
|
|
||||||
|
|
||||||
use_cuda, num_gpus = setup_torch_training_env(True, True)
|
use_cuda, num_gpus = setup_torch_training_env(True, True)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,15 +5,15 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
# DISTRIBUTED
|
# DISTRIBUTED
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
from torch.nn.parallel import DistributedDataParallel as DDP_th
|
||||||
from torch.optim import Adam
|
from torch.optim import Adam
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.data.distributed import DistributedSampler
|
from torch.utils.data.distributed import DistributedSampler
|
||||||
|
|
||||||
from TTS.utils.arguments import parse_arguments, process_args
|
from TTS.utils.arguments import parse_arguments, process_args
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.distribute import init_distributed
|
from TTS.utils.distribute import init_distributed
|
||||||
|
|
|
@ -2,33 +2,29 @@
|
||||||
"""Train WaveRNN vocoder model."""
|
"""Train WaveRNN vocoder model."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import traceback
|
|
||||||
import time
|
|
||||||
import random
|
import random
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
# from torch.utils.data.distributed import DistributedSampler
|
|
||||||
|
|
||||||
from TTS.utils.arguments import parse_arguments, process_args
|
|
||||||
from TTS.tts.utils.visual import plot_spectrogram
|
from TTS.tts.utils.visual import plot_spectrogram
|
||||||
|
from TTS.utils.arguments import parse_arguments, process_args
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
from TTS.utils.generic_utils import KeepAverage, count_parameters, remove_experiment_folder, set_init_dict
|
||||||
from TTS.utils.radam import RAdam
|
from TTS.utils.radam import RAdam
|
||||||
from TTS.utils.training import setup_torch_training_env
|
from TTS.utils.training import setup_torch_training_env
|
||||||
from TTS.utils.generic_utils import (
|
|
||||||
KeepAverage,
|
|
||||||
count_parameters,
|
|
||||||
remove_experiment_folder,
|
|
||||||
set_init_dict,
|
|
||||||
)
|
|
||||||
from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset
|
|
||||||
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
||||||
|
from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset
|
||||||
from TTS.vocoder.utils.distribution import discretized_mix_logistic_loss, gaussian_loss
|
from TTS.vocoder.utils.distribution import discretized_mix_logistic_loss, gaussian_loss
|
||||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
from TTS.vocoder.utils.generic_utils import setup_generator
|
||||||
from TTS.vocoder.utils.io import save_best_model, save_checkpoint
|
from TTS.vocoder.utils.io import save_best_model, save_checkpoint
|
||||||
|
|
||||||
|
# from torch.utils.data.distributed import DistributedSampler
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
use_cuda, num_gpus = setup_torch_training_env(True, True)
|
use_cuda, num_gpus = setup_torch_training_env(True, True)
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
from TTS.vocoder.datasets.preprocess import load_wav_data
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
#!flask/bin/python
|
#!flask/bin/python
|
||||||
import argparse
|
import argparse
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import io
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from flask import Flask, render_template, request, send_file
|
from flask import Flask, render_template, request, send_file
|
||||||
from TTS.utils.synthesizer import Synthesizer
|
|
||||||
from TTS.utils.manage import ModelManager
|
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
|
from TTS.utils.manage import ModelManager
|
||||||
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
|
|
||||||
def create_argparser():
|
def create_argparser():
|
||||||
|
|
|
@ -3,6 +3,7 @@ import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
from TTS.speaker_encoder.model import SpeakerEncoder
|
||||||
from TTS.utils.generic_utils import check_argument
|
from TTS.utils.generic_utils import check_argument
|
||||||
|
|
||||||
|
|
|
@ -19,15 +19,17 @@
|
||||||
# pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
|
# pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
|
||||||
""" voxceleb 1 & 2 """
|
""" voxceleb 1 & 2 """
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import os
|
import os
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import zipfile
|
import zipfile
|
||||||
import subprocess
|
|
||||||
import hashlib
|
|
||||||
import pandas
|
|
||||||
from absl import logging
|
|
||||||
import tensorflow as tf
|
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
|
import tensorflow as tf
|
||||||
|
from absl import logging
|
||||||
|
|
||||||
|
import pandas
|
||||||
|
|
||||||
gfile = tf.compat.v1.gfile
|
gfile = tf.compat.v1.gfile
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import umap
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib
|
import matplotlib
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
import umap
|
||||||
|
|
||||||
matplotlib.use("Agg")
|
matplotlib.use("Agg")
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import tqdm
|
import tqdm
|
||||||
from torch.utils.data import Dataset
|
from torch.utils.data import Dataset
|
||||||
|
|
||||||
from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
|
from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
|
||||||
from TTS.tts.utils.text import pad_with_eos_bos, phoneme_to_sequence, text_to_sequence
|
from TTS.tts.utils.text import pad_with_eos_bos, phoneme_to_sequence, text_to_sequence
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from TTS.tts.utils.generic_utils import split_dataset
|
from TTS.tts.utils.generic_utils import split_dataset
|
||||||
|
|
||||||
####################
|
####################
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from TTS.tts.layers.generic.transformer import FFTransformerBlock
|
|
||||||
from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
|
from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
|
||||||
|
from TTS.tts.layers.generic.transformer import FFTransformerBlock
|
||||||
|
|
||||||
|
|
||||||
class DurationPredictor(nn.Module):
|
class DurationPredictor(nn.Module):
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from TTS.tts.layers.generic.res_conv_bn import Conv1dBNBlock, ResidualConv1dBNBlock, Conv1dBN
|
|
||||||
|
from TTS.tts.layers.generic.res_conv_bn import Conv1dBN, Conv1dBNBlock, ResidualConv1dBNBlock
|
||||||
|
from TTS.tts.layers.generic.transformer import FFTransformerBlock
|
||||||
from TTS.tts.layers.generic.wavenet import WNBlocks
|
from TTS.tts.layers.generic.wavenet import WNBlocks
|
||||||
from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
|
from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
|
||||||
from TTS.tts.layers.generic.transformer import FFTransformerBlock
|
|
||||||
|
|
||||||
|
|
||||||
class WaveNetDecoder(nn.Module):
|
class WaveNetDecoder(nn.Module):
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
|
|
||||||
from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock
|
from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock
|
||||||
from TTS.tts.layers.generic.transformer import FFTransformerBlock
|
from TTS.tts.layers.generic.transformer import FFTransformerBlock
|
||||||
|
from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
|
||||||
|
|
||||||
|
|
||||||
class RelativePositionTransformerEncoder(nn.Module):
|
class RelativePositionTransformerEncoder(nn.Module):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import torch
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.tts.layers.glow_tts.glow import InvConvNear, CouplingBlock
|
|
||||||
from TTS.tts.layers.generic.normalization import ActNorm
|
from TTS.tts.layers.generic.normalization import ActNorm
|
||||||
|
from TTS.tts.layers.glow_tts.glow import CouplingBlock, InvConvNear
|
||||||
|
|
||||||
|
|
||||||
def squeeze(x, x_mask=None, num_sqz=2):
|
def squeeze(x, x_mask=None, num_sqz=2):
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
|
|
||||||
from TTS.tts.layers.generic.gated_conv import GatedConvBlock
|
from TTS.tts.layers.generic.gated_conv import GatedConvBlock
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
|
||||||
from TTS.tts.layers.glow_tts.glow import ResidualConv1dLayerNormBlock
|
|
||||||
from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
|
|
||||||
from TTS.tts.layers.generic.time_depth_sep_conv import TimeDepthSeparableConvBlock
|
|
||||||
from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock
|
from TTS.tts.layers.generic.res_conv_bn import ResidualConv1dBNBlock
|
||||||
|
from TTS.tts.layers.generic.time_depth_sep_conv import TimeDepthSeparableConvBlock
|
||||||
|
from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
|
||||||
|
from TTS.tts.layers.glow_tts.glow import ResidualConv1dLayerNormBlock
|
||||||
|
from TTS.tts.layers.glow_tts.transformer import RelativePositionTransformer
|
||||||
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
|
|
||||||
class Encoder(nn.Module):
|
class Encoder(nn.Module):
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
|
||||||
from TTS.tts.layers.generic.wavenet import WN
|
from TTS.tts.layers.generic.wavenet import WN
|
||||||
|
|
||||||
from ..generic.normalization import LayerNorm
|
from ..generic.normalization import LayerNorm
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional
|
from torch.nn import functional
|
||||||
|
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
from TTS.tts.utils.ssim import ssim
|
from TTS.tts.utils.ssim import ssim
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import torch
|
import torch
|
||||||
|
from scipy.stats import betabinom
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
|
||||||
from TTS.tts.layers.tacotron.common_layers import Linear
|
from TTS.tts.layers.tacotron.common_layers import Linear
|
||||||
from scipy.stats import betabinom
|
|
||||||
|
|
||||||
|
|
||||||
class LocationLayer(nn.Module):
|
class LocationLayer(nn.Module):
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from .common_layers import Prenet
|
|
||||||
from .attentions import init_attn
|
from .attentions import init_attn
|
||||||
|
from .common_layers import Prenet
|
||||||
|
|
||||||
|
|
||||||
class BatchNormConv1d(nn.Module):
|
class BatchNormConv1d(nn.Module):
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from .common_layers import Prenet, Linear
|
|
||||||
from .attentions import init_attn
|
from .attentions import init_attn
|
||||||
|
from .common_layers import Linear, Prenet
|
||||||
|
|
||||||
|
|
||||||
# NOTE: linter has a problem with the current TF release
|
# NOTE: linter has a problem with the current TF release
|
||||||
# pylint: disable=no-value-for-parameter
|
# pylint: disable=no-value-for-parameter
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
|
|
||||||
|
from TTS.tts.layers.align_tts.mdn import MDNBlock
|
||||||
|
from TTS.tts.layers.feed_forward.decoder import Decoder
|
||||||
from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor
|
from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor
|
||||||
|
from TTS.tts.layers.feed_forward.encoder import Encoder
|
||||||
|
from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
|
||||||
from TTS.tts.layers.glow_tts.monotonic_align import generate_path, maximum_path
|
from TTS.tts.layers.glow_tts.monotonic_align import generate_path, maximum_path
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
from TTS.tts.layers.align_tts.mdn import MDNBlock
|
|
||||||
from TTS.tts.layers.feed_forward.encoder import Encoder
|
|
||||||
from TTS.tts.layers.feed_forward.decoder import Decoder
|
|
||||||
|
|
||||||
|
|
||||||
class AlignTTS(nn.Module):
|
class AlignTTS(nn.Module):
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
|
||||||
from TTS.tts.layers.glow_tts.encoder import Encoder
|
|
||||||
from TTS.tts.layers.glow_tts.decoder import Decoder
|
from TTS.tts.layers.glow_tts.decoder import Decoder
|
||||||
|
from TTS.tts.layers.glow_tts.encoder import Encoder
|
||||||
|
from TTS.tts.layers.glow_tts.monotonic_align import generate_path, maximum_path
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
from TTS.tts.layers.glow_tts.monotonic_align import maximum_path, generate_path
|
|
||||||
|
|
||||||
|
|
||||||
class GlowTTS(nn.Module):
|
class GlowTTS(nn.Module):
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.tts.layers.feed_forward.decoder import Decoder
|
from TTS.tts.layers.feed_forward.decoder import Decoder
|
||||||
from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor
|
from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor
|
||||||
from TTS.tts.layers.feed_forward.encoder import Encoder
|
from TTS.tts.layers.feed_forward.encoder import Encoder
|
||||||
from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
|
from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
|
||||||
from TTS.tts.layers.glow_tts.monotonic_align import generate_path
|
from TTS.tts.layers.glow_tts.monotonic_align import generate_path
|
||||||
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
|
|
||||||
class SpeedySpeech(nn.Module):
|
class SpeedySpeech(nn.Module):
|
||||||
|
|
|
@ -5,6 +5,7 @@ from TTS.tts.layers.tacotron.gst_layers import GST
|
||||||
from TTS.tts.layers.tacotron.tacotron2 import Decoder, Encoder, Postnet
|
from TTS.tts.layers.tacotron.tacotron2 import Decoder, Encoder, Postnet
|
||||||
from TTS.tts.models.tacotron_abstract import TacotronAbstract
|
from TTS.tts.models.tacotron_abstract import TacotronAbstract
|
||||||
|
|
||||||
|
|
||||||
# TODO: match function arguments with tacotron
|
# TODO: match function arguments with tacotron
|
||||||
class Tacotron2(TacotronAbstract):
|
class Tacotron2(TacotronAbstract):
|
||||||
"""Tacotron2 as in https://arxiv.org/abs/1712.05884
|
"""Tacotron2 as in https://arxiv.org/abs/1712.05884
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow import keras
|
from tensorflow import keras
|
||||||
|
|
||||||
|
from TTS.tts.tf.layers.tacotron.common_layers import Attention, Prenet
|
||||||
from TTS.tts.tf.utils.tf_utils import shape_list
|
from TTS.tts.tf.utils.tf_utils import shape_list
|
||||||
from TTS.tts.tf.layers.tacotron.common_layers import Prenet, Attention
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE: linter has a problem with the current TF release
|
# NOTE: linter has a problem with the current TF release
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow import keras
|
from tensorflow import keras
|
||||||
|
|
||||||
from TTS.tts.tf.layers.tacotron.tacotron2 import Encoder, Decoder, Postnet
|
from TTS.tts.tf.layers.tacotron.tacotron2 import Decoder, Encoder, Postnet
|
||||||
from TTS.tts.tf.utils.tf_utils import shape_list
|
from TTS.tts.tf.utils.tf_utils import shape_list
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import datetime
|
import datetime
|
||||||
import importlib
|
import importlib
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import pickle
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import pickle
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
# This uses Python 3, but it's easy to port to Python 2 by changing
|
# This uses Python 3, but it's easy to port to Python 2 by changing
|
||||||
# strings to u'xx'.
|
# strings to u'xx'.
|
||||||
|
|
||||||
import re
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def _num2chinese(num: str, big=False, simp=True, o=False, twoalt=False) -> str:
|
def _num2chinese(num: str, big=False, simp=True, o=False, twoalt=False) -> str:
|
||||||
|
|
|
@ -1,13 +1,11 @@
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import jieba
|
||||||
import pypinyin
|
import pypinyin
|
||||||
|
|
||||||
from .pinyinToPhonemes import PINYIN_DICT
|
from .pinyinToPhonemes import PINYIN_DICT
|
||||||
|
|
||||||
|
|
||||||
import jieba
|
|
||||||
|
|
||||||
|
|
||||||
def _chinese_character_to_pinyin(text: str) -> List[str]:
|
def _chinese_character_to_pinyin(text: str) -> List[str]:
|
||||||
pinyins = pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, neutral_tone_with_five=True)
|
pinyins = pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, neutral_tone_with_five=True)
|
||||||
pinyins_flat_list = [item for sublist in pinyins for item in sublist]
|
pinyins_flat_list = [item for sublist in pinyins for item in sublist]
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
import re
|
|
||||||
import torch
|
|
||||||
import importlib
|
import importlib
|
||||||
import numpy as np
|
import re
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
from TTS.utils.generic_utils import check_argument
|
from TTS.utils.generic_utils import check_argument
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
import os
|
|
||||||
import torch
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import os
|
||||||
import pickle as pickle_tts
|
import pickle as pickle_tts
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
from TTS.utils.io import RenamingUnpickler
|
from TTS.utils.io import RenamingUnpickler
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import os
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
def make_speakers_json_path(out_path):
|
def make_speakers_json_path(out_path):
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
import numpy as np
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from .text import phoneme_to_sequence, text_to_sequence
|
||||||
|
|
||||||
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||||
|
|
||||||
installed = {pkg.key for pkg in pkg_resources.working_set} # pylint: disable=not-an-iterable
|
installed = {pkg.key for pkg in pkg_resources.working_set} # pylint: disable=not-an-iterable
|
||||||
if "tensorflow" in installed or "tensorflow-gpu" in installed:
|
if "tensorflow" in installed or "tensorflow-gpu" in installed:
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import torch
|
|
||||||
import numpy as np
|
|
||||||
from .text import text_to_sequence, phoneme_to_sequence
|
|
||||||
|
|
||||||
|
|
||||||
def text_to_seqvec(text, CONFIG):
|
def text_to_seqvec(text, CONFIG):
|
||||||
|
|
|
@ -5,10 +5,10 @@ import re
|
||||||
import phonemizer
|
import phonemizer
|
||||||
from packaging import version
|
from packaging import version
|
||||||
from phonemizer.phonemize import phonemize
|
from phonemizer.phonemize import phonemize
|
||||||
|
|
||||||
|
from TTS.tts.utils.chinese_mandarin.phonemizer import chinese_text_to_phonemes
|
||||||
from TTS.tts.utils.text import cleaners
|
from TTS.tts.utils.text import cleaners
|
||||||
from TTS.tts.utils.text.symbols import _bos, _eos, _punctuations, make_symbols, phonemes, symbols
|
from TTS.tts.utils.text.symbols import _bos, _eos, _punctuations, make_symbols, phonemes, symbols
|
||||||
from TTS.tts.utils.chinese_mandarin.phonemizer import chinese_text_to_phonemes
|
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=unnecessary-comprehension
|
# pylint: disable=unnecessary-comprehension
|
||||||
# Mappings from symbol to numeric ID and vice versa:
|
# Mappings from symbol to numeric ID and vice versa:
|
||||||
|
|
|
@ -11,12 +11,14 @@ hyperparameter. Some cleaners are English-specific. You'll typically want to use
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from unidecode import unidecode
|
from unidecode import unidecode
|
||||||
from .number_norm import normalize_numbers
|
|
||||||
from .abbreviations import abbreviations_en, abbreviations_fr
|
|
||||||
from .time import expand_time_english
|
|
||||||
from TTS.tts.utils.chinese_mandarin.numbers import replace_numbers_to_characters_in_text
|
from TTS.tts.utils.chinese_mandarin.numbers import replace_numbers_to_characters_in_text
|
||||||
|
|
||||||
|
from .abbreviations import abbreviations_en, abbreviations_fr
|
||||||
|
from .number_norm import normalize_numbers
|
||||||
|
from .time import expand_time_english
|
||||||
|
|
||||||
# Regular expression matching whitespace:
|
# Regular expression matching whitespace:
|
||||||
_whitespace_re = re.compile(r"\s+")
|
_whitespace_re = re.compile(r"\s+")
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
""" from https://github.com/keithito/tacotron """
|
""" from https://github.com/keithito/tacotron """
|
||||||
|
|
||||||
import inflect
|
|
||||||
import re
|
import re
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
|
import inflect
|
||||||
|
|
||||||
_inflect = inflect.engine()
|
_inflect = inflect.engine()
|
||||||
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
|
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
|
||||||
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
|
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import inflect
|
import inflect
|
||||||
|
|
||||||
_inflect = inflect.engine()
|
_inflect = inflect.engine()
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
import librosa
|
import librosa
|
||||||
import matplotlib
|
import matplotlib
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
matplotlib.use("Agg")
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme
|
from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme
|
||||||
|
|
||||||
|
matplotlib.use("Agg")
|
||||||
|
|
||||||
|
|
||||||
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False):
|
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False):
|
||||||
if isinstance(alignment, torch.Tensor):
|
if isinstance(alignment, torch.Tensor):
|
||||||
|
|
|
@ -4,11 +4,12 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import glob
|
import glob
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.tts.utils.text.symbols import parse_symbols
|
from TTS.tts.utils.text.symbols import parse_symbols
|
||||||
from TTS.utils.console_logger import ConsoleLogger
|
from TTS.utils.console_logger import ConsoleLogger
|
||||||
from TTS.utils.generic_utils import create_experiment_folder, get_git_branch
|
from TTS.utils.generic_utils import create_experiment_folder, get_git_branch
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
import librosa
|
import librosa
|
||||||
import soundfile as sf
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.io.wavfile
|
import scipy.io.wavfile
|
||||||
import scipy.signal
|
import scipy.signal
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
from TTS.tts.utils.data import StandardScaler
|
||||||
|
|
||||||
# import pyworld as pw
|
# import pyworld as pw
|
||||||
|
|
||||||
from TTS.tts.utils.data import StandardScaler
|
|
||||||
|
|
||||||
# pylint: disable=too-many-public-methods
|
# pylint: disable=too-many-public-methods
|
||||||
class AudioProcessor(object):
|
class AudioProcessor(object):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import datetime
|
import datetime
|
||||||
from TTS.utils.io import AttrDict
|
|
||||||
|
|
||||||
|
from TTS.utils.io import AttrDict
|
||||||
|
|
||||||
tcolors = AttrDict(
|
tcolors = AttrDict(
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import json
|
import json
|
||||||
import yaml
|
import os
|
||||||
import pickle as pickle_tts
|
import pickle as pickle_tts
|
||||||
|
import re
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
class RenamingUnpickler(pickle_tts.Unpickler):
|
class RenamingUnpickler(pickle_tts.Unpickler):
|
||||||
"""Overload default pickler to solve module renaming problem"""
|
"""Overload default pickler to solve module renaming problem"""
|
||||||
|
|
|
@ -7,6 +7,7 @@ from shutil import copyfile
|
||||||
|
|
||||||
import gdown
|
import gdown
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from TTS.utils.generic_utils import get_user_data_dir
|
from TTS.utils.generic_utils import get_user_data_dir
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# from https://github.com/LiyuanLucasLiu/RAdam
|
# from https://github.com/LiyuanLucasLiu/RAdam
|
||||||
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.optim.optimizer import Optimizer
|
from torch.optim.optimizer import Optimizer
|
||||||
|
|
||||||
|
|
|
@ -1,19 +1,18 @@
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
|
||||||
import pysbd
|
import pysbd
|
||||||
|
import torch
|
||||||
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
|
||||||
from TTS.utils.io import load_config
|
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from TTS.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.speakers import load_speaker_mapping
|
from TTS.tts.utils.speakers import load_speaker_mapping
|
||||||
from TTS.vocoder.utils.generic_utils import setup_generator, interpolate_vocoder_input
|
|
||||||
|
|
||||||
# pylint: disable=unused-wildcard-import
|
# pylint: disable=unused-wildcard-import
|
||||||
# pylint: disable=wildcard-import
|
# pylint: disable=wildcard-import
|
||||||
from TTS.tts.utils.synthesis import synthesis, trim_silence
|
from TTS.tts.utils.synthesis import synthesis, trim_silence
|
||||||
from TTS.tts.utils.text import make_symbols, phonemes, symbols
|
from TTS.tts.utils.text import make_symbols, phonemes, symbols
|
||||||
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
from TTS.utils.io import load_config
|
||||||
|
from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input, setup_generator
|
||||||
|
|
||||||
|
|
||||||
class Synthesizer(object):
|
class Synthesizer(object):
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from tensorboardX import SummaryWriter
|
from tensorboardX import SummaryWriter
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import torch
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
def setup_torch_training_env(cudnn_enable, cudnn_benchmark):
|
def setup_torch_training_env(cudnn_enable, cudnn_benchmark):
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import torch
|
import os
|
||||||
import random
|
import random
|
||||||
import numpy as np
|
|
||||||
from torch.utils.data import Dataset
|
|
||||||
from multiprocessing import Manager
|
from multiprocessing import Manager
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from torch.utils.data import Dataset
|
||||||
|
|
||||||
|
|
||||||
class GANDataset(Dataset):
|
class GANDataset(Dataset):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
def preprocess_wav_files(out_path, config, ap):
|
def preprocess_wav_files(out_path, config, ap):
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
import os
|
|
||||||
import glob
|
import glob
|
||||||
import torch
|
import os
|
||||||
import random
|
import random
|
||||||
import numpy as np
|
|
||||||
from torch.utils.data import Dataset
|
|
||||||
from multiprocessing import Manager
|
from multiprocessing import Manager
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from torch.utils.data import Dataset
|
||||||
|
|
||||||
|
|
||||||
class WaveGradDataset(Dataset):
|
class WaveGradDataset(Dataset):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import torch
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import torch
|
||||||
from torch.utils.data import Dataset
|
from torch.utils.data import Dataset
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import torch
|
|
||||||
import librosa
|
import librosa
|
||||||
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
from scipy import signal as sig
|
from scipy import signal as sig
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.vocoder.models.melgan_generator import MelganGenerator
|
|
||||||
from TTS.vocoder.layers.pqmf import PQMF
|
from TTS.vocoder.layers.pqmf import PQMF
|
||||||
|
from TTS.vocoder.models.melgan_generator import MelganGenerator
|
||||||
|
|
||||||
|
|
||||||
class MultibandMelganGenerator(MelganGenerator):
|
class MultibandMelganGenerator(MelganGenerator):
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import math
|
import math
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn.utils import weight_norm
|
from torch.nn.utils import weight_norm
|
||||||
|
|
||||||
from ..layers.wavegrad import DBlock, FiLM, UBlock, Conv1d
|
from ..layers.wavegrad import Conv1d, DBlock, FiLM, UBlock
|
||||||
|
|
||||||
|
|
||||||
class Wavegrad(nn.Module):
|
class Wavegrad(nn.Module):
|
||||||
|
|
|
@ -1,16 +1,14 @@
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import numpy as np
|
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import time
|
|
||||||
|
|
||||||
# fix this
|
# fix this
|
||||||
from TTS.utils.audio import AudioProcessor as ap
|
from TTS.utils.audio import AudioProcessor as ap
|
||||||
from TTS.vocoder.utils.distribution import (
|
from TTS.vocoder.utils.distribution import sample_from_discretized_mix_logistic, sample_from_gaussian
|
||||||
sample_from_gaussian,
|
|
||||||
sample_from_discretized_mix_logistic,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def stream(string, variables):
|
def stream(string, variables):
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from scipy import signal as sig
|
from scipy import signal as sig
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,13 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
|
||||||
|
from TTS.vocoder.tf.layers.melgan import ReflectionPad1d, ResidualStack
|
||||||
|
|
||||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # FATAL
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # FATAL
|
||||||
logging.getLogger("tensorflow").setLevel(logging.FATAL)
|
logging.getLogger("tensorflow").setLevel(logging.FATAL)
|
||||||
|
|
||||||
import tensorflow as tf
|
|
||||||
from TTS.vocoder.tf.layers.melgan import ResidualStack, ReflectionPad1d
|
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=too-many-ancestors
|
# pylint: disable=too-many-ancestors
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from TTS.vocoder.tf.models.melgan_generator import MelganGenerator
|
|
||||||
from TTS.vocoder.tf.layers.pqmf import PQMF
|
from TTS.vocoder.tf.layers.pqmf import PQMF
|
||||||
|
from TTS.vocoder.tf.models.melgan_generator import MelganGenerator
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=too-many-ancestors
|
# pylint: disable=too-many-ancestors
|
||||||
# pylint: disable=abstract-method
|
# pylint: disable=abstract-method
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import re
|
|
||||||
import importlib
|
import importlib
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def to_camel(text):
|
def to_camel(text):
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import datetime
|
import datetime
|
||||||
import pickle
|
import pickle
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
import numpy as np
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch.distributions.normal import Normal
|
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from torch.distributions.normal import Normal
|
||||||
|
|
||||||
|
|
||||||
def gaussian_loss(y_hat, y, log_std_min=-7.0):
|
def gaussian_loss(y_hat, y, log_std_min=-7.0):
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
import re
|
|
||||||
import torch
|
|
||||||
import importlib
|
import importlib
|
||||||
|
import re
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import torch
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
from TTS.tts.utils.visual import plot_spectrogram
|
from TTS.tts.utils.visual import plot_spectrogram
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
import os
|
|
||||||
import glob
|
|
||||||
import torch
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
import pickle as pickle_tts
|
import pickle as pickle_tts
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
from TTS.utils.io import RenamingUnpickler
|
from TTS.utils.io import RenamingUnpickler
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
dependencies = ['torch', 'gdown', 'pysbd', 'phonemizer', 'unidecode', 'pypinyin'] # apt install espeak-ng
|
dependencies = ['torch', 'gdown', 'pysbd', 'phonemizer', 'unidecode', 'pypinyin'] # apt install espeak-ng
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.utils.synthesizer import Synthesizer
|
|
||||||
from TTS.utils.manage import ModelManager
|
from TTS.utils.manage import ModelManager
|
||||||
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
|
|
||||||
def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', vocoder_name=None, use_cuda=False):
|
def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', vocoder_name=None, use_cuda=False):
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
# visualisation tools for mimic2
|
# visualisation tools for mimic2
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from statistics import stdev, mode, mean, median
|
|
||||||
from statistics import StatisticsError
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
|
||||||
import csv
|
import csv
|
||||||
import seaborn as sns
|
import os
|
||||||
import random
|
import random
|
||||||
|
from statistics import StatisticsError, mean, median, mode, stdev
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
import seaborn as sns
|
||||||
from text.cmudict import CMUDict
|
from text.cmudict import CMUDict
|
||||||
|
|
||||||
|
|
||||||
def get_audio_seconds(frames):
|
def get_audio_seconds(frames):
|
||||||
return (frames*12.5)/1000
|
return (frames*12.5)/1000
|
||||||
|
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -8,9 +8,8 @@ from distutils.version import LooseVersion
|
||||||
import numpy
|
import numpy
|
||||||
import setuptools.command.build_py
|
import setuptools.command.build_py
|
||||||
import setuptools.command.develop
|
import setuptools.command.develop
|
||||||
from setuptools import setup, Extension, find_packages
|
|
||||||
from Cython.Build import cythonize
|
from Cython.Build import cythonize
|
||||||
|
from setuptools import Extension, find_packages, setup
|
||||||
|
|
||||||
if LooseVersion(sys.version) < LooseVersion("3.6") or LooseVersion(sys.version) > LooseVersion("3.9"):
|
if LooseVersion(sys.version) < LooseVersion("3.6") or LooseVersion(sys.version) > LooseVersion("3.9"):
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
|
|
|
@ -2,7 +2,6 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
||||||
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.tts.layers.feed_forward.decoder import Decoder
|
from TTS.tts.layers.feed_forward.decoder import Decoder
|
||||||
from TTS.tts.layers.feed_forward.encoder import Encoder
|
from TTS.tts.layers.feed_forward.encoder import Encoder
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
|
@ -3,13 +3,13 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from tests import get_tests_input_path
|
|
||||||
from torch import optim
|
from torch import optim
|
||||||
|
|
||||||
|
from tests import get_tests_input_path
|
||||||
from TTS.tts.layers.losses import GlowTTSLoss
|
from TTS.tts.layers.losses import GlowTTSLoss
|
||||||
from TTS.tts.models.glow_tts import GlowTTS
|
from TTS.tts.models.glow_tts import GlowTTS
|
||||||
from TTS.utils.io import load_config
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
#pylint: disable=unused-variable
|
#pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from TTS.tts.layers.tacotron.tacotron import Prenet, CBHG, Decoder, Encoder
|
|
||||||
from TTS.tts.layers.losses import L1LossMasked, SSIMLoss
|
from TTS.tts.layers.losses import L1LossMasked, SSIMLoss
|
||||||
|
from TTS.tts.layers.tacotron.tacotron import CBHG, Decoder, Encoder, Prenet
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
# pylint: disable=unused-variable
|
# pylint: disable=unused-variable
|
||||||
|
|
|
@ -4,9 +4,9 @@ import unittest
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from tests import get_tests_input_path, get_tests_output_path
|
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
|
from tests import get_tests_input_path, get_tests_output_path
|
||||||
from TTS.tts.datasets import TTSDataset
|
from TTS.tts.datasets import TTSDataset
|
||||||
from TTS.tts.datasets.preprocess import ljspeech
|
from TTS.tts.datasets.preprocess import ljspeech
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import unittest
|
|
||||||
import os
|
import os
|
||||||
from tests import get_tests_input_path
|
import unittest
|
||||||
|
|
||||||
|
from tests import get_tests_input_path
|
||||||
from TTS.tts.datasets.preprocess import common_voice
|
from TTS.tts.datasets.preprocess import common_voice
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,9 +2,9 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import torch as T
|
import torch as T
|
||||||
from tests import get_tests_input_path
|
|
||||||
|
|
||||||
from TTS.speaker_encoder.losses import GE2ELoss, AngleProtoLoss
|
from tests import get_tests_input_path
|
||||||
|
from TTS.speaker_encoder.losses import AngleProtoLoss, GE2ELoss
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
from TTS.speaker_encoder.model import SpeakerEncoder
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import torch
|
import torch
|
||||||
from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor
|
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
|
||||||
from TTS.tts.models.speedy_speech import SpeedySpeech
|
|
||||||
|
|
||||||
|
from TTS.tts.layers.feed_forward.duration_predictor import DurationPredictor
|
||||||
|
from TTS.tts.models.speedy_speech import SpeedySpeech
|
||||||
|
from TTS.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
use_cuda = torch.cuda.is_available()
|
use_cuda = torch.cuda.is_available()
|
||||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||||
|
|
|
@ -2,6 +2,7 @@ import unittest
|
||||||
|
|
||||||
from TTS.tts.utils.text import phonemes
|
from TTS.tts.utils.text import phonemes
|
||||||
|
|
||||||
|
|
||||||
class SymbolsTest(unittest.TestCase):
|
class SymbolsTest(unittest.TestCase):
|
||||||
def test_uniqueness(self): #pylint: disable=no-self-use
|
def test_uniqueness(self): #pylint: disable=no-self-use
|
||||||
assert sorted(phonemes) == sorted(list(set(phonemes))), " {} vs {} ".format(len(phonemes), len(set(phonemes)))
|
assert sorted(phonemes) == sorted(list(set(phonemes))), " {} vs {} ".format(len(phonemes), len(set(phonemes)))
|
||||||
|
|
|
@ -2,11 +2,11 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from tests import get_tests_input_path, get_tests_output_path
|
from tests import get_tests_input_path, get_tests_output_path
|
||||||
from TTS.utils.synthesizer import Synthesizer
|
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from TTS.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.io import save_checkpoint
|
from TTS.tts.utils.io import save_checkpoint
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.utils.io import load_config
|
from TTS.utils.io import load_config
|
||||||
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
|
|
||||||
class SynthesizerTest(unittest.TestCase):
|
class SynthesizerTest(unittest.TestCase):
|
||||||
|
|
|
@ -3,13 +3,13 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from tests import get_tests_input_path
|
|
||||||
from torch import nn, optim
|
from torch import nn, optim
|
||||||
|
|
||||||
|
from tests import get_tests_input_path
|
||||||
from TTS.tts.layers.losses import MSELossMasked
|
from TTS.tts.layers.losses import MSELossMasked
|
||||||
from TTS.tts.models.tacotron2 import Tacotron2
|
from TTS.tts.models.tacotron2 import Tacotron2
|
||||||
from TTS.utils.io import load_config
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
from TTS.utils.io import load_config
|
||||||
|
|
||||||
#pylint: disable=unused-variable
|
#pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue