mirror of https://github.com/coqui-ai/TTS.git
rebranding and replacing import statements
This commit is contained in:
parent
868efa90b9
commit
f35504f187
|
@ -7,9 +7,9 @@ import argparse
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Run preprocessing process."""
|
"""Run preprocessing process."""
|
||||||
|
|
|
@ -2,10 +2,10 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.vocoder.tf.utils.generic_utils import setup_generator
|
from mozilla_voice_tts.vocoder.tf.utils.generic_utils import setup_generator
|
||||||
from TTS.vocoder.tf.utils.io import load_checkpoint
|
from mozilla_voice_tts.vocoder.tf.utils.io import load_checkpoint
|
||||||
from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite
|
from mozilla_voice_tts.vocoder.tf.utils.tflite import convert_melgan_to_tflite
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
|
@ -6,13 +6,13 @@ import tensorflow as tf
|
||||||
import torch
|
import torch
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import (
|
from mozilla_voice_tts.vocoder.tf.utils.convert_torch_to_tf_utils import (
|
||||||
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
|
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
|
||||||
from TTS.vocoder.tf.utils.generic_utils import \
|
from mozilla_voice_tts.vocoder.tf.utils.generic_utils import \
|
||||||
setup_generator as setup_tf_generator
|
setup_generator as setup_tf_generator
|
||||||
from TTS.vocoder.tf.utils.io import save_checkpoint
|
from mozilla_voice_tts.vocoder.tf.utils.io import save_checkpoint
|
||||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
|
||||||
|
|
||||||
# prevent GPU use
|
# prevent GPU use
|
||||||
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
||||||
|
|
|
@ -2,11 +2,11 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.tts.utils.text.symbols import symbols, phonemes
|
from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes
|
||||||
from TTS.tts.tf.utils.generic_utils import setup_model
|
from mozilla_voice_tts.tts.tf.utils.generic_utils import setup_model
|
||||||
from TTS.tts.tf.utils.io import load_checkpoint
|
from mozilla_voice_tts.tts.tf.utils.io import load_checkpoint
|
||||||
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite
|
from mozilla_voice_tts.tts.tf.utils.tflite import convert_tacotron2_to_tflite
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
|
@ -11,13 +11,13 @@ import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import torch
|
import torch
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
from TTS.tts.tf.models.tacotron2 import Tacotron2
|
from mozilla_voice_tts.tts.tf.models.tacotron2 import Tacotron2
|
||||||
from TTS.tts.tf.utils.convert_torch_to_tf_utils import (
|
from mozilla_voice_tts.tts.tf.utils.convert_torch_to_tf_utils import (
|
||||||
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
|
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
|
||||||
from TTS.tts.tf.utils.generic_utils import save_checkpoint
|
from mozilla_voice_tts.tts.tf.utils.generic_utils import save_checkpoint
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from mozilla_voice_tts.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.text.symbols import phonemes, symbols
|
from mozilla_voice_tts.tts.utils.text.symbols import phonemes, symbols
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
|
|
||||||
sys.path.append('/home/erogol/Projects')
|
sys.path.append('/home/erogol/Projects')
|
||||||
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
||||||
|
|
|
@ -10,12 +10,12 @@ import time
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from mozilla_voice_tts.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.synthesis import synthesis
|
from mozilla_voice_tts.tts.utils.synthesis import synthesis
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
|
||||||
|
|
||||||
|
|
||||||
def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_id):
|
def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_id):
|
||||||
|
|
|
@ -9,21 +9,21 @@ import traceback
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from TTS.speaker_encoder.dataset import MyDataset
|
from mozilla_voice_tts.speaker_encoder.dataset import MyDataset
|
||||||
from TTS.speaker_encoder.generic_utils import save_best_model
|
from mozilla_voice_tts.speaker_encoder.generic_utils import save_best_model
|
||||||
from TTS.speaker_encoder.loss import GE2ELoss
|
from mozilla_voice_tts.speaker_encoder.loss import GE2ELoss
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
|
||||||
from TTS.speaker_encoder.visual import plot_embeddings
|
from mozilla_voice_tts.speaker_encoder.visual import plot_embeddings
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.tts.utils.audio import AudioProcessor
|
from mozilla_voice_tts.tts.utils.audio import AudioProcessor
|
||||||
from TTS.tts.utils.generic_utils import (create_experiment_folder,
|
from mozilla_voice_tts.tts.utils.generic_utils import (create_experiment_folder,
|
||||||
get_git_branch,
|
get_git_branch,
|
||||||
remove_experiment_folder,
|
remove_experiment_folder,
|
||||||
set_init_dict)
|
set_init_dict)
|
||||||
from TTS.tts.utils.io import copy_config_file, load_config
|
from mozilla_voice_tts.tts.utils.io import copy_config_file, load_config
|
||||||
from TTS.tts.utils.radam import RAdam
|
from mozilla_voice_tts.tts.utils.radam import RAdam
|
||||||
from TTS.tts.utils.tensorboard_logger import TensorboardLogger
|
from mozilla_voice_tts.tts.utils.tensorboard_logger import TensorboardLogger
|
||||||
from TTS.tts.utils.training import NoamLR, check_update
|
from mozilla_voice_tts.tts.utils.training import NoamLR, check_update
|
||||||
|
|
||||||
torch.backends.cudnn.enabled = True
|
torch.backends.cudnn.enabled = True
|
||||||
torch.backends.cudnn.benchmark = True
|
torch.backends.cudnn.benchmark = True
|
||||||
|
|
|
@ -11,29 +11,29 @@ import traceback
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
|
||||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset
|
||||||
from TTS.tts.layers.losses import TacotronLoss
|
from mozilla_voice_tts.tts.layers.losses import TacotronLoss
|
||||||
from TTS.tts.utils.distribute import (DistributedSampler,
|
from mozilla_voice_tts.tts.utils.distribute import (DistributedSampler,
|
||||||
apply_gradient_allreduce,
|
apply_gradient_allreduce,
|
||||||
init_distributed, reduce_tensor)
|
init_distributed, reduce_tensor)
|
||||||
from TTS.tts.utils.generic_utils import check_config, setup_model
|
from mozilla_voice_tts.tts.utils.generic_utils import check_config, setup_model
|
||||||
from TTS.tts.utils.io import save_best_model, save_checkpoint
|
from mozilla_voice_tts.tts.utils.io import save_best_model, save_checkpoint
|
||||||
from TTS.tts.utils.measures import alignment_diagonal_score
|
from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score
|
||||||
from TTS.tts.utils.speakers import (get_speakers, load_speaker_mapping,
|
from mozilla_voice_tts.tts.utils.speakers import (get_speakers, load_speaker_mapping,
|
||||||
save_speaker_mapping)
|
save_speaker_mapping)
|
||||||
from TTS.tts.utils.synthesis import synthesis
|
from mozilla_voice_tts.tts.utils.synthesis import synthesis
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
from mozilla_voice_tts.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.utils.console_logger import ConsoleLogger
|
from mozilla_voice_tts.utils.console_logger import ConsoleLogger
|
||||||
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
|
from mozilla_voice_tts.utils.generic_utils import (KeepAverage, count_parameters,
|
||||||
create_experiment_folder, get_git_branch,
|
create_experiment_folder, get_git_branch,
|
||||||
remove_experiment_folder, set_init_dict)
|
remove_experiment_folder, set_init_dict)
|
||||||
from TTS.utils.io import copy_config_file, load_config
|
from mozilla_voice_tts.utils.io import copy_config_file, load_config
|
||||||
from TTS.utils.radam import RAdam
|
from mozilla_voice_tts.utils.radam import RAdam
|
||||||
from TTS.utils.tensorboard_logger import TensorboardLogger
|
from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
|
||||||
from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
|
from mozilla_voice_tts.utils.training import (NoamLR, adam_weight_decay, check_update,
|
||||||
gradual_training_scheduler, set_weight_decay,
|
gradual_training_scheduler, set_weight_decay,
|
||||||
setup_torch_training_env)
|
setup_torch_training_env)
|
||||||
|
|
||||||
|
|
|
@ -8,24 +8,24 @@ from inspect import signature
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.utils.console_logger import ConsoleLogger
|
from mozilla_voice_tts.utils.console_logger import ConsoleLogger
|
||||||
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
|
from mozilla_voice_tts.utils.generic_utils import (KeepAverage, count_parameters,
|
||||||
create_experiment_folder, get_git_branch,
|
create_experiment_folder, get_git_branch,
|
||||||
remove_experiment_folder, set_init_dict)
|
remove_experiment_folder, set_init_dict)
|
||||||
from TTS.utils.io import copy_config_file, load_config
|
from mozilla_voice_tts.utils.io import copy_config_file, load_config
|
||||||
from TTS.utils.radam import RAdam
|
from mozilla_voice_tts.utils.radam import RAdam
|
||||||
from TTS.utils.tensorboard_logger import TensorboardLogger
|
from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
|
||||||
from TTS.utils.training import setup_torch_training_env
|
from mozilla_voice_tts.utils.training import setup_torch_training_env
|
||||||
from TTS.vocoder.datasets.gan_dataset import GANDataset
|
from mozilla_voice_tts.vocoder.datasets.gan_dataset import GANDataset
|
||||||
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
from mozilla_voice_tts.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
||||||
# from distribute import (DistributedSampler, apply_gradient_allreduce,
|
# from distribute import (DistributedSampler, apply_gradient_allreduce,
|
||||||
# init_distributed, reduce_tensor)
|
# init_distributed, reduce_tensor)
|
||||||
from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss
|
from mozilla_voice_tts.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss
|
||||||
from TTS.vocoder.utils.generic_utils import (check_config, plot_results,
|
from mozilla_voice_tts.vocoder.utils.generic_utils import (check_config, plot_results,
|
||||||
setup_discriminator,
|
setup_discriminator,
|
||||||
setup_generator)
|
setup_generator)
|
||||||
from TTS.vocoder.utils.io import save_best_model, save_checkpoint
|
from mozilla_voice_tts.vocoder.utils.io import save_best_model, save_checkpoint
|
||||||
|
|
||||||
use_cuda, num_gpus = setup_torch_training_env(True, True)
|
use_cuda, num_gpus = setup_torch_training_env(True, True)
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ If you have the environment set already for TTS, then you can directly call ```s
|
||||||
3. source /tmp/venv/bin/activate
|
3. source /tmp/venv/bin/activate
|
||||||
4. pip install -U pip setuptools wheel
|
4. pip install -U pip setuptools wheel
|
||||||
5. pip install -U https://example.com/url/to/python/package.whl
|
5. pip install -U https://example.com/url/to/python/package.whl
|
||||||
6. python -m TTS.server.server
|
6. python -m mozilla_voice_tts.server.server
|
||||||
|
|
||||||
You can now open http://localhost:5002 in a browser
|
You can now open http://localhost:5002 in a browser
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ import argparse
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from flask import Flask, request, render_template, send_file
|
from flask import Flask, request, render_template, send_file
|
||||||
from TTS.server.synthesizer import Synthesizer
|
from mozilla_voice_tts.server.synthesizer import Synthesizer
|
||||||
|
|
||||||
|
|
||||||
def create_argparser():
|
def create_argparser():
|
||||||
|
@ -18,8 +18,8 @@ def create_argparser():
|
||||||
parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.')
|
parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.')
|
||||||
parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.')
|
parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.')
|
||||||
parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.')
|
parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.')
|
||||||
parser.add_argument('--vocoder_config', type=str, default=None, help='path to TTS.vocoder config file.')
|
parser.add_argument('--vocoder_config', type=str, default=None, help='path to mozilla_voice_tts.vocoder config file.')
|
||||||
parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to TTS.vocoder checkpoint file.')
|
parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to mozilla_voice_tts.vocoder checkpoint file.')
|
||||||
parser.add_argument('--port', type=int, default=5002, help='port to listen on.')
|
parser.add_argument('--port', type=int, default=5002, help='port to listen on.')
|
||||||
parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.')
|
parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.')
|
||||||
parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.')
|
parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.')
|
||||||
|
|
|
@ -7,16 +7,16 @@ import torch
|
||||||
import yaml
|
import yaml
|
||||||
import pysbd
|
import pysbd
|
||||||
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from mozilla_voice_tts.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.speakers import load_speaker_mapping
|
from mozilla_voice_tts.tts.utils.speakers import load_speaker_mapping
|
||||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
|
||||||
# pylint: disable=unused-wildcard-import
|
# pylint: disable=unused-wildcard-import
|
||||||
# pylint: disable=wildcard-import
|
# pylint: disable=wildcard-import
|
||||||
from TTS.tts.utils.synthesis import *
|
from mozilla_voice_tts.tts.utils.synthesis import *
|
||||||
|
|
||||||
from TTS.tts.utils.text import make_symbols, phonemes, symbols
|
from mozilla_voice_tts.tts.utils.text import make_symbols, phonemes, symbols
|
||||||
|
|
||||||
|
|
||||||
class Synthesizer(object):
|
class Synthesizer(object):
|
||||||
|
|
|
@ -6,9 +6,9 @@ import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
|
||||||
from TTS.tts.utils.audio import AudioProcessor
|
from mozilla_voice_tts.tts.utils.audio import AudioProcessor
|
||||||
from TTS.tts.utils.generic_utils import load_config
|
from mozilla_voice_tts.tts.utils.generic_utils import load_config
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Compute embedding vectors for each wav file in a dataset. ')
|
description='Compute embedding vectors for each wav file in a dataset. ')
|
||||||
|
|
|
@ -9,7 +9,7 @@ class MyDataset(Dataset):
|
||||||
num_utter_per_speaker=10, skip_speakers=False, verbose=False):
|
num_utter_per_speaker=10, skip_speakers=False, verbose=False):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
ap (TTS.tts.utils.AudioProcessor): audio processor object.
|
ap (mozilla_voice_tts.tts.utils.AudioProcessor): audio processor object.
|
||||||
meta_data (list): list of dataset instances.
|
meta_data (list): list of dataset instances.
|
||||||
seq_len (int): voice segment length in seconds.
|
seq_len (int): voice segment length in seconds.
|
||||||
verbose (bool): print diagnostic information.
|
verbose (bool): print diagnostic information.
|
||||||
|
|
|
@ -5,8 +5,8 @@ import torch
|
||||||
import random
|
import random
|
||||||
from torch.utils.data import Dataset
|
from torch.utils.data import Dataset
|
||||||
|
|
||||||
from TTS.tts.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
|
from mozilla_voice_tts.tts.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
|
||||||
from TTS.tts.utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
from mozilla_voice_tts.tts.utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
||||||
|
|
||||||
|
|
||||||
class MyDataset(Dataset):
|
class MyDataset(Dataset):
|
||||||
|
@ -30,7 +30,7 @@ class MyDataset(Dataset):
|
||||||
outputs_per_step (int): number of time frames predicted per step.
|
outputs_per_step (int): number of time frames predicted per step.
|
||||||
text_cleaner (str): text cleaner used for the dataset.
|
text_cleaner (str): text cleaner used for the dataset.
|
||||||
compute_linear_spec (bool): compute linear spectrogram if True.
|
compute_linear_spec (bool): compute linear spectrogram if True.
|
||||||
ap (TTS.tts.utils.AudioProcessor): audio processor object.
|
ap (mozilla_voice_tts.tts.utils.AudioProcessor): audio processor object.
|
||||||
meta_data (list): list of dataset instances.
|
meta_data (list): list of dataset instances.
|
||||||
batch_group_size (int): (0) range of batch randomization after sorting
|
batch_group_size (int): (0) range of batch randomization after sorting
|
||||||
sequences by length.
|
sequences by length.
|
||||||
|
|
|
@ -2,7 +2,7 @@ import os
|
||||||
from glob import glob
|
from glob import glob
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from TTS.tts.utils.generic_utils import split_dataset
|
from mozilla_voice_tts.tts.utils.generic_utils import split_dataset
|
||||||
|
|
||||||
|
|
||||||
def load_meta_data(datasets):
|
def load_meta_data(datasets):
|
||||||
|
|
|
@ -2,7 +2,7 @@ import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional
|
from torch.nn import functional
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
|
|
||||||
class L1LossMasked(nn.Module):
|
class L1LossMasked(nn.Module):
|
||||||
|
|
|
@ -2,9 +2,9 @@
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.tts.layers.gst_layers import GST
|
from mozilla_voice_tts.tts.layers.gst_layers import GST
|
||||||
from TTS.tts.layers.tacotron import Decoder, Encoder, PostCBHG
|
from mozilla_voice_tts.tts.layers.tacotron import Decoder, Encoder, PostCBHG
|
||||||
from TTS.tts.models.tacotron_abstract import TacotronAbstract
|
from mozilla_voice_tts.tts.models.tacotron_abstract import TacotronAbstract
|
||||||
|
|
||||||
|
|
||||||
class Tacotron(TacotronAbstract):
|
class Tacotron(TacotronAbstract):
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.tts.layers.gst_layers import GST
|
from mozilla_voice_tts.tts.layers.gst_layers import GST
|
||||||
from TTS.tts.layers.tacotron2 import Decoder, Encoder, Postnet
|
from mozilla_voice_tts.tts.layers.tacotron2 import Decoder, Encoder, Postnet
|
||||||
from TTS.tts.models.tacotron_abstract import TacotronAbstract
|
from mozilla_voice_tts.tts.models.tacotron_abstract import TacotronAbstract
|
||||||
|
|
||||||
|
|
||||||
# TODO: match function arguments with tacotron
|
# TODO: match function arguments with tacotron
|
||||||
|
|
|
@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
|
|
||||||
class TacotronAbstract(ABC, nn.Module):
|
class TacotronAbstract(ABC, nn.Module):
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow import keras
|
from tensorflow import keras
|
||||||
from TTS.tts.tf.utils.tf_utils import shape_list
|
from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list
|
||||||
from TTS.tts.tf.layers.common_layers import Prenet, Attention
|
from mozilla_voice_tts.tts.tf.layers.common_layers import Prenet, Attention
|
||||||
# from tensorflow_addons.seq2seq import AttentionWrapper
|
# from tensorflow_addons.seq2seq import AttentionWrapper
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow import keras
|
from tensorflow import keras
|
||||||
|
|
||||||
from TTS.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet
|
from mozilla_voice_tts.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet
|
||||||
from TTS.tts.tf.utils.tf_utils import shape_list
|
from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list
|
||||||
|
|
||||||
|
|
||||||
#pylint: disable=too-many-ancestors
|
#pylint: disable=too-many-ancestors
|
||||||
|
|
|
@ -78,7 +78,7 @@ def count_parameters(model, c):
|
||||||
|
|
||||||
def setup_model(num_chars, num_speakers, c, enable_tflite=False):
|
def setup_model(num_chars, num_speakers, c, enable_tflite=False):
|
||||||
print(" > Using model: {}".format(c.model))
|
print(" > Using model: {}".format(c.model))
|
||||||
MyModel = importlib.import_module('TTS.tts.tf.models.' + c.model.lower())
|
MyModel = importlib.import_module('mozilla_voice_tts.tts.tf.models.' + c.model.lower())
|
||||||
MyModel = getattr(MyModel, c.model)
|
MyModel = getattr(MyModel, c.model)
|
||||||
if c.model.lower() in "tacotron":
|
if c.model.lower() in "tacotron":
|
||||||
raise NotImplementedError(' [!] Tacotron model is not ready.')
|
raise NotImplementedError(' [!] Tacotron model is not ready.')
|
||||||
|
|
|
@ -9,7 +9,7 @@ import torch.distributed as dist
|
||||||
from torch.utils.data.sampler import Sampler
|
from torch.utils.data.sampler import Sampler
|
||||||
from torch.autograd import Variable
|
from torch.autograd import Variable
|
||||||
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
|
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
|
||||||
from TTS.utils.generic_utils import create_experiment_folder
|
from mozilla_voice_tts.utils.generic_utils import create_experiment_folder
|
||||||
|
|
||||||
|
|
||||||
class DistributedSampler(Sampler):
|
class DistributedSampler(Sampler):
|
||||||
|
|
|
@ -3,7 +3,7 @@ import importlib
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
|
||||||
from TTS.utils.generic_utils import check_argument
|
from mozilla_voice_tts.utils.generic_utils import check_argument
|
||||||
|
|
||||||
|
|
||||||
def split_dataset(items):
|
def split_dataset(items):
|
||||||
|
@ -46,7 +46,7 @@ def sequence_mask(sequence_length, max_len=None):
|
||||||
|
|
||||||
def setup_model(num_chars, num_speakers, c):
|
def setup_model(num_chars, num_speakers, c):
|
||||||
print(" > Using model: {}".format(c.model))
|
print(" > Using model: {}".format(c.model))
|
||||||
MyModel = importlib.import_module('TTS.tts.models.' + c.model.lower())
|
MyModel = importlib.import_module('mozilla_voice_tts.tts.models.' + c.model.lower())
|
||||||
MyModel = getattr(MyModel, c.model)
|
MyModel = getattr(MyModel, c.model)
|
||||||
if c.model.lower() in "tacotron":
|
if c.model.lower() in "tacotron":
|
||||||
model = MyModel(num_chars=num_chars,
|
model = MyModel(num_chars=num_chars,
|
||||||
|
@ -227,7 +227,7 @@ def check_config(c):
|
||||||
|
|
||||||
# dataloading
|
# dataloading
|
||||||
# pylint: disable=import-outside-toplevel
|
# pylint: disable=import-outside-toplevel
|
||||||
from TTS.tts.utils.text import cleaners
|
from mozilla_voice_tts.tts.utils.text import cleaners
|
||||||
check_argument('text_cleaner', c, restricted=True, val_type=str, enum_list=dir(cleaners))
|
check_argument('text_cleaner', c, restricted=True, val_type=str, enum_list=dir(cleaners))
|
||||||
check_argument('enable_eos_bos_chars', c, restricted=True, val_type=bool)
|
check_argument('enable_eos_bos_chars', c, restricted=True, val_type=bool)
|
||||||
check_argument('num_loader_workers', c, restricted=True, val_type=int, min_val=0)
|
check_argument('num_loader_workers', c, restricted=True, val_type=int, min_val=0)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from TTS.tts.datasets.preprocess import get_preprocessor_by_name
|
from mozilla_voice_tts.tts.datasets.preprocess import get_preprocessor_by_name
|
||||||
|
|
||||||
|
|
||||||
def make_speakers_json_path(out_path):
|
def make_speakers_json_path(out_path):
|
||||||
|
|
|
@ -169,11 +169,11 @@ def synthesis(model,
|
||||||
"""Synthesize voice for the given text.
|
"""Synthesize voice for the given text.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
model (TTS.tts.models): model to synthesize.
|
model (mozilla_voice_tts.tts.models): model to synthesize.
|
||||||
text (str): target text
|
text (str): target text
|
||||||
CONFIG (dict): config dictionary to be loaded from config.json.
|
CONFIG (dict): config dictionary to be loaded from config.json.
|
||||||
use_cuda (bool): enable cuda.
|
use_cuda (bool): enable cuda.
|
||||||
ap (TTS.tts.utils.audio.AudioProcessor): audio processor to process
|
ap (mozilla_voice_tts.tts.utils.audio.AudioProcessor): audio processor to process
|
||||||
model outputs.
|
model outputs.
|
||||||
speaker_id (int): id of speaker
|
speaker_id (int): id of speaker
|
||||||
style_wav (str): Uses for style embedding of GST.
|
style_wav (str): Uses for style embedding of GST.
|
||||||
|
|
|
@ -4,8 +4,8 @@ import re
|
||||||
from packaging import version
|
from packaging import version
|
||||||
import phonemizer
|
import phonemizer
|
||||||
from phonemizer.phonemize import phonemize
|
from phonemizer.phonemize import phonemize
|
||||||
from TTS.tts.utils.text import cleaners
|
from mozilla_voice_tts.tts.utils.text import cleaners
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \
|
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \
|
||||||
_eos
|
_eos
|
||||||
|
|
||||||
# Mappings from symbol to numeric ID and vice versa:
|
# Mappings from symbol to numeric ID and vice versa:
|
||||||
|
|
|
@ -3,7 +3,7 @@ import librosa
|
||||||
import matplotlib
|
import matplotlib
|
||||||
matplotlib.use('Agg')
|
matplotlib.use('Agg')
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme
|
from mozilla_voice_tts.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme
|
||||||
|
|
||||||
|
|
||||||
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False):
|
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False):
|
||||||
|
|
|
@ -4,7 +4,7 @@ import numpy as np
|
||||||
import scipy.io.wavfile
|
import scipy.io.wavfile
|
||||||
import scipy.signal
|
import scipy.signal
|
||||||
|
|
||||||
from TTS.tts.utils.data import StandardScaler
|
from mozilla_voice_tts.tts.utils.data import StandardScaler
|
||||||
|
|
||||||
|
|
||||||
class AudioProcessor(object):
|
class AudioProcessor(object):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import datetime
|
import datetime
|
||||||
from TTS.utils.io import AttrDict
|
from mozilla_voice_tts.utils.io import AttrDict
|
||||||
|
|
||||||
|
|
||||||
tcolors = AttrDict({
|
tcolors = AttrDict({
|
||||||
|
|
|
@ -2,7 +2,7 @@ import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn.utils import weight_norm
|
from torch.nn.utils import weight_norm
|
||||||
|
|
||||||
from TTS.vocoder.layers.melgan import ResidualStack
|
from mozilla_voice_tts.vocoder.layers.melgan import ResidualStack
|
||||||
|
|
||||||
|
|
||||||
class MelganGenerator(nn.Module):
|
class MelganGenerator(nn.Module):
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator
|
from mozilla_voice_tts.vocoder.models.melgan_discriminator import MelganDiscriminator
|
||||||
|
|
||||||
|
|
||||||
class MelganMultiscaleDiscriminator(nn.Module):
|
class MelganMultiscaleDiscriminator(nn.Module):
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.vocoder.models.melgan_generator import MelganGenerator
|
from mozilla_voice_tts.vocoder.models.melgan_generator import MelganGenerator
|
||||||
from TTS.vocoder.layers.pqmf import PQMF
|
from mozilla_voice_tts.vocoder.layers.pqmf import PQMF
|
||||||
|
|
||||||
|
|
||||||
class MultibandMelganGenerator(MelganGenerator):
|
class MultibandMelganGenerator(MelganGenerator):
|
||||||
|
|
|
@ -2,7 +2,7 @@ import math
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
from TTS.vocoder.layers.parallel_wavegan import ResidualBlock
|
from mozilla_voice_tts.vocoder.layers.parallel_wavegan import ResidualBlock
|
||||||
|
|
||||||
|
|
||||||
class ParallelWaveganDiscriminator(nn.Module):
|
class ParallelWaveganDiscriminator(nn.Module):
|
||||||
|
|
|
@ -2,8 +2,8 @@ import math
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.vocoder.layers.parallel_wavegan import ResidualBlock
|
from mozilla_voice_tts.vocoder.layers.parallel_wavegan import ResidualBlock
|
||||||
from TTS.vocoder.layers.upsample import ConvUpsample
|
from mozilla_voice_tts.vocoder.layers.upsample import ConvUpsample
|
||||||
|
|
||||||
|
|
||||||
class ParallelWaveganGenerator(torch.nn.Module):
|
class ParallelWaveganGenerator(torch.nn.Module):
|
||||||
|
|
|
@ -5,7 +5,7 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # FATAL
|
||||||
logging.getLogger('tensorflow').setLevel(logging.FATAL)
|
logging.getLogger('tensorflow').setLevel(logging.FATAL)
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from TTS.vocoder.tf.layers.melgan import ResidualStack, ReflectionPad1d
|
from mozilla_voice_tts.vocoder.tf.layers.melgan import ResidualStack, ReflectionPad1d
|
||||||
|
|
||||||
|
|
||||||
#pylint: disable=too-many-ancestors
|
#pylint: disable=too-many-ancestors
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from TTS.vocoder.tf.models.melgan_generator import MelganGenerator
|
from mozilla_voice_tts.vocoder.tf.models.melgan_generator import MelganGenerator
|
||||||
from TTS.vocoder.tf.layers.pqmf import PQMF
|
from mozilla_voice_tts.vocoder.tf.layers.pqmf import PQMF
|
||||||
|
|
||||||
#pylint: disable=too-many-ancestors
|
#pylint: disable=too-many-ancestors
|
||||||
#pylint: disable=abstract-method
|
#pylint: disable=abstract-method
|
||||||
|
|
|
@ -9,7 +9,7 @@ def to_camel(text):
|
||||||
|
|
||||||
def setup_generator(c):
|
def setup_generator(c):
|
||||||
print(" > Generator Model: {}".format(c.generator_model))
|
print(" > Generator Model: {}".format(c.generator_model))
|
||||||
MyModel = importlib.import_module('TTS.vocoder.tf.models.' +
|
MyModel = importlib.import_module('mozilla_voice_tts.vocoder.tf.models.' +
|
||||||
c.generator_model.lower())
|
c.generator_model.lower())
|
||||||
MyModel = getattr(MyModel, to_camel(c.generator_model))
|
MyModel = getattr(MyModel, to_camel(c.generator_model))
|
||||||
if c.generator_model in 'melgan_generator':
|
if c.generator_model in 'melgan_generator':
|
||||||
|
|
|
@ -3,7 +3,7 @@ import importlib
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
from TTS.tts.utils.visual import plot_spectrogram
|
from mozilla_voice_tts.tts.utils.visual import plot_spectrogram
|
||||||
|
|
||||||
|
|
||||||
def plot_results(y_hat, y, ap, global_step, name_prefix):
|
def plot_results(y_hat, y, ap, global_step, name_prefix):
|
||||||
|
@ -44,7 +44,7 @@ def to_camel(text):
|
||||||
|
|
||||||
def setup_generator(c):
|
def setup_generator(c):
|
||||||
print(" > Generator Model: {}".format(c.generator_model))
|
print(" > Generator Model: {}".format(c.generator_model))
|
||||||
MyModel = importlib.import_module('TTS.vocoder.models.' +
|
MyModel = importlib.import_module('mozilla_voice_tts.vocoder.models.' +
|
||||||
c.generator_model.lower())
|
c.generator_model.lower())
|
||||||
MyModel = getattr(MyModel, to_camel(c.generator_model))
|
MyModel = getattr(MyModel, to_camel(c.generator_model))
|
||||||
if c.generator_model in 'melgan_generator':
|
if c.generator_model in 'melgan_generator':
|
||||||
|
@ -91,9 +91,9 @@ def setup_discriminator(c):
|
||||||
print(" > Discriminator Model: {}".format(c.discriminator_model))
|
print(" > Discriminator Model: {}".format(c.discriminator_model))
|
||||||
if 'parallel_wavegan' in c.discriminator_model:
|
if 'parallel_wavegan' in c.discriminator_model:
|
||||||
MyModel = importlib.import_module(
|
MyModel = importlib.import_module(
|
||||||
'TTS.vocoder.models.parallel_wavegan_discriminator')
|
'mozilla_voice_tts.vocoder.models.parallel_wavegan_discriminator')
|
||||||
else:
|
else:
|
||||||
MyModel = importlib.import_module('TTS.vocoder.models.' +
|
MyModel = importlib.import_module('mozilla_voice_tts.vocoder.models.' +
|
||||||
c.discriminator_model.lower())
|
c.discriminator_model.lower())
|
||||||
MyModel = getattr(MyModel, to_camel(c.discriminator_model.lower()))
|
MyModel = getattr(MyModel, to_camel(c.discriminator_model.lower()))
|
||||||
if c.discriminator_model in 'random_window_discriminator':
|
if c.discriminator_model in 'random_window_discriminator':
|
||||||
|
|
|
@ -16,9 +16,9 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"%matplotlib inline\n",
|
"%matplotlib inline\n",
|
||||||
"from TTS.tts.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.visual import plot_spectrogram\n",
|
"from mozilla_voice_tts.tts.utils.visual import plot_spectrogram\n",
|
||||||
"from TTS.tts.utils.generic_utils import load_config\n",
|
"from mozilla_voice_tts.tts.utils.generic_utils import load_config\n",
|
||||||
"import glob \n",
|
"import glob \n",
|
||||||
"import IPython.display as ipd"
|
"import IPython.display as ipd"
|
||||||
]
|
]
|
||||||
|
|
|
@ -154,11 +154,11 @@
|
||||||
"import time\n",
|
"import time\n",
|
||||||
"import IPython\n",
|
"import IPython\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from TTS.tts.utils.generic_utils import setup_model\n",
|
"from mozilla_voice_tts.tts.utils.generic_utils import setup_model\n",
|
||||||
"from TTS.utils.io import load_config\n",
|
"from mozilla_voice_tts.utils.io import load_config\n",
|
||||||
"from TTS.tts.utils.text.symbols import symbols, phonemes\n",
|
"from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
|
||||||
"from TTS.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.synthesis import synthesis"
|
"from mozilla_voice_tts.tts.utils.synthesis import synthesis"
|
||||||
],
|
],
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
|
@ -280,7 +280,7 @@
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"from TTS.vocoder.utils.generic_utils import setup_generator\n",
|
"from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# LOAD VOCODER MODEL\n",
|
"# LOAD VOCODER MODEL\n",
|
||||||
"vocoder_model = setup_generator(VOCODER_CONFIG)\n",
|
"vocoder_model = setup_generator(VOCODER_CONFIG)\n",
|
||||||
|
|
|
@ -1140,12 +1140,12 @@
|
||||||
"import time\n",
|
"import time\n",
|
||||||
"import IPython\n",
|
"import IPython\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from TTS.tf.utils.tflite import load_tflite_model\n",
|
"from mozilla_voice_tts.tf.utils.tflite import load_tflite_model\n",
|
||||||
"from TTS.tf.utils.io import load_checkpoint\n",
|
"from mozilla_voice_tts.tf.utils.io import load_checkpoint\n",
|
||||||
"from TTS.utils.io import load_config\n",
|
"from mozilla_voice_tts.utils.io import load_config\n",
|
||||||
"from TTS.utils.text.symbols import symbols, phonemes\n",
|
"from mozilla_voice_tts.utils.text.symbols import symbols, phonemes\n",
|
||||||
"from TTS.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.synthesis import synthesis"
|
"from mozilla_voice_tts.tts.utils.synthesis import synthesis"
|
||||||
],
|
],
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
"colab_type": "text"
|
"colab_type": "text"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n",
|
"**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n",
|
||||||
"\n",
|
"\n",
|
||||||
"These TF models support TF 2.2 and for different versions you might need to\n",
|
"These TF models support TF 2.2 and for different versions you might need to\n",
|
||||||
"regenerate them. \n",
|
"regenerate them. \n",
|
||||||
|
@ -154,12 +154,12 @@
|
||||||
"import time\n",
|
"import time\n",
|
||||||
"import IPython\n",
|
"import IPython\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from TTS.tts.tf.utils.generic_utils import setup_model\n",
|
"from mozilla_voice_tts.tts.tf.utils.generic_utils import setup_model\n",
|
||||||
"from TTS.tts.tf.utils.io import load_checkpoint\n",
|
"from mozilla_voice_tts.tts.tf.utils.io import load_checkpoint\n",
|
||||||
"from TTS.utils.io import load_config\n",
|
"from mozilla_voice_tts.utils.io import load_config\n",
|
||||||
"from TTS.tts.utils.text.symbols import symbols, phonemes\n",
|
"from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
|
||||||
"from TTS.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.synthesis import synthesis"
|
"from mozilla_voice_tts.tts.utils.synthesis import synthesis"
|
||||||
],
|
],
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
|
@ -270,8 +270,8 @@
|
||||||
"outputId": "2cc3deae-144f-4465-da3b-98628d948506"
|
"outputId": "2cc3deae-144f-4465-da3b-98628d948506"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"from TTS.vocoder.tf.utils.generic_utils import setup_generator\n",
|
"from mozilla_voice_tts.vocoder.tf.utils.generic_utils import setup_generator\n",
|
||||||
"from TTS.vocoder.tf.utils.io import load_checkpoint\n",
|
"from mozilla_voice_tts.vocoder.tf.utils.io import load_checkpoint\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# LOAD VOCODER MODEL\n",
|
"# LOAD VOCODER MODEL\n",
|
||||||
"vocoder_model = setup_generator(VOCODER_CONFIG)\n",
|
"vocoder_model = setup_generator(VOCODER_CONFIG)\n",
|
||||||
|
|
|
@ -154,11 +154,11 @@
|
||||||
"import time\n",
|
"import time\n",
|
||||||
"import IPython\n",
|
"import IPython\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from TTS.tts.utils.generic_utils import setup_model\n",
|
"from mozilla_voice_tts.tts.utils.generic_utils import setup_model\n",
|
||||||
"from TTS.utils.io import load_config\n",
|
"from mozilla_voice_tts.utils.io import load_config\n",
|
||||||
"from TTS.tts.utils.text.symbols import symbols, phonemes\n",
|
"from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
|
||||||
"from TTS.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.synthesis import synthesis"
|
"from mozilla_voice_tts.tts.utils.synthesis import synthesis"
|
||||||
],
|
],
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
|
@ -280,7 +280,7 @@
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"from TTS.vocoder.utils.generic_utils import setup_generator\n",
|
"from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# LOAD VOCODER MODEL\n",
|
"# LOAD VOCODER MODEL\n",
|
||||||
"vocoder_model = setup_generator(VOCODER_CONFIG)\n",
|
"vocoder_model = setup_generator(VOCODER_CONFIG)\n",
|
||||||
|
|
|
@ -22,12 +22,12 @@
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"from tqdm import tqdm as tqdm\n",
|
"from tqdm import tqdm as tqdm\n",
|
||||||
"from torch.utils.data import DataLoader\n",
|
"from torch.utils.data import DataLoader\n",
|
||||||
"from TTS.tts.datasets.TTSDataset import MyDataset\n",
|
"from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset\n",
|
||||||
"from TTS.tts.layers.losses import L1LossMasked\n",
|
"from mozilla_voice_tts.tts.layers.losses import L1LossMasked\n",
|
||||||
"from TTS.tts.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.visual import plot_spectrogram\n",
|
"from mozilla_voice_tts.tts.utils.visual import plot_spectrogram\n",
|
||||||
"from TTS.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n",
|
"from mozilla_voice_tts.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n",
|
||||||
"from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
|
"from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
|
||||||
"\n",
|
"\n",
|
||||||
"%matplotlib inline\n",
|
"%matplotlib inline\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -108,7 +108,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"preprocessor = importlib.import_module('TTS.tts.datasets.preprocess')\n",
|
"preprocessor = importlib.import_module('mozilla_voice_tts.tts.datasets.preprocess')\n",
|
||||||
"preprocessor = getattr(preprocessor, DATASET.lower())\n",
|
"preprocessor = getattr(preprocessor, DATASET.lower())\n",
|
||||||
"meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n",
|
"meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n",
|
||||||
"dataset = MyDataset(checkpoint['r'], C.text_cleaner, False, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n",
|
"dataset = MyDataset(checkpoint['r'], C.text_cleaner, False, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n",
|
||||||
|
|
|
@ -36,14 +36,14 @@
|
||||||
"import librosa\n",
|
"import librosa\n",
|
||||||
"import librosa.display\n",
|
"import librosa.display\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from TTS.tts.layers import *\n",
|
"from mozilla_voice_tts.tts.layers import *\n",
|
||||||
"from TTS.tts.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.generic_utils import setup_model\n",
|
"from mozilla_voice_tts.tts.utils.generic_utils import setup_model\n",
|
||||||
"from TTS.tts.utils.io import load_config\n",
|
"from mozilla_voice_tts.tts.utils.io import load_config\n",
|
||||||
"from TTS.tts.utils.text import text_to_sequence\n",
|
"from mozilla_voice_tts.tts.utils.text import text_to_sequence\n",
|
||||||
"from TTS.tts.utils.synthesis import synthesis\n",
|
"from mozilla_voice_tts.tts.utils.synthesis import synthesis\n",
|
||||||
"from TTS.tts.utils.visual import plot_alignment\n",
|
"from mozilla_voice_tts.tts.utils.visual import plot_alignment\n",
|
||||||
"from TTS.tts.utils.measures import alignment_diagonal_score\n",
|
"from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import IPython\n",
|
"import IPython\n",
|
||||||
"from IPython.display import Audio\n",
|
"from IPython.display import Audio\n",
|
||||||
|
@ -96,7 +96,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# LOAD TTS MODEL\n",
|
"# LOAD TTS MODEL\n",
|
||||||
"from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
|
"from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# multi speaker \n",
|
"# multi speaker \n",
|
||||||
"if CONFIG.use_speaker_embedding:\n",
|
"if CONFIG.use_speaker_embedding:\n",
|
||||||
|
|
|
@ -276,12 +276,12 @@
|
||||||
"import time\n",
|
"import time\n",
|
||||||
"import IPython\n",
|
"import IPython\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from TTS.tts.tf.utils.tflite import load_tflite_model\n",
|
"from mozilla_voice_tts.tts.tf.utils.tflite import load_tflite_model\n",
|
||||||
"from TTS.tts.tf.utils.io import load_checkpoint\n",
|
"from mozilla_voice_tts.tts.tf.utils.io import load_checkpoint\n",
|
||||||
"from TTS.utils.io import load_config\n",
|
"from mozilla_voice_tts.utils.io import load_config\n",
|
||||||
"from TTS.tts.utils.text.symbols import symbols, phonemes\n",
|
"from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
|
||||||
"from TTS.utils.audio import AudioProcessor\n",
|
"from mozilla_voice_tts.utils.audio import AudioProcessor\n",
|
||||||
"from TTS.tts.utils.synthesis import synthesis"
|
"from mozilla_voice_tts.tts.utils.synthesis import synthesis"
|
||||||
],
|
],
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"outputs": []
|
"outputs": []
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
"from multiprocessing import Pool\n",
|
"from multiprocessing import Pool\n",
|
||||||
"from matplotlib import pylab as plt\n",
|
"from matplotlib import pylab as plt\n",
|
||||||
"from collections import Counter\n",
|
"from collections import Counter\n",
|
||||||
"from TTS.tts.datasets.preprocess import *\n",
|
"from mozilla_voice_tts.tts.datasets.preprocess import *\n",
|
||||||
"%matplotlib inline"
|
"%matplotlib inline"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
"from multiprocessing import Pool\n",
|
"from multiprocessing import Pool\n",
|
||||||
"from matplotlib import pylab as plt\n",
|
"from matplotlib import pylab as plt\n",
|
||||||
"from collections import Counter\n",
|
"from collections import Counter\n",
|
||||||
"from TTS.tts.datasets.preprocess import *\n",
|
"from mozilla_voice_tts.tts.datasets.preprocess import *\n",
|
||||||
"%matplotlib inline"
|
"%matplotlib inline"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import unittest
|
import unittest
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from TTS.tts.utils.generic_utils import save_checkpoint, save_best_model
|
from mozilla_voice_tts.tts.utils.generic_utils import save_checkpoint, save_best_model
|
||||||
from TTS.tts.layers.tacotron import Prenet
|
from mozilla_voice_tts.tts.layers.tacotron import Prenet
|
||||||
|
|
||||||
OUT_PATH = '/tmp/test.pth.tar'
|
OUT_PATH = '/tmp/test.pth.tar'
|
||||||
|
|
||||||
|
|
|
@ -67,6 +67,7 @@
|
||||||
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
|
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
|
||||||
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
"loss_masking": true, // enable / disable loss masking against the sequence padding.
|
||||||
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
|
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
|
||||||
|
"apex_amp_level": null,
|
||||||
|
|
||||||
// VALIDATION
|
// VALIDATION
|
||||||
"run_eval": true,
|
"run_eval": true,
|
||||||
|
|
|
@ -73,7 +73,7 @@
|
||||||
"data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can be overwritten from command argument
|
"data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can be overwritten from command argument
|
||||||
"meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader.
|
"meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader.
|
||||||
"meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
|
"meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
|
||||||
"dataset": "mozilla", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
|
"dataset": "mozilla", // DATASET-RELATED: one of the preprocessors in mozilla_voice_tts.tts.datasets.preprocess, depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
|
||||||
"min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
|
"min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
|
||||||
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
"max_seq_len": 150, // DATASET-RELATED: maximum text length
|
||||||
"output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
|
"output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from TTS.tts.utils.text import phonemes
|
from mozilla_voice_tts.tts.utils.text import phonemes
|
||||||
|
|
||||||
class SymbolsTest(unittest.TestCase):
|
class SymbolsTest(unittest.TestCase):
|
||||||
def test_uniqueness(self): #pylint: disable=no-self-use
|
def test_uniqueness(self): #pylint: disable=no-self-use
|
||||||
|
|
|
@ -2,8 +2,8 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
|
|
||||||
TESTS_PATH = get_tests_path()
|
TESTS_PATH = get_tests_path()
|
||||||
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
|
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
|
||||||
|
|
|
@ -3,12 +3,12 @@ import unittest
|
||||||
|
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from TTS.server.synthesizer import Synthesizer
|
from mozilla_voice_tts.server.synthesizer import Synthesizer
|
||||||
from tests import get_tests_input_path, get_tests_output_path
|
from tests import get_tests_input_path, get_tests_output_path
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from mozilla_voice_tts.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.io import save_checkpoint
|
from mozilla_voice_tts.tts.utils.io import save_checkpoint
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
|
|
||||||
|
|
||||||
class DemoServerTest(unittest.TestCase):
|
class DemoServerTest(unittest.TestCase):
|
||||||
|
|
|
@ -3,9 +3,9 @@ import unittest
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from tests import get_tests_path, get_tests_input_path
|
from tests import get_tests_path, get_tests_input_path
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
|
||||||
from TTS.speaker_encoder.loss import GE2ELoss
|
from mozilla_voice_tts.speaker_encoder.loss import GE2ELoss
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
|
|
||||||
|
|
||||||
file_path = get_tests_input_path()
|
file_path = get_tests_input_path()
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import unittest
|
import unittest
|
||||||
import torch as T
|
import torch as T
|
||||||
|
|
||||||
from TTS.tts.layers.tacotron import Prenet, CBHG, Decoder, Encoder
|
from mozilla_voice_tts.tts.layers.tacotron import Prenet, CBHG, Decoder, Encoder
|
||||||
from TTS.tts.layers.losses import L1LossMasked
|
from mozilla_voice_tts.tts.layers.losses import L1LossMasked
|
||||||
from TTS.tts.utils.generic_utils import sequence_mask
|
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||||
|
|
||||||
# pylint: disable=unused-variable
|
# pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -6,10 +6,10 @@ import numpy as np
|
||||||
|
|
||||||
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.tts.datasets import TTSDataset
|
from mozilla_voice_tts.tts.datasets import TTSDataset
|
||||||
from TTS.tts.datasets.preprocess import ljspeech
|
from mozilla_voice_tts.tts.datasets.preprocess import ljspeech
|
||||||
|
|
||||||
#pylint: disable=unused-variable
|
#pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ import unittest
|
||||||
import os
|
import os
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
|
|
||||||
from TTS.tts.datasets.preprocess import common_voice
|
from mozilla_voice_tts.tts.datasets.preprocess import common_voice
|
||||||
|
|
||||||
|
|
||||||
class TestPreprocessors(unittest.TestCase):
|
class TestPreprocessors(unittest.TestCase):
|
||||||
|
|
|
@ -12,12 +12,12 @@ pip install --quiet --upgrade pip setuptools wheel
|
||||||
|
|
||||||
rm -f dist/*.whl
|
rm -f dist/*.whl
|
||||||
python setup.py --quiet bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json
|
python setup.py --quiet bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json
|
||||||
pip install --quiet dist/TTS*.whl
|
pip install --quiet dist/mozilla_voice_tts*.whl
|
||||||
|
|
||||||
# this is related to https://github.com/librosa/librosa/issues/1160
|
# this is related to https://github.com/librosa/librosa/issues/1160
|
||||||
pip install numba==0.48
|
pip install numba==0.48
|
||||||
|
|
||||||
python -m TTS.server.server &
|
python -m mozilla_voice_tts.server.server &
|
||||||
SERVER_PID=$!
|
SERVER_PID=$!
|
||||||
|
|
||||||
echo 'Waiting for server...'
|
echo 'Waiting for server...'
|
||||||
|
|
|
@ -6,9 +6,9 @@ import torch
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
from torch import nn, optim
|
from torch import nn, optim
|
||||||
|
|
||||||
from TTS.tts.layers.losses import MSELossMasked
|
from mozilla_voice_tts.tts.layers.losses import MSELossMasked
|
||||||
from TTS.tts.models.tacotron2 import Tacotron2
|
from mozilla_voice_tts.tts.models.tacotron2 import Tacotron2
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
|
|
||||||
#pylint: disable=unused-variable
|
#pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -7,9 +7,9 @@ tf.get_logger().setLevel('INFO')
|
||||||
|
|
||||||
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
||||||
|
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.tts.tf.models.tacotron2 import Tacotron2
|
from mozilla_voice_tts.tts.tf.models.tacotron2 import Tacotron2
|
||||||
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
|
from mozilla_voice_tts.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
|
||||||
|
|
||||||
#pylint: disable=unused-variable
|
#pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -6,9 +6,9 @@ import torch
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
from torch import nn, optim
|
from torch import nn, optim
|
||||||
|
|
||||||
from TTS.tts.layers.losses import L1LossMasked
|
from mozilla_voice_tts.tts.layers.losses import L1LossMasked
|
||||||
from TTS.tts.models.tacotron import Tacotron
|
from mozilla_voice_tts.tts.models.tacotron import Tacotron
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
|
|
||||||
#pylint: disable=unused-variable
|
#pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,9 @@ import os
|
||||||
# pylint: disable=unused-import
|
# pylint: disable=unused-import
|
||||||
import unittest
|
import unittest
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
from TTS.tts.utils.text import *
|
from mozilla_voice_tts.tts.utils.text import *
|
||||||
from tests import get_tests_path
|
from tests import get_tests_path
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
|
|
||||||
conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))
|
conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))
|
||||||
|
|
||||||
|
|
|
@ -3,11 +3,11 @@
|
||||||
BASEDIR=$(dirname "$0")
|
BASEDIR=$(dirname "$0")
|
||||||
echo "$BASEDIR"
|
echo "$BASEDIR"
|
||||||
# run training
|
# run training
|
||||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tts.py --config_path $BASEDIR/inputs/test_train_config.json
|
CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_tts.py --config_path $BASEDIR/inputs/test_train_config.json
|
||||||
# find the training folder
|
# find the training folder
|
||||||
LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
|
LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
|
||||||
echo $LATEST_FOLDER
|
echo $LATEST_FOLDER
|
||||||
# continue the previous training
|
# continue the previous training
|
||||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tts.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
|
CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_tts.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
|
||||||
# remove all the outputs
|
# remove all the outputs
|
||||||
rm -rf $BASEDIR/train_outputs/
|
rm -rf $BASEDIR/train_outputs/
|
||||||
|
|
|
@ -4,10 +4,10 @@ import numpy as np
|
||||||
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.vocoder.datasets.gan_dataset import GANDataset
|
from mozilla_voice_tts.vocoder.datasets.gan_dataset import GANDataset
|
||||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
from mozilla_voice_tts.vocoder.datasets.preprocess import load_wav_data
|
||||||
|
|
||||||
file_path = os.path.dirname(os.path.realpath(__file__))
|
file_path = os.path.dirname(os.path.realpath(__file__))
|
||||||
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
|
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
|
||||||
|
|
|
@ -3,9 +3,9 @@ import os
|
||||||
import torch
|
import torch
|
||||||
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
||||||
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||||
from TTS.utils.io import load_config
|
from mozilla_voice_tts.utils.io import load_config
|
||||||
from TTS.vocoder.layers.losses import MultiScaleSTFTLoss, STFTLoss, TorchSTFT
|
from mozilla_voice_tts.vocoder.layers.losses import MultiScaleSTFTLoss, STFTLoss, TorchSTFT
|
||||||
|
|
||||||
TESTS_PATH = get_tests_path()
|
TESTS_PATH = get_tests_path()
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator
|
from mozilla_voice_tts.vocoder.models.melgan_discriminator import MelganDiscriminator
|
||||||
from TTS.vocoder.models.melgan_multiscale_discriminator import MelganMultiscaleDiscriminator
|
from mozilla_voice_tts.vocoder.models.melgan_multiscale_discriminator import MelganMultiscaleDiscriminator
|
||||||
|
|
||||||
|
|
||||||
def test_melgan_discriminator():
|
def test_melgan_discriminator():
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.vocoder.models.melgan_generator import MelganGenerator
|
from mozilla_voice_tts.vocoder.models.melgan_generator import MelganGenerator
|
||||||
|
|
||||||
def test_melgan_generator():
|
def test_melgan_generator():
|
||||||
model = MelganGenerator()
|
model = MelganGenerator()
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.vocoder.models.parallel_wavegan_discriminator import ParallelWaveganDiscriminator, ResidualParallelWaveganDiscriminator
|
from mozilla_voice_tts.vocoder.models.parallel_wavegan_discriminator import ParallelWaveganDiscriminator, ResidualParallelWaveganDiscriminator
|
||||||
|
|
||||||
|
|
||||||
def test_pwgan_disciminator():
|
def test_pwgan_disciminator():
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from TTS.vocoder.models.parallel_wavegan_generator import ParallelWaveganGenerator
|
from mozilla_voice_tts.vocoder.models.parallel_wavegan_generator import ParallelWaveganGenerator
|
||||||
|
|
||||||
|
|
||||||
def test_pwgan_generator():
|
def test_pwgan_generator():
|
||||||
|
|
|
@ -5,7 +5,7 @@ import soundfile as sf
|
||||||
from librosa.core import load
|
from librosa.core import load
|
||||||
|
|
||||||
from tests import get_tests_path, get_tests_input_path
|
from tests import get_tests_path, get_tests_input_path
|
||||||
from TTS.vocoder.layers.pqmf import PQMF
|
from mozilla_voice_tts.vocoder.layers.pqmf import PQMF
|
||||||
|
|
||||||
|
|
||||||
TESTS_PATH = get_tests_path()
|
TESTS_PATH = get_tests_path()
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from TTS.vocoder.models.random_window_discriminator import RandomWindowDiscriminator
|
from mozilla_voice_tts.vocoder.models.random_window_discriminator import RandomWindowDiscriminator
|
||||||
|
|
||||||
|
|
||||||
def test_rwd():
|
def test_rwd():
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from TTS.vocoder.tf.models.melgan_generator import MelganGenerator
|
from mozilla_voice_tts.vocoder.tf.models.melgan_generator import MelganGenerator
|
||||||
|
|
||||||
|
|
||||||
def test_melgan_generator():
|
def test_melgan_generator():
|
||||||
|
|
|
@ -5,7 +5,7 @@ import soundfile as sf
|
||||||
from librosa.core import load
|
from librosa.core import load
|
||||||
|
|
||||||
from tests import get_tests_path, get_tests_input_path
|
from tests import get_tests_path, get_tests_input_path
|
||||||
from TTS.vocoder.tf.layers.pqmf import PQMF
|
from mozilla_voice_tts.vocoder.tf.layers.pqmf import PQMF
|
||||||
|
|
||||||
|
|
||||||
TESTS_PATH = get_tests_path()
|
TESTS_PATH = get_tests_path()
|
||||||
|
|
|
@ -5,11 +5,11 @@ echo "$BASEDIR"
|
||||||
# create run dir
|
# create run dir
|
||||||
mkdir $BASEDIR/train_outputs
|
mkdir $BASEDIR/train_outputs
|
||||||
# run training
|
# run training
|
||||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
|
CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
|
||||||
# find the training folder
|
# find the training folder
|
||||||
LATEST_FOLDER=$(ls $BASEDIR/outputs/train_outputs/| sort | tail -1)
|
LATEST_FOLDER=$(ls $BASEDIR/outputs/train_outputs/| sort | tail -1)
|
||||||
echo $LATEST_FOLDER
|
echo $LATEST_FOLDER
|
||||||
# continue the previous training
|
# continue the previous training
|
||||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder.py --continue_path $BASEDIR/outputs/train_outputs/$LATEST_FOLDER
|
CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_vocoder.py --continue_path $BASEDIR/outputs/train_outputs/$LATEST_FOLDER
|
||||||
# remove all the outputs
|
# remove all the outputs
|
||||||
rm -rf $BASEDIR/train_outputs/
|
rm -rf $BASEDIR/train_outputs/
|
||||||
|
|
Loading…
Reference in New Issue