rebranding and replacing import statements

This commit is contained in:
erogol 2020-08-04 10:51:19 +02:00
parent 868efa90b9
commit f35504f187
77 changed files with 240 additions and 239 deletions

View File

@ -7,9 +7,9 @@ import argparse
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
from TTS.tts.datasets.preprocess import load_meta_data from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
def main(): def main():
"""Run preprocessing process.""" """Run preprocessing process."""

View File

@ -2,10 +2,10 @@
import argparse import argparse
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.vocoder.tf.utils.generic_utils import setup_generator from mozilla_voice_tts.vocoder.tf.utils.generic_utils import setup_generator
from TTS.vocoder.tf.utils.io import load_checkpoint from mozilla_voice_tts.vocoder.tf.utils.io import load_checkpoint
from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite from mozilla_voice_tts.vocoder.tf.utils.tflite import convert_melgan_to_tflite
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()

View File

@ -6,13 +6,13 @@ import tensorflow as tf
import torch import torch
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import ( from mozilla_voice_tts.vocoder.tf.utils.convert_torch_to_tf_utils import (
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf) compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
from TTS.vocoder.tf.utils.generic_utils import \ from mozilla_voice_tts.vocoder.tf.utils.generic_utils import \
setup_generator as setup_tf_generator setup_generator as setup_tf_generator
from TTS.vocoder.tf.utils.io import save_checkpoint from mozilla_voice_tts.vocoder.tf.utils.io import save_checkpoint
from TTS.vocoder.utils.generic_utils import setup_generator from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
# prevent GPU use # prevent GPU use
os.environ['CUDA_VISIBLE_DEVICES'] = '' os.environ['CUDA_VISIBLE_DEVICES'] = ''

View File

@ -2,11 +2,11 @@
import argparse import argparse
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.tts.utils.text.symbols import symbols, phonemes from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes
from TTS.tts.tf.utils.generic_utils import setup_model from mozilla_voice_tts.tts.tf.utils.generic_utils import setup_model
from TTS.tts.tf.utils.io import load_checkpoint from mozilla_voice_tts.tts.tf.utils.io import load_checkpoint
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite from mozilla_voice_tts.tts.tf.utils.tflite import convert_tacotron2_to_tflite
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()

View File

@ -11,13 +11,13 @@ import numpy as np
import tensorflow as tf import tensorflow as tf
import torch import torch
from fuzzywuzzy import fuzz from fuzzywuzzy import fuzz
from TTS.tts.tf.models.tacotron2 import Tacotron2 from mozilla_voice_tts.tts.tf.models.tacotron2 import Tacotron2
from TTS.tts.tf.utils.convert_torch_to_tf_utils import ( from mozilla_voice_tts.tts.tf.utils.convert_torch_to_tf_utils import (
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf) compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
from TTS.tts.tf.utils.generic_utils import save_checkpoint from mozilla_voice_tts.tts.tf.utils.generic_utils import save_checkpoint
from TTS.tts.utils.generic_utils import setup_model from mozilla_voice_tts.tts.utils.generic_utils import setup_model
from TTS.tts.utils.text.symbols import phonemes, symbols from mozilla_voice_tts.tts.utils.text.symbols import phonemes, symbols
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
sys.path.append('/home/erogol/Projects') sys.path.append('/home/erogol/Projects')
os.environ['CUDA_VISIBLE_DEVICES'] = '' os.environ['CUDA_VISIBLE_DEVICES'] = ''

View File

@ -10,12 +10,12 @@ import time
import torch import torch
from TTS.tts.utils.generic_utils import setup_model from mozilla_voice_tts.tts.utils.generic_utils import setup_model
from TTS.tts.utils.synthesis import synthesis from mozilla_voice_tts.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.vocoder.utils.generic_utils import setup_generator from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_id): def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_id):

View File

@ -9,21 +9,21 @@ import traceback
import torch import torch
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from TTS.speaker_encoder.dataset import MyDataset from mozilla_voice_tts.speaker_encoder.dataset import MyDataset
from TTS.speaker_encoder.generic_utils import save_best_model from mozilla_voice_tts.speaker_encoder.generic_utils import save_best_model
from TTS.speaker_encoder.loss import GE2ELoss from mozilla_voice_tts.speaker_encoder.loss import GE2ELoss
from TTS.speaker_encoder.model import SpeakerEncoder from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
from TTS.speaker_encoder.visual import plot_embeddings from mozilla_voice_tts.speaker_encoder.visual import plot_embeddings
from TTS.tts.datasets.preprocess import load_meta_data from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
from TTS.tts.utils.audio import AudioProcessor from mozilla_voice_tts.tts.utils.audio import AudioProcessor
from TTS.tts.utils.generic_utils import (create_experiment_folder, from mozilla_voice_tts.tts.utils.generic_utils import (create_experiment_folder,
get_git_branch, get_git_branch,
remove_experiment_folder, remove_experiment_folder,
set_init_dict) set_init_dict)
from TTS.tts.utils.io import copy_config_file, load_config from mozilla_voice_tts.tts.utils.io import copy_config_file, load_config
from TTS.tts.utils.radam import RAdam from mozilla_voice_tts.tts.utils.radam import RAdam
from TTS.tts.utils.tensorboard_logger import TensorboardLogger from mozilla_voice_tts.tts.utils.tensorboard_logger import TensorboardLogger
from TTS.tts.utils.training import NoamLR, check_update from mozilla_voice_tts.tts.utils.training import NoamLR, check_update
torch.backends.cudnn.enabled = True torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True

View File

@ -11,29 +11,29 @@ import traceback
import numpy as np import numpy as np
import torch import torch
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from TTS.tts.datasets.preprocess import load_meta_data from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
from TTS.tts.datasets.TTSDataset import MyDataset from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset
from TTS.tts.layers.losses import TacotronLoss from mozilla_voice_tts.tts.layers.losses import TacotronLoss
from TTS.tts.utils.distribute import (DistributedSampler, from mozilla_voice_tts.tts.utils.distribute import (DistributedSampler,
apply_gradient_allreduce, apply_gradient_allreduce,
init_distributed, reduce_tensor) init_distributed, reduce_tensor)
from TTS.tts.utils.generic_utils import check_config, setup_model from mozilla_voice_tts.tts.utils.generic_utils import check_config, setup_model
from TTS.tts.utils.io import save_best_model, save_checkpoint from mozilla_voice_tts.tts.utils.io import save_best_model, save_checkpoint
from TTS.tts.utils.measures import alignment_diagonal_score from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score
from TTS.tts.utils.speakers import (get_speakers, load_speaker_mapping, from mozilla_voice_tts.tts.utils.speakers import (get_speakers, load_speaker_mapping,
save_speaker_mapping) save_speaker_mapping)
from TTS.tts.utils.synthesis import synthesis from mozilla_voice_tts.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram from mozilla_voice_tts.tts.utils.visual import plot_alignment, plot_spectrogram
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger from mozilla_voice_tts.utils.console_logger import ConsoleLogger
from TTS.utils.generic_utils import (KeepAverage, count_parameters, from mozilla_voice_tts.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch, create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict) remove_experiment_folder, set_init_dict)
from TTS.utils.io import copy_config_file, load_config from mozilla_voice_tts.utils.io import copy_config_file, load_config
from TTS.utils.radam import RAdam from mozilla_voice_tts.utils.radam import RAdam
from TTS.utils.tensorboard_logger import TensorboardLogger from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.training import (NoamLR, adam_weight_decay, check_update, from mozilla_voice_tts.utils.training import (NoamLR, adam_weight_decay, check_update,
gradual_training_scheduler, set_weight_decay, gradual_training_scheduler, set_weight_decay,
setup_torch_training_env) setup_torch_training_env)

View File

@ -8,24 +8,24 @@ from inspect import signature
import torch import torch
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger from mozilla_voice_tts.utils.console_logger import ConsoleLogger
from TTS.utils.generic_utils import (KeepAverage, count_parameters, from mozilla_voice_tts.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch, create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict) remove_experiment_folder, set_init_dict)
from TTS.utils.io import copy_config_file, load_config from mozilla_voice_tts.utils.io import copy_config_file, load_config
from TTS.utils.radam import RAdam from mozilla_voice_tts.utils.radam import RAdam
from TTS.utils.tensorboard_logger import TensorboardLogger from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.training import setup_torch_training_env from mozilla_voice_tts.utils.training import setup_torch_training_env
from TTS.vocoder.datasets.gan_dataset import GANDataset from mozilla_voice_tts.vocoder.datasets.gan_dataset import GANDataset
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data from mozilla_voice_tts.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
# from distribute import (DistributedSampler, apply_gradient_allreduce, # from distribute import (DistributedSampler, apply_gradient_allreduce,
# init_distributed, reduce_tensor) # init_distributed, reduce_tensor)
from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss from mozilla_voice_tts.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss
from TTS.vocoder.utils.generic_utils import (check_config, plot_results, from mozilla_voice_tts.vocoder.utils.generic_utils import (check_config, plot_results,
setup_discriminator, setup_discriminator,
setup_generator) setup_generator)
from TTS.vocoder.utils.io import save_best_model, save_checkpoint from mozilla_voice_tts.vocoder.utils.io import save_best_model, save_checkpoint
use_cuda, num_gpus = setup_torch_training_env(True, True) use_cuda, num_gpus = setup_torch_training_env(True, True)

View File

@ -15,7 +15,7 @@ If you have the environment set already for TTS, then you can directly call ```s
3. source /tmp/venv/bin/activate 3. source /tmp/venv/bin/activate
4. pip install -U pip setuptools wheel 4. pip install -U pip setuptools wheel
5. pip install -U https://example.com/url/to/python/package.whl 5. pip install -U https://example.com/url/to/python/package.whl
6. python -m TTS.server.server 6. python -m mozilla_voice_tts.server.server
You can now open http://localhost:5002 in a browser You can now open http://localhost:5002 in a browser

View File

@ -3,7 +3,7 @@ import argparse
import os import os
from flask import Flask, request, render_template, send_file from flask import Flask, request, render_template, send_file
from TTS.server.synthesizer import Synthesizer from mozilla_voice_tts.server.synthesizer import Synthesizer
def create_argparser(): def create_argparser():
@ -18,8 +18,8 @@ def create_argparser():
parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.') parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.')
parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.') parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.')
parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.') parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.')
parser.add_argument('--vocoder_config', type=str, default=None, help='path to TTS.vocoder config file.') parser.add_argument('--vocoder_config', type=str, default=None, help='path to mozilla_voice_tts.vocoder config file.')
parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to TTS.vocoder checkpoint file.') parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to mozilla_voice_tts.vocoder checkpoint file.')
parser.add_argument('--port', type=int, default=5002, help='port to listen on.') parser.add_argument('--port', type=int, default=5002, help='port to listen on.')
parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.') parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.')
parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.') parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.')

View File

@ -7,16 +7,16 @@ import torch
import yaml import yaml
import pysbd import pysbd
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.tts.utils.generic_utils import setup_model from mozilla_voice_tts.tts.utils.generic_utils import setup_model
from TTS.tts.utils.speakers import load_speaker_mapping from mozilla_voice_tts.tts.utils.speakers import load_speaker_mapping
from TTS.vocoder.utils.generic_utils import setup_generator from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
# pylint: disable=unused-wildcard-import # pylint: disable=unused-wildcard-import
# pylint: disable=wildcard-import # pylint: disable=wildcard-import
from TTS.tts.utils.synthesis import * from mozilla_voice_tts.tts.utils.synthesis import *
from TTS.tts.utils.text import make_symbols, phonemes, symbols from mozilla_voice_tts.tts.utils.text import make_symbols, phonemes, symbols
class Synthesizer(object): class Synthesizer(object):

View File

@ -6,9 +6,9 @@ import numpy as np
from tqdm import tqdm from tqdm import tqdm
import torch import torch
from TTS.speaker_encoder.model import SpeakerEncoder from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
from TTS.tts.utils.audio import AudioProcessor from mozilla_voice_tts.tts.utils.audio import AudioProcessor
from TTS.tts.utils.generic_utils import load_config from mozilla_voice_tts.tts.utils.generic_utils import load_config
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='Compute embedding vectors for each wav file in a dataset. ') description='Compute embedding vectors for each wav file in a dataset. ')

View File

@ -9,7 +9,7 @@ class MyDataset(Dataset):
num_utter_per_speaker=10, skip_speakers=False, verbose=False): num_utter_per_speaker=10, skip_speakers=False, verbose=False):
""" """
Args: Args:
ap (TTS.tts.utils.AudioProcessor): audio processor object. ap (mozilla_voice_tts.tts.utils.AudioProcessor): audio processor object.
meta_data (list): list of dataset instances. meta_data (list): list of dataset instances.
seq_len (int): voice segment length in seconds. seq_len (int): voice segment length in seconds.
verbose (bool): print diagnostic information. verbose (bool): print diagnostic information.

View File

@ -5,8 +5,8 @@ import torch
import random import random
from torch.utils.data import Dataset from torch.utils.data import Dataset
from TTS.tts.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos from mozilla_voice_tts.tts.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
from TTS.tts.utils.data import prepare_data, prepare_tensor, prepare_stop_target from mozilla_voice_tts.tts.utils.data import prepare_data, prepare_tensor, prepare_stop_target
class MyDataset(Dataset): class MyDataset(Dataset):
@ -30,7 +30,7 @@ class MyDataset(Dataset):
outputs_per_step (int): number of time frames predicted per step. outputs_per_step (int): number of time frames predicted per step.
text_cleaner (str): text cleaner used for the dataset. text_cleaner (str): text cleaner used for the dataset.
compute_linear_spec (bool): compute linear spectrogram if True. compute_linear_spec (bool): compute linear spectrogram if True.
ap (TTS.tts.utils.AudioProcessor): audio processor object. ap (mozilla_voice_tts.tts.utils.AudioProcessor): audio processor object.
meta_data (list): list of dataset instances. meta_data (list): list of dataset instances.
batch_group_size (int): (0) range of batch randomization after sorting batch_group_size (int): (0) range of batch randomization after sorting
sequences by length. sequences by length.

View File

@ -2,7 +2,7 @@ import os
from glob import glob from glob import glob
import re import re
import sys import sys
from TTS.tts.utils.generic_utils import split_dataset from mozilla_voice_tts.tts.utils.generic_utils import split_dataset
def load_meta_data(datasets): def load_meta_data(datasets):

View File

@ -2,7 +2,7 @@ import numpy as np
import torch import torch
from torch import nn from torch import nn
from torch.nn import functional from torch.nn import functional
from TTS.tts.utils.generic_utils import sequence_mask from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
class L1LossMasked(nn.Module): class L1LossMasked(nn.Module):

View File

@ -2,9 +2,9 @@
import torch import torch
from torch import nn from torch import nn
from TTS.tts.layers.gst_layers import GST from mozilla_voice_tts.tts.layers.gst_layers import GST
from TTS.tts.layers.tacotron import Decoder, Encoder, PostCBHG from mozilla_voice_tts.tts.layers.tacotron import Decoder, Encoder, PostCBHG
from TTS.tts.models.tacotron_abstract import TacotronAbstract from mozilla_voice_tts.tts.models.tacotron_abstract import TacotronAbstract
class Tacotron(TacotronAbstract): class Tacotron(TacotronAbstract):

View File

@ -1,9 +1,9 @@
import torch import torch
from torch import nn from torch import nn
from TTS.tts.layers.gst_layers import GST from mozilla_voice_tts.tts.layers.gst_layers import GST
from TTS.tts.layers.tacotron2 import Decoder, Encoder, Postnet from mozilla_voice_tts.tts.layers.tacotron2 import Decoder, Encoder, Postnet
from TTS.tts.models.tacotron_abstract import TacotronAbstract from mozilla_voice_tts.tts.models.tacotron_abstract import TacotronAbstract
# TODO: match function arguments with tacotron # TODO: match function arguments with tacotron

View File

@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
import torch import torch
from torch import nn from torch import nn
from TTS.tts.utils.generic_utils import sequence_mask from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
class TacotronAbstract(ABC, nn.Module): class TacotronAbstract(ABC, nn.Module):

View File

@ -1,7 +1,7 @@
import tensorflow as tf import tensorflow as tf
from tensorflow import keras from tensorflow import keras
from TTS.tts.tf.utils.tf_utils import shape_list from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list
from TTS.tts.tf.layers.common_layers import Prenet, Attention from mozilla_voice_tts.tts.tf.layers.common_layers import Prenet, Attention
# from tensorflow_addons.seq2seq import AttentionWrapper # from tensorflow_addons.seq2seq import AttentionWrapper

View File

@ -1,8 +1,8 @@
import tensorflow as tf import tensorflow as tf
from tensorflow import keras from tensorflow import keras
from TTS.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet from mozilla_voice_tts.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet
from TTS.tts.tf.utils.tf_utils import shape_list from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list
#pylint: disable=too-many-ancestors #pylint: disable=too-many-ancestors

View File

@ -78,7 +78,7 @@ def count_parameters(model, c):
def setup_model(num_chars, num_speakers, c, enable_tflite=False): def setup_model(num_chars, num_speakers, c, enable_tflite=False):
print(" > Using model: {}".format(c.model)) print(" > Using model: {}".format(c.model))
MyModel = importlib.import_module('TTS.tts.tf.models.' + c.model.lower()) MyModel = importlib.import_module('mozilla_voice_tts.tts.tf.models.' + c.model.lower())
MyModel = getattr(MyModel, c.model) MyModel = getattr(MyModel, c.model)
if c.model.lower() in "tacotron": if c.model.lower() in "tacotron":
raise NotImplementedError(' [!] Tacotron model is not ready.') raise NotImplementedError(' [!] Tacotron model is not ready.')

View File

@ -9,7 +9,7 @@ import torch.distributed as dist
from torch.utils.data.sampler import Sampler from torch.utils.data.sampler import Sampler
from torch.autograd import Variable from torch.autograd import Variable
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
from TTS.utils.generic_utils import create_experiment_folder from mozilla_voice_tts.utils.generic_utils import create_experiment_folder
class DistributedSampler(Sampler): class DistributedSampler(Sampler):

View File

@ -3,7 +3,7 @@ import importlib
import numpy as np import numpy as np
from collections import Counter from collections import Counter
from TTS.utils.generic_utils import check_argument from mozilla_voice_tts.utils.generic_utils import check_argument
def split_dataset(items): def split_dataset(items):
@ -46,7 +46,7 @@ def sequence_mask(sequence_length, max_len=None):
def setup_model(num_chars, num_speakers, c): def setup_model(num_chars, num_speakers, c):
print(" > Using model: {}".format(c.model)) print(" > Using model: {}".format(c.model))
MyModel = importlib.import_module('TTS.tts.models.' + c.model.lower()) MyModel = importlib.import_module('mozilla_voice_tts.tts.models.' + c.model.lower())
MyModel = getattr(MyModel, c.model) MyModel = getattr(MyModel, c.model)
if c.model.lower() in "tacotron": if c.model.lower() in "tacotron":
model = MyModel(num_chars=num_chars, model = MyModel(num_chars=num_chars,
@ -227,7 +227,7 @@ def check_config(c):
# dataloading # dataloading
# pylint: disable=import-outside-toplevel # pylint: disable=import-outside-toplevel
from TTS.tts.utils.text import cleaners from mozilla_voice_tts.tts.utils.text import cleaners
check_argument('text_cleaner', c, restricted=True, val_type=str, enum_list=dir(cleaners)) check_argument('text_cleaner', c, restricted=True, val_type=str, enum_list=dir(cleaners))
check_argument('enable_eos_bos_chars', c, restricted=True, val_type=bool) check_argument('enable_eos_bos_chars', c, restricted=True, val_type=bool)
check_argument('num_loader_workers', c, restricted=True, val_type=int, min_val=0) check_argument('num_loader_workers', c, restricted=True, val_type=int, min_val=0)

View File

@ -1,7 +1,7 @@
import os import os
import json import json
from TTS.tts.datasets.preprocess import get_preprocessor_by_name from mozilla_voice_tts.tts.datasets.preprocess import get_preprocessor_by_name
def make_speakers_json_path(out_path): def make_speakers_json_path(out_path):

View File

@ -169,11 +169,11 @@ def synthesis(model,
"""Synthesize voice for the given text. """Synthesize voice for the given text.
Args: Args:
model (TTS.tts.models): model to synthesize. model (mozilla_voice_tts.tts.models): model to synthesize.
text (str): target text text (str): target text
CONFIG (dict): config dictionary to be loaded from config.json. CONFIG (dict): config dictionary to be loaded from config.json.
use_cuda (bool): enable cuda. use_cuda (bool): enable cuda.
ap (TTS.tts.utils.audio.AudioProcessor): audio processor to process ap (mozilla_voice_tts.tts.utils.audio.AudioProcessor): audio processor to process
model outputs. model outputs.
speaker_id (int): id of speaker speaker_id (int): id of speaker
style_wav (str): Uses for style embedding of GST. style_wav (str): Uses for style embedding of GST.

View File

@ -4,8 +4,8 @@ import re
from packaging import version from packaging import version
import phonemizer import phonemizer
from phonemizer.phonemize import phonemize from phonemizer.phonemize import phonemize
from TTS.tts.utils.text import cleaners from mozilla_voice_tts.tts.utils.text import cleaners
from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \ from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \
_eos _eos
# Mappings from symbol to numeric ID and vice versa: # Mappings from symbol to numeric ID and vice versa:

View File

@ -3,7 +3,7 @@ import librosa
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme from mozilla_voice_tts.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False): def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False):

View File

@ -4,7 +4,7 @@ import numpy as np
import scipy.io.wavfile import scipy.io.wavfile
import scipy.signal import scipy.signal
from TTS.tts.utils.data import StandardScaler from mozilla_voice_tts.tts.utils.data import StandardScaler
class AudioProcessor(object): class AudioProcessor(object):

View File

@ -1,5 +1,5 @@
import datetime import datetime
from TTS.utils.io import AttrDict from mozilla_voice_tts.utils.io import AttrDict
tcolors = AttrDict({ tcolors = AttrDict({

View File

@ -2,7 +2,7 @@ import torch
from torch import nn from torch import nn
from torch.nn.utils import weight_norm from torch.nn.utils import weight_norm
from TTS.vocoder.layers.melgan import ResidualStack from mozilla_voice_tts.vocoder.layers.melgan import ResidualStack
class MelganGenerator(nn.Module): class MelganGenerator(nn.Module):

View File

@ -1,6 +1,6 @@
from torch import nn from torch import nn
from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator from mozilla_voice_tts.vocoder.models.melgan_discriminator import MelganDiscriminator
class MelganMultiscaleDiscriminator(nn.Module): class MelganMultiscaleDiscriminator(nn.Module):

View File

@ -1,7 +1,7 @@
import torch import torch
from TTS.vocoder.models.melgan_generator import MelganGenerator from mozilla_voice_tts.vocoder.models.melgan_generator import MelganGenerator
from TTS.vocoder.layers.pqmf import PQMF from mozilla_voice_tts.vocoder.layers.pqmf import PQMF
class MultibandMelganGenerator(MelganGenerator): class MultibandMelganGenerator(MelganGenerator):

View File

@ -2,7 +2,7 @@ import math
import torch import torch
from torch import nn from torch import nn
from TTS.vocoder.layers.parallel_wavegan import ResidualBlock from mozilla_voice_tts.vocoder.layers.parallel_wavegan import ResidualBlock
class ParallelWaveganDiscriminator(nn.Module): class ParallelWaveganDiscriminator(nn.Module):

View File

@ -2,8 +2,8 @@ import math
import numpy as np import numpy as np
import torch import torch
from TTS.vocoder.layers.parallel_wavegan import ResidualBlock from mozilla_voice_tts.vocoder.layers.parallel_wavegan import ResidualBlock
from TTS.vocoder.layers.upsample import ConvUpsample from mozilla_voice_tts.vocoder.layers.upsample import ConvUpsample
class ParallelWaveganGenerator(torch.nn.Module): class ParallelWaveganGenerator(torch.nn.Module):

View File

@ -5,7 +5,7 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # FATAL
logging.getLogger('tensorflow').setLevel(logging.FATAL) logging.getLogger('tensorflow').setLevel(logging.FATAL)
import tensorflow as tf import tensorflow as tf
from TTS.vocoder.tf.layers.melgan import ResidualStack, ReflectionPad1d from mozilla_voice_tts.vocoder.tf.layers.melgan import ResidualStack, ReflectionPad1d
#pylint: disable=too-many-ancestors #pylint: disable=too-many-ancestors

View File

@ -1,7 +1,7 @@
import tensorflow as tf import tensorflow as tf
from TTS.vocoder.tf.models.melgan_generator import MelganGenerator from mozilla_voice_tts.vocoder.tf.models.melgan_generator import MelganGenerator
from TTS.vocoder.tf.layers.pqmf import PQMF from mozilla_voice_tts.vocoder.tf.layers.pqmf import PQMF
#pylint: disable=too-many-ancestors #pylint: disable=too-many-ancestors
#pylint: disable=abstract-method #pylint: disable=abstract-method

View File

@ -9,7 +9,7 @@ def to_camel(text):
def setup_generator(c): def setup_generator(c):
print(" > Generator Model: {}".format(c.generator_model)) print(" > Generator Model: {}".format(c.generator_model))
MyModel = importlib.import_module('TTS.vocoder.tf.models.' + MyModel = importlib.import_module('mozilla_voice_tts.vocoder.tf.models.' +
c.generator_model.lower()) c.generator_model.lower())
MyModel = getattr(MyModel, to_camel(c.generator_model)) MyModel = getattr(MyModel, to_camel(c.generator_model))
if c.generator_model in 'melgan_generator': if c.generator_model in 'melgan_generator':

View File

@ -3,7 +3,7 @@ import importlib
import numpy as np import numpy as np
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from TTS.tts.utils.visual import plot_spectrogram from mozilla_voice_tts.tts.utils.visual import plot_spectrogram
def plot_results(y_hat, y, ap, global_step, name_prefix): def plot_results(y_hat, y, ap, global_step, name_prefix):
@ -44,7 +44,7 @@ def to_camel(text):
def setup_generator(c): def setup_generator(c):
print(" > Generator Model: {}".format(c.generator_model)) print(" > Generator Model: {}".format(c.generator_model))
MyModel = importlib.import_module('TTS.vocoder.models.' + MyModel = importlib.import_module('mozilla_voice_tts.vocoder.models.' +
c.generator_model.lower()) c.generator_model.lower())
MyModel = getattr(MyModel, to_camel(c.generator_model)) MyModel = getattr(MyModel, to_camel(c.generator_model))
if c.generator_model in 'melgan_generator': if c.generator_model in 'melgan_generator':
@ -91,9 +91,9 @@ def setup_discriminator(c):
print(" > Discriminator Model: {}".format(c.discriminator_model)) print(" > Discriminator Model: {}".format(c.discriminator_model))
if 'parallel_wavegan' in c.discriminator_model: if 'parallel_wavegan' in c.discriminator_model:
MyModel = importlib.import_module( MyModel = importlib.import_module(
'TTS.vocoder.models.parallel_wavegan_discriminator') 'mozilla_voice_tts.vocoder.models.parallel_wavegan_discriminator')
else: else:
MyModel = importlib.import_module('TTS.vocoder.models.' + MyModel = importlib.import_module('mozilla_voice_tts.vocoder.models.' +
c.discriminator_model.lower()) c.discriminator_model.lower())
MyModel = getattr(MyModel, to_camel(c.discriminator_model.lower())) MyModel = getattr(MyModel, to_camel(c.discriminator_model.lower()))
if c.discriminator_model in 'random_window_discriminator': if c.discriminator_model in 'random_window_discriminator':

View File

@ -16,9 +16,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"%matplotlib inline\n", "%matplotlib inline\n",
"from TTS.tts.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.visual import plot_spectrogram\n", "from mozilla_voice_tts.tts.utils.visual import plot_spectrogram\n",
"from TTS.tts.utils.generic_utils import load_config\n", "from mozilla_voice_tts.tts.utils.generic_utils import load_config\n",
"import glob \n", "import glob \n",
"import IPython.display as ipd" "import IPython.display as ipd"
] ]

View File

@ -154,11 +154,11 @@
"import time\n", "import time\n",
"import IPython\n", "import IPython\n",
"\n", "\n",
"from TTS.tts.utils.generic_utils import setup_model\n", "from mozilla_voice_tts.tts.utils.generic_utils import setup_model\n",
"from TTS.utils.io import load_config\n", "from mozilla_voice_tts.utils.io import load_config\n",
"from TTS.tts.utils.text.symbols import symbols, phonemes\n", "from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
"from TTS.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.synthesis import synthesis" "from mozilla_voice_tts.tts.utils.synthesis import synthesis"
], ],
"execution_count": null, "execution_count": null,
"outputs": [] "outputs": []
@ -280,7 +280,7 @@
"tags": [] "tags": []
}, },
"source": [ "source": [
"from TTS.vocoder.utils.generic_utils import setup_generator\n", "from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator\n",
"\n", "\n",
"# LOAD VOCODER MODEL\n", "# LOAD VOCODER MODEL\n",
"vocoder_model = setup_generator(VOCODER_CONFIG)\n", "vocoder_model = setup_generator(VOCODER_CONFIG)\n",

View File

@ -1140,12 +1140,12 @@
"import time\n", "import time\n",
"import IPython\n", "import IPython\n",
"\n", "\n",
"from TTS.tf.utils.tflite import load_tflite_model\n", "from mozilla_voice_tts.tf.utils.tflite import load_tflite_model\n",
"from TTS.tf.utils.io import load_checkpoint\n", "from mozilla_voice_tts.tf.utils.io import load_checkpoint\n",
"from TTS.utils.io import load_config\n", "from mozilla_voice_tts.utils.io import load_config\n",
"from TTS.utils.text.symbols import symbols, phonemes\n", "from mozilla_voice_tts.utils.text.symbols import symbols, phonemes\n",
"from TTS.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.synthesis import synthesis" "from mozilla_voice_tts.tts.utils.synthesis import synthesis"
], ],
"execution_count": null, "execution_count": null,
"outputs": [] "outputs": []

View File

@ -31,7 +31,7 @@
"colab_type": "text" "colab_type": "text"
}, },
"source": [ "source": [
"**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n", "**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n",
"\n", "\n",
"These TF models support TF 2.2 and for different versions you might need to\n", "These TF models support TF 2.2 and for different versions you might need to\n",
"regenerate them. \n", "regenerate them. \n",
@ -154,12 +154,12 @@
"import time\n", "import time\n",
"import IPython\n", "import IPython\n",
"\n", "\n",
"from TTS.tts.tf.utils.generic_utils import setup_model\n", "from mozilla_voice_tts.tts.tf.utils.generic_utils import setup_model\n",
"from TTS.tts.tf.utils.io import load_checkpoint\n", "from mozilla_voice_tts.tts.tf.utils.io import load_checkpoint\n",
"from TTS.utils.io import load_config\n", "from mozilla_voice_tts.utils.io import load_config\n",
"from TTS.tts.utils.text.symbols import symbols, phonemes\n", "from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
"from TTS.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.synthesis import synthesis" "from mozilla_voice_tts.tts.utils.synthesis import synthesis"
], ],
"execution_count": null, "execution_count": null,
"outputs": [] "outputs": []
@ -270,8 +270,8 @@
"outputId": "2cc3deae-144f-4465-da3b-98628d948506" "outputId": "2cc3deae-144f-4465-da3b-98628d948506"
}, },
"source": [ "source": [
"from TTS.vocoder.tf.utils.generic_utils import setup_generator\n", "from mozilla_voice_tts.vocoder.tf.utils.generic_utils import setup_generator\n",
"from TTS.vocoder.tf.utils.io import load_checkpoint\n", "from mozilla_voice_tts.vocoder.tf.utils.io import load_checkpoint\n",
"\n", "\n",
"# LOAD VOCODER MODEL\n", "# LOAD VOCODER MODEL\n",
"vocoder_model = setup_generator(VOCODER_CONFIG)\n", "vocoder_model = setup_generator(VOCODER_CONFIG)\n",

View File

@ -154,11 +154,11 @@
"import time\n", "import time\n",
"import IPython\n", "import IPython\n",
"\n", "\n",
"from TTS.tts.utils.generic_utils import setup_model\n", "from mozilla_voice_tts.tts.utils.generic_utils import setup_model\n",
"from TTS.utils.io import load_config\n", "from mozilla_voice_tts.utils.io import load_config\n",
"from TTS.tts.utils.text.symbols import symbols, phonemes\n", "from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
"from TTS.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.synthesis import synthesis" "from mozilla_voice_tts.tts.utils.synthesis import synthesis"
], ],
"execution_count": null, "execution_count": null,
"outputs": [] "outputs": []
@ -280,7 +280,7 @@
"tags": [] "tags": []
}, },
"source": [ "source": [
"from TTS.vocoder.utils.generic_utils import setup_generator\n", "from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator\n",
"\n", "\n",
"# LOAD VOCODER MODEL\n", "# LOAD VOCODER MODEL\n",
"vocoder_model = setup_generator(VOCODER_CONFIG)\n", "vocoder_model = setup_generator(VOCODER_CONFIG)\n",

View File

@ -22,12 +22,12 @@
"import numpy as np\n", "import numpy as np\n",
"from tqdm import tqdm as tqdm\n", "from tqdm import tqdm as tqdm\n",
"from torch.utils.data import DataLoader\n", "from torch.utils.data import DataLoader\n",
"from TTS.tts.datasets.TTSDataset import MyDataset\n", "from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset\n",
"from TTS.tts.layers.losses import L1LossMasked\n", "from mozilla_voice_tts.tts.layers.losses import L1LossMasked\n",
"from TTS.tts.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.visual import plot_spectrogram\n", "from mozilla_voice_tts.tts.utils.visual import plot_spectrogram\n",
"from TTS.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n", "from mozilla_voice_tts.tts.utils.generic_utils import load_config, setup_model, sequence_mask\n",
"from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n", "from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
"\n", "\n",
"%matplotlib inline\n", "%matplotlib inline\n",
"\n", "\n",
@ -108,7 +108,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"preprocessor = importlib.import_module('TTS.tts.datasets.preprocess')\n", "preprocessor = importlib.import_module('mozilla_voice_tts.tts.datasets.preprocess')\n",
"preprocessor = getattr(preprocessor, DATASET.lower())\n", "preprocessor = getattr(preprocessor, DATASET.lower())\n",
"meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n", "meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n",
"dataset = MyDataset(checkpoint['r'], C.text_cleaner, False, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n", "dataset = MyDataset(checkpoint['r'], C.text_cleaner, False, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n",

View File

@ -36,14 +36,14 @@
"import librosa\n", "import librosa\n",
"import librosa.display\n", "import librosa.display\n",
"\n", "\n",
"from TTS.tts.layers import *\n", "from mozilla_voice_tts.tts.layers import *\n",
"from TTS.tts.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.generic_utils import setup_model\n", "from mozilla_voice_tts.tts.utils.generic_utils import setup_model\n",
"from TTS.tts.utils.io import load_config\n", "from mozilla_voice_tts.tts.utils.io import load_config\n",
"from TTS.tts.utils.text import text_to_sequence\n", "from mozilla_voice_tts.tts.utils.text import text_to_sequence\n",
"from TTS.tts.utils.synthesis import synthesis\n", "from mozilla_voice_tts.tts.utils.synthesis import synthesis\n",
"from TTS.tts.utils.visual import plot_alignment\n", "from mozilla_voice_tts.tts.utils.visual import plot_alignment\n",
"from TTS.tts.utils.measures import alignment_diagonal_score\n", "from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score\n",
"\n", "\n",
"import IPython\n", "import IPython\n",
"from IPython.display import Audio\n", "from IPython.display import Audio\n",
@ -96,7 +96,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# LOAD TTS MODEL\n", "# LOAD TTS MODEL\n",
"from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes\n", "from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes\n",
"\n", "\n",
"# multi speaker \n", "# multi speaker \n",
"if CONFIG.use_speaker_embedding:\n", "if CONFIG.use_speaker_embedding:\n",

View File

@ -276,12 +276,12 @@
"import time\n", "import time\n",
"import IPython\n", "import IPython\n",
"\n", "\n",
"from TTS.tts.tf.utils.tflite import load_tflite_model\n", "from mozilla_voice_tts.tts.tf.utils.tflite import load_tflite_model\n",
"from TTS.tts.tf.utils.io import load_checkpoint\n", "from mozilla_voice_tts.tts.tf.utils.io import load_checkpoint\n",
"from TTS.utils.io import load_config\n", "from mozilla_voice_tts.utils.io import load_config\n",
"from TTS.tts.utils.text.symbols import symbols, phonemes\n", "from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes\n",
"from TTS.utils.audio import AudioProcessor\n", "from mozilla_voice_tts.utils.audio import AudioProcessor\n",
"from TTS.tts.utils.synthesis import synthesis" "from mozilla_voice_tts.tts.utils.synthesis import synthesis"
], ],
"execution_count": null, "execution_count": null,
"outputs": [] "outputs": []

View File

@ -31,7 +31,7 @@
"from multiprocessing import Pool\n", "from multiprocessing import Pool\n",
"from matplotlib import pylab as plt\n", "from matplotlib import pylab as plt\n",
"from collections import Counter\n", "from collections import Counter\n",
"from TTS.tts.datasets.preprocess import *\n", "from mozilla_voice_tts.tts.datasets.preprocess import *\n",
"%matplotlib inline" "%matplotlib inline"
] ]
}, },

View File

@ -27,7 +27,7 @@
"from multiprocessing import Pool\n", "from multiprocessing import Pool\n",
"from matplotlib import pylab as plt\n", "from matplotlib import pylab as plt\n",
"from collections import Counter\n", "from collections import Counter\n",
"from TTS.tts.datasets.preprocess import *\n", "from mozilla_voice_tts.tts.datasets.preprocess import *\n",
"%matplotlib inline" "%matplotlib inline"
] ]
}, },

View File

@ -1,8 +1,8 @@
import unittest import unittest
import torch as T import torch as T
from TTS.tts.utils.generic_utils import save_checkpoint, save_best_model from mozilla_voice_tts.tts.utils.generic_utils import save_checkpoint, save_best_model
from TTS.tts.layers.tacotron import Prenet from mozilla_voice_tts.tts.layers.tacotron import Prenet
OUT_PATH = '/tmp/test.pth.tar' OUT_PATH = '/tmp/test.pth.tar'

View File

@ -67,6 +67,7 @@
"gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed. "gradual_training": [[0, 7, 4]], //set gradual training steps [first_step, r, batch_size]. If it is null, gradual training is disabled. For Tacotron, you might need to reduce the 'batch_size' as you proceed.
"loss_masking": true, // enable / disable loss masking against the sequence padding. "loss_masking": true, // enable / disable loss masking against the sequence padding.
"ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled. "ga_alpha": 10.0, // weight for guided attention loss. If > 0, guided attention is enabled.
"apex_amp_level": null,
// VALIDATION // VALIDATION
"run_eval": true, "run_eval": true,

View File

@ -73,7 +73,7 @@
"data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can be overwritten from command argument "data_path": "/media/erogol/data_ssd/Data/Mozilla/", // DATASET-RELATED: can be overwritten from command argument
"meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader. "meta_file_train": "metadata_train.txt", // DATASET-RELATED: metafile for training dataloader.
"meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader. "meta_file_val": "metadata_val.txt", // DATASET-RELATED: metafile for evaluation dataloader.
"dataset": "mozilla", // DATASET-RELATED: one of TTS.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py "dataset": "mozilla", // DATASET-RELATED: one of mozilla_voice_tts.dataset.preprocessors depending on your target dataset. Use "tts_cache" for pre-computed dataset by extract_features.py
"min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training "min_seq_len": 0, // DATASET-RELATED: minimum text length to use in training
"max_seq_len": 150, // DATASET-RELATED: maximum text length "max_seq_len": 150, // DATASET-RELATED: maximum text length
"output_path": "../keep/", // DATASET-RELATED: output path for all training outputs. "output_path": "../keep/", // DATASET-RELATED: output path for all training outputs.

View File

@ -1,6 +1,6 @@
import unittest import unittest
from TTS.tts.utils.text import phonemes from mozilla_voice_tts.tts.utils.text import phonemes
class SymbolsTest(unittest.TestCase): class SymbolsTest(unittest.TestCase):
def test_uniqueness(self): #pylint: disable=no-self-use def test_uniqueness(self): #pylint: disable=no-self-use

View File

@ -2,8 +2,8 @@ import os
import unittest import unittest
from tests import get_tests_path, get_tests_input_path, get_tests_output_path from tests import get_tests_path, get_tests_input_path, get_tests_output_path
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
TESTS_PATH = get_tests_path() TESTS_PATH = get_tests_path()
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests") OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")

View File

@ -3,12 +3,12 @@ import unittest
import torch as T import torch as T
from TTS.server.synthesizer import Synthesizer from mozilla_voice_tts.server.synthesizer import Synthesizer
from tests import get_tests_input_path, get_tests_output_path from tests import get_tests_input_path, get_tests_output_path
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.tts.utils.generic_utils import setup_model from mozilla_voice_tts.tts.utils.generic_utils import setup_model
from TTS.tts.utils.io import save_checkpoint from mozilla_voice_tts.tts.utils.io import save_checkpoint
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
class DemoServerTest(unittest.TestCase): class DemoServerTest(unittest.TestCase):

View File

@ -3,9 +3,9 @@ import unittest
import torch as T import torch as T
from tests import get_tests_path, get_tests_input_path from tests import get_tests_path, get_tests_input_path
from TTS.speaker_encoder.model import SpeakerEncoder from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
from TTS.speaker_encoder.loss import GE2ELoss from mozilla_voice_tts.speaker_encoder.loss import GE2ELoss
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
file_path = get_tests_input_path() file_path = get_tests_input_path()

View File

@ -1,9 +1,9 @@
import unittest import unittest
import torch as T import torch as T
from TTS.tts.layers.tacotron import Prenet, CBHG, Decoder, Encoder from mozilla_voice_tts.tts.layers.tacotron import Prenet, CBHG, Decoder, Encoder
from TTS.tts.layers.losses import L1LossMasked from mozilla_voice_tts.tts.layers.losses import L1LossMasked
from TTS.tts.utils.generic_utils import sequence_mask from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
# pylint: disable=unused-variable # pylint: disable=unused-variable

View File

@ -6,10 +6,10 @@ import numpy as np
from tests import get_tests_path, get_tests_input_path, get_tests_output_path from tests import get_tests_path, get_tests_input_path, get_tests_output_path
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.tts.datasets import TTSDataset from mozilla_voice_tts.tts.datasets import TTSDataset
from TTS.tts.datasets.preprocess import ljspeech from mozilla_voice_tts.tts.datasets.preprocess import ljspeech
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -2,7 +2,7 @@ import unittest
import os import os
from tests import get_tests_input_path from tests import get_tests_input_path
from TTS.tts.datasets.preprocess import common_voice from mozilla_voice_tts.tts.datasets.preprocess import common_voice
class TestPreprocessors(unittest.TestCase): class TestPreprocessors(unittest.TestCase):

View File

@ -12,12 +12,12 @@ pip install --quiet --upgrade pip setuptools wheel
rm -f dist/*.whl rm -f dist/*.whl
python setup.py --quiet bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json python setup.py --quiet bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json
pip install --quiet dist/TTS*.whl pip install --quiet dist/mozilla_voice_tts*.whl
# this is related to https://github.com/librosa/librosa/issues/1160 # this is related to https://github.com/librosa/librosa/issues/1160
pip install numba==0.48 pip install numba==0.48
python -m TTS.server.server & python -m mozilla_voice_tts.server.server &
SERVER_PID=$! SERVER_PID=$!
echo 'Waiting for server...' echo 'Waiting for server...'

View File

@ -6,9 +6,9 @@ import torch
from tests import get_tests_input_path from tests import get_tests_input_path
from torch import nn, optim from torch import nn, optim
from TTS.tts.layers.losses import MSELossMasked from mozilla_voice_tts.tts.layers.losses import MSELossMasked
from TTS.tts.models.tacotron2 import Tacotron2 from mozilla_voice_tts.tts.models.tacotron2 import Tacotron2
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -7,9 +7,9 @@ tf.get_logger().setLevel('INFO')
from tests import get_tests_path, get_tests_input_path, get_tests_output_path from tests import get_tests_path, get_tests_input_path, get_tests_output_path
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.tts.tf.models.tacotron2 import Tacotron2 from mozilla_voice_tts.tts.tf.models.tacotron2 import Tacotron2
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model from mozilla_voice_tts.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -6,9 +6,9 @@ import torch
from tests import get_tests_input_path from tests import get_tests_input_path
from torch import nn, optim from torch import nn, optim
from TTS.tts.layers.losses import L1LossMasked from mozilla_voice_tts.tts.layers.losses import L1LossMasked
from TTS.tts.models.tacotron import Tacotron from mozilla_voice_tts.tts.models.tacotron import Tacotron
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -4,9 +4,9 @@ import os
# pylint: disable=unused-import # pylint: disable=unused-import
import unittest import unittest
from tests import get_tests_input_path from tests import get_tests_input_path
from TTS.tts.utils.text import * from mozilla_voice_tts.tts.utils.text import *
from tests import get_tests_path from tests import get_tests_path
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json')) conf = load_config(os.path.join(get_tests_input_path(), 'test_config.json'))

View File

@ -3,11 +3,11 @@
BASEDIR=$(dirname "$0") BASEDIR=$(dirname "$0")
echo "$BASEDIR" echo "$BASEDIR"
# run training # run training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tts.py --config_path $BASEDIR/inputs/test_train_config.json CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_tts.py --config_path $BASEDIR/inputs/test_train_config.json
# find the training folder # find the training folder
LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1) LATEST_FOLDER=$(ls $BASEDIR/train_outputs/| sort | tail -1)
echo $LATEST_FOLDER echo $LATEST_FOLDER
# continue the previous training # continue the previous training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_tts.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_tts.py --continue_path $BASEDIR/train_outputs/$LATEST_FOLDER
# remove all the outputs # remove all the outputs
rm -rf $BASEDIR/train_outputs/ rm -rf $BASEDIR/train_outputs/

View File

@ -4,10 +4,10 @@ import numpy as np
from tests import get_tests_path, get_tests_input_path, get_tests_output_path from tests import get_tests_path, get_tests_input_path, get_tests_output_path
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.vocoder.datasets.gan_dataset import GANDataset from mozilla_voice_tts.vocoder.datasets.gan_dataset import GANDataset
from TTS.vocoder.datasets.preprocess import load_wav_data from mozilla_voice_tts.vocoder.datasets.preprocess import load_wav_data
file_path = os.path.dirname(os.path.realpath(__file__)) file_path = os.path.dirname(os.path.realpath(__file__))
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/") OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")

View File

@ -3,9 +3,9 @@ import os
import torch import torch
from tests import get_tests_input_path, get_tests_output_path, get_tests_path from tests import get_tests_input_path, get_tests_output_path, get_tests_path
from TTS.utils.audio import AudioProcessor from mozilla_voice_tts.utils.audio import AudioProcessor
from TTS.utils.io import load_config from mozilla_voice_tts.utils.io import load_config
from TTS.vocoder.layers.losses import MultiScaleSTFTLoss, STFTLoss, TorchSTFT from mozilla_voice_tts.vocoder.layers.losses import MultiScaleSTFTLoss, STFTLoss, TorchSTFT
TESTS_PATH = get_tests_path() TESTS_PATH = get_tests_path()

View File

@ -1,8 +1,8 @@
import numpy as np import numpy as np
import torch import torch
from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator from mozilla_voice_tts.vocoder.models.melgan_discriminator import MelganDiscriminator
from TTS.vocoder.models.melgan_multiscale_discriminator import MelganMultiscaleDiscriminator from mozilla_voice_tts.vocoder.models.melgan_multiscale_discriminator import MelganMultiscaleDiscriminator
def test_melgan_discriminator(): def test_melgan_discriminator():

View File

@ -1,7 +1,7 @@
import numpy as np import numpy as np
import torch import torch
from TTS.vocoder.models.melgan_generator import MelganGenerator from mozilla_voice_tts.vocoder.models.melgan_generator import MelganGenerator
def test_melgan_generator(): def test_melgan_generator():
model = MelganGenerator() model = MelganGenerator()

View File

@ -1,7 +1,7 @@
import numpy as np import numpy as np
import torch import torch
from TTS.vocoder.models.parallel_wavegan_discriminator import ParallelWaveganDiscriminator, ResidualParallelWaveganDiscriminator from mozilla_voice_tts.vocoder.models.parallel_wavegan_discriminator import ParallelWaveganDiscriminator, ResidualParallelWaveganDiscriminator
def test_pwgan_disciminator(): def test_pwgan_disciminator():

View File

@ -1,7 +1,7 @@
import numpy as np import numpy as np
import torch import torch
from TTS.vocoder.models.parallel_wavegan_generator import ParallelWaveganGenerator from mozilla_voice_tts.vocoder.models.parallel_wavegan_generator import ParallelWaveganGenerator
def test_pwgan_generator(): def test_pwgan_generator():

View File

@ -5,7 +5,7 @@ import soundfile as sf
from librosa.core import load from librosa.core import load
from tests import get_tests_path, get_tests_input_path from tests import get_tests_path, get_tests_input_path
from TTS.vocoder.layers.pqmf import PQMF from mozilla_voice_tts.vocoder.layers.pqmf import PQMF
TESTS_PATH = get_tests_path() TESTS_PATH = get_tests_path()

View File

@ -1,7 +1,7 @@
import torch import torch
import numpy as np import numpy as np
from TTS.vocoder.models.random_window_discriminator import RandomWindowDiscriminator from mozilla_voice_tts.vocoder.models.random_window_discriminator import RandomWindowDiscriminator
def test_rwd(): def test_rwd():

View File

@ -1,7 +1,7 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from TTS.vocoder.tf.models.melgan_generator import MelganGenerator from mozilla_voice_tts.vocoder.tf.models.melgan_generator import MelganGenerator
def test_melgan_generator(): def test_melgan_generator():

View File

@ -5,7 +5,7 @@ import soundfile as sf
from librosa.core import load from librosa.core import load
from tests import get_tests_path, get_tests_input_path from tests import get_tests_path, get_tests_input_path
from TTS.vocoder.tf.layers.pqmf import PQMF from mozilla_voice_tts.vocoder.tf.layers.pqmf import PQMF
TESTS_PATH = get_tests_path() TESTS_PATH = get_tests_path()

View File

@ -5,11 +5,11 @@ echo "$BASEDIR"
# create run dir # create run dir
mkdir $BASEDIR/train_outputs mkdir $BASEDIR/train_outputs
# run training # run training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_vocoder.py --config_path $BASEDIR/inputs/test_vocoder_multiband_melgan_config.json
# find the training folder # find the training folder
LATEST_FOLDER=$(ls $BASEDIR/outputs/train_outputs/| sort | tail -1) LATEST_FOLDER=$(ls $BASEDIR/outputs/train_outputs/| sort | tail -1)
echo $LATEST_FOLDER echo $LATEST_FOLDER
# continue the previous training # continue the previous training
CUDA_VISIBLE_DEVICES="" python TTS/bin/train_vocoder.py --continue_path $BASEDIR/outputs/train_outputs/$LATEST_FOLDER CUDA_VISIBLE_DEVICES="" python mozilla_voice_tts/bin/train_vocoder.py --continue_path $BASEDIR/outputs/train_outputs/$LATEST_FOLDER
# remove all the outputs # remove all the outputs
rm -rf $BASEDIR/train_outputs/ rm -rf $BASEDIR/train_outputs/