mirror of https://github.com/coqui-ai/TTS.git
rename the project to old TTS
This commit is contained in:
parent
daf9c7aaec
commit
df19428ec6
|
@ -7,9 +7,9 @@ import argparse
|
|||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
|
||||
from mozilla_voice_tts.utils.io import load_config
|
||||
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||
from TTS.tts.datasets.preprocess import load_meta_data
|
||||
from TTS.utils.io import load_config
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
|
||||
def main():
|
||||
"""Run preprocessing process."""
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
import argparse
|
||||
|
||||
from mozilla_voice_tts.utils.io import load_config
|
||||
from mozilla_voice_tts.vocoder.tf.utils.generic_utils import setup_generator
|
||||
from mozilla_voice_tts.vocoder.tf.utils.io import load_checkpoint
|
||||
from mozilla_voice_tts.vocoder.tf.utils.tflite import convert_melgan_to_tflite
|
||||
from TTS.utils.io import load_config
|
||||
from TTS.vocoder.tf.utils.generic_utils import setup_generator
|
||||
from TTS.vocoder.tf.utils.io import load_checkpoint
|
||||
from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
|
@ -6,13 +6,13 @@ import tensorflow as tf
|
|||
import torch
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
from mozilla_voice_tts.utils.io import load_config
|
||||
from mozilla_voice_tts.vocoder.tf.utils.convert_torch_to_tf_utils import (
|
||||
from TTS.utils.io import load_config
|
||||
from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import (
|
||||
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
|
||||
from mozilla_voice_tts.vocoder.tf.utils.generic_utils import \
|
||||
from TTS.vocoder.tf.utils.generic_utils import \
|
||||
setup_generator as setup_tf_generator
|
||||
from mozilla_voice_tts.vocoder.tf.utils.io import save_checkpoint
|
||||
from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
|
||||
from TTS.vocoder.tf.utils.io import save_checkpoint
|
||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
||||
|
||||
# prevent GPU use
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
|
@ -2,11 +2,11 @@
|
|||
|
||||
import argparse
|
||||
|
||||
from mozilla_voice_tts.utils.io import load_config
|
||||
from mozilla_voice_tts.tts.utils.text.symbols import symbols, phonemes
|
||||
from mozilla_voice_tts.tts.tf.utils.generic_utils import setup_model
|
||||
from mozilla_voice_tts.tts.tf.utils.io import load_checkpoint
|
||||
from mozilla_voice_tts.tts.tf.utils.tflite import convert_tacotron2_to_tflite
|
||||
from TTS.utils.io import load_config
|
||||
from TTS.tts.utils.text.symbols import symbols, phonemes
|
||||
from TTS.tts.tf.utils.generic_utils import setup_model
|
||||
from TTS.tts.tf.utils.io import load_checkpoint
|
||||
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
|
@ -11,13 +11,13 @@ import numpy as np
|
|||
import tensorflow as tf
|
||||
import torch
|
||||
from fuzzywuzzy import fuzz
|
||||
from mozilla_voice_tts.tts.tf.models.tacotron2 import Tacotron2
|
||||
from mozilla_voice_tts.tts.tf.utils.convert_torch_to_tf_utils import (
|
||||
from TTS.tts.tf.models.tacotron2 import Tacotron2
|
||||
from TTS.tts.tf.utils.convert_torch_to_tf_utils import (
|
||||
compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
|
||||
from mozilla_voice_tts.tts.tf.utils.generic_utils import save_checkpoint
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import setup_model
|
||||
from mozilla_voice_tts.tts.utils.text.symbols import phonemes, symbols
|
||||
from mozilla_voice_tts.utils.io import load_config
|
||||
from TTS.tts.tf.utils.generic_utils import save_checkpoint
|
||||
from TTS.tts.utils.generic_utils import setup_model
|
||||
from TTS.tts.utils.text.symbols import phonemes, symbols
|
||||
from TTS.utils.io import load_config
|
||||
|
||||
sys.path.append('/home/erogol/Projects')
|
||||
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
|
@ -10,12 +10,12 @@ import time
|
|||
|
||||
import torch
|
||||
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import setup_model
|
||||
from mozilla_voice_tts.tts.utils.synthesis import synthesis
|
||||
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||
from mozilla_voice_tts.utils.io import load_config
|
||||
from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
|
||||
from TTS.tts.utils.generic_utils import setup_model
|
||||
from TTS.tts.utils.synthesis import synthesis
|
||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.io import load_config
|
||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
||||
|
||||
|
||||
def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid, speaker_embedding=None, gst_style=None):
|
|
@ -10,21 +10,21 @@ import traceback
|
|||
import torch
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from mozilla_voice_tts.speaker_encoder.dataset import MyDataset
|
||||
from mozilla_voice_tts.speaker_encoder.generic_utils import save_best_model
|
||||
from mozilla_voice_tts.speaker_encoder.losses import GE2ELoss, AngleProtoLoss
|
||||
from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
|
||||
from mozilla_voice_tts.speaker_encoder.visual import plot_embeddings
|
||||
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
|
||||
from mozilla_voice_tts.utils.generic_utils import (
|
||||
from TTS.speaker_encoder.dataset import MyDataset
|
||||
from TTS.speaker_encoder.generic_utils import save_best_model
|
||||
from TTS.speaker_encoder.losses import GE2ELoss, AngleProtoLoss
|
||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
||||
from TTS.speaker_encoder.visual import plot_embeddings
|
||||
from TTS.tts.datasets.preprocess import load_meta_data
|
||||
from TTS.utils.generic_utils import (
|
||||
create_experiment_folder, get_git_branch, remove_experiment_folder,
|
||||
set_init_dict)
|
||||
from mozilla_voice_tts.utils.io import copy_config_file, load_config
|
||||
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||
from mozilla_voice_tts.utils.generic_utils import count_parameters
|
||||
from mozilla_voice_tts.utils.radam import RAdam
|
||||
from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
|
||||
from mozilla_voice_tts.utils.training import NoamLR, check_update
|
||||
from TTS.utils.io import copy_config_file, load_config
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.generic_utils import count_parameters
|
||||
from TTS.utils.radam import RAdam
|
||||
from TTS.utils.tensorboard_logger import TensorboardLogger
|
||||
from TTS.utils.training import NoamLR, check_update
|
||||
|
||||
torch.backends.cudnn.enabled = True
|
||||
torch.backends.cudnn.benchmark = True
|
|
@ -11,40 +11,31 @@ import traceback
|
|||
import numpy as np
|
||||
import torch
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
|
||||
from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset
|
||||
from mozilla_voice_tts.tts.layers.losses import TacotronLoss
|
||||
from mozilla_voice_tts.tts.utils.distribute import (DistributedSampler,
|
||||
apply_gradient_allreduce,
|
||||
init_distributed,
|
||||
reduce_tensor)
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import check_config, setup_model
|
||||
from mozilla_voice_tts.tts.utils.io import save_best_model, save_checkpoint
|
||||
from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score
|
||||
from mozilla_voice_tts.tts.utils.speakers import (get_speakers,
|
||||
load_speaker_mapping,
|
||||
save_speaker_mapping)
|
||||
from mozilla_voice_tts.tts.utils.synthesis import synthesis
|
||||
from mozilla_voice_tts.tts.utils.text.symbols import (make_symbols, phonemes,
|
||||
symbols)
|
||||
from mozilla_voice_tts.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||
from mozilla_voice_tts.utils.console_logger import ConsoleLogger
|
||||
from mozilla_voice_tts.utils.generic_utils import (KeepAverage,
|
||||
count_parameters,
|
||||
create_experiment_folder,
|
||||
get_git_branch,
|
||||
remove_experiment_folder,
|
||||
set_init_dict)
|
||||
from mozilla_voice_tts.utils.io import copy_config_file, load_config
|
||||
from mozilla_voice_tts.utils.radam import RAdam
|
||||
from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
|
||||
from mozilla_voice_tts.utils.training import (NoamLR, adam_weight_decay,
|
||||
check_update,
|
||||
gradual_training_scheduler,
|
||||
set_weight_decay,
|
||||
setup_torch_training_env)
|
||||
from TTS.tts.datasets.preprocess import load_meta_data
|
||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||
from TTS.tts.layers.losses import TacotronLoss
|
||||
from TTS.tts.utils.distribute import (DistributedSampler,
|
||||
apply_gradient_allreduce,
|
||||
init_distributed, reduce_tensor)
|
||||
from TTS.tts.utils.generic_utils import check_config, setup_model
|
||||
from TTS.tts.utils.io import save_best_model, save_checkpoint
|
||||
from TTS.tts.utils.measures import alignment_diagonal_score
|
||||
from TTS.tts.utils.speakers import (get_speakers, load_speaker_mapping,
|
||||
save_speaker_mapping)
|
||||
from TTS.tts.utils.synthesis import synthesis
|
||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.console_logger import ConsoleLogger
|
||||
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
|
||||
create_experiment_folder, get_git_branch,
|
||||
remove_experiment_folder, set_init_dict)
|
||||
from TTS.utils.io import copy_config_file, load_config
|
||||
from TTS.utils.radam import RAdam
|
||||
from TTS.utils.tensorboard_logger import TensorboardLogger
|
||||
from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
|
||||
gradual_training_scheduler, set_weight_decay,
|
||||
setup_torch_training_env)
|
||||
|
||||
use_cuda, num_gpus = setup_torch_training_env(True, False)
|
||||
|
|
@ -8,30 +8,23 @@ from inspect import signature
|
|||
|
||||
import torch
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||
from mozilla_voice_tts.utils.console_logger import ConsoleLogger
|
||||
from mozilla_voice_tts.utils.generic_utils import (KeepAverage,
|
||||
count_parameters,
|
||||
create_experiment_folder,
|
||||
get_git_branch,
|
||||
remove_experiment_folder,
|
||||
set_init_dict)
|
||||
from mozilla_voice_tts.utils.io import copy_config_file, load_config
|
||||
from mozilla_voice_tts.utils.radam import RAdam
|
||||
from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
|
||||
from mozilla_voice_tts.utils.training import setup_torch_training_env
|
||||
from mozilla_voice_tts.vocoder.datasets.gan_dataset import GANDataset
|
||||
from mozilla_voice_tts.vocoder.datasets.preprocess import (load_wav_data,
|
||||
load_wav_feat_data)
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.console_logger import ConsoleLogger
|
||||
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
|
||||
create_experiment_folder, get_git_branch,
|
||||
remove_experiment_folder, set_init_dict)
|
||||
from TTS.utils.io import copy_config_file, load_config
|
||||
from TTS.utils.radam import RAdam
|
||||
from TTS.utils.tensorboard_logger import TensorboardLogger
|
||||
from TTS.utils.training import setup_torch_training_env
|
||||
from TTS.vocoder.datasets.gan_dataset import GANDataset
|
||||
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
|
||||
# from distribute import (DistributedSampler, apply_gradient_allreduce,
|
||||
# init_distributed, reduce_tensor)
|
||||
from mozilla_voice_tts.vocoder.layers.losses import (DiscriminatorLoss,
|
||||
GeneratorLoss)
|
||||
from mozilla_voice_tts.vocoder.utils.generic_utils import (plot_results,
|
||||
setup_discriminator,
|
||||
setup_generator)
|
||||
from mozilla_voice_tts.vocoder.utils.io import save_best_model, save_checkpoint
|
||||
from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss
|
||||
from TTS.vocoder.utils.generic_utils import (plot_results, setup_discriminator,
|
||||
setup_generator)
|
||||
from TTS.vocoder.utils.io import save_best_model, save_checkpoint
|
||||
|
||||
use_cuda, num_gpus = setup_torch_training_env(True, True)
|
||||
|
|
@ -15,7 +15,7 @@ If you have the environment set already for TTS, then you can directly call ```s
|
|||
3. source /tmp/venv/bin/activate
|
||||
4. pip install -U pip setuptools wheel
|
||||
5. pip install -U https://example.com/url/to/python/package.whl
|
||||
6. python -m mozilla_voice_tts.server.server
|
||||
6. python -m TTS.server.server
|
||||
|
||||
You can now open http://localhost:5002 in a browser
|
||||
|
|
@ -3,7 +3,7 @@ import argparse
|
|||
import os
|
||||
|
||||
from flask import Flask, request, render_template, send_file
|
||||
from mozilla_voice_tts.server.synthesizer import Synthesizer
|
||||
from TTS.server.synthesizer import Synthesizer
|
||||
|
||||
|
||||
def create_argparser():
|
||||
|
@ -18,8 +18,8 @@ def create_argparser():
|
|||
parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.')
|
||||
parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.')
|
||||
parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.')
|
||||
parser.add_argument('--vocoder_config', type=str, default=None, help='path to mozilla_voice_tts.vocoder config file.')
|
||||
parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to mozilla_voice_tts.vocoder checkpoint file.')
|
||||
parser.add_argument('--vocoder_config', type=str, default=None, help='path to TTS.vocoder config file.')
|
||||
parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to TTS.vocoder checkpoint file.')
|
||||
parser.add_argument('--port', type=int, default=5002, help='port to listen on.')
|
||||
parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.')
|
||||
parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.')
|
|
@ -6,16 +6,16 @@ import numpy as np
|
|||
import torch
|
||||
import pysbd
|
||||
|
||||
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||
from mozilla_voice_tts.utils.io import load_config
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import setup_model
|
||||
from mozilla_voice_tts.tts.utils.speakers import load_speaker_mapping
|
||||
from mozilla_voice_tts.vocoder.utils.generic_utils import setup_generator
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.io import load_config
|
||||
from TTS.tts.utils.generic_utils import setup_model
|
||||
from TTS.tts.utils.speakers import load_speaker_mapping
|
||||
from TTS.vocoder.utils.generic_utils import setup_generator
|
||||
# pylint: disable=unused-wildcard-import
|
||||
# pylint: disable=wildcard-import
|
||||
from mozilla_voice_tts.tts.utils.synthesis import *
|
||||
from TTS.tts.utils.synthesis import *
|
||||
|
||||
from mozilla_voice_tts.tts.utils.text import make_symbols, phonemes, symbols
|
||||
from TTS.tts.utils.text import make_symbols, phonemes, symbols
|
||||
|
||||
|
||||
class Synthesizer(object):
|
|
@ -10,7 +10,7 @@ Below is an example showing embedding results of various speakers. You can gener
|
|||
|
||||
Download a pretrained model from [Released Models](https://github.com/mozilla/TTS/wiki/Released-Models) page.
|
||||
|
||||
To run the code, you need to follow the same flow as in mozilla_voice_tts.
|
||||
To run the code, you need to follow the same flow as in TTS.
|
||||
|
||||
- Define 'config.json' for your needs. Note that audio parameters should match your TTS model.
|
||||
- Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
|
|
@ -6,9 +6,9 @@ import numpy as np
|
|||
from tqdm import tqdm
|
||||
|
||||
import torch
|
||||
from mozilla_voice_tts.speaker_encoder.model import SpeakerEncoder
|
||||
from mozilla_voice_tts.tts.utils.audio import AudioProcessor
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import load_config
|
||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
||||
from TTS.tts.utils.audio import AudioProcessor
|
||||
from TTS.tts.utils.generic_utils import load_config
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Compute embedding vectors for each wav file in a dataset. ')
|
|
@ -9,7 +9,7 @@ class MyDataset(Dataset):
|
|||
num_utter_per_speaker=10, skip_speakers=False, verbose=False):
|
||||
"""
|
||||
Args:
|
||||
ap (mozilla_voice_tts.tts.utils.AudioProcessor): audio processor object.
|
||||
ap (TTS.tts.utils.AudioProcessor): audio processor object.
|
||||
meta_data (list): list of dataset instances.
|
||||
seq_len (int): voice segment length in seconds.
|
||||
verbose (bool): print diagnostic information.
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
|
@ -5,8 +5,8 @@ import torch
|
|||
import random
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from mozilla_voice_tts.tts.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
|
||||
from mozilla_voice_tts.tts.utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
||||
from TTS.tts.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
|
||||
from TTS.tts.utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
||||
|
||||
|
||||
class MyDataset(Dataset):
|
||||
|
@ -31,7 +31,7 @@ class MyDataset(Dataset):
|
|||
outputs_per_step (int): number of time frames predicted per step.
|
||||
text_cleaner (str): text cleaner used for the dataset.
|
||||
compute_linear_spec (bool): compute linear spectrogram if True.
|
||||
ap (mozilla_voice_tts.tts.utils.AudioProcessor): audio processor object.
|
||||
ap (TTS.tts.utils.AudioProcessor): audio processor object.
|
||||
meta_data (list): list of dataset instances.
|
||||
batch_group_size (int): (0) range of batch randomization after sorting
|
||||
sequences by length.
|
|
@ -2,7 +2,7 @@ import os
|
|||
from glob import glob
|
||||
import re
|
||||
import sys
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import split_dataset
|
||||
from TTS.tts.utils.generic_utils import split_dataset
|
||||
|
||||
|
||||
def load_meta_data(datasets):
|
|
@ -2,7 +2,7 @@ import numpy as np
|
|||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||
from TTS.tts.utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
class L1LossMasked(nn.Module):
|
|
@ -2,9 +2,9 @@
|
|||
import torch
|
||||
from torch import nn
|
||||
|
||||
from mozilla_voice_tts.tts.layers.gst_layers import GST
|
||||
from mozilla_voice_tts.tts.layers.tacotron import Decoder, Encoder, PostCBHG
|
||||
from mozilla_voice_tts.tts.models.tacotron_abstract import TacotronAbstract
|
||||
from TTS.tts.layers.gst_layers import GST
|
||||
from TTS.tts.layers.tacotron import Decoder, Encoder, PostCBHG
|
||||
from TTS.tts.models.tacotron_abstract import TacotronAbstract
|
||||
|
||||
|
||||
class Tacotron(TacotronAbstract):
|
|
@ -1,9 +1,9 @@
|
|||
import torch
|
||||
from torch import nn
|
||||
|
||||
from mozilla_voice_tts.tts.layers.gst_layers import GST
|
||||
from mozilla_voice_tts.tts.layers.tacotron2 import Decoder, Encoder, Postnet
|
||||
from mozilla_voice_tts.tts.models.tacotron_abstract import TacotronAbstract
|
||||
from TTS.tts.layers.gst_layers import GST
|
||||
from TTS.tts.layers.tacotron2 import Decoder, Encoder, Postnet
|
||||
from TTS.tts.models.tacotron_abstract import TacotronAbstract
|
||||
|
||||
# TODO: match function arguments with tacotron
|
||||
class Tacotron2(TacotronAbstract):
|
|
@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
|
|||
import torch
|
||||
from torch import nn
|
||||
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||
from TTS.tts.utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
class TacotronAbstract(ABC, nn.Module):
|
|
@ -1,7 +1,7 @@
|
|||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list
|
||||
from mozilla_voice_tts.tts.tf.layers.common_layers import Prenet, Attention
|
||||
from TTS.tts.tf.utils.tf_utils import shape_list
|
||||
from TTS.tts.tf.layers.common_layers import Prenet, Attention
|
||||
# from tensorflow_addons.seq2seq import AttentionWrapper
|
||||
|
||||
# NOTE: linter has a problem with the current TF release
|
|
@ -1,8 +1,8 @@
|
|||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
|
||||
from mozilla_voice_tts.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet
|
||||
from mozilla_voice_tts.tts.tf.utils.tf_utils import shape_list
|
||||
from TTS.tts.tf.layers.tacotron2 import Encoder, Decoder, Postnet
|
||||
from TTS.tts.tf.utils.tf_utils import shape_list
|
||||
|
||||
|
||||
#pylint: disable=too-many-ancestors, abstract-method
|
|
@ -77,7 +77,7 @@ def count_parameters(model, c):
|
|||
|
||||
def setup_model(num_chars, num_speakers, c, enable_tflite=False):
|
||||
print(" > Using model: {}".format(c.model))
|
||||
MyModel = importlib.import_module('mozilla_voice_tts.tts.tf.models.' + c.model.lower())
|
||||
MyModel = importlib.import_module('TTS.tts.tf.models.' + c.model.lower())
|
||||
MyModel = getattr(MyModel, c.model)
|
||||
if c.model.lower() in "tacotron":
|
||||
raise NotImplementedError(' [!] Tacotron model is not ready.')
|
|
@ -3,7 +3,7 @@ import importlib
|
|||
import numpy as np
|
||||
from collections import Counter
|
||||
|
||||
from mozilla_voice_tts.utils.generic_utils import check_argument
|
||||
from TTS.utils.generic_utils import check_argument
|
||||
|
||||
|
||||
def split_dataset(items):
|
||||
|
@ -46,7 +46,7 @@ def sequence_mask(sequence_length, max_len=None):
|
|||
|
||||
def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None):
|
||||
print(" > Using model: {}".format(c.model))
|
||||
MyModel = importlib.import_module('mozilla_voice_tts.tts.models.' + c.model.lower())
|
||||
MyModel = importlib.import_module('TTS.tts.models.' + c.model.lower())
|
||||
MyModel = getattr(MyModel, c.model)
|
||||
if c.model.lower() in "tacotron":
|
||||
model = MyModel(num_chars=num_chars,
|
||||
|
@ -197,7 +197,7 @@ def check_config(c):
|
|||
|
||||
# dataloading
|
||||
# pylint: disable=import-outside-toplevel
|
||||
from mozilla_voice_tts.tts.utils.text import cleaners
|
||||
from TTS.tts.utils.text import cleaners
|
||||
check_argument('text_cleaner', c, restricted=True, val_type=str, enum_list=dir(cleaners))
|
||||
check_argument('enable_eos_bos_chars', c, restricted=True, val_type=bool)
|
||||
check_argument('num_loader_workers', c, restricted=True, val_type=int, min_val=0)
|
|
@ -183,11 +183,11 @@ def synthesis(model,
|
|||
"""Synthesize voice for the given text.
|
||||
|
||||
Args:
|
||||
model (mozilla_voice_tts.tts.models): model to synthesize.
|
||||
model (TTS.tts.models): model to synthesize.
|
||||
text (str): target text
|
||||
CONFIG (dict): config dictionary to be loaded from config.json.
|
||||
use_cuda (bool): enable cuda.
|
||||
ap (mozilla_voice_tts.tts.utils.audio.AudioProcessor): audio processor to process
|
||||
ap (TTS.tts.utils.audio.AudioProcessor): audio processor to process
|
||||
model outputs.
|
||||
speaker_id (int): id of speaker
|
||||
style_wav (str): Used for style embedding of GST.
|
|
@ -4,8 +4,8 @@ import re
|
|||
from packaging import version
|
||||
import phonemizer
|
||||
from phonemizer.phonemize import phonemize
|
||||
from mozilla_voice_tts.tts.utils.text import cleaners
|
||||
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \
|
||||
from TTS.tts.utils.text import cleaners
|
||||
from TTS.tts.utils.text.symbols import make_symbols, symbols, phonemes, _phoneme_punctuations, _bos, \
|
||||
_eos
|
||||
|
||||
# pylint: disable=unnecessary-comprehension
|
|
@ -3,7 +3,7 @@ import librosa
|
|||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
from mozilla_voice_tts.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme
|
||||
from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme
|
||||
|
||||
|
||||
def plot_alignment(alignment, info=None, fig_size=(16, 10), title=None, output_fig=False):
|
|
@ -5,7 +5,7 @@ import scipy.io.wavfile
|
|||
import scipy.signal
|
||||
import pyworld as pw
|
||||
|
||||
from mozilla_voice_tts.tts.utils.data import StandardScaler
|
||||
from TTS.tts.utils.data import StandardScaler
|
||||
|
||||
#pylint: disable=too-many-public-methods
|
||||
class AudioProcessor(object):
|
|
@ -1,5 +1,5 @@
|
|||
import datetime
|
||||
from mozilla_voice_tts.utils.io import AttrDict
|
||||
from TTS.utils.io import AttrDict
|
||||
|
||||
|
||||
tcolors = AttrDict({
|
|
@ -2,7 +2,7 @@ import torch
|
|||
from torch import nn
|
||||
from torch.nn.utils import weight_norm
|
||||
|
||||
from mozilla_voice_tts.vocoder.layers.melgan import ResidualStack
|
||||
from TTS.vocoder.layers.melgan import ResidualStack
|
||||
|
||||
|
||||
class MelganGenerator(nn.Module):
|
|
@ -1,6 +1,6 @@
|
|||
from torch import nn
|
||||
|
||||
from mozilla_voice_tts.vocoder.models.melgan_discriminator import MelganDiscriminator
|
||||
from TTS.vocoder.models.melgan_discriminator import MelganDiscriminator
|
||||
|
||||
|
||||
class MelganMultiscaleDiscriminator(nn.Module):
|
|
@ -1,7 +1,7 @@
|
|||
import torch
|
||||
|
||||
from mozilla_voice_tts.vocoder.models.melgan_generator import MelganGenerator
|
||||
from mozilla_voice_tts.vocoder.layers.pqmf import PQMF
|
||||
from TTS.vocoder.models.melgan_generator import MelganGenerator
|
||||
from TTS.vocoder.layers.pqmf import PQMF
|
||||
|
||||
|
||||
class MultibandMelganGenerator(MelganGenerator):
|
|
@ -2,7 +2,7 @@ import math
|
|||
import torch
|
||||
from torch import nn
|
||||
|
||||
from mozilla_voice_tts.vocoder.layers.parallel_wavegan import ResidualBlock
|
||||
from TTS.vocoder.layers.parallel_wavegan import ResidualBlock
|
||||
|
||||
|
||||
class ParallelWaveganDiscriminator(nn.Module):
|
|
@ -2,8 +2,8 @@ import math
|
|||
import numpy as np
|
||||
import torch
|
||||
|
||||
from mozilla_voice_tts.vocoder.layers.parallel_wavegan import ResidualBlock
|
||||
from mozilla_voice_tts.vocoder.layers.upsample import ConvUpsample
|
||||
from TTS.vocoder.layers.parallel_wavegan import ResidualBlock
|
||||
from TTS.vocoder.layers.upsample import ConvUpsample
|
||||
|
||||
|
||||
class ParallelWaveganGenerator(torch.nn.Module):
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue