glow-tts module renaming updates

This commit is contained in:
erogol 2020-09-12 03:33:36 +02:00
parent 0ffe91b21d
commit 15e6ab3912
14 changed files with 1254 additions and 1254 deletions

View File

@@ -13,30 +13,30 @@ import torch
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torch.nn.parallel import DistributedDataParallel as DDP from torch.nn.parallel import DistributedDataParallel as DDP
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data from TTS.tts.datasets.preprocess import load_meta_data
from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset from TTS.tts.datasets.TTSDataset import MyDataset
from mozilla_voice_tts.tts.layers.losses import GlowTTSLoss from TTS.tts.layers.losses import GlowTTSLoss
from mozilla_voice_tts.utils.console_logger import ConsoleLogger from TTS.utils.console_logger import ConsoleLogger
from mozilla_voice_tts.tts.utils.distribute import (DistributedSampler, from TTS.tts.utils.distribute import (DistributedSampler,
init_distributed, init_distributed,
reduce_tensor) reduce_tensor)
from mozilla_voice_tts.tts.utils.generic_utils import check_config, setup_model from TTS.tts.utils.generic_utils import check_config, setup_model
from mozilla_voice_tts.tts.utils.io import save_best_model, save_checkpoint from TTS.tts.utils.io import save_best_model, save_checkpoint
from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score from TTS.tts.utils.measures import alignment_diagonal_score
from mozilla_voice_tts.tts.utils.speakers import (get_speakers, from TTS.tts.utils.speakers import (get_speakers,
load_speaker_mapping, load_speaker_mapping,
save_speaker_mapping) save_speaker_mapping)
from mozilla_voice_tts.tts.utils.synthesis import synthesis from TTS.tts.utils.synthesis import synthesis
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
from mozilla_voice_tts.tts.utils.visual import plot_alignment, plot_spectrogram from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
from mozilla_voice_tts.utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
from mozilla_voice_tts.utils.generic_utils import ( from TTS.utils.generic_utils import (
KeepAverage, count_parameters, create_experiment_folder, get_git_branch, KeepAverage, count_parameters, create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict) remove_experiment_folder, set_init_dict)
from mozilla_voice_tts.utils.io import copy_config_file, load_config from TTS.utils.io import copy_config_file, load_config
from mozilla_voice_tts.utils.radam import RAdam from TTS.utils.radam import RAdam
from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger from TTS.utils.tensorboard_logger import TensorboardLogger
from mozilla_voice_tts.utils.training import (NoamLR, adam_weight_decay, from TTS.utils.training import (NoamLR, adam_weight_decay,
check_update, check_update,
gradual_training_scheduler, gradual_training_scheduler,
set_weight_decay, set_weight_decay,

View File

@@ -2,8 +2,8 @@ import torch
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask from TTS.tts.utils.generic_utils import sequence_mask
from mozilla_voice_tts.tts.layers.glow_tts.glow import InvConvNear, CouplingBlock, ActNorm from TTS.tts.layers.glow_tts.glow import InvConvNear, CouplingBlock, ActNorm
def squeeze(x, x_mask=None, num_sqz=2): def squeeze(x, x_mask=None, num_sqz=2):

View File

@@ -2,10 +2,10 @@ import math
import torch import torch
from torch import nn from torch import nn
from mozilla_voice_tts.tts.layers.glow_tts.transformer import Transformer from TTS.tts.layers.glow_tts.transformer import Transformer
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask from TTS.tts.utils.generic_utils import sequence_mask
from mozilla_voice_tts.tts.layers.glow_tts.glow import ConvLayerNorm, LayerNorm from TTS.tts.layers.glow_tts.glow import ConvLayerNorm, LayerNorm
from mozilla_voice_tts.tts.layers.glow_tts.duration_predictor import DurationPredictor from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
class GatedConvBlock(nn.Module): class GatedConvBlock(nn.Module):

View File

@@ -1,8 +1,8 @@
import numpy as np import numpy as np
import torch import torch
from torch.nn import functional as F from torch.nn import functional as F
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask from TTS.tts.utils.generic_utils import sequence_mask
from mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core import maximum_path_c from TTS.tts.layers.glow_tts.monotonic_align.core import maximum_path_c
def convert_pad_shape(pad_shape): def convert_pad_shape(pad_shape):

View File

@@ -4,12 +4,12 @@
{ {
"distutils": { "distutils": {
"depends": [], "depends": [],
"name": "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core", "name": "TTS.tts.layers.glow_tts.monotonic_align.core",
"sources": [ "sources": [
"core.pyx" "core.pyx"
] ]
}, },
"module_name": "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core" "module_name": "TTS.tts.layers.glow_tts.monotonic_align.core"
} }
END: Cython Metadata */ END: Cython Metadata */
@@ -2130,7 +2130,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, cha
/* Module declarations from 'cython' */ /* Module declarations from 'cython' */
/* Module declarations from 'mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core' */ /* Module declarations from 'TTS.tts.layers.glow_tts.monotonic_align.core' */
static PyTypeObject *__pyx_array_type = 0; static PyTypeObject *__pyx_array_type = 0;
static PyTypeObject *__pyx_MemviewEnum_type = 0; static PyTypeObject *__pyx_MemviewEnum_type = 0;
static PyTypeObject *__pyx_memoryview_type = 0; static PyTypeObject *__pyx_memoryview_type = 0;
@@ -2179,11 +2179,11 @@ static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize
static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/ static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/
static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 }; static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 };
static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 }; static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 };
#define __Pyx_MODULE_NAME "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core" #define __Pyx_MODULE_NAME "TTS.tts.layers.glow_tts.monotonic_align.core"
extern int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core; extern int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core;
int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core = 0; int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core = 0;
/* Implementation of 'mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core' */ /* Implementation of 'TTS.tts.layers.glow_tts.monotonic_align.core' */
static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_range;
static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_ValueError;
static PyObject *__pyx_builtin_RuntimeError; static PyObject *__pyx_builtin_RuntimeError;
@@ -3047,7 +3047,7 @@ __pyx_t_6 = __pyx_v_i;
__pyx_L1_error:; __pyx_L1_error:;
__PYX_XDEC_MEMVIEW(&__pyx_t_4, 0); __PYX_XDEC_MEMVIEW(&__pyx_t_4, 0);
__PYX_XDEC_MEMVIEW(&__pyx_t_5, 0); __PYX_XDEC_MEMVIEW(&__pyx_t_5, 0);
__Pyx_WriteUnraisable("mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1); __Pyx_WriteUnraisable("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1);
__pyx_L0:; __pyx_L0:;
} }
@@ -3141,7 +3141,7 @@ static PyObject *__pyx_pw_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_1m
__pyx_L5_argtuple_error:; __pyx_L5_argtuple_error:;
__Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 40, __pyx_L3_error) __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 40, __pyx_L3_error)
__pyx_L3_error:; __pyx_L3_error:;
__Pyx_AddTraceback("mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();
return NULL; return NULL;
__pyx_L4_argument_unpacking_done:; __pyx_L4_argument_unpacking_done:;
@@ -3175,7 +3175,7 @@ static PyObject *__pyx_pf_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_ma
/* function exit code */ /* function exit code */
__pyx_L1_error:; __pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_2);
__Pyx_AddTraceback("mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL; __pyx_r = NULL;
__pyx_L0:; __pyx_L0:;
__PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1); __PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1);
@@ -18550,7 +18550,7 @@ static PyBufferProcs __pyx_tp_as_buffer_array = {
static PyTypeObject __pyx_type___pyx_array = { static PyTypeObject __pyx_type___pyx_array = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.array", /*tp_name*/ "TTS.tts.layers.glow_tts.monotonic_align.core.array", /*tp_name*/
sizeof(struct __pyx_array_obj), /*tp_basicsize*/ sizeof(struct __pyx_array_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc_array, /*tp_dealloc*/ __pyx_tp_dealloc_array, /*tp_dealloc*/
@@ -18669,7 +18669,7 @@ static PyMethodDef __pyx_methods_Enum[] = {
static PyTypeObject __pyx_type___pyx_MemviewEnum = { static PyTypeObject __pyx_type___pyx_MemviewEnum = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.Enum", /*tp_name*/ "TTS.tts.layers.glow_tts.monotonic_align.core.Enum", /*tp_name*/
sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/ sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc_Enum, /*tp_dealloc*/ __pyx_tp_dealloc_Enum, /*tp_dealloc*/
@@ -18930,7 +18930,7 @@ static PyBufferProcs __pyx_tp_as_buffer_memoryview = {
static PyTypeObject __pyx_type___pyx_memoryview = { static PyTypeObject __pyx_type___pyx_memoryview = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.memoryview", /*tp_name*/ "TTS.tts.layers.glow_tts.monotonic_align.core.memoryview", /*tp_name*/
sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/ sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc_memoryview, /*tp_dealloc*/ __pyx_tp_dealloc_memoryview, /*tp_dealloc*/
@@ -19068,7 +19068,7 @@ static struct PyGetSetDef __pyx_getsets__memoryviewslice[] = {
static PyTypeObject __pyx_type___pyx_memoryviewslice = { static PyTypeObject __pyx_type___pyx_memoryviewslice = {
PyVarObject_HEAD_INIT(0, 0) PyVarObject_HEAD_INIT(0, 0)
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core._memoryviewslice", /*tp_name*/ "TTS.tts.layers.glow_tts.monotonic_align.core._memoryviewslice", /*tp_name*/
sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/ sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/
0, /*tp_itemsize*/ 0, /*tp_itemsize*/
__pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/ __pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/
@@ -20001,8 +20001,8 @@ if (!__Pyx_RefNanny) {
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
{ {
PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error)
if (!PyDict_GetItemString(modules, "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core")) { if (!PyDict_GetItemString(modules, "TTS.tts.layers.glow_tts.monotonic_align.core")) {
if (unlikely(PyDict_SetItemString(modules, "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) if (unlikely(PyDict_SetItemString(modules, "TTS.tts.layers.glow_tts.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
} }
} }
#endif #endif
@@ -20213,11 +20213,11 @@ if (!__Pyx_RefNanny) {
__Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_1);
if (__pyx_m) { if (__pyx_m) {
if (__pyx_d) { if (__pyx_d) {
__Pyx_AddTraceback("init mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_AddTraceback("init TTS.tts.layers.glow_tts.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename);
} }
Py_CLEAR(__pyx_m); Py_CLEAR(__pyx_m);
} else if (!PyErr_Occurred()) { } else if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ImportError, "init mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core"); PyErr_SetString(PyExc_ImportError, "init TTS.tts.layers.glow_tts.monotonic_align.core");
} }
__pyx_L0:; __pyx_L0:;
__Pyx_RefNannyFinishContext(); __Pyx_RefNannyFinishContext();

View File

@@ -5,7 +5,7 @@ import torch
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
from mozilla_voice_tts.tts.layers.glow_tts.glow import LayerNorm from TTS.tts.layers.glow_tts.glow import LayerNorm
class RelativePositionMultiHeadAttention(nn.Module): class RelativePositionMultiHeadAttention(nn.Module):

View File

@@ -3,10 +3,10 @@ import torch
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
from mozilla_voice_tts.tts.layers.glow_tts.encoder import Encoder from TTS.tts.layers.glow_tts.encoder import Encoder
from mozilla_voice_tts.tts.layers.glow_tts.decoder import Decoder from TTS.tts.layers.glow_tts.decoder import Decoder
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask from TTS.tts.utils.generic_utils import sequence_mask
from mozilla_voice_tts.tts.layers.glow_tts.monotonic_align import maximum_path, generate_path from TTS.tts.layers.glow_tts.monotonic_align import maximum_path, generate_path
class GlowTts(nn.Module): class GlowTts(nn.Module):

View File

@@ -49,7 +49,7 @@ def to_camel(text):
def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None): def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None):
print(" > Using model: {}".format(c.model)) print(" > Using model: {}".format(c.model))
MyModel = importlib.import_module('TTS.tts.models.' + c.model.lower()) MyModel = importlib.import_module('TTS.tts.models.' + c.model.lower())
MyModel = getattr(MyModel, c.model) MyModel = getattr(MyModel, to_camel(c.model))
if c.model.lower() in "tacotron": if c.model.lower() in "tacotron":
model = MyModel(num_chars=num_chars, model = MyModel(num_chars=num_chars,
num_speakers=num_speakers, num_speakers=num_speakers,

View File

@@ -5,8 +5,8 @@ import pickle as pickle_tts
class RenamingUnpickler(pickle_tts.Unpickler): class RenamingUnpickler(pickle_tts.Unpickler):
"""Overload default pickler to solve module renaming problem""" """Overload default pickler to solve module renaming problem"""
def find_class(self, module, name): def find_class(self, module, name):
if 'mozilla_voice_tts' in module : if 'TTS' in module :
module = module.replace('mozilla_voice_tts', 'TTS') module = module.replace('TTS', 'TTS')
return super().find_class(module, name) return super().find_class(module, name)
class AttrDict(dict): class AttrDict(dict):