mirror of https://github.com/coqui-ai/TTS.git
glow-tts module renaming updates
This commit is contained in:
parent
0ffe91b21d
commit
15e6ab3912
|
@ -13,30 +13,30 @@ import torch
|
|||
from torch.utils.data import DataLoader
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
|
||||
from mozilla_voice_tts.tts.datasets.preprocess import load_meta_data
|
||||
from mozilla_voice_tts.tts.datasets.TTSDataset import MyDataset
|
||||
from mozilla_voice_tts.tts.layers.losses import GlowTTSLoss
|
||||
from mozilla_voice_tts.utils.console_logger import ConsoleLogger
|
||||
from mozilla_voice_tts.tts.utils.distribute import (DistributedSampler,
|
||||
from TTS.tts.datasets.preprocess import load_meta_data
|
||||
from TTS.tts.datasets.TTSDataset import MyDataset
|
||||
from TTS.tts.layers.losses import GlowTTSLoss
|
||||
from TTS.utils.console_logger import ConsoleLogger
|
||||
from TTS.tts.utils.distribute import (DistributedSampler,
|
||||
init_distributed,
|
||||
reduce_tensor)
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import check_config, setup_model
|
||||
from mozilla_voice_tts.tts.utils.io import save_best_model, save_checkpoint
|
||||
from mozilla_voice_tts.tts.utils.measures import alignment_diagonal_score
|
||||
from mozilla_voice_tts.tts.utils.speakers import (get_speakers,
|
||||
from TTS.tts.utils.generic_utils import check_config, setup_model
|
||||
from TTS.tts.utils.io import save_best_model, save_checkpoint
|
||||
from TTS.tts.utils.measures import alignment_diagonal_score
|
||||
from TTS.tts.utils.speakers import (get_speakers,
|
||||
load_speaker_mapping,
|
||||
save_speaker_mapping)
|
||||
from mozilla_voice_tts.tts.utils.synthesis import synthesis
|
||||
from mozilla_voice_tts.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||
from mozilla_voice_tts.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||
from mozilla_voice_tts.utils.audio import AudioProcessor
|
||||
from mozilla_voice_tts.utils.generic_utils import (
|
||||
from TTS.tts.utils.synthesis import synthesis
|
||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.generic_utils import (
|
||||
KeepAverage, count_parameters, create_experiment_folder, get_git_branch,
|
||||
remove_experiment_folder, set_init_dict)
|
||||
from mozilla_voice_tts.utils.io import copy_config_file, load_config
|
||||
from mozilla_voice_tts.utils.radam import RAdam
|
||||
from mozilla_voice_tts.utils.tensorboard_logger import TensorboardLogger
|
||||
from mozilla_voice_tts.utils.training import (NoamLR, adam_weight_decay,
|
||||
from TTS.utils.io import copy_config_file, load_config
|
||||
from TTS.utils.radam import RAdam
|
||||
from TTS.utils.tensorboard_logger import TensorboardLogger
|
||||
from TTS.utils.training import (NoamLR, adam_weight_decay,
|
||||
check_update,
|
||||
gradual_training_scheduler,
|
||||
set_weight_decay,
|
|
@ -2,8 +2,8 @@ import torch
|
|||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.glow import InvConvNear, CouplingBlock, ActNorm
|
||||
from TTS.tts.utils.generic_utils import sequence_mask
|
||||
from TTS.tts.layers.glow_tts.glow import InvConvNear, CouplingBlock, ActNorm
|
||||
|
||||
|
||||
def squeeze(x, x_mask=None, num_sqz=2):
|
|
@ -2,10 +2,10 @@ import math
|
|||
import torch
|
||||
from torch import nn
|
||||
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.transformer import Transformer
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.glow import ConvLayerNorm, LayerNorm
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.duration_predictor import DurationPredictor
|
||||
from TTS.tts.layers.glow_tts.transformer import Transformer
|
||||
from TTS.tts.utils.generic_utils import sequence_mask
|
||||
from TTS.tts.layers.glow_tts.glow import ConvLayerNorm, LayerNorm
|
||||
from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
|
||||
|
||||
|
||||
class GatedConvBlock(nn.Module):
|
|
@ -1,8 +1,8 @@
|
|||
import numpy as np
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core import maximum_path_c
|
||||
from TTS.tts.utils.generic_utils import sequence_mask
|
||||
from TTS.tts.layers.glow_tts.monotonic_align.core import maximum_path_c
|
||||
|
||||
|
||||
def convert_pad_shape(pad_shape):
|
|
@ -4,12 +4,12 @@
|
|||
{
|
||||
"distutils": {
|
||||
"depends": [],
|
||||
"name": "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core",
|
||||
"name": "TTS.tts.layers.glow_tts.monotonic_align.core",
|
||||
"sources": [
|
||||
"core.pyx"
|
||||
]
|
||||
},
|
||||
"module_name": "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core"
|
||||
"module_name": "TTS.tts.layers.glow_tts.monotonic_align.core"
|
||||
}
|
||||
END: Cython Metadata */
|
||||
|
||||
|
@ -2130,7 +2130,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, cha
|
|||
|
||||
/* Module declarations from 'cython' */
|
||||
|
||||
/* Module declarations from 'mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core' */
|
||||
/* Module declarations from 'TTS.tts.layers.glow_tts.monotonic_align.core' */
|
||||
static PyTypeObject *__pyx_array_type = 0;
|
||||
static PyTypeObject *__pyx_MemviewEnum_type = 0;
|
||||
static PyTypeObject *__pyx_memoryview_type = 0;
|
||||
|
@ -2179,11 +2179,11 @@ static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize
|
|||
static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/
|
||||
static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 };
|
||||
static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 };
|
||||
#define __Pyx_MODULE_NAME "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core"
|
||||
#define __Pyx_MODULE_NAME "TTS.tts.layers.glow_tts.monotonic_align.core"
|
||||
extern int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core;
|
||||
int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core = 0;
|
||||
|
||||
/* Implementation of 'mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core' */
|
||||
/* Implementation of 'TTS.tts.layers.glow_tts.monotonic_align.core' */
|
||||
static PyObject *__pyx_builtin_range;
|
||||
static PyObject *__pyx_builtin_ValueError;
|
||||
static PyObject *__pyx_builtin_RuntimeError;
|
||||
|
@ -3047,7 +3047,7 @@ __pyx_t_6 = __pyx_v_i;
|
|||
__pyx_L1_error:;
|
||||
__PYX_XDEC_MEMVIEW(&__pyx_t_4, 0);
|
||||
__PYX_XDEC_MEMVIEW(&__pyx_t_5, 0);
|
||||
__Pyx_WriteUnraisable("mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1);
|
||||
__Pyx_WriteUnraisable("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename, 1, 1);
|
||||
__pyx_L0:;
|
||||
}
|
||||
|
||||
|
@ -3141,7 +3141,7 @@ static PyObject *__pyx_pw_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_1m
|
|||
__pyx_L5_argtuple_error:;
|
||||
__Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 40, __pyx_L3_error)
|
||||
__pyx_L3_error:;
|
||||
__Pyx_AddTraceback("mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||
__Pyx_AddTraceback("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||
__Pyx_RefNannyFinishContext();
|
||||
return NULL;
|
||||
__pyx_L4_argument_unpacking_done:;
|
||||
|
@ -3175,7 +3175,7 @@ static PyObject *__pyx_pf_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_ma
|
|||
/* function exit code */
|
||||
__pyx_L1_error:;
|
||||
__Pyx_XDECREF(__pyx_t_2);
|
||||
__Pyx_AddTraceback("mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||
__Pyx_AddTraceback("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||
__pyx_r = NULL;
|
||||
__pyx_L0:;
|
||||
__PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1);
|
||||
|
@ -18550,7 +18550,7 @@ static PyBufferProcs __pyx_tp_as_buffer_array = {
|
|||
|
||||
static PyTypeObject __pyx_type___pyx_array = {
|
||||
PyVarObject_HEAD_INIT(0, 0)
|
||||
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.array", /*tp_name*/
|
||||
"TTS.tts.layers.glow_tts.monotonic_align.core.array", /*tp_name*/
|
||||
sizeof(struct __pyx_array_obj), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
__pyx_tp_dealloc_array, /*tp_dealloc*/
|
||||
|
@ -18669,7 +18669,7 @@ static PyMethodDef __pyx_methods_Enum[] = {
|
|||
|
||||
static PyTypeObject __pyx_type___pyx_MemviewEnum = {
|
||||
PyVarObject_HEAD_INIT(0, 0)
|
||||
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.Enum", /*tp_name*/
|
||||
"TTS.tts.layers.glow_tts.monotonic_align.core.Enum", /*tp_name*/
|
||||
sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
__pyx_tp_dealloc_Enum, /*tp_dealloc*/
|
||||
|
@ -18930,7 +18930,7 @@ static PyBufferProcs __pyx_tp_as_buffer_memoryview = {
|
|||
|
||||
static PyTypeObject __pyx_type___pyx_memoryview = {
|
||||
PyVarObject_HEAD_INIT(0, 0)
|
||||
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core.memoryview", /*tp_name*/
|
||||
"TTS.tts.layers.glow_tts.monotonic_align.core.memoryview", /*tp_name*/
|
||||
sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
__pyx_tp_dealloc_memoryview, /*tp_dealloc*/
|
||||
|
@ -19068,7 +19068,7 @@ static struct PyGetSetDef __pyx_getsets__memoryviewslice[] = {
|
|||
|
||||
static PyTypeObject __pyx_type___pyx_memoryviewslice = {
|
||||
PyVarObject_HEAD_INIT(0, 0)
|
||||
"mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core._memoryviewslice", /*tp_name*/
|
||||
"TTS.tts.layers.glow_tts.monotonic_align.core._memoryviewslice", /*tp_name*/
|
||||
sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
__pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/
|
||||
|
@ -20001,8 +20001,8 @@ if (!__Pyx_RefNanny) {
|
|||
#if PY_MAJOR_VERSION >= 3
|
||||
{
|
||||
PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
if (!PyDict_GetItemString(modules, "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core")) {
|
||||
if (unlikely(PyDict_SetItemString(modules, "mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
if (!PyDict_GetItemString(modules, "TTS.tts.layers.glow_tts.monotonic_align.core")) {
|
||||
if (unlikely(PyDict_SetItemString(modules, "TTS.tts.layers.glow_tts.monotonic_align.core", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -20213,11 +20213,11 @@ if (!__Pyx_RefNanny) {
|
|||
__Pyx_XDECREF(__pyx_t_1);
|
||||
if (__pyx_m) {
|
||||
if (__pyx_d) {
|
||||
__Pyx_AddTraceback("init mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||
__Pyx_AddTraceback("init TTS.tts.layers.glow_tts.monotonic_align.core", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||
}
|
||||
Py_CLEAR(__pyx_m);
|
||||
} else if (!PyErr_Occurred()) {
|
||||
PyErr_SetString(PyExc_ImportError, "init mozilla_voice_tts.tts.layers.glow_tts.monotonic_align.core");
|
||||
PyErr_SetString(PyExc_ImportError, "init TTS.tts.layers.glow_tts.monotonic_align.core");
|
||||
}
|
||||
__pyx_L0:;
|
||||
__Pyx_RefNannyFinishContext();
|
|
@ -5,7 +5,7 @@ import torch
|
|||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.glow import LayerNorm
|
||||
from TTS.tts.layers.glow_tts.glow import LayerNorm
|
||||
|
||||
|
||||
class RelativePositionMultiHeadAttention(nn.Module):
|
|
@ -3,10 +3,10 @@ import torch
|
|||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.encoder import Encoder
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.decoder import Decoder
|
||||
from mozilla_voice_tts.tts.utils.generic_utils import sequence_mask
|
||||
from mozilla_voice_tts.tts.layers.glow_tts.monotonic_align import maximum_path, generate_path
|
||||
from TTS.tts.layers.glow_tts.encoder import Encoder
|
||||
from TTS.tts.layers.glow_tts.decoder import Decoder
|
||||
from TTS.tts.utils.generic_utils import sequence_mask
|
||||
from TTS.tts.layers.glow_tts.monotonic_align import maximum_path, generate_path
|
||||
|
||||
|
||||
class GlowTts(nn.Module):
|
|
@ -49,7 +49,7 @@ def to_camel(text):
|
|||
def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None):
|
||||
print(" > Using model: {}".format(c.model))
|
||||
MyModel = importlib.import_module('TTS.tts.models.' + c.model.lower())
|
||||
MyModel = getattr(MyModel, c.model)
|
||||
MyModel = getattr(MyModel, to_camel(c.model))
|
||||
if c.model.lower() in "tacotron":
|
||||
model = MyModel(num_chars=num_chars,
|
||||
num_speakers=num_speakers,
|
||||
|
|
|
@ -5,8 +5,8 @@ import pickle as pickle_tts
|
|||
class RenamingUnpickler(pickle_tts.Unpickler):
|
||||
"""Overload default pickler to solve module renaming problem"""
|
||||
def find_class(self, module, name):
|
||||
if 'mozilla_voice_tts' in module :
|
||||
module = module.replace('mozilla_voice_tts', 'TTS')
|
||||
if 'TTS' in module :
|
||||
module = module.replace('TTS', 'TTS')
|
||||
return super().find_class(module, name)
|
||||
|
||||
class AttrDict(dict):
|
||||
|
|
Loading…
Reference in New Issue