Merge branch 'dev' of github.com:mozilla/TTS into dev

This commit is contained in:
Eren Golge 2019-08-29 15:03:36 +02:00
commit 00ba281dec
32 changed files with 107 additions and 75 deletions

View File

@ -11,5 +11,7 @@ fi
if [[ "$TEST_SUITE" == "unittest" ]]; then if [[ "$TEST_SUITE" == "unittest" ]]; then
# Run tests on all pushes # Run tests on all pushes
pushd tts_namespace
python -m unittest python -m unittest
popd
fi fi

0
__init__.py Normal file
View File

View File

@ -5,8 +5,8 @@ import torch
import random import random
from torch.utils.data import Dataset from torch.utils.data import Dataset
from utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos from TTS.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
from utils.data import prepare_data, prepare_tensor, prepare_stop_target from TTS.utils.data import prepare_data, prepare_tensor, prepare_stop_target
class MyDataset(Dataset): class MyDataset(Dataset):

View File

@ -9,7 +9,7 @@ import torch.distributed as dist
from torch.utils.data.sampler import Sampler from torch.utils.data.sampler import Sampler
from torch.autograd import Variable from torch.autograd import Variable
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
from utils.generic_utils import load_config, create_experiment_folder from TTS.utils.generic_utils import load_config, create_experiment_folder
class DistributedSampler(Sampler): class DistributedSampler(Sampler):

View File

@ -1,6 +1,6 @@
from torch import nn from torch import nn
from torch.nn import functional from torch.nn import functional
from utils.generic_utils import sequence_mask from TTS.utils.generic_utils import sequence_mask
class L1LossMasked(nn.Module): class L1LossMasked(nn.Module):

View File

@ -1,7 +1,7 @@
# coding: utf-8 # coding: utf-8
from torch import nn from torch import nn
from layers.tacotron import Encoder, Decoder, PostCBHG from TTS.layers.tacotron import Encoder, Decoder, PostCBHG
from utils.generic_utils import sequence_mask from TTS.utils.generic_utils import sequence_mask
class Tacotron(nn.Module): class Tacotron(nn.Module):

View File

@ -1,7 +1,7 @@
from math import sqrt from math import sqrt
from torch import nn from torch import nn
from layers.tacotron2 import Encoder, Decoder, Postnet from TTS.layers.tacotron2 import Encoder, Decoder, Postnet
from utils.generic_utils import sequence_mask from TTS.utils.generic_utils import sequence_mask
# TODO: match function arguments with tacotron # TODO: match function arguments with tacotron

View File

@ -1,8 +1,8 @@
# coding: utf-8 # coding: utf-8
from torch import nn from torch import nn
from layers.tacotron import Encoder, Decoder, PostCBHG from TTS.layers.tacotron import Encoder, Decoder, PostCBHG
from layers.gst_layers import GST from TTS.layers.gst_layers import GST
from utils.generic_utils import sequence_mask from TTS.utils.generic_utils import sequence_mask
class TacotronGST(nn.Module): class TacotronGST(nn.Module):

View File

@ -138,7 +138,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# LOAD TTS MODEL\n", "# LOAD TTS MODEL\n",
"from utils.text.symbols import symbols, phonemes\n", "from TTS.utils.text.symbols import symbols, phonemes\n",
"\n", "\n",
"# multi speaker \n", "# multi speaker \n",
"if CONFIG.use_speaker_embedding:\n", "if CONFIG.use_speaker_embedding:\n",

View File

@ -105,10 +105,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from utils.text.symbols import symbols, phonemes\n", "from TTS.utils.text.symbols import symbols, phonemes\n",
"from utils.generic_utils import sequence_mask\n", "from TTS.utils.generic_utils import sequence_mask\n",
"from layers.losses import L1LossMasked\n", "from TTS.layers.losses import L1LossMasked\n",
"from utils.text.symbols import symbols, phonemes\n", "from TTS.utils.text.symbols import symbols, phonemes\n",
"\n", "\n",
"# load the model\n", "# load the model\n",
"num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n", "num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n",

View File

@ -1,7 +1,7 @@
#!flask/bin/python #!flask/bin/python
import argparse import argparse
from synthesizer import Synthesizer from synthesizer import Synthesizer
from utils.generic_utils import load_config from TTS.utils.generic_utils import load_config
from flask import Flask, request, render_template, send_file from flask import Flask, request, render_template, send_file
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()

View File

@ -5,11 +5,11 @@ import numpy as np
import torch import torch
import sys import sys
from utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
from utils.generic_utils import load_config, setup_model from TTS.utils.generic_utils import load_config, setup_model
from utils.text import phonemes, symbols from TTS.utils.text import phonemes, symbols
from utils.speakers import load_speaker_mapping from TTS.utils.speakers import load_speaker_mapping
from utils.synthesis import * from TTS.utils.synthesis import *
import re import re
alphabets = r"([A-Za-z])" alphabets = r"([A-Za-z])"

View File

@ -63,8 +63,8 @@ setup(
url='https://github.com/mozilla/TTS', url='https://github.com/mozilla/TTS',
description='Text to Speech with Deep Learning', description='Text to Speech with Deep Learning',
license='MPL-2.0', license='MPL-2.0',
package_dir={'TTS': '.'}, package_dir={'': 'tts_namespace'},
packages=['TTS'] + ['TTS.' + pkg for pkg in find_packages()], packages=find_packages('tts_namespace'),
project_urls={ project_urls={
'Documentation': 'https://github.com/mozilla/TTS/wiki', 'Documentation': 'https://github.com/mozilla/TTS/wiki',
'Tracker': 'https://github.com/mozilla/TTS/issues', 'Tracker': 'https://github.com/mozilla/TTS/issues',

View File

@ -4,10 +4,10 @@ import argparse
import torch import torch
import string import string
from utils.synthesis import synthesis from TTS.utils.synthesis import synthesis
from utils.generic_utils import load_config, setup_model from TTS.utils.generic_utils import load_config, setup_model
from utils.text.symbols import symbols, phonemes from TTS.utils.text.symbols import symbols, phonemes
from utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
def tts(model, def tts(model,

View File

@ -1,8 +1,8 @@
import unittest import unittest
import torch as T import torch as T
from utils.generic_utils import save_checkpoint, save_best_model from TTS.utils.generic_utils import save_checkpoint, save_best_model
from layers.tacotron import Prenet from TTS.layers.tacotron import Prenet
OUT_PATH = '/tmp/test.pth.tar' OUT_PATH = '/tmp/test.pth.tar'

View File

@ -1,5 +1,5 @@
{ {
"tts_path":"tests/outputs/", // tts model root folder "tts_path":"TTS/tests/outputs/", // tts model root folder
"tts_file":"checkpoint_10.pth.tar", // tts checkpoint file "tts_file":"checkpoint_10.pth.tar", // tts checkpoint file
"tts_config":"dummy_model_config.json", // tts config.json file "tts_config":"dummy_model_config.json", // tts config.json file
"tts_speakers": null, // json file listing speaker ids. null if no speaker embedding. "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.

View File

@ -1,6 +1,6 @@
import unittest import unittest
from utils.text import phonemes from TTS.utils.text import phonemes
class SymbolsTest(unittest.TestCase): class SymbolsTest(unittest.TestCase):
def test_uniqueness(self): #pylint: disable=no-self-use def test_uniqueness(self): #pylint: disable=no-self-use

View File

@ -1,9 +1,9 @@
import os import os
import unittest import unittest
from tests import get_tests_path, get_tests_input_path, get_tests_output_path from TTS.tests import get_tests_path, get_tests_input_path, get_tests_output_path
from utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
from utils.generic_utils import load_config from TTS.utils.generic_utils import load_config
TESTS_PATH = get_tests_path() TESTS_PATH = get_tests_path()
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests") OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")

View File

@ -3,10 +3,10 @@ import unittest
import torch as T import torch as T
from server.synthesizer import Synthesizer from TTS.server.synthesizer import Synthesizer
from tests import get_tests_input_path, get_tests_output_path, get_tests_path from TTS.tests import get_tests_input_path, get_tests_output_path
from utils.text.symbols import phonemes, symbols from TTS.utils.text.symbols import phonemes, symbols
from utils.generic_utils import load_config, save_checkpoint, setup_model from TTS.utils.generic_utils import load_config, save_checkpoint, setup_model
class DemoServerTest(unittest.TestCase): class DemoServerTest(unittest.TestCase):

View File

@ -1,9 +1,9 @@
import unittest import unittest
import torch as T import torch as T
from layers.tacotron import Prenet, CBHG, Decoder, Encoder from TTS.layers.tacotron import Prenet, CBHG, Decoder, Encoder
from layers.losses import L1LossMasked from TTS.layers.losses import L1LossMasked
from utils.generic_utils import sequence_mask from TTS.utils.generic_utils import sequence_mask
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -5,10 +5,10 @@ import torch
import numpy as np import numpy as np
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from utils.generic_utils import load_config from TTS.utils.generic_utils import load_config
from utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
from datasets import TTSDataset from TTS.datasets import TTSDataset
from datasets.preprocess import ljspeech from TTS.datasets.preprocess import ljspeech
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -1,8 +1,8 @@
import unittest import unittest
import os import os
from tests import get_tests_input_path from TTS.tests import get_tests_input_path
from datasets.preprocess import common_voice from TTS.datasets.preprocess import common_voice
class TestPreprocessors(unittest.TestCase): class TestPreprocessors(unittest.TestCase):

View File

@ -6,9 +6,9 @@ import numpy as np
from torch import optim from torch import optim
from torch import nn from torch import nn
from utils.generic_utils import load_config from TTS.utils.generic_utils import load_config
from layers.losses import MSELossMasked from TTS.layers.losses import MSELossMasked
from models.tacotron2 import Tacotron2 from TTS.models.tacotron2 import Tacotron2
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -5,9 +5,9 @@ import unittest
from torch import optim from torch import optim
from torch import nn from torch import nn
from utils.generic_utils import load_config from TTS.utils.generic_utils import load_config
from layers.losses import L1LossMasked from TTS.layers.losses import L1LossMasked
from models.tacotron import Tacotron from TTS.models.tacotron import Tacotron
#pylint: disable=unused-variable #pylint: disable=unused-variable

View File

@ -1,7 +1,7 @@
import unittest import unittest
import torch as T import torch as T
from utils.text import * from TTS.utils.text import *
def test_phoneme_to_sequence(): def test_phoneme_to_sequence():
text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!" text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"

View File

@ -10,24 +10,24 @@ import torch.nn as nn
from torch import optim from torch import optim
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from datasets.TTSDataset import MyDataset from TTS.datasets.TTSDataset import MyDataset
from distribute import (DistributedSampler, apply_gradient_allreduce, from distribute import (DistributedSampler, apply_gradient_allreduce,
init_distributed, reduce_tensor) init_distributed, reduce_tensor)
from layers.losses import L1LossMasked, MSELossMasked from TTS.layers.losses import L1LossMasked, MSELossMasked
from utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
from utils.generic_utils import (NoamLR, check_update, count_parameters, from TTS.utils.generic_utils import (NoamLR, check_update, count_parameters,
create_experiment_folder, get_git_branch, create_experiment_folder, get_git_branch,
load_config, remove_experiment_folder, load_config, remove_experiment_folder,
save_best_model, save_checkpoint, weight_decay, save_best_model, save_checkpoint, weight_decay,
set_init_dict, copy_config_file, setup_model, set_init_dict, copy_config_file, setup_model,
split_dataset, gradual_training_scheduler) split_dataset, gradual_training_scheduler)
from utils.logger import Logger from TTS.utils.logger import Logger
from utils.speakers import load_speaker_mapping, save_speaker_mapping, \ from TTS.utils.speakers import load_speaker_mapping, save_speaker_mapping, \
get_speakers get_speakers
from utils.synthesis import synthesis from TTS.utils.synthesis import synthesis
from utils.text.symbols import phonemes, symbols from TTS.utils.text.symbols import phonemes, symbols
from utils.visual import plot_alignment, plot_spectrogram from TTS.utils.visual import plot_alignment, plot_spectrogram
from datasets.preprocess import get_preprocessor_by_name from TTS.datasets.preprocess import get_preprocessor_by_name
torch.backends.cudnn.enabled = True torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = False torch.backends.cudnn.benchmark = False

29
tts_namespace/README.md Normal file
View File

@ -0,0 +1,29 @@
This folder contains a symlink called TTS to the parent folder:
lrwxr-xr-x TTS -> ..
This is used to appease the distribute/setuptools gods. When the project was
initially set up, the repository folder itself was considered a namespace, and
development was done with `sys.path` hacks. This means if you tried to install
TTS, `setup.py` would see the packages `models`, `utils`, `layers`... instead of
`TTS.models`, `TTS.utils`...
Installing TTS would then pollute the package namespace with generic names like
those above. In order to make things installable in both install and development
modes (`pip install /path/to/TTS` and `pip install -e /path/to/TTS`), we needed
to add an additional 'TTS' namespace to avoid this pollution. A virtual redirect
using `package_dir` in `setup.py` is not enough because it breaks the editable
installation, which can only handle the simplest of `package_dir` redirects.
Our solution is to use a symlink in order to add the extra `TTS` namespace. In
`setup.py`, we only look for packages inside `tts_namespace` (this folder),
which contains a symlink called TTS pointing to the repository root. The final
result is that `setuptools.find_packages` will find `TTS.models`, `TTS.utils`...
With this hack, `pip install -e` will then add a link to the `tts_namespace` folder
in your `site-packages` folder, which works properly. It's important not to add
anything else in this folder because it will pollute the package namespace when
installing the project.
This does not work if you check out your project on a filesystem that does not
support symlinks.

1
tts_namespace/TTS Symbolic link
View File

@ -0,0 +1 @@
..

View File

@ -250,7 +250,7 @@ def set_init_dict(model_dict, checkpoint, c):
def setup_model(num_chars, num_speakers, c): def setup_model(num_chars, num_speakers, c):
print(" > Using model: {}".format(c.model)) print(" > Using model: {}".format(c.model))
MyModel = importlib.import_module('models.' + c.model.lower()) MyModel = importlib.import_module('TTS.models.' + c.model.lower())
MyModel = getattr(MyModel, c.model) MyModel = getattr(MyModel, c.model)
if c.model.lower() in ["tacotron", "tacotrongst"]: if c.model.lower() in ["tacotron", "tacotrongst"]:
model = MyModel( model = MyModel(

View File

@ -1,7 +1,7 @@
import os import os
import json import json
from datasets.preprocess import get_preprocessor_by_name from TTS.datasets.preprocess import get_preprocessor_by_name
def make_speakers_json_path(out_path): def make_speakers_json_path(out_path):

View File

@ -3,8 +3,8 @@
import re import re
import phonemizer import phonemizer
from phonemizer.phonemize import phonemize from phonemizer.phonemize import phonemize
from utils.text import cleaners from TTS.utils.text import cleaners
from utils.text.symbols import symbols, phonemes, _phoneme_punctuations, _bos, \ from TTS.utils.text.symbols import symbols, phonemes, _phoneme_punctuations, _bos, \
_eos _eos
# Mappings from symbol to numeric ID and vice versa: # Mappings from symbol to numeric ID and vice versa:

View File

@ -2,7 +2,7 @@ import librosa
import matplotlib import matplotlib
matplotlib.use('Agg') matplotlib.use('Agg')
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from utils.text import phoneme_to_sequence, sequence_to_phoneme from TTS.utils.text import phoneme_to_sequence, sequence_to_phoneme
def plot_alignment(alignment, info=None): def plot_alignment(alignment, info=None):