diff --git a/.compute b/.compute
new file mode 100644
index 00000000..ff6a8360
--- /dev/null
+++ b/.compute
@@ -0,0 +1,3 @@
+#!/bin/bash
+source ../tmp/venv/bin/activate
+python train.py --config_path config.json
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1418de63..24b13870 100644
--- a/.gitignore
+++ b/.gitignore
@@ -116,5 +116,8 @@ venv.bak/
 *.pth.tar
 result/
 
+# setup.py
+version.py
+
 # jupyter dummy files
 core
diff --git a/.install b/.install
new file mode 100644
index 00000000..0ae9b43b
--- /dev/null
+++ b/.install
@@ -0,0 +1,4 @@
+#!/bin/bash
+virtualenv -p python3 ../tmp/venv
+source ../tmp/venv/bin/activate
+python setup.py develop
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/config.json b/config.json
new file mode 100644
index 00000000..572ce28c
--- /dev/null
+++ b/config.json
@@ -0,0 +1,32 @@
+{
+    "model_name": "best-model",
+    "num_mels": 80,
+    "num_freq": 1025,
+    "sample_rate": 20000,
+    "frame_length_ms": 50,
+    "frame_shift_ms": 12.5,
+    "preemphasis": 0.97,
+    "min_level_db": -100,
+    "ref_level_db": 20,
+    "embedding_size": 256,
+    "text_cleaner": "english_cleaners",
+
+    "epochs": 1000,
+    "lr": 0.002,
+    "warmup_steps": 4000,
+    "batch_size": 32,
+    "eval_batch_size":32,
+    "r": 5,
+
+    "griffin_lim_iters": 60,
+    "power": 1.5,
+
+    "num_loader_workers": 8,
+
+    "checkpoint": true,
+    "save_step": 376,
+    "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
+    "min_seq_len": 0,
+    "output_path": "models/"
+}
+ 
\ No newline at end of file
diff --git a/datasets/LJSpeech.py b/datasets/LJSpeech.py
index 03ccee28..7638e708 100644
--- a/datasets/LJSpeech.py
+++ b/datasets/LJSpeech.py
@@ -5,9 +5,9 @@ import librosa
 import torch
 from torch.utils.data import Dataset
-from TTS.utils.text import text_to_sequence
-from TTS.utils.audio import AudioProcessor
-from TTS.utils.data import (prepare_data, pad_per_step,
+from utils.text import text_to_sequence
+from utils.audio import AudioProcessor
+from utils.data import (prepare_data, pad_per_step,
                         prepare_tensor, prepare_stop_target)
 
 
diff --git a/models/tacotron.py b/models/tacotron.py
index 71253149..1b0923a4 100644
--- a/models/tacotron.py
+++ b/models/tacotron.py
@@ -1,8 +1,8 @@
 # coding: utf-8
 import torch
 from torch import nn
-from TTS.utils.text.symbols import symbols
-from TTS.layers.tacotron import Prenet, Encoder, Decoder, CBHG
+from utils.text.symbols import symbols
+from layers.tacotron import Prenet, Encoder, Decoder, CBHG
 
 
 class Tacotron(nn.Module):
diff --git a/requirements.txt b/requirements.txt
index f2997902..4628a6ae 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
+torch>=0.4.0
 librosa
 inflect
 unidecode
 tensorboard
 tensorboardX
-torch
 matplotlib
 Pillow
 flask
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..06afe37a
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+import setuptools.command.develop
+import setuptools.command.build_py
+import os
+import subprocess
+from os.path import exists
+
+version = '0.0.1'
+
+# Adapted from https://github.com/pytorch/pytorch
+cwd = os.path.dirname(os.path.abspath(__file__))
+if os.getenv('TTS_PYTORCH_BUILD_VERSION'):
+    version = os.getenv('TTS_PYTORCH_BUILD_VERSION')
+else:
+    try:
+        sha = subprocess.check_output(
+            ['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
+        version += '+' + sha[:7]
+    except subprocess.CalledProcessError:
+        pass
+    except IOError:  # FileNotFoundError for python 3
+        pass
+
+
+class build_py(setuptools.command.build_py.build_py):
+
+    def run(self):
+        self.create_version_file()
+        setuptools.command.build_py.build_py.run(self)
+
+    @staticmethod
+    def create_version_file():
+        global version, cwd
+        print('-- Building version ' + version)
+        version_path = os.path.join(cwd, 'version.py')
+        with open(version_path, 'w') as f:
+            f.write("__version__ = '{}'\n".format(version))
+
+
+class develop(setuptools.command.develop.develop):
+
+    def run(self):
+        build_py.create_version_file()
+        setuptools.command.develop.develop.run(self)
+
+
+def create_readme_rst():
+    global cwd
+    try:
+        subprocess.check_call(
+            ["pandoc", "--from=markdown", "--to=rst", "--output=README.rst",
+             "README.md"], cwd=cwd)
+        print("Generated README.rst from README.md using pandoc.")
+    except subprocess.CalledProcessError:
+        pass
+    except OSError:
+        pass
+
+
+setup(name='TTS',
+      version=version,
+      url='https://github.com/mozilla/TTS',
+      description='Text to Speech with Deep Learning',
+      packages=find_packages(),
+      cmdclass={
+          'build_py': build_py,
+          'develop': develop,
+      },
+      install_requires=[
+          "numpy",
+          "scipy",
+          "librosa",
+          "torch >= 0.4.0",
+          "unidecode",
+          "tensorboardX",
+          "matplotlib",
+          "Pillow",
+          "flask",
+      ],
+      extras_require={
+          "bin": [
+              "tqdm",
+              "tensorboardX",
+              "requests",
+          ],
+      })
\ No newline at end of file
diff --git a/utils/text/__init__.py b/utils/text/__init__.py
index 9b812c27..3d158c99 100644
--- a/utils/text/__init__.py
+++ b/utils/text/__init__.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 
 import re
-from TTS.utils.text import cleaners
-from TTS.utils.text.symbols import symbols
+from utils.text import cleaners
+from utils.text.symbols import symbols
 
 
 # Mappings from symbol to numeric ID and vice versa:
diff --git a/utils/text/symbols.py b/utils/text/symbols.py
index a1706b23..c8550e1d 100644
--- a/utils/text/symbols.py
+++ b/utils/text/symbols.py
@@ -7,7 +7,7 @@ Defines the set of symbols used in text input to the model.
 The default is a set of ASCII characters that works well for English or text that has been run
 through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
 '''
-from TTS.utils.text import cmudict
+from utils.text import cmudict
 
 _pad = '_'
 _eos = '~'
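
Note: train.py itself is not touched by this diff, so the snippet below is only a minimal sketch of how the new config.json could be consumed via the --config_path flag that .compute passes to it. The AttrDict helper and the argparse handling are assumptions for illustration, not code from this change.

    import argparse
    import json


    class AttrDict(dict):
        # Hypothetical helper: expose config keys as attributes,
        # e.g. c.num_mels instead of c['num_mels'].
        def __getattr__(self, key):
            return self[key]


    if __name__ == "__main__":
        parser = argparse.ArgumentParser()
        # Same flag as used in .compute above; that train.py parses it
        # with argparse is an assumption.
        parser.add_argument('--config_path', type=str, required=True)
        args = parser.parse_args()

        # Load the JSON config added in this diff and read a few values.
        with open(args.config_path, 'r') as f:
            c = AttrDict(json.load(f))

        print(c.model_name, c.sample_rate, c.batch_size, c.r)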