Merge branch 'master' of https://github.com/Mozilla/TTS

2018-07-04 16:00:54 +02:00 · 2018-07-04 16:00:54 +02:00 · bdf3fd209a
parent 41f4d99e77 4d1295cb90
commit bdf3fd209a
11 changed files with 139 additions and 9 deletions
--- a/.compute
+++ b/.compute
@ -0,0 +1,3 @@
+#!/bin/bash
+# Training entry point: activate the virtualenv at ../tmp/venv and run
+# train.py with config.json.
+# Abort on the first failing command so that training is never started with
+# whatever interpreter happens to be on PATH when activation fails.
+set -e
+source ../tmp/venv/bin/activate
+python train.py --config_path config.json
--- a/.gitignore
+++ b/.gitignore
@ -116,5 +116,8 @@ venv.bak/
 *.pth.tar
 result/

+# setup.py
+version.py
+
 # jupyter dummy files
 core
--- a/.install
+++ b/.install
@ -0,0 +1,4 @@
+#!/bin/bash
+# Environment setup: create a Python 3 virtualenv in ../tmp/venv, activate it
+# and install this project into it in development mode.
+# Abort on the first failing command so that a failed virtualenv creation or
+# activation does not end up installing into another Python environment.
+set -e
+virtualenv -p python3 ../tmp/venv
+source ../tmp/venv/bin/activate
+python setup.py develop
--- a/__init__.py
+++ b/__init__.py
--- a/config.json
+++ b/config.json
@ -0,0 +1,32 @@
+{
+    "model_name": "best-model",
+    "num_mels": 80,
+    "num_freq": 1025,
+    "sample_rate": 20000,
+    "frame_length_ms": 50,
+    "frame_shift_ms": 12.5,
+    "preemphasis": 0.97,
+    "min_level_db": -100,
+    "ref_level_db": 20,
+    "embedding_size": 256,
+    "text_cleaner": "english_cleaners",
+  
+    "epochs": 1000,
+    "lr": 0.002,
+    "warmup_steps": 4000,
+    "batch_size": 32,
+    "eval_batch_size":32,
+    "r": 5,
+      
+    "griffin_lim_iters": 60,
+    "power": 1.5,
+  
+    "num_loader_workers": 8,
+  
+    "checkpoint": true,
+    "save_step": 376,
+    "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
+    "min_seq_len": 0, 
+    "output_path": "models/"
+  }
+  
--- a/datasets/LJSpeech.py
+++ b/datasets/LJSpeech.py
@ -5,9 +5,9 @@ import librosa
 import torch
 from torch.utils.data import Dataset

-from TTS.utils.text import text_to_sequence
-from TTS.utils.audio import AudioProcessor
-from TTS.utils.data import (prepare_data, pad_per_step,
+from utils.text import text_to_sequence
+from utils.audio import AudioProcessor
+from utils.data import (prepare_data, pad_per_step,
                            prepare_tensor, prepare_stop_target)


--- a/models/tacotron.py
+++ b/models/tacotron.py
@ -1,8 +1,8 @@
 # coding: utf-8
 import torch
 from torch import nn
-from TTS.utils.text.symbols import symbols
-from TTS.layers.tacotron import Prenet, Encoder, Decoder, CBHG
+from utils.text.symbols import symbols
+from layers.tacotron import Prenet, Encoder, Decoder, CBHG


 class Tacotron(nn.Module):
--- a/requirements.txt
+++ b/requirements.txt
@ -1,9 +1,9 @@
+torch>=0.4.0
 librosa
 inflect
 unidecode
 tensorboard
 tensorboardX
-torch
 matplotlib
 Pillow
 flask
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+import setuptools.command.develop
+import setuptools.command.build_py
+import os
+import subprocess
+from os.path import exists
+
+version = '0.0.1'
+
+# Adapted from https://github.com/pytorch/pytorch
+cwd = os.path.dirname(os.path.abspath(__file__))
+if os.getenv('TTS_PYTORCH_BUILD_VERSION'):
+    version = os.getenv('TTS_PYTORCH_BUILD_VERSION')
+else:
+    try:
+        sha = subprocess.check_output(
+            ['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
+        version += '+' + sha[:7]
+    except subprocess.CalledProcessError:
+        pass
+    except IOError:  # FileNotFoundError for python 3
+        pass
+
+
+class build_py(setuptools.command.build_py.build_py):
+
+    def run(self):
+        self.create_version_file()
+        setuptools.command.build_py.build_py.run(self)
+
+    @staticmethod
+    def create_version_file():
+        global version, cwd
+        print('-- Building version ' + version)
+        version_path = os.path.join(cwd, 'version.py')
+        with open(version_path, 'w') as f:
+            f.write("__version__ = '{}'\n".format(version))
+
+
+class develop(setuptools.command.develop.develop):
+
+    def run(self):
+        build_py.create_version_file()
+        setuptools.command.develop.develop.run(self)
+
+
+def create_readme_rst():
+    global cwd
+    try:
+        subprocess.check_call(
+            ["pandoc", "--from=markdown", "--to=rst", "--output=README.rst",
+             "README.md"], cwd=cwd)
+        print("Generated README.rst from README.md using pandoc.")
+    except subprocess.CalledProcessError:
+        pass
+    except OSError:
+        pass
+
+
+setup(name='TTS',
+      version=version,
+      url='https://github.com/mozilla/TTS',
+      description='Text to Speech with Deep Learning',
+      packages=find_packages(),
+      cmdclass={
+          'build_py': build_py,
+          'develop': develop,
+      },
+      install_requires=[
+          "numpy",
+          "scipy",
+          "librosa",
+          "torch >= 0.4.0",
+          "unidecode",
+          "tensorboardX",
+          "matplotlib",
+          "Pillow",
+          "flask",
+      ],
+      extras_require={
+          "bin": [
+              "tqdm",
+              "tensorboardX",
+              "requests",
+          ],
+      })
--- a/utils/text/__init__.py
+++ b/utils/text/__init__.py
@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-

 import re
-from TTS.utils.text import cleaners
-from TTS.utils.text.symbols import symbols
+from utils.text import cleaners
+from utils.text.symbols import symbols


 # Mappings from symbol to numeric ID and vice versa:
--- a/utils/text/symbols.py
+++ b/utils/text/symbols.py
@ -7,7 +7,7 @@ Defines the set of symbols used in text input to the model.
 The default is a set of ASCII characters that works well for English or text that has been run
 through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
 '''
-from TTS.utils.text import cmudict
+from utils.text import cmudict

 _pad = '_'
 _eos = '~'