This commit is contained in:
Eren G 2018-07-04 16:00:54 +02:00
commit bdf3fd209a
11 changed files with 139 additions and 9 deletions

3
.compute Normal file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Starts a training run: activates the virtualenv at ../tmp/venv (path is
# relative to the current working directory) and launches train.py with
# config.json.
#
# Stop at the first failing command so that training is never started under
# the wrong interpreter when the virtualenv cannot be activated.
set -e
source ../tmp/venv/bin/activate
python train.py --config_path config.json

3
.gitignore vendored
View File

@ -116,5 +116,8 @@ venv.bak/
*.pth.tar
result/
# version file generated by setup.py
version.py
# jupyter dummy files
core

4
.install Normal file
View File

@ -0,0 +1,4 @@
#!/bin/bash
# Creates a Python 3 virtualenv at ../tmp/venv (path is relative to the
# current working directory), activates it, and installs this project into
# it in development mode.
#
# Stop at the first failing command: without this, a failed virtualenv
# creation or activation would let "setup.py develop" install into whatever
# interpreter "python" resolves to outside the virtualenv.
set -e
virtualenv -p python3 ../tmp/venv
source ../tmp/venv/bin/activate
python setup.py develop

View File

32
config.json Normal file
View File

@ -0,0 +1,32 @@
{
"model_name": "best-model",
"num_mels": 80,
"num_freq": 1025,
"sample_rate": 20000,
"frame_length_ms": 50,
"frame_shift_ms": 12.5,
"preemphasis": 0.97,
"min_level_db": -100,
"ref_level_db": 20,
"embedding_size": 256,
"text_cleaner": "english_cleaners",
"epochs": 1000,
"lr": 0.002,
"warmup_steps": 4000,
"batch_size": 32,
"eval_batch_size":32,
"r": 5,
"griffin_lim_iters": 60,
"power": 1.5,
"num_loader_workers": 8,
"checkpoint": true,
"save_step": 376,
"data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
"min_seq_len": 0,
"output_path": "models/"
}

View File

@ -5,9 +5,9 @@ import librosa
import torch
from torch.utils.data import Dataset
from TTS.utils.text import text_to_sequence
from TTS.utils.audio import AudioProcessor
from TTS.utils.data import (prepare_data, pad_per_step,
from utils.text import text_to_sequence
from utils.audio import AudioProcessor
from utils.data import (prepare_data, pad_per_step,
prepare_tensor, prepare_stop_target)

View File

@ -1,8 +1,8 @@
# coding: utf-8
import torch
from torch import nn
from TTS.utils.text.symbols import symbols
from TTS.layers.tacotron import Prenet, Encoder, Decoder, CBHG
from utils.text.symbols import symbols
from layers.tacotron import Prenet, Encoder, Decoder, CBHG
class Tacotron(nn.Module):

View File

@ -1,9 +1,9 @@
torch>=0.4.0
librosa
inflect
unidecode
tensorboard
tensorboardX
torch
matplotlib
Pillow
flask

88
setup.py Normal file
View File

@ -0,0 +1,88 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
import setuptools.command.develop
import setuptools.command.build_py
import os
import subprocess
from os.path import exists
# Adapted from https://github.com/pytorch/pytorch
# Directory containing this script; git is run from here.
cwd = os.path.dirname(os.path.abspath(__file__))

# Base version. It is either replaced wholesale by the
# TTS_PYTORCH_BUILD_VERSION environment variable, or extended with the short
# hash of the current git HEAD when that can be determined.
version = '0.0.1'
if not os.getenv('TTS_PYTORCH_BUILD_VERSION'):
    try:
        sha = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
    except (subprocess.CalledProcessError,
            IOError):  # IOError covers FileNotFoundError on python 3
        # Not a git checkout, or git is not available: keep the base version.
        pass
    else:
        version += '+' + sha[:7]
else:
    version = os.getenv('TTS_PYTORCH_BUILD_VERSION')
class build_py(setuptools.command.build_py.build_py):
    """``build_py`` command that writes ``version.py`` before building."""

    @staticmethod
    def create_version_file():
        """Write the current version string to ``version.py``.

        Reads the module-level ``version`` and ``cwd``; the file is created
        (or overwritten) inside the ``cwd`` directory and contains a single
        ``__version__`` assignment.
        """
        print('-- Building version ' + version)
        target = os.path.join(cwd, 'version.py')
        with open(target, 'w') as handle:
            handle.write("__version__ = '{}'\n".format(version))

    def run(self):
        """Generate the version file, then run the stock ``build_py``."""
        self.create_version_file()
        setuptools.command.build_py.build_py.run(self)
class develop(setuptools.command.develop.develop):
    """``develop`` command that writes ``version.py`` before installing."""

    def run(self):
        """Generate the version file, then run the stock ``develop``."""
        build_py.create_version_file()
        parent = setuptools.command.develop.develop
        parent.run(self)
def create_readme_rst():
    """Try to convert ``README.md`` to ``README.rst`` with pandoc.

    pandoc is run with the module-level ``cwd`` as its working directory.
    This is best-effort: if pandoc exits with an error or cannot be started,
    the failure is ignored and nothing is printed.
    """
    pandoc_cmd = [
        "pandoc",
        "--from=markdown",
        "--to=rst",
        "--output=README.rst",
        "README.md",
    ]
    try:
        subprocess.check_call(pandoc_cmd, cwd=cwd)
        print("Generated README.rst from README.md using pandoc.")
    except (subprocess.CalledProcessError, OSError):
        # Conversion is optional; carry on without README.rst.
        pass
# Package definition. The custom build_py/develop commands write version.py
# before delegating to the stock setuptools commands.
setup(
    name='TTS',
    version=version,
    url='https://github.com/mozilla/TTS',
    description='Text to Speech with Deep Learning',
    packages=find_packages(),
    cmdclass={
        'build_py': build_py,
        'develop': develop,
    },
    install_requires=[
        "numpy",
        "scipy",
        "librosa",
        "torch >= 0.4.0",
        "unidecode",
        # Listed in requirements.txt; it was missing here, so an install
        # driven only by setup.py would not pull it in.
        "inflect",
        "tensorboardX",
        "matplotlib",
        "Pillow",
        "flask",
    ],
    extras_require={
        "bin": [
            "tqdm",
            "tensorboardX",
            "requests",
        ],
    },
)

View File

@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
import re
from TTS.utils.text import cleaners
from TTS.utils.text.symbols import symbols
from utils.text import cleaners
from utils.text.symbols import symbols
# Mappings from symbol to numeric ID and vice versa:

View File

@ -7,7 +7,7 @@ Defines the set of symbols used in text input to the model.
The default is a set of ASCII characters that works well for English or text that has been run
through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
'''
from TTS.utils.text import cmudict
from utils.text import cmudict
_pad = '_'
_eos = '~'