Merge branch 'reuben-model-pkg-dev' into dev

This commit is contained in:
Eren Golge 2019-12-09 12:12:04 +01:00
commit 956f8b7672
11 changed files with 177 additions and 56 deletions

View File

@ -14,4 +14,6 @@ if [[ "$TEST_SUITE" == "unittest" ]]; then
pushd tts_namespace
python -m unittest
popd
# Test server package
./tests/test_server_package.sh
fi

View File

@ -1,9 +1,34 @@
## TTS example web-server
Steps to run:
1. Download one of the models given on the main page. Click [here](https://drive.google.com/drive/folders/1Q6BKeEkZyxSGsocK2p_mqgzLwlNvbHFJ?usp=sharing) for the latest model.
2. Check out the corresponding commit history, or use the ```server``` branch if you would like to use the latest model.
3. Set the paths and the other options in the file ```server/conf.json```.
4. Run the server ```python server/server.py -c server/conf.json```. (Requires Flask)
5. Go to ```localhost:[given_port]``` and enjoy.
For high quality results, please use the library versions shown in the ```requirements.txt``` file.
You'll need a model package (Zip file, includes TTS Python wheel, model files, server configuration, and optional nginx/uwsgi configs). Publicly available models are listed [here](https://github.com/mozilla/TTS/wiki/Released-Models).
Instructions below are based on an Ubuntu 18.04 machine, but it should be simple to adapt the package names to other distros if needed. Python 3.6 is recommended, as some of the dependencies' versions predate Python 3.7 and will force building from source, which requires extra dependencies and is not guaranteed to work.
Development server:
1. apt-get install -y espeak libsndfile1 python3-venv
2. python3 -m venv /tmp/venv
3. source /tmp/venv/bin/activate
4. pip install -U pip setuptools wheel
5. # Download model package
6. unzip model.zip
7. pip install -U ./TTS*.whl
8. python -m TTS.server.server
You can now browse to http://localhost:5002
Running with nginx/uwsgi:
1. apt-get install -y uwsgi uwsgi-plugin-python3 nginx espeak libsndfile1 python3-venv
2. python3 -m venv /tmp/venv
3. source /tmp/venv/bin/activate
4. pip install -U pip setuptools wheel
5. # Download model package
6. unzip model.zip
7. pip install -U ./TTS*.whl
8. cp tts_site_nginx /etc/nginx/sites-enabled/default
9. service nginx restart
10. uwsgi --ini uwsgi.ini
You can now browse to http://localhost:80 (edit the port in /etc/nginx/sites-enabled/tts_site_nginx).
Configure number of workers (number of requests that will be processed in parallel) in uwsgi.ini, `processes` setting.

View File

@ -1,17 +1,45 @@
#!flask/bin/python
import argparse
from synthesizer import Synthesizer
from TTS.utils.generic_utils import load_config
import os
from flask import Flask, request, render_template, send_file
from TTS.server.synthesizer import Synthesizer
def create_argparser():
    """Build the command-line argument parser for the TTS demo server."""

    def _parse_bool(value):
        # argparse's type=bool treats any non-empty string as True, so
        # the common affirmative spellings are parsed explicitly instead.
        return value.lower() in ('true', '1', 'yes')

    parser = argparse.ArgumentParser()
    # Path-like string options, registered in their original order so the
    # generated --help output is unchanged.
    string_options = (
        ('--tts_checkpoint', 'path to TTS checkpoint file'),
        ('--tts_config', 'path to TTS config.json file'),
        ('--tts_speakers', 'path to JSON file containing speaker ids, if speaker ids are used in the model'),
        ('--wavernn_lib_path', 'path to WaveRNN project folder to be imported. If this is not passed, model uses Griffin-Lim for synthesis.'),
        ('--wavernn_file', 'path to WaveRNN checkpoint file.'),
        ('--wavernn_config', 'path to WaveRNN config file.'),
    )
    for flag, description in string_options:
        parser.add_argument(flag, type=str, help=description)
    parser.add_argument('--is_wavernn_batched', type=_parse_bool,
                        default=False, help='true to use batched WaveRNN.')
    parser.add_argument('--port', type=int, default=5002,
                        help='port to listen on.')
    parser.add_argument('--use_cuda', type=_parse_bool, default=False,
                        help='true to use CUDA.')
    parser.add_argument('--debug', type=_parse_bool, default=False,
                        help='true to enable Flask debug mode.')
    return parser
config = None
synthesizer = None
embedded_model_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model')
checkpoint_file = os.path.join(embedded_model_folder, 'checkpoint.pth.tar')
config_file = os.path.join(embedded_model_folder, 'config.json')
if os.path.isfile(checkpoint_file) and os.path.isfile(config_file):
# Use default config with embedded model files
config = create_argparser().parse_args([])
config.tts_checkpoint = checkpoint_file
config.tts_config = config_file
synthesizer = Synthesizer(config)
parser = argparse.ArgumentParser()
parser.add_argument(
'-c', '--config_path', type=str, help='path to config file for training')
args = parser.parse_args()
config = load_config(args.config_path)
app = Flask(__name__)
synthesizer = Synthesizer(config)
@app.route('/')
def index():
@ -27,4 +55,8 @@ def tts():
if __name__ == '__main__':
if not config or not synthesizer:
args = create_argparser().parse_args()
synthesizer = Synthesizer(args)
app.run(debug=config.debug, host='0.0.0.0', port=config.port)

View File

@ -24,19 +24,20 @@ class Synthesizer(object):
def __init__(self, config):
self.wavernn = None
self.config = config
self.use_cuda = config.use_cuda
self.use_cuda = self.config.use_cuda
if self.use_cuda:
assert torch.cuda.is_available(), "CUDA is not availabe on this machine."
self.load_tts(self.config.tts_path, self.config.tts_file, self.config.tts_config, config.use_cuda)
self.load_tts(self.config.tts_checkpoint, self.config.tts_config,
self.config.use_cuda)
if self.config.wavernn_lib_path:
self.load_wavernn(config.wavernn_lib_path, config.wavernn_path, config.wavernn_file, config.wavernn_config, config.use_cuda)
self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_path,
self.config.wavernn_file, self.config.wavernn_config,
self.config.use_cuda)
def load_tts(self, model_path, model_file, model_config, use_cuda):
tts_config = os.path.join(model_path, model_config)
self.model_file = os.path.join(model_path, model_file)
def load_tts(self, tts_checkpoint, tts_config, use_cuda):
print(" > Loading TTS model ...")
print(" | > model config: ", tts_config)
print(" | > model file: ", model_file)
print(" | > checkpoint file: ", tts_checkpoint)
self.tts_config = load_config(tts_config)
self.use_phonemes = self.tts_config.use_phonemes
self.ap = AudioProcessor(**self.tts_config.audio)
@ -52,7 +53,8 @@ class Synthesizer(object):
num_speakers = 0
self.tts_model = setup_model(self.input_size, num_speakers=num_speakers, c=self.tts_config)
# load model state
cp = torch.load(self.model_file)
map_location = None if use_cuda else torch.device('cpu')
cp = torch.load(tts_checkpoint, map_location=map_location)
# load the model
self.tts_model.load_state_dict(cp['model'])
if use_cuda:

View File

@ -57,6 +57,7 @@
<div class="row">
<div class="col-lg-12 text-center">
<img class="mt-5" src="https://user-images.githubusercontent.com/1402048/52643646-c2102980-2edd-11e9-8c37-b72f3c89a640.png" alt=></img>
<h1 class="mt-5">Mozilla TTS</h1>
<ul class="list-unstyled">
</ul>
<input id="text" placeholder="Type here..." size=45 type="text" name="text">
@ -68,12 +69,10 @@
</div>
<!-- Bootstrap core JavaScript -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"></script>
<script>
function q(selector) {return document.querySelector(selector)}
q('#text').focus()
q('#speak-button').addEventListener('click', function(e) {
function do_tts(e) {
text = q('#text').value
if (text) {
q('#message').textContent = 'Synthesizing...'
@ -83,6 +82,12 @@
}
e.preventDefault()
return false
}
q('#speak-button').addEventListener('click', do_tts)
q('#text').addEventListener('keyup', function(e) {
if (e.keyCode == 13) { // enter
do_tts(e)
}
})
function synthesize(text) {
fetch('/api/tts?text=' + encodeURIComponent(text), {cache: 'no-cache'})

8
setup.cfg Normal file
View File

@ -0,0 +1,8 @@
[build_py]
build-lib=temp_build
[bdist_wheel]
bdist-dir=temp_build
[install_lib]
build-dir=temp_build

View File

@ -1,10 +1,23 @@
#!/usr/bin/env python
import argparse
import os
import shutil
import subprocess
import sys
from setuptools import setup, find_packages
import setuptools.command.develop
import setuptools.command.build_py
import os
import subprocess
# Custom packaging options for embedding a trained model in the wheel.
# add_help=False and allow_abbrev=False keep this parser from stealing
# -h/--help or prefix-matching flags that belong to setuptools.
parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
parser.add_argument('--checkpoint', type=str, help='Path to checkpoint file to embed in wheel.')
parser.add_argument('--model_config', type=str, help='Path to model configuration file to embed in wheel.')
# parse_known_args() splits off only our flags; everything else is kept
# for setuptools to consume.
args, unknown_args = parser.parse_known_args()

# Remove our arguments from argv so that setuptools doesn't see them
sys.argv = [sys.argv[0]] + unknown_args
version = '0.0.1'
@ -42,20 +55,17 @@ class develop(setuptools.command.develop.develop):
setuptools.command.develop.develop.run(self)
def create_readme_rst():
    """Generate README.rst from README.md using pandoc, best-effort.

    Any failure (pandoc not installed, or pandoc exiting non-zero) is
    deliberately swallowed so that packaging still works without pandoc.
    """
    pandoc_cmd = [
        "pandoc",
        "--from=markdown",
        "--to=rst",
        "--output=README.rst",
        "README.md",
    ]
    try:
        subprocess.check_call(pandoc_cmd, cwd=cwd)
        print("Generated README.rst from README.md using pandoc.")
    except (subprocess.CalledProcessError, OSError):
        # pandoc missing or conversion failed; README.rst is optional.
        pass
# Data files always shipped inside the package (server HTML templates).
package_data = ['server/templates/*']

# Optionally embed a trained model in the wheel: only when building a
# wheel and both --checkpoint and --model_config were supplied.
if 'bdist_wheel' in unknown_args and args.checkpoint and args.model_config:
    print('Embedding model in wheel file...')
    model_dir = os.path.join('server', 'model')
    os.makedirs(model_dir, exist_ok=True)
    # Copy under fixed names ('checkpoint.pth.tar' / 'config.json') so the
    # server can locate the embedded model at a known path at runtime.
    embedded_checkpoint_path = os.path.join(model_dir, 'checkpoint.pth.tar')
    shutil.copy(args.checkpoint, embedded_checkpoint_path)
    embedded_config_path = os.path.join(model_dir, 'config.json')
    shutil.copy(args.model_config, embedded_config_path)
    package_data.extend([embedded_checkpoint_path, embedded_config_path])
setup(
name='TTS',
@ -65,6 +75,9 @@ setup(
license='MPL-2.0',
package_dir={'': 'tts_namespace'},
packages=find_packages('tts_namespace'),
package_data={
'TTS': package_data,
},
project_urls={
'Documentation': 'https://github.com/mozilla/TTS/wiki',
'Tracker': 'https://github.com/mozilla/TTS/issues',
@ -75,12 +88,13 @@ setup(
'build_py': build_py,
'develop': develop,
},
setup_requires=["numpy==1.15.4"],
install_requires=[
"scipy >=0.19.0",
"torch >= 0.4.1",
"scipy>=0.19.0",
"torch>=0.4.1",
"numpy==1.15.4",
"librosa==0.6.2",
"unidecode==0.4.20",
"attrdict",
"tensorboardX",
"matplotlib",
"Pillow",

View File

@ -1,6 +1,5 @@
{
"tts_path":"TTS/tests/outputs/", // tts model root folder
"tts_file":"checkpoint_10.pth.tar", // tts checkpoint file
"tts_checkpoint":"checkpoint_10.pth.tar", // tts checkpoint file
"tts_config":"dummy_model_config.json", // tts config.json file
"tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
"wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis.

View File

@ -20,6 +20,8 @@ class DemoServerTest(unittest.TestCase):
def test_in_out(self):
self._create_random_model()
config = load_config(os.path.join(get_tests_input_path(), 'server_config.json'))
config['tts_path'] = get_tests_output_path()
tts_root_path = get_tests_output_path()
config['tts_checkpoint'] = os.path.join(tts_root_path, config['tts_checkpoint'])
config['tts_config'] = os.path.join(tts_root_path, config['tts_config'])
synthesizer = Synthesizer(config)
synthesizer.tts("Better this test works!!")

View File

@ -138,8 +138,8 @@ class TestTTSDataset(unittest.TestCase):
# there is a slight difference between two matrices.
# TODO: Check this assert cond more in detail.
assert abs((abs(mel.T)
- abs(mel_dl[:-1])
).sum()) < 1e-5, (abs(mel.T) - abs(mel_dl[:-1])).sum()
- abs(mel_dl)
).sum()) < 1e-5, (abs(mel.T) - abs(mel_dl)).sum()
# check mel-spec correctness
mel_spec = mel_input[0].cpu().numpy()
@ -155,9 +155,9 @@ class TestTTSDataset(unittest.TestCase):
OUTPATH + '/linear_target_dataloader.wav')
# check the last time step to be zero padded
assert linear_input[0, -1].sum() == 0
assert linear_input[0, -1].sum() != 0
assert linear_input[0, -2].sum() != 0
assert mel_input[0, -1].sum() == 0
assert mel_input[0, -1].sum() != 0
assert mel_input[0, -2].sum() != 0
assert stop_target[0, -1] == 1
assert stop_target[0, -2] == 0
@ -187,9 +187,9 @@ class TestTTSDataset(unittest.TestCase):
idx = 1
# check the first item in the batch
assert linear_input[idx, -1].sum() == 0
assert linear_input[idx, -1].sum() != 0
assert linear_input[idx, -2].sum() != 0, linear_input
assert mel_input[idx, -1].sum() == 0
assert mel_input[idx, -1].sum() != 0
assert mel_input[idx, -2].sum() != 0, mel_input
assert stop_target[idx, -1] == 1
assert stop_target[idx, -2] == 0
@ -204,6 +204,6 @@ class TestTTSDataset(unittest.TestCase):
assert stop_target[1 - idx, -1] == 1
assert len(mel_lengths.shape) == 1
# check batch conditions
assert (linear_input * stop_target.unsqueeze(2)).sum() == 0
assert (mel_input * stop_target.unsqueeze(2)).sum() == 0
# check batch zero-frame conditions (zero-frame disabled)
# assert (linear_input * stop_target.unsqueeze(2)).sum() == 0
# assert (mel_input * stop_target.unsqueeze(2)).sum() == 0

32
tests/test_server_package.sh Executable file
View File

@ -0,0 +1,32 @@
#!/bin/bash
# Smoke-test for the server wheel packaging: build a wheel with an embedded
# dummy model, install it into a throwaway venv, start the server, and
# verify it serves a readable WAV file.
set -xe

if [[ ! -f tests/outputs/checkpoint_10.pth.tar ]]; then
    echo "Missing dummy model in tests/outputs. This test needs to run after the Python unittests have been run."
    exit 1
fi

SERVER_PID=""

cleanup() {
    # Runs on every exit path (including set -e failures), so the
    # background server process, the temporary venv, and scratch files
    # never leak when a step in the middle of the script fails.
    if [[ -n "$SERVER_PID" ]]; then
        kill "$SERVER_PID" 2>/dev/null || true
    fi
    rm -rf /tmp/venv
    rm -f /tmp/audio.wav dist/*.whl
}
trap cleanup EXIT

python -m venv /tmp/venv
source /tmp/venv/bin/activate
pip install --quiet --upgrade pip setuptools wheel

rm -f dist/*.whl
python setup.py bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json
pip install --quiet dist/TTS*.whl

python -m TTS.server.server &
SERVER_PID=$!

echo 'Waiting for server...'
sleep 30

curl -o /tmp/audio.wav "http://localhost:5002/api/tts?text=synthesis%20schmynthesis"
# Opening the file with the wave module both validates the WAV header and
# confirms the synthesized audio is non-trivial.
python -c 'import sys; import wave; print(wave.open(sys.argv[1]).getnframes())' /tmp/audio.wav

deactivate