mirror of https://github.com/coqui-ai/TTS.git

commit 956f8b7672: Merge branch 'reuben-model-pkg-dev' into dev

@@ -14,4 +14,6 @@ if [[ "$TEST_SUITE" == "unittest" ]]; then
    pushd tts_namespace
    python -m unittest
    popd
    # Test server package
    ./tests/test_server_package.sh
fi

@@ -1,9 +1,34 @@
## TTS example web-server

Steps to run:
1. Download one of the models given on the main page. Click [here](https://drive.google.com/drive/folders/1Q6BKeEkZyxSGsocK2p_mqgzLwlNvbHFJ?usp=sharing) for the latest model.
2. Check out the corresponding commit, or use the ```server``` branch if you want to use the latest model.
3. Set the paths and the other options in the file ```server/conf.json```.
4. Run the server: ```python server/server.py -c server/conf.json```. (Requires Flask.)
5. Go to ```localhost:[given_port]``` and enjoy.

For high-quality results, please use the library versions listed in the ```requirements.txt``` file.

You'll need a model package (a zip file that includes the TTS Python wheel, model files, server configuration, and optional nginx/uwsgi configs). Publicly available models are listed [here](https://github.com/mozilla/TTS/wiki/Released-Models).

The instructions below are based on an Ubuntu 18.04 machine, but it should be simple to adapt the package names to other distros if needed. Python 3.6 is recommended, as some of the dependencies' versions predate Python 3.7 and will force building from source, which requires extra dependencies and is not guaranteed to work.

Development server:

1. apt-get install -y espeak libsndfile1 python3-venv
2. python3 -m venv /tmp/venv
3. source /tmp/venv/bin/activate
4. pip install -U pip setuptools wheel
5. # Download model package
6. unzip model.zip
7. pip install -U ./TTS*.whl
8. python -m TTS.server.server

You can now browse to http://localhost:5002 and query the API as sketched below.
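
A quick way to confirm the development server is answering is to call the same `/api/tts` endpoint the demo page uses. This is only a sketch using the standard library; it assumes the server above is running on its default port 5002, and `tts_output.wav` is just an illustrative output path:

```python
import urllib.parse
import urllib.request

# Default port of `python -m TTS.server.server`.
url = "http://localhost:5002/api/tts?" + urllib.parse.urlencode({"text": "synthesis schmynthesis"})

# The endpoint answers with WAV audio, so just stream it to a file.
with urllib.request.urlopen(url) as response:
    wav_bytes = response.read()

with open("tts_output.wav", "wb") as f:
    f.write(wav_bytes)
print("Wrote", len(wav_bytes), "bytes to tts_output.wav")
```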

Running with nginx/uwsgi:

1. apt-get install -y uwsgi uwsgi-plugin-python3 nginx espeak libsndfile1 python3-venv
2. python3 -m venv /tmp/venv
3. source /tmp/venv/bin/activate
4. pip install -U pip setuptools wheel
5. # Download model package
6. unzip model.zip
7. pip install -U ./TTS*.whl
8. cp tts_site_nginx /etc/nginx/sites-enabled/default
9. service nginx restart
10. uwsgi --ini uwsgi.ini

You can now browse to http://localhost:80 (edit the port in /etc/nginx/sites-enabled/tts_site_nginx).
Configure the number of workers (the number of requests processed in parallel) with the `processes` setting in uwsgi.ini.
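
To sanity-check that several uwsgi workers really do serve requests in parallel, you can fire a few concurrent requests at the nginx front end. A minimal sketch, assuming the site config proxies `/api/tts` on port 80 as set up above:

```python
import concurrent.futures
import urllib.parse
import urllib.request

BASE_URL = "http://localhost:80/api/tts?"  # port as configured in tts_site_nginx

def synthesize(text):
    # Each call returns a complete WAV file; we only report its size here.
    with urllib.request.urlopen(BASE_URL + urllib.parse.urlencode({"text": text})) as response:
        return len(response.read())

sentences = ["Request number %d." % i for i in range(4)]
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as pool:
    # With `processes` >= 4 in uwsgi.ini these should overlap instead of queueing.
    for size in pool.map(synthesize, sentences):
        print("Received", size, "bytes of WAV audio")
```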

@@ -1,17 +1,45 @@
#!flask/bin/python
import argparse
from synthesizer import Synthesizer
from TTS.utils.generic_utils import load_config
import os

from flask import Flask, request, render_template, send_file
from TTS.server.synthesizer import Synthesizer


def create_argparser():
    def convert_boolean(x):
        return x.lower() in ['true', '1', 'yes']

    parser = argparse.ArgumentParser()
    parser.add_argument('--tts_checkpoint', type=str, help='path to TTS checkpoint file')
    parser.add_argument('--tts_config', type=str, help='path to TTS config.json file')
    parser.add_argument('--tts_speakers', type=str, help='path to JSON file containing speaker ids, if speaker ids are used in the model')
    parser.add_argument('--wavernn_lib_path', type=str, help='path to WaveRNN project folder to be imported. If this is not passed, model uses Griffin-Lim for synthesis.')
    parser.add_argument('--wavernn_file', type=str, help='path to WaveRNN checkpoint file.')
    parser.add_argument('--wavernn_config', type=str, help='path to WaveRNN config file.')
    parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.')
    parser.add_argument('--port', type=int, default=5002, help='port to listen on.')
    parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.')
    parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.')
    return parser


config = None
synthesizer = None

embedded_model_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model')
checkpoint_file = os.path.join(embedded_model_folder, 'checkpoint.pth.tar')
config_file = os.path.join(embedded_model_folder, 'config.json')

if os.path.isfile(checkpoint_file) and os.path.isfile(config_file):
    # Use default config with embedded model files
    config = create_argparser().parse_args([])
    config.tts_checkpoint = checkpoint_file
    config.tts_config = config_file
    synthesizer = Synthesizer(config)

parser = argparse.ArgumentParser()
parser.add_argument(
    '-c', '--config_path', type=str, help='path to config file for training')
args = parser.parse_args()

config = load_config(args.config_path)
app = Flask(__name__)
synthesizer = Synthesizer(config)

@app.route('/')
def index():

@@ -27,4 +55,8 @@ def tts():


if __name__ == '__main__':
    if not config or not synthesizer:
        args = create_argparser().parse_args()
        synthesizer = Synthesizer(args)

    app.run(debug=config.debug, host='0.0.0.0', port=config.port)
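
The same flow can be exercised without Flask: build a namespace with the fields `create_argparser()` defines and hand it to `Synthesizer`, as the demo server test does. A minimal sketch, assuming the TTS wheel from a model package is installed; the checkpoint and config paths are placeholders, not files shipped with the package:

```python
import argparse

from TTS.server.synthesizer import Synthesizer

# Placeholder paths: point these at the files from an unzipped model package.
args = argparse.Namespace(
    tts_checkpoint='model/checkpoint.pth.tar',
    tts_config='model/config.json',
    tts_speakers=None,
    wavernn_lib_path=None,   # keep None to synthesize with Griffin-Lim
    wavernn_file=None,
    wavernn_config=None,
    is_wavernn_batched=False,
    port=5002,
    use_cuda=False,
    debug=False,
)

synthesizer = Synthesizer(args)
synthesizer.tts("Better this test works!!")  # same smoke call as the demo server test
```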

@@ -24,19 +24,20 @@ class Synthesizer(object):
    def __init__(self, config):
        self.wavernn = None
        self.config = config
        self.use_cuda = config.use_cuda
        self.use_cuda = self.config.use_cuda
        if self.use_cuda:
            assert torch.cuda.is_available(), "CUDA is not available on this machine."
        self.load_tts(self.config.tts_path, self.config.tts_file, self.config.tts_config, config.use_cuda)
        self.load_tts(self.config.tts_checkpoint, self.config.tts_config,
                      self.config.use_cuda)
        if self.config.wavernn_lib_path:
            self.load_wavernn(config.wavernn_lib_path, config.wavernn_path, config.wavernn_file, config.wavernn_config, config.use_cuda)
            self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_path,
                              self.config.wavernn_file, self.config.wavernn_config,
                              self.config.use_cuda)

    def load_tts(self, model_path, model_file, model_config, use_cuda):
        tts_config = os.path.join(model_path, model_config)
        self.model_file = os.path.join(model_path, model_file)
    def load_tts(self, tts_checkpoint, tts_config, use_cuda):
        print(" > Loading TTS model ...")
        print(" | > model config: ", tts_config)
        print(" | > model file: ", model_file)
        print(" | > checkpoint file: ", tts_checkpoint)
        self.tts_config = load_config(tts_config)
        self.use_phonemes = self.tts_config.use_phonemes
        self.ap = AudioProcessor(**self.tts_config.audio)

@@ -52,7 +53,8 @@ class Synthesizer(object):
        num_speakers = 0
        self.tts_model = setup_model(self.input_size, num_speakers=num_speakers, c=self.tts_config)
        # load model state
        cp = torch.load(self.model_file)
        map_location = None if use_cuda else torch.device('cpu')
        cp = torch.load(tts_checkpoint, map_location=map_location)
        # load the model
        self.tts_model.load_state_dict(cp['model'])
        if use_cuda:

@@ -57,6 +57,7 @@
    <div class="row">
      <div class="col-lg-12 text-center">
        <img class="mt-5" src="https://user-images.githubusercontent.com/1402048/52643646-c2102980-2edd-11e9-8c37-b72f3c89a640.png" alt=></img>
        <h1 class="mt-5">Mozilla TTS</h1>
        <ul class="list-unstyled">
        </ul>
        <input id="text" placeholder="Type here..." size=45 type="text" name="text">

@@ -68,12 +69,10 @@
    </div>

    <!-- Bootstrap core JavaScript -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"></script>
    <script>
      function q(selector) {return document.querySelector(selector)}
      q('#text').focus()
      q('#speak-button').addEventListener('click', function(e) {
      function do_tts(e) {
        text = q('#text').value
        if (text) {
          q('#message').textContent = 'Synthesizing...'

@@ -83,6 +82,12 @@
        }
        e.preventDefault()
        return false
      }
      q('#speak-button').addEventListener('click', do_tts)
      q('#text').addEventListener('keyup', function(e) {
        if (e.keyCode == 13) { // enter
          do_tts(e)
        }
      })
      function synthesize(text) {
        fetch('/api/tts?text=' + encodeURIComponent(text), {cache: 'no-cache'})

@@ -0,0 +1,8 @@
[build_py]
build-lib=temp_build

[bdist_wheel]
bdist-dir=temp_build

[install_lib]
build-dir=temp_build

setup.py

@@ -1,10 +1,23 @@
#!/usr/bin/env python

import argparse
import os
import shutil
import subprocess
import sys

from setuptools import setup, find_packages
import setuptools.command.develop
import setuptools.command.build_py
import os
import subprocess


parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
parser.add_argument('--checkpoint', type=str, help='Path to checkpoint file to embed in wheel.')
parser.add_argument('--model_config', type=str, help='Path to model configuration file to embed in wheel.')
args, unknown_args = parser.parse_known_args()

# Remove our arguments from argv so that setuptools doesn't see them
sys.argv = [sys.argv[0]] + unknown_args

version = '0.0.1'

@@ -42,20 +55,17 @@ class develop(setuptools.command.develop.develop):
        setuptools.command.develop.develop.run(self)


def create_readme_rst():
    try:
        subprocess.check_call(
            [
                "pandoc", "--from=markdown", "--to=rst", "--output=README.rst",
                "README.md"
            ],
            cwd=cwd)
        print("Generated README.rst from README.md using pandoc.")
    except subprocess.CalledProcessError:
        pass
    except OSError:
        pass
package_data = ['server/templates/*']

if 'bdist_wheel' in unknown_args and args.checkpoint and args.model_config:
    print('Embedding model in wheel file...')
    model_dir = os.path.join('server', 'model')
    os.makedirs(model_dir, exist_ok=True)
    embedded_checkpoint_path = os.path.join(model_dir, 'checkpoint.pth.tar')
    shutil.copy(args.checkpoint, embedded_checkpoint_path)
    embedded_config_path = os.path.join(model_dir, 'config.json')
    shutil.copy(args.model_config, embedded_config_path)
    package_data.extend([embedded_checkpoint_path, embedded_config_path])

setup(
    name='TTS',

@@ -65,6 +75,9 @@ setup(
    license='MPL-2.0',
    package_dir={'': 'tts_namespace'},
    packages=find_packages('tts_namespace'),
    package_data={
        'TTS': package_data,
    },
    project_urls={
        'Documentation': 'https://github.com/mozilla/TTS/wiki',
        'Tracker': 'https://github.com/mozilla/TTS/issues',

@@ -75,12 +88,13 @@ setup(
        'build_py': build_py,
        'develop': develop,
    },
    setup_requires=["numpy==1.15.4"],
    install_requires=[
        "scipy >=0.19.0",
        "torch >= 0.4.1",
        "scipy>=0.19.0",
        "torch>=0.4.1",
        "numpy==1.15.4",
        "librosa==0.6.2",
        "unidecode==0.4.20",
        "attrdict",
        "tensorboardX",
        "matplotlib",
        "Pillow",
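
The custom `--checkpoint`/`--model_config` flags above are parsed by setup.py itself and stripped from sys.argv before setuptools runs, which is how a model gets copied into `TTS/server/model/` and shipped inside the wheel. A sketch of the intended invocation, mirroring tests/test_server_package.sh and using its dummy test artifacts:

```python
import subprocess
import sys

# Build a wheel with the dummy model embedded under TTS/server/model/.
# Run from the repository root after the unit tests have produced these files.
subprocess.check_call([
    sys.executable, "setup.py", "bdist_wheel",
    "--checkpoint", "tests/outputs/checkpoint_10.pth.tar",
    "--model_config", "tests/outputs/dummy_model_config.json",
])
```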

@@ -1,6 +1,5 @@
{
    "tts_path":"TTS/tests/outputs/", // tts model root folder
    "tts_file":"checkpoint_10.pth.tar", // tts checkpoint file
    "tts_checkpoint":"checkpoint_10.pth.tar", // tts checkpoint file
    "tts_config":"dummy_model_config.json", // tts config.json file
    "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
    "wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis.

@@ -20,6 +20,8 @@ class DemoServerTest(unittest.TestCase):
    def test_in_out(self):
        self._create_random_model()
        config = load_config(os.path.join(get_tests_input_path(), 'server_config.json'))
        config['tts_path'] = get_tests_output_path()
        tts_root_path = get_tests_output_path()
        config['tts_checkpoint'] = os.path.join(tts_root_path, config['tts_checkpoint'])
        config['tts_config'] = os.path.join(tts_root_path, config['tts_config'])
        synthesizer = Synthesizer(config)
        synthesizer.tts("Better this test works!!")

@@ -138,8 +138,8 @@ class TestTTSDataset(unittest.TestCase):
            # there is a slight difference between two matrices.
            # TODO: Check this assert cond more in detail.
            assert abs((abs(mel.T)
                        - abs(mel_dl[:-1])
                        ).sum()) < 1e-5, (abs(mel.T) - abs(mel_dl[:-1])).sum()
                        - abs(mel_dl)
                        ).sum()) < 1e-5, (abs(mel.T) - abs(mel_dl)).sum()

            # check mel-spec correctness
            mel_spec = mel_input[0].cpu().numpy()

@@ -155,9 +155,9 @@ class TestTTSDataset(unittest.TestCase):
                            OUTPATH + '/linear_target_dataloader.wav')

            # check the last time step to be zero padded
            assert linear_input[0, -1].sum() == 0
            assert linear_input[0, -1].sum() != 0
            assert linear_input[0, -2].sum() != 0
            assert mel_input[0, -1].sum() == 0
            assert mel_input[0, -1].sum() != 0
            assert mel_input[0, -2].sum() != 0
            assert stop_target[0, -1] == 1
            assert stop_target[0, -2] == 0

@@ -187,9 +187,9 @@ class TestTTSDataset(unittest.TestCase):
            idx = 1

            # check the first item in the batch
            assert linear_input[idx, -1].sum() == 0
            assert linear_input[idx, -1].sum() != 0
            assert linear_input[idx, -2].sum() != 0, linear_input
            assert mel_input[idx, -1].sum() == 0
            assert mel_input[idx, -1].sum() != 0
            assert mel_input[idx, -2].sum() != 0, mel_input
            assert stop_target[idx, -1] == 1
            assert stop_target[idx, -2] == 0

@@ -204,6 +204,6 @@ class TestTTSDataset(unittest.TestCase):
            assert stop_target[1 - idx, -1] == 1
            assert len(mel_lengths.shape) == 1

            # check batch conditions
            assert (linear_input * stop_target.unsqueeze(2)).sum() == 0
            assert (mel_input * stop_target.unsqueeze(2)).sum() == 0
            # check batch zero-frame conditions (zero-frame disabled)
            # assert (linear_input * stop_target.unsqueeze(2)).sum() == 0
            # assert (mel_input * stop_target.unsqueeze(2)).sum() == 0

@@ -0,0 +1,32 @@
#!/bin/bash
set -xe

if [[ ! -f tests/outputs/checkpoint_10.pth.tar ]]; then
    echo "Missing dummy model in tests/outputs. This test needs to run after the Python unittests have been run."
    exit 1
fi

python -m venv /tmp/venv
source /tmp/venv/bin/activate
pip install --quiet --upgrade pip setuptools wheel

rm -f dist/*.whl
python setup.py bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json
pip install --quiet dist/TTS*.whl

python -m TTS.server.server &
SERVER_PID=$!

echo 'Waiting for server...'
sleep 30

curl -o /tmp/audio.wav "http://localhost:5002/api/tts?text=synthesis%20schmynthesis"
python -c 'import sys; import wave; print(wave.open(sys.argv[1]).getnframes())' /tmp/audio.wav

kill $SERVER_PID

deactivate
rm -rf /tmp/venv

rm /tmp/audio.wav
rm dist/*.whl