From b116bdaefab8ad5054f943f28b8788b8440da3ca Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Tue, 14 Jul 2020 17:47:47 +0200
Subject: [PATCH 1/9] new requirements

---
 requirements.txt                    |  2 ++
 setup.py                            |  2 ++
 vocoder/tf/convert_melgan_tflite.py | 34 +++++++++++++++++++++++++++++
 vocoder/tf/utils/tflite.py          | 31 ++++++++++++++++++++++++++
 4 files changed, 69 insertions(+)
 create mode 100644 vocoder/tf/convert_melgan_tflite.py
 create mode 100644 vocoder/tf/utils/tflite.py

diff --git a/requirements.txt b/requirements.txt
index 959fe2d7..edd640b8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,5 @@ soundfile
 nose==1.3.7
 cardboardlint==1.3.0
 pylint==2.5.3
+fuzzywuzzy
+gdown
diff --git a/setup.py b/setup.py
index b139dc20..11fef51c 100644
--- a/setup.py
+++ b/setup.py
@@ -98,6 +98,8 @@ requirements = {
         "nose==1.3.7",
         "cardboardlint==1.3.0",
         "pylint==2.5.3",
+        'fuzzywuzzy',
+        'gdown'
     ],
     'pip_install':[
         'tensorflow>=2.2.0',
diff --git a/vocoder/tf/convert_melgan_tflite.py b/vocoder/tf/convert_melgan_tflite.py
new file mode 100644
index 00000000..d84aae19
--- /dev/null
+++ b/vocoder/tf/convert_melgan_tflite.py
@@ -0,0 +1,34 @@
+# Convert Tensorflow MelGAN model to TF-Lite binary
+
+import argparse
+
+from TTS.utils.io import load_config
+from TTS.utils.text.symbols import symbols, phonemes
+from TTS.vocoder.tf.utils.generic_utils import setup_generator
+from TTS.vocoder.tf.utils.io import load_checkpoint
+from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--tf_model',
+                    type=str,
+                    help='Path to target torch model to be converted to TF.')
+parser.add_argument('--config_path',
+                    type=str,
+                    help='Path to config file of torch model.')
+parser.add_argument('--output_path',
+                    type=str,
+                    help='path to tflite output binary.')
+args = parser.parse_args()
+
+# Set constants
+CONFIG = load_config(args.config_path)
+
+# load the model
+model = setup_generator(CONFIG)
+model.build_inference()
+model = load_checkpoint(model, args.tf_model)
+
+# create tflite model
+tflite_model = convert_melgan_to_tflite(model, output_path=args.output_path)
+
diff --git a/vocoder/tf/utils/tflite.py b/vocoder/tf/utils/tflite.py
new file mode 100644
index 00000000..3669434e
--- /dev/null
+++ b/vocoder/tf/utils/tflite.py
@@ -0,0 +1,31 @@
+import tensorflow as tf
+
+
+
+def convert_melgan_to_tflite(model,
+                                output_path=None,
+                                experimental_converter=True):
+    """Convert Tensorflow MelGAN model to TFLite. Save a binary file if output_path is
+    provided, else return TFLite model."""
+
+    concrete_function = model.inference_tflite.get_concrete_function()
+    converter = tf.lite.TFLiteConverter.from_concrete_functions(
+        [concrete_function])
+    converter.experimental_new_converter = experimental_converter
+    converter.optimizations = []
+    converter.target_spec.supported_ops = [
+        tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
+    ]
+    tflite_model = converter.convert()
+    print(f'Tflite Model size is {len(tflite_model) / (1024.0 * 1024.0)} MBs.')
+    if output_path is not None:
+        # save model binary if output_path is provided
+        with open(output_path, 'wb') as f:
+            f.write(tflite_model)
+        return None
+    return tflite_model
+
+def load_tflite_model(tflite_path):
+    tflite_model = tf.lite.Interpreter(model_path=tflite_path)
+    tflite_model.allocate_tensors()
+    return tflite_model
\ No newline at end of file

From 2d596aa1403a958a52eb470a2647486eb6803966 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Tue, 14 Jul 2020 17:48:17 +0200
Subject: [PATCH 2/9] tf_lite tacotron2 bug fix

---
 tf/convert_tacotron2_tflite.py | 4 +---
 tf/utils/tflite.py             | 1 +
 vocoder/tf/layers/pqmf.py      | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tf/convert_tacotron2_tflite.py b/tf/convert_tacotron2_tflite.py
index e06cac2b..fc46cc79 100644
--- a/tf/convert_tacotron2_tflite.py
+++ b/tf/convert_tacotron2_tflite.py
@@ -34,6 +34,4 @@ model = load_checkpoint(model, args.tf_model)
 model.decoder.set_max_decoder_steps(1000)
 
 # create tflite model
-tflite_model = convert_tacotron2_to_tflite(model, output_path=args.output_path)
-
-print(f'Tflite Model size is {len(tflite_model) / (1024.0 * 1024.0)} MBs.')
+tflite_model = convert_tacotron2_to_tflite(model, output_path=args.output_path)
\ No newline at end of file
diff --git a/tf/utils/tflite.py b/tf/utils/tflite.py
index 6c37f170..5e684b30 100644
--- a/tf/utils/tflite.py
+++ b/tf/utils/tflite.py
@@ -16,6 +16,7 @@ def convert_tacotron2_to_tflite(model,
         tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS
     ]
     tflite_model = converter.convert()
+    print(f'Tflite Model size is {len(tflite_model) / (1024.0 * 1024.0)} MBs.')
     if output_path is not None:
         # same model binary if outputpath is provided
         with open(output_path, 'wb') as f:
diff --git a/vocoder/tf/layers/pqmf.py b/vocoder/tf/layers/pqmf.py
index 6c47dfc4..c018971f 100644
--- a/vocoder/tf/layers/pqmf.py
+++ b/vocoder/tf/layers/pqmf.py
@@ -51,7 +51,7 @@ class PQMF(tf.keras.layers.Layer):
 
     def synthesis(self, x):
         """
-        x : B x 1 x T
+        x : B x D x T
         """
         x = tf.transpose(x, perm=[0, 2, 1])
         x = tf.nn.conv1d_transpose(

From b2cc256dabca983f417204ae3a2b382e6d1d5cc9 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Tue, 14 Jul 2020 17:48:44 +0200
Subject: [PATCH 3/9] tflite inference for melgan models

---
 vocoder/tf/models/melgan_generator.py         | 19 +++++++++++++++-
 .../tf/models/multiband_melgan_generator.py   | 22 ++++++++++++++-----
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/vocoder/tf/models/melgan_generator.py b/vocoder/tf/models/melgan_generator.py
index bf67f3d2..168fd29e 100644
--- a/vocoder/tf/models/melgan_generator.py
+++ b/vocoder/tf/models/melgan_generator.py
@@ -108,4 +108,21 @@ class MelganGenerator(tf.keras.models.Model):
 
     def build_inference(self):
         x = tf.random.uniform((1, self.in_channels, 4), dtype=tf.float32)
-        self(x, training=False)
\ No newline at end of file
+        self(x, training=False)
+
+    @tf.function(
+        experimental_relax_shapes=True,
+        input_signature=[
+            tf.TensorSpec([1, None, None], dtype=tf.float32),
+        ],)
+    def inference_tflite(self, c):
+        c = tf.transpose(c, perm=[0, 2, 1])
+        c = tf.expand_dims(c, 2)
+        # FIXME: TF had no replicate padding as in Torch
+        # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT")
+        o = c
+        for layer in self.model_layers:
+            o = layer(o)
+        # o = self.model_layers(c)
+        o = tf.transpose(o, perm=[0, 3, 2, 1])
+        return o[:, :, 0, :]
\ No newline at end of file
diff --git a/vocoder/tf/models/multiband_melgan_generator.py b/vocoder/tf/models/multiband_melgan_generator.py
index c63ed06a..bdd333ed 100644
--- a/vocoder/tf/models/multiband_melgan_generator.py
+++ b/vocoder/tf/models/multiband_melgan_generator.py
@@ -30,11 +30,6 @@ class MultibandMelganGenerator(MelganGenerator):
     def pqmf_synthesis(self, x):
         return self.pqmf_layer.synthesis(x)
 
-    # def call(self, c, training=False):
-    #     if training:
-    #         raise NotImplementedError()
-    #     return self.inference(c)
-
     def inference(self, c):
         c = tf.transpose(c, perm=[0, 2, 1])
         c = tf.expand_dims(c, 2)
@@ -46,3 +41,20 @@ class MultibandMelganGenerator(MelganGenerator):
         o = tf.transpose(o, perm=[0, 3, 2, 1])
         o = self.pqmf_layer.synthesis(o[:, :, 0, :])
         return o
+
+    @tf.function(
+        experimental_relax_shapes=True,
+        input_signature=[
+            tf.TensorSpec([1, 80, None], dtype=tf.float32),
+        ],)
+    def inference_tflite(self, c):
+        c = tf.transpose(c, perm=[0, 2, 1])
+        c = tf.expand_dims(c, 2)
+        # FIXME: TF had no replicate padding as in Torch
+        # c = tf.pad(c, [[0, 0], [self.inference_padding, self.inference_padding], [0, 0], [0, 0]], "REFLECT")
+        o = c
+        for layer in self.model_layers:
+            o = layer(o)
+        o = tf.transpose(o, perm=[0, 3, 2, 1])
+        o = self.pqmf_layer.synthesis(o[:, :, 0, :])
+        return o

From 664f42df33de48f9d6ab8b5504d129393e3e1204 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Tue, 14 Jul 2020 17:55:46 +0200
Subject: [PATCH 4/9] pylint fixes and add missing requirement

---
 requirements.txt                    | 1 +
 server/synthesizer.py               | 1 -
 setup.py                            | 1 +
 vocoder/tf/convert_melgan_tflite.py | 1 -
 vocoder/tf/utils/tflite.py          | 6 +++---
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index edd640b8..ec7a1092 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,6 +15,7 @@ tqdm
 inflect
 pysbd
 bokeh==1.4.0
+pysbd
 soundfile
 nose==1.3.7
 cardboardlint==1.3.0
diff --git a/server/synthesizer.py b/server/synthesizer.py
index 0c402609..0f743d87 100644
--- a/server/synthesizer.py
+++ b/server/synthesizer.py
@@ -1,5 +1,4 @@
 import io
-import re
 import sys
 import time
 
diff --git a/setup.py b/setup.py
index 11fef51c..3f02dd09 100644
--- a/setup.py
+++ b/setup.py
@@ -93,6 +93,7 @@ requirements = {
         "inflect",
         "pysbd",
         "bokeh==1.4.0",
+        "pysbd",
         "soundfile",
         "phonemizer>=2.2.0",
         "nose==1.3.7",
diff --git a/vocoder/tf/convert_melgan_tflite.py b/vocoder/tf/convert_melgan_tflite.py
index d84aae19..9a652b57 100644
--- a/vocoder/tf/convert_melgan_tflite.py
+++ b/vocoder/tf/convert_melgan_tflite.py
@@ -3,7 +3,6 @@
 import argparse
 
 from TTS.utils.io import load_config
-from TTS.utils.text.symbols import symbols, phonemes
 from TTS.vocoder.tf.utils.generic_utils import setup_generator
 from TTS.vocoder.tf.utils.io import load_checkpoint
 from TTS.vocoder.tf.utils.tflite import convert_melgan_to_tflite
diff --git a/vocoder/tf/utils/tflite.py b/vocoder/tf/utils/tflite.py
index 3669434e..d0637596 100644
--- a/vocoder/tf/utils/tflite.py
+++ b/vocoder/tf/utils/tflite.py
@@ -1,10 +1,9 @@
 import tensorflow as tf
 
 
-
 def convert_melgan_to_tflite(model,
-                                output_path=None,
-                                experimental_converter=True):
+                             output_path=None,
+                             experimental_converter=True):
     """Convert Tensorflow MelGAN model to TFLite. Save a binary file if output_path is
     provided, else return TFLite model."""
 
@@ -25,6 +24,7 @@ def convert_melgan_to_tflite(model,
         return None
     return tflite_model
 
+
 def load_tflite_model(tflite_path):
     tflite_model = tf.lite.Interpreter(model_path=tflite_path)
     tflite_model.allocate_tensors()

From c9e2df14510a6413f2cc8dec3294f4f01f4a7b8a Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Wed, 15 Jul 2020 11:08:35 +0200
Subject: [PATCH 5/9] README.md update

---
 README.md | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index ce496707..4903d4b0 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,10 @@
 <p align="center"><img src="https://user-images.githubusercontent.com/1402048/52643646-c2102980-2edd-11e9-8c37-b72f3c89a640.png" data-canonical-src="![TTS banner](https://user-images.githubusercontent.com/1402048/52643646-c2102980-2edd-11e9-8c37-b72f3c89a640.png =250x250)
 " width="320" height="95" /></p>
 
+<center>
 <img src="https://travis-ci.org/mozilla/TTS.svg?branch=dev"/>
+[![Discourse](https://img.shields.io/badge/discourse-online-green.svg)](https://discourse.mozilla.org/c/tts)
+</center>
 
 This project is a part of [Mozilla Common Voice](https://voice.mozilla.org/en). TTS aims a deep learning based Text2Speech engine, low in cost and high in quality.
 
@@ -38,25 +41,26 @@ Vocoders:
 You can also help us implement more models. Some TTS related work can be found [here](https://github.com/erogol/TTS-papers).
 
 ## Features
-- High performance Deep Learning models for Text2Speech related tasks.
-    - Text2Speech models (Tacotron, Tacotron2).
+- High performance Deep Learning models for Text2Speech tasks.
+    - Text2Spec models (Tacotron, Tacotron2).
     - Speaker Encoder to compute speaker embeddings efficiently.
-    - Vocoder models (MelGAN, Multiband-MelGAN, GAN-TTS)
-- Support for multi-speaker TTS training.
-- Support for Multi-GPUs training.
-- Ability to convert Torch models to Tensorflow 2.0 for inference.
-- Released pre-trained models.
+    - Vocoder models (MelGAN, Multiband-MelGAN, GAN-TTS, ParallelWaveGAN)
 - Fast and efficient model training.
 - Detailed training logs on console and Tensorboard.
+- Support for multi-speaker TTS.
+- Efficient multi-GPU training.
+- Ability to convert PyTorch models to Tensorflow 2.0 and TFLite for inference.
+- Released models in PyTorch, Tensorflow and TFLite.
 - Tools to curate Text2Speech datasets under```dataset_analysis```.
 - Demo server for model testing.
 - Notebooks for extensive model benchmarking.
 - Modular (but not too much) code base enabling easy testing for new ideas.
 
-## Requirements and Installation
+## Main Requirements and Installation
 Highly recommended to use [miniconda](https://conda.io/miniconda.html) for easier installation.
   * python>=3.6
-  * pytorch>=0.4.1
+  * pytorch>=1.4.1
+  * tensorflow>=2.2
   * librosa
   * tensorboard
   * tensorboardX
@@ -107,21 +111,7 @@ Audio examples: [soundcloud](https://soundcloud.com/user-565970875/pocket-articl
 
 <img src="images/example_model_output.png?raw=true" alt="example_output" width="400"/>
 
-## Runtime
-The most time-consuming part is the vocoder algorithm (Griffin-Lim) which runs on CPU. By setting its number of iterations lower, you might have faster execution with a small loss of quality. Some of the experimental values are below.
-
-Sentence: "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."
-
-Audio length is approximately 6 secs.
-
-| Time (secs) | System | # GL iters | Model
-| ---- |:-------|:-----------| ---- |
-|2.00|GTX1080Ti|30|Tacotron|
-|3.01|GTX1080Ti|60|Tacotron|
-|3.57|CPU|60|Tacotron|
-|5.27|GTX1080Ti|60|Tacotron2|
-|6.50|CPU|60|Tacotron2|
-
+## [Mozilla TTS Tutorials and Notebooks](https://github.com/mozilla/TTS/wiki/TTS-Notebooks-and-Tutorials)
 
 ## Datasets and Data-Loading
 TTS provides a generic dataloader easy to use for new datasets. You need to write an preprocessor function to integrate your own dataset.Check ```datasets/preprocess.py``` to see some examples. After the function, you need to set ```dataset``` field in ```config.json```. Do not forget other data related fields too.

From 3ece0c49c34999f234cf668c52de4a5b9d5a2968 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@mozilla.com>
Date: Wed, 15 Jul 2020 11:13:33 +0200
Subject: [PATCH 6/9] Update README.md

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 4903d4b0..d8f8ac93 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
 <p align="center"><img src="https://user-images.githubusercontent.com/1402048/52643646-c2102980-2edd-11e9-8c37-b72f3c89a640.png" data-canonical-src="![TTS banner](https://user-images.githubusercontent.com/1402048/52643646-c2102980-2edd-11e9-8c37-b72f3c89a640.png =250x250)
 " width="320" height="95" /></p>
 
-<center>
-<img src="https://travis-ci.org/mozilla/TTS.svg?branch=dev"/>
-[![Discourse](https://img.shields.io/badge/discourse-online-green.svg)](https://discourse.mozilla.org/c/tts)
-</center>
+<p align='center'>
+    <img src="https://travis-ci.org/mozilla/TTS.svg?branch=dev"/>
+    <a href='https://discourse.mozilla.org/c/tts'><img src="https://img.shields.io/badge/discourse-online-green.svg"/></a>
+</p>
 
 This project is a part of [Mozilla Common Voice](https://voice.mozilla.org/en). TTS aims a deep learning based Text2Speech engine, low in cost and high in quality.
 

From c1d3b2a079d0be7a84833ce8d5572c6594760722 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@mozilla.com>
Date: Wed, 15 Jul 2020 11:14:46 +0200
Subject: [PATCH 7/9] Update README.md

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d8f8ac93..5c617b2f 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,9 @@
     <a href='https://discourse.mozilla.org/c/tts'><img src="https://img.shields.io/badge/discourse-online-green.svg"/></a>
 </p>
 
-This project is a part of [Mozilla Common Voice](https://voice.mozilla.org/en). TTS aims a deep learning based Text2Speech engine, low in cost and high in quality.
+This project is a part of [Mozilla Common Voice](https://voice.mozilla.org/en). 
+
+Mozilla TTS aims to be a deep learning based Text2Speech engine, low in cost and high in quality.
 
 You can check some of synthesized voice samples from [here](https://erogol.github.io/ddc-samples/).
 

From fb406d6e4d95bda4d9a67162879a1f094dbccad7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@mozilla.com>
Date: Wed, 15 Jul 2020 11:15:26 +0200
Subject: [PATCH 8/9] Update README.md

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 5c617b2f..49b0dc58 100644
--- a/README.md
+++ b/README.md
@@ -156,8 +156,6 @@ In case of any error or intercepted execution, if there is no checkpoint yet und
 
 You can also enjoy Tensorboard,  if you point Tensorboard argument```--logdir``` to the experiment folder.
 
-## [Testing and Examples](https://github.com/mozilla/TTS/wiki/Examples-using-TTS)
-
 ## Contribution guidelines
 This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines.](https://www.mozilla.org/about/governance/policies/participation/)
 

From f9c36bbf3260de353e8ace46c0a1087d00536aa0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@mozilla.com>
Date: Wed, 15 Jul 2020 11:20:23 +0200
Subject: [PATCH 9/9] Update README.md

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 49b0dc58..31bfe73c 100644
--- a/README.md
+++ b/README.md
@@ -116,9 +116,11 @@ Audio examples: [soundcloud](https://soundcloud.com/user-565970875/pocket-articl
 ## [Mozilla TTS Tutorials and Notebooks](https://github.com/mozilla/TTS/wiki/TTS-Notebooks-and-Tutorials)
 
 ## Datasets and Data-Loading
-TTS provides a generic dataloader easy to use for new datasets. You need to write an preprocessor function to integrate your own dataset.Check ```datasets/preprocess.py``` to see some examples. After the function, you need to set ```dataset``` field in ```config.json```. Do not forget other data related fields too.
+TTS provides a generic dataloader that is easy to use with your custom dataset.
+You just need to write a simple function to format the dataset. Check ```datasets/preprocess.py``` to see some examples. 
+After that, you need to set ```dataset``` fields in ```config.json```.
 
-Some of the open-sourced datasets that we successfully applied TTS, are linked below.
+Some of the public datasets to which we successfully applied TTS:
 
 - [LJ Speech](https://keithito.com/LJ-Speech-Dataset/)
 - [Nancy](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/)