From 6a8052fd722e9fc604ebe005c5448a503a5dafa2 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sat, 11 Jul 2020 03:57:37 +0200
Subject: [PATCH 01/12] new scale stats

---
 tests/inputs/scale_stats.npy | Bin 10479 -> 10479 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/tests/inputs/scale_stats.npy b/tests/inputs/scale_stats.npy
index 5368ecb25445401efae40d8dd8c640aec15fd9c4..10015de5a9075c34875dcbab6e3e893e30b6d583 100644
GIT binary patch
delta 317
zcmX}oy-EW?5C`xT6vcoxLJAwvB8NpljqxkdQ;QVm1=GlO%f{^G7H;=;cJ~O_*rvZ1
zk-UIU;xptGoU9+^{P^={W|pJn=;NvNzD)?3NzDqOr#^?1l&sU0<)!(v)0C{yU9`+{
zC4QAD2WLaN^Up4qnO|vVNE@h*D^mz&xfgJr9=-%LRtRNs?xch608{l>tJ%Fs_Co)p
zWZp`}oWyNj1zgVQCT4<7#&DJN=myTFoCyop^>K7?bc42qrYpsiC>->X=KwgcOi2y>
zT1CzF?I8Y)F-OSHa&oG9!E|XA+|9$m&U5dW;rN8OUPPV!`_Pl&QBJ-Ic)-t875DHs
GZhr#@W?x$X

delta 335
zcmYMwy-ve05C?FBKzs^>#LyA3v=b6g%0~+X7&?ShbV1%Mhd7O;*zws8Wn=B07vK?i
z10Jcbz~QRGi{+pGy7S$9JRg5Qw|kp}5M3*lTLEyItgO;)l&RFL6nf$dI7`S{u%a?c
zJ8w+8_1PWKJ<R5oE5TeLbI+86ixJ&HbzGTJFw4DwUUK{z&{!dqDY%mkE{B*3hwkEK
zMUK_qMai-huHr+wkC;`=TdA0n_?b5W*E70-94tFX;U*r?bp%sQg@u08>KqjVwB=Ar
zln!p=mjL)+xsn?0{yYB#7tTN6ei;2k2!t#qkQ2>IrYox;nuQfO&%I-Y<60h?uTFRC
Lv97s?r}X3(0N!9?


From 6448c87a553bb7cec50e471c5fd68c059df4f105 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sat, 11 Jul 2020 22:49:48 +0200
Subject: [PATCH 02/12] separate tf tests

---
 tf/tests/__init__.py                |   1 +
 tf/tests/test_layers_tf.py          |   0
 tf/tests/test_tacotron2_tf_model.py | 135 ++++++++++++++++++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 tf/tests/__init__.py
 create mode 100644 tf/tests/test_layers_tf.py
 create mode 100644 tf/tests/test_tacotron2_tf_model.py

diff --git a/tf/tests/__init__.py b/tf/tests/__init__.py
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/tf/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tf/tests/test_layers_tf.py b/tf/tests/test_layers_tf.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tf/tests/test_tacotron2_tf_model.py b/tf/tests/test_tacotron2_tf_model.py
new file mode 100644
index 00000000..dc904ea2
--- /dev/null
+++ b/tf/tests/test_tacotron2_tf_model.py
@@ -0,0 +1,135 @@
+import os
+import torch
+import unittest
+import numpy as np
+import tensorflow as tf
+tf.get_logger().setLevel('INFO')
+
+from TTS.utils.io import load_config
+from TTS.tf.models.tacotron2 import Tacotron2
+from TTS.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
+from TTS.utils.synthesis import run_model_tflite, text_to_seqvec
+
+#pylint: disable=unused-variable
+
+torch.manual_seed(1)
+use_cuda = torch.cuda.is_available()
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+file_path = os.path.dirname(os.path.realpath(__file__)).replace('/tf/','/')
+c = load_config(os.path.join(file_path, 'test_config.json'))
+
+
+class TacotronTFTrainTest(unittest.TestCase):
+
+    @staticmethod
+    def generate_dummy_inputs():
+        chars_seq = torch.randint(0, 24, (8, 128)).long().to(device)
+        chars_seq_lengths = torch.randint(100, 128, (8, )).long().to(device)
+        chars_seq_lengths = torch.sort(chars_seq_lengths, descending=True)[0]
+        mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
+        mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device)
+        mel_lengths = torch.randint(20, 30, (8, )).long().to(device)
+        stop_targets = torch.zeros(8, 30, 1).float().to(device)
+        speaker_ids = torch.randint(0, 5, (8, )).long().to(device)
+
+        chars_seq = tf.convert_to_tensor(chars_seq.cpu().numpy())
+        chars_seq_lengths = tf.convert_to_tensor(chars_seq_lengths.cpu().numpy())
+        mel_spec = tf.convert_to_tensor(mel_spec.cpu().numpy())
+        return chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\
+            stop_targets, speaker_ids
+
+    def test_train_step(self):
+        ''' test forward pass '''
+        chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\
+            stop_targets, speaker_ids = self.generate_dummy_inputs()
+
+        for idx in mel_lengths:
+            stop_targets[:, int(idx.item()):, 0] = 1.0
+
+        stop_targets = stop_targets.view(chars_seq.shape[0],
+                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
+
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5)
+        # training pass
+        output = model(chars_seq, chars_seq_lengths, mel_spec, training=True)
+
+        # check model output shapes
+        assert np.all(output[0].shape == mel_spec.shape)
+        assert np.all(output[1].shape == mel_spec.shape)
+        assert output[2].shape[2] == chars_seq.shape[1]
+        assert output[2].shape[1] == (mel_spec.shape[1] // model.decoder.r)
+        assert output[3].shape[1] == (mel_spec.shape[1] // model.decoder.r)
+
+        # inference pass
+        output = model(chars_seq, training=False)
+
+    def test_forward_attention(self,):
+        chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\
+            stop_targets, speaker_ids = self.generate_dummy_inputs()
+
+        for idx in mel_lengths:
+            stop_targets[:, int(idx.item()):, 0] = 1.0
+
+        stop_targets = stop_targets.view(chars_seq.shape[0],
+                                         stop_targets.size(1) // c.r, -1)
+        stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
+
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, forward_attn=True)
+        # training pass
+        output = model(chars_seq, chars_seq_lengths, mel_spec, training=True)
+
+        # check model output shapes
+        assert np.all(output[0].shape == mel_spec.shape)
+        assert np.all(output[1].shape == mel_spec.shape)
+        assert output[2].shape[2] == chars_seq.shape[1]
+        assert output[2].shape[1] == (mel_spec.shape[1] // model.decoder.r)
+        assert output[3].shape[1] == (mel_spec.shape[1] // model.decoder.r)
+
+        # inference pass
+        output = model(chars_seq, training=False)
+
+    def test_tflite_conversion(self, ):
+        model = Tacotron2(num_chars=24,
+                          num_speakers=0,
+                          r=3,
+                          postnet_output_dim=80,
+                          decoder_output_dim=80,
+                          attn_type='original',
+                          attn_win=False,
+                          attn_norm='sigmoid',
+                          prenet_type='original',
+                          prenet_dropout=True,
+                          forward_attn=False,
+                          trans_agent=False,
+                          forward_attn_mask=False,
+                          location_attn=True,
+                          attn_K=0,
+                          separate_stopnet=True,
+                          bidirectional_decoder=False,
+                          enable_tflite=True)
+        model.build_inference()
+        convert_tacotron2_to_tflite(model, output_path='test_tacotron2.tflite', experimental_converter=True)
+        # init tflite model
+        tflite_model = load_tflite_model('test_tacotron2.tflite')
+        # fake input
+        inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32)
+        # run inference
+        # get input and output details
+        input_details = tflite_model.get_input_details()
+        output_details = tflite_model.get_output_details()
+        # reshape input tensor for the new input shape
+        tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape)
+        tflite_model.allocate_tensors()
+        detail = input_details[0]
+        input_shape = detail['shape']
+        tflite_model.set_tensor(detail['index'], inputs)
+        # run the tflite_model
+        tflite_model.invoke()
+        # collect outputs
+        decoder_output = tflite_model.get_tensor(output_details[0]['index'])
+        postnet_output = tflite_model.get_tensor(output_details[1]['index'])
+        # remove tflite binary
+        os.remove('test_tacotron2.tflite')
+

From da3478bef1bcd213032f298b8a24e307e1439299 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sat, 11 Jul 2020 23:32:43 +0200
Subject: [PATCH 03/12] linter fix

---
 tf/tests/test_tacotron2_tf_model.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tf/tests/test_tacotron2_tf_model.py b/tf/tests/test_tacotron2_tf_model.py
index dc904ea2..03db194a 100644
--- a/tf/tests/test_tacotron2_tf_model.py
+++ b/tf/tests/test_tacotron2_tf_model.py
@@ -8,7 +8,6 @@ tf.get_logger().setLevel('INFO')
 from TTS.utils.io import load_config
 from TTS.tf.models.tacotron2 import Tacotron2
 from TTS.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
-from TTS.utils.synthesis import run_model_tflite, text_to_seqvec
 
 #pylint: disable=unused-variable
 
@@ -16,7 +15,7 @@ torch.manual_seed(1)
 use_cuda = torch.cuda.is_available()
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-file_path = os.path.dirname(os.path.realpath(__file__)).replace('/tf/','/')
+file_path = os.path.dirname(os.path.realpath(__file__)).replace('/tf/', '/')
 c = load_config(os.path.join(file_path, 'test_config.json'))
 
 
@@ -90,7 +89,7 @@ class TacotronTFTrainTest(unittest.TestCase):
         # inference pass
         output = model(chars_seq, training=False)
 
-    def test_tflite_conversion(self, ):
+    def test_tflite_conversion(self, ):  #pylint:disable=no-self-use
         model = Tacotron2(num_chars=24,
                           num_speakers=0,
                           r=3,
@@ -114,13 +113,13 @@ class TacotronTFTrainTest(unittest.TestCase):
         # init tflite model
         tflite_model = load_tflite_model('test_tacotron2.tflite')
         # fake input
-        inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32)
+        inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32)  #pylint:disable=unexpected-keyword-arg
         # run inference
         # get input and output details
         input_details = tflite_model.get_input_details()
         output_details = tflite_model.get_output_details()
         # reshape input tensor for the new input shape
-        tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape)
+        tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape)  #pylint:disable=unexpected-keyword-arg
         tflite_model.allocate_tensors()
         detail = input_details[0]
         input_shape = detail['shape']

From 3c626a24c2bc03a7a0daf26691c789c143ed0903 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sat, 11 Jul 2020 23:33:15 +0200
Subject: [PATCH 04/12] tf init

---
 tf/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tf/__init__.py

diff --git a/tf/__init__.py b/tf/__init__.py
new file mode 100644
index 00000000..e69de29b

From 717484f0158bbb57e1b6fb7a5f5db69295f65870 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 02:01:24 +0200
Subject: [PATCH 05/12] use tf-nightly

---
 requirements_tests.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_tests.txt b/requirements_tests.txt
index c3c32fd2..92ddadb2 100644
--- a/requirements_tests.txt
+++ b/requirements_tests.txt
@@ -1,7 +1,7 @@
 numpy>=1.16.0
 numba==0.48
 torch>=0.4.1
-tensorflow>=2.2
+tf-nightly
 librosa>=0.5.1
 Unidecode>=0.4.20
 tensorboard

From a03ae8e99adadd8887ede9562a087a2630a56a8a Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 10:57:45 +0200
Subject: [PATCH 06/12] server fix and remove pwgan use

---
 server/server.py      | 26 ++++++++------------------
 server/synthesizer.py |  5 +++--
 2 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/server/server.py b/server/server.py
index 43f1b3c4..fe079e7e 100644
--- a/server/server.py
+++ b/server/server.py
@@ -15,12 +15,9 @@ def create_argparser():
     parser.add_argument('--tts_config', type=str, help='path to TTS config.json file')
     parser.add_argument('--tts_speakers', type=str, help='path to JSON file containing speaker ids, if speaker ids are used in the model')
     parser.add_argument('--wavernn_lib_path', type=str, default=None, help='path to WaveRNN project folder to be imported. If this is not passed, model uses Griffin-Lim for synthesis.')
-    parser.add_argument('--wavernn_file', type=str, default=None, help='path to WaveRNN checkpoint file.')
+    parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.')
     parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.')
     parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.')
-    parser.add_argument('--pwgan_lib_path', type=str, default=None, help='path to ParallelWaveGAN project folder to be imported. If this is not passed, model uses Griffin-Lim for synthesis.')
-    parser.add_argument('--pwgan_file', type=str, default=None, help='path to ParallelWaveGAN checkpoint file.')
-    parser.add_argument('--pwgan_config', type=str, default=None, help='path to ParallelWaveGAN config file.')
     parser.add_argument('--vocoder_config', type=str, default=None, help='path to TTS.vocoder config file.')
     parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to TTS.vocoder checkpoint file.')
     parser.add_argument('--port', type=int, default=5002, help='port to listen on.')
@@ -46,10 +43,6 @@ embedded_wavernn_folder = os.path.join(embedded_models_folder, 'wavernn')
 wavernn_checkpoint_file = os.path.join(embedded_wavernn_folder, 'checkpoint.pth.tar')
 wavernn_config_file = os.path.join(embedded_wavernn_folder, 'config.json')
 
-embedded_pwgan_folder = os.path.join(embedded_models_folder, 'pwgan')
-pwgan_checkpoint_file = os.path.join(embedded_pwgan_folder, 'checkpoint.pkl')
-pwgan_config_file = os.path.join(embedded_pwgan_folder, 'config.yml')
-
 args = create_argparser().parse_args()
 
 # If these were not specified in the CLI args, use default values with embedded model files
@@ -57,19 +50,16 @@ if not args.tts_checkpoint and os.path.isfile(tts_checkpoint_file):
     args.tts_checkpoint = tts_checkpoint_file
 if not args.tts_config and os.path.isfile(tts_config_file):
     args.tts_config = tts_config_file
-if not args.vocoder_checkpoint and os.path.isfile(tts_checkpoint_file):
-    args.tts_checkpoint = tts_checkpoint_file
-if not args.vocoder_config and os.path.isfile(tts_config_file):
-    args.tts_config = tts_config_file
 
-if not args.wavernn_file and os.path.isfile(wavernn_checkpoint_file):
-    args.wavernn_file = wavernn_checkpoint_file
+if not args.vocoder_checkpoint and os.path.isfile(vocoder_checkpoint_file):
+    args.vocoder_file = vocoder_checkpoint_file
+if not args.vocoder_config and os.path.isfile(vocoder_config_file):
+    args.vocoder_config = vocoder_config_file
+
+if not args.wavernn_checkpoint and os.path.isfile(wavernn_checkpoint_file):
+    args.wavernn_checkpoint = wavernn_checkpoint_file
 if not args.wavernn_config and os.path.isfile(wavernn_config_file):
     args.wavernn_config = wavernn_config_file
-if not args.pwgan_file and os.path.isfile(pwgan_checkpoint_file):
-    args.pwgan_file = pwgan_checkpoint_file
-if not args.pwgan_config and os.path.isfile(pwgan_config_file):
-    args.pwgan_config = pwgan_config_file
 
 synthesizer = Synthesizer(args)
 
diff --git a/server/synthesizer.py b/server/synthesizer.py
index b18d73ac..99819ccc 100644
--- a/server/synthesizer.py
+++ b/server/synthesizer.py
@@ -31,15 +31,16 @@ class Synthesizer(object):
         self.wavernn = None
         self.vocoder_model = None
         self.config = config
+        print(config)
         self.use_cuda = self.config.use_cuda
         if self.use_cuda:
             assert torch.cuda.is_available(), "CUDA is not availabe on this machine."
         self.load_tts(self.config.tts_checkpoint, self.config.tts_config,
                       self.config.use_cuda)
-        if self.config.vocoder_file:
+        if self.config.vocoder_checkpoint:
             self.load_vocoder(self.config.vocoder_checkpoint, self.config.vocoder_config, self.config.use_cuda)
         if self.config.wavernn_lib_path:
-            self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_file,
+            self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_checkpoint,
                               self.config.wavernn_config, self.config.use_cuda)
 
     def load_tts(self, tts_checkpoint, tts_config, use_cuda):

From 8ca7c53158436ca3ef6e93ed26165d868bc8d2fd Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 15:35:28 +0200
Subject: [PATCH 07/12] update requirements and setup.py

---
 requirements.txt       | 20 ++++++++++++--------
 requirements_tests.txt | 18 +++++++++---------
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 1d505f10..fb3e2281 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,19 @@
-numpy>=1.16.0
 torch>=1.5
-librosa>=0.5.1
-Unidecode>=0.4.20
-tensorboard
+tensorflow>=2.2
+numpy>=1.16.0
+scipy>=0.19.0
+numba==0.48
+librosa==0.6.2
+unidecode==0.4.20
+attrdict
 tensorboardX
 matplotlib
 Pillow
 flask
-scipy
 tqdm
-soundfile
-phonemizer
-bokeh==1.4.0
 inflect
+bokeh==1.4.0
+soundfile
+nose==1.3.7
+cardboardlint==1.3.0
+pylint==2.5.3
\ No newline at end of file
diff --git a/requirements_tests.txt b/requirements_tests.txt
index 92ddadb2..6823b172 100644
--- a/requirements_tests.txt
+++ b/requirements_tests.txt
@@ -1,18 +1,18 @@
+torch>=1.5
+tensorflow==2.3rc
 numpy>=1.16.0
+scipy>=0.19.0
 numba==0.48
-torch>=0.4.1
-tf-nightly
-librosa>=0.5.1
-Unidecode>=0.4.20
-tensorboard
+librosa==0.6.2
+unidecode==0.4.20
+attrdict
 tensorboardX
 matplotlib
 Pillow
 flask
-scipy
 tqdm
-soundfile
 inflect
-phonemizer
 bokeh==1.4.0
-nose
+soundfile
+nose==1.3.7
+cardboardlint==1.3.0
\ No newline at end of file

From 238cc424f4c856eb2db09f7463a687a00c5e3c4b Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 15:35:51 +0200
Subject: [PATCH 08/12] fix server.py vocoder call

---
 server/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/server.py b/server/server.py
index fe079e7e..bd23ea9c 100644
--- a/server/server.py
+++ b/server/server.py
@@ -52,7 +52,7 @@ if not args.tts_config and os.path.isfile(tts_config_file):
     args.tts_config = tts_config_file
 
 if not args.vocoder_checkpoint and os.path.isfile(vocoder_checkpoint_file):
-    args.vocoder_file = vocoder_checkpoint_file
+    args.vocoder_checkpoint = vocoder_checkpoint_file
 if not args.vocoder_config and os.path.isfile(vocoder_config_file):
     args.vocoder_config = vocoder_config_file
 

From 0ab73061a12cbd48b724ed1ca7b40bade6e4bead Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 15:36:36 +0200
Subject: [PATCH 09/12] update setup.py install tf with external pip call

---
 setup.py | 72 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 19 deletions(-)

diff --git a/setup.py b/setup.py
index 462f0056..7e40f234 100644
--- a/setup.py
+++ b/setup.py
@@ -69,6 +69,41 @@ if 'bdist_wheel' in unknown_args and args.checkpoint and args.model_config:
     shutil.copy(args.model_config, embedded_config_path)
     package_data.extend([embedded_checkpoint_path, embedded_config_path])
 
+
+def pip_install(package_name):
+    subprocess.call(
+        [sys.executable, '-m', 'pip', 'install', package_name]
+    )
+
+
+requirements = {
+    'install_requires':[
+        "torch>=1.5",
+        "numpy>=1.16.0",
+        "numba==0.48",
+        "scipy>=0.19.0",
+        "librosa==0.6.2",
+        "unidecode==0.4.20",
+        "attrdict",
+        "tensorboardX",
+        "matplotlib",
+        "Pillow",
+        "flask",
+        "tqdm",
+        "inflect",
+        "bokeh==1.4.0",
+        "soundfile",
+        "phonemizer>=2.2.0",
+        "nose==1.3.7",
+        "cardboardlint==1.3.0",
+        "pylint==2.5.3",
+    ],
+    'pip_install':[
+        'tensorflow>=2.2.0',
+    ]
+}
+
+
 setup(
     name='TTS',
     version=version,
@@ -95,24 +130,23 @@ setup(
         'build_py': build_py,
         'develop': develop,
     },
-    install_requires=[
-        "scipy>=0.19.0",
-        "torch>=1.5",
-        "numpy>=1.16.0",
-        "librosa==0.6.2",
-        "unidecode==0.4.20",
-        "attrdict",
-        "tensorboardX",
-        "matplotlib",
-        "Pillow",
-        "flask",
-        "tqdm",
-        "inflect",
-        "bokeh==1.4.0",
-        "soundfile",
-        "phonemizer @ https://github.com/bootphon/phonemizer/tarball/master",
-    ],
-    dependency_links=[
-        "http://github.com/bootphon/phonemizer/tarball/master#egg=phonemizer-1.0.1"
+    install_requires=requirements['install_requires'],
+    python_requires='>=3.6.0',
+    classifiers=[
+        "Programming Language :: Python",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        'Development Status :: 3 - Alpha',
+        "Intended Audience :: Science/Research :: Developers",
+        "Operating System :: POSIX :: Linux",
+        'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
+        "Topic :: Software Development :: Libraries :: Python Modules :: Speech :: Sound/Audio :: Multimedia :: Artificial Intelligence",
     ]
 )
+
+# for some reason having tensorflow in 'install_requires'
+# breaks some of the dependencies.
+for module in requirements['pip_install']:
+    pip_install(module)
\ No newline at end of file

From ad235f0481fba280807f777d1262267518bfd02a Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 15:37:10 +0200
Subject: [PATCH 10/12] update test server_config and mitigate
 https://github.com/librosa/librosa/issues/1160 in server package test

---
 tests/inputs/server_config.json | 2 +-
 tests/test_server_package.sh    | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/inputs/server_config.json b/tests/inputs/server_config.json
index 9eb7f09f..0cb9b948 100644
--- a/tests/inputs/server_config.json
+++ b/tests/inputs/server_config.json
@@ -6,7 +6,7 @@
     "wavernn_file": null, // wavernn checkpoint file name
     "wavernn_config": null, // wavernn config file
     "vocoder_config":null,
-    "vocoder_file": null,
+    "vocoder_checkpoint": null,
     "is_wavernn_batched":true,
     "port": 5002,
     "use_cuda": false,
diff --git a/tests/test_server_package.sh b/tests/test_server_package.sh
index 9fe5e8b1..83ffc6f0 100755
--- a/tests/test_server_package.sh
+++ b/tests/test_server_package.sh
@@ -14,6 +14,9 @@ rm -f dist/*.whl
 python setup.py --quiet bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json
 pip install --quiet dist/TTS*.whl
 
+# this is related to https://github.com/librosa/librosa/issues/1160
+pip install numba==0.48
+
 python -m TTS.server.server &
 SERVER_PID=$!
 

From c33068ad4035ea0b1599c5210970334c5c588f94 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 16:09:03 +0200
Subject: [PATCH 11/12] use librosa 0.7.2 and fix vocoder dataset assert

---
 requirements.txt               | 2 +-
 requirements_tests.txt         | 2 +-
 setup.py                       | 2 +-
 vocoder/tests/test_datasets.py | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index fb3e2281..03873061 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ tensorflow>=2.2
 numpy>=1.16.0
 scipy>=0.19.0
 numba==0.48
-librosa==0.6.2
+librosa==0.7.2
 unidecode==0.4.20
 attrdict
 tensorboardX
diff --git a/requirements_tests.txt b/requirements_tests.txt
index 6823b172..cd5df3fa 100644
--- a/requirements_tests.txt
+++ b/requirements_tests.txt
@@ -3,7 +3,7 @@ tensorflow==2.3rc
 numpy>=1.16.0
 scipy>=0.19.0
 numba==0.48
-librosa==0.6.2
+librosa==0.7.2
 unidecode==0.4.20
 attrdict
 tensorboardX
diff --git a/setup.py b/setup.py
index 7e40f234..bfc59516 100644
--- a/setup.py
+++ b/setup.py
@@ -82,7 +82,7 @@ requirements = {
         "numpy>=1.16.0",
         "numba==0.48",
         "scipy>=0.19.0",
-        "librosa==0.6.2",
+        "librosa==0.7.2",
         "unidecode==0.4.20",
         "attrdict",
         "tensorboardX",
diff --git a/vocoder/tests/test_datasets.py b/vocoder/tests/test_datasets.py
index 5d409b3f..43d0d3de 100644
--- a/vocoder/tests/test_datasets.py
+++ b/vocoder/tests/test_datasets.py
@@ -59,9 +59,9 @@ def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, us
                     audio = wav1[idx].squeeze()
                     feat = feat1[idx]
                     mel = ap.melspectrogram(audio)
-                    # the first 2 and the last frame is skipped due to the padding
-                    # applied in spec. computation.
-                    assert (feat - mel[:, :feat1.shape[-1]])[:, 2:-1].sum() == 0, f' [!] {(feat - mel[:, :feat1.shape[-1]])[:, 2:-1].sum()}'
+                    # the first 2 and the last 2 frames are skipped due to the padding
+                    # differences in stft
+                    assert (feat - mel[:, :feat1.shape[-1]])[:, 2:-2].sum() <= 0, f' [!] {(feat - mel[:, :feat1.shape[-1]])[:, 2:-2].sum()}'
 
             count_iter += 1
             # if count_iter == max_iter:

From 98becfe973eab8aa665d47398ba21ea2ba7e04c4 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Sun, 12 Jul 2020 16:19:05 +0200
Subject: [PATCH 12/12] phonemizer in requirements

---
 requirements.txt       | 1 +
 requirements_tests.txt | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 03873061..ed8c0499 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ numpy>=1.16.0
 scipy>=0.19.0
 numba==0.48
 librosa==0.7.2
+phonemizer>=2.2.0
 unidecode==0.4.20
 attrdict
 tensorboardX
diff --git a/requirements_tests.txt b/requirements_tests.txt
index cd5df3fa..963c1c8f 100644
--- a/requirements_tests.txt
+++ b/requirements_tests.txt
@@ -4,6 +4,7 @@ numpy>=1.16.0
 scipy>=0.19.0
 numba==0.48
 librosa==0.7.2
+phonemizer>=2.2.0
 unidecode==0.4.20
 attrdict
 tensorboardX