From 6a8052fd722e9fc604ebe005c5448a503a5dafa2 Mon Sep 17 00:00:00 2001 From: erogol Date: Sat, 11 Jul 2020 03:57:37 +0200 Subject: [PATCH 01/12] new scale stats --- tests/inputs/scale_stats.npy | Bin 10479 -> 10479 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/inputs/scale_stats.npy b/tests/inputs/scale_stats.npy index 5368ecb25445401efae40d8dd8c640aec15fd9c4..10015de5a9075c34875dcbab6e3e893e30b6d583 100644 GIT binary patch delta 317 zcmX}oy-EW?5C`xT6vcoxLJAwvB8NpljqxkdQ;QVm1=GlO%f{^G7H;=;cJ~O_*rvZ1 zk-UIU;xptGoU9+^{P^={W|pJn=;NvNzD)?3NzDqOr#^?1l&sU0<)!(v)0C{yU9`+{ zC4QAD2WLaN^Up4qnO|vVNE@h*D^mz&xfgJr9=-%LRtRNs?xch608{l>tJ%Fs_Co)p zWZp`}oWyNj1zgVQCT4<7#&DJN=myTFoCyop^>K7?bc42qrYpsiC>->X=KwgcOi2y> zT1CzF?I8Y)F-OSHa&oG9!E|XA+|9$m&U5dW;rN8OUPPV!`_Pl&QBJ-Ic)-t875DHs GZhr#@W?x$X delta 335 zcmYMwy-ve05C?FBKzs^>#LyA3v=b6g%0~+X7&?ShbV1%Mhd7O;*zws8Wn=B07vK?i z10Jcbz~QRGi{+pGy7S$9JRg5Qw|kp}5M3*lTLEyItgO;)l&RFL6nf$dI7`S{u%a?c zJ8w+8_1PWKJKqjVwB=Ar zln!p=mjL)+xsn?0{yYB#7tTN6ei;2k2!t#qkQ2>IrYox;nuQfO&%I-Y<60h?uTFRC Lv97s?r}X3(0N!9? From 6448c87a553bb7cec50e471c5fd68c059df4f105 Mon Sep 17 00:00:00 2001 From: erogol Date: Sat, 11 Jul 2020 22:49:48 +0200 Subject: [PATCH 02/12] separate tf tests --- tf/tests/__init__.py | 1 + tf/tests/test_layers_tf.py | 0 tf/tests/test_tacotron2_tf_model.py | 135 ++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+) create mode 100644 tf/tests/__init__.py create mode 100644 tf/tests/test_layers_tf.py create mode 100644 tf/tests/test_tacotron2_tf_model.py diff --git a/tf/tests/__init__.py b/tf/tests/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tf/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/tf/tests/test_layers_tf.py b/tf/tests/test_layers_tf.py new file mode 100644 index 00000000..e69de29b diff --git a/tf/tests/test_tacotron2_tf_model.py b/tf/tests/test_tacotron2_tf_model.py new file mode 100644 index 00000000..dc904ea2 --- /dev/null +++ b/tf/tests/test_tacotron2_tf_model.py @@ -0,0 +1,135 @@ +import os +import torch +import unittest +import numpy as np +import tensorflow as tf +tf.get_logger().setLevel('INFO') + +from TTS.utils.io import load_config +from TTS.tf.models.tacotron2 import Tacotron2 +from TTS.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model +from TTS.utils.synthesis import run_model_tflite, text_to_seqvec + +#pylint: disable=unused-variable + +torch.manual_seed(1) +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + +file_path = os.path.dirname(os.path.realpath(__file__)).replace('/tf/','/') +c = load_config(os.path.join(file_path, 'test_config.json')) + + +class TacotronTFTrainTest(unittest.TestCase): + + @staticmethod + def generate_dummy_inputs(): + chars_seq = torch.randint(0, 24, (8, 128)).long().to(device) + chars_seq_lengths = torch.randint(100, 128, (8, )).long().to(device) + chars_seq_lengths = torch.sort(chars_seq_lengths, descending=True)[0] + mel_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_postnet_spec = torch.rand(8, 30, c.audio['num_mels']).to(device) + mel_lengths = torch.randint(20, 30, (8, )).long().to(device) + stop_targets = torch.zeros(8, 30, 1).float().to(device) + speaker_ids = torch.randint(0, 5, (8, )).long().to(device) + + chars_seq = tf.convert_to_tensor(chars_seq.cpu().numpy()) + chars_seq_lengths = tf.convert_to_tensor(chars_seq_lengths.cpu().numpy()) + mel_spec = tf.convert_to_tensor(mel_spec.cpu().numpy()) + return chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\ + stop_targets, speaker_ids + + def test_train_step(self): + ''' test forward pass ''' + chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\ + stop_targets, speaker_ids = self.generate_dummy_inputs() + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(chars_seq.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5) + # training pass + output = model(chars_seq, chars_seq_lengths, mel_spec, training=True) + + # check model output shapes + assert np.all(output[0].shape == mel_spec.shape) + assert np.all(output[1].shape == mel_spec.shape) + assert output[2].shape[2] == chars_seq.shape[1] + assert output[2].shape[1] == (mel_spec.shape[1] // model.decoder.r) + assert output[3].shape[1] == (mel_spec.shape[1] // model.decoder.r) + + # inference pass + output = model(chars_seq, training=False) + + def test_forward_attention(self,): + chars_seq, chars_seq_lengths, mel_spec, mel_postnet_spec, mel_lengths,\ + stop_targets, speaker_ids = self.generate_dummy_inputs() + + for idx in mel_lengths: + stop_targets[:, int(idx.item()):, 0] = 1.0 + + stop_targets = stop_targets.view(chars_seq.shape[0], + stop_targets.size(1) // c.r, -1) + stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze() + + model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, forward_attn=True) + # training pass + output = model(chars_seq, chars_seq_lengths, mel_spec, training=True) + + # check model output shapes + assert np.all(output[0].shape == mel_spec.shape) + assert np.all(output[1].shape == mel_spec.shape) + assert output[2].shape[2] == chars_seq.shape[1] + assert output[2].shape[1] == (mel_spec.shape[1] // model.decoder.r) + assert output[3].shape[1] == (mel_spec.shape[1] // model.decoder.r) + + # inference pass + output = model(chars_seq, training=False) + + def test_tflite_conversion(self, ): + model = Tacotron2(num_chars=24, + num_speakers=0, + r=3, + postnet_output_dim=80, + decoder_output_dim=80, + attn_type='original', + attn_win=False, + attn_norm='sigmoid', + prenet_type='original', + prenet_dropout=True, + forward_attn=False, + trans_agent=False, + forward_attn_mask=False, + location_attn=True, + attn_K=0, + separate_stopnet=True, + bidirectional_decoder=False, + enable_tflite=True) + model.build_inference() + convert_tacotron2_to_tflite(model, output_path='test_tacotron2.tflite', experimental_converter=True) + # init tflite model + tflite_model = load_tflite_model('test_tacotron2.tflite') + # fake input + inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32) + # run inference + # get input and output details + input_details = tflite_model.get_input_details() + output_details = tflite_model.get_output_details() + # reshape input tensor for the new input shape + tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape) + tflite_model.allocate_tensors() + detail = input_details[0] + input_shape = detail['shape'] + tflite_model.set_tensor(detail['index'], inputs) + # run the tflite_model + tflite_model.invoke() + # collect outputs + decoder_output = tflite_model.get_tensor(output_details[0]['index']) + postnet_output = tflite_model.get_tensor(output_details[1]['index']) + # remove tflite binary + os.remove('test_tacotron2.tflite') + From da3478bef1bcd213032f298b8a24e307e1439299 Mon Sep 17 00:00:00 2001 From: erogol Date: Sat, 11 Jul 2020 23:32:43 +0200 Subject: [PATCH 03/12] linter fix --- tf/tests/test_tacotron2_tf_model.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tf/tests/test_tacotron2_tf_model.py b/tf/tests/test_tacotron2_tf_model.py index dc904ea2..03db194a 100644 --- a/tf/tests/test_tacotron2_tf_model.py +++ b/tf/tests/test_tacotron2_tf_model.py @@ -8,7 +8,6 @@ tf.get_logger().setLevel('INFO') from TTS.utils.io import load_config from TTS.tf.models.tacotron2 import Tacotron2 from TTS.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model -from TTS.utils.synthesis import run_model_tflite, text_to_seqvec #pylint: disable=unused-variable @@ -16,7 +15,7 @@ torch.manual_seed(1) use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -file_path = os.path.dirname(os.path.realpath(__file__)).replace('/tf/','/') +file_path = os.path.dirname(os.path.realpath(__file__)).replace('/tf/', '/') c = load_config(os.path.join(file_path, 'test_config.json')) @@ -90,7 +89,7 @@ class TacotronTFTrainTest(unittest.TestCase): # inference pass output = model(chars_seq, training=False) - def test_tflite_conversion(self, ): + def test_tflite_conversion(self, ): #pylint:disable=no-self-use model = Tacotron2(num_chars=24, num_speakers=0, r=3, @@ -114,13 +113,13 @@ class TacotronTFTrainTest(unittest.TestCase): # init tflite model tflite_model = load_tflite_model('test_tacotron2.tflite') # fake input - inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32) + inputs = tf.random.uniform([1, 4], maxval=10, dtype=tf.int32) #pylint:disable=unexpected-keyword-arg # run inference # get input and output details input_details = tflite_model.get_input_details() output_details = tflite_model.get_output_details() # reshape input tensor for the new input shape - tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape) + tflite_model.resize_tensor_input(input_details[0]['index'], inputs.shape) #pylint:disable=unexpected-keyword-arg tflite_model.allocate_tensors() detail = input_details[0] input_shape = detail['shape'] From 3c626a24c2bc03a7a0daf26691c789c143ed0903 Mon Sep 17 00:00:00 2001 From: erogol Date: Sat, 11 Jul 2020 23:33:15 +0200 Subject: [PATCH 04/12] tf init --- tf/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tf/__init__.py diff --git a/tf/__init__.py b/tf/__init__.py new file mode 100644 index 00000000..e69de29b From 717484f0158bbb57e1b6fb7a5f5db69295f65870 Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 02:01:24 +0200 Subject: [PATCH 05/12] use tf-nightly --- requirements_tests.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_tests.txt b/requirements_tests.txt index c3c32fd2..92ddadb2 100644 --- a/requirements_tests.txt +++ b/requirements_tests.txt @@ -1,7 +1,7 @@ numpy>=1.16.0 numba==0.48 torch>=0.4.1 -tensorflow>=2.2 +tf-nightly librosa>=0.5.1 Unidecode>=0.4.20 tensorboard From a03ae8e99adadd8887ede9562a087a2630a56a8a Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 10:57:45 +0200 Subject: [PATCH 06/12] server fix and remove pwgan use --- server/server.py | 26 ++++++++------------------ server/synthesizer.py | 5 +++-- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/server/server.py b/server/server.py index 43f1b3c4..fe079e7e 100644 --- a/server/server.py +++ b/server/server.py @@ -15,12 +15,9 @@ def create_argparser(): parser.add_argument('--tts_config', type=str, help='path to TTS config.json file') parser.add_argument('--tts_speakers', type=str, help='path to JSON file containing speaker ids, if speaker ids are used in the model') parser.add_argument('--wavernn_lib_path', type=str, default=None, help='path to WaveRNN project folder to be imported. If this is not passed, model uses Griffin-Lim for synthesis.') - parser.add_argument('--wavernn_file', type=str, default=None, help='path to WaveRNN checkpoint file.') + parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.') parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.') parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.') - parser.add_argument('--pwgan_lib_path', type=str, default=None, help='path to ParallelWaveGAN project folder to be imported. If this is not passed, model uses Griffin-Lim for synthesis.') - parser.add_argument('--pwgan_file', type=str, default=None, help='path to ParallelWaveGAN checkpoint file.') - parser.add_argument('--pwgan_config', type=str, default=None, help='path to ParallelWaveGAN config file.') parser.add_argument('--vocoder_config', type=str, default=None, help='path to TTS.vocoder config file.') parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to TTS.vocoder checkpoint file.') parser.add_argument('--port', type=int, default=5002, help='port to listen on.') @@ -46,10 +43,6 @@ embedded_wavernn_folder = os.path.join(embedded_models_folder, 'wavernn') wavernn_checkpoint_file = os.path.join(embedded_wavernn_folder, 'checkpoint.pth.tar') wavernn_config_file = os.path.join(embedded_wavernn_folder, 'config.json') -embedded_pwgan_folder = os.path.join(embedded_models_folder, 'pwgan') -pwgan_checkpoint_file = os.path.join(embedded_pwgan_folder, 'checkpoint.pkl') -pwgan_config_file = os.path.join(embedded_pwgan_folder, 'config.yml') - args = create_argparser().parse_args() # If these were not specified in the CLI args, use default values with embedded model files @@ -57,19 +50,16 @@ if not args.tts_checkpoint and os.path.isfile(tts_checkpoint_file): args.tts_checkpoint = tts_checkpoint_file if not args.tts_config and os.path.isfile(tts_config_file): args.tts_config = tts_config_file -if not args.vocoder_checkpoint and os.path.isfile(tts_checkpoint_file): - args.tts_checkpoint = tts_checkpoint_file -if not args.vocoder_config and os.path.isfile(tts_config_file): - args.tts_config = tts_config_file -if not args.wavernn_file and os.path.isfile(wavernn_checkpoint_file): - args.wavernn_file = wavernn_checkpoint_file +if not args.vocoder_checkpoint and os.path.isfile(vocoder_checkpoint_file): + args.vocoder_file = vocoder_checkpoint_file +if not args.vocoder_config and os.path.isfile(vocoder_config_file): + args.vocoder_config = vocoder_config_file + +if not args.wavernn_checkpoint and os.path.isfile(wavernn_checkpoint_file): + args.wavernn_checkpoint = wavernn_checkpoint_file if not args.wavernn_config and os.path.isfile(wavernn_config_file): args.wavernn_config = wavernn_config_file -if not args.pwgan_file and os.path.isfile(pwgan_checkpoint_file): - args.pwgan_file = pwgan_checkpoint_file -if not args.pwgan_config and os.path.isfile(pwgan_config_file): - args.pwgan_config = pwgan_config_file synthesizer = Synthesizer(args) diff --git a/server/synthesizer.py b/server/synthesizer.py index b18d73ac..99819ccc 100644 --- a/server/synthesizer.py +++ b/server/synthesizer.py @@ -31,15 +31,16 @@ class Synthesizer(object): self.wavernn = None self.vocoder_model = None self.config = config + print(config) self.use_cuda = self.config.use_cuda if self.use_cuda: assert torch.cuda.is_available(), "CUDA is not availabe on this machine." self.load_tts(self.config.tts_checkpoint, self.config.tts_config, self.config.use_cuda) - if self.config.vocoder_file: + if self.config.vocoder_checkpoint: self.load_vocoder(self.config.vocoder_checkpoint, self.config.vocoder_config, self.config.use_cuda) if self.config.wavernn_lib_path: - self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_file, + self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_checkpoint, self.config.wavernn_config, self.config.use_cuda) def load_tts(self, tts_checkpoint, tts_config, use_cuda): From 8ca7c53158436ca3ef6e93ed26165d868bc8d2fd Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 15:35:28 +0200 Subject: [PATCH 07/12] update requirements and setup.py --- requirements.txt | 20 ++++++++++++-------- requirements_tests.txt | 18 +++++++++--------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1d505f10..fb3e2281 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,19 @@ -numpy>=1.16.0 torch>=1.5 -librosa>=0.5.1 -Unidecode>=0.4.20 -tensorboard +tensorflow>=2.2 +numpy>=1.16.0 +scipy>=0.19.0 +numba==0.48 +librosa==0.6.2 +unidecode==0.4.20 +attrdict tensorboardX matplotlib Pillow flask -scipy tqdm -soundfile -phonemizer -bokeh==1.4.0 inflect +bokeh==1.4.0 +soundfile +nose==1.3.7 +cardboardlint==1.3.0 +pylint==2.5.3 \ No newline at end of file diff --git a/requirements_tests.txt b/requirements_tests.txt index 92ddadb2..6823b172 100644 --- a/requirements_tests.txt +++ b/requirements_tests.txt @@ -1,18 +1,18 @@ +torch>=1.5 +tensorflow==2.3rc numpy>=1.16.0 +scipy>=0.19.0 numba==0.48 -torch>=0.4.1 -tf-nightly -librosa>=0.5.1 -Unidecode>=0.4.20 -tensorboard +librosa==0.6.2 +unidecode==0.4.20 +attrdict tensorboardX matplotlib Pillow flask -scipy tqdm -soundfile inflect -phonemizer bokeh==1.4.0 -nose +soundfile +nose==1.3.7 +cardboardlint==1.3.0 \ No newline at end of file From 238cc424f4c856eb2db09f7463a687a00c5e3c4b Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 15:35:51 +0200 Subject: [PATCH 08/12] fix server.py vocoder call --- server/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server.py b/server/server.py index fe079e7e..bd23ea9c 100644 --- a/server/server.py +++ b/server/server.py @@ -52,7 +52,7 @@ if not args.tts_config and os.path.isfile(tts_config_file): args.tts_config = tts_config_file if not args.vocoder_checkpoint and os.path.isfile(vocoder_checkpoint_file): - args.vocoder_file = vocoder_checkpoint_file + args.vocoder_checkpoint = vocoder_checkpoint_file if not args.vocoder_config and os.path.isfile(vocoder_config_file): args.vocoder_config = vocoder_config_file From 0ab73061a12cbd48b724ed1ca7b40bade6e4bead Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 15:36:36 +0200 Subject: [PATCH 09/12] update setup.py install tf with external pip call --- setup.py | 72 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 462f0056..7e40f234 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,41 @@ if 'bdist_wheel' in unknown_args and args.checkpoint and args.model_config: shutil.copy(args.model_config, embedded_config_path) package_data.extend([embedded_checkpoint_path, embedded_config_path]) + +def pip_install(package_name): + subprocess.call( + [sys.executable, '-m', 'pip', 'install', package_name] + ) + + +requirements = { + 'install_requires':[ + "torch>=1.5", + "numpy>=1.16.0", + "numba==0.48", + "scipy>=0.19.0", + "librosa==0.6.2", + "unidecode==0.4.20", + "attrdict", + "tensorboardX", + "matplotlib", + "Pillow", + "flask", + "tqdm", + "inflect", + "bokeh==1.4.0", + "soundfile", + "phonemizer>=2.2.0", + "nose==1.3.7", + "cardboardlint==1.3.0", + "pylint==2.5.3", + ], + 'pip_install':[ + 'tensorflow>=2.2.0', + ] +} + + setup( name='TTS', version=version, @@ -95,24 +130,23 @@ setup( 'build_py': build_py, 'develop': develop, }, - install_requires=[ - "scipy>=0.19.0", - "torch>=1.5", - "numpy>=1.16.0", - "librosa==0.6.2", - "unidecode==0.4.20", - "attrdict", - "tensorboardX", - "matplotlib", - "Pillow", - "flask", - "tqdm", - "inflect", - "bokeh==1.4.0", - "soundfile", - "phonemizer @ https://github.com/bootphon/phonemizer/tarball/master", - ], - dependency_links=[ - "http://github.com/bootphon/phonemizer/tarball/master#egg=phonemizer-1.0.1" + install_requires=requirements['install_requires'], + python_requires='>=3.6.0', + classifiers=[ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + 'Development Status :: 3 - Alpha', + "Intended Audience :: Science/Research :: Developers", + "Operating System :: POSIX :: Linux", + 'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)', + "Topic :: Software Development :: Libraries :: Python Modules :: Speech :: Sound/Audio :: Multimedia :: Artificial Intelligence", ] ) + +# for some reason having tensorflow in 'install_requires' +# breaks some of the dependencies. +for module in requirements['pip_install']: + pip_install(module) \ No newline at end of file From ad235f0481fba280807f777d1262267518bfd02a Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 15:37:10 +0200 Subject: [PATCH 10/12] update test server_config and mitigate https://github.com/librosa/librosa/issues/1160 in server package test --- tests/inputs/server_config.json | 2 +- tests/test_server_package.sh | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/inputs/server_config.json b/tests/inputs/server_config.json index 9eb7f09f..0cb9b948 100644 --- a/tests/inputs/server_config.json +++ b/tests/inputs/server_config.json @@ -6,7 +6,7 @@ "wavernn_file": null, // wavernn checkpoint file name "wavernn_config": null, // wavernn config file "vocoder_config":null, - "vocoder_file": null, + "vocoder_checkpoint": null, "is_wavernn_batched":true, "port": 5002, "use_cuda": false, diff --git a/tests/test_server_package.sh b/tests/test_server_package.sh index 9fe5e8b1..83ffc6f0 100755 --- a/tests/test_server_package.sh +++ b/tests/test_server_package.sh @@ -14,6 +14,9 @@ rm -f dist/*.whl python setup.py --quiet bdist_wheel --checkpoint tests/outputs/checkpoint_10.pth.tar --model_config tests/outputs/dummy_model_config.json pip install --quiet dist/TTS*.whl +# this is related to https://github.com/librosa/librosa/issues/1160 +pip install numba==0.48 + python -m TTS.server.server & SERVER_PID=$! From c33068ad4035ea0b1599c5210970334c5c588f94 Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 16:09:03 +0200 Subject: [PATCH 11/12] use librosa 0.7.2 and fix vocoder datatset assert --- requirements.txt | 2 +- requirements_tests.txt | 2 +- setup.py | 2 +- vocoder/tests/test_datasets.py | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index fb3e2281..03873061 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ tensorflow>=2.2 numpy>=1.16.0 scipy>=0.19.0 numba==0.48 -librosa==0.6.2 +librosa==0.7.2 unidecode==0.4.20 attrdict tensorboardX diff --git a/requirements_tests.txt b/requirements_tests.txt index 6823b172..cd5df3fa 100644 --- a/requirements_tests.txt +++ b/requirements_tests.txt @@ -3,7 +3,7 @@ tensorflow==2.3rc numpy>=1.16.0 scipy>=0.19.0 numba==0.48 -librosa==0.6.2 +librosa==0.7.2 unidecode==0.4.20 attrdict tensorboardX diff --git a/setup.py b/setup.py index 7e40f234..bfc59516 100644 --- a/setup.py +++ b/setup.py @@ -82,7 +82,7 @@ requirements = { "numpy>=1.16.0", "numba==0.48", "scipy>=0.19.0", - "librosa==0.6.2", + "librosa==0.7.2", "unidecode==0.4.20", "attrdict", "tensorboardX", diff --git a/vocoder/tests/test_datasets.py b/vocoder/tests/test_datasets.py index 5d409b3f..43d0d3de 100644 --- a/vocoder/tests/test_datasets.py +++ b/vocoder/tests/test_datasets.py @@ -59,9 +59,9 @@ def gan_dataset_case(batch_size, seq_len, hop_len, conv_pad, return_segments, us audio = wav1[idx].squeeze() feat = feat1[idx] mel = ap.melspectrogram(audio) - # the first 2 and the last frame is skipped due to the padding - # applied in spec. computation. - assert (feat - mel[:, :feat1.shape[-1]])[:, 2:-1].sum() == 0, f' [!] {(feat - mel[:, :feat1.shape[-1]])[:, 2:-1].sum()}' + # the first 2 and the last 2 frames are skipped due to the padding + # differences in stft + assert (feat - mel[:, :feat1.shape[-1]])[:, 2:-2].sum() <= 0, f' [!] {(feat - mel[:, :feat1.shape[-1]])[:, 2:-2].sum()}' count_iter += 1 # if count_iter == max_iter: From 98becfe973eab8aa665d47398ba21ea2ba7e04c4 Mon Sep 17 00:00:00 2001 From: erogol Date: Sun, 12 Jul 2020 16:19:05 +0200 Subject: [PATCH 12/12] phonemizer in requirements --- requirements.txt | 1 + requirements_tests.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 03873061..ed8c0499 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ numpy>=1.16.0 scipy>=0.19.0 numba==0.48 librosa==0.7.2 +phonemizer>=2.2.0 unidecode==0.4.20 attrdict tensorboardX diff --git a/requirements_tests.txt b/requirements_tests.txt index cd5df3fa..963c1c8f 100644 --- a/requirements_tests.txt +++ b/requirements_tests.txt @@ -4,6 +4,7 @@ numpy>=1.16.0 scipy>=0.19.0 numba==0.48 librosa==0.7.2 +phonemizer>=2.2.0 unidecode==0.4.20 attrdict tensorboardX