From 74f83b2d135357eacc7fe58d3720b7c404ca6c88 Mon Sep 17 00:00:00 2001
From: nmstoker
Date: Sat, 11 Jul 2020 17:48:05 +0100
Subject: [PATCH 1/2] Fixes #450

---
 vocoder/datasets/gan_dataset.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vocoder/datasets/gan_dataset.py b/vocoder/datasets/gan_dataset.py
index 55513e7d..af23fbf2 100644
--- a/vocoder/datasets/gan_dataset.py
+++ b/vocoder/datasets/gan_dataset.py
@@ -87,6 +87,11 @@ class GANDataset(Dataset):
                 audio, mel = self.cache[idx]
             else:
                 audio = self.ap.load_wav(wavpath)
+
+                if len(audio) < self.seq_len + self.pad_short:
+                    audio = np.pad(audio, (0, self.seq_len + self.pad_short - len(audio)), \
+                            mode='constant', constant_values=0.0)
+
                 mel = self.ap.melspectrogram(audio)
         else:
 
@@ -99,10 +104,6 @@
                 audio = self.ap.load_wav(wavpath)
                 mel = np.load(feat_path)
 
-        if len(audio) < self.seq_len + self.pad_short:
-            audio = np.pad(audio, (0, self.seq_len + self.pad_short - len(audio)), \
-                    mode='constant', constant_values=0.0)
-
         # correct the audio length wrt padding applied in stft
         audio = np.pad(audio, (0, self.hop_len), mode="edge")
         audio = audio[:mel.shape[-1] * self.hop_len]

From 3d9e2faba857d945f15d2c78c0bacc9709544d8b Mon Sep 17 00:00:00 2001
From: nmstoker
Date: Sat, 11 Jul 2020 17:56:49 +0100
Subject: [PATCH 2/2] Clarify GPU Id use with vocoder training

---
 vocoder/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vocoder/README.md b/vocoder/README.md
index 1b65f929..e3baf1f9 100644
--- a/vocoder/README.md
+++ b/vocoder/README.md
@@ -16,23 +16,23 @@ You can see here an example (Soon)[Colab Notebook]() training MelGAN with LJSpee
 
 In order to train a new model, you need to collect all your wav files under a common parent folder and give this path to the `data_path` field in `config.json`.
 
-You need to define other relevant parameters in your `config.json` and then start training with the following command from the Mozilla TTS root path.
+You need to define other relevant parameters in your `config.json` and then start training with the following command from the Mozilla TTS root path, where '0' is the ID of the GPU you wish to use.
 
-```CUDA_VISIBLE_DEVICES='1' python vocoder/train.py --config_path path/to/config.json```
+```CUDA_VISIBLE_DEVICES='0' python vocoder/train.py --config_path path/to/config.json```
 
 Example config files can be found under the `vocoder/configs/` folder.
 
 You can continue a previous training run with the following command.
 
-```CUDA_VISIBLE_DEVICES='1' python vocoder/train.py --continue_path path/to/your/model/folder```
+```CUDA_VISIBLE_DEVICES='0' python vocoder/train.py --continue_path path/to/your/model/folder```
 
 You can fine-tune a pre-trained model with the following command.
 
-```CUDA_VISIBLE_DEVICES='1' python vocoder/train.py --restore_path path/to/your/model.pth.tar```
+```CUDA_VISIBLE_DEVICES='0' python vocoder/train.py --restore_path path/to/your/model.pth.tar```
 
 Restoring a model starts a new training run in a different output folder; it only restores model weights from the given checkpoint file. Continuing a training run, by contrast, resumes from the same conditions the previous run left off at.
 
 You can also follow your training runs on TensorBoard as you do with our TTS models.
 
 ## Acknowledgement
 Thanks to @kan-bayashi for his [repository](https://github.com/kan-bayashi/ParallelWaveGAN), which was the starting point of our work.
\ No newline at end of file +Thanks to @kan-bayashi for his [repository](https://github.com/kan-bayashi/ParallelWaveGAN) being the start point of our work.
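
Note on patch 1 (reviewer context, not part of the patch series): issue #450 arises when a wav file is shorter than `seq_len + pad_short`, so a full training segment cannot be sliced from it. The sketch below isolates the padding logic the patch moves ahead of `self.ap.melspectrogram(audio)`; the `seq_len` and `pad_short` values are illustrative placeholders, not values taken from any shipped `config.json`.

```python
import numpy as np

# Illustrative placeholders -- real values come from config.json.
seq_len = 16384    # samples drawn per training item
pad_short = 2000   # extra margin required beyond seq_len

# A clip shorter than seq_len + pad_short, the case reported in #450.
audio = np.zeros(10000, dtype=np.float32)

# Zero-pad the tail so the clip reaches seq_len + pad_short samples.
# Patch 1 runs this *before* computing the mel spectrogram, so the
# spectrogram frames cover the padded signal and audio/mel lengths agree.
if len(audio) < seq_len + pad_short:
    audio = np.pad(audio, (0, seq_len + pad_short - len(audio)),
                   mode='constant', constant_values=0.0)

assert len(audio) == seq_len + pad_short  # now long enough to slice a segment
```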