From 9b29b4e281ce697bfbe3e69e64bf1aa14b1ccc99 Mon Sep 17 00:00:00 2001
From: Eren
Date: Tue, 18 Sep 2018 12:56:07 +0200
Subject: [PATCH 1/5] Weight decay

---
 config.json |  1 +
 train.py    | 17 +++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/config.json b/config.json
index 23320fe1..aa429805 100644
--- a/config.json
+++ b/config.json
@@ -23,6 +23,7 @@
     "batch_size": 32,
     "eval_batch_size":-1,
     "r": 5,
+    "wd": 0.0001,
 
     "griffin_lim_iters": 60,
     "power": 1.5,
diff --git a/train.py b/train.py
index e1df221d..14def5e7 100644
--- a/train.py
+++ b/train.py
@@ -22,7 +22,6 @@ from models.tacotron import Tacotron
 from layers.losses import L1LossMasked
 from utils.audio import AudioProcessor
 
-
 torch.manual_seed(1)
 # torch.set_num_threads(4)
 use_cuda = torch.cuda.is_available()
@@ -278,7 +277,7 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step):
             tb.add_figure('ValVisual/Reconstruction', const_spec, current_step)
             tb.add_figure('ValVisual/GroundTruth', gt_spec, current_step)
             tb.add_figure('ValVisual/ValidationAlignment', align_img,
-                           current_step)
+                          current_step)
 
             # Sample audio
             audio_signal = linear_output[idx].data.cpu().numpy()
@@ -318,7 +317,8 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step):
 
             file_path = os.path.join(AUDIO_PATH, str(current_step))
             os.makedirs(file_path, exist_ok=True)
-            file_path = os.path.join(file_path, "TestSentence_{}.wav".format(idx))
+            file_path = os.path.join(file_path,
+                                     "TestSentence_{}.wav".format(idx))
             ap.save_wav(wav, file_path)
 
             wav_name = 'TestSentences/{}'.format(idx)
@@ -327,10 +327,10 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step):
             align_img = alignments[0].data.cpu().numpy()
             linear_spec = plot_spectrogram(linear_spec, ap)
             align_img = plot_alignment(align_img)
-            tb.add_figure('TestSentences/{}_Spectrogram'.format(idx), linear_spec,
-                          current_step)
+            tb.add_figure('TestSentences/{}_Spectrogram'.format(idx),
+                          linear_spec, current_step)
             tb.add_figure('TestSentences/{}_Alignment'.format(idx), align_img,
-                           current_step)
+                          current_step)
         except:
             print(" !! Error as creating Test Sentence -", idx)
             pass
@@ -390,8 +390,9 @@ def main(args):
     model = Tacotron(c.embedding_size, ap.num_freq, c.num_mels, c.r)
     print(" | > Num output units : {}".format(ap.num_freq), flush=True)
 
-    optimizer = optim.Adam(model.parameters(), lr=c.lr)
-    optimizer_st = optim.Adam(model.decoder.stopnet.parameters(), lr=c.lr)
+    optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=c.wd)
+    optimizer_st = optim.Adam(
+        model.decoder.stopnet.parameters(), lr=c.lr, weight_decay=c.wd)
 
     criterion = L1LossMasked()
     criterion_st = nn.BCELoss()
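
PATCH 1 turns on the `weight_decay` argument of `torch.optim.Adam` for both optimizers. In stock Adam this is classic L2 regularization: the optimizer adds `wd * p` to each parameter's gradient before the adaptive update, so the penalty ends up rescaled by the per-parameter moment estimates. A minimal, self-contained sketch of that behaviour (the toy `model`, `x`, `y` below are placeholders, not this repo's Tacotron training code):

    import torch
    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(10, 1)      # stand-in for the real model
    lr, wd = 0.001, 0.0001        # mirrors c.lr and the new c.wd

    # L2-style decay: Adam folds wd * p into the gradient internally.
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    x, y = torch.randn(8, 10), torch.randn(8, 1)
    loss = nn.functional.mse_loss(model(x), y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()              # the decay happens inside the update
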
From 16db5159f12568a2f9fc9ab150a7f00f2c08897b Mon Sep 17 00:00:00 2001
From: Eren
Date: Wed, 19 Sep 2018 14:25:30 +0200
Subject: [PATCH 2/5] Weight decay described here: http://www.fast.ai/2018/07/02/adam-weight-decay/

---
 train.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index 14def5e7..55811097 100644
--- a/train.py
+++ b/train.py
@@ -89,6 +89,9 @@ def train(model, criterion, criterion_st, data_loader, optimizer, optimizer_st,
 
         # backpass and check the grad norm for spec losses
         loss.backward(retain_graph=True)
+        for group in optimizer.param_groups:
+            for param in group['params']:
+                param.data = param.data.add(-c.wd * group['lr'], param.data)
         grad_norm, skip_flag = check_update(model, 1)
         if skip_flag:
             optimizer.zero_grad()
@@ -98,6 +101,9 @@ def train(model, criterion, criterion_st, data_loader, optimizer, optimizer_st,
 
         # backpass and check the grad norm for stop loss
         stop_loss.backward()
+        for group in optimizer_st.param_groups:
+            for param in group['params']:
+                param.data = param.data.add(-c.wd * group['lr'], param.data)
         grad_norm_st, skip_flag = check_update(model.decoder.stopnet, 0.5)
         if skip_flag:
             optimizer_st.zero_grad()
@@ -390,9 +396,9 @@ def main(args):
     model = Tacotron(c.embedding_size, ap.num_freq, c.num_mels, c.r)
     print(" | > Num output units : {}".format(ap.num_freq), flush=True)
 
-    optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=c.wd)
+    optimizer = optim.Adam(model.parameters(), lr=c.lr, weight_decay=0)
     optimizer_st = optim.Adam(
-        model.decoder.stopnet.parameters(), lr=c.lr, weight_decay=c.wd)
+        model.decoder.stopnet.parameters(), lr=c.lr, weight_decay=0)
 
     criterion = L1LossMasked()
     criterion_st = nn.BCELoss()
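
PATCH 2 switches to the decoupled weight decay described in the linked fast.ai post (the AdamW idea). The optimizers are now built with `weight_decay=0`, and after each backward pass the weights are shrunk directly: the two-argument `Tensor.add(value, other)` form used in the patch computes `p + (-wd * lr) * p`, i.e. `p * (1 - lr * wd)`, so the decay is no longer rescaled by Adam's moment estimates. A minimal sketch of the same idea in a current PyTorch spelling (toy `model`, `x`, `y`; not this repo's training loop):

    import torch
    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(10, 1)
    lr, wd = 0.001, 0.0001

    # No L2 term inside the optimizer; decay is applied to the weights directly.
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0)

    x, y = torch.randn(8, 10), torch.randn(8, 1)
    loss = nn.functional.mse_loss(model(x), y)

    optimizer.zero_grad()
    loss.backward()

    # Decoupled weight decay: p <- p * (1 - lr * wd), independent of the gradients.
    with torch.no_grad():
        for group in optimizer.param_groups:
            for p in group['params']:
                p.mul_(1 - group['lr'] * wd)

    optimizer.step()

Later PyTorch releases ship this decoupled variant built in as `torch.optim.AdamW`.
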
From d9deeacc7ac62a07f5ea6307984385e79ebcbed4 Mon Sep 17 00:00:00 2001
From: Eren
Date: Wed, 19 Sep 2018 15:14:38 +0200
Subject: [PATCH 3/5] Config

---
 config.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.json b/config.json
index aa429805..d8af7a93 100644
--- a/config.json
+++ b/config.json
@@ -1,6 +1,6 @@
 {
-    "model_name": "TTS-master",
-    "model_description": "'Same' padding as in TF",
+    "model_name": "TTS-weight-decay",
+    "model_description": "Weight decay as in FastAI",
     "audio_processor": "audio",
     "num_mels": 80,
     "num_freq": 1025,

From c36a1f7deb5b7f12725e86ee4d3065a29a040f13 Mon Sep 17 00:00:00 2001
From: Eren
Date: Fri, 21 Sep 2018 17:27:02 +0200
Subject: [PATCH 4/5] Make audio folder and save audio with scipy

---
 train.py       | 2 +-
 utils/audio.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index 55811097..9d6f1e19 100644
--- a/train.py
+++ b/train.py
@@ -479,7 +479,7 @@ if __name__ == '__main__':
     OUT_PATH = create_experiment_folder(OUT_PATH, c.model_name, args.debug)
     CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints')
     AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios')
-    os.mkdir(AUDIO_PATH)
+    os.makedirs(AUDIO_PATH, exist_ok=True)
     shutil.copyfile(args.config_path, os.path.join(OUT_PATH, 'config.json'))
 
     # setup tensorboard
diff --git a/utils/audio.py b/utils/audio.py
index 4ea5bfe0..9849cc93 100644
--- a/utils/audio.py
+++ b/utils/audio.py
@@ -3,6 +3,7 @@ import librosa
 import pickle
 import copy
 import numpy as np
+import scipy
 from scipy import signal
 
 _mel_basis = None
@@ -38,7 +39,8 @@ class AudioProcessor(object):
 
     def save_wav(self, wav, path):
         wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
-        librosa.output.write_wav(path, wav_norm.astype(np.int16), self.sample_rate)
+        # librosa.output.write_wav(path, wav_norm.astype(np.int16), self.sample_rate)
+        scipy.io.wavfile.write(path, self.sample_rate, wav.astype(np.int16))
 
     def _linear_to_mel(self, spectrogram):
         global _mel_basis

From 607ca52019781bf80ffbd1eaa4c591ebaa5738e9 Mon Sep 17 00:00:00 2001
From: Eren
Date: Fri, 21 Sep 2018 21:51:38 +0200
Subject: [PATCH 5/5] bug fix

---
 utils/audio.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/audio.py b/utils/audio.py
index 9849cc93..061fefc0 100644
--- a/utils/audio.py
+++ b/utils/audio.py
@@ -40,7 +40,7 @@ class AudioProcessor(object):
     def save_wav(self, wav, path):
         wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
         # librosa.output.write_wav(path, wav_norm.astype(np.int16), self.sample_rate)
-        scipy.io.wavfile.write(path, self.sample_rate, wav.astype(np.int16))
+        scipy.io.wavfile.write(path, self.sample_rate, wav_norm.astype(np.int16))
 
     def _linear_to_mel(self, spectrogram):
         global _mel_basis
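
PATCHES 4-5 swap `librosa.output.write_wav` for `scipy.io.wavfile.write`, and PATCH 5 fixes the slip where the raw float array `wav` was written instead of the int16-scaled `wav_norm` (casting un-scaled floats in [-1, 1] to int16 yields near-silence). A standalone sketch of the resulting `save_wav` behaviour (the 22050 Hz rate and the test tone are illustrative assumptions, not values taken from this repo's config):

    import numpy as np
    import scipy.io.wavfile

    def save_wav(wav, path, sample_rate=22050):
        # Scale the float waveform into the int16 range before writing;
        # writing raw [-1, 1] floats as int16 would truncate them to ~0.
        wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
        scipy.io.wavfile.write(path, sample_rate, wav_norm.astype(np.int16))

    t = np.arange(22050) / 22050.0
    save_wav(np.sin(2 * np.pi * 440 * t), "/tmp/test_tone.wav")
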