From f377cd3cb8652adc061ac2c2933e603410733895 Mon Sep 17 00:00:00 2001 From: Eren Date: Thu, 6 Sep 2018 15:27:15 +0200 Subject: [PATCH 1/2] larger attention filter size and more filters --- layers/attention.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/layers/attention.py b/layers/attention.py index 26e6c5d2..f8fecb83 100644 --- a/layers/attention.py +++ b/layers/attention.py @@ -37,8 +37,8 @@ class LocationSensitiveAttention(nn.Module): annot_dim, query_dim, attn_dim, - kernel_size=7, - filters=20): + kernel_size=31, + filters=32): super(LocationSensitiveAttention, self).__init__() self.kernel_size = kernel_size self.filters = filters From 4b08c3a13f7bd265908be338ece944593b098eb6 Mon Sep 17 00:00:00 2001 From: Eren Date: Sun, 16 Sep 2018 13:32:59 +0200 Subject: [PATCH 2/2] Save audio samples per validation --- train.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index f23f3d01..e1df221d 100644 --- a/train.py +++ b/train.py @@ -315,10 +315,15 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step): try: wav, linear_spec, alignments = synthesis(model, ap, test_sentence, use_cuda, c.text_cleaner) + + file_path = os.path.join(AUDIO_PATH, str(current_step)) + os.makedirs(file_path, exist_ok=True) + file_path = os.path.join(file_path, "TestSentence_{}.wav".format(idx)) + ap.save_wav(wav, file_path) + wav_name = 'TestSentences/{}'.format(idx) tb.add_audio( wav_name, wav, current_step, sample_rate=c.sample_rate) - align_img = alignments[0].data.cpu().numpy() linear_spec = plot_spectrogram(linear_spec, ap) align_img = plot_alignment(align_img) @@ -466,6 +471,8 @@ if __name__ == '__main__': OUT_PATH = os.path.join(_, c.output_path) OUT_PATH = create_experiment_folder(OUT_PATH, c.model_name, args.debug) CHECKPOINT_PATH = os.path.join(OUT_PATH, 'checkpoints') + AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios') + os.mkdir(AUDIO_PATH) shutil.copyfile(args.config_path, 
os.path.join(OUT_PATH, 'config.json')) # setup tensorboard