Plot spectrograms and alignments for test sentences

2018-07-23 16:14:46 +02:00 · 2018-07-23 16:14:46 +02:00 · c72f309068
parent 766e88700d
commit c72f309068
5 changed files with 19 additions and 10 deletions
--- a/.compute
+++ b/.compute
@ -1,3 +1,6 @@
 #!/bin/bash
-source ../tmp/venv/bin/activate
+# source ../tmp/venv/bin/activate
 # ls /snakepit/jobs/650/keep/
 source /snakepit/jobs/650/keep/venv/bin/activate
 # source  /snakepit/jobs/560/tmp/venv/bin/activate
 python train.py --config_path config.json --debug true
--- a/.install
+++ b/.install
@ -1,4 +1,4 @@
 #!/bin/bash
-virtualenv -p python3 ../tmp/venv
+# virtualenv -p python3 ../tmp/venv
-source ../tmp/venv/bin/activate
+# source ../tmp/venv/bin/activate
-python setup.py develop
+# python setup.py develop
--- a/config.json
+++ b/config.json
@ -23,10 +23,10 @@
    "griffin_lim_iters": 60,
    "power": 1.5,
-    "num_loader_workers": 4,
+    "num_loader_workers": 14,
    "checkpoint": true,
-    "save_step": 376,
+    "save_step": 750,
    "print_step": 10,
    "run_eval": false,
    "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
--- a/train.py
+++ b/train.py
@ -302,14 +302,20 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step):
    # test sentences
    ap.griffin_lim_iters = 60
    for idx, test_sentence in enumerate(test_sentences):
-        wav = synthesis(model, ap, test_sentence, use_cuda,
+        wav, linear_out, alignments = synthesis(model, ap, test_sentence, use_cuda,
-                        c.text_cleaner)
+                                                c.text_cleaner)
        try:
            wav_name = 'TestSentences/{}'.format(idx)
            tb.add_audio(wav_name, wav, current_step,
                         sample_rate=c.sample_rate)
        except:
            pass
        align_img = alignments[0].data.cpu().numpy()
        linear_spec = linear_output[0].data.cpu().numpy()
        linear_spec = plot_spectrogram(linear_spec, ap)
        align_img = plot_alignment(align_img)
        tb.add_image('TestSentences/{}_GroundTruth'.format(idx), gt_spec, current_step)
        tb.add_image('TestSentences/{}_Alignment'.format(idx), align_img, current_step)
    return avg_linear_loss
--- a/utils/generic_utils.py
+++ b/utils/generic_utils.py
@ -166,7 +166,7 @@ def synthesis(model, ap, text, use_cuda, text_cleaner):
        chars_var = torch.from_numpy(seq).unsqueeze(0)
        if use_cuda:
            chars_var = chars_var.cuda().long()
-        _, linear_out, _, _ = model.forward(chars_var)
+        _, linear_out, alignments, _ = model.forward(chars_var)
        linear_out = linear_out[0].data.cpu().numpy()
        wav = ap.inv_spectrogram(linear_out.T)
-        return wav
+        return wav, linear_out, alignments