Patch summary (free-form preamble; patch tools ignore text before the first "diff --git" header):
  * .compute / .install: activate the pre-built virtualenv kept under /snakepit/jobs/650/keep
    instead of building one per job (install steps are commented out).
  * config.json: more loader workers (4 to 14), checkpoint every 750 steps instead of 376.
  * utils/generic_utils.py: synthesis() now returns (wav, linear_out, alignments);
    linear_out is already a NumPy array for the first batch item.
  * train.py: evaluate() logs a spectrogram and an alignment image for each test sentence.
    The spectrogram is plotted from the linear_out returned by synthesis() for that sentence.
    NOTE(review): assumes plot_spectrogram accepts the same (untransposed) array layout that
    synthesis() produces; confirm against plot_spectrogram's definition.
    NOTE(review): blank context lines and continuation indents were reconstructed from the
    hunk counts; verify with "git apply --check" before use.

diff --git a/.compute b/.compute
index 2fe1e5ea..23b24a62 100644
--- a/.compute
+++ b/.compute
@@ -1,3 +1,6 @@
 #!/bin/bash
-source ../tmp/venv/bin/activate
+# source ../tmp/venv/bin/activate
+# ls /snakepit/jobs/650/keep/
+source /snakepit/jobs/650/keep/venv/bin/activate
+# source /snakepit/jobs/560/tmp/venv/bin/activate
 python train.py --config_path config.json --debug true
diff --git a/.install b/.install
index acc7b6cc..cfa0a04a 100644
--- a/.install
+++ b/.install
@@ -1,4 +1,4 @@
 #!/bin/bash
-virtualenv -p python3 ../tmp/venv
-source ../tmp/venv/bin/activate
-python setup.py develop
+# virtualenv -p python3 ../tmp/venv
+# source ../tmp/venv/bin/activate
+# python setup.py develop
diff --git a/config.json b/config.json
index 76409161..3a371e60 100644
--- a/config.json
+++ b/config.json
@@ -23,10 +23,10 @@
     "griffin_lim_iters": 60,
     "power": 1.5,
 
-    "num_loader_workers": 4,
+    "num_loader_workers": 14,
 
     "checkpoint": true,
-    "save_step": 376,
+    "save_step": 750,
     "print_step": 10,
     "run_eval": false,
     "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
diff --git a/train.py b/train.py
index ddf91dc4..9f62f003 100644
--- a/train.py
+++ b/train.py
@@ -302,14 +302,19 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step):
     # test sentences
     ap.griffin_lim_iters = 60
     for idx, test_sentence in enumerate(test_sentences):
-        wav = synthesis(model, ap, test_sentence, use_cuda,
-                        c.text_cleaner)
+        wav, linear_out, alignments = synthesis(model, ap, test_sentence, use_cuda,
+                                                c.text_cleaner)
 
         try:
             wav_name = 'TestSentences/{}'.format(idx)
             tb.add_audio(wav_name, wav, current_step,
                          sample_rate=c.sample_rate)
         except:
             pass
+        align_img = alignments[0].data.cpu().numpy()
+        linear_spec = plot_spectrogram(linear_out, ap)
+        align_img = plot_alignment(align_img)
+        tb.add_image('TestSentences/{}_Spectrogram'.format(idx), linear_spec, current_step)
+        tb.add_image('TestSentences/{}_Alignment'.format(idx), align_img, current_step)
     return avg_linear_loss
 
diff --git a/utils/generic_utils.py b/utils/generic_utils.py
index effad6be..d62b648a 100644
--- a/utils/generic_utils.py
+++ b/utils/generic_utils.py
@@ -166,7 +166,7 @@ def synthesis(model, ap, text, use_cuda, text_cleaner):
     chars_var = torch.from_numpy(seq).unsqueeze(0)
     if use_cuda:
         chars_var = chars_var.cuda().long()
-    _, linear_out, _, _ = model.forward(chars_var)
+    _, linear_out, alignments, _ = model.forward(chars_var)
     linear_out = linear_out[0].data.cpu().numpy()
     wav = ap.inv_spectrogram(linear_out.T)
-    return wav
\ No newline at end of file
+    return wav, linear_out, alignments
\ No newline at end of file