mirror of https://github.com/coqui-ai/TTS.git
Plot spectrograms and alignments for test sentences
This commit is contained in:
parent
766e88700d
commit
c72f309068
5
.compute
5
.compute
|
@ -1,3 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
source ../tmp/venv/bin/activate
|
# source ../tmp/venv/bin/activate
|
||||||
|
# ls /snakepit/jobs/650/keep/
|
||||||
|
source /snakepit/jobs/650/keep/venv/bin/activate
|
||||||
|
# source /snakepit/jobs/560/tmp/venv/bin/activate
|
||||||
python train.py --config_path config.json --debug true
|
python train.py --config_path config.json --debug true
|
||||||
|
|
6
.install
6
.install
|
@ -1,4 +1,4 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
virtualenv -p python3 ../tmp/venv
|
# virtualenv -p python3 ../tmp/venv
|
||||||
source ../tmp/venv/bin/activate
|
# source ../tmp/venv/bin/activate
|
||||||
python setup.py develop
|
# python setup.py develop
|
||||||
|
|
|
@ -23,10 +23,10 @@
|
||||||
"griffin_lim_iters": 60,
|
"griffin_lim_iters": 60,
|
||||||
"power": 1.5,
|
"power": 1.5,
|
||||||
|
|
||||||
"num_loader_workers": 4,
|
"num_loader_workers": 14,
|
||||||
|
|
||||||
"checkpoint": true,
|
"checkpoint": true,
|
||||||
"save_step": 376,
|
"save_step": 750,
|
||||||
"print_step": 10,
|
"print_step": 10,
|
||||||
"run_eval": false,
|
"run_eval": false,
|
||||||
"data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
|
"data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
|
||||||
|
|
10
train.py
10
train.py
|
@ -302,14 +302,20 @@ def evaluate(model, criterion, criterion_st, data_loader, ap, current_step):
|
||||||
# test sentences
|
# test sentences
|
||||||
ap.griffin_lim_iters = 60
|
ap.griffin_lim_iters = 60
|
||||||
for idx, test_sentence in enumerate(test_sentences):
|
for idx, test_sentence in enumerate(test_sentences):
|
||||||
wav = synthesis(model, ap, test_sentence, use_cuda,
|
wav, linear_out, alignments = synthesis(model, ap, test_sentence, use_cuda,
|
||||||
c.text_cleaner)
|
c.text_cleaner)
|
||||||
try:
|
try:
|
||||||
wav_name = 'TestSentences/{}'.format(idx)
|
wav_name = 'TestSentences/{}'.format(idx)
|
||||||
tb.add_audio(wav_name, wav, current_step,
|
tb.add_audio(wav_name, wav, current_step,
|
||||||
sample_rate=c.sample_rate)
|
sample_rate=c.sample_rate)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
align_img = alignments[0].data.cpu().numpy()
|
||||||
|
linear_spec = linear_output[0].data.cpu().numpy()
|
||||||
|
linear_spec = plot_spectrogram(linear_spec, ap)
|
||||||
|
align_img = plot_alignment(align_img)
|
||||||
|
tb.add_image('TestSentences/{}_GroundTruth'.format(idx), gt_spec, current_step)
|
||||||
|
tb.add_image('TestSentences/{}_Alignment'.format(idx), align_img, current_step)
|
||||||
return avg_linear_loss
|
return avg_linear_loss
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -166,7 +166,7 @@ def synthesis(model, ap, text, use_cuda, text_cleaner):
|
||||||
chars_var = torch.from_numpy(seq).unsqueeze(0)
|
chars_var = torch.from_numpy(seq).unsqueeze(0)
|
||||||
if use_cuda:
|
if use_cuda:
|
||||||
chars_var = chars_var.cuda().long()
|
chars_var = chars_var.cuda().long()
|
||||||
_, linear_out, _, _ = model.forward(chars_var)
|
_, linear_out, alignments, _ = model.forward(chars_var)
|
||||||
linear_out = linear_out[0].data.cpu().numpy()
|
linear_out = linear_out[0].data.cpu().numpy()
|
||||||
wav = ap.inv_spectrogram(linear_out.T)
|
wav = ap.inv_spectrogram(linear_out.T)
|
||||||
return wav
|
return wav, linear_out, alignments
|
Loading…
Reference in New Issue