mirror of https://github.com/coqui-ai/TTS.git
Perform testing on example sentences
commit 20c3085695 (parent dfd0bc1831)
@@ -28,6 +28,7 @@
"checkpoint": true,
"save_step": 376,
"print_step": 10,
"run_eval": false,
"data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
"min_seq_len": 0,
"output_path": "experiments/"
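The config hunk above carries a boolean run_eval flag, which is not read anywhere in this diff. As a rough sketch of how such a flag could gate the validation pass once the JSON config is loaded: the attribute-style access mimics the AttrDict class declared in utils/generic_utils.py further down, and everything else here is illustrative, not the repo's code.

```python
# Illustrative only: a config flag gating evaluation, mimicking the
# AttrDict attribute-access idiom from utils/generic_utils.py.
import json


class AttrDict(dict):
    """dict with attribute access; the class body is assumed, not shown in this diff."""
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self


c = AttrDict(json.loads('{"run_eval": false, "print_step": 10, "epochs": 2}'))

for epoch in range(c.epochs):
    print("training epoch", epoch)              # stand-in for train(...)
    if c.run_eval:
        print("running evaluation")             # stand-in for evaluate(...)
    else:
        print("evaluation skipped (run_eval is false)")
```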
train.py (40 changed lines)
@@ -18,7 +18,7 @@ from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tensorboardX import SummaryWriter

from utils.generic_utils import (Progbar, remove_experiment_folder,
from utils.generic_utils import (synthesis, remove_experiment_folder,
create_experiment_folder, save_checkpoint,
save_best_model, load_config, lr_decay,
count_parameters, check_update, get_commit_hash)

@@ -116,14 +116,6 @@ def train(model, criterion, criterion_st, data_loader, optimizer, optimizer_st,
step_time = time.time() - start_time
epoch_time += step_time

# update
# progbar.update(num_iter+1, values=[('total_loss', loss.item()),
# ('linear_loss', linear_loss.item()),
# ('mel_loss', mel_loss.item()),
# ('stop_loss', stop_loss.item()),
# ('grad_norm', grad_norm.item()),
# ('grad_norm_st', grad_norm_st.item())])

if current_step % c.print_step == 0:
print(" | | > Step:{} GlobalStep:{} TotalLoss:{:.5f} LinearLoss:{:.5f} "\
"MelLoss:{:.5f} StopLoss:{:.5f} GradNorm:{:.5f} "\

@@ -217,6 +209,10 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step):
avg_mel_loss = 0
avg_stop_loss = 0
print(" | > Validation")
test_sentences = ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
"Be a voice, not an echo.",
"I'm sorry Dave. I'm afraid I can't do that.",
"This cake is great. It's so delicious and moist."]
# progbar = Progbar(len(data_loader.dataset) / c.batch_size)
n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq)
with torch.no_grad():
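As a quick check of the n_priority_freq formula above: it converts a 3 kHz cutoff into a count of linear-spectrogram bins (these low-frequency bins are typically given extra weight in the linear loss). The sample values below are LJSpeech-style assumptions, not values taken from this diff.

```python
# Assumed values, not taken from this diff.
sample_rate = 22050   # Hz
num_freq = 1025       # linear-spectrogram bins covering 0..sample_rate/2

# Fraction of the Nyquist band below 3 kHz, scaled to a bin count.
n_priority_freq = int(3000 / (sample_rate * 0.5) * num_freq)
print(n_priority_freq)  # 278
```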
@@ -259,11 +255,6 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step):
step_time = time.time() - start_time
epoch_time += step_time

# update
# progbar.update(num_iter+1, values=[('total_loss', loss.item()),
# ('linear_loss', linear_loss.item()),
# ('mel_loss', mel_loss.item()),
# ('stop_loss', stop_loss.item())])
if num_iter % c.print_step == 0:
print(" | | > TotalLoss: {:.5f} LinearLoss: {:.5f} MelLoss:{:.5f} "\
"StopLoss: {:.5f} ".format(loss.item(),

@@ -297,9 +288,7 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step):
tb.add_audio('ValSampleAudio', audio_signal, current_step,
sample_rate=c.sample_rate)
except:
# print(" | > Error at audio signal on TB!!")
# print(audio_signal.max())
# print(audio_signal.min())
# sometimes audio signal is out of boundaries
pass

# compute average losses

@@ -314,6 +303,17 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step):
tb.add_scalar('ValEpochLoss/MelLoss', avg_mel_loss, current_step)
tb.add_scalar('ValEpochLoss/Stop_loss', avg_stop_loss, current_step)

# test sentences
data_loader.dataset.ap.griffin_lim_iters = 60
for idx, test_sentence in enumerate(test_sentences):
wav = synthesis(model, data_loader.dataset.ap, test_sentence, use_cuda,
c.text_cleaner)
try:
wav_name = 'TestSentences/{}'.format(idx)
tb.add_audio(wav_name, wav, current_step,
sample_rate=c.sample_rate)
except:
pass
return avg_linear_loss
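The griffin_lim_iters = 60 line above raises the number of phase-estimation passes used when ap.inv_spectrogram (and the new synthesis helper) turn a predicted linear spectrogram back into a waveform. The repo's AudioProcessor owns that logic; the snippet below is only a generic sketch of the Griffin-Lim loop that the iteration count controls, written against plain librosa.

```python
# Generic Griffin-Lim sketch, not the repo's AudioProcessor implementation.
import numpy as np
import librosa


def griffin_lim(magnitude, n_fft=2048, hop_length=256, iters=60):
    """Estimate a waveform from a magnitude spectrogram by iteratively refining phase."""
    angles = np.exp(2j * np.pi * np.random.rand(*magnitude.shape))
    wav = librosa.istft(magnitude * angles, hop_length=hop_length)
    for _ in range(iters):  # more iterations: better phase, slower evaluation
        angles = np.exp(1j * np.angle(librosa.stft(wav, n_fft=n_fft,
                                                   hop_length=hop_length)))
        wav = librosa.istft(magnitude * angles, hop_length=hop_length)
    return wav
```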
@@ -408,8 +408,10 @@ def main(args):
best_loss = float('inf')

for epoch in range(0, c.epochs):
train_loss, current_step = train(
model, criterion, criterion_st, train_loader, optimizer, optimizer_st, epoch)
# train_loss, current_step = train(
current_step = 0
train_loss = 0
# model, criterion, criterion_st, train_loader, optimizer, optimizer_st, epoch)
val_loss = evaluate(model, criterion, criterion_st, val_loader, current_step)
print(" | > Train Loss: {:.5f} Validation Loss: {:.5f}".format(train_loss, val_loss))
best_loss = save_best_model(model, optimizer, val_loss,
utils/generic_utils.py
@@ -10,6 +10,7 @@ import subprocess
import numpy as np
from collections import OrderedDict
from torch.autograd import Variable
from utils.text import text_to_sequence


class AttrDict(dict):

@@ -159,142 +160,13 @@ def sequence_mask(sequence_length, max_len=None):
return seq_range_expand < seq_length_expand


class Progbar(object):
"""Displays a progress bar.
Args:
target: Total number of steps expected, None if unknown.
interval: Minimum visual progress update interval (in seconds).
"""

def __init__(self, target, width=30, verbose=1, interval=0.05):
self.width = width
self.target = target
self.sum_values = {}
self.unique_values = []
self.start = time.time()
self.last_update = 0
self.interval = interval
self.total_width = 0
self.seen_so_far = 0
self.verbose = verbose
self._dynamic_display = ((hasattr(sys.stdout, 'isatty') and
sys.stdout.isatty()) or
'ipykernel' in sys.modules)

def update(self, current, values=None, force=False):
"""Updates the progress bar.
# Arguments
current: Index of current step.
values: List of tuples (name, value_for_last_step).
The progress bar will display averages for these values.
force: Whether to force visual progress update.
"""
values = values or []
for k, v in values:
if k not in self.sum_values:
self.sum_values[k] = [v * (current - self.seen_so_far),
current - self.seen_so_far]
self.unique_values.append(k)
else:
self.sum_values[k][0] += v * (current - self.seen_so_far)
self.sum_values[k][1] += (current - self.seen_so_far)
self.seen_so_far = current

now = time.time()
info = ' - %.0fs' % (now - self.start)
if self.verbose == 1:
if (not force and (now - self.last_update) < self.interval and
self.target is not None and current < self.target):
return

prev_total_width = self.total_width
if self._dynamic_display:
sys.stdout.write('\b' * prev_total_width)
sys.stdout.write('\r')
else:
sys.stdout.write('\n')

if self.target is not None:
numdigits = int(np.floor(np.log10(self.target))) + 1
barstr = '%%%dd/%d [' % (numdigits, self.target)
bar = barstr % current
prog = float(current) / self.target
prog_width = int(self.width * prog)
if prog_width > 0:
bar += ('=' * (prog_width - 1))
if current < self.target:
bar += '>'
else:
bar += '='
bar += ('.' * (self.width - prog_width))
bar += ']'
else:
bar = '%7d/Unknown' % current

self.total_width = len(bar)
sys.stdout.write(bar)

if current:
time_per_unit = (now - self.start) / current
else:
time_per_unit = 0
if self.target is not None and current < self.target:
eta = time_per_unit * (self.target - current)
if eta > 3600:
eta_format = '%d:%02d:%02d' % (
eta // 3600, (eta % 3600) // 60, eta % 60)
elif eta > 60:
eta_format = '%d:%02d' % (eta // 60, eta % 60)
else:
eta_format = '%ds' % eta

info = ' - ETA: %s' % eta_format

if time_per_unit >= 1:
info += ' %.0fs/step' % time_per_unit
elif time_per_unit >= 1e-3:
info += ' %.0fms/step' % (time_per_unit * 1e3)
else:
info += ' %.0fus/step' % (time_per_unit * 1e6)

for k in self.unique_values:
info += ' - %s:' % k
if isinstance(self.sum_values[k], list):
avg = np.mean(
self.sum_values[k][0] / max(1, self.sum_values[k][1]))
if abs(avg) > 1e-3:
info += ' %.4f' % avg
else:
info += ' %.4e' % avg
else:
info += ' %s' % self.sum_values[k]

self.total_width += len(info)
if prev_total_width > self.total_width:
info += (' ' * (prev_total_width - self.total_width))

if self.target is not None and current >= self.target:
info += '\n'

sys.stdout.write(info)
sys.stdout.flush()

elif self.verbose == 2:
if self.target is None or current >= self.target:
for k in self.unique_values:
info += ' - %s:' % k
avg = np.mean(
self.sum_values[k][0] / max(1, self.sum_values[k][1]))
if avg > 1e-3:
info += ' %.4f' % avg
else:
info += ' %.4e' % avg
info += '\n'

sys.stdout.write(info)
sys.stdout.flush()

self.last_update = now

def add(self, n, values=None):
self.update(self.seen_so_far + n, values)

def synthesis(model, ap, text, use_cuda, text_cleaner):
text_cleaner = [text_cleaner]
seq = np.array(text_to_sequence(text, text_cleaner))
chars_var = torch.from_numpy(seq).unsqueeze(0)
if use_cuda:
chars_var = chars_var.cuda().long()
_, linear_out, _, _ = model.forward(chars_var)
linear_out = linear_out[0].data.cpu().numpy()
wav = ap.inv_spectrogram(linear_out.T)
return wav
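For reference, a usage sketch of the new synthesis helper. It only assumes what the function body above shows: a model whose forward() returns four outputs with the linear spectrogram second, and an audio processor exposing inv_spectrogram(). The stub classes and the cleaner name are placeholders rather than repo classes, and running it assumes the repo checkout is on PYTHONPATH so utils.text resolves.

```python
# Usage sketch with stand-in objects; only the call contract of synthesis()
# is taken from the code above.
import numpy as np
import torch

from utils.generic_utils import synthesis   # requires the repo on PYTHONPATH


class StubModel:
    def forward(self, chars):
        frames = chars.shape[1]
        linear = torch.zeros(1, frames, 1025)   # (batch, frames, num_freq); 1025 assumed
        return None, linear, None, None          # mel, linear, alignment, stop tokens


class StubAudioProcessor:
    griffin_lim_iters = 60

    def inv_spectrogram(self, spectrogram):
        # The real AudioProcessor runs Griffin-Lim here; the stub returns silence.
        return np.zeros(spectrogram.shape[1] * 256, dtype=np.float32)


wav = synthesis(StubModel(), StubAudioProcessor(),
                "Be a voice, not an echo.", use_cuda=False,
                text_cleaner="english_cleaners")   # cleaner name assumed
print(wav.shape)
```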