diff --git a/config.json b/config.json index b6acee4a..76409161 100644 --- a/config.json +++ b/config.json @@ -28,6 +28,7 @@ "checkpoint": true, "save_step": 376, "print_step": 10, + "run_eval": false, "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/", "min_seq_len": 0, "output_path": "experiments/" diff --git a/train.py b/train.py index 87751306..35e6d42b 100644 --- a/train.py +++ b/train.py @@ -18,7 +18,7 @@ from torch.utils.data import DataLoader from torch.optim.lr_scheduler import ReduceLROnPlateau from tensorboardX import SummaryWriter -from utils.generic_utils import (Progbar, remove_experiment_folder, +from utils.generic_utils import (synthesis, remove_experiment_folder, create_experiment_folder, save_checkpoint, save_best_model, load_config, lr_decay, count_parameters, check_update, get_commit_hash) @@ -116,14 +116,6 @@ def train(model, criterion, criterion_st, data_loader, optimizer, optimizer_st, step_time = time.time() - start_time epoch_time += step_time - # update - # progbar.update(num_iter+1, values=[('total_loss', loss.item()), - # ('linear_loss', linear_loss.item()), - # ('mel_loss', mel_loss.item()), - # ('stop_loss', stop_loss.item()), - # ('grad_norm', grad_norm.item()), - # ('grad_norm_st', grad_norm_st.item())]) - if current_step % c.print_step == 0: print(" | | > Step:{} GlobalStep:{} TotalLoss:{:.5f} LinearLoss:{:.5f} "\ "MelLoss:{:.5f} StopLoss:{:.5f} GradNorm:{:.5f} "\ @@ -217,6 +209,10 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step): avg_mel_loss = 0 avg_stop_loss = 0 print(" | > Validation") + test_sentences = ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", + "Be a voice, not an echo.", + "I'm sorry Dave. I'm afraid I can't do that.", + "This cake is great. 
It's so delicious and moist."] # progbar = Progbar(len(data_loader.dataset) / c.batch_size) n_priority_freq = int(3000 / (c.sample_rate * 0.5) * c.num_freq) with torch.no_grad(): @@ -259,11 +255,6 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step): step_time = time.time() - start_time epoch_time += step_time - # update - # progbar.update(num_iter+1, values=[('total_loss', loss.item()), - # ('linear_loss', linear_loss.item()), - # ('mel_loss', mel_loss.item()), - # ('stop_loss', stop_loss.item())]) if num_iter % c.print_step == 0: print(" | | > TotalLoss: {:.5f} LinearLoss: {:.5f} MelLoss:{:.5f} "\ "StopLoss: {:.5f} ".format(loss.item(), @@ -297,9 +288,7 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step): tb.add_audio('ValSampleAudio', audio_signal, current_step, sample_rate=c.sample_rate) except: - # print(" | > Error at audio signal on TB!!") - # print(audio_signal.max()) - # print(audio_signal.min()) + # sometimes audio signal is out of boundaries pass # compute average losses @@ -314,6 +303,17 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step): tb.add_scalar('ValEpochLoss/MelLoss', avg_mel_loss, current_step) tb.add_scalar('ValEpochLoss/Stop_loss', avg_stop_loss, current_step) + # test sentences + data_loader.dataset.ap.griffin_lim_iters = 60 + for idx, test_sentence in enumerate(test_sentences): + wav = synthesis(model, data_loader.dataset.ap, test_sentence, use_cuda, + c.text_cleaner) + try: + wav_name = 'TestSentences/{}'.format(idx) + tb.add_audio(wav_name, wav, current_step, + sample_rate=c.sample_rate) + except: + pass return avg_linear_loss
diff --git a/utils/generic_utils.py b/utils/generic_utils.py index 5e4487d5..effad6be 100644 --- a/utils/generic_utils.py +++ b/utils/generic_utils.py @@ -10,6 +10,7 @@ import subprocess import numpy as np from collections import OrderedDict from torch.autograd import Variable +from utils.text import text_to_sequence class AttrDict(dict): @@ -159,142 +160,13 @@ def sequence_mask(sequence_length, max_len=None): return seq_range_expand < seq_length_expand -class Progbar(object): - """Displays a progress bar. - Args: - target: Total number of steps expected, None if unknown. - interval: Minimum visual progress update interval (in seconds). - """ - - def __init__(self, target, width=30, verbose=1, interval=0.05): - self.width = width - self.target = target - self.sum_values = {} - self.unique_values = [] - self.start = time.time() - self.last_update = 0 - self.interval = interval - self.total_width = 0 - self.seen_so_far = 0 - self.verbose = verbose - self._dynamic_display = ((hasattr(sys.stdout, 'isatty') and - sys.stdout.isatty()) or - 'ipykernel' in sys.modules) - - def update(self, current, values=None, force=False): - """Updates the progress bar. - # Arguments - current: Index of current step. - values: List of tuples (name, value_for_last_step). - The progress bar will display averages for these values. - force: Whether to force visual progress update.
- """ - values = values or [] - for k, v in values: - if k not in self.sum_values: - self.sum_values[k] = [v * (current - self.seen_so_far), - current - self.seen_so_far] - self.unique_values.append(k) - else: - self.sum_values[k][0] += v * (current - self.seen_so_far) - self.sum_values[k][1] += (current - self.seen_so_far) - self.seen_so_far = current - - now = time.time() - info = ' - %.0fs' % (now - self.start) - if self.verbose == 1: - if (not force and (now - self.last_update) < self.interval and - self.target is not None and current < self.target): - return - - prev_total_width = self.total_width - if self._dynamic_display: - sys.stdout.write('\b' * prev_total_width) - sys.stdout.write('\r') - else: - sys.stdout.write('\n') - - if self.target is not None: - numdigits = int(np.floor(np.log10(self.target))) + 1 - barstr = '%%%dd/%d [' % (numdigits, self.target) - bar = barstr % current - prog = float(current) / self.target - prog_width = int(self.width * prog) - if prog_width > 0: - bar += ('=' * (prog_width - 1)) - if current < self.target: - bar += '>' - else: - bar += '=' - bar += ('.' 
* (self.width - prog_width)) - bar += ']' - else: - bar = '%7d/Unknown' % current - - self.total_width = len(bar) - sys.stdout.write(bar) - - if current: - time_per_unit = (now - self.start) / current - else: - time_per_unit = 0 - if self.target is not None and current < self.target: - eta = time_per_unit * (self.target - current) - if eta > 3600: - eta_format = '%d:%02d:%02d' % ( - eta // 3600, (eta % 3600) // 60, eta % 60) - elif eta > 60: - eta_format = '%d:%02d' % (eta // 60, eta % 60) - else: - eta_format = '%ds' % eta - - info = ' - ETA: %s' % eta_format - - if time_per_unit >= 1: - info += ' %.0fs/step' % time_per_unit - elif time_per_unit >= 1e-3: - info += ' %.0fms/step' % (time_per_unit * 1e3) - else: - info += ' %.0fus/step' % (time_per_unit * 1e6) - - for k in self.unique_values: - info += ' - %s:' % k - if isinstance(self.sum_values[k], list): - avg = np.mean( - self.sum_values[k][0] / max(1, self.sum_values[k][1])) - if abs(avg) > 1e-3: - info += ' %.4f' % avg - else: - info += ' %.4e' % avg - else: - info += ' %s' % self.sum_values[k] - - self.total_width += len(info) - if prev_total_width > self.total_width: - info += (' ' * (prev_total_width - self.total_width)) - - if self.target is not None and current >= self.target: - info += '\n' - - sys.stdout.write(info) - sys.stdout.flush() - - elif self.verbose == 2: - if self.target is None or current >= self.target: - for k in self.unique_values: - info += ' - %s:' % k - avg = np.mean( - self.sum_values[k][0] / max(1, self.sum_values[k][1])) - if avg > 1e-3: - info += ' %.4f' % avg - else: - info += ' %.4e' % avg - info += '\n' - - sys.stdout.write(info) - sys.stdout.flush() - - self.last_update = now - - def add(self, n, values=None): - self.update(self.seen_so_far + n, values) +def synthesis(model, ap, text, use_cuda, text_cleaner): + text_cleaner = [text_cleaner] + seq = np.array(text_to_sequence(text, text_cleaner)) + chars_var = torch.from_numpy(seq).unsqueeze(0) + if use_cuda: + chars_var = 
chars_var.cuda().long() + _, linear_out, _, _ = model(chars_var) + linear_out = linear_out[0].data.cpu().numpy() + wav = ap.inv_spectrogram(linear_out.T) + return wav