logger for tensorboard plotting

This commit is contained in:
Eren Golge 2018-12-13 18:18:37 +01:00
parent 8127ef3118
commit 481105ccfa
3 changed files with 117 additions and 81 deletions

View File

@ -40,6 +40,7 @@
"checkpoint": true, "checkpoint": true,
"save_step": 5000, "save_step": 5000,
"print_step": 10, "print_step": 10,
"tb_model_param_stats": true, // if true, plots per-layer parameter stats on TensorBoard. Can be memory-intensive, but useful for debugging.
"run_eval": true, "run_eval": true,
"data_path": "../../Data/LJSpeech-1.1/", // can be overwritten from a command-line argument "data_path": "../../Data/LJSpeech-1.1/", // can be overwritten from a command-line argument

122
train.py
View File

@ -22,6 +22,7 @@ from models.tacotron import Tacotron
from layers.losses import L1LossMasked from layers.losses import L1LossMasked
from utils.audio import AudioProcessor from utils.audio import AudioProcessor
from utils.synthesis import synthesis from utils.synthesis import synthesis
from utils.logger import Logger
torch.manual_seed(1) torch.manual_seed(1)
use_cuda = torch.cuda.is_available() use_cuda = torch.cuda.is_available()
@ -169,15 +170,13 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st,
avg_step_time += step_time avg_step_time += step_time
# Plot Training Iter Stats # Plot Training Iter Stats
tb.add_scalar('TrainIterLoss/TotalLoss', loss.item(), current_step) iter_stats = {"loss_posnet": linear_loss.item(),
tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.item(), "loss_decoder": mel_loss.item(),
current_step) "lr": current_lr,
tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.item(), current_step) "grad_norm": grad_norm,
tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'], "grad_norm_st": grad_norm_st,
current_step) "step_time": step_time}
tb.add_scalar('Params/GradNorm', grad_norm, current_step) tb_logger.tb_train_iter_stats(current_step, iter_stats)
tb.add_scalar('Params/GradNormSt', grad_norm_st, current_step)
tb.add_scalar('Time/StepTime', step_time, current_step)
if current_step % c.save_step == 0: if current_step % c.save_step == 0:
if c.checkpoint: if c.checkpoint:
@ -189,28 +188,17 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st,
# Diagnostic visualizations # Diagnostic visualizations
const_spec = linear_output[0].data.cpu().numpy() const_spec = linear_output[0].data.cpu().numpy()
gt_spec = linear_input[0].data.cpu().numpy() gt_spec = linear_input[0].data.cpu().numpy()
const_spec = plot_spectrogram(const_spec, ap)
gt_spec = plot_spectrogram(gt_spec, ap)
tb.add_figure('Visual/Reconstruction', const_spec, current_step)
tb.add_figure('Visual/GroundTruth', gt_spec, current_step)
align_img = alignments[0].data.cpu().numpy() align_img = alignments[0].data.cpu().numpy()
align_img = plot_alignment(align_img)
tb.add_figure('Visual/Alignment', align_img, current_step) figures = {"prediction": plot_spectrogram(const_spec, ap),
"ground_truth": plot_spectrogram(gt_spec, ap),
"alignment": plot_alignment(align_img)}
tb_logger.tb_train_figures(figures, current_step)
# Sample audio # Sample audio
audio_signal = linear_output[0].data.cpu().numpy() tb_logger.tb_train_audios(current_step,
ap.griffin_lim_iters = 60 {'TrainAudio': ap.inv_spectrogram(const_spec.T)},
audio_signal = ap.inv_spectrogram(audio_signal.T) c.sample_rate)
try:
tb.add_audio(
'SampleAudio',
audio_signal,
current_step,
sample_rate=c.sample_rate)
except:
pass
avg_linear_loss /= (num_iter + 1) avg_linear_loss /= (num_iter + 1)
avg_mel_loss /= (num_iter + 1) avg_mel_loss /= (num_iter + 1)
@ -229,12 +217,13 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st,
flush=True) flush=True)
# Plot Training Epoch Stats # Plot Training Epoch Stats
tb.add_scalar('TrainEpochLoss/TotalLoss', avg_total_loss, current_step) epoch_stats = {"loss_postnet": avg_linear_loss,
tb.add_scalar('TrainEpochLoss/LinearLoss', avg_linear_loss, current_step) "loss_decoder": avg_mel_loss,
tb.add_scalar('TrainEpochLoss/MelLoss', avg_mel_loss, current_step) "stop_loss": avg_stop_loss,
tb.add_scalar('TrainEpochLoss/StopLoss', avg_stop_loss, current_step) "epoch_time": epoch_time}
tb.add_scalar('Time/EpochTime', epoch_time, epoch) tb_logger.tb_train_epoch_stats(current_step, epoch_stats)
epoch_time = 0 if c.tb_model_param_stats:
tb_logger.tb_model_weights(model, current_step)
return avg_linear_loss, current_step return avg_linear_loss, current_step
@ -316,74 +305,45 @@ def evaluate(model, criterion, criterion_st, ap, current_step):
gt_spec = linear_input[idx].data.cpu().numpy() gt_spec = linear_input[idx].data.cpu().numpy()
align_img = alignments[idx].data.cpu().numpy() align_img = alignments[idx].data.cpu().numpy()
const_spec = plot_spectrogram(const_spec, ap) eval_figures = {"prediction": plot_spectrogram(const_spec, ap),
gt_spec = plot_spectrogram(gt_spec, ap) "ground_truth": plot_spectrogram(gt_spec, ap),
align_img = plot_alignment(align_img) "alignment": plot_alignment(align_img)}
tb_logger.tb_eval_figures(current_step, eval_figures)
tb.add_figure('ValVisual/Reconstruction', const_spec, current_step)
tb.add_figure('ValVisual/GroundTruth', gt_spec, current_step)
tb.add_figure('ValVisual/ValidationAlignment', align_img,
current_step)
# Sample audio # Sample audio
audio_signal = linear_output[idx].data.cpu().numpy() tb_logger.tb_eval_audios(current_step, {"ValAudio": ap.inv_spectrogram(const_spec.T)}, c.audio["sample_rate"])
ap.griffin_lim_iters = 60
audio_signal = ap.inv_spectrogram(audio_signal.T)
try:
tb.add_audio(
'ValSampleAudio',
audio_signal,
current_step,
sample_rate=c.audio["sample_rate"])
except:
# sometimes audio signal is out of boundaries
pass
# compute average losses # compute average losses
avg_linear_loss /= (num_iter + 1) avg_linear_loss /= (num_iter + 1)
avg_mel_loss /= (num_iter + 1) avg_mel_loss /= (num_iter + 1)
avg_stop_loss /= (num_iter + 1) avg_stop_loss /= (num_iter + 1)
avg_total_loss = avg_mel_loss + avg_linear_loss + avg_stop_loss
# Plot Learning Stats # Plot Validation Stats
tb.add_scalar('ValEpochLoss/TotalLoss', avg_total_loss, epoch_stats = {"loss_postnet": avg_linear_loss,
current_step) "loss_decoder": avg_mel_loss,
tb.add_scalar('ValEpochLoss/LinearLoss', avg_linear_loss, "stop_loss": avg_stop_loss}
current_step) tb_logger.tb_eval_stats(current_step, epoch_stats)
tb.add_scalar('ValEpochLoss/MelLoss', avg_mel_loss, current_step)
tb.add_scalar('ValEpochLoss/Stop_loss', avg_stop_loss,
current_step)
# test sentences # test sentences
ap.griffin_lim_iters = 60 test_audios = {}
test_figures = {}
for idx, test_sentence in enumerate(test_sentences): for idx, test_sentence in enumerate(test_sentences):
try: try:
wav, alignment, linear_spec, _, stop_tokens = synthesis( wav, alignment, linear_spec, _, stop_tokens = synthesis(
model, test_sentence, c, use_cuda, ap) model, test_sentence, c, use_cuda, ap)
file_path = os.path.join(AUDIO_PATH, str(current_step)) file_path = os.path.join(AUDIO_PATH, str(current_step))
os.makedirs(file_path, exist_ok=True) os.makedirs(file_path, exist_ok=True)
file_path = os.path.join(file_path, file_path = os.path.join(file_path,
"TestSentence_{}.wav".format(idx)) "TestSentence_{}.wav".format(idx))
ap.save_wav(wav, file_path) ap.save_wav(wav, file_path)
test_audios['{}-audio'.format(idx)] = wav
wav_name = 'TestSentences/{}'.format(idx) test_figures['{}-prediction'.format(idx)] = plot_spectrogram(linear_spec, ap)
tb.add_audio( test_figures['{}-alignment'.format(idx)] = plot_alignment(alignment)
wav_name,
wav,
current_step,
sample_rate=c.audio['sample_rate'])
linear_spec = plot_spectrogram(linear_spec, ap)
align_img = plot_alignment(alignment)
tb.add_figure('TestSentences/{}_Spectrogram'.format(idx),
linear_spec, current_step)
tb.add_figure('TestSentences/{}_Alignment'.format(idx), align_img,
current_step)
except: except:
print(" !! Error creating Test Sentence -", idx) print(" !! Error creating Test Sentence -", idx)
traceback.print_exc() traceback.print_exc()
pass tb_logger.tb_test_audios(current_step, test_audios, c.audio['sample_rate'])
tb_logger.tb_test_figures(current_step, test_figures)
return avg_linear_loss return avg_linear_loss
@ -496,7 +456,7 @@ if __name__ == '__main__':
# setup tensorboard # setup tensorboard
LOG_DIR = OUT_PATH LOG_DIR = OUT_PATH
tb = SummaryWriter(LOG_DIR) tb_logger = Logger(LOG_DIR)
# Conditional imports # Conditional imports
preprocessor = importlib.import_module('datasets.preprocess') preprocessor = importlib.import_module('datasets.preprocess')

75
utils/logger.py Normal file
View File

@ -0,0 +1,75 @@
import traceback
from tensorboardX import SummaryWriter
class Logger(object):
    """Thin wrapper around a tensorboardX ``SummaryWriter``.

    Groups the TensorBoard calls used during training, evaluation and test
    synthesis under fixed scope names ("TrainIterStats", "EvalFigures", ...).

    Argument order: every ``tb_*`` helper takes ``step`` first and the
    payload (a dict mapping a short name to the value to log) second. The
    lower-level ``dict_to_tb_*`` helpers take ``scope_name`` first and
    ``step`` after the payload.
    """

    def __init__(self, log_dir):
        """Create the writer.

        Args:
            log_dir: directory handed to ``SummaryWriter``.
        """
        self.writer = SummaryWriter(log_dir)
        # Not read or written by any method of this class; kept because
        # they are public attributes callers may rely on.
        self.train_stats = {}
        self.eval_stats = {}

    def tb_model_weights(self, model, step):
        """Log per-parameter statistics and histograms of ``model``.

        For every ``(name, param)`` yielded by ``model.named_parameters()``
        this writes max/min/mean/std scalars and a histogram of the values.
        A histogram of ``param.grad`` is written only when a gradient is
        present.

        Args:
            model: object exposing ``named_parameters()``.
            step: global step the values are logged at.
        """
        # Layers are numbered from 1 in iteration order.
        for layer_num, (name, param) in enumerate(model.named_parameters(), 1):
            stats = (
                ("max", param.max),
                ("min", param.min),
                ("mean", param.mean),
                ("std", param.std),
            )
            for stat_name, compute in stats:
                self.writer.add_scalar(
                    "layer{}-ModelParams/{}/{}".format(
                        layer_num, name, stat_name),
                    compute(), step)
            self.writer.add_histogram(
                "layer{}-{}/param".format(layer_num, name), param, step)
            # ``grad`` is None until a backward pass has produced a gradient
            # for this parameter (e.g. frozen or unused parameters); passing
            # None to the writer would abort the whole logging pass.
            if param.grad is not None:
                self.writer.add_histogram(
                    "layer{}-{}/grad".format(layer_num, name),
                    param.grad, step)

    def dict_to_tb_scalar(self, scope_name, stats, step):
        """Write each ``stats`` item as the scalar ``<scope_name>/<key>``."""
        for key, value in stats.items():
            self.writer.add_scalar(
                '{}/{}'.format(scope_name, key), value, step)

    def dict_to_tb_figure(self, scope_name, figures, step):
        """Write each ``figures`` item as the figure ``<scope_name>/<key>``."""
        for key, value in figures.items():
            self.writer.add_figure(
                '{}/{}'.format(scope_name, key), value, step)

    def dict_to_tb_audios(self, scope_name, audios, step, sample_rate):
        """Write each ``audios`` item as the audio clip ``<scope_name>/<key>``.

        Logging is best-effort: if the writer rejects a clip, the traceback
        is printed and the remaining clips are still written.
        """
        for key, value in audios.items():
            try:
                self.writer.add_audio(
                    '{}/{}'.format(scope_name, key), value, step,
                    sample_rate=sample_rate)
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                traceback.print_exc()

    def tb_train_iter_stats(self, step, stats):
        """Log per-iteration training scalars under "TrainIterStats"."""
        self.dict_to_tb_scalar("TrainIterStats", stats, step)

    def tb_train_epoch_stats(self, step, stats):
        """Log per-epoch training scalars under "TrainEpochStats"."""
        self.dict_to_tb_scalar("TrainEpochStats", stats, step)

    def tb_train_figures(self, step, figures):
        """Log training figures under "TrainFigures"."""
        self.dict_to_tb_figure("TrainFigures", figures, step)

    def tb_train_audios(self, step, audios, sample_rate):
        """Log training audio clips under "TrainAudios"."""
        self.dict_to_tb_audios("TrainAudios", audios, step, sample_rate)

    def tb_eval_stats(self, step, stats):
        """Log evaluation scalars under "EvalStats"."""
        self.dict_to_tb_scalar("EvalStats", stats, step)

    def tb_eval_figures(self, step, figures):
        """Log evaluation figures under "EvalFigures"."""
        self.dict_to_tb_figure("EvalFigures", figures, step)

    def tb_eval_audios(self, step, audios, sample_rate):
        """Log evaluation audio clips under "EvalAudios"."""
        self.dict_to_tb_audios("EvalAudios", audios, step, sample_rate)

    def tb_test_audios(self, step, audios, sample_rate):
        """Log test-sentence audio clips under "TestAudios"."""
        self.dict_to_tb_audios("TestAudios", audios, step, sample_rate)

    def tb_test_figures(self, step, figures):
        """Log test-sentence figures under "TestFigures"."""
        self.dict_to_tb_figure("TestFigures", figures, step)