mirror of https://github.com/coqui-ai/TTS.git
logger for tensorboard plotting
This commit is contained in:
parent
8127ef3118
commit
481105ccfa
|
@ -40,6 +40,7 @@
|
||||||
"checkpoint": true,
|
"checkpoint": true,
|
||||||
"save_step": 5000,
|
"save_step": 5000,
|
||||||
"print_step": 10,
|
"print_step": 10,
|
||||||
|
"tb_model_param_stats": true, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
|
||||||
|
|
||||||
"run_eval": true,
|
"run_eval": true,
|
||||||
"data_path": "../../Data/LJSpeech-1.1/", // can overwritten from command argument
|
"data_path": "../../Data/LJSpeech-1.1/", // can overwritten from command argument
|
||||||
|
|
122
train.py
122
train.py
|
@ -22,6 +22,7 @@ from models.tacotron import Tacotron
|
||||||
from layers.losses import L1LossMasked
|
from layers.losses import L1LossMasked
|
||||||
from utils.audio import AudioProcessor
|
from utils.audio import AudioProcessor
|
||||||
from utils.synthesis import synthesis
|
from utils.synthesis import synthesis
|
||||||
|
from utils.logger import Logger
|
||||||
|
|
||||||
torch.manual_seed(1)
|
torch.manual_seed(1)
|
||||||
use_cuda = torch.cuda.is_available()
|
use_cuda = torch.cuda.is_available()
|
||||||
|
@ -169,15 +170,13 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st,
|
||||||
avg_step_time += step_time
|
avg_step_time += step_time
|
||||||
|
|
||||||
# Plot Training Iter Stats
|
# Plot Training Iter Stats
|
||||||
tb.add_scalar('TrainIterLoss/TotalLoss', loss.item(), current_step)
|
iter_stats = {"loss_posnet": linear_loss.item(),
|
||||||
tb.add_scalar('TrainIterLoss/LinearLoss', linear_loss.item(),
|
"loss_decoder": mel_loss.item(),
|
||||||
current_step)
|
"lr": current_lr,
|
||||||
tb.add_scalar('TrainIterLoss/MelLoss', mel_loss.item(), current_step)
|
"grad_norm": grad_norm,
|
||||||
tb.add_scalar('Params/LearningRate', optimizer.param_groups[0]['lr'],
|
"grad_norm_st": grad_norm_st,
|
||||||
current_step)
|
"step_time": step_time}
|
||||||
tb.add_scalar('Params/GradNorm', grad_norm, current_step)
|
tb_logger.tb_train_iter_stats(current_step, iter_stats)
|
||||||
tb.add_scalar('Params/GradNormSt', grad_norm_st, current_step)
|
|
||||||
tb.add_scalar('Time/StepTime', step_time, current_step)
|
|
||||||
|
|
||||||
if current_step % c.save_step == 0:
|
if current_step % c.save_step == 0:
|
||||||
if c.checkpoint:
|
if c.checkpoint:
|
||||||
|
@ -189,28 +188,17 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st,
|
||||||
# Diagnostic visualizations
|
# Diagnostic visualizations
|
||||||
const_spec = linear_output[0].data.cpu().numpy()
|
const_spec = linear_output[0].data.cpu().numpy()
|
||||||
gt_spec = linear_input[0].data.cpu().numpy()
|
gt_spec = linear_input[0].data.cpu().numpy()
|
||||||
|
|
||||||
const_spec = plot_spectrogram(const_spec, ap)
|
|
||||||
gt_spec = plot_spectrogram(gt_spec, ap)
|
|
||||||
tb.add_figure('Visual/Reconstruction', const_spec, current_step)
|
|
||||||
tb.add_figure('Visual/GroundTruth', gt_spec, current_step)
|
|
||||||
|
|
||||||
align_img = alignments[0].data.cpu().numpy()
|
align_img = alignments[0].data.cpu().numpy()
|
||||||
align_img = plot_alignment(align_img)
|
|
||||||
tb.add_figure('Visual/Alignment', align_img, current_step)
|
figures = {"prediction": plot_spectrogram(const_spec, ap),
|
||||||
|
"ground_truth": plot_spectrogram(gt_spec, ap),
|
||||||
|
"alignment": plot_alignment(align_img)}
|
||||||
|
tb_logger.tb_train_figures(figures, current_step)
|
||||||
|
|
||||||
# Sample audio
|
# Sample audio
|
||||||
audio_signal = linear_output[0].data.cpu().numpy()
|
tb_logger.tb_train_audios(current_step,
|
||||||
ap.griffin_lim_iters = 60
|
{'TrainAudio': ap.inv_spectrogram(const_spec.T)},
|
||||||
audio_signal = ap.inv_spectrogram(audio_signal.T)
|
c.sample_rate)
|
||||||
try:
|
|
||||||
tb.add_audio(
|
|
||||||
'SampleAudio',
|
|
||||||
audio_signal,
|
|
||||||
current_step,
|
|
||||||
sample_rate=c.sample_rate)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
avg_linear_loss /= (num_iter + 1)
|
avg_linear_loss /= (num_iter + 1)
|
||||||
avg_mel_loss /= (num_iter + 1)
|
avg_mel_loss /= (num_iter + 1)
|
||||||
|
@ -229,12 +217,13 @@ def train(model, criterion, criterion_st, optimizer, optimizer_st,
|
||||||
flush=True)
|
flush=True)
|
||||||
|
|
||||||
# Plot Training Epoch Stats
|
# Plot Training Epoch Stats
|
||||||
tb.add_scalar('TrainEpochLoss/TotalLoss', avg_total_loss, current_step)
|
epoch_stats = {"loss_postnet": avg_linear_loss,
|
||||||
tb.add_scalar('TrainEpochLoss/LinearLoss', avg_linear_loss, current_step)
|
"loss_decoder": avg_mel_loss,
|
||||||
tb.add_scalar('TrainEpochLoss/MelLoss', avg_mel_loss, current_step)
|
"stop_loss": avg_stop_loss,
|
||||||
tb.add_scalar('TrainEpochLoss/StopLoss', avg_stop_loss, current_step)
|
"epoch_time": epoch_time}
|
||||||
tb.add_scalar('Time/EpochTime', epoch_time, epoch)
|
tb_logger.tb_train_epoch_stats(current_step, epoch_stats)
|
||||||
epoch_time = 0
|
if c.tb_model_param_stats:
|
||||||
|
tb_logger.tb_model_weights(model, current_step)
|
||||||
return avg_linear_loss, current_step
|
return avg_linear_loss, current_step
|
||||||
|
|
||||||
|
|
||||||
|
@ -316,74 +305,45 @@ def evaluate(model, criterion, criterion_st, ap, current_step):
|
||||||
gt_spec = linear_input[idx].data.cpu().numpy()
|
gt_spec = linear_input[idx].data.cpu().numpy()
|
||||||
align_img = alignments[idx].data.cpu().numpy()
|
align_img = alignments[idx].data.cpu().numpy()
|
||||||
|
|
||||||
const_spec = plot_spectrogram(const_spec, ap)
|
eval_figures = {"prediction": plot_spectrogram(const_spec, ap),
|
||||||
gt_spec = plot_spectrogram(gt_spec, ap)
|
"ground_truth": plot_spectrogram(gt_spec, ap),
|
||||||
align_img = plot_alignment(align_img)
|
"alignment": plot_alignment(align_img)}
|
||||||
|
tb_logger.tb_eval_figures(current_step, eval_figures)
|
||||||
tb.add_figure('ValVisual/Reconstruction', const_spec, current_step)
|
|
||||||
tb.add_figure('ValVisual/GroundTruth', gt_spec, current_step)
|
|
||||||
tb.add_figure('ValVisual/ValidationAlignment', align_img,
|
|
||||||
current_step)
|
|
||||||
|
|
||||||
# Sample audio
|
# Sample audio
|
||||||
audio_signal = linear_output[idx].data.cpu().numpy()
|
tb_logger.tb_eval_audios(current_step, {"ValAudio": ap.inv_spectrogram(const_spec.T)}, c.audio["sample_rate"])
|
||||||
ap.griffin_lim_iters = 60
|
|
||||||
audio_signal = ap.inv_spectrogram(audio_signal.T)
|
|
||||||
try:
|
|
||||||
tb.add_audio(
|
|
||||||
'ValSampleAudio',
|
|
||||||
audio_signal,
|
|
||||||
current_step,
|
|
||||||
sample_rate=c.audio["sample_rate"])
|
|
||||||
except:
|
|
||||||
# sometimes audio signal is out of boundaries
|
|
||||||
pass
|
|
||||||
|
|
||||||
# compute average losses
|
# compute average losses
|
||||||
avg_linear_loss /= (num_iter + 1)
|
avg_linear_loss /= (num_iter + 1)
|
||||||
avg_mel_loss /= (num_iter + 1)
|
avg_mel_loss /= (num_iter + 1)
|
||||||
avg_stop_loss /= (num_iter + 1)
|
avg_stop_loss /= (num_iter + 1)
|
||||||
avg_total_loss = avg_mel_loss + avg_linear_loss + avg_stop_loss
|
|
||||||
|
|
||||||
# Plot Learning Stats
|
# Plot Validation Stats
|
||||||
tb.add_scalar('ValEpochLoss/TotalLoss', avg_total_loss,
|
epoch_stats = {"loss_postnet": avg_linear_loss,
|
||||||
current_step)
|
"loss_decoder": avg_mel_loss,
|
||||||
tb.add_scalar('ValEpochLoss/LinearLoss', avg_linear_loss,
|
"stop_loss": avg_stop_loss}
|
||||||
current_step)
|
tb_logger.tb_eval_stats(current_step, epoch_stats)
|
||||||
tb.add_scalar('ValEpochLoss/MelLoss', avg_mel_loss, current_step)
|
|
||||||
tb.add_scalar('ValEpochLoss/Stop_loss', avg_stop_loss,
|
|
||||||
current_step)
|
|
||||||
|
|
||||||
# test sentences
|
# test sentences
|
||||||
ap.griffin_lim_iters = 60
|
test_audios = {}
|
||||||
|
test_figures = {}
|
||||||
for idx, test_sentence in enumerate(test_sentences):
|
for idx, test_sentence in enumerate(test_sentences):
|
||||||
try:
|
try:
|
||||||
wav, alignment, linear_spec, _, stop_tokens = synthesis(
|
wav, alignment, linear_spec, _, stop_tokens = synthesis(
|
||||||
model, test_sentence, c, use_cuda, ap)
|
model, test_sentence, c, use_cuda, ap)
|
||||||
|
|
||||||
file_path = os.path.join(AUDIO_PATH, str(current_step))
|
file_path = os.path.join(AUDIO_PATH, str(current_step))
|
||||||
os.makedirs(file_path, exist_ok=True)
|
os.makedirs(file_path, exist_ok=True)
|
||||||
file_path = os.path.join(file_path,
|
file_path = os.path.join(file_path,
|
||||||
"TestSentence_{}.wav".format(idx))
|
"TestSentence_{}.wav".format(idx))
|
||||||
ap.save_wav(wav, file_path)
|
ap.save_wav(wav, file_path)
|
||||||
|
test_audios['{}-audio'.format(idx)] = wav
|
||||||
wav_name = 'TestSentences/{}'.format(idx)
|
test_figures['{}-prediction'.format(idx)] = plot_spectrogram(linear_spec, ap)
|
||||||
tb.add_audio(
|
test_figures['{}-alignment'.format(idx)] = plot_alignment(alignment)
|
||||||
wav_name,
|
|
||||||
wav,
|
|
||||||
current_step,
|
|
||||||
sample_rate=c.audio['sample_rate'])
|
|
||||||
|
|
||||||
linear_spec = plot_spectrogram(linear_spec, ap)
|
|
||||||
align_img = plot_alignment(alignment)
|
|
||||||
tb.add_figure('TestSentences/{}_Spectrogram'.format(idx),
|
|
||||||
linear_spec, current_step)
|
|
||||||
tb.add_figure('TestSentences/{}_Alignment'.format(idx), align_img,
|
|
||||||
current_step)
|
|
||||||
except:
|
except:
|
||||||
print(" !! Error creating Test Sentence -", idx)
|
print(" !! Error creating Test Sentence -", idx)
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
pass
|
tb_logger.tb_test_audios(current_step, test_audios, c.audio['sample_rate'])
|
||||||
|
tb_logger.tb_test_figures(current_step, test_figures)
|
||||||
return avg_linear_loss
|
return avg_linear_loss
|
||||||
|
|
||||||
|
|
||||||
|
@ -496,7 +456,7 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
# setup tensorboard
|
# setup tensorboard
|
||||||
LOG_DIR = OUT_PATH
|
LOG_DIR = OUT_PATH
|
||||||
tb = SummaryWriter(LOG_DIR)
|
tb_logger = Logger(LOG_DIR)
|
||||||
|
|
||||||
# Conditional imports
|
# Conditional imports
|
||||||
preprocessor = importlib.import_module('datasets.preprocess')
|
preprocessor = importlib.import_module('datasets.preprocess')
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
import traceback
|
||||||
|
from tensorboardX import SummaryWriter
|
||||||
|
|
||||||
|
|
||||||
|
class Logger(object):
|
||||||
|
def __init__(self, log_dir):
|
||||||
|
self.writer = SummaryWriter(log_dir)
|
||||||
|
self.train_stats = {}
|
||||||
|
self.eval_stats = {}
|
||||||
|
|
||||||
|
def tb_model_weights(self, model, step):
|
||||||
|
layer_num = 1
|
||||||
|
for name, param in model.named_parameters():
|
||||||
|
self.writer.add_scalar(
|
||||||
|
"layer{}-ModelParams/{}/max".format(layer_num, name),
|
||||||
|
param.max(), step)
|
||||||
|
self.writer.add_scalar(
|
||||||
|
"layer{}-ModelParams/{}/min".format(layer_num, name),
|
||||||
|
param.min(), step)
|
||||||
|
self.writer.add_scalar(
|
||||||
|
"layer{}-ModelParams/{}/mean".format(layer_num, name),
|
||||||
|
param.mean(), step)
|
||||||
|
self.writer.add_scalar(
|
||||||
|
"layer{}-ModelParams/{}/std".format(layer_num, name),
|
||||||
|
param.std(), step)
|
||||||
|
self.writer.add_histogram(
|
||||||
|
"layer{}-{}/param".format(layer_num, name), param, step)
|
||||||
|
self.writer.add_histogram(
|
||||||
|
"layer{}-{}/grad".format(layer_num, name), param.grad, step)
|
||||||
|
layer_num += 1
|
||||||
|
|
||||||
|
def dict_to_tb_scalar(self, scope_name, stats, step):
|
||||||
|
for key, value in stats.items():
|
||||||
|
self.writer.add_scalar('{}/{}'.format(scope_name, key), value, step)
|
||||||
|
|
||||||
|
def dict_to_tb_figure(self, scope_name, figures, step):
|
||||||
|
for key, value in figures.items():
|
||||||
|
self.writer.add_figure('{}/{}'.format(scope_name, key), value, step)
|
||||||
|
|
||||||
|
def dict_to_tb_audios(self, scope_name, audios, step, sample_rate):
|
||||||
|
for key, value in audios.items():
|
||||||
|
try:
|
||||||
|
self.writer.add_audio('{}/{}'.format(scope_name, key), value, step, sample_rate=sample_rate)
|
||||||
|
except:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
def tb_train_iter_stats(self, step, stats):
|
||||||
|
self.dict_to_tb_scalar("TrainIterStats", stats, step)
|
||||||
|
|
||||||
|
def tb_train_epoch_stats(self, step, stats):
|
||||||
|
self.dict_to_tb_scalar("TrainEpochStats", stats, step)
|
||||||
|
|
||||||
|
def tb_train_figures(self, step, figures):
|
||||||
|
self.dict_to_tb_figure("TrainFigures", figures, step)
|
||||||
|
|
||||||
|
def tb_train_audios(self, step, audios, sample_rate):
|
||||||
|
self.dict_to_tb_audios("TrainAudios", audios, step, sample_rate)
|
||||||
|
|
||||||
|
def tb_eval_stats(self, step, stats):
|
||||||
|
self.dict_to_tb_scalar("EvalStats", stats, step)
|
||||||
|
|
||||||
|
def tb_eval_figures(self, step, figures):
|
||||||
|
self.dict_to_tb_figure("EvalFigures", figures, step)
|
||||||
|
|
||||||
|
def tb_eval_audios(self, step, audios, sample_rate):
|
||||||
|
self.dict_to_tb_audios("EvalAudios", audios, step, sample_rate)
|
||||||
|
|
||||||
|
def tb_test_audios(self, step, audios, sample_rate):
|
||||||
|
self.dict_to_tb_audios("TestAudios", audios, step, sample_rate)
|
||||||
|
|
||||||
|
def tb_test_figures(self, step, figures):
|
||||||
|
self.dict_to_tb_figure("TestFigures", figures, step)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue