mirror of https://github.com/coqui-ai/TTS.git
fancier and more flexible (self adapting to loss_dict) console logging. Fixing multi-gpu loss reduce
parent 668a695763
commit 0e7ecca33f
config.json
@@ -103,6 +103,7 @@
     // TENSORBOARD and LOGGING
     "print_step": 25,       // Number of steps to log traning on console.
+    "print_eval": false,    // If True, it prints loss values in evalulation.
     "save_step": 10000,      // Number of training steps expected to save traninpg stats and checkpoints.
     "checkpoint": true,     // If true, it saves checkpoints per "save_step"
     "tb_model_param_stats": false,     // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
train.py (130 changed lines)
@@ -20,8 +20,9 @@ from TTS.utils.generic_utils import (
     get_git_branch, load_config, remove_experiment_folder, save_best_model,
     save_checkpoint, adam_weight_decay, set_init_dict, copy_config_file,
     setup_model, gradual_training_scheduler, KeepAverage,
-    set_weight_decay, check_config)
-from TTS.utils.logger import Logger
+    set_weight_decay, check_config, print_train_step)
+from TTS.utils.tensorboard_logger import TensorboardLogger
+from TTS.utils.console_logger import ConsoleLogger
 from TTS.utils.speakers import load_speaker_mapping, save_speaker_mapping, \
     get_speakers
 from TTS.utils.synthesis import synthesis
@@ -125,8 +126,8 @@ def train(model, criterion, optimizer, optimizer_st, scheduler,
     train_values = {
         'avg_postnet_loss': 0,
         'avg_decoder_loss': 0,
-        'avg_stop_loss': 0,
-        'avg_align_score': 0,
+        'avg_stopnet_loss': 0,
+        'avg_align_error': 0,
         'avg_step_time': 0,
         'avg_loader_time': 0,
         'avg_alignment_score': 0
@@ -138,13 +139,13 @@ def train(model, criterion, optimizer, optimizer_st, scheduler,
         train_values['avg_ga_loss'] = 0  # guidede attention loss
     keep_avg = KeepAverage()
     keep_avg.add_values(train_values)
-    print("\n > Epoch {}/{}".format(epoch, c.epochs), flush=True)
     if use_cuda:
         batch_n_iter = int(
             len(data_loader.dataset) / (c.batch_size * num_gpus))
     else:
         batch_n_iter = int(len(data_loader.dataset) / c.batch_size)
     end_time = time.time()
+    c_logger.print_train_start()
     for num_iter, data in enumerate(data_loader):
         start_time = time.time()
@@ -193,9 +194,10 @@ def train(model, criterion, optimizer, optimizer_st, scheduler,
             grad_norm, grad_flag = check_update(model, c.grad_clip, ignore_stopnet=True)
             optimizer.step()

-        # compute alignment score
-        align_score = alignment_diagonal_score(alignments)
-        keep_avg.update_value('avg_align_score', align_score)
+        # compute alignment error (the lower the better )
+        align_error = 1 - alignment_diagonal_score(alignments)
+        keep_avg.update_value('avg_align_error', align_error)
+        loss_dict['align_error'] = align_error

         # backpass and check the grad norm for stop loss
         if c.separate_stopnet:
@@ -209,17 +211,22 @@ def train(model, criterion, optimizer, optimizer_st, scheduler,
         step_time = time.time() - start_time
         epoch_time += step_time

+        # update avg stats
+        update_train_values = {
+            'avg_postnet_loss': float(loss_dict['postnet_loss'].item()),
+            'avg_decoder_loss': float(loss_dict['decoder_loss'].item()),
+            'avg_stopnet_loss': loss_dict['stopnet_loss'].item()
+                if isinstance(loss_dict['stopnet_loss'], float) else float(loss_dict['stopnet_loss'].item()),
+            'avg_step_time': step_time,
+            'avg_loader_time': loader_time
+        }
+        keep_avg.update_values(update_train_values)
+
         if global_step % c.print_step == 0:
-            print(
-                " | > Step:{}/{} GlobalStep:{} PostnetLoss:{:.5f} "
-                "DecoderLoss:{:.5f} StopLoss:{:.5f} GALoss:{:.5f} GradNorm:{:.5f} "
-                "GradNormST:{:.5f} AvgTextLen:{:.1f} AvgSpecLen:{:.1f} StepTime:{:.2f} "
-                "LoaderTime:{:.2f} LR:{:.6f}".format(
-                    num_iter, batch_n_iter, global_step, loss_dict['postnet_loss'].item(),
-                    loss_dict['decoder_loss'].item(), loss_dict['stopnet_loss'].item(),
-                    loss_dict['ga_loss'].item(), grad_norm, grad_norm_st, avg_text_length,
-                    avg_spec_length, step_time, loader_time, current_lr),
-                flush=True)
+            c_logger.print_train_step(batch_n_iter, num_iter, global_step,
+                                      avg_spec_length, avg_text_length,
+                                      step_time, loader_time, current_lr,
+                                      loss_dict, keep_avg.avg_values)

         # aggregate losses from processes
         if num_gpus > 1:
@@ -230,16 +237,6 @@ def train(model, criterion, optimizer, optimizer_st, scheduler,
                 num_gpus) if c.stopnet else loss_dict['stopnet_loss']

         if args.rank == 0:
-            update_train_values = {
-                'avg_postnet_loss': float(loss_dict['postnet_loss'].item()),
-                'avg_decoder_loss': float(loss_dict['decoder_loss'].item()),
-                'avg_stop_loss': loss_dict['stopnet_loss'].item()
-                    if isinstance(loss_dict['stopnet_loss'], float) else float(loss_dict['stopnet_loss'].item()),
-                'avg_step_time': step_time,
-                'avg_loader_time': loader_time
-            }
-            keep_avg.update_values(update_train_values)
-
             # Plot Training Iter Stats
             # reduce TB load
             if global_step % 10 == 0:
@@ -289,23 +286,16 @@ def train(model, criterion, optimizer, optimizer_st, scheduler,
     end_time = time.time()

     # print epoch stats
-    print(" | > EPOCH END -- GlobalStep:{} "
-          "AvgPostnetLoss:{:.5f} AvgDecoderLoss:{:.5f} "
-          "AvgStopLoss:{:.5f} AvgGALoss:{:3f} EpochTime:{:.2f} "
-          "AvgStepTime:{:.2f} AvgLoaderTime:{:.2f}".format(
-              global_step, keep_avg['avg_postnet_loss'],
-              keep_avg['avg_decoder_loss'], keep_avg['avg_stop_loss'],
-              keep_avg['avg_ga_loss'], epoch_time,
-              keep_avg['avg_step_time'], keep_avg['avg_loader_time']),
-          flush=True)
+    c_logger.print_train_epoch_end(global_step, epoch, epoch_time, keep_avg)
     # Plot Epoch Stats
     if args.rank == 0:
         # Plot Training Epoch Stats
         epoch_stats = {
             "loss_postnet": keep_avg['avg_postnet_loss'],
             "loss_decoder": keep_avg['avg_decoder_loss'],
-            "stop_loss": keep_avg['avg_stop_loss'],
-            "alignment_score": keep_avg['avg_align_score'],
+            "stopnet_loss": keep_avg['avg_stopnet_loss'],
+            "alignment_score": keep_avg['avg_align_error'],
             "epoch_time": epoch_time
         }
         if c.ga_alpha > 0:
@@ -313,7 +303,7 @@ def train(model, criterion, optimizer, optimizer_st, scheduler,
         tb_logger.tb_train_epoch_stats(global_step, epoch_stats)
         if c.tb_model_param_stats:
             tb_logger.tb_model_weights(model, global_step)
-    return keep_avg['avg_postnet_loss'], global_step
+    return keep_avg.avg_values, global_step


 @torch.no_grad()
@@ -326,8 +316,8 @@ def evaluate(model, criterion, ap, global_step, epoch):
     eval_values_dict = {
         'avg_postnet_loss': 0,
         'avg_decoder_loss': 0,
-        'avg_stop_loss': 0,
-        'avg_align_score': 0
+        'avg_stopnet_loss': 0,
+        'avg_align_error': 0
     }
     if c.bidirectional_decoder:
         eval_values_dict['avg_decoder_b_loss'] = 0  # decoder backward loss
@@ -336,8 +326,8 @@ def evaluate(model, criterion, ap, global_step, epoch):
         eval_values_dict['avg_ga_loss'] = 0  # guidede attention loss
     keep_avg = KeepAverage()
     keep_avg.add_values(eval_values_dict)
-    print("\n > Validation")

+    c_logger.print_eval_start()
     if data_loader is not None:
         for num_iter, data in enumerate(data_loader):
             start_time = time.time()
@@ -377,40 +367,27 @@ def evaluate(model, criterion, ap, global_step, epoch):
             epoch_time += step_time

             # compute alignment score
-            align_score = alignment_diagonal_score(alignments)
-            keep_avg.update_value('avg_align_score', align_score)
+            align_error = 1 - alignment_diagonal_score(alignments)
+            keep_avg.update_value('avg_align_error', align_error)

             # aggregate losses from processes
             if num_gpus > 1:
-                postnet_loss = reduce_tensor(postnet_loss.data, num_gpus)
-                decoder_loss = reduce_tensor(decoder_loss.data, num_gpus)
+                postnet_loss = reduce_tensor(loss_dict['postnet_loss'].data, num_gpus)
+                decoder_loss = reduce_tensor(loss_dict['decoder_loss'].data, num_gpus)
                 if c.stopnet:
-                    stop_loss = reduce_tensor(stop_loss.data, num_gpus)
+                    stopnet_loss = reduce_tensor(loss_dict['stopnet_loss'].data, num_gpus)

             keep_avg.update_values({
                 'avg_postnet_loss':
                 float(loss_dict['postnet_loss'].item()),
                 'avg_decoder_loss':
                 float(loss_dict['decoder_loss'].item()),
-                'avg_stop_loss':
+                'avg_stopnet_loss':
                 float(loss_dict['stopnet_loss'].item()),
             })

-            if num_iter % c.print_step == 0:
-                print(
-                    " | > TotalLoss: {:.5f} PostnetLoss: {:.5f} - {:.5f} DecoderLoss:{:.5f} - {:.5f} "
-                    "StopLoss: {:.5f} - {:.5f} GALoss: {:.5f} - {:.5f} AlignScore: {:.4f} - {:.4f}"
-                    .format(loss_dict['loss'].item(),
-                            loss_dict['postnet_loss'].item(),
-                            keep_avg['avg_postnet_loss'],
-                            loss_dict['decoder_loss'].item(),
-                            keep_avg['avg_decoder_loss'],
-                            loss_dict['stopnet_loss'].item(),
-                            keep_avg['avg_stop_loss'],
-                            loss_dict['ga_loss'].item(),
-                            keep_avg['avg_ga_loss'],
-                            align_score, keep_avg['avg_align_score']),
-                    flush=True)
+            if c.print_eval:
+                c_logger.print_eval_step(num_iter, loss_dict, keep_avg.avg_values)

         if args.rank == 0:
             # Diagnostic visualizations
@@ -439,8 +416,8 @@ def evaluate(model, criterion, ap, global_step, epoch):
             epoch_stats = {
                 "loss_postnet": keep_avg['avg_postnet_loss'],
                 "loss_decoder": keep_avg['avg_decoder_loss'],
-                "stop_loss": keep_avg['avg_stop_loss'],
-                "alignment_score": keep_avg['avg_align_score'],
+                "stopnet_loss": keep_avg['avg_stopnet_loss'],
+                "alignment_score": keep_avg['avg_align_error'],
             }

             if c.bidirectional_decoder:
@@ -501,7 +478,7 @@ def evaluate(model, criterion, ap, global_step, epoch):
             tb_logger.tb_test_audios(global_step, test_audios,
                                      c.audio['sample_rate'])
             tb_logger.tb_test_figures(global_step, test_figures)
-    return keep_avg['avg_postnet_loss']
+    return keep_avg.avg_values


 # FIXME: move args definition/parsing inside of main?
@@ -603,6 +580,7 @@ def main(args):  # pylint: disable=redefined-outer-name

     global_step = args.restore_step
     for epoch in range(0, c.epochs):
+        c_logger.print_epoch_start(epoch, c.epochs)
         # set gradual training
         if c.gradual_training is not None:
             r, c.batch_size = gradual_training_scheduler(global_step, c)
@@ -610,18 +588,16 @@ def main(args):  # pylint: disable=redefined-outer-name
             model.decoder.set_r(r)
             if c.bidirectional_decoder:
                 model.decoder_backward.set_r(r)
-            print(" > Number of outputs per iteration:", model.decoder.r)
+            print("\n > Number of output frames:", model.decoder.r)

-        train_loss, global_step = train(model, criterion, optimizer,
+        train_avg_loss_dict, global_step = train(model, criterion, optimizer,
                                         optimizer_st, scheduler, ap,
                                         global_step, epoch)
-        val_loss = evaluate(model, criterion, ap, global_step, epoch)
-        print(" | > Training Loss: {:.5f} Validation Loss: {:.5f}".format(
-            train_loss, val_loss),
-            flush=True)
-        target_loss = train_loss
+        eval_avg_loss_dict = evaluate(model, criterion, ap, global_step, epoch)
+        c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
+        target_loss = train_avg_loss_dict['avg_postnet_loss']
         if c.run_eval:
-            target_loss = val_loss
+            target_loss = eval_avg_loss_dict['avg_postnet_loss']
         best_loss = save_best_model(model, optimizer, target_loss, best_loss,
                                     OUT_PATH, global_step, epoch)
@@ -681,6 +657,8 @@ if __name__ == '__main__':

     AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios')

+    c_logger = ConsoleLogger()
+
     if args.rank == 0:
         os.makedirs(AUDIO_PATH, exist_ok=True)
         new_fields = {}
@@ -693,7 +671,7 @@ if __name__ == '__main__':
        os.chmod(OUT_PATH, 0o775)

        LOG_DIR = OUT_PATH
-       tb_logger = Logger(LOG_DIR)
+       tb_logger = TensorboardLogger(LOG_DIR)

        # write model desc to tensorboard
        tb_logger.tb_add_text('model-description', c['run_description'], 0)
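
Note on the multi-GPU part of this commit: the evaluate() hunk above rewires reduce_tensor onto the tensors in loss_dict, where the old lines referenced bare postnet_loss / decoder_loss / stop_loss names that appear to be leftovers from before the loss_dict refactor. reduce_tensor itself is not touched by this diff; a minimal sketch of what such a helper typically does (an assumption for orientation, not the repo's exact code):

    import torch.distributed as dist

    def reduce_tensor(tensor, n_gpus):
        # Average a tensor across all processes; assumes an initialized
        # torch.distributed process group (e.g. launched via torch.distributed.launch).
        rt = tensor.clone()
        dist.all_reduce(rt, op=dist.ReduceOp.SUM)  # sum the per-GPU values in place
        rt /= n_gpus                               # convert the sum into a mean
        return rt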
console_logger.py (new file)
@@ -0,0 +1,88 @@
+import datetime
+from TTS.utils.generic_utils import AttrDict
+
+
+tcolors = AttrDict({
+    'OKBLUE': '\033[94m',
+    'HEADER': '\033[95m',
+    'OKGREEN': '\033[92m',
+    'WARNING': '\033[93m',
+    'FAIL': '\033[91m',
+    'ENDC': '\033[0m',
+    'BOLD': '\033[1m',
+    'UNDERLINE': '\033[4m'
+})
+
+
+class ConsoleLogger():
+    def __init__(self):
+        # TODO: color code for value changes
+        # use these to compare values between iterations
+        self.old_train_loss_dict = None
+        self.old_epoch_loss_dict = None
+        self.old_eval_loss_dict = None
+
+    def get_time(self):
+        now = datetime.datetime.now()
+        return now.strftime("%Y-%m-%d %H:%M:%S")
+
+    def print_epoch_start(self, epoch, max_epoch):
+        print("\n{}{} > EPOCH: {}/{}{}".format(tcolors.UNDERLINE, tcolors.BOLD,
+                                               epoch, max_epoch, tcolors.ENDC),
+              flush=True)
+
+    def print_train_start(self):
+        print(f"\n{tcolors.BOLD} > TRAINING ({self.get_time()}) {tcolors.ENDC}")
+
+    def print_train_step(self, batch_steps, step, global_step, avg_spec_length,
+                         avg_text_length, step_time, loader_time, lr,
+                         loss_dict, avg_loss_dict):
+        indent = " | > "
+        print()
+        log_text = "{} --> STEP: {}/{} -- GLOBAL_STEP: {}{}\n".format(
+            tcolors.BOLD, step, batch_steps, global_step, tcolors.ENDC)
+        for key, value in loss_dict.items():
+            # print the avg value if given
+            if f'avg_{key}' in avg_loss_dict.keys():
+                log_text += "{}{}: {:.5f} ({:.5f})\n".format(indent, key, value, avg_loss_dict[f'avg_{key}'])
+            else:
+                log_text += "{}{}: {:.5f} \n".format(indent, key, value)
+        log_text += "{}avg_spec_len: {}\n{}avg_text_len: {}\n{}step_time: {:.2f}\n{}loader_time: {:.2f}\n{}lr: {:.5f}"\
+            .format(indent, avg_spec_length, indent, avg_text_length, indent, step_time, indent, loader_time, indent, lr)
+        print(log_text, flush=True)
+
+    def print_train_epoch_end(self, global_step, epoch, epoch_time,
+                              print_dict):
+        indent = " | > "
+        log_text = f"\n{tcolors.BOLD} --> TRAIN PERFORMACE -- EPOCH TIME: {epoch} sec -- GLOBAL_STEP: {global_step}{tcolors.ENDC}\n"
+        for key, value in print_dict.items():
+            log_text += "{}{}: {:.5f}\n".format(indent, key, value)
+        print(log_text, flush=True)
+
+    def print_eval_start(self):
+        print(f"{tcolors.BOLD} > EVALUATION {tcolors.ENDC}\n")
+
+    def print_eval_step(self, step, loss_dict, avg_loss_dict):
+        indent = " | > "
+        log_text = f"{tcolors.BOLD} --> STEP: {step}{tcolors.ENDC}\n"
+        for key, value in loss_dict.items():
+            # print the avg value if given
+            if f'avg_{key}' in avg_loss_dict.keys():
+                log_text += "{}{}: {:.5f} ({:.5f})\n".format(indent, key, value, avg_loss_dict[f'avg_{key}'])
+            else:
+                log_text += "{}{}: {:.5f} \n".format(indent, key, value)
+        print(log_text, flush=True)
+
+    def print_epoch_end(self, epoch, avg_loss_dict):
+        indent = " | > "
+        log_text = " {}--> EVAL PERFORMANCE{}\n".format(
+            tcolors.BOLD, tcolors.ENDC)
+        for key, value in avg_loss_dict.items():
+            # print the avg value if given
+            color = tcolors.OKGREEN
+            if self.old_eval_loss_dict is not None:
+                if self.old_eval_loss_dict[key] > value:
+                    color = tcolors.FAIL
+            log_text += "{}{}:{} {:.5f} \n{}".format(indent, key, color, value, tcolors.ENDC)
+        self.old_eval_loss_dict = avg_loss_dict
+        print(log_text, flush=True)
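
This new file is where the "self adapting to loss_dict" part of the commit message lives: print_train_step and print_eval_step iterate whatever keys the trainer puts into loss_dict and append the running average whenever the avg dict holds a matching avg_<key> entry. A standalone sketch of that core loop, with purely illustrative names and numbers (not repo code):

    # Distilled from ConsoleLogger.print_train_step above: format any loss_dict,
    # appending the running average when an 'avg_<key>' entry exists.
    def format_losses(loss_dict, avg_loss_dict, indent=" | > "):
        log_text = ""
        for key, value in loss_dict.items():
            if f'avg_{key}' in avg_loss_dict:
                log_text += "{}{}: {:.5f} ({:.5f})\n".format(
                    indent, key, value, avg_loss_dict[f'avg_{key}'])
            else:
                log_text += "{}{}: {:.5f}\n".format(indent, key, value)
        return log_text

    # Hypothetical values, purely for illustration.
    print(format_losses({'postnet_loss': 0.51, 'decoder_loss': 0.43, 'align_error': 0.12},
                        {'avg_postnet_loss': 0.55, 'avg_decoder_loss': 0.47}))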
generic_utils.py
@@ -144,7 +144,7 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path,
         best_loss = model_loss
         bestmodel_path = 'best_model.pth.tar'
         bestmodel_path = os.path.join(out_path, bestmodel_path)
-        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(
+        print(" > BEST MODEL ({0:.5f}) : {1:}".format(
            model_loss, bestmodel_path))
        torch.save(state, bestmodel_path)
    return best_loss
@@ -368,6 +368,9 @@ class KeepAverage():
     def __getitem__(self, key):
         return self.avg_values[key]

+    def items(self):
+        return self.avg_values.items()
+
     def add_value(self, name, init_val=0, init_iter=0):
         self.avg_values[name] = init_val
         self.iters[name] = init_iter
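
The new items() method exists because train.py now hands the KeepAverage object itself to ConsoleLogger.print_train_epoch_end, which iterates it like a dict. A minimal stand-in showing that call pattern (a simplified assumption; the real class above also tracks per-key iteration counts and has add_value/add_values/update_values):

    class KeepAverageSketch:
        # Simplified stand-in for generic_utils.KeepAverage.
        def __init__(self):
            self.avg_values = {}
            self.iters = {}

        def __getitem__(self, key):
            return self.avg_values[key]

        def items(self):
            # what print_train_epoch_end iterates over
            return self.avg_values.items()

        def update_value(self, name, value):
            # running mean over the values seen so far (assumed behavior)
            n = self.iters.get(name, 0)
            self.avg_values[name] = (self.avg_values.get(name, 0) * n + value) / (n + 1)
            self.iters[name] = n + 1

    keep_avg = KeepAverageSketch()
    keep_avg.update_value('postnet_loss', 0.5)
    keep_avg.update_value('postnet_loss', 0.3)
    for key, value in keep_avg.items():  # same call pattern as the console logger
        print(" | > {}: {:.5f}".format(key, value))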
|
@@ -407,6 +410,37 @@ def _check_argument(name, c, enum_list=None, max_val=None, min_val=None, restric
        assert isinstance(c[name], val_type) or c[name] is None, f' [!] {name} has wrong type - {type(c[name])} vs {val_type}'


+tcolors = AttrDict({
+    'OKBLUE': '\033[94m',
+    'HEADER': '\033[95m',
+    'OKGREEN': '\033[92m',
+    'WARNING': '\033[93m',
+    'FAIL': '\033[91m',
+    'ENDC': '\033[0m',
+    'BOLD': '\033[1m',
+    'UNDERLINE': '\033[4m'
+})
+
+
+def print_train_step(batch_steps, step, global_step, avg_spec_length, avg_text_length, step_time, loader_time, lr, print_dict):
+    indent = " | > "
+    print()
+    log_text = "{} --> STEP: {}/{} -- GLOBAL_STEP: {}{}\n".format(tcolors.BOLD, step, batch_steps, global_step, tcolors.ENDC)
+    for key, value in print_dict.items():
+        log_text += "{}{}: {:.5f}\n".format(indent, key, value)
+    log_text += "{}avg_spec_len: {}\n{}avg_text_len: {}\n{}step_time: {:.2f}\n{}loader_time: {:.2f}\n{}lr: {:.5f}"\
+        .format(indent, avg_spec_length, indent, avg_text_length, indent, step_time, indent, loader_time, indent, lr)
+    print(log_text, flush=True)
+
+
+def print_train_epoch(step, global_step, epoch, loss_dict):
+    pass
+
+
+def print_eval_step():
+    pass
+
+
 def check_config(c):
     _check_argument('model', c, enum_list=['tacotron', 'tacotron2'], restricted=True, val_type=str)
     _check_argument('run_name', c, restricted=True, val_type=str)
tensorboard_logger.py
@@ -2,7 +2,7 @@ import traceback
 from tensorboardX import SummaryWriter


-class Logger(object):
+class TensorboardLogger(object):
     def __init__(self, log_dir):
         self.writer = SummaryWriter(log_dir)
         self.train_stats = {}