changed train scripts

This commit is contained in:
gerazov 2021-02-06 22:29:30 +01:00 committed by Eren Gölge
parent 2daca15802
commit 6f06e31541
6 changed files with 131 additions and 572 deletions

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Train Glow TTS model."""
import argparse
import glob
import os
import sys
import time
@ -14,10 +12,12 @@ import torch
from torch.nn.parallel import DistributedDataParallel as DDP_th
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
from TTS.utils.arguments import parse_arguments, process_args
from TTS.tts.datasets.preprocess import load_meta_data
from TTS.tts.datasets.TTSDataset import MyDataset
from TTS.tts.layers.losses import GlowTTSLoss
from TTS.tts.utils.generic_utils import check_config_tts, setup_model
from TTS.tts.utils.generic_utils import setup_model
from TTS.tts.utils.io import save_best_model, save_checkpoint
from TTS.tts.utils.measures import alignment_diagonal_score
from TTS.tts.utils.speakers import parse_speakers
@ -25,18 +25,15 @@ from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
from TTS.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger
from TTS.utils.distribute import init_distributed, reduce_tensor
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict)
from TTS.utils.io import copy_model_files, load_config
from TTS.utils.radam import RAdam
from TTS.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.training import NoamLR, setup_torch_training_env
use_cuda, num_gpus = setup_torch_training_env(True, False)
def setup_loader(ap, r, is_val=False, verbose=False):
if is_val and not c.run_eval:
loader = None
@ -119,7 +116,7 @@ def format_data(data):
avg_text_length, avg_spec_length, attn_mask, item_idx
def data_depended_init(data_loader, model):
def data_depended_init(data_loader, model, ap):
"""Data depended initialization for activation normalization."""
if hasattr(model, 'module'):
for f in model.module.decoder.flows:
@ -138,7 +135,7 @@ def data_depended_init(data_loader, model):
# format data
text_input, text_lengths, mel_input, mel_lengths, spekaer_embed,\
_, _, attn_mask, _ = format_data(data)
_, _, attn_mask, item_idx = format_data(data)
# forward pass model
_ = model.forward(
@ -177,7 +174,7 @@ def train(data_loader, model, criterion, optimizer, scheduler,
# format data
text_input, text_lengths, mel_input, mel_lengths, speaker_c,\
avg_text_length, avg_spec_length, attn_mask, _ = format_data(data)
avg_text_length, avg_spec_length, attn_mask, item_idx = format_data(data)
loader_time = time.time() - end_time
@ -191,20 +188,20 @@ def train(data_loader, model, criterion, optimizer, scheduler,
# compute loss
loss_dict = criterion(z, y_mean, y_log_scale, logdet, mel_lengths,
o_dur_log, o_total_dur, text_lengths)
o_dur_log, o_total_dur, text_lengths)
# backward pass with loss scaling
if c.mixed_precision:
scaler.scale(loss_dict['loss']).backward()
scaler.unscale_(optimizer)
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
c.grad_clip)
c.grad_clip)
scaler.step(optimizer)
scaler.update()
else:
loss_dict['loss'].backward()
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
c.grad_clip)
c.grad_clip)
optimizer.step()
# setup lr
@ -332,7 +329,7 @@ def evaluate(data_loader, model, criterion, ap, global_step, epoch):
# format data
text_input, text_lengths, mel_input, mel_lengths, speaker_c,\
_, _, attn_mask, _ = format_data(data)
_, _, attn_mask, item_idx = format_data(data)
# forward pass model
z, logdet, y_mean, y_log_scale, alignments, o_dur_log, o_total_dur = model.forward(
@ -468,7 +465,6 @@ def evaluate(data_loader, model, criterion, ap, global_step, epoch):
return keep_avg.avg_values
# FIXME: move args definition/parsing inside of main?
def main(args): # pylint: disable=redefined-outer-name
# pylint: disable=global-variable-undefined
global meta_data_train, meta_data_eval, symbols, phonemes, speaker_mapping
@ -550,14 +546,13 @@ def main(args): # pylint: disable=redefined-outer-name
eval_loader = setup_loader(ap, 1, is_val=True, verbose=True)
global_step = args.restore_step
model = data_depended_init(train_loader, model)
model = data_depended_init(train_loader, model, ap)
for epoch in range(0, c.epochs):
c_logger.print_epoch_start(epoch, c.epochs)
train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer,
scheduler, ap, global_step,
epoch)
eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap,
global_step, epoch)
eval_avg_loss_dict = evaluate(eval_loader , model, criterion, ap, global_step, epoch)
c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
target_loss = train_avg_loss_dict['avg_loss']
if c.run_eval:
@ -567,81 +562,9 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--continue_path',
type=str,
help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.',
default='',
required='--config_path' not in sys.argv)
parser.add_argument(
'--restore_path',
type=str,
help='Model file to be restored. Use to finetune a model.',
default='')
parser.add_argument(
'--config_path',
type=str,
help='Path to config file for training.',
required='--continue_path' not in sys.argv
)
parser.add_argument('--debug',
type=bool,
default=False,
help='Do not verify commit integrity to run training.')
# DISTRIBUTED
parser.add_argument(
'--rank',
type=int,
default=0,
help='DISTRIBUTED: process rank for distributed training.')
parser.add_argument('--group_id',
type=str,
default="",
help='DISTRIBUTED: process group id.')
args = parser.parse_args()
if args.continue_path != '':
args.output_path = args.continue_path
args.config_path = os.path.join(args.continue_path, 'config.json')
list_of_files = glob.glob(args.continue_path + "/*.pth.tar") # * means all if need specific format then *.csv
latest_model_file = max(list_of_files, key=os.path.getctime)
args.restore_path = latest_model_file
print(f" > Training continues for {args.restore_path}")
# setup output paths and read configs
c = load_config(args.config_path)
# check_config(c)
check_config_tts(c)
_ = os.path.dirname(os.path.realpath(__file__))
if c.mixed_precision:
print(" > Mixed precision enabled.")
OUT_PATH = args.continue_path
if args.continue_path == '':
OUT_PATH = create_experiment_folder(c.output_path, c.run_name, args.debug)
AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios')
c_logger = ConsoleLogger()
if args.rank == 0:
os.makedirs(AUDIO_PATH, exist_ok=True)
new_fields = {}
if args.restore_path:
new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775)
LOG_DIR = OUT_PATH
tb_logger = TensorboardLogger(LOG_DIR, model_name='TTS')
# write model desc to tensorboard
tb_logger.tb_add_text('model-description', c['run_description'], 0)
args = parse_arguments(sys.argv)
c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(
args, model_type='glow_tts')
try:
main(args)

View File

@ -11,6 +11,7 @@ import numpy as np
from random import randrange
import torch
from TTS.utils.arguments import parse_arguments, process_args
# DISTRIBUTED
from torch.nn.parallel import DistributedDataParallel as DDP_th
from torch.utils.data import DataLoader
@ -18,7 +19,7 @@ from torch.utils.data.distributed import DistributedSampler
from TTS.tts.datasets.preprocess import load_meta_data
from TTS.tts.datasets.TTSDataset import MyDataset
from TTS.tts.layers.losses import SpeedySpeechLoss
from TTS.tts.utils.generic_utils import check_config_tts, setup_model
from TTS.tts.utils.generic_utils import setup_model
from TTS.tts.utils.io import save_best_model, save_checkpoint
from TTS.tts.utils.measures import alignment_diagonal_score
from TTS.tts.utils.speakers import parse_speakers
@ -26,14 +27,10 @@ from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
from TTS.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger
from TTS.utils.distribute import init_distributed, reduce_tensor
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict)
from TTS.utils.io import copy_model_files, load_config
from TTS.utils.radam import RAdam
from TTS.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.training import NoamLR, setup_torch_training_env
use_cuda, num_gpus = setup_torch_training_env(True, False)
@ -175,13 +172,13 @@ def train(data_loader, model, criterion, optimizer, scheduler,
scaler.scale(loss_dict['loss']).backward()
scaler.unscale_(optimizer)
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
c.grad_clip)
c.grad_clip)
scaler.step(optimizer)
scaler.update()
else:
loss_dict['loss'].backward()
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
c.grad_clip)
c.grad_clip)
optimizer.step()
# setup lr
@ -518,8 +515,7 @@ def main(args): # pylint: disable=redefined-outer-name
train_avg_loss_dict, global_step = train(train_loader, model, criterion, optimizer,
scheduler, ap, global_step,
epoch)
eval_avg_loss_dict = evaluate(eval_loader, model, criterion, ap,
global_step, epoch)
eval_avg_loss_dict = evaluate(eval_loader , model, criterion, ap, global_step, epoch)
c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
target_loss = train_avg_loss_dict['avg_loss']
if c.run_eval:
@ -529,81 +525,9 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--continue_path',
type=str,
help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.',
default='',
required='--config_path' not in sys.argv)
parser.add_argument(
'--restore_path',
type=str,
help='Model file to be restored. Use to finetune a model.',
default='')
parser.add_argument(
'--config_path',
type=str,
help='Path to config file for training.',
required='--continue_path' not in sys.argv
)
parser.add_argument('--debug',
type=bool,
default=False,
help='Do not verify commit integrity to run training.')
# DISTRIBUTED
parser.add_argument(
'--rank',
type=int,
default=0,
help='DISTRIBUTED: process rank for distributed training.')
parser.add_argument('--group_id',
type=str,
default="",
help='DISTRIBUTED: process group id.')
args = parser.parse_args()
if args.continue_path != '':
args.output_path = args.continue_path
args.config_path = os.path.join(args.continue_path, 'config.json')
list_of_files = glob.glob(args.continue_path + "/*.pth.tar") # * means all if need specific format then *.csv
latest_model_file = max(list_of_files, key=os.path.getctime)
args.restore_path = latest_model_file
print(f" > Training continues for {args.restore_path}")
# setup output paths and read configs
c = load_config(args.config_path)
# check_config(c)
check_config_tts(c)
_ = os.path.dirname(os.path.realpath(__file__))
if c.mixed_precision:
print(" > Mixed precision enabled.")
OUT_PATH = args.continue_path
if args.continue_path == '':
OUT_PATH = create_experiment_folder(c.output_path, c.run_name, args.debug)
AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios')
c_logger = ConsoleLogger()
if args.rank == 0:
os.makedirs(AUDIO_PATH, exist_ok=True)
new_fields = {}
if args.restore_path:
new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775)
LOG_DIR = OUT_PATH
tb_logger = TensorboardLogger(LOG_DIR, model_name='TTS')
# write model desc to tensorboard
tb_logger.tb_add_text('model-description', c['run_description'], 0)
args = parse_arguments(sys.argv)
c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(
args, model_type='tts')
try:
main(args)

View File

@ -1,8 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Trains Tacotron based TTS models."""
import argparse
import glob
import os
import sys
import time
@ -11,11 +9,12 @@ from random import randrange
import numpy as np
import torch
from TTS.utils.arguments import parse_arguments, process_args
from torch.utils.data import DataLoader
from TTS.tts.datasets.preprocess import load_meta_data
from TTS.tts.datasets.TTSDataset import MyDataset
from TTS.tts.layers.losses import TacotronLoss
from TTS.tts.utils.generic_utils import check_config_tts, setup_model
from TTS.tts.utils.generic_utils import setup_model
from TTS.tts.utils.io import save_best_model, save_checkpoint
from TTS.tts.utils.measures import alignment_diagonal_score
from TTS.tts.utils.speakers import parse_speakers
@ -23,15 +22,11 @@ from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
from TTS.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger
from TTS.utils.distribute import (DistributedSampler, apply_gradient_allreduce,
init_distributed, reduce_tensor)
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict)
from TTS.utils.io import copy_model_files, load_config
from TTS.utils.radam import RAdam
from TTS.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.training import (NoamLR, adam_weight_decay, check_update,
gradual_training_scheduler, set_weight_decay,
setup_torch_training_env)
@ -61,7 +56,13 @@ def setup_loader(ap, r, is_val=False, verbose=False, dataset=None):
phoneme_language=c.phoneme_language,
enable_eos_bos=c.enable_eos_bos_chars,
verbose=verbose,
speaker_mapping=speaker_mapping if c.use_speaker_embedding and c.use_external_speaker_embedding_file else None)
speaker_mapping=(
speaker_mapping if (
c.use_speaker_embedding and
c.use_external_speaker_embedding_file
) else None
)
)
if c.use_phonemes and c.compute_input_seq_cache:
# precompute phonemes to have a better estimate of sequence lengths.
@ -178,10 +179,10 @@ def train(data_loader, model, criterion, optimizer, optimizer_st, scheduler,
# compute loss
loss_dict = criterion(postnet_output, decoder_output, mel_input,
linear_input, stop_tokens, stop_targets,
mel_lengths, decoder_backward_output,
alignments, alignment_lengths,
alignments_backward, text_lengths)
linear_input, stop_tokens, stop_targets,
mel_lengths, decoder_backward_output,
alignments, alignment_lengths, alignments_backward,
text_lengths)
# check nan loss
if torch.isnan(loss_dict['loss']).any():
@ -199,7 +200,7 @@ def train(data_loader, model, criterion, optimizer, optimizer_st, scheduler,
# stopnet optimizer step
if c.separate_stopnet:
scaler_st.scale(loss_dict['stopnet_loss']).backward()
scaler_st.scale( loss_dict['stopnet_loss']).backward()
scaler.unscale_(optimizer_st)
optimizer_st, _ = adam_weight_decay(optimizer_st)
grad_norm_st, _ = check_update(model.decoder.stopnet, 1.0)
@ -491,7 +492,6 @@ def evaluate(data_loader, model, criterion, ap, global_step, epoch):
return keep_avg.avg_values
# FIXME: move args definition/parsing inside of main?
def main(args): # pylint: disable=redefined-outer-name
# pylint: disable=global-variable-undefined
global meta_data_train, meta_data_eval, symbols, phonemes, speaker_mapping
@ -534,7 +534,8 @@ def main(args): # pylint: disable=redefined-outer-name
optimizer_st = None
# setup criterion
criterion = TacotronLoss(c, stopnet_pos_weight=c.stopnet_pos_weight, ga_sigma=0.4)
criterion = TacotronLoss(c, stopnet_pos_weight=10.0, ga_sigma=0.4)
if args.restore_path:
checkpoint = torch.load(args.restore_path, map_location='cpu')
try:
@ -640,80 +641,9 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--continue_path',
type=str,
help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.',
default='',
required='--config_path' not in sys.argv)
parser.add_argument(
'--restore_path',
type=str,
help='Model file to be restored. Use to finetune a model.',
default='')
parser.add_argument(
'--config_path',
type=str,
help='Path to config file for training.',
required='--continue_path' not in sys.argv
)
parser.add_argument('--debug',
type=bool,
default=False,
help='Do not verify commit integrity to run training.')
# DISTRIBUTED
parser.add_argument(
'--rank',
type=int,
default=0,
help='DISTRIBUTED: process rank for distributed training.')
parser.add_argument('--group_id',
type=str,
default="",
help='DISTRIBUTED: process group id.')
args = parser.parse_args()
if args.continue_path != '':
print(f" > Training continues for {args.continue_path}")
args.output_path = args.continue_path
args.config_path = os.path.join(args.continue_path, 'config.json')
list_of_files = glob.glob(args.continue_path + "/*.pth.tar") # * means all if need specific format then *.csv
latest_model_file = max(list_of_files, key=os.path.getctime)
args.restore_path = latest_model_file
# setup output paths and read configs
c = load_config(args.config_path)
check_config_tts(c)
_ = os.path.dirname(os.path.realpath(__file__))
if c.mixed_precision:
print(" > Mixed precision mode is ON")
OUT_PATH = args.continue_path
if args.continue_path == '':
OUT_PATH = create_experiment_folder(c.output_path, c.run_name, args.debug)
AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios')
c_logger = ConsoleLogger()
if args.rank == 0:
os.makedirs(AUDIO_PATH, exist_ok=True)
new_fields = {}
if args.restore_path:
new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775)
LOG_DIR = OUT_PATH
tb_logger = TensorboardLogger(LOG_DIR, model_name='TTS')
# write model desc to tensorboard
tb_logger.tb_add_text('model-description', c['run_description'], 0)
args = parse_arguments(sys.argv)
c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(
args, model_type='tacotron')
try:
main(args)

View File

@ -1,5 +1,6 @@
import argparse
import glob
#!/usr/bin/env python3
"""Trains GAN based vocoder model."""
import os
import sys
import time
@ -7,15 +8,14 @@ import traceback
from inspect import signature
import torch
from TTS.utils.arguments import parse_arguments, process_args
from torch.utils.data import DataLoader
from TTS.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict)
from TTS.utils.io import copy_model_files, load_config
from TTS.utils.radam import RAdam
from TTS.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.training import setup_torch_training_env
from TTS.vocoder.datasets.gan_dataset import GANDataset
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
@ -33,8 +33,9 @@ use_cuda, num_gpus = setup_torch_training_env(True, True)
def setup_loader(ap, is_val=False, verbose=False):
loader = None
if not is_val or c.run_eval:
if is_val and not c.run_eval:
loader = None
else:
dataset = GANDataset(ap=ap,
items=eval_data if is_val else train_data,
seq_len=c.seq_len,
@ -113,7 +114,7 @@ def train(model_G, criterion_G, optimizer_G, model_D, criterion_D, optimizer_D,
y_hat = model_G(c_G)
y_hat_sub = None
y_G_sub = None
y_hat_vis = y_hat # for visualization
y_hat_vis = y_hat # for visualization # FIXME! .clone().detach()
# PQMF formatting
if y_hat.shape[1] > 1:
@ -273,14 +274,14 @@ def train(model_G, criterion_G, optimizer_G, model_D, criterion_D, optimizer_D,
# compute spectrograms
figures = plot_results(y_hat_vis, y_G, ap, global_step,
'train')
'train')
tb_logger.tb_train_figures(global_step, figures)
# Sample audio
sample_voice = y_hat_vis[0].squeeze(0).detach().cpu().numpy()
tb_logger.tb_train_audios(global_step,
{'train/audio': sample_voice},
c.audio["sample_rate"])
{'train/audio': sample_voice},
c.audio["sample_rate"])
end_time = time.time()
# print epoch stats
@ -439,7 +440,6 @@ def evaluate(model_G, criterion_G, model_D, criterion_D, ap, global_step, epoch)
return keep_avg.avg_values
# FIXME: move args definition/parsing inside of main?
def main(args): # pylint: disable=redefined-outer-name
# pylint: disable=global-variable-undefined
global train_data, eval_data
@ -506,7 +506,7 @@ def main(args): # pylint: disable=redefined-outer-name
scheduler_disc.load_state_dict(checkpoint['scheduler_disc'])
scheduler_disc.optimizer = optimizer_disc
except RuntimeError:
# retore only matching layers.
# restore only matching layers.
print(" > Partial model initialization...")
model_dict = model_gen.state_dict()
model_dict = set_init_dict(model_dict, checkpoint['model'], c)
@ -556,7 +556,8 @@ def main(args): # pylint: disable=redefined-outer-name
model_disc, criterion_disc, optimizer_disc,
scheduler_gen, scheduler_disc, ap, global_step,
epoch)
eval_avg_loss_dict = evaluate(model_gen, criterion_gen, model_disc, criterion_disc, ap,
eval_avg_loss_dict = evaluate(model_gen, criterion_gen, model_disc,
criterion_disc, ap,
global_step, epoch)
c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
target_loss = eval_avg_loss_dict[c.target_loss]
@ -575,78 +576,9 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--continue_path',
type=str,
help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.',
default='',
required='--config_path' not in sys.argv)
parser.add_argument(
'--restore_path',
type=str,
help='Model file to be restored. Use to finetune a model.',
default='')
parser.add_argument('--config_path',
type=str,
help='Path to config file for training.',
required='--continue_path' not in sys.argv)
parser.add_argument('--debug',
type=bool,
default=False,
help='Do not verify commit integrity to run training.')
# DISTRIBUTED
parser.add_argument(
'--rank',
type=int,
default=0,
help='DISTRIBUTED: process rank for distributed training.')
parser.add_argument('--group_id',
type=str,
default="",
help='DISTRIBUTED: process group id.')
args = parser.parse_args()
if args.continue_path != '':
args.output_path = args.continue_path
args.config_path = os.path.join(args.continue_path, 'config.json')
list_of_files = glob.glob(
args.continue_path +
"/*.pth.tar") # * means all if need specific format then *.csv
latest_model_file = max(list_of_files, key=os.path.getctime)
args.restore_path = latest_model_file
print(f" > Training continues for {args.restore_path}")
# setup output paths and read configs
c = load_config(args.config_path)
# check_config(c)
_ = os.path.dirname(os.path.realpath(__file__))
OUT_PATH = args.continue_path
if args.continue_path == '':
OUT_PATH = create_experiment_folder(c.output_path, c.run_name,
args.debug)
AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios')
c_logger = ConsoleLogger()
if args.rank == 0:
os.makedirs(AUDIO_PATH, exist_ok=True)
new_fields = {}
if args.restore_path:
new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775)
LOG_DIR = OUT_PATH
tb_logger = TensorboardLogger(LOG_DIR, model_name='VOCODER')
# write model desc to tensorboard
tb_logger.tb_add_text('model-description', c['run_description'], 0)
args = parse_arguments(sys.argv)
c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(
args, model_type='gan')
try:
main(args)

View File

@ -1,5 +1,6 @@
import argparse
import glob
#!/usr/bin/env python3
"""Trains WaveGrad vocoder models."""
import os
import sys
import time
@ -7,19 +8,16 @@ import traceback
import numpy as np
import torch
from TTS.utils.arguments import parse_arguments, process_args
# DISTRIBUTED
from torch.nn.parallel import DistributedDataParallel as DDP_th
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
from TTS.utils.audio import AudioProcessor
from TTS.utils.console_logger import ConsoleLogger
from TTS.utils.distribute import init_distributed
from TTS.utils.generic_utils import (KeepAverage, count_parameters,
create_experiment_folder, get_git_branch,
remove_experiment_folder, set_init_dict)
from TTS.utils.io import copy_model_files, load_config
from TTS.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.training import setup_torch_training_env
from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset
@ -34,16 +32,16 @@ def setup_loader(ap, is_val=False, verbose=False):
loader = None
else:
dataset = WaveGradDataset(ap=ap,
items=eval_data if is_val else train_data,
seq_len=c.seq_len,
hop_len=ap.hop_length,
pad_short=c.pad_short,
conv_pad=c.conv_pad,
is_training=not is_val,
return_segments=True,
use_noise_augment=False,
use_cache=c.use_cache,
verbose=verbose)
items=eval_data if is_val else train_data,
seq_len=c.seq_len,
hop_len=ap.hop_length,
pad_short=c.pad_short,
conv_pad=c.conv_pad,
is_training=not is_val,
return_segments=True,
use_noise_augment=False,
use_cache=c.use_cache,
verbose=verbose)
sampler = DistributedSampler(dataset) if num_gpus > 1 else None
loader = DataLoader(dataset,
batch_size=c.batch_size,
@ -54,6 +52,7 @@ def setup_loader(ap, is_val=False, verbose=False):
if is_val else c.num_loader_workers,
pin_memory=False)
return loader
@ -78,8 +77,8 @@ def format_test_data(data):
return m, x
def train(model, criterion, optimizer, scheduler, scaler, ap, global_step,
epoch):
def train(model, criterion, optimizer,
scheduler, scaler, ap, global_step, epoch):
data_loader = setup_loader(ap, is_val=False, verbose=(epoch == 0))
model.train()
epoch_time = 0
@ -93,8 +92,7 @@ def train(model, criterion, optimizer, scheduler, scaler, ap, global_step,
c_logger.print_train_start()
# setup noise schedule
noise_schedule = c['train_noise_schedule']
betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'],
noise_schedule['num_steps'])
betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'], noise_schedule['num_steps'])
if hasattr(model, 'module'):
model.module.compute_noise_level(betas)
else:
@ -120,7 +118,7 @@ def train(model, criterion, optimizer, scheduler, scaler, ap, global_step,
# compute losses
loss = criterion(noise, noise_hat)
loss_wavegrad_dict = {'wavegrad_loss': loss}
loss_wavegrad_dict = {'wavegrad_loss':loss}
# check nan loss
if torch.isnan(loss).any():
@ -133,13 +131,13 @@ def train(model, criterion, optimizer, scheduler, scaler, ap, global_step,
scaler.scale(loss).backward()
scaler.unscale_(optimizer)
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
c.clip_grad)
c.clip_grad)
scaler.step(optimizer)
scaler.update()
else:
loss.backward()
grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
c.clip_grad)
c.clip_grad)
optimizer.step()
# schedule update
@ -205,8 +203,7 @@ def train(model, criterion, optimizer, scheduler, scaler, ap, global_step,
epoch,
OUT_PATH,
model_losses=loss_dict,
scaler=scaler.state_dict()
if c.mixed_precision else None)
scaler=scaler.state_dict() if c.mixed_precision else None)
end_time = time.time()
@ -247,12 +244,14 @@ def evaluate(model, criterion, ap, global_step, epoch):
else:
noise, x_noisy, noise_scale = model.compute_y_n(x)
# forward pass
noise_hat = model(x_noisy, m, noise_scale)
# compute losses
loss = criterion(noise, noise_hat)
loss_wavegrad_dict = {'wavegrad_loss': loss}
loss_wavegrad_dict = {'wavegrad_loss':loss}
loss_dict = dict()
for key, value in loss_wavegrad_dict.items():
@ -283,9 +282,7 @@ def evaluate(model, criterion, ap, global_step, epoch):
# setup noise schedule and inference
noise_schedule = c['test_noise_schedule']
betas = np.linspace(noise_schedule['min_val'],
noise_schedule['max_val'],
noise_schedule['num_steps'])
betas = np.linspace(noise_schedule['min_val'], noise_schedule['max_val'], noise_schedule['num_steps'])
if hasattr(model, 'module'):
model.module.compute_noise_level(betas)
# compute voice
@ -316,8 +313,7 @@ def main(args): # pylint: disable=redefined-outer-name
print(f" > Loading wavs from: {c.data_path}")
if c.feature_path is not None:
print(f" > Loading features from: {c.feature_path}")
eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path,
c.eval_split_size)
eval_data, train_data = load_wav_feat_data(c.data_path, c.feature_path, c.eval_split_size)
else:
eval_data, train_data = load_wav_data(c.data_path, c.eval_split_size)
@ -347,10 +343,6 @@ def main(args): # pylint: disable=redefined-outer-name
# setup criterion
criterion = torch.nn.L1Loss().cuda()
if use_cuda:
model.cuda()
criterion.cuda()
if args.restore_path:
checkpoint = torch.load(args.restore_path, map_location='cpu')
try:
@ -384,6 +376,10 @@ def main(args): # pylint: disable=redefined-outer-name
else:
args.restore_step = 0
if use_cuda:
model.cuda()
criterion.cuda()
# DISTRIBUTED
if num_gpus > 1:
model = DDP_th(model, device_ids=[args.rank])
@ -397,105 +393,32 @@ def main(args): # pylint: disable=redefined-outer-name
global_step = args.restore_step
for epoch in range(0, c.epochs):
c_logger.print_epoch_start(epoch, c.epochs)
_, global_step = train(model, criterion, optimizer, scheduler, scaler,
ap, global_step, epoch)
eval_avg_loss_dict = evaluate(model, criterion, ap, global_step, epoch)
_, global_step = train(model, criterion, optimizer,
scheduler, scaler, ap, global_step,
epoch)
eval_avg_loss_dict = evaluate(model, criterion, ap,
global_step, epoch)
c_logger.print_epoch_end(epoch, eval_avg_loss_dict)
target_loss = eval_avg_loss_dict[c.target_loss]
best_loss = save_best_model(
target_loss,
best_loss,
model,
optimizer,
scheduler,
None,
None,
None,
global_step,
epoch,
OUT_PATH,
model_losses=eval_avg_loss_dict,
scaler=scaler.state_dict() if c.mixed_precision else None)
best_loss = save_best_model(target_loss,
best_loss,
model,
optimizer,
scheduler,
None,
None,
None,
global_step,
epoch,
OUT_PATH,
model_losses=eval_avg_loss_dict,
scaler=scaler.state_dict() if c.mixed_precision else None)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--continue_path',
type=str,
help=
'Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.',
default='',
required='--config_path' not in sys.argv)
parser.add_argument(
'--restore_path',
type=str,
help='Model file to be restored. Use to finetune a model.',
default='')
parser.add_argument('--config_path',
type=str,
help='Path to config file for training.',
required='--continue_path' not in sys.argv)
parser.add_argument('--debug',
type=bool,
default=False,
help='Do not verify commit integrity to run training.')
# DISTRIBUTED
parser.add_argument(
'--rank',
type=int,
default=0,
help='DISTRIBUTED: process rank for distributed training.')
parser.add_argument('--group_id',
type=str,
default="",
help='DISTRIBUTED: process group id.')
args = parser.parse_args()
if args.continue_path != '':
args.output_path = args.continue_path
args.config_path = os.path.join(args.continue_path, 'config.json')
list_of_files = glob.glob(
args.continue_path +
"/*.pth.tar") # * means all if need specific format then *.csv
latest_model_file = max(list_of_files, key=os.path.getctime)
args.restore_path = latest_model_file
print(f" > Training continues for {args.restore_path}")
# setup output paths and read configs
c = load_config(args.config_path)
# check_config(c)
_ = os.path.dirname(os.path.realpath(__file__))
# DISTRIBUTED
if c.mixed_precision:
print(" > Mixed precision is enabled")
OUT_PATH = args.continue_path
if args.continue_path == '':
OUT_PATH = create_experiment_folder(c.output_path, c.run_name,
args.debug)
AUDIO_PATH = os.path.join(OUT_PATH, 'test_audios')
c_logger = ConsoleLogger()
if args.rank == 0:
os.makedirs(AUDIO_PATH, exist_ok=True)
new_fields = {}
if args.restore_path:
new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch()
copy_model_files(c, args.config_path, OUT_PATH, new_fields)
os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775)
LOG_DIR = OUT_PATH
tb_logger = TensorboardLogger(LOG_DIR, model_name='VOCODER')
# write model desc to tensorboard
tb_logger.tb_add_text('model-description', c['run_description'], 0)
args = parse_arguments(sys.argv)
c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(
args, model_type='wavegrad')
try:
main(args)

View File

@ -1,9 +1,10 @@
import argparse
#!/usr/bin/env python3
"""Train WaveRNN vocoder model."""
import os
import sys
import traceback
import time
import glob
import random
import torch
@ -11,18 +12,14 @@ from torch.utils.data import DataLoader
# from torch.utils.data.distributed import DistributedSampler
from TTS.utils.arguments import parse_arguments, process_args
from TTS.tts.utils.visual import plot_spectrogram
from TTS.utils.audio import AudioProcessor
from TTS.utils.radam import RAdam
from TTS.utils.io import copy_model_files, load_config
from TTS.utils.training import setup_torch_training_env
from TTS.utils.console_logger import ConsoleLogger
from TTS.utils.tensorboard_logger import TensorboardLogger
from TTS.utils.generic_utils import (
KeepAverage,
count_parameters,
create_experiment_folder,
get_git_branch,
remove_experiment_folder,
set_init_dict,
)
@ -207,7 +204,14 @@ def train(model, optimizer, criterion, scheduler, scaler, ap, global_step, epoch
c.batched,
c.target_samples,
c.overlap_samples,
# use_cuda
)
# sample_wav = model.generate(ground_mel,
# c.batched,
# c.target_samples,
# c.overlap_samples,
# use_cuda
# )
predict_mel = ap.melspectrogram(sample_wav)
# compute spectrograms
@ -296,6 +300,7 @@ def evaluate(model, criterion, ap, global_step, epoch):
c.batched,
c.target_samples,
c.overlap_samples,
# use_cuda
)
predict_mel = ap.melspectrogram(sample_wav)
@ -447,87 +452,9 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--continue_path",
type=str,
help='Training output folder to continue training. Use to continue a training. If it is used, "config_path" is ignored.',
default="",
required="--config_path" not in sys.argv,
)
parser.add_argument(
"--restore_path",
type=str,
help="Model file to be restored. Use to finetune a model.",
default="",
)
parser.add_argument(
"--config_path",
type=str,
help="Path to config file for training.",
required="--continue_path" not in sys.argv,
)
parser.add_argument(
"--debug",
type=bool,
default=False,
help="Do not verify commit integrity to run training.",
)
    # DISTRIBUTED
parser.add_argument(
"--rank",
type=int,
default=0,
help="DISTRIBUTED: process rank for distributed training.",
)
parser.add_argument(
"--group_id", type=str, default="", help="DISTRIBUTED: process group id."
)
args = parser.parse_args()
if args.continue_path != "":
args.output_path = args.continue_path
args.config_path = os.path.join(args.continue_path, "config.json")
list_of_files = glob.glob(
args.continue_path + "/*.pth.tar"
        )  # '*' matches all checkpoints; narrow with a pattern like '*.csv' if needed
latest_model_file = max(list_of_files, key=os.path.getctime)
args.restore_path = latest_model_file
print(f" > Training continues for {args.restore_path}")
# setup output paths and read configs
c = load_config(args.config_path)
# check_config(c)
_ = os.path.dirname(os.path.realpath(__file__))
OUT_PATH = args.continue_path
if args.continue_path == "":
OUT_PATH = create_experiment_folder(
c.output_path, c.run_name, args.debug
)
AUDIO_PATH = os.path.join(OUT_PATH, "test_audios")
c_logger = ConsoleLogger()
if args.rank == 0:
os.makedirs(AUDIO_PATH, exist_ok=True)
new_fields = {}
if args.restore_path:
new_fields["restore_path"] = args.restore_path
new_fields["github_branch"] = get_git_branch()
copy_model_files(
c, args.config_path, OUT_PATH, new_fields
)
os.chmod(AUDIO_PATH, 0o775)
os.chmod(OUT_PATH, 0o775)
LOG_DIR = OUT_PATH
tb_logger = TensorboardLogger(LOG_DIR, model_name="VOCODER")
# write model desc to tensorboard
tb_logger.tb_add_text("model-description", c["run_description"], 0)
args = parse_arguments(sys.argv)
c, OUT_PATH, AUDIO_PATH, c_logger, tb_logger = process_args(
args, model_type='wavernn')
try:
main(args)