diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py index 6efa64e2..3e8d4adc 100644 --- a/TTS/tts/models/align_tts.py +++ b/TTS/tts/models/align_tts.py @@ -275,7 +275,7 @@ class AlignTTS(nn.Module): g: [B, C] """ g = cond_input["x_vectors"] if "x_vectors" in cond_input else None - x_lengths = torch.tensor(x.shape[1:2]).to(x.device) # pylint: disable=not-callable + x_lengths = torch.tensor(x.shape[1:2]).to(x.device) # pad input to prevent dropping the last word # x = torch.nn.functional.pad(x, pad=(0, 5), mode='constant', value=0) o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g) diff --git a/TTS/tts/models/speedy_speech.py b/TTS/tts/models/speedy_speech.py index 96ef1740..455dbf38 100644 --- a/TTS/tts/models/speedy_speech.py +++ b/TTS/tts/models/speedy_speech.py @@ -183,7 +183,7 @@ class SpeedySpeech(nn.Module): g: [B, C] """ g = cond_input["x_vectors"] if "x_vectors" in cond_input else None - x_lengths = torch.tensor(x.shape[1:2]).to(x.device) # pylint: disable=not-callable + x_lengths = torch.tensor(x.shape[1:2]).to(x.device) # input sequence should be greated than the max convolution size inference_padding = 5 if x.shape[1] < 13: diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py index 8d3124c3..12c3e5f9 100644 --- a/TTS/tts/models/tacotron.py +++ b/TTS/tts/models/tacotron.py @@ -191,11 +191,9 @@ class Tacotron(TacotronAbstract): mel_lengths: [B] cond_input: 'speaker_ids': [B, 1] and 'x_vectors':[B, C] """ - cond_input = self._format_cond_input(cond_input) outputs = {"alignments_backward": None, "decoder_outputs_backward": None} - input_mask, output_mask = self.compute_masks(text_lengths, mel_lengths) - # B x T_in x embed_dim inputs = self.embedding(text) + input_mask, output_mask = self.compute_masks(text_lengths, mel_lengths) # B x T_in x encoder_in_features encoder_outputs = self.encoder(inputs) # sequence masking diff --git a/TTS/utils/arguments.py b/TTS/utils/arguments.py index 55bad4f2..9d92ae82 100644 --- a/TTS/utils/arguments.py +++ b/TTS/utils/arguments.py @@ -29,16 +29,16 @@ def init_arguments(argv): parser.add_argument( "--continue_path", type=str, - help=("Training output folder to continue training. Used to continue " - "a training. If it is used, 'config_path' is ignored."), + help=( + "Training output folder to continue training. Used to continue " + "a training. If it is used, 'config_path' is ignored." + ), default="", required="--config_path" not in argv, ) parser.add_argument( - "--restore_path", - type=str, - help="Model file to be restored. Use to finetune a model.", - default="") + "--restore_path", type=str, help="Model file to be restored. Use to finetune a model.", default="" + ) parser.add_argument( "--best_path", type=str, @@ -48,23 +48,12 @@ def init_arguments(argv): ), default="", ) - parser.add_argument("--config_path", - type=str, - help="Path to config file for training.", - required="--continue_path" not in argv) - parser.add_argument("--debug", - type=bool, - default=False, - help="Do not verify commit integrity to run training.") parser.add_argument( - "--rank", - type=int, - default=0, - help="DISTRIBUTED: process rank for distributed training.") - parser.add_argument("--group_id", - type=str, - default="", - help="DISTRIBUTED: process group id.") + "--config_path", type=str, help="Path to config file for training.", required="--continue_path" not in argv + ) + parser.add_argument("--debug", type=bool, default=False, help="Do not verify commit integrity to run training.") + parser.add_argument("--rank", type=int, default=0, help="DISTRIBUTED: process rank for distributed training.") + parser.add_argument("--group_id", type=str, default="", help="DISTRIBUTED: process group id.") return parser @@ -159,8 +148,7 @@ def process_args(args): print(" > Mixed precision mode is ON") experiment_path = args.continue_path if not experiment_path: - experiment_path = create_experiment_folder(config.output_path, - config.run_name, args.debug) + experiment_path = create_experiment_folder(config.output_path, config.run_name, args.debug) audio_path = os.path.join(experiment_path, "test_audios") # setup rank 0 process in distributed training tb_logger = None @@ -181,8 +169,7 @@ def process_args(args): os.chmod(experiment_path, 0o775) tb_logger = TensorboardLogger(experiment_path, model_name=config.model) # write model desc to tensorboard - tb_logger.tb_add_text("model-config", f"
{config.to_json()}", - 0) + tb_logger.tb_add_text("model-config", f"
{config.to_json()}", 0) c_logger = ConsoleLogger() return config, experiment_path, audio_path, c_logger, tb_logger