diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py index f130e26a..07826fae 100644 --- a/TTS/tts/layers/glow_tts/glow.py +++ b/TTS/tts/layers/glow_tts/glow.py @@ -27,7 +27,7 @@ class ConvLayerNorm(nn.Module): self.conv_layers = nn.ModuleList() self.norm_layers = nn.ModuleList() - for idx in range(num_layers - 1): + for idx in range(num_layers): self.conv_layers.append( nn.Conv1d(in_channels if idx == 0 else hidden_channels, hidden_channels, diff --git a/TTS/tts/utils/generic_utils.py b/TTS/tts/utils/generic_utils.py index 032df71d..7a4c3a30 100644 --- a/TTS/tts/utils/generic_utils.py +++ b/TTS/tts/utils/generic_utils.py @@ -109,6 +109,7 @@ def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None): out_channels=c.audio['num_mels'], encoder_type=c.encoder_type, encoder_params=c.encoder_params, + use_encoder_prenet=c["use_encoder_prenet"], num_flow_blocks_dec=12, kernel_size_dec=5, dilation_rate=1, @@ -120,7 +121,6 @@ def setup_model(num_chars, num_speakers, c, speaker_embedding_dim=None): num_squeeze=2, sigmoid_scale=False, mean_only=True, - use_encoder_prenet=c["use_encoder_prenet"], external_speaker_embedding_dim=speaker_embedding_dim) elif c.model.lower() == "speedy_speech": model = MyModel(num_chars=num_chars + getattr(c, "add_blank", False),