diff --git a/TTS/vocoder/layers/pqmf.py b/TTS/vocoder/layers/pqmf.py index ef5a3507..d31953d6 100644 --- a/TTS/vocoder/layers/pqmf.py +++ b/TTS/vocoder/layers/pqmf.py @@ -22,7 +22,7 @@ class PQMF(torch.nn.Module): for k in range(N): constant_factor = (2 * k + 1) * (np.pi / (2 * N)) * (np.arange(taps + 1) - - ((taps - 1) / 2)) + ((taps - 1) / 2)) # TODO: (taps - 1) -> taps phase = (-1)**k * np.pi / 4 H[k] = 2 * QMF * np.cos(constant_factor + phase) diff --git a/mozilla_voice_tts/tts/layers/glow_tts/encoder.py b/mozilla_voice_tts/tts/layers/glow_tts/encoder.py index 2f24f9e0..1e691823 100644 --- a/mozilla_voice_tts/tts/layers/glow_tts/encoder.py +++ b/mozilla_voice_tts/tts/layers/glow_tts/encoder.py @@ -121,7 +121,7 @@ class Encoder(nn.Module): 1).to(x.dtype) # pass encoder for layer in self.encoder: - x = layer(x) + x = layer(x, x_mask) # set duration predictor input if g is not None: g_exp = g.expand(-1, -1, x.size(-1))