mirror of https://github.com/coqui-ai/TTS.git
Update mel spectrogram for the style encoder
This commit is contained in:
parent
a32961bcb4
commit
40a4e631ea
|
@ -157,7 +157,17 @@ class GPTTrainer(BaseTTS):
|
||||||
print(">> GPT weights randomly initialized! If you want you can specify a checkpoint in config.model_args.gpt_checkpoint")
|
print(">> GPT weights randomly initialized! If you want you can specify a checkpoint in config.model_args.gpt_checkpoint")
|
||||||
|
|
||||||
# Mel spectrogram extractor for conditioning
|
# Mel spectrogram extractor for conditioning
|
||||||
self.torch_mel_spectrogram = TorchMelSpectrogram(mel_norm_file=self.args.mel_norm_file, sampling_rate=config.audio.sample_rate)
|
self.torch_mel_spectrogram_style_encoder = TorchMelSpectrogram(
|
||||||
|
filter_length=4096,
|
||||||
|
hop_length=1024,
|
||||||
|
win_length=4096,
|
||||||
|
normalize=False,
|
||||||
|
sampling_rate=config.audio.sample_rate,
|
||||||
|
mel_fmin=0,
|
||||||
|
mel_fmax=8000,
|
||||||
|
n_mel_channels=80,
|
||||||
|
mel_norm_file=self.args.mel_norm_file
|
||||||
|
)
|
||||||
|
|
||||||
# Load DVAE
|
# Load DVAE
|
||||||
self.dvae = DiscreteVAE(
|
self.dvae = DiscreteVAE(
|
||||||
|
@ -224,9 +234,9 @@ class GPTTrainer(BaseTTS):
|
||||||
# transform waves from torch.Size([B, num_cond_samples, 1, T]) to torch.Size([B * num_cond_samples, 1, T]) because it is faster than iterating over the tensor
|
# transform waves from torch.Size([B, num_cond_samples, 1, T]) to torch.Size([B * num_cond_samples, 1, T]) because it is faster than iterating over the tensor
|
||||||
B, num_cond_samples, C, T = batch["conditioning"].size()
|
B, num_cond_samples, C, T = batch["conditioning"].size()
|
||||||
conditioning_reshaped = batch["conditioning"].view(B*num_cond_samples, C, T)
|
conditioning_reshaped = batch["conditioning"].view(B*num_cond_samples, C, T)
|
||||||
paired_conditioning_mel = self.torch_mel_spectrogram(conditioning_reshaped)
|
paired_conditioning_mel = self.torch_mel_spectrogram_style_encoder(conditioning_reshaped)
|
||||||
# transform torch.Size([B * num_cond_samples, n_mel, T_mel]) into torch.Size([B, num_cond_samples, n_mel, T_mel])
|
# transform torch.Size([B * num_cond_samples, n_mel, T_mel]) into torch.Size([B, num_cond_samples, n_mel, T_mel])
|
||||||
n_mel = self.torch_mel_spectrogram.n_mel_channels # paired_conditioning_mel.size(1)
|
n_mel = self.torch_mel_spectrogram_style_encoder.n_mel_channels # paired_conditioning_mel.size(1)
|
||||||
T_mel = paired_conditioning_mel.size(2)
|
T_mel = paired_conditioning_mel.size(2)
|
||||||
paired_conditioning_mel = paired_conditioning_mel.view(B, num_cond_samples, n_mel, T_mel)
|
paired_conditioning_mel = paired_conditioning_mel.view(B, num_cond_samples, n_mel, T_mel)
|
||||||
# get the conditioning embeddings
|
# get the conditioning embeddings
|
||||||
|
|
Loading…
Reference in New Issue