mirror of https://github.com/coqui-ai/TTS.git
Plot the mel spectrogram as an optional fourth subplot when one is passed to visualize()
This commit is contained in:
parent
4838d16fec
commit
161a26c9dd
|
@ -17,7 +17,8 @@ def synthesis(m, s, CONFIG, use_cuda, ap):
|
|||
chars_var = chars_var.cuda()
|
||||
mel_spec, linear_spec, alignments, stop_tokens = m.forward(chars_var.long())
|
||||
linear_spec = linear_spec[0].data.cpu().numpy()
|
||||
mel_spec = mel_spec[0].data.cpu().numpy()
|
||||
alignment = alignments[0].cpu().data.numpy()
|
||||
wav = ap.inv_spectrogram(linear_spec.T)
|
||||
# wav = wav[:ap.find_endpoint(wav)]
|
||||
return wav, alignment, linear_spec, stop_tokens
|
||||
return wav, alignment, linear_spec, mel_spec, stop_tokens
|
|
@ -29,11 +29,16 @@ def plot_spectrogram(linear_output, audio):
|
|||
return fig
|
||||
|
||||
|
||||
def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG):
|
||||
def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG, spectrogram2=None):
|
||||
if spectrogram2 is not None:
|
||||
num_plot = 4
|
||||
else:
|
||||
num_plot = 3
|
||||
|
||||
label_fontsize = 16
|
||||
plt.figure(figsize=(16, 32))
|
||||
|
||||
plt.subplot(3, 1, 1)
|
||||
plt.subplot(num_plot, 1, 1)
|
||||
plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
|
||||
plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
|
||||
plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
|
||||
|
@ -41,14 +46,21 @@ def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG):
|
|||
plt.colorbar()
|
||||
|
||||
stop_tokens = stop_tokens.squeeze().detach().to('cpu').numpy()
|
||||
plt.subplot(3, 1, 2)
|
||||
plt.subplot(num_plot, 1, 2)
|
||||
plt.plot(range(len(stop_tokens)), list(stop_tokens))
|
||||
|
||||
plt.subplot(3, 1, 3)
|
||||
plt.subplot(num_plot, 1, 3)
|
||||
librosa.display.specshow(spectrogram.T, sr=CONFIG.audio['sample_rate'],
|
||||
hop_length=hop_length, x_axis="time", y_axis="linear")
|
||||
plt.xlabel("Time", fontsize=label_fontsize)
|
||||
plt.ylabel("Hz", fontsize=label_fontsize)
|
||||
|
||||
if spectrogram2 is not None:
|
||||
plt.subplot(num_plot, 1, 4)
|
||||
librosa.display.specshow(spectrogram2.T, sr=CONFIG.audio['sample_rate'],
|
||||
hop_length=hop_length, x_axis="time", y_axis="linear")
|
||||
plt.xlabel("Time", fontsize=label_fontsize)
|
||||
plt.ylabel("Hz", fontsize=label_fontsize)
|
||||
|
||||
plt.tight_layout()
|
||||
plt.colorbar()
|
||||
|
|
Loading…
Reference in New Issue