Plot mel spectrogram if required

2018-11-13 12:10:40 +01:00 · 2018-11-13 12:10:40 +01:00 · 161a26c9dd
parent 4838d16fec
commit 161a26c9dd
2 changed files with 18 additions and 5 deletions
--- a/utils/synthesis.py
+++ b/utils/synthesis.py
@@ -17,7 +17,8 @@ def synthesis(m, s, CONFIG, use_cuda, ap):
        chars_var = chars_var.cuda()
    mel_spec, linear_spec, alignments, stop_tokens = m.forward(chars_var.long())
    linear_spec = linear_spec[0].data.cpu().numpy()
+    mel_spec = mel_spec[0].data.cpu().numpy()
    alignment = alignments[0].cpu().data.numpy()
    wav = ap.inv_spectrogram(linear_spec.T)
    # wav = wav[:ap.find_endpoint(wav)]
-    return wav, alignment, linear_spec, stop_tokens
+    return wav, alignment, linear_spec, mel_spec, stop_tokens
--- a/utils/visual.py
+++ b/utils/visual.py
@@ -29,11 +29,16 @@ def plot_spectrogram(linear_output, audio):
    return fig


-def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG):
+def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG, spectrogram2=None):
+    if spectrogram2 is not None:
+        num_plot = 4
+    else:
+        num_plot = 3
+
    label_fontsize = 16
    plt.figure(figsize=(16, 32))

-    plt.subplot(3, 1, 1)
+    plt.subplot(num_plot, 1, 1)
    plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
    plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
    plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
@@ -41,14 +46,21 @@ def visualize(alignment, spectrogram, stop_tokens, text, hop_length, CONFIG):
    plt.colorbar()
    
    stop_tokens = stop_tokens.squeeze().detach().to('cpu').numpy()
-    plt.subplot(3, 1, 2)
+    plt.subplot(num_plot, 1, 2)
    plt.plot(range(len(stop_tokens)), list(stop_tokens))

-    plt.subplot(3, 1, 3)
+    plt.subplot(num_plot, 1, 3)
    librosa.display.specshow(spectrogram.T, sr=CONFIG.audio['sample_rate'],
                             hop_length=hop_length, x_axis="time", y_axis="linear")
    plt.xlabel("Time", fontsize=label_fontsize)
    plt.ylabel("Hz", fontsize=label_fontsize)

+    if spectrogram2 is not None:
+        plt.subplot(num_plot, 1, 4)
+        librosa.display.specshow(spectrogram2.T, sr=CONFIG.audio['sample_rate'],
+                                hop_length=hop_length, x_axis="time", y_axis="linear")
+        plt.xlabel("Time", fontsize=label_fontsize)
+        plt.ylabel("Hz", fontsize=label_fontsize)
+
    plt.tight_layout()
    plt.colorbar()