Add stop token visualization for the notebook

2018-05-03 05:56:33 -07:00 · 2018-05-03 05:56:33 -07:00 · d02dd47958
parent f8d5bbd5d2
commit d02dd47958
2 changed files with 10 additions and 5 deletions
--- a/notebooks/utils.py
+++ b/notebooks/utils.py
@@ -34,20 +34,25 @@ def create_speech(m, s, CONFIG, use_cuda, ap):
    return wav, alignment, spec, stop_tokens


-def visualize(alignment, spectrogram, CONFIG):
+def visualize(alignment, spectrogram, stop_tokens, CONFIG):
    label_fontsize = 16
-    plt.figure(figsize=(16, 16))
+    plt.figure(figsize=(16, 24))

-    plt.subplot(2, 1, 1)
+    plt.subplot(3, 1, 1)
    plt.imshow(alignment.T, aspect="auto", origin="lower", interpolation=None)
    plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
    plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
    plt.colorbar()
+    
+    plt.subplot(3, 1, 2)
+    plt.plot(range(len(stop_tokens)), list(stop_tokens))

-    plt.subplot(2, 1, 2)
+    plt.subplot(3, 1, 3)
    librosa.display.specshow(spectrogram.T, sr=CONFIG.sample_rate,
                             hop_length=hop_length, x_axis="time", y_axis="linear")
    plt.xlabel("Time", fontsize=label_fontsize)
    plt.ylabel("Hz", fontsize=label_fontsize)
    plt.tight_layout()
    plt.colorbar()
+    
+   
--- a/utils/visual.py
+++ b/utils/visual.py
@@ -32,4 +32,4 @@ def plot_spectrogram(linear_output, audio):
    data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close()
-    return data
+    return data