From f3decd09885dd55abf2e6baf485b1c94f6366c33 Mon Sep 17 00:00:00 2001
From: Eren Golge <egolge@mozilla.com>
Date: Tue, 10 Apr 2018 09:37:15 -0700
Subject: [PATCH] Notebook update for testing

---
 notebooks/TacotronPlayGround.ipynb | 34 ++++++++++--------------------
 utils/audio.py                     |  2 +-
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/notebooks/TacotronPlayGround.ipynb b/notebooks/TacotronPlayGround.ipynb
index bcfe351c..a4d0e123 100644
--- a/notebooks/TacotronPlayGround.ipynb
+++ b/notebooks/TacotronPlayGround.ipynb
@@ -40,9 +40,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def tts(model, text, CONFIG, use_cuda, ap, figures=True):\n",
@@ -58,14 +56,12 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Set constants\n",
-    "ROOT_PATH = '/data/shared/erogol_models/March-28-2018_06:24PM/'\n",
-    "MODEL_PATH = ROOT_PATH + '/best_model.pth.tar'\n",
+    "ROOT_PATH = '/data/shared/erogol_models/April-07-2018_12:33PM-e6bf09f/'\n",
+    "MODEL_PATH = ROOT_PATH + '/checkpoint_50854.pth.tar'\n",
     "CONFIG_PATH = ROOT_PATH + '/config.json'\n",
     "OUT_FOLDER = ROOT_PATH + '/test/'\n",
     "CONFIG = load_config(CONFIG_PATH)\n",
@@ -79,7 +75,7 @@
    "outputs": [],
    "source": [
     "# load the model\n",
-    "model = Tacotron(CONFIG.embedding_size, CONFIG.num_mels, CONFIG.num_freq, CONFIG.r)\n",
+    "model = Tacotron(CONFIG.embedding_size, CONFIG.num_freq, CONFIG.num_mels, CONFIG.r)\n",
     "\n",
     "# load the audio processor\n",
     "ap = AudioProcessor(CONFIG.sample_rate, CONFIG.num_mels, CONFIG.min_level_db,\n",
@@ -93,13 +89,6 @@
     "else:\n",
     "    cp = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)\n",
     "\n",
-    "# # small trick to remove DataParallel wrapper\n",
-    "new_state_dict = OrderedDict()\n",
-    "for k, v in cp['model'].items():\n",
-    "    name = k[7:] # remove `module.`\n",
-    "    new_state_dict[name] = v\n",
-    "cp['model'] = new_state_dict\n",
-    "\n",
     "# load the model\n",
     "model.load_state_dict(cp['model'])\n",
     "if use_cuda:\n",
@@ -117,13 +106,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
-    "df = pd.read_csv('/data/shared/KeithIto/LJSpeech-1.0/metadata.csv', delimiter='|')"
+    "df = pd.read_csv('/data/shared/KeithIto/LJSpeech-1.0/metadata_val.csv', delimiter='|')"
    ]
   },
   {
@@ -134,9 +121,10 @@
    },
    "outputs": [],
    "source": [
-    "sentence = df.iloc[120, 1].lower().replace(',','')\n",
+    "sentence = df.iloc[2, 1]\n",
     "print(sentence)\n",
-    "align = tts(model, sentence, CONFIG, use_cuda, ap)"
+    "model.decoder.max_decoder_steps = len(sentence)\n",
+    "align, spec = tts(model, sentence, CONFIG, use_cuda, ap)"
    ]
   },
   {
@@ -155,7 +143,7 @@
    "outputs": [],
    "source": [
     "sentence =  \"Will Donald Trump Jr. offer the country’s business leaders a peek into a new U.S.-India relationship in trade? Defense? Terrorism?\"\n",
-    "model.decoder.max_decoder_steps = 300\n",
+    "model.decoder.max_decoder_steps = len(sentence)\n",
     "alignment = tts(model, sentence, CONFIG, use_cuda, ap)"
    ]
   }
diff --git a/utils/audio.py b/utils/audio.py
index 4ec58612..0fec03d6 100644
--- a/utils/audio.py
+++ b/utils/audio.py
@@ -25,7 +25,7 @@ class AudioProcessor(object):
 
     def save_wav(self, wav, path):
         wav *= 32767 / max(0.01, np.max(np.abs(wav)))
-        librosa.output.write_wav(path, wav.astype(np.int16), self.sample_rate)
+        librosa.output.write_wav(path, wav.astype(np.float), self.sample_rate)
 
     def _linear_to_mel(self, spectrogram):
         global _mel_basis