diff --git a/notebooks/PlotUmapLibriTTS.ipynb b/notebooks/PlotUmapLibriTTS.ipynb
new file mode 100644
index 00000000..7feb9cc5
--- /dev/null
+++ b/notebooks/PlotUmapLibriTTS.ipynb
@@ -0,0 +1,737 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Overview\n",
+ "\n",
+ "This notebook can be used with either a single-speaker or a multi-speaker corpus and allows the interactive plotting of speaker embeddings linked to the underlying audio (see the instructions in the repo's speaker_embedding directory).\n",
+ "\n",
+ "Depending on the directory structure used for your corpus, you may need to adjust handling of **speaker_to_utter** and **locations**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import glob\n",
+ "import random\n",
+ "import numpy as np\n",
+ "import torch\n",
+ "import umap\n",
+ "\n",
+ "from TTS.speaker_encoder.model import SpeakerEncoder\n",
+ "from TTS.utils.audio import AudioProcessor\n",
+ "from TTS.utils.io import load_config\n",
+ "\n",
+ "from bokeh.io import output_notebook, show\n",
+ "from bokeh.plotting import figure\n",
+ "from bokeh.models import HoverTool, ColumnDataSource, BoxZoomTool, ResetTool, OpenURL, TapTool\n",
+ "from bokeh.transform import factor_cmap, factor_mark\n",
+ "from bokeh.palettes import Category10"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For larger sets of speakers (more than 10), you can use **Category20** instead of **Category10**: import it in the cell above and change it in the **pal** variable too\n",
+ "\n",
+ "List of Bokeh palettes here: http://docs.bokeh.org/en/1.4.0/docs/reference/palettes.html\n",
+ "\n",
+ "**NB:** if you have problems with other palettes, first see https://stackoverflow.com/questions/48333820/why-do-some-bokeh-palettes-raise-a-valueerror-when-used-in-factor-cmap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
Loading BokehJS ...\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": [
+ "\n",
+ "(function(root) {\n",
+ " function now() {\n",
+ " return new Date();\n",
+ " }\n",
+ "\n",
+ " var force = true;\n",
+ "\n",
+ " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n",
+ " root._bokeh_onload_callbacks = [];\n",
+ " root._bokeh_is_loading = undefined;\n",
+ " }\n",
+ "\n",
+ " var JS_MIME_TYPE = 'application/javascript';\n",
+ " var HTML_MIME_TYPE = 'text/html';\n",
+ " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
+ " var CLASS_NAME = 'output_bokeh rendered_html';\n",
+ "\n",
+ " /**\n",
+ " * Render data to the DOM node\n",
+ " */\n",
+ " function render(props, node) {\n",
+ " var script = document.createElement(\"script\");\n",
+ " node.appendChild(script);\n",
+ " }\n",
+ "\n",
+ " /**\n",
+ " * Handle when an output is cleared or removed\n",
+ " */\n",
+ " function handleClearOutput(event, handle) {\n",
+ " var cell = handle.cell;\n",
+ "\n",
+ " var id = cell.output_area._bokeh_element_id;\n",
+ " var server_id = cell.output_area._bokeh_server_id;\n",
+ " // Clean up Bokeh references\n",
+ " if (id != null && id in Bokeh.index) {\n",
+ " Bokeh.index[id].model.document.clear();\n",
+ " delete Bokeh.index[id];\n",
+ " }\n",
+ "\n",
+ " if (server_id !== undefined) {\n",
+ " // Clean up Bokeh references\n",
+ " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
+ " cell.notebook.kernel.execute(cmd, {\n",
+ " iopub: {\n",
+ " output: function(msg) {\n",
+ " var id = msg.content.text.trim();\n",
+ " if (id in Bokeh.index) {\n",
+ " Bokeh.index[id].model.document.clear();\n",
+ " delete Bokeh.index[id];\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " });\n",
+ " // Destroy server and session\n",
+ " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
+ " cell.notebook.kernel.execute(cmd);\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " /**\n",
+ " * Handle when a new output is added\n",
+ " */\n",
+ " function handleAddOutput(event, handle) {\n",
+ " var output_area = handle.output_area;\n",
+ " var output = handle.output;\n",
+ "\n",
+ " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
+ " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
+ " return\n",
+ " }\n",
+ "\n",
+ " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
+ "\n",
+ " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
+ " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
+ " // store reference to embed id on output_area\n",
+ " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
+ " }\n",
+ " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
+ " var bk_div = document.createElement(\"div\");\n",
+ " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
+ " var script_attrs = bk_div.children[0].attributes;\n",
+ " for (var i = 0; i < script_attrs.length; i++) {\n",
+ " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
+ " }\n",
+ " // store reference to server id on output_area\n",
+ " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " function register_renderer(events, OutputArea) {\n",
+ "\n",
+ " function append_mime(data, metadata, element) {\n",
+ " // create a DOM node to render to\n",
+ " var toinsert = this.create_output_subarea(\n",
+ " metadata,\n",
+ " CLASS_NAME,\n",
+ " EXEC_MIME_TYPE\n",
+ " );\n",
+ " this.keyboard_manager.register_events(toinsert);\n",
+ " // Render to node\n",
+ " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
+ " render(props, toinsert[toinsert.length - 1]);\n",
+ " element.append(toinsert);\n",
+ " return toinsert\n",
+ " }\n",
+ "\n",
+ " /* Handle when an output is cleared or removed */\n",
+ " events.on('clear_output.CodeCell', handleClearOutput);\n",
+ " events.on('delete.Cell', handleClearOutput);\n",
+ "\n",
+ " /* Handle when a new output is added */\n",
+ " events.on('output_added.OutputArea', handleAddOutput);\n",
+ "\n",
+ " /**\n",
+ " * Register the mime type and append_mime function with output_area\n",
+ " */\n",
+ " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
+ " /* Is output safe? */\n",
+ " safe: true,\n",
+ " /* Index of renderer in `output_area.display_order` */\n",
+ " index: 0\n",
+ " });\n",
+ " }\n",
+ "\n",
+ " // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
+ " if (root.Jupyter !== undefined) {\n",
+ " var events = require('base/js/events');\n",
+ " var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
+ "\n",
+ " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
+ " register_renderer(events, OutputArea);\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " \n",
+ " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
+ " root._bokeh_timeout = Date.now() + 5000;\n",
+ " root._bokeh_failed_load = false;\n",
+ " }\n",
+ "\n",
+ " var NB_LOAD_WARNING = {'data': {'text/html':\n",
+ " \"\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
+ " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"- re-rerun `output_notebook()` to attempt to load from CDN again, or
\\n\"+\n",
+ " \"- use INLINE resources instead, as so:
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"from bokeh.resources import INLINE\\n\"+\n",
+ " \"output_notebook(resources=INLINE)\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"
\"}};\n",
+ "\n",
+ " function display_loaded() {\n",
+ " var el = document.getElementById(\"1001\");\n",
+ " if (el != null) {\n",
+ " el.textContent = \"BokehJS is loading...\";\n",
+ " }\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " if (el != null) {\n",
+ " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
+ " }\n",
+ " } else if (Date.now() < root._bokeh_timeout) {\n",
+ " setTimeout(display_loaded, 100)\n",
+ " }\n",
+ " }\n",
+ "\n",
+ "\n",
+ " function run_callbacks() {\n",
+ " try {\n",
+ " root._bokeh_onload_callbacks.forEach(function(callback) {\n",
+ " if (callback != null)\n",
+ " callback();\n",
+ " });\n",
+ " } finally {\n",
+ " delete root._bokeh_onload_callbacks\n",
+ " }\n",
+ " console.debug(\"Bokeh: all callbacks have finished\");\n",
+ " }\n",
+ "\n",
+ " function load_libs(css_urls, js_urls, callback) {\n",
+ " if (css_urls == null) css_urls = [];\n",
+ " if (js_urls == null) js_urls = [];\n",
+ "\n",
+ " root._bokeh_onload_callbacks.push(callback);\n",
+ " if (root._bokeh_is_loading > 0) {\n",
+ " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
+ " return null;\n",
+ " }\n",
+ " if (js_urls == null || js_urls.length === 0) {\n",
+ " run_callbacks();\n",
+ " return null;\n",
+ " }\n",
+ " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
+ " root._bokeh_is_loading = css_urls.length + js_urls.length;\n",
+ "\n",
+ " function on_load() {\n",
+ " root._bokeh_is_loading--;\n",
+ " if (root._bokeh_is_loading === 0) {\n",
+ " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n",
+ " run_callbacks()\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " function on_error() {\n",
+ " console.error(\"failed to load \" + url);\n",
+ " }\n",
+ "\n",
+ " for (var i = 0; i < css_urls.length; i++) {\n",
+ " var url = css_urls[i];\n",
+ " const element = document.createElement(\"link\");\n",
+ " element.onload = on_load;\n",
+ " element.onerror = on_error;\n",
+ " element.rel = \"stylesheet\";\n",
+ " element.type = \"text/css\";\n",
+ " element.href = url;\n",
+ " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n",
+ " document.body.appendChild(element);\n",
+ " }\n",
+ "\n",
+ " for (var i = 0; i < js_urls.length; i++) {\n",
+ " var url = js_urls[i];\n",
+ " var element = document.createElement('script');\n",
+ " element.onload = on_load;\n",
+ " element.onerror = on_error;\n",
+ " element.async = false;\n",
+ " element.src = url;\n",
+ " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
+ " document.head.appendChild(element);\n",
+ " }\n",
+ " };var element = document.getElementById(\"1001\");\n",
+ " if (element == null) {\n",
+ " console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n",
+ " return false;\n",
+ " }\n",
+ "\n",
+ " function inject_raw_css(css) {\n",
+ " const element = document.createElement(\"style\");\n",
+ " element.appendChild(document.createTextNode(css));\n",
+ " document.body.appendChild(element);\n",
+ " }\n",
+ "\n",
+ " \n",
+ " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n",
+ " var css_urls = [];\n",
+ " \n",
+ "\n",
+ " var inline_js = [\n",
+ " function(Bokeh) {\n",
+ " Bokeh.set_log_level(\"info\");\n",
+ " },\n",
+ " function(Bokeh) {\n",
+ " \n",
+ " \n",
+ " }\n",
+ " ];\n",
+ "\n",
+ " function run_inline_js() {\n",
+ " \n",
+ " if (root.Bokeh !== undefined || force === true) {\n",
+ " \n",
+ " for (var i = 0; i < inline_js.length; i++) {\n",
+ " inline_js[i].call(root, root.Bokeh);\n",
+ " }\n",
+ " if (force === true) {\n",
+ " display_loaded();\n",
+ " }} else if (Date.now() < root._bokeh_timeout) {\n",
+ " setTimeout(run_inline_js, 100);\n",
+ " } else if (!root._bokeh_failed_load) {\n",
+ " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
+ " root._bokeh_failed_load = true;\n",
+ " } else if (force !== true) {\n",
+ " var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n",
+ " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
+ " }\n",
+ "\n",
+ " }\n",
+ "\n",
+ " if (root._bokeh_is_loading === 0) {\n",
+ " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
+ " run_inline_js();\n",
+ " } else {\n",
+ " load_libs(css_urls, js_urls, function() {\n",
+ " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n",
+ " run_inline_js();\n",
+ " });\n",
+ " }\n",
+ "}(window));"
+ ],
+ "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"\\n\"+\n \"
\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"
\\n\"+\n \"- re-rerun `output_notebook()` to attempt to load from CDN again, or
\\n\"+\n \"- use INLINE resources instead, as so:
\\n\"+\n \"
\\n\"+\n \"
\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"
\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"1001\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = 
on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };var element = document.getElementById(\"1001\");\n if (element == null) {\n console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n return false;\n }\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "output_notebook()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You should also adjust all the path constants to point at the relevant locations for you locally"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#MODEL_RUN_PATH = \"libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n",
+ "MODEL_RUN_PATH = \"libritts_360-half-September-28-2019_10+46AM-8565c50/\"\n",
+ "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n",
+ "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n",
+ "\n",
+ "# My single speaker locations\n",
+ "#EMBED_PATH = \"/home/neil/main/Projects/TTS3/embeddings/neil14/\"\n",
+ "#AUDIO_PATH = \"/home/neil/data/Projects/NeilTTS/neil14/wavs/\"\n",
+ "\n",
+ "# My multi speaker locations\n",
+ "EMBED_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360-embed_128/\"\n",
+ "AUDIO_PATH = \"datasets/LibriTTS/test-clean/\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "best_model.pth.tar\r\n",
+ "config.json\r\n",
+ "events.out.tfevents.1569660396.erogol-desktop\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -1 $MODEL_RUN_PATH"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " > Setting up Audio Processor...\n",
+ " | > sample_rate:16000\n",
+ " | > num_mels:40\n",
+ " | > min_level_db:-100\n",
+ " | > frame_shift_ms:12.5\n",
+ " | > frame_length_ms:50\n",
+ " | > ref_level_db:20\n",
+ " | > fft_size:1024\n",
+ " | > power:None\n",
+ " | > preemphasis:0.98\n",
+ " | > griffin_lim_iters:None\n",
+ " | > signal_norm:True\n",
+ " | > symmetric_norm:True\n",
+ " | > mel_fmin:0\n",
+ " | > mel_fmax:8000.0\n",
+ " | > spec_gain:20.0\n",
+ " | > stft_pad_mode:reflect\n",
+ " | > max_norm:4.0\n",
+ " | > clip_norm:True\n",
+ " | > do_trim_silence:False\n",
+ " | > trim_db:60\n",
+ " | > do_sound_norm:False\n",
+ " | > stats_path:None\n",
+ " | > hop_length:200\n",
+ " | > win_length:800\n"
+ ]
+ }
+ ],
+ "source": [
+ "CONFIG = load_config(CONFIG_PATH)\n",
+ "ap = AudioProcessor(**CONFIG['audio'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Bring in the embeddings created by **compute_embeddings.py**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Embeddings found: 0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Recursively collect every .npy embedding below EMBED_PATH.\n",
+ "# Sorted because glob returns files in arbitrary order, which would make later\n",
+ "# speaker/utterance indices differ between runs.\n",
+ "embed_files = sorted(glob.glob(os.path.join(EMBED_PATH, \"**\", \"*.npy\"), recursive=True))\n",
+ "print(f'Embeddings found: {len(embed_files)}')\n",
+ "\n",
+ "# Fail here with a clear message rather than with obscure errors in the cells below\n",
+ "if not embed_files:\n",
+ "    raise FileNotFoundError(f'No .npy embeddings found under {EMBED_PATH!r} - check EMBED_PATH')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Check that we did indeed find an embedding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "IndexError",
+ "evalue": "list index out of range",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0membed_files\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mIndexError\u001b[0m: list index out of range"
+ ]
+ }
+ ],
+ "source": [
+ "embed_files[0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Process the speakers\n",
+ "\n",
+ "Assumes count of **speaker_paths** corresponds to number of speakers (so a corpus in just one directory would be treated like a single speaker and the multiple directories of LibriTTS are treated as distinct speakers)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Speaker count: 0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Group the embedding files by speaker directory.\n",
+ "# The speaker directory is taken to be two levels above each file\n",
+ "# (presumably speaker/chapter/utterance.npy for LibriTTS - adjust for other layouts).\n",
+ "speaker_to_utter = {}\n",
+ "for embed_file in embed_files:\n",
+ "    speaker_path = os.path.dirname(os.path.dirname(embed_file))\n",
+ "    # setdefault creates the list the first time a speaker is seen\n",
+ "    speaker_to_utter.setdefault(speaker_path, []).append(embed_file)\n",
+ "\n",
+ "# Sorted so that speaker indices are stable between runs (set ordering is arbitrary)\n",
+ "speaker_paths = sorted(speaker_to_utter)\n",
+ "print(f'Speaker count: {len(speaker_paths)}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Set up the embeddings\n",
+ "\n",
+ "Adjust the number of speakers to select and the number of utterances from each speaker and they will be randomly sampled from the corpus"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "'a' cannot be empty unless no samples are taken",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mspeaker_idxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_paths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_speakers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mspeaker_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspeaker_idx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_idxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32mmtrand.pyx\u001b[0m in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: 'a' cannot be empty unless no samples are taken"
+ ]
+ }
+ ],
+ "source": [
+ "# Accumulators, one entry per sampled utterance (kept in the same order)\n",
+ "embed_list = []   # embedding arrays loaded from disk\n",
+ "labels = []       # speaker number (as a string) used for colouring\n",
+ "locations = []    # wav path relative to the corpus root, used for tooltips and links\n",
+ "\n",
+ "# single speaker \n",
+ "#num_speakers = 1\n",
+ "#num_utters = 1000\n",
+ "\n",
+ "# multi speaker\n",
+ "num_speakers = 10\n",
+ "num_utters = 20\n",
+ "\n",
+ "if not speaker_paths:\n",
+ "    raise ValueError('No speakers available - check EMBED_PATH and the cells above')\n",
+ "\n",
+ "# Never request more speakers than exist: sampling without replacement would raise\n",
+ "num_speakers = min(num_speakers, len(speaker_paths))\n",
+ "speaker_idxs = np.random.choice(len(speaker_paths), num_speakers, replace=False)\n",
+ "\n",
+ "for speaker_num, speaker_idx in enumerate(speaker_idxs):\n",
+ "    speaker_path = speaker_paths[speaker_idx]\n",
+ "    speakers_utter = speaker_to_utter[speaker_path]\n",
+ "    # Draw distinct utterances (capped at what this speaker has) so no point is plotted twice\n",
+ "    utter_count = min(num_utters, len(speakers_utter))\n",
+ "    utter_idxs = np.random.choice(len(speakers_utter), utter_count, replace=False)\n",
+ "    for utter_idx in utter_idxs:\n",
+ "        embed_path = speakers_utter[utter_idx]\n",
+ "        embed_list.append(np.load(embed_path))\n",
+ "        labels.append(str(speaker_num))\n",
+ "        # Same relative path as the embedding but with a .wav extension;\n",
+ "        # forward slashes so it can be appended to the server URL\n",
+ "        wav_stem = os.path.splitext(os.path.relpath(embed_path, EMBED_PATH))[0]\n",
+ "        locations.append(wav_stem.replace(os.sep, '/') + '.wav')\n",
+ "\n",
+ "# NOTE(review): assumes each saved embedding has a leading axis of size 1, so that\n",
+ "# concatenation yields one row per utterance - confirm against compute_embeddings.py\n",
+ "embeds = np.concatenate(embed_list)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Project the embeddings down to two dimensions with UMAP"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "module 'umap' has no attribute 'UMAP'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mumap\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUMAP\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprojection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mAttributeError\u001b[0m: module 'umap' has no attribute 'UMAP'"
+ ]
+ }
+ ],
+ "source": [
+ "# Fit UMAP on the sampled embeddings and keep the low-dimensional coordinates for plotting.\n",
+ "# NOTE(review): an AttributeError on umap.UMAP presumably means a different package named\n",
+ "# `umap` is installed instead of `umap-learn` - verify your environment.\n",
+ "reducer = umap.UMAP()\n",
+ "projection = reducer.fit_transform(embeds)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Interactively charting the data in Bokeh\n",
+ "\n",
+ "Set up various details for Bokeh to plot the data\n",
+ "\n",
+ "You can use the regular Bokeh [tools](http://docs.bokeh.org/en/1.4.0/docs/user_guide/tools.html?highlight=tools) to explore the data, with reset setting it back to normal\n",
+ "\n",
+ "Once you have started the local server (see cell below) you can then click on plotted points which will open a tab to play the audio for that point, enabling easy exploration of your corpus\n",
+ "\n",
+ "File location in the tooltip is given relative to **AUDIO_PATH**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Data behind the scatter plot: projected coordinates plus per-point metadata\n",
+ "source_wav_stems = ColumnDataSource(\n",
+ "    data=dict(\n",
+ "        x=projection.T[0].tolist(),\n",
+ "        y=projection.T[1].tolist(),\n",
+ "        desc=locations,\n",
+ "        label=labels,\n",
+ "    )\n",
+ ")\n",
+ "\n",
+ "hover = HoverTool(\n",
+ "    tooltips=[\n",
+ "        (\"file\", \"@desc\"),\n",
+ "        (\"speaker\", \"@label\"),\n",
+ "    ]\n",
+ ")\n",
+ "\n",
+ "# optionally consider adding these to the tooltips if you want additional detail\n",
+ "# for the coordinates: (\"(x,y)\", \"($x, $y)\"),\n",
+ "# for the index of the embedding / wav file: (\"index\", \"$index\"),\n",
+ "\n",
+ "# Sort the speaker labels numerically so each speaker keeps the same colour between runs\n",
+ "# (the order of a set is arbitrary)\n",
+ "factors = sorted(set(labels), key=int)\n",
+ "# The palette is looked up by size; 3 is used as the minimum size\n",
+ "pal_size = max(len(factors), 3)\n",
+ "if pal_size not in Category10:\n",
+ "    raise ValueError(f'{len(factors)} speakers do not fit the Category10 palette; use a larger palette such as Category20')\n",
+ "pal = Category10[pal_size]\n",
+ "\n",
+ "p = figure(plot_width=600, plot_height=400, tools=[hover, BoxZoomTool(), ResetTool(), TapTool()])\n",
+ "\n",
+ "p.circle('x', 'y', source=source_wav_stems, color=factor_cmap('label', palette=pal, factors=factors))\n",
+ "\n",
+ "# Clicking a point opens its wav file from the local server started in the last cell\n",
+ "url = \"http://localhost:8000/@desc\"\n",
+ "taptool = p.select(type=TapTool)\n",
+ "taptool.callback = OpenURL(url=url)\n",
+ "\n",
+ "show(p)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Local server to serve wav files from corpus\n",
+ "\n",
+ "This is required so that when you click on a data point, the audio file its hyperlink points to can be served from your local machine.\n",
+ "\n",
+ "There are other ways to serve this if you prefer and you can also run the commands manually on the command line\n",
+ "\n",
+ "The server will continue to run until stopped. To stop it, simply interrupt the kernel (i.e. use the square button or the Kernel menu)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Serve the corpus under AUDIO_PATH on http://localhost:8000, the address the plot links to.\n",
+ "# --directory (Python 3.7+) is used instead of %cd so the kernel's working directory is left\n",
+ "# unchanged and the relative paths defined above keep working when cells are re-run.\n",
+ "# --bind 127.0.0.1 keeps the server reachable from this machine only.\n",
+ "!python -m http.server 8000 --bind 127.0.0.1 --directory \"$AUDIO_PATH\""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}