diff --git a/notebooks/PlotUmapLibriTTS.ipynb b/notebooks/PlotUmapLibriTTS.ipynb new file mode 100644 index 00000000..7feb9cc5 --- /dev/null +++ b/notebooks/PlotUmapLibriTTS.ipynb @@ -0,0 +1,737 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Overview\n", + "\n", + "This notebook can be used with both a single or multi- speaker corpus and allows the interactive plotting of speaker embeddings linked to underlying audio (see instructions in the repo's speaker_embedding directory)\n", + "\n", + "Depending on the directory structure used for your corpus, you may need to adjust handling of **speaker_to_utter** and **locations**." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import glob\n", + "import random\n", + "import numpy as np\n", + "import torch\n", + "import umap\n", + "\n", + "from TTS.speaker_encoder.model import SpeakerEncoder\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.utils.io import load_config\n", + "\n", + "from bokeh.io import output_notebook, show\n", + "from bokeh.plotting import figure\n", + "from bokeh.models import HoverTool, ColumnDataSource, BoxZoomTool, ResetTool, OpenURL, TapTool\n", + "from bokeh.transform import factor_cmap, factor_mark\n", + "from bokeh.palettes import Category10" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For larger sets of speakers, you can use **Category20**, but you need to change it in the **pal** variable too\n", + "\n", + "List of Bokeh palettes here: http://docs.bokeh.org/en/1.4.0/docs/reference/palettes.html\n", + "\n", + "**NB:** if you have problems with other palettes, first see https://stackoverflow.com/questions/48333820/why-do-some-bokeh-palettes-raise-a-valueerror-when-used-in-factor-cmap" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " Loading BokehJS ...\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(\"1001\");\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };var element = document.getElementById(\"1001\");\n", + " if (element == null) {\n", + " console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n", + " return false;\n", + " }\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + " \n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " if (force === true) {\n", + " display_loaded();\n", + " }} else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"1001\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };var element = document.getElementById(\"1001\");\n if (element == null) {\n console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n return false;\n }\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "output_notebook()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should also adjust all the path constants to point at the relevant locations for you locally" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#MODEL_RUN_PATH = \"libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n", + "MODEL_RUN_PATH = \"libritts_360-half-September-28-2019_10+46AM-8565c50/\"\n", + "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n", + "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n", + "\n", + "# My single speaker locations\n", + "#EMBED_PATH = \"/home/neil/main/Projects/TTS3/embeddings/neil14/\"\n", + "#AUDIO_PATH = \"/home/neil/data/Projects/NeilTTS/neil14/wavs/\"\n", + "\n", + "# My multi speaker locations\n", + "EMBED_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360-embed_128/\"\n", + "AUDIO_PATH = \"datasets/LibriTTS/test-clean/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "best_model.pth.tar\r\n", + "config.json\r\n", + "events.out.tfevents.1569660396.erogol-desktop\r\n" + ] + } + ], + "source": [ + "!ls -1 $MODEL_RUN_PATH" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " > Setting up Audio Processor...\n", + " | > sample_rate:16000\n", + " | > num_mels:40\n", + " | > min_level_db:-100\n", + " | > frame_shift_ms:12.5\n", + " | > frame_length_ms:50\n", + " | > ref_level_db:20\n", + " | > fft_size:1024\n", + " | > power:None\n", + " | > preemphasis:0.98\n", + " | > griffin_lim_iters:None\n", + " | > signal_norm:True\n", + " | > symmetric_norm:True\n", + " | > mel_fmin:0\n", + " | > mel_fmax:8000.0\n", + " | > spec_gain:20.0\n", + " | > stft_pad_mode:reflect\n", + " | > max_norm:4.0\n", + " | > clip_norm:True\n", + " | > do_trim_silence:False\n", + " | > trim_db:60\n", + " | > do_sound_norm:False\n", + " | > stats_path:None\n", + " | > hop_length:200\n", + " | > win_length:800\n" + ] + } + ], + "source": [ + "CONFIG = load_config(CONFIG_PATH)\n", + "ap = AudioProcessor(**CONFIG['audio'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bring in the embeddings created by **compute_embeddings.py**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embeddings found: 0\n" + ] + } + ], + "source": [ + "embed_files = glob.glob(EMBED_PATH+\"/**/*.npy\", recursive=True)\n", + "print(f'Embeddings found: {len(embed_files)}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check that we did indeed find an embedding" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "list index out of range", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0membed_files\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m: list index out of range" + ] + } + ], + "source": [ + "embed_files[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process the speakers\n", + "\n", + "Assumes count of **speaker_paths** corresponds to number of speakers (so a corpus in just one directory would be treated like a single speaker and the multiple directories of LibriTTS are treated as distinct speakers)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Speaker count: 0\n" + ] + } + ], + "source": [ + "speaker_paths = list(set([os.path.dirname(os.path.dirname(embed_file)) for embed_file in embed_files]))\n", + "speaker_to_utter = {}\n", + "for embed_file in embed_files:\n", + " speaker_path = os.path.dirname(os.path.dirname(embed_file))\n", + " try:\n", + " speaker_to_utter[speaker_path].append(embed_file)\n", + " except:\n", + " speaker_to_utter[speaker_path]=[embed_file]\n", + "print(f'Speaker count: {len(speaker_paths)}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up the embeddings\n", + "\n", + "Adjust the number of speakers to select and the number of utterances from each speaker and they will be randomly sampled from the corpus" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "'a' cannot be empty unless no samples are taken", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mspeaker_idxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_paths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_speakers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mspeaker_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspeaker_idx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_idxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mmtrand.pyx\u001b[0m in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: 'a' cannot be empty unless no samples are taken" + ] + } + ], + "source": [ + "ttsembeds = []\n", + "labels = []\n", + "locations = []\n", + "\n", + "# single speaker \n", + "#num_speakers = 1\n", + "#num_utters = 1000\n", + "\n", + "# multi speaker\n", + "num_speakers = 10\n", + "num_utters = 20\n", + "\n", + "\n", + "speaker_idxs = np.random.choice(range(len(speaker_paths)), num_speakers, replace=False )\n", + "\n", + "for speaker_num, speaker_idx in enumerate(speaker_idxs):\n", + " speaker_path = speaker_paths[speaker_idx]\n", + " speakers_utter = speaker_to_utter[speaker_path]\n", + " utter_idxs = np.random.randint(0, len(speakers_utter) , num_utters)\n", + " for utter_idx in utter_idxs:\n", + " embed_path = speaker_to_utter[speaker_path][utter_idx]\n", + " embed = np.load(embed_path)\n", + " embeds.append(embed)\n", + " labels.append(str(speaker_num))\n", + " #locations.append(embed_path.replace(EMBED_PATH, '').replace('.npy','.wav'))\n", + "embeds = np.concatenate(embeds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load embeddings with UMAP" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "module 'umap' has no attribute 'UMAP'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mumap\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUMAP\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprojection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: module 'umap' has no attribute 'UMAP'" + ] + } + ], + "source": [ + "model = umap.UMAP()\n", + "projection = model.fit_transform(embeds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Interactively charting the data in Bokeh\n", + "\n", + "Set up various details for Bokeh to plot the data\n", + "\n", + "You can use the regular Bokeh [tools](http://docs.bokeh.org/en/1.4.0/docs/user_guide/tools.html?highlight=tools) to explore the data, with reset setting it back to normal\n", + "\n", + "Once you have started the local server (see cell below) you can then click on plotted points which will open a tab to play the audio for that point, enabling easy exploration of your corpus\n", + "\n", + "File location in the tooltip is given relative to **AUDIO_PATH**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "source_wav_stems = ColumnDataSource(\n", + " data=dict(\n", + " x = projection.T[0].tolist(),\n", + " y = projection.T[1].tolist(),\n", + " desc=locations,\n", + " label=labels\n", + " )\n", + " )\n", + "\n", + "hover = HoverTool(\n", + " tooltips=[\n", + " (\"file\", \"@desc\"),\n", + " (\"speaker\", \"@label\"),\n", + " ]\n", + " )\n", + "\n", + "# optionally consider adding these to the tooltips if you want additional detail\n", + "# for the coordinates: (\"(x,y)\", \"($x, $y)\"),\n", + "# for the index of the embedding / wav file: (\"index\", \"$index\"),\n", + "\n", + "factors = list(set(labels))\n", + "pal_size = max(len(factors), 3)\n", + "pal = Category10[pal_size]\n", + "\n", + "p = figure(plot_width=600, plot_height=400, tools=[hover,BoxZoomTool(), ResetTool(), TapTool()])\n", + "\n", + "\n", + "p.circle('x', 'y', source=source_wav_stems, color=factor_cmap('label', palette=pal, factors=factors),)\n", + "\n", + "url = \"http://localhost:8000/@desc\"\n", + "taptool = p.select(type=TapTool)\n", + "taptool.callback = OpenURL(url=url)\n", + "\n", + "show(p)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Local server to serve wav files from corpus\n", + "\n", + "This is required so that when you click on a data point the hyperlink associated with it will be served the file locally.\n", + "\n", + "There are other ways to serve this if you prefer and you can also run the commands manually on the command line\n", + "\n", + "The server will continue to run until stopped. To stop it simply interupt the kernel (ie square button or under Kernel menu)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%cd $AUDIO_PATH\n", + "%pwd\n", + "!python -m http.server" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}