From e1eab1ce4bcc5675163865b5927ea51b65a867c3 Mon Sep 17 00:00:00 2001 From: erogol Date: Wed, 7 Oct 2020 13:34:21 +0200 Subject: [PATCH] print model r value as loading it --- TTS/tts/utils/io.py | 1 + ..._TTS_and_MultiBand_MelGAN_TF_Example.ipynb | 646 +++++++++--------- notebooks/TestAttention.ipynb | 2 +- .../dataset_analysis/AnalyzeDataset.ipynb | 101 ++- 4 files changed, 415 insertions(+), 335 deletions(-) diff --git a/TTS/tts/utils/io.py b/TTS/tts/utils/io.py index 2bc755b4..18f83746 100644 --- a/TTS/tts/utils/io.py +++ b/TTS/tts/utils/io.py @@ -20,6 +20,7 @@ def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False): # set model stepsize if hasattr(model.decoder, 'r'): model.decoder.set_r(state['r']) + print(" > Model r: ", state['r']) return model, state diff --git a/notebooks/DDC_TTS_and_MultiBand_MelGAN_TF_Example.ipynb b/notebooks/DDC_TTS_and_MultiBand_MelGAN_TF_Example.ipynb index 5264b125..7f616138 100644 --- a/notebooks/DDC_TTS_and_MultiBand_MelGAN_TF_Example.ipynb +++ b/notebooks/DDC_TTS_and_MultiBand_MelGAN_TF_Example.ipynb @@ -1,316 +1,346 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "DDC-TTS_and_MultiBand-MelGAN_TF_Example.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "6LWsNd3_M3MP" + }, + "source": [ + "# Mozilla TTS on CPU Real-Time Speech Synthesis with Tensorflow" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "6LWsNd3_M3MP", - "colab_type": "text" - }, - "source": [ - "# Mozilla TTS on CPU Real-Time Speech Synthesis with Tensorflow" - ] + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "FAqrSIWgLyP0" + }, + "source": [ + "**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n", + "\n", + "These TF models support TF 2.2 and for different versions you might need to\n", + "regenerate them. \n", + "\n", + "We use Tacotron2 and MultiBand-Melgan models and LJSpeech dataset.\n", + "\n", + "Tacotron2 is trained using [Double Decoder Consistency](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) (DDC) only for 130K steps (3 days) with a single GPU.\n", + "\n", + "MultiBand-Melgan is trained 1.45M steps with real spectrograms.\n", + "\n", + "Note that both model performances can be improved with more training.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "Ku-dA4DKoeXk" + }, + "source": [ + "### Download Models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 162 }, - { - "cell_type": "markdown", - "metadata": { - "id": "FAqrSIWgLyP0", - "colab_type": "text" - }, - "source": [ - "**These models are converted from released [PyTorch models](https://colab.research.google.com/drive/1u_16ZzHjKYFn1HNVuA4Qf_i2MMFB9olY?usp=sharing) using our TF utilities provided in Mozilla TTS.**\n", - "\n", - "These TF models support TF 2.2 and for different versions you might need to\n", - "regenerate them. \n", - "\n", - "We use Tacotron2 and MultiBand-Melgan models and LJSpeech dataset.\n", - "\n", - "Tacotron2 is trained using [Double Decoder Consistency](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) (DDC) only for 130K steps (3 days) with a single GPU.\n", - "\n", - "MultiBand-Melgan is trained 1.45M steps with real spectrograms.\n", - "\n", - "Note that both model performances can be improved with more training.\n" - ] + "colab_type": "code", + "id": "jGIgnWhGsxU1", + "outputId": "08b0dddd-4edf-48c9-e8e5-a419b36a5c3d", + "tags": [] + }, + "outputs": [], + "source": [ + "!gdown --id 1p7OSEEW_Z7ORxNgfZwhMy7IiLE1s0aH7 -O data/tts_model.pkl\n", + "!gdown --id 18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc -O data/config.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ku-dA4DKoeXk", - "colab_type": "text" - }, - "source": [ - "### Download Models" - ] + "colab_type": "code", + "id": "4dnpE0-kvTsu", + "outputId": "2fe836eb-c7e7-4f1e-9352-0142126bb19f", + "tags": [] + }, + "outputs": [], + "source": [ + "!gdown --id 1rHmj7CqD3Sfa716Y3ub_vpIBrQg_b1yF -O data/vocoder_model.pkl\n", + "!gdown --id 1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu -O data/config_vocoder.json\n", + "!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O data/scale_stats.npy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "Zlgi8fPdpRF0" + }, + "source": [ + "### Define TTS function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "f-Yc42nQZG5A" + }, + "outputs": [], + "source": [ + "def tts(model, text, CONFIG, p):\n", + " t_1 = time.time()\n", + " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", + " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars,\n", + " backend='tf')\n", + " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", + " waveform = waveform.numpy()[0, 0]\n", + " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", + " tps = (time.time() - t_1) / len(waveform)\n", + " print(waveform.shape)\n", + " print(\" > Run-time: {}\".format(time.time() - t_1))\n", + " print(\" > Real-time factor: {}\".format(rtf))\n", + " print(\" > Time per step: {}\".format(tps))\n", + " IPython.display.display(IPython.display.Audio(waveform, rate=CONFIG.audio['sample_rate'])) \n", + " return alignment, mel_postnet_spec, stop_tokens, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "ZksegYQepkFg" + }, + "source": [ + "### Load Models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "oVa0kOamprgj" + }, + "outputs": [], + "source": [ + "import os\n", + "import torch\n", + "import time\n", + "import IPython\n", + "\n", + "from TTS.tts.tf.utils.generic_utils import setup_model\n", + "from TTS.tts.tf.utils.io import load_checkpoint\n", + "from TTS.utils.io import load_config\n", + "from TTS.tts.utils.text.symbols import symbols, phonemes\n", + "from TTS.utils.audio import AudioProcessor\n", + "from TTS.tts.utils.synthesis import synthesis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "EY-sHVO8IFSH" + }, + "outputs": [], + "source": [ + "# runtime settings\n", + "use_cuda = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "_1aIUp2FpxOQ" + }, + "outputs": [], + "source": [ + "# model paths\n", + "TTS_MODEL = \"data/tts_model.pkl\"\n", + "TTS_CONFIG = \"data/config.json\"\n", + "VOCODER_MODEL = \"data/vocoder_model.pkl\"\n", + "VOCODER_CONFIG = \"data/config_vocoder.json\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": {}, + "colab_type": "code", + "id": "CpgmdBVQplbv" + }, + "outputs": [], + "source": [ + "# load configs\n", + "TTS_CONFIG = load_config(TTS_CONFIG)\n", + "VOCODER_CONFIG = load_config(VOCODER_CONFIG)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 471 }, - { - "cell_type": "code", - "metadata": { - "id": "jGIgnWhGsxU1", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 162 - }, - "outputId": "08b0dddd-4edf-48c9-e8e5-a419b36a5c3d", - "tags": [] - }, - "source": [ - "!gdown --id 1p7OSEEW_Z7ORxNgfZwhMy7IiLE1s0aH7 -O data/tts_model.pkl\n", - "!gdown --id 18CQ6G6tBEOfvCHlPqP8EBI4xWbrr9dBc -O data/config.json" - ], - "execution_count": null, - "outputs": [] + "colab_type": "code", + "id": "zmrQxiozIUVE", + "outputId": "fa71bd05-401f-4e5b-a6f7-60ae765966db", + "tags": [] + }, + "outputs": [], + "source": [ + "# load the audio processor\n", + "TTS_CONFIG.audio['stats_path'] = 'data/scale_stats.npy'\n", + "ap = AudioProcessor(**TTS_CONFIG.audio) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 72 }, - { - "cell_type": "code", - "metadata": { - "id": "4dnpE0-kvTsu", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 235 - }, - "outputId": "2fe836eb-c7e7-4f1e-9352-0142126bb19f", - "tags": [] - }, - "source": [ - "!gdown --id 1rHmj7CqD3Sfa716Y3ub_vpIBrQg_b1yF -O data/vocoder_model.pkl\n", - "!gdown --id 1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu -O data/config_vocoder.json\n", - "!gdown --id 11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU -O data/scale_stats.npy" - ], - "execution_count": null, - "outputs": [] + "colab_type": "code", + "id": "8fLoI4ipqMeS", + "outputId": "595d990f-930d-4698-ee14-77796b5eed7d", + "tags": [] + }, + "outputs": [], + "source": [ + "# LOAD TTS MODEL\n", + "# multi speaker \n", + "speaker_id = None\n", + "speakers = []\n", + "\n", + "# load the model\n", + "num_chars = len(phonemes) if TTS_CONFIG.use_phonemes else len(symbols)\n", + "model = setup_model(num_chars, len(speakers), TTS_CONFIG)\n", + "model.build_inference()\n", + "model = load_checkpoint(model, TTS_MODEL)\n", + "model.decoder.set_max_decoder_steps(1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 489 }, - { - "cell_type": "markdown", - "metadata": { - "id": "Zlgi8fPdpRF0", - "colab_type": "text" - }, - "source": [ - "### Define TTS function" - ] + "colab_type": "code", + "id": "zKoq0GgzqzhQ", + "outputId": "2cc3deae-144f-4465-da3b-98628d948506" + }, + "outputs": [], + "source": [ + "from TTS.vocoder.tf.utils.generic_utils import setup_generator\n", + "from TTS.vocoder.tf.utils.io import load_checkpoint\n", + "\n", + "# LOAD VOCODER MODEL\n", + "vocoder_model = setup_generator(VOCODER_CONFIG)\n", + "vocoder_model.build_inference()\n", + "vocoder_model = load_checkpoint(vocoder_model, VOCODER_MODEL)\n", + "vocoder_model.inference_padding = 0\n", + "\n", + "ap_vocoder = AudioProcessor(**VOCODER_CONFIG['audio']) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "Collapsed": "false", + "colab_type": "text", + "id": "Ws_YkPKsLgo-" + }, + "source": [ + "## Run Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "Collapsed": "false", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 134 }, - { - "cell_type": "code", - "metadata": { - "id": "f-Yc42nQZG5A", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def tts(model, text, CONFIG, p):\n", - " t_1 = time.time()\n", - " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", - " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars,\n", - " backend='tf')\n", - " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", - " waveform = waveform.numpy()[0, 0]\n", - " rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate)\n", - " tps = (time.time() - t_1) / len(waveform)\n", - " print(waveform.shape)\n", - " print(\" > Run-time: {}\".format(time.time() - t_1))\n", - " print(\" > Real-time factor: {}\".format(rtf))\n", - " print(\" > Time per step: {}\".format(tps))\n", - " IPython.display.display(IPython.display.Audio(waveform, rate=CONFIG.audio['sample_rate'])) \n", - " return alignment, mel_postnet_spec, stop_tokens, waveform" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZksegYQepkFg", - "colab_type": "text" - }, - "source": [ - "### Load Models" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "oVa0kOamprgj", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import os\n", - "import torch\n", - "import time\n", - "import IPython\n", - "\n", - "from TTS.tts.tf.utils.generic_utils import setup_model\n", - "from TTS.tts.tf.utils.io import load_checkpoint\n", - "from TTS.utils.io import load_config\n", - "from TTS.tts.utils.text.symbols import symbols, phonemes\n", - "from TTS.utils.audio import AudioProcessor\n", - "from TTS.tts.utils.synthesis import synthesis" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "EY-sHVO8IFSH", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# runtime settings\n", - "use_cuda = False" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "_1aIUp2FpxOQ", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# model paths\n", - "TTS_MODEL = \"data/tts_model.pkl\"\n", - "TTS_CONFIG = \"data/config.json\"\n", - "VOCODER_MODEL = \"data/vocoder_model.pkl\"\n", - "VOCODER_CONFIG = \"data/config_vocoder.json\"" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "CpgmdBVQplbv", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# load configs\n", - "TTS_CONFIG = load_config(TTS_CONFIG)\n", - "VOCODER_CONFIG = load_config(VOCODER_CONFIG)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "zmrQxiozIUVE", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 471 - }, - "outputId": "fa71bd05-401f-4e5b-a6f7-60ae765966db", - "tags": [] - }, - "source": [ - "# load the audio processor\n", - "TTS_CONFIG.audio['stats_path'] = 'data/scale_stats.npy'\n", - "ap = AudioProcessor(**TTS_CONFIG.audio) " - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "8fLoI4ipqMeS", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 72 - }, - "outputId": "595d990f-930d-4698-ee14-77796b5eed7d", - "tags": [] - }, - "source": [ - "# LOAD TTS MODEL\n", - "# multi speaker \n", - "speaker_id = None\n", - "speakers = []\n", - "\n", - "# load the model\n", - "num_chars = len(phonemes) if TTS_CONFIG.use_phonemes else len(symbols)\n", - "model = setup_model(num_chars, len(speakers), TTS_CONFIG)\n", - "model.build_inference()\n", - "model = load_checkpoint(model, TTS_MODEL)\n", - "model.decoder.set_max_decoder_steps(1000)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "zKoq0GgzqzhQ", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 489 - }, - "outputId": "2cc3deae-144f-4465-da3b-98628d948506" - }, - "source": [ - "from TTS.vocoder.tf.utils.generic_utils import setup_generator\n", - "from TTS.vocoder.tf.utils.io import load_checkpoint\n", - "\n", - "# LOAD VOCODER MODEL\n", - "vocoder_model = setup_generator(VOCODER_CONFIG)\n", - "vocoder_model.build_inference()\n", - "vocoder_model = load_checkpoint(vocoder_model, VOCODER_MODEL)\n", - "vocoder_model.inference_padding = 0\n", - "\n", - "ap_vocoder = AudioProcessor(**VOCODER_CONFIG['audio']) " - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ws_YkPKsLgo-", - "colab_type": "text" - }, - "source": [ - "## Run Inference" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "FuWxZ9Ey5Puj", - "colab_type": "code", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 134 - }, - "outputId": "07ede6e5-06e6-4612-f687-7984d20e5254" - }, - "source": [ - "sentence = \"Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go.\"\n", - "align, spec, stop_tokens, wav = tts(model, sentence, TTS_CONFIG, ap)" - ], - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file + "colab_type": "code", + "id": "FuWxZ9Ey5Puj", + "outputId": "07ede6e5-06e6-4612-f687-7984d20e5254" + }, + "outputs": [], + "source": [ + "sentence = \"Bill got in the habit of asking himself “Is that thought true?” and if he wasn’t absolutely certain it was, he just let it go.\"\n", + "align, spec, stop_tokens, wav = tts(model, sentence, TTS_CONFIG, ap)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "DDC-TTS_and_MultiBand-MelGAN_TF_Example.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/TestAttention.ipynb b/notebooks/TestAttention.ipynb index 51413099..de58e375 100644 --- a/notebooks/TestAttention.ipynb +++ b/notebooks/TestAttention.ipynb @@ -181,7 +181,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.7" } }, "nbformat": 4, diff --git a/notebooks/dataset_analysis/AnalyzeDataset.ipynb b/notebooks/dataset_analysis/AnalyzeDataset.ipynb index 62870fdc..e1715351 100644 --- a/notebooks/dataset_analysis/AnalyzeDataset.ipynb +++ b/notebooks/dataset_analysis/AnalyzeDataset.ipynb @@ -3,7 +3,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "TTS_PATH = \"/home/erogol/projects/\"" @@ -12,7 +14,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "import os\n", @@ -34,7 +38,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "DATA_PATH = \"/home/erogol/Data/m-ai-labs/de_DE/by_book/male/karlsson/\"\n", @@ -58,7 +64,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "# use your own preprocessor at this stage - TTS/datasets/proprocess.py\n", @@ -69,7 +77,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "# check wavs if exist\n", @@ -84,7 +94,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "# show duplicate items\n", @@ -95,7 +107,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "def load_item(item):\n", @@ -121,7 +135,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "# count words in the dataset\n", @@ -136,7 +152,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "text_vs_durs = {} # text length vs audio duration\n", @@ -155,7 +173,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "# text_len vs avg_audio_len, median_audio_len, std_audio_len\n", @@ -170,7 +190,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "source": [ "### Avg audio length per char" ] @@ -178,7 +200,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "for item in data:\n", @@ -189,7 +213,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "sec_per_chars = []\n", @@ -205,7 +231,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "mean = np.mean(sec_per_chars)\n", @@ -217,7 +245,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "dist = norm(mean, std)\n", @@ -234,7 +264,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "source": [ "### Plot Dataset Statistics" ] @@ -242,7 +274,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "plt.title(\"text length vs mean audio duration\")\n", @@ -252,7 +286,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "plt.title(\"text length vs median audio duration\")\n", @@ -262,7 +298,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "plt.title(\"text length vs STD\")\n", @@ -272,7 +310,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "plt.title(\"text length vs # instances\")\n", @@ -281,7 +321,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "source": [ "### Check words frequencies" ] @@ -289,7 +331,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "w_count_df = pd.DataFrame.from_dict(w_count, orient='index')\n", @@ -300,6 +344,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "Collapsed": "false", "scrolled": true }, "outputs": [], @@ -310,7 +355,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "# check a certain word\n", @@ -320,7 +367,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "Collapsed": "false" + }, "outputs": [], "source": [ "# fequency bar plot - it takes time!!\n", @@ -344,9 +393,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.7.7" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }