diff --git a/notebooks/dataset_analysis/CheckSpectrograms.ipynb b/notebooks/dataset_analysis/CheckSpectrograms.ipynb index 1ca580e1..c0cd0aa6 100644 --- a/notebooks/dataset_analysis/CheckSpectrograms.ipynb +++ b/notebooks/dataset_analysis/CheckSpectrograms.ipynb @@ -2,9 +2,11 @@ "cells": [ { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ "%matplotlib inline\n", "\n", @@ -14,18 +16,18 @@ "\n", "import IPython.display as ipd\n", "import glob" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ - "config_path = \"/home/erogol/Projects/TTS/tts/tts/config_thorsten_de.json\"\n", - "data_path = \"/home/erogol/Data/thorsten-german/\"\n", + "config_path = \"/home/erogol/gdrive/Projects/TTS/recipes/ljspeech/align_tts/config_transformer2.json\"\n", + "data_path = \"/home/erogol/gdrive/Datasets/LJSpeech-1.1/\"\n", "\n", "file_paths = glob.glob(data_path + \"/**/*.wav\", recursive=True)\n", "CONFIG = load_config(config_path)\n", @@ -37,9 +39,7 @@ "\n", "print(\"File list, by index:\")\n", "dict(enumerate(file_paths))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -56,9 +56,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ "tune_params={\n", "# 'audio_processor': 'audio',\n", @@ -80,7 +82,7 @@ "# These options have to be forced off in order to avoid errors about the \n", "# pre-calculated not matching the options being tuned.\n", "reset={\n", - " 'signal_norm': False,\n", + " 'signal_norm': True, # check this if you want to test normalization parameters.\n", " 'stats_path': None,\n", " 'symmetric_norm': False,\n", " 'max_norm': 1,\n", @@ -93,9 +95,7 @@ "tuned_config.update(tune_params)\n", "\n", "AP = AudioProcessor(**tuned_config);" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -108,15 +108,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ "wav = AP.load_wav(SAMPLE_FILE_PATH)\n", "ipd.Audio(data=wav, rate=AP.sample_rate) " - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -129,29 +129,30 @@ }, { "cell_type": "code", - "source": [ - "AP.power = 1.0" - ], "execution_count": null, - "outputs": [] + "metadata": {}, + "outputs": [], + "source": [ + "AP.power = 1.5" + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ "mel = AP.melspectrogram(wav)\n", "print(\"Max:\", mel.max())\n", "print(\"Min:\", mel.min())\n", "print(\"Mean:\", mel.mean())\n", - "plot_spectrogram(mel.T, AP);\n", + "plot_spectrogram(mel.T, AP, output_fig=True)\n", "\n", "wav_gen = AP.inv_melspectrogram(mel)\n", "ipd.Audio(wav_gen, rate=AP.sample_rate)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -164,21 +165,21 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ "spec = AP.spectrogram(wav)\n", "print(\"Max:\", spec.max())\n", "print(\"Min:\", spec.min())\n", "print(\"Mean:\", spec.mean())\n", - "plot_spectrogram(spec.T, AP);\n", + "plot_spectrogram(spec.T, AP, output_fig=True)\n", "\n", "wav_gen = AP.inv_spectrogram(spec)\n", "ipd.Audio(wav_gen, rate=AP.sample_rate)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -193,9 +194,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ "from librosa import display\n", "from matplotlib import pylab as plt\n", @@ -235,31 +238,29 @@ " val = values[idx]\n", " print(\" > {} = {}\".format(attribute, val))\n", " IPython.display.display(IPython.display.Audio(wav_gen, rate=AP.sample_rate))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ "compare_values(\"preemphasis\", [0, 0.5, 0.97, 0.98, 0.99])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "Collapsed": "false" }, + "outputs": [], "source": [ - "compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 100])" - ], - "execution_count": null, - "outputs": [] + "compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 1000])" + ] } ], "metadata": {