From 625ab614f0d52cfc36bf538d29507ca9f2dc0501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Tue, 17 Aug 2021 11:47:44 +0000 Subject: [PATCH] Fix import in CheckSpectrogram.ipynb --- .../dataset_analysis/CheckSpectrograms.ipynb | 162 +++++++++--------- 1 file changed, 82 insertions(+), 80 deletions(-) diff --git a/notebooks/dataset_analysis/CheckSpectrograms.ipynb b/notebooks/dataset_analysis/CheckSpectrograms.ipynb index c0cd0aa6..74ca51ab 100644 --- a/notebooks/dataset_analysis/CheckSpectrograms.ipynb +++ b/notebooks/dataset_analysis/CheckSpectrograms.ipynb @@ -3,28 +3,24 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "%matplotlib inline\n", "\n", "from TTS.utils.audio import AudioProcessor\n", "from TTS.tts.utils.visual import plot_spectrogram\n", - "from TTS.utils.io import load_config\n", + "from TTS.config import load_config\n", "\n", "import IPython.display as ipd\n", "import glob" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "config_path = \"/home/erogol/gdrive/Projects/TTS/recipes/ljspeech/align_tts/config_transformer2.json\"\n", "data_path = \"/home/erogol/gdrive/Datasets/LJSpeech-1.1/\"\n", @@ -39,28 +35,28 @@ "\n", "print(\"File list, by index:\")\n", "dict(enumerate(file_paths))" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "markdown", - "metadata": { - "Collapsed": "false" - }, "source": [ "### Setup Audio Processor\n", "Play with the AP parameters until you find a good fit with the synthesis speech below.\n", "\n", "The default values are loaded from your config.json file, so you only need to\n", "uncomment and modify values below that you'd like to tune." - ] + ], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "tune_params={\n", "# 'audio_processor': 'audio',\n", @@ -95,54 +91,54 @@ "tuned_config.update(tune_params)\n", "\n", "AP = AudioProcessor(**tuned_config);" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "markdown", - "metadata": { - "Collapsed": "false" - }, "source": [ "### Check audio loading " - ] + ], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "wav = AP.load_wav(SAMPLE_FILE_PATH)\n", "ipd.Audio(data=wav, rate=AP.sample_rate) " - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "markdown", - "metadata": { - "Collapsed": "false" - }, "source": [ "### Generate Mel-Spectrogram and Re-synthesis with GL" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "AP.power = 1.5" - ] - }, - { - "cell_type": "code", - "execution_count": null, + ], "metadata": { "Collapsed": "false" - }, + } + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "AP.power = 1.5" + ], "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, "source": [ "mel = AP.melspectrogram(wav)\n", "print(\"Max:\", mel.max())\n", @@ -152,24 +148,24 @@ "\n", "wav_gen = AP.inv_melspectrogram(mel)\n", "ipd.Audio(wav_gen, rate=AP.sample_rate)" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "markdown", - "metadata": { - "Collapsed": "false" - }, "source": [ "### Generate Linear-Spectrogram and Re-synthesis with GL" - ] + ], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "spec = AP.spectrogram(wav)\n", "print(\"Max:\", spec.max())\n", @@ -179,26 +175,26 @@ "\n", "wav_gen = AP.inv_spectrogram(spec)\n", "ipd.Audio(wav_gen, rate=AP.sample_rate)" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "markdown", - "metadata": { - "Collapsed": "false" - }, "source": [ "### Compare values for a certain parameter\n", "\n", "Optimize your parameters by comparing different values per parameter at a time." - ] + ], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "from librosa import display\n", "from matplotlib import pylab as plt\n", @@ -238,36 +234,39 @@ " val = values[idx]\n", " print(\" > {} = {}\".format(attribute, val))\n", " IPython.display.display(IPython.display.Audio(wav_gen, rate=AP.sample_rate))" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "compare_values(\"preemphasis\", [0, 0.5, 0.97, 0.98, 0.99])" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } }, { "cell_type": "code", "execution_count": null, - "metadata": { - "Collapsed": "false" - }, - "outputs": [], "source": [ "compare_values(\"ref_level_db\", [2, 5, 10, 15, 20, 25, 30, 35, 40, 1000])" - ] + ], + "outputs": [], + "metadata": { + "Collapsed": "false" + } } ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.8.5 64-bit ('torch': conda)" }, "language_info": { "codemirror_mode": { @@ -280,8 +279,11 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" + }, + "interpreter": { + "hash": "27648abe09795c3a768a281b31f7524fcf66a207e733f8ecda3a4e1fd1059fb0" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file